Adding the paper

This commit is contained in:
Denshooter
2022-03-30 21:20:13 +02:00
parent 52976f607b
commit 837d3b47bc
25 changed files with 1220 additions and 0 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 116 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

BIN
Paper/Figures/LossPlot.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 506 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

87
Paper/Literatur.bib Normal file
View File

@@ -0,0 +1,87 @@
% Encoding: UTF-8
@Misc{jetsonNano,
howpublished = {https://developer.nvidia.com/embedded/jetson-nano-developer-kit},
note = {Accessed: 2022-03-24},
title = {{Jetson Nano Developer Kit}},
}
@Misc{nvidia3070ti,
howpublished = {https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/rtx-3070-3070ti/},
note = {Accessed: 2022-03-24},
title = {{GeForce RTX 3070 Family - Specs}},
}
@Misc{jetsonNanoTensorFlow,
howpublished = {https://forums.developer.nvidia.com/t/official-tensorflow-for-jetson-nano/71770},
note = {Accessed: 2022-03-24},
title = {{Official TensorFlow for Jetson Nano!}},
}
@Misc{opencv,
howpublished = {https://opencv.org/releases/},
note = {Accessed: 2022-03-24},
title = {{OpenCV - releases}},
}
@Misc{resnet,
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
title = {Deep Residual Learning for Image Recognition},
year = {2015},
copyright = {arXiv.org perpetual, non-exclusive license},
doi = {10.48550/ARXIV.1512.03385},
keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
publisher = {arXiv},
url = {https://arxiv.org/abs/1512.03385},
}
@InProceedings{vanishingGradients,
author = {Tan, Hong Hui and Lim, King Hann},
booktitle = {2019 7th International Conference on Smart Computing Communications (ICSCC)},
title = {Vanishing Gradient Mitigation with Deep Learning Neural Network Optimization},
year = {2019},
pages = {1-4},
doi = {10.1109/ICSCC.2019.8843652},
}
@Misc{overparameterization,
author = {Allen-Zhu, Zeyuan and Li, Yuanzhi and Liang, Yingyu},
title = {Learning and Generalization in Overparameterized Neural Networks, Going Beyond Two Layers},
year = {2018},
copyright = {arXiv.org perpetual, non-exclusive license},
doi = {10.48550/ARXIV.1811.04918},
keywords = {Machine Learning (cs.LG), Data Structures and Algorithms (cs.DS), Neural and Evolutionary Computing (cs.NE), Optimization and Control (math.OC), Machine Learning (stat.ML), FOS: Computer and information sciences, FOS: Mathematics},
publisher = {arXiv},
url = {https://arxiv.org/abs/1811.04918},
}
@Misc{autoencoderImg,
howpublished = {https://en.wikipedia.org/wiki/Autoencoder\#/media/File:Autoencoder\_structure.png},
note = {Accessed: 2022-03-24},
title = {{Schematic structure of an autoencoder with 3 fully connected hidden layers. The code (z, or h for reference in the text) is the most internal layer.}},
}
@Misc{residualConnectionImg,
howpublished = {https://i.stack.imgur.com/d9HNk.png},
note = {Accessed: 2022-03-24},
title = {{Figure of a residual connection}},
}
@Misc{ConvolutionAnimation,
howpublished = {https://spinkk.github.io/singlekernel\_nopadding.html},
note = {Accessed: 2022-03-24},
title = {{Animation of a Convolution}},
}
@Article{colorize,
author = {Zhang, Richard and Isola, Phillip and Efros, Alexei A.},
title = {Colorful Image Colorization},
year = {2016},
copyright = {arXiv.org perpetual, non-exclusive license},
doi = {10.48550/ARXIV.1603.08511},
keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
publisher = {arXiv},
url = {https://arxiv.org/abs/1603.08511},
}
@Comment{jabref-meta: databaseType:bibtex;}

87
Paper/Literatur.bib.bak Normal file
View File

@@ -0,0 +1,87 @@
% Encoding: UTF-8
@Misc{jetsonNano,
howpublished = {https://developer.nvidia.com/embedded/jetson-nano-developer-kit},
note = {Accessed: 2022-03-24},
title = {{Jetson Nano Developer Kit}},
}
@Misc{nvidia3070ti,
howpublished = {https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/rtx-3070-3070ti/},
note = {Accessed: 2022-03-24},
title = {{GeForce RTX 3070 Family - Specs}},
}
@Misc{jetsonNanoTensorFlow,
howpublished = {https://forums.developer.nvidia.com/t/official-tensorflow-for-jetson-nano/71770},
note = {Accessed: 2022-03-24},
title = {{Official TensorFlow for Jetson Nano!}},
}
@Misc{opencv,
howpublished = {https://opencv.org/releases/},
note = {Accessed: 2022-03-24},
title = {{OpenCV - releases}},
}
@Misc{resnet,
author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
title = {Deep Residual Learning for Image Recognition},
year = {2015},
copyright = {arXiv.org perpetual, non-exclusive license},
doi = {10.48550/ARXIV.1512.03385},
keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences},
publisher = {arXiv},
url = {https://arxiv.org/abs/1512.03385},
}
@InProceedings{vanishingGradients,
author = {Tan, Hong Hui and Lim, King Hann},
booktitle = {2019 7th International Conference on Smart Computing Communications (ICSCC)},
title = {Vanishing Gradient Mitigation with Deep Learning Neural Network Optimization},
year = {2019},
pages = {1-4},
doi = {10.1109/ICSCC.2019.8843652},
}
@Misc{overparameterization,
author = {Allen-Zhu, Zeyuan and Li, Yuanzhi and Liang, Yingyu},
title = {Learning and Generalization in Overparameterized Neural Networks, Going Beyond Two Layers},
year = {2018},
copyright = {arXiv.org perpetual, non-exclusive license},
doi = {10.48550/ARXIV.1811.04918},
keywords = {Machine Learning (cs.LG), Data Structures and Algorithms (cs.DS), Neural and Evolutionary Computing (cs.NE), Optimization and Control (math.OC), Machine Learning (stat.ML), FOS: Computer and information sciences, FOS: Mathematics},
publisher = {arXiv},
url = {https://arxiv.org/abs/1811.04918},
}
@Misc{autoencoderImg,
howpublished = {https://en.wikipedia.org/wiki/Autoencoder\#/media/File:Autoencoder\_structure.png},
note = {Accessed: 2022-03-24},
title = {{Schematic structure of an autoencoder with 3 fully connected hidden layers. The code (z, or h for reference in the text) is the most internal layer.}},
}
@Misc{residualConnectionImg,
howpublished = {https://i.stack.imgur.com/d9HNk.png},
note = {Accessed: 2022-03-24},
title = {{Figure of a residual connection}},
}
@Misc{ConvolutionAnimation,
howpublished = {https://spinkk.github.io/singlekernel\_nopadding.html},
note = {Accessed: 2022-03-24},
title = {{Animation of a Convolution}},
}
@Article{colorize,
author = {Zhang, Richard and Isola, Phillip and Efros, Alexei A.},
title = {Colorful Image Colorization},
year = {2016},
copyright = {arXiv.org perpetual, non-exclusive license},
doi = {10.48550/ARXIV.1603.08511},
keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
publisher = {arXiv},
url = {https://arxiv.org/abs/1603.08511},
}
@Comment{jabref-meta: databaseType:bibtex;}

70
Paper/Main.aux Normal file
View File

@@ -0,0 +1,70 @@
\relax
\providecommand*\new@tpo@label[2]{}
\citation{ConvolutionAnimation}
\citation{ConvolutionAnimation}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces OpenCV window on the Jetson Nano displaying the original, grey, colorized camera stream and corresponding loss between original and colorized image.}}{1}{}\protected@file@percent }
\newlabel{fig:OpenCV_window}{{1}{1}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Convolutional Autoencoder}{1}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Convolutions}{1}{}\protected@file@percent }
\citation{autoencoderImg}
\citation{autoencoderImg}
\citation{jetsonNanoTensorFlow}
\citation{opencv}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Concept of a convolution\nobreakspace {}\cite {ConvolutionAnimation}.}}{2}{}\protected@file@percent }
\newlabel{fig:convolution}{{2}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Autoencoder}{2}{}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces An Autoencoder compresses and decompresses the data\nobreakspace {}\cite {autoencoderImg}.}}{2}{}\protected@file@percent }
\newlabel{fig:autoencoder}{{3}{2}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Setup}{3}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Software}{3}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Hardware}{3}{}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {4}Training}{3}{}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Train and test loss during training.}}{3}{}\protected@file@percent }
\newlabel{fig:trainTestLoss}{{4}{3}}
\citation{residualConnectionImg}
\citation{residualConnectionImg}
\citation{vanishingGradients}
\citation{resnet}
\citation{resnet}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Model}{4}{}\protected@file@percent }
\newlabel{lst:ourModel_summary}{{1}{4}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {1}Parameter amount of our model (output of \texttt {summary()} call).}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {5}Optimizing the model to run on the Jetson Nano}{4}{}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Concept of a residual connection\nobreakspace {}\cite {residualConnectionImg}.}}{4}{}\protected@file@percent }
\newlabel{fig:ResidualConnection}{{7}{4}}
\citation{overparameterization}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Encoder layers.}}{5}{}\protected@file@percent }
\newlabel{fig:EncoderLayer}{{5}{5}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Decoder layers.}}{5}{}\protected@file@percent }
\newlabel{fig:DecoderLayer}{{6}{5}}
\citation{colorize}
\citation{colorize}
\citation{colorize}
\citation{colorize}
\@writefile{toc}{\contentsline {section}{\numberline {6}Evaluation: Compare with Colorful Image Colorization}{6}{}\protected@file@percent }
\newlabel{lst:theirModel_summary}{{2}{6}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {2}Parameter amount of the Colorful Image Colorization model (output of \texttt {summary()} call).}{6}{}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Colorized images generated by the Colorful Image Colorization model from Richard Zhang et al. and by our model.}}{7}{}\protected@file@percent }
\newlabel{fig:ColoredImages_compareModels}{{8}{7}}
\bibstyle{ieeetr}
\bibdata{Literatur}
\bibcite{jetsonNano}{1}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Loss based on colorized images by the Colorful Image Colorization model from Richard Zhang et al. and by our model.}}{8}{}\protected@file@percent }
\newlabel{fig:Loss_compareModels}{{9}{8}}
\@writefile{toc}{\contentsline {section}{\numberline {7}Conclusion}{8}{}\protected@file@percent }
\bibcite{nvidia3070ti}{2}
\bibcite{ConvolutionAnimation}{3}
\bibcite{autoencoderImg}{4}
\bibcite{jetsonNanoTensorFlow}{5}
\bibcite{opencv}{6}
\bibcite{residualConnectionImg}{7}
\bibcite{vanishingGradients}{8}
\bibcite{resnet}{9}
\bibcite{overparameterization}{10}
\bibcite{colorize}{11}
\@writefile{toc}{\contentsline {section}{References}{9}{}\protected@file@percent }
\global\@namedef{scr@dte@section@lastmaxnumwidth}{11.87997pt}
\global\@namedef{scr@dte@subsection@lastmaxnumwidth}{19.71361pt}
\@writefile{toc}{\providecommand\tocbasic@end@toc@file{}\tocbasic@end@toc@file}
\gdef \@abspage@last{11}

53
Paper/Main.bbl Normal file
View File

@@ -0,0 +1,53 @@
\begin{thebibliography}{10}
\bibitem{jetsonNano}
``{Jetson Nano Developer Kit}.''
https://developer.nvidia.com/embedded/jetson-nano-developer-kit.
\newblock Accessed: 2022-03-24.
\bibitem{nvidia3070ti}
``{GeForce RTX 3070 Family - Specs}.''
https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/rtx-3070-3070ti/.
\newblock Accessed: 2022-03-24.
\bibitem{ConvolutionAnimation}
``{Animation of a Convolution}.''
https://spinkk.github.io/singlekernel\_nopadding.html.
\newblock Accessed: 2022-03-24.
\bibitem{autoencoderImg}
``{Schematic structure of an autoencoder with 3 fully connected hidden layers.
The code (z, or h for reference in the text) is the most internal layer.}.''
https://en.wikipedia.org/wiki/Autoencoder\#/media/File:Autoencoder\_structure.png.
\newblock Accessed: 2022-03-24.
\bibitem{jetsonNanoTensorFlow}
``{Official TensorFlow for Jetson Nano!}.''
https://forums.developer.nvidia.com/t/official-tensorflow-for-jetson-nano/71770.
\newblock Accessed: 2022-03-24.
\bibitem{opencv}
``{OpenCV - releases}.'' https://opencv.org/releases/.
\newblock Accessed: 2022-03-24.
\bibitem{residualConnectionImg}
``{Figure of a residual connection}.'' https://i.stack.imgur.com/d9HNk.png.
\newblock Accessed: 2022-03-24.
\bibitem{vanishingGradients}
H.~H. Tan and K.~H. Lim, ``Vanishing gradient mitigation with deep learning
neural network optimization,'' in {\em 2019 7th International Conference on
Smart Computing Communications (ICSCC)}, pp.~1--4, 2019.
\bibitem{resnet}
K.~He, X.~Zhang, S.~Ren, and J.~Sun, ``Deep residual learning for image
recognition,'' 2015.
\bibitem{overparameterization}
Z.~Allen-Zhu, Y.~Li, and Y.~Liang, ``Learning and generalization in
overparameterized neural networks, going beyond two layers,'' 2018.
\bibitem{colorize}
R.~Zhang, P.~Isola, and A.~A. Efros, ``Colorful image colorization,'' 2016.
\end{thebibliography}

48
Paper/Main.blg Normal file
View File

@@ -0,0 +1,48 @@
This is BibTeX, Version 0.99d (TeX Live 2021/W32TeX)
Capacity: max_strings=200000, hash_size=200000, hash_prime=170003
The top-level auxiliary file: Main.aux
The style file: ieeetr.bst
Database file #1: Literatur.bib
Warning--empty journal in colorize
You've used 11 entries,
1876 wiz_defined-function locations,
524 strings with 5025 characters,
and the built_in function-call counts, 1546 in all, are:
= -- 140
> -- 51
< -- 0
+ -- 23
- -- 12
* -- 70
:= -- 200
add.period$ -- 18
call.type$ -- 11
change.case$ -- 11
chr.to.int$ -- 0
cite$ -- 12
duplicate$ -- 62
empty$ -- 211
format.name$ -- 12
if$ -- 405
int.to.chr$ -- 0
int.to.str$ -- 11
missing$ -- 2
newline$ -- 43
num.names$ -- 4
pop$ -- 83
preamble$ -- 1
purify$ -- 0
quote$ -- 0
skip$ -- 39
stack$ -- 0
substring$ -- 12
swap$ -- 5
text.length$ -- 0
text.prefix$ -- 0
top$ -- 0
type$ -- 0
warning$ -- 1
while$ -- 6
width$ -- 13
write$ -- 88
(There was 1 warning)

BIN
Paper/Main.dvi Normal file

Binary file not shown.

1
Paper/Main.lof Normal file
View File

@@ -0,0 +1 @@
\providecommand \tocbasic@end@toc@file {}\tocbasic@end@toc@file

507
Paper/Main.log Normal file
View File

@@ -0,0 +1,507 @@
This is pdfTeX, Version 3.141592653-2.6-1.40.23 (TeX Live 2021/W32TeX) (preloaded format=pdflatex 2022.2.8) 30 MAR 2022 18:21
entering extended mode
restricted \write18 enabled.
%&-line parsing enabled.
**Main.tex
(./Main.tex
LaTeX2e <2021-11-15> patch level 1
L3 programming layer <2022-02-05>
(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrartcl.cls
Document Class: scrartcl 2021/11/13 v3.35 KOMA-Script document class (article)
(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrkbase.sty
Package: scrkbase 2021/11/13 v3.35 KOMA-Script package (KOMA-Script-dependent b
asics and keyval usage)
(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrbase.sty
Package: scrbase 2021/11/13 v3.35 KOMA-Script package (KOMA-Script-independent
basics and keyval usage)
(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrlfile.sty
Package: scrlfile 2021/11/13 v3.35 KOMA-Script package (file load hooks)
(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrlfile-hook.sty
Package: scrlfile-hook 2021/11/13 v3.35 KOMA-Script package (using LaTeX hooks)
(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrlogo.sty
Package: scrlogo 2021/11/13 v3.35 KOMA-Script package (logo)
)))
(c:/texlive/2021/texmf-dist/tex/latex/graphics/keyval.sty
Package: keyval 2014/10/28 v1.15 key=value parser (DPC)
\KV@toks@=\toks16
)
Applying: [2021/05/01] Usage of raw or classic option list on input line 252.
Already applied: [0000/00/00] Usage of raw or classic option list on input line
368.
))
(c:/texlive/2021/texmf-dist/tex/latex/koma-script/tocbasic.sty
Package: tocbasic 2021/11/13 v3.35 KOMA-Script package (handling toc-files)
\scr@dte@tocline@numberwidth=\skip47
\scr@dte@tocline@numbox=\box50
)
Package tocbasic Info: babel extension for `toc' omitted
(tocbasic) because of missing \bbl@set@language on input line 135.
Package scrartcl Info: You've used standard option `12pt'.
(scrartcl) This is correct!
(scrartcl) Internally I'm using `fontsize=12pt'.
(scrartcl) If you'd like to set the option with \KOMAoptions,
(scrartcl) you'd have to use `fontsize=12pt' there
(scrartcl) instead of `12pt', too.
Class scrartcl Info: File `scrsize12pt.clo' used to setup font sizes on input l
ine 2242.
(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrsize12pt.clo
File: scrsize12pt.clo 2021/11/13 v3.35 KOMA-Script font size class option (12pt
)
)
(c:/texlive/2021/texmf-dist/tex/latex/koma-script/typearea.sty
Package: typearea 2021/11/13 v3.35 KOMA-Script package (type area)
\ta@bcor=\skip48
\ta@div=\count185
Package typearea Info: You've used standard option `a4paper'.
(typearea) This is correct!
(typearea) Internally I'm using `paper=a4'.
(typearea) If you'd like to set the option with \KOMAoptions,
(typearea) you'd have to use `paper=a4' there
(typearea) instead of `a4paper', too.
\ta@hblk=\skip49
\ta@vblk=\skip50
\ta@temp=\skip51
\footheight=\skip52
Package typearea Info: These are the values describing the layout:
(typearea) DIV = 12
(typearea) BCOR = 0.0pt
(typearea) \paperwidth = 597.50793pt
(typearea) \textwidth = 448.13095pt
(typearea) DIV departure = -6%
(typearea) \evensidemargin = 2.4185pt
(typearea) \oddsidemargin = 2.4185pt
(typearea) \paperheight = 845.04694pt
(typearea) \textheight = 635.5pt
(typearea) \topmargin = -41.72441pt
(typearea) \headheight = 18.125pt
(typearea) \headsep = 21.75pt
(typearea) \topskip = 12.0pt
(typearea) \footskip = 50.75pt
(typearea) \baselineskip = 14.5pt
(typearea) on input line 1743.
)
\c@part=\count186
\c@section=\count187
\c@subsection=\count188
\c@subsubsection=\count189
\c@paragraph=\count190
\c@subparagraph=\count191
\scr@dte@section@maxnumwidth=\skip53
Class scrartcl Info: using compatibility default `runin=bysign'
(scrartcl) for `\section on input line 4852.
Class scrartcl Info: using compatibility default `afterindent=bysign'
(scrartcl) for `\section on input line 4852.
\scr@dte@part@maxnumwidth=\skip54
Class scrartcl Info: using compatibility default `afterindent=false'
(scrartcl) for `\part on input line 4860.
\scr@dte@subsection@maxnumwidth=\skip55
Class scrartcl Info: using compatibility default `runin=bysign'
(scrartcl) for `\subsection on input line 4870.
Class scrartcl Info: using compatibility default `afterindent=bysign'
(scrartcl) for `\subsection on input line 4870.
\scr@dte@subsubsection@maxnumwidth=\skip56
Class scrartcl Info: using compatibility default `runin=bysign'
(scrartcl) for `\subsubsection on input line 4880.
Class scrartcl Info: using compatibility default `afterindent=bysign'
(scrartcl) for `\subsubsection on input line 4880.
\scr@dte@paragraph@maxnumwidth=\skip57
Class scrartcl Info: using compatibility default `runin=bysign'
(scrartcl) for `\paragraph on input line 4891.
Class scrartcl Info: using compatibility default `afterindent=bysign'
(scrartcl) for `\paragraph on input line 4891.
\scr@dte@subparagraph@maxnumwidth=\skip58
Class scrartcl Info: using compatibility default `runin=bysign'
(scrartcl) for `\subparagraph on input line 4901.
Class scrartcl Info: using compatibility default `afterindent=bysign'
(scrartcl) for `\subparagraph on input line 4901.
\abovecaptionskip=\skip59
\belowcaptionskip=\skip60
\c@pti@nb@sid@b@x=\box51
Package tocbasic Info: babel extension for `lof' omitted
(tocbasic) because of missing \bbl@set@language on input line 6076.
\scr@dte@figure@maxnumwidth=\skip61
\c@figure=\count192
Package tocbasic Info: babel extension for `lot' omitted
(tocbasic) because of missing \bbl@set@language on input line 6091.
\scr@dte@table@maxnumwidth=\skip62
\c@table=\count193
Class scrartcl Info: Redefining `\numberline' on input line 6258.
\bibindent=\dimen138
)
(c:/texlive/2021/texmf-dist/tex/latex/graphics/graphicx.sty
Package: graphicx 2021/09/16 v1.2d Enhanced LaTeX Graphics (DPC,SPQR)
(c:/texlive/2021/texmf-dist/tex/latex/graphics/graphics.sty
Package: graphics 2021/03/04 v1.4d Standard LaTeX Graphics (DPC,SPQR)
(c:/texlive/2021/texmf-dist/tex/latex/graphics/trig.sty
Package: trig 2021/08/11 v1.11 sin cos tan (DPC)
)
(c:/texlive/2021/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration
)
Package graphics Info: Driver file: pdftex.def on input line 107.
(c:/texlive/2021/texmf-dist/tex/latex/graphics-def/pdftex.def
File: pdftex.def 2020/10/05 v1.2a Graphics/color driver for pdftex
))
\Gin@req@height=\dimen139
\Gin@req@width=\dimen140
)
(c:/texlive/2021/texmf-dist/tex/latex/graphics/color.sty
Package: color 2021/12/07 v1.3c Standard LaTeX Color (DPC)
(c:/texlive/2021/texmf-dist/tex/latex/graphics-cfg/color.cfg
File: color.cfg 2016/01/02 v1.6 sample color configuration
)
Package color Info: Driver file: pdftex.def on input line 149.
)
(c:/texlive/2021/texmf-dist/tex/latex/listings/listings.sty
\lst@mode=\count194
\lst@gtempboxa=\box52
\lst@token=\toks17
\lst@length=\count195
\lst@currlwidth=\dimen141
\lst@column=\count196
\lst@pos=\count197
\lst@lostspace=\dimen142
\lst@width=\dimen143
\lst@newlines=\count198
\lst@lineno=\count199
\lst@maxwidth=\dimen144
(c:/texlive/2021/texmf-dist/tex/latex/listings/lstmisc.sty
File: lstmisc.sty 2020/03/24 1.8d (Carsten Heinz)
\c@lstnumber=\count266
\lst@skipnumbers=\count267
\lst@framebox=\box53
)
(c:/texlive/2021/texmf-dist/tex/latex/listings/listings.cfg
File: listings.cfg 2020/03/24 1.8d listings configuration
))
Package: listings 2020/03/24 1.8d (Carsten Heinz)
(c:/texlive/2021/texmf-dist/tex/latex/blindtext/blindtext.sty
Package: blindtext 2012/01/06 V2.0 blindtext-Package
(c:/texlive/2021/texmf-dist/tex/latex/tools/xspace.sty
Package: xspace 2014/10/28 v1.13 Space after command names (DPC,MH)
)
\c@blindtext=\count268
\c@Blindtext=\count269
\c@blind@countparstart=\count270
\blind@countxx=\count271
\blindtext@numBlindtext=\count272
\blind@countyy=\count273
\c@blindlist=\count274
\c@blindlistlevel=\count275
\c@blindlist@level=\count276
\blind@listitem=\count277
\c@blind@listcount=\count278
\c@blind@levelcount=\count279
\blind@mathformula=\count280
\blind@Mathformula=\count281
\c@blind@randomcount=\count282
\c@blind@randommax=\count283
\c@blind@pangramcount=\count284
\c@blind@pangrammax=\count285
)
(c:/texlive/2021/texmf-dist/tex/latex/wrapfig/wrapfig.sty
\wrapoverhang=\dimen145
\WF@size=\dimen146
\c@WF@wrappedlines=\count286
\WF@box=\box54
\WF@everypar=\toks18
Package: wrapfig 2003/01/31 v 3.6
)
(c:/texlive/2021/texmf-dist/tex/generic/ulem/ulem.sty
\UL@box=\box55
\UL@hyphenbox=\box56
\UL@skip=\skip63
\UL@hook=\toks19
\UL@height=\dimen147
\UL@pe=\count287
\UL@pixel=\dimen148
\ULC@box=\box57
Package: ulem 2019/11/18
\ULdepth=\dimen149
)
Class scrartcl Warning: Usage of package `tocbibind' together
(scrartcl) with a KOMA-Script class is not recommended.
(scrartcl) I'd suggest to use options like `listof=totoc'
(scrartcl) or `bibliography=totoc', or commands like
(scrartcl) `\setuptoc{toc}{totoc}' instead of this package,
(scrartcl) because it breaks several KOMA-Script features of
(scrartcl) the list of figures, list of tables, bibliography,
(scrartcl) index and the running head.
(scrartcl) Nevertheless, using requested
(scrartcl) package `tocbibind' on input line 22.
(c:/texlive/2021/texmf-dist/tex/latex/tocbibind/tocbibind.sty
Package: tocbibind 2010/10/13 v1.5k extra ToC listings
Package tocbibind Info: The document has section divisions on input line 50.
Package tocbibind Note: Using section or other style headings.
) (c:/texlive/2021/texmf-dist/tex/latex/setspace/setspace.sty
Package: setspace 2011/12/19 v6.7a set line spacing
)
(c:/texlive/2021/texmf-dist/tex/latex/titling/titling.sty
Package: titling 2009/09/04 v2.1d maketitle typesetting
\thanksmarkwidth=\skip64
\thanksmargin=\skip65
\droptitle=\skip66
)
(c:/texlive/2021/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def
File: l3backend-pdftex.def 2022-01-12 L3 backend support: PDF output (pdfTeX)
\l__color_backend_stack_int=\count288
\l__pdf_internal_box=\box58
)
(./Main.aux)
\openout1 = `Main.aux'.
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 40.
LaTeX Font Info: ... okay on input line 40.
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 40.
LaTeX Font Info: ... okay on input line 40.
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 40.
LaTeX Font Info: ... okay on input line 40.
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 40.
LaTeX Font Info: ... okay on input line 40.
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 40.
LaTeX Font Info: ... okay on input line 40.
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 40.
LaTeX Font Info: ... okay on input line 40.
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 40.
LaTeX Font Info: ... okay on input line 40.
Package scrbase Info: activating english \contentsname on input line 40.
Package scrbase Info: activating english \listfigurename on input line 40.
Package scrbase Info: activating english \listtablename on input line 40.
Package tocbasic Info: usage of `tocbibind' detected on input line 40.
Package tocbasic Warning: `tocbibind' redefinition of `\listoffigures'
(tocbasic) detected.
(tocbasic) Note: Loading `tocbibind' without option `notlof'
(tocbasic) can break several features of `tocbasic'.
Package tocbasic Warning: `tocbibind' redefinition of `\listoftables'
(tocbasic) detected.
(tocbasic) Note: Loading `tocbibind' without option `notlot'
(tocbasic) can break several features of `tocbasic'.
(c:/texlive/2021/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
[Loading MPS to PDF converter (version 2006.09.02).]
\scratchcounter=\count289
\scratchdimen=\dimen150
\scratchbox=\box59
\nofMPsegments=\count290
\nofMParguments=\count291
\everyMPshowfont=\toks20
\MPscratchCnt=\count292
\MPscratchDim=\dimen151
\MPnumerator=\count293
\makeMPintoPDFobject=\count294
\everyMPtoPDFconversion=\toks21
) (c:/texlive/2021/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf
Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4
85.
(c:/texlive/2021/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv
e
))
\c@lstlisting=\count295
LaTeX Font Info: External font `cmex10' loaded for size
(Font) <14.4> on input line 43.
LaTeX Font Info: External font `cmex10' loaded for size
(Font) <7> on input line 43.
[1
{c:/texlive/2021/texmf-var/fonts/map/pdftex/updmap/pdftex.map}] (./Main.toc
LaTeX Font Info: External font `cmex10' loaded for size
(Font) <12> on input line 1.
LaTeX Font Info: External font `cmex10' loaded for size
(Font) <8> on input line 1.
LaTeX Font Info: External font `cmex10' loaded for size
(Font) <6> on input line 1.
)
\tf@toc=\write3
\openout3 = `Main.toc'.
[1]
<Figures/OpenCV_window.png, id=12, 929.72343pt x 231.11343pt>
File: Figures/OpenCV_window.png Graphic file (type png)
<use Figures/OpenCV_window.png>
Package pdftex.def Info: Figures/OpenCV_window.png used on input line 66.
(pdftex.def) Requested size: 457.8383pt x 113.81102pt.
Overfull \hbox (9.70735pt too wide) in paragraph at lines 66--67
[][]
[]
<Figures/convolution.png, id=13, 971.88094pt x 360.59718pt>
File: Figures/convolution.png Graphic file (type png)
<use Figures/convolution.png>
Package pdftex.def Info: Figures/convolution.png used on input line 91.
(pdftex.def) Requested size: 383.42163pt x 142.26378pt.
[1
<./Figures/OpenCV_window.png>]
<Figures/Autoencoder.png, id=17, 351.3125pt x 262.9825pt>
File: Figures/Autoencoder.png Graphic file (type png)
<use Figures/Autoencoder.png>
Package pdftex.def Info: Figures/Autoencoder.png used on input line 108.
(pdftex.def) Requested size: 190.05435pt x 142.26378pt.
[2 <./Figures/Convolution.png> <./Figures/Autoencoder.png>]
<Figures/LossPlot.png, id=24, 462.528pt x 346.896pt>
File: Figures/LossPlot.png Graphic file (type png)
<use Figures/LossPlot.png>
Package pdftex.def Info: Figures/LossPlot.png used on input line 136.
(pdftex.def) Requested size: 227.6217pt x 170.71652pt.
Overfull \hbox (48.37207pt too wide) in paragraph at lines 136--137
[][]
[]
[3 <./Figures/LossPlot.png>]
(c:/texlive/2021/texmf-dist/tex/latex/listings/lstlang1.sty
File: lstlang1.sty 2020/03/24 1.8d listings language file
)
(c:/texlive/2021/texmf-dist/tex/latex/listings/lstlang1.sty
File: lstlang1.sty 2020/03/24 1.8d listings language file
)
LaTeX Font Info: Font shape `OT1/cmtt/bx/n' in size <11> not available
(Font) Font shape `OT1/cmtt/m/n' tried instead on input line 167.
<Figures/EncoderLayer.png, id=30, 767.86874pt x 1406.25375pt>
File: Figures/EncoderLayer.png Graphic file (type png)
<use Figures/EncoderLayer.png>
Package pdftex.def Info: Figures/EncoderLayer.png used on input line 196.
(pdftex.def) Requested size: 179.24963pt x 328.25954pt.
<Figures/DecoderLayer.png, id=31, 767.86874pt x 1406.25375pt>
File: Figures/DecoderLayer.png Graphic file (type png)
<use Figures/DecoderLayer.png>
Package pdftex.def Info: Figures/DecoderLayer.png used on input line 202.
(pdftex.def) Requested size: 179.24963pt x 328.25954pt.
LaTeX Warning: `h' float specifier changed to `ht'.
<Figures/ResidualConnection.png, id=32, 566.115pt x 326.21875pt>
File: Figures/ResidualConnection.png Graphic file (type png)
<use Figures/ResidualConnection.png>
Package pdftex.def Info: Figures/ResidualConnection.png used on input line 224
.
(pdftex.def) Requested size: 197.50398pt x 113.81102pt.
Overfull \hbox (18.25435pt too wide) in paragraph at lines 224--225
[][]
[]
Underfull \hbox (badness 3792) in paragraph at lines 225--225
[][]\OT1/cmr/m/n/12 Concept of a resid-ual
[]
[4 <./Figures/ResidualConnection.png>]
Underfull \hbox (badness 1142) in paragraph at lines 236--242
[]\OT1/cmr/m/n/12 As men-tioned in the first sec-tion, the Jet-
[]
[5 <./Figures/EncoderLayer.png> <./Figures/DecoderLayer.png>]
Class scrartcl Warning: `\caption' outside float.
(scrartcl) Seems you are using `\caption' outside a float.
(scrartcl) Maybe you are using a package that uses `\@makecaption'
(scrartcl) without setting `\@captype' before.
(scrartcl) Because I cannot detect the caption type, I'm using
(scrartcl) the empty one. on input line 263.
<Figures/ColoredImages_compareModels.png, id=45, 1322.69156pt x 1650.91782pt>
File: Figures/ColoredImages_compareModels.png Graphic file (type png)
<use Figures/ColoredImages_compareModels.png>
Package pdftex.def Info: Figures/ColoredImages_compareModels.png used on input
line 285.
(pdftex.def) Requested size: 296.34143pt x 369.88582pt.
<Figures/ColorizedImagesLossPlot_comparedModels.png, id=46, 462.528pt x 346.896
pt>
File: Figures/ColorizedImagesLossPlot_comparedModels.png Graphic file (type png
)
<use Figures/ColorizedImagesLossPlot_comparedModels.png>
Package pdftex.def Info: Figures/ColorizedImagesLossPlot_comparedModels.png us
ed on input line 303.
(pdftex.def) Requested size: 227.6217pt x 170.71652pt.
LaTeX Warning: `h' float specifier changed to `ht'.
[6] [7 <./Figures/ColoredImages_compareModels.png>] (./Main.bbl
Underfull \hbox (badness 1365) in paragraph at lines 4--7
[]\OT1/cmr/m/n/12 ``Jetson Nano De-vel-oper Kit.'' https://developer.nvidia.com
/embedded/jetson-
[]
[8 <./Figures/ColorizedImagesLossPlot_comparedModels.png>]
Underfull \hbox (badness 10000) in paragraph at lines 9--12
[]\OT1/cmr/m/n/12 ``GeForce RTX 3070 Familiy - Specs.'' https://www.nvidia.com/
en-
[]
Overfull \hbox (10.61928pt too wide) in paragraph at lines 14--17
[]\OT1/cmr/m/n/12 ``Animation of a Con-vo-lu-tion.'' https://spinkk.github.io/s
inglekernel[]nopadding.html.
[]
Overfull \hbox (16.2595pt too wide) in paragraph at lines 19--23
\OT1/cmr/m/n/12 https://en.wikipedia.org/wiki/Autoencoder#/media/File:Autoencod
er[]structure.png.
[]
Overfull \hbox (17.87395pt too wide) in paragraph at lines 25--28
[]\OT1/cmr/m/n/12 ``Official Ten-sor-Flow for Jet-son Nano!.'' https://forums.d
eveloper.nvidia.com/t/official-
[]
) [9] (./Main.aux) )
Here is how much of TeX's memory you used:
6710 strings out of 478278
129479 string characters out of 5850547
741288 words of memory out of 5000000
24859 multiletter control sequences out of 15000+600000
475654 words of font info for 51 fonts, out of 8000000 for 9000
1141 hyphenation exceptions out of 8191
108i,8n,106p,10599b,1261s stack positions out of 5000i,500n,10000p,200000b,80000s
<c:/texlive/2021/texmf-dist/fonts/type1/public/amsfonts/cm/
cmex10.pfb><c:/texlive/2021/texmf-dist/fonts/type1/public/amsfonts/cm/cmitt10.p
fb><c:/texlive/2021/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi12.pfb><c:/te
xlive/2021/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi8.pfb><c:/texlive/2021
/texmf-dist/fonts/type1/public/amsfonts/cm/cmr12.pfb><c:/texlive/2021/texmf-dis
t/fonts/type1/public/amsfonts/cm/cmr17.pfb><c:/texlive/2021/texmf-dist/fonts/ty
pe1/public/amsfonts/cm/cmr8.pfb><c:/texlive/2021/texmf-dist/fonts/type1/public/
amsfonts/cm/cmssbx10.pfb><c:/texlive/2021/texmf-dist/fonts/type1/public/amsfont
s/cm/cmsy10.pfb><c:/texlive/2021/texmf-dist/fonts/type1/public/amsfonts/cm/cmti
12.pfb><c:/texlive/2021/texmf-dist/fonts/type1/public/amsfonts/cm/cmtt10.pfb><c
:/texlive/2021/texmf-dist/fonts/type1/public/amsfonts/cm/cmtt12.pfb>
Output written on Main.pdf (11 pages, 6968779 bytes).
PDF statistics:
115 PDF objects out of 1000 (max. 8388607)
63 compressed objects within 1 object stream
0 named destinations out of 1000 (max. 500000)
46 words of extra memory for PDF output out of 10000 (max. 10000000)

1
Paper/Main.lot Normal file
View File

@@ -0,0 +1 @@
\providecommand \tocbasic@end@toc@file {}\tocbasic@end@toc@file

BIN
Paper/Main.pdf Normal file

Binary file not shown.

BIN
Paper/Main.synctex.gz Normal file

Binary file not shown.

352
Paper/Main.tex Normal file
View File

@@ -0,0 +1,352 @@
\documentclass[a4paper,12pt, listof=totoc,toc=sectionentrywithdots]{scrartcl}
\usepackage{graphicx}
\usepackage{color}
\usepackage{listings}
\definecolor{GrayCodeBlock}{RGB}{241,241,241}
\definecolor{BlackText}{RGB}{110,107,94}
\definecolor{RedTypename}{RGB}{182,86,17}
\definecolor{GreenString}{RGB}{96,172,57}
\definecolor{PurpleKeyword}{RGB}{184,84,212}
\definecolor{GrayComment}{RGB}{170,170,170}
\definecolor{GoldDocumentation}{RGB}{180,165,45}
\usepackage{blindtext}
\usepackage{wrapfig}
\usepackage{ulem}
\usepackage[nottoc]{tocbibind}
\usepackage{setspace}
\usepackage{titling}
\renewcommand\maketitlehooka{\null\mbox{}\vfill}
\renewcommand\maketitlehookd{\vfill\null}
\title{Colorization of Grey Images by applying a Convolutional Autoencoder on the Jetson Nano}
\date{}
\author{Tim Niklas Witte and Dennis Konkol}
\lstset{
numbersep=8pt,
frame = single,
framexleftmargin=15pt,
framesep=1.5pt, framerule=1.5pt}
\begin{document}
\begin{titlingpage}
\maketitle
\end{titlingpage}
\tableofcontents
\pagenumbering{gobble}
\cleardoublepage
\pagenumbering{arabic}
\section{Introduction}
Embedded GPUs such as the Jetson Nano provide more limited hardware resources than desktop/server GPUs.
For example, the Jetson Nano has 128 CUDA cores and 4\,GB of video memory, compared to the NVIDIA GeForce RTX 3070 Ti, which has 6144 CUDA cores and 8\,GB of video memory.
Inference with massive artificial neural networks (ANNs), e.g.\ those with over 25 million parameters, becomes slow on the Jetson Nano---about 0.01 forward passes per second.
An NVIDIA GeForce RTX 3070 Ti performs about 32 forward passes per second through the same huge ANN.
This paper presents a convolutional autoencoder for grey image colorization with 300,000 parameters, optimized to run on embedded GPUs.
In order to demonstrate the results during runtime on the Jetson Nano, the live grey camera stream is colorized, as shown in Figure~\ref{fig:OpenCV_window}.
\begin{figure}[h]
\centering
\includegraphics[totalheight=4cm]{Figures/OpenCV_window.png}
\caption{OpenCV window on the Jetson Nano displaying the original, grey, colorized camera stream and corresponding loss between original and colorized image.}
\label{fig:OpenCV_window}
\end{figure}
This paper is organized as follows:
The concept of a convolutional autoencoder will be covered in section 2.
Section 3 explains the necessary software and hardware setup on the Jetson Nano.
The training procedure, including the model architecture, is discussed in section 4.
Optimization techniques for running our model on the Jetson Nano are presented in section 5.
In section 6, the performance of our model is evaluated by comparing the colorized images generated by our model with those generated by a state-of-the-art ANN for grey image colorization, which has about 25 million parameters.
Finally, the results are summed up in section 7.
\section{Convolutional Autoencoder}
\subsection{Convolutions}
Convolutions detect and extract features from images by applying a filter kernel, which is a weight matrix.
As shown in Figure~\ref{fig:convolution}, a convolution iterates a filter kernel over the entire image.
During each iteration, an area with the same size as the kernel is processed by an element-wise multiplication followed by summing all values up, yielding the result for this area of the image.
This area is then shifted one step (depending on the stride size) further to the right,
and the same processing step occurs again.
\begin{figure}[h]
\centering
\includegraphics[totalheight=5cm]{Figures/convolution.png}
\caption{Concept of a convolution~\cite{ConvolutionAnimation}.}
\label{fig:convolution}
\end{figure}
\subsection{Autoencoder}
Autoencoders are artificial neural networks used to learn features of unlabeled data.
As presented in Figure~\ref{fig:autoencoder}, the encoder part compresses the data by gradually decreasing the layer size.
The resulting embedding/code is passed to the decoder part, which is responsible for
reconstructing it.
In the decoder, the layer size increases from layer to layer.
Overall, the input \texttt{X} and output \texttt{X'} shall be the same.
\begin{figure}[h]
\centering
\includegraphics[totalheight=5cm]{Figures/Autoencoder.png}
\caption{An Autoencoder compresses and decompresses the data~\cite{autoencoderImg}.}
\label{fig:autoencoder}
\end{figure}
Instead of fully connected layers, a convolutional autoencoder applies convolutions in the encoder and transposes convolutions in the decoder.
\section{Setup}
\subsection{Software}
TensorFlow was installed following the official guide from NVIDIA~\cite{jetsonNanoTensorFlow}.
Furthermore, it is not recommended to install the current version of OpenCV via \texttt{pip3} due to compatibility issues with the CSI camera.
The CSI camera, i.e.\ the \texttt{gstreamer} pipeline, can only be accessed with an OpenCV version lower than 3.3.1.
This version was installed manually by downloading the source code from the official website and compiling it~\cite{opencv}.
Besides, for speed purposes, the maximal performance mode was enabled by the command \texttt{sudo nvpmodel -m 0}.
In order to enable the Jetson Clock, the command \texttt{sudo jetson\_clocks} was used.
\subsection{Hardware}
The CSI camera was plugged into the corresponding slot in the Jetson Nano.
Furthermore, the HDMI display shows the OpenCV window as presented in Figure~\ref{fig:OpenCV_window}.
\section{Training}
\begin{wrapfigure}{r}{.4\textwidth}
\centering
\includegraphics[totalheight=6cm]{Figures/LossPlot.png}
\caption{Train and test loss during training.}
\label{fig:trainTestLoss}
\end{wrapfigure}
At the beginning of training our model, we used the common RGB color space.
In other words, the input was the grey-scaled image, and the output was the RGB image.
However, we lost too much information in the picture:
the general input picture was still recognizable, but with a lot of ``compression''.
The reason for this is that, for one pixel, all three RGB values together determine the brightness of that pixel.
So it is possible to get the right color but not the correct brightness. That is why we switched to the CIE LAB color space.
Here we also have three values for each pixel: the L channel for the
brightness, and A and B as the color channels.
The L channel corresponds to the grayscale image for the model.
The model's output consists of two values, the A and B channels.
So, by combining the given A and B values with our old L values, we get the colored image. Because of the kept L channel, we get an overall plausible image, even if the colors do not match the original
image.
The model was trained for 13 epochs (in total: 15 hours) with the ImageNet2012 dataset.
It contains ca.\ 1,300,000 training images and 300,000 validation images, which were used as test data.
As presented in Figure~\ref{fig:trainTestLoss}, the model was successfully trained to convergence: after about ten epochs, the train loss does not change significantly ($< 0.0001$) from one epoch to the next.
\subsection{Model}
As shown in Listing~\ref{lst:ourModel_summary}, our convolutional autoencoder has about 300,000 parameters.
The model's memory size is about 1.2\,MB ($300000 \cdot 4$ Byte).
The encoder and decoder parts of the ANN are equally balanced, as they have almost the same number of parameters.
\begin{lstlisting}[language=bash, caption=Parameter amount of our model (output of \texttt{summary()} call)., label={lst:ourModel_summary}, basicstyle=\fontsize{11}{9}\selectfont\ttfamily]
Model: "autoencoder"
_______________________________________________________________
Layer (type) Output Shape Param #
===============================================================
encoder (Encoder) multiple 148155
decoder (Decoder) multiple 150145
===============================================================
Total params: 298,302
Trainable params: 297,210
Non-trainable params: 1,092
_______________________________________________________________
\end{lstlisting}
Figures~\ref{fig:EncoderLayer} and~\ref{fig:DecoderLayer} present the structure of the layers contained in the encoder and decoder.
The encoder receives a 256x256 pixel grey image.
Due to the grey color, there is only one color channel.
Convolutions can be seen as feature extractors.
At the first convolution in the encoder (see \texttt{Conv2D\_0} in Figure~\ref{fig:EncoderLayer}), there are 75 features extracted from this grey image.
These extracted features are represented as channels (similar to color channels but not colors) called feature maps.
Literally speaking, a feature map can be seen as a heatmap in which the pixels belonging to the corresponding feature have a high magnitude.
Due to the stride size of 2, the size of these feature maps is halved.
A convolution operation is followed by a batch normalization layer and an activation layer (the drive is normalized before it goes into the activation function).
In the encoder this occurs four times.
With each step, the number of filters increases.
\begin{figure}[h]
\centering
\begin{minipage}[b]{0.4\textwidth}
\includegraphics[width=\textwidth]{Figures/EncoderLayer.png}
\caption{Encoder layers.}
\label{fig:EncoderLayer}
\end{minipage}
\hfill
\begin{minipage}[b]{0.4\textwidth}
\includegraphics[width=\textwidth]{Figures/DecoderLayer.png}
\caption{Decoder layers.}
\label{fig:DecoderLayer}
\end{minipage}
\end{figure}
The resulting embedding is passed into the decoder.
In contrast to the convolutions, which reduce the feature map size, the transpose convolutions increase the feature map size by a factor of 2.
As in the encoder, a transpose convolution is followed by batch normalization and activation layers.
In the decoder this occurs four times.
With each step, the number of filters decreases.
The exception is the last transpose convolution, which is a bottleneck layer:
it decreases the number of filters from 75 to 2 (the \textit{a} and \textit{b} channels) and keeps the feature map size constant (stride size = 1).
\section{Optimizing the model to run on the Jetson Nano }
\begin{wrapfigure}{r}{.4\textwidth}
\centering
\includegraphics[totalheight=4cm]{Figures/ResidualConnection.png}
\caption{Concept of a residual connection~\cite{residualConnectionImg}.}
\label{fig:ResidualConnection}
\end{wrapfigure}
Residual connections in neural networks, also called skip connections, address the vanishing gradient problem (tiny weight adjustments~\cite{vanishingGradients}) in the backpropagation algorithm~\cite{resnet}.
As shown in Figure~\ref{fig:ResidualConnection}, the output \texttt{x} of a layer is added, two layers further, to the input of the third layer~\cite{resnet}.
The output \texttt{x} must be saved because it is used at a later time step.
Therefore, residual connections need a lot of GPU memory, causing part of the other data needed by the model to be swapped out.
To increase the FPS, our model does not have residual connections.
As mentioned in the first section, the Jetson~Nano has 128 CUDA cores.
The number of filters per layer does not exceed this number of cores.
This limitation enables TensorFlow to schedule the calculation of each feature map directly to a specific core during the output calculation of a layer.
In other words, there are no cores that must do a second feature map calculation after the first one while other cores are idling.
The calculation of a previous layer must be finished before starting with the next layer.
Furthermore, limiting the number of filters reduces the model size.
In Deep Learning, overparameterization often occurs:
the number of trainable parameters is much larger than the number of training examples.
As a consequence, the model tends to overfit the data~\cite{overparameterization}.
The opposite applies to our model.
Literally speaking, our model is ``under-parameterized'':
as there are only 300,000 parameters for about 1.3 million training images, our model is forced to generalize as strongly as possible during training.
To achieve such generalization, the model is trained for multiple epochs (iterations over the entire training dataset).
It is assumed that such generalization yields results similar to those of a model with a considerably larger number of parameters.
In other words, the higher cost of training a small model for more epochs shall result in similar quality, while the latency to generate the result with the smaller model is lower.
Besides, the non-existence of skip connections increases the chance of vanishing gradients during training.
However, multiple training epochs compensate for this problem:
millions of tiny weight changes sum up into an effective weight adjustment.
\section{Evaluation: Compare with Colorful Image Colorization}
As demonstrated in Listing~\ref{lst:theirModel_summary}, the Colorful Image Colorization model from Richard Zhang et al. has about 25 million parameters~\cite{colorize}.
The model presented in this paper is about 80 times smaller.
Its input shape is $256\times256\times1$, the same as that of our model.
\begin{lstlisting}[language=bash, caption=Parameter amount of the Colorful Image Colorization model (output of \texttt{summary()} call)., label={lst:theirModel_summary}, basicstyle=\fontsize{11}{9}\selectfont\ttfamily]
Model: "ColorfulImageColorization"
_______________________________________________________________
Layer (type) Output Shape Param #
===============================================================
[...]
===============================================================
Total params: 24,793,081
Trainable params: 24,788,345
Non-trainable params: 4,736
_______________________________________________________________
\end{lstlisting}
Figure~\ref{fig:ColoredImages_compareModels} shows grey images colorized by the Colorful Image Colorization model~\cite{colorize} and by our model.
Our model tends to colorize the images with a grey touch and the colors are not saturated compared with the Colorful Image Colorization model.
\begin{figure}
\centering
\includegraphics[totalheight=13cm]{Figures/ColoredImages_compareModels.png}
\caption{Colorized images generated by the Colorful Image Colorization model from Richard Zhang et al. and by our model.}
\label{fig:ColoredImages_compareModels}
\end{figure}
Our model performs regression by predicting the \textit{ab} values.
The model output shape is $256\times256\times2$ (see \texttt{tanh\_3} in Figure~\ref{fig:DecoderLayer}).
In contrast, the model from Richard Zhang et al. applies classification:
there is a probability distribution for each pixel approximating which color it may be.
In their model, 313 colors are available.
As a consequence, the model output shape is $256\times256\times313$~\cite{colorize}.
Compared to our model, this larger output shape requires a considerably larger (ca.\ 80 times) number of parameters.
\begin{figure}[h]
\centering
\includegraphics[totalheight=6cm]{Figures/ColorizedImagesLossPlot_comparedModels.png}
\caption{Loss based on colorized images by the
Colorful Image Colorization model from Richard Zhang et al. and by our model.}
\label{fig:Loss_compareModels}
\end{figure}
Considering the loss as shown in Figure~\ref{fig:Loss_compareModels},
our model outperforms the model from Richard Zhang et al.
However, the Euclidean loss (mean squared error) $L_2(\hat{y}, y)$ for the prediction $y$ and the target (also called ground truth) $\hat{y}$ was applied:
\[ L_2(\hat{y}, y) = \frac{1}{2} \cdot \sum_{h,w} \lVert y_{h,w} - \hat{y}_{h,w} \rVert^{2} \]
This loss function is ambiguous for the colorization problem.
Consider the prediction $y$ for a single pixel with a loss of $d$:
there are two possible corresponding targets, $\hat{y} = y \pm d$, instead of a single one.
Furthermore, consider a set of pixels for each of which a corresponding color is predicted.
The optimal solution with respect to this loss is the mean of all pixels within this set.
In the case of color prediction, this averaging causes a grey bias and desaturated colors~\cite{colorize}.
\section{Conclusion}
Our model predicts the most probable color by applying regression,
in contrast to the model proposed by Richard Zhang et al., which classifies the most probable color.
Due to the one-hot encoding applied for these color classifications, over 80 times more parameters are needed than for our model, considering the parameter balance between hidden layers and output layers.
Comparing the colorized images generated by a classification-based and by a regression-based ANN, the regression-based ANN tends to colorize images with a grey touch and unsaturated colors because of the ambiguous loss function for the colorization problem.
However, the results are acceptable considering the difference in the number of parameters between the two models.
Furthermore, a classification-based model cannot be fully accelerated by a GPU because the last part of the model is a sampling process.
This process is an argmax operation over 313 possible colors (see model shape) which runs on the CPU.
Note that transferring data from the GPU to the CPU can be seen as a performance bottleneck.
Overall, our model achieves about 10 FPS on the Jetson Nano.
Running the Richard Zhang et al. model would result in less than 0.01 FPS.
\bibliographystyle{ieeetr}
\bibliography{Literatur}
\end{document}

14
Paper/Main.toc Normal file
View File

@@ -0,0 +1,14 @@
\contentsline {section}{\numberline {1}Introduction}{1}{}%
\contentsline {section}{\numberline {2}Convolutional Autoencoder}{1}{}%
\contentsline {subsection}{\numberline {2.1}Convolutions}{1}{}%
\contentsline {subsection}{\numberline {2.2}Autoencoder}{2}{}%
\contentsline {section}{\numberline {3}Setup}{3}{}%
\contentsline {subsection}{\numberline {3.1}Software}{3}{}%
\contentsline {subsection}{\numberline {3.2}Hardware}{3}{}%
\contentsline {section}{\numberline {4}Training}{3}{}%
\contentsline {subsection}{\numberline {4.1}Model}{4}{}%
\contentsline {section}{\numberline {5}Optimizing the model to run on the Jetson Nano}{4}{}%
\contentsline {section}{\numberline {6}Evaluation: Compare with Colorful Image Colorization}{6}{}%
\contentsline {section}{\numberline {7}Conclusion}{8}{}%
\contentsline {section}{References}{9}{}%
\providecommand \tocbasic@end@toc@file {}\tocbasic@end@toc@file

View File