diff --git a/Paper/Figures/Autoencoder.png b/Paper/Figures/Autoencoder.png new file mode 100644 index 0000000..a32f3c6 Binary files /dev/null and b/Paper/Figures/Autoencoder.png differ diff --git a/Paper/Figures/ColoredImages_compareModels.png b/Paper/Figures/ColoredImages_compareModels.png new file mode 100644 index 0000000..ab4385a Binary files /dev/null and b/Paper/Figures/ColoredImages_compareModels.png differ diff --git a/Paper/Figures/ColorizedImagesLossPlot_comparedModels.png b/Paper/Figures/ColorizedImagesLossPlot_comparedModels.png new file mode 100644 index 0000000..7241378 Binary files /dev/null and b/Paper/Figures/ColorizedImagesLossPlot_comparedModels.png differ diff --git a/Paper/Figures/Convolution.png b/Paper/Figures/Convolution.png new file mode 100644 index 0000000..a444085 Binary files /dev/null and b/Paper/Figures/Convolution.png differ diff --git a/Paper/Figures/DecoderLayer.png b/Paper/Figures/DecoderLayer.png new file mode 100644 index 0000000..d5a235f Binary files /dev/null and b/Paper/Figures/DecoderLayer.png differ diff --git a/Paper/Figures/EncoderLayer.png b/Paper/Figures/EncoderLayer.png new file mode 100644 index 0000000..2543263 Binary files /dev/null and b/Paper/Figures/EncoderLayer.png differ diff --git a/Paper/Figures/LossPlot.png b/Paper/Figures/LossPlot.png new file mode 100644 index 0000000..2bbed3a Binary files /dev/null and b/Paper/Figures/LossPlot.png differ diff --git a/Paper/Figures/OpenCV_window.png b/Paper/Figures/OpenCV_window.png new file mode 100644 index 0000000..1e885ef Binary files /dev/null and b/Paper/Figures/OpenCV_window.png differ diff --git a/Paper/Figures/ResidualConnection.png b/Paper/Figures/ResidualConnection.png new file mode 100644 index 0000000..b608d46 Binary files /dev/null and b/Paper/Figures/ResidualConnection.png differ diff --git a/Paper/Literatur.bib b/Paper/Literatur.bib new file mode 100644 index 0000000..2d6d04e --- /dev/null +++ b/Paper/Literatur.bib @@ -0,0 +1,87 @@ +% Encoding: UTF-8 + 
+@Misc{jetsonNano, + howpublished = {https://developer.nvidia.com/embedded/jetson-nano-developer-kit}, + note = {Accessed: 2022-03-24}, + title = {{Jetson Nano Developer Kit}}, +} + +@Misc{nvidia3070ti, + howpublished = {https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/rtx-3070-3070ti/}, + note = {Accessed: 2022-03-24}, + title = {{GeForce RTX 3070 Family - Specs}}, +} + +@Misc{jetsonNanoTensorFlow, + howpublished = {https://forums.developer.nvidia.com/t/official-tensorflow-for-jetson-nano/71770}, + note = {Accessed: 2022-03-24}, + title = {{Official TensorFlow for Jetson Nano!}}, +} + +@Misc{opencv, + howpublished = {https://opencv.org/releases/}, + note = {Accessed: 2022-03-24}, + title = {{OpenCV - releases}}, +} + +@Misc{resnet, + author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + title = {Deep Residual Learning for Image Recognition}, + year = {2015}, + copyright = {arXiv.org perpetual, non-exclusive license}, + doi = {10.48550/ARXIV.1512.03385}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences}, + publisher = {arXiv}, + url = {https://arxiv.org/abs/1512.03385}, +} + +@InProceedings{vanishingGradients, + author = {Tan, Hong Hui and Lim, King Hann}, + booktitle = {2019 7th International Conference on Smart Computing Communications (ICSCC)}, + title = {Vanishing Gradient Mitigation with Deep Learning Neural Network Optimization}, + year = {2019}, + pages = {1--4}, + doi = {10.1109/ICSCC.2019.8843652}, +} + +@Misc{overparameterization, + author = {Allen-Zhu, Zeyuan and Li, Yuanzhi and Liang, Yingyu}, + title = {Learning and Generalization in Overparameterized Neural Networks, Going Beyond Two Layers}, + year = {2018}, + copyright = {arXiv.org perpetual, non-exclusive license}, + doi = {10.48550/ARXIV.1811.04918}, + keywords = {Machine Learning (cs.LG), Data Structures and Algorithms (cs.DS), Neural and Evolutionary Computing (cs.NE), Optimization and Control (math.OC), 
Machine Learning (stat.ML), FOS: Computer and information sciences, FOS: Mathematics}, + publisher = {arXiv}, + url = {https://arxiv.org/abs/1811.04918}, +} + +@Misc{autoencoderImg, + howpublished = {https://en.wikipedia.org/wiki/Autoencoder\#/media/File:Autoencoder\_structure.png}, + note = {Accessed: 2022-03-24}, + title = {{Schematic structure of an autoencoder with 3 fully connected hidden layers. The code (z, or h for reference in the text) is the most internal layer.}}, +} + +@Misc{residualConnectionImg, + howpublished = {https://i.stack.imgur.com/d9HNk.png}, + note = {Accessed: 2022-03-24}, + title = {{Figure of a residual connection}}, +} + +@Misc{ConvolutionAnimation, + howpublished = {https://spinkk.github.io/singlekernel\_nopadding.html}, + note = {Accessed: 2022-03-24}, + title = {{Animation of a Convolution}}, +} + +@Misc{colorize, + author = {Zhang, Richard and Isola, Phillip and Efros, Alexei A.}, + title = {Colorful Image Colorization}, + year = {2016}, + copyright = {arXiv.org perpetual, non-exclusive license}, + doi = {10.48550/ARXIV.1603.08511}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences}, + publisher = {arXiv}, + url = {https://arxiv.org/abs/1603.08511}, +} + +@Comment{jabref-meta: databaseType:bibtex;} diff --git a/Paper/Literatur.bib.bak b/Paper/Literatur.bib.bak new file mode 100644 index 0000000..2d6d04e --- /dev/null +++ b/Paper/Literatur.bib.bak @@ -0,0 +1,87 @@ +% Encoding: UTF-8 + +@Misc{jetsonNano, + howpublished = {https://developer.nvidia.com/embedded/jetson-nano-developer-kit}, + note = {Accessed: 2022-03-24}, + title = {{Jetson Nano Developer Kit}}, +} + +@Misc{nvidia3070ti, + howpublished = {https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/rtx-3070-3070ti/}, + note = {Accessed: 2022-03-24}, + title = {{GeForce RTX 3070 Familiy - Specs}}, +} + +@Misc{jetsonNanoTensorFlow, + howpublished = 
{https://forums.developer.nvidia.com/t/official-tensorflow-for-jetson-nano/71770}, + note = {Accessed: 2022-03-24}, + title = {{Official TensorFlow for Jetson Nano!}}, +} + +@Misc{opencv, + howpublished = {https://opencv.org/releases/}, + note = {Accessed: 2022-03-24}, + title = {{OpenCV - releases}}, +} + +@Misc{resnet, + author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + title = {Deep Residual Learning for Image Recognition}, + year = {2015}, + copyright = {arXiv.org perpetual, non-exclusive license}, + doi = {10.48550/ARXIV.1512.03385}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences}, + publisher = {arXiv}, + url = {https://arxiv.org/abs/1512.03385}, +} + +@InProceedings{vanishingGradients, + author = {Tan, Hong Hui and Lim, King Hann}, + booktitle = {2019 7th International Conference on Smart Computing Communications (ICSCC)}, + title = {Vanishing Gradient Mitigation with Deep Learning Neural Network Optimization}, + year = {2019}, + pages = {1-4}, + doi = {10.1109/ICSCC.2019.8843652}, +} + +@Misc{overparameterization, + author = {Allen-Zhu, Zeyuan and Li, Yuanzhi and Liang, Yingyu}, + title = {Learning and Generalization in Overparameterized Neural Networks, Going Beyond Two Layers}, + year = {2018}, + copyright = {arXiv.org perpetual, non-exclusive license}, + doi = {10.48550/ARXIV.1811.04918}, + keywords = {Machine Learning (cs.LG), Data Structures and Algorithms (cs.DS), Neural and Evolutionary Computing (cs.NE), Optimization and Control (math.OC), Machine Learning (stat.ML), FOS: Computer and information sciences, FOS: Mathematics}, + publisher = {arXiv}, + url = {https://arxiv.org/abs/1811.04918}, +} + +@Misc{autoencoderImg, + howpublished = {https://en.wikipedia.org/wiki/Autoencoder\#/media/File:Autoencoder\_structure.png}, + note = {Accessed: 2022-03-24}, + title = {{Schematic structure of an autoencoder with 3 fully connected hidden layers. 
The code (z, or h for reference in the text) is the most internal layer.}}, +} + +@Misc{residualConnectionImg, + howpublished = {https://i.stack.imgur.com/d9HNk.png}, + note = {Accessed: 2022-03-24}, + title = {{Figure of a residual connection}}, +} + +@Misc{ConvolutionAnimation, + howpublished = {https://spinkk.github.io/singlekernel\_nopadding.html}, + note = {Accessed: 2022-03-24}, + title = {{Animation of a Convolution}}, +} + +@Article{colorize, + author = {Zhang, Richard and Isola, Phillip and Efros, Alexei A.}, + title = {Colorful Image Colorization}, + year = {2016}, + copyright = {arXiv.org perpetual, non-exclusive license}, + doi = {10.48550/ARXIV.1603.08511}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + publisher = {arXiv}, + url = {https://arxiv.org/abs/1603.08511}, +} + +@Comment{jabref-meta: databaseType:bibtex;} diff --git a/Paper/Main.aux b/Paper/Main.aux new file mode 100644 index 0000000..fdf087c --- /dev/null +++ b/Paper/Main.aux @@ -0,0 +1,70 @@ +\relax +\providecommand*\new@tpo@label[2]{} +\citation{ConvolutionAnimation} +\citation{ConvolutionAnimation} +\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces OpenCV window on the Jetson Nano displaying the original, grey, colorized camera stream and corresponding loss between original and colorized image.}}{1}{}\protected@file@percent } +\newlabel{fig:OpenCV_window}{{1}{1}} +\@writefile{toc}{\contentsline {section}{\numberline {2}Convolutional Autoencoder}{1}{}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Convolutions}{1}{}\protected@file@percent } +\citation{autoencoderImg} +\citation{autoencoderImg} +\citation{jetsonNanoTensorFlow} +\citation{opencv} +\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Concept of a 
convolution\nobreakspace {}\cite {ConvolutionAnimation}.}}{2}{}\protected@file@percent } +\newlabel{fig:convolution}{{2}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Autoencoder}{2}{}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces An Autoencoder compresses and decompresses the data\nobreakspace {}\cite {autoencoderImg}.}}{2}{}\protected@file@percent } +\newlabel{fig:autoencoder}{{3}{2}} +\@writefile{toc}{\contentsline {section}{\numberline {3}Setup}{3}{}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Software}{3}{}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Hardware}{3}{}\protected@file@percent } +\@writefile{toc}{\contentsline {section}{\numberline {4}Training}{3}{}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Train and test loss during training.}}{3}{}\protected@file@percent } +\newlabel{fig:trainTestLoss}{{4}{3}} +\citation{residualConnectionImg} +\citation{residualConnectionImg} +\citation{vanishingGradients} +\citation{resnet} +\citation{resnet} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Model}{4}{}\protected@file@percent } +\newlabel{lst:ourModel_summary}{{1}{4}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {1}Parameter amount of our model (output of \texttt {summary()} call).}{4}{}\protected@file@percent } +\@writefile{toc}{\contentsline {section}{\numberline {5}Optimizing the model to run on the Jetson Nano}{4}{}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Concept of a residual connection\nobreakspace {}\cite {residualConnectionImg}.}}{4}{}\protected@file@percent } +\newlabel{fig:ResidualConnection}{{7}{4}} +\citation{overparameterization} +\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Encoder layers.}}{5}{}\protected@file@percent } 
+\newlabel{fig:EncoderLayer}{{5}{5}} +\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Decoder layers.}}{5}{}\protected@file@percent } +\newlabel{fig:DecoderLayer}{{6}{5}} +\citation{colorize} +\citation{colorize} +\citation{colorize} +\citation{colorize} +\@writefile{toc}{\contentsline {section}{\numberline {6}Evaluation: Compare with Colorful Image Colorization}{6}{}\protected@file@percent } +\newlabel{lst:theirModel_summary}{{2}{6}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {2}Parameter amount of the Colorful Image Colorization model (output of \texttt {summary()} call).}{6}{}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Colorized images generated by the Colorful Image Colorization model from Richard Zhang et al. and by our model.}}{7}{}\protected@file@percent } +\newlabel{fig:ColoredImages_compareModels}{{8}{7}} +\bibstyle{ieeetr} +\bibdata{Literatur} +\bibcite{jetsonNano}{1} +\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Loss based on colorized images by the Colorful Image Colorization model from Richard Zhang et al. 
and by our model.}}{8}{}\protected@file@percent } +\newlabel{fig:Loss_compareModels}{{9}{8}} +\@writefile{toc}{\contentsline {section}{\numberline {7}Conclusion}{8}{}\protected@file@percent } +\bibcite{nvidia3070ti}{2} +\bibcite{ConvolutionAnimation}{3} +\bibcite{autoencoderImg}{4} +\bibcite{jetsonNanoTensorFlow}{5} +\bibcite{opencv}{6} +\bibcite{residualConnectionImg}{7} +\bibcite{vanishingGradients}{8} +\bibcite{resnet}{9} +\bibcite{overparameterization}{10} +\bibcite{colorize}{11} +\@writefile{toc}{\contentsline {section}{References}{9}{}\protected@file@percent } +\global\@namedef{scr@dte@section@lastmaxnumwidth}{11.87997pt} +\global\@namedef{scr@dte@subsection@lastmaxnumwidth}{19.71361pt} +\@writefile{toc}{\providecommand\tocbasic@end@toc@file{}\tocbasic@end@toc@file} +\gdef \@abspage@last{11} diff --git a/Paper/Main.bbl b/Paper/Main.bbl new file mode 100644 index 0000000..4305a3b --- /dev/null +++ b/Paper/Main.bbl @@ -0,0 +1,53 @@ +\begin{thebibliography}{10} + +\bibitem{jetsonNano} +``{Jetson Nano Developer Kit}.'' + https://developer.nvidia.com/embedded/jetson-nano-developer-kit. +\newblock Accessed: 2022-03-24. + +\bibitem{nvidia3070ti} +``{GeForce RTX 3070 Familiy - Specs}.'' + https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/rtx-3070-3070ti/. +\newblock Accessed: 2022-03-24. + +\bibitem{ConvolutionAnimation} +``{Animation of a Convolution}.'' + https://spinkk.github.io/singlekernel\_nopadding.html. +\newblock Accessed: 2022-03-24. + +\bibitem{autoencoderImg} +``{Schematic structure of an autoencoder with 3 fully connected hidden layers. + The code (z, or h for reference in the text) is the most internal layer.}.'' + https://en.wikipedia.org/wiki/Autoencoder\#/media/File:Autoencoder\_structure.png. +\newblock Accessed: 2022-03-24. + +\bibitem{jetsonNanoTensorFlow} +``{Official TensorFlow for Jetson Nano!}.'' + https://forums.developer.nvidia.com/t/official-tensorflow-for-jetson-nano/71770. +\newblock Accessed: 2022-03-24. 
+ +\bibitem{opencv} +``{OpenCV - releases}.'' https://opencv.org/releases/. +\newblock Accessed: 2022-03-24. + +\bibitem{residualConnectionImg} +``{Figure of a residual connection}.'' https://i.stack.imgur.com/d9HNk.png. +\newblock Accessed: 2022-03-24. + +\bibitem{vanishingGradients} +H.~H. Tan and K.~H. Lim, ``Vanishing gradient mitigation with deep learning + neural network optimization,'' in {\em 2019 7th International Conference on + Smart Computing Communications (ICSCC)}, pp.~1--4, 2019. + +\bibitem{resnet} +K.~He, X.~Zhang, S.~Ren, and J.~Sun, ``Deep residual learning for image + recognition,'' 2015. + +\bibitem{overparameterization} +Z.~Allen-Zhu, Y.~Li, and Y.~Liang, ``Learning and generalization in + overparameterized neural networks, going beyond two layers,'' 2018. + +\bibitem{colorize} +R.~Zhang, P.~Isola, and A.~A. Efros, ``Colorful image colorization,'' 2016. + +\end{thebibliography} diff --git a/Paper/Main.blg b/Paper/Main.blg new file mode 100644 index 0000000..12f9dba --- /dev/null +++ b/Paper/Main.blg @@ -0,0 +1,48 @@ +This is BibTeX, Version 0.99d (TeX Live 2021/W32TeX) +Capacity: max_strings=200000, hash_size=200000, hash_prime=170003 +The top-level auxiliary file: Main.aux +The style file: ieeetr.bst +Database file #1: Literatur.bib +Warning--empty journal in colorize +You've used 11 entries, + 1876 wiz_defined-function locations, + 524 strings with 5025 characters, +and the built_in function-call counts, 1546 in all, are: += -- 140 +> -- 51 +< -- 0 ++ -- 23 +- -- 12 +* -- 70 +:= -- 200 +add.period$ -- 18 +call.type$ -- 11 +change.case$ -- 11 +chr.to.int$ -- 0 +cite$ -- 12 +duplicate$ -- 62 +empty$ -- 211 +format.name$ -- 12 +if$ -- 405 +int.to.chr$ -- 0 +int.to.str$ -- 11 +missing$ -- 2 +newline$ -- 43 +num.names$ -- 4 +pop$ -- 83 +preamble$ -- 1 +purify$ -- 0 +quote$ -- 0 +skip$ -- 39 +stack$ -- 0 +substring$ -- 12 +swap$ -- 5 +text.length$ -- 0 +text.prefix$ -- 0 +top$ -- 0 +type$ -- 0 +warning$ -- 1 +while$ -- 6 +width$ -- 13 +write$ -- 
88 +(There was 1 warning) diff --git a/Paper/Main.dvi b/Paper/Main.dvi new file mode 100644 index 0000000..e4aed16 Binary files /dev/null and b/Paper/Main.dvi differ diff --git a/Paper/Main.lof b/Paper/Main.lof new file mode 100644 index 0000000..8cc0d0f --- /dev/null +++ b/Paper/Main.lof @@ -0,0 +1 @@ +\providecommand \tocbasic@end@toc@file {}\tocbasic@end@toc@file diff --git a/Paper/Main.log b/Paper/Main.log new file mode 100644 index 0000000..1d145a4 --- /dev/null +++ b/Paper/Main.log @@ -0,0 +1,507 @@ +This is pdfTeX, Version 3.141592653-2.6-1.40.23 (TeX Live 2021/W32TeX) (preloaded format=pdflatex 2022.2.8) 30 MAR 2022 18:21 +entering extended mode + restricted \write18 enabled. + %&-line parsing enabled. +**Main.tex +(./Main.tex +LaTeX2e <2021-11-15> patch level 1 +L3 programming layer <2022-02-05> +(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrartcl.cls +Document Class: scrartcl 2021/11/13 v3.35 KOMA-Script document class (article) +(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrkbase.sty +Package: scrkbase 2021/11/13 v3.35 KOMA-Script package (KOMA-Script-dependent b +asics and keyval usage) + +(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrbase.sty +Package: scrbase 2021/11/13 v3.35 KOMA-Script package (KOMA-Script-independent +basics and keyval usage) + +(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrlfile.sty +Package: scrlfile 2021/11/13 v3.35 KOMA-Script package (file load hooks) + +(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrlfile-hook.sty +Package: scrlfile-hook 2021/11/13 v3.35 KOMA-Script package (using LaTeX hooks) + + +(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrlogo.sty +Package: scrlogo 2021/11/13 v3.35 KOMA-Script package (logo) +))) +(c:/texlive/2021/texmf-dist/tex/latex/graphics/keyval.sty +Package: keyval 2014/10/28 v1.15 key=value parser (DPC) +\KV@toks@=\toks16 +) +Applying: [2021/05/01] Usage of raw or classic option list on input line 252. 
+Already applied: [0000/00/00] Usage of raw or classic option list on input line + 368. +)) +(c:/texlive/2021/texmf-dist/tex/latex/koma-script/tocbasic.sty +Package: tocbasic 2021/11/13 v3.35 KOMA-Script package (handling toc-files) +\scr@dte@tocline@numberwidth=\skip47 +\scr@dte@tocline@numbox=\box50 +) +Package tocbasic Info: babel extension for `toc' omitted +(tocbasic) because of missing \bbl@set@language on input line 135. +Package scrartcl Info: You've used standard option `12pt'. +(scrartcl) This is correct! +(scrartcl) Internally I'm using `fontsize=12pt'. +(scrartcl) If you'd like to set the option with \KOMAoptions, +(scrartcl) you'd have to use `fontsize=12pt' there +(scrartcl) instead of `12pt', too. +Class scrartcl Info: File `scrsize12pt.clo' used to setup font sizes on input l +ine 2242. + +(c:/texlive/2021/texmf-dist/tex/latex/koma-script/scrsize12pt.clo +File: scrsize12pt.clo 2021/11/13 v3.35 KOMA-Script font size class option (12pt +) +) +(c:/texlive/2021/texmf-dist/tex/latex/koma-script/typearea.sty +Package: typearea 2021/11/13 v3.35 KOMA-Script package (type area) +\ta@bcor=\skip48 +\ta@div=\count185 +Package typearea Info: You've used standard option `a4paper'. +(typearea) This is correct! +(typearea) Internally I'm using `paper=a4'. +(typearea) If you'd like to set the option with \KOMAoptions, +(typearea) you'd have to use `paper=a4' there +(typearea) instead of `a4paper', too. 
+\ta@hblk=\skip49 +\ta@vblk=\skip50 +\ta@temp=\skip51 +\footheight=\skip52 +Package typearea Info: These are the values describing the layout: +(typearea) DIV = 12 +(typearea) BCOR = 0.0pt +(typearea) \paperwidth = 597.50793pt +(typearea) \textwidth = 448.13095pt +(typearea) DIV departure = -6% +(typearea) \evensidemargin = 2.4185pt +(typearea) \oddsidemargin = 2.4185pt +(typearea) \paperheight = 845.04694pt +(typearea) \textheight = 635.5pt +(typearea) \topmargin = -41.72441pt +(typearea) \headheight = 18.125pt +(typearea) \headsep = 21.75pt +(typearea) \topskip = 12.0pt +(typearea) \footskip = 50.75pt +(typearea) \baselineskip = 14.5pt +(typearea) on input line 1743. +) +\c@part=\count186 +\c@section=\count187 +\c@subsection=\count188 +\c@subsubsection=\count189 +\c@paragraph=\count190 +\c@subparagraph=\count191 +\scr@dte@section@maxnumwidth=\skip53 +Class scrartcl Info: using compatibility default `runin=bysign' +(scrartcl) for `\section on input line 4852. +Class scrartcl Info: using compatibility default `afterindent=bysign' +(scrartcl) for `\section on input line 4852. +\scr@dte@part@maxnumwidth=\skip54 +Class scrartcl Info: using compatibility default `afterindent=false' +(scrartcl) for `\part on input line 4860. +\scr@dte@subsection@maxnumwidth=\skip55 +Class scrartcl Info: using compatibility default `runin=bysign' +(scrartcl) for `\subsection on input line 4870. +Class scrartcl Info: using compatibility default `afterindent=bysign' +(scrartcl) for `\subsection on input line 4870. +\scr@dte@subsubsection@maxnumwidth=\skip56 +Class scrartcl Info: using compatibility default `runin=bysign' +(scrartcl) for `\subsubsection on input line 4880. +Class scrartcl Info: using compatibility default `afterindent=bysign' +(scrartcl) for `\subsubsection on input line 4880. +\scr@dte@paragraph@maxnumwidth=\skip57 +Class scrartcl Info: using compatibility default `runin=bysign' +(scrartcl) for `\paragraph on input line 4891. 
+Class scrartcl Info: using compatibility default `afterindent=bysign' +(scrartcl) for `\paragraph on input line 4891. +\scr@dte@subparagraph@maxnumwidth=\skip58 +Class scrartcl Info: using compatibility default `runin=bysign' +(scrartcl) for `\subparagraph on input line 4901. +Class scrartcl Info: using compatibility default `afterindent=bysign' +(scrartcl) for `\subparagraph on input line 4901. +\abovecaptionskip=\skip59 +\belowcaptionskip=\skip60 +\c@pti@nb@sid@b@x=\box51 +Package tocbasic Info: babel extension for `lof' omitted +(tocbasic) because of missing \bbl@set@language on input line 6076. + +\scr@dte@figure@maxnumwidth=\skip61 +\c@figure=\count192 +Package tocbasic Info: babel extension for `lot' omitted +(tocbasic) because of missing \bbl@set@language on input line 6091. + +\scr@dte@table@maxnumwidth=\skip62 +\c@table=\count193 +Class scrartcl Info: Redefining `\numberline' on input line 6258. +\bibindent=\dimen138 +) +(c:/texlive/2021/texmf-dist/tex/latex/graphics/graphicx.sty +Package: graphicx 2021/09/16 v1.2d Enhanced LaTeX Graphics (DPC,SPQR) + +(c:/texlive/2021/texmf-dist/tex/latex/graphics/graphics.sty +Package: graphics 2021/03/04 v1.4d Standard LaTeX Graphics (DPC,SPQR) + +(c:/texlive/2021/texmf-dist/tex/latex/graphics/trig.sty +Package: trig 2021/08/11 v1.11 sin cos tan (DPC) +) +(c:/texlive/2021/texmf-dist/tex/latex/graphics-cfg/graphics.cfg +File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration +) +Package graphics Info: Driver file: pdftex.def on input line 107. 
+ +(c:/texlive/2021/texmf-dist/tex/latex/graphics-def/pdftex.def +File: pdftex.def 2020/10/05 v1.2a Graphics/color driver for pdftex +)) +\Gin@req@height=\dimen139 +\Gin@req@width=\dimen140 +) +(c:/texlive/2021/texmf-dist/tex/latex/graphics/color.sty +Package: color 2021/12/07 v1.3c Standard LaTeX Color (DPC) + +(c:/texlive/2021/texmf-dist/tex/latex/graphics-cfg/color.cfg +File: color.cfg 2016/01/02 v1.6 sample color configuration +) +Package color Info: Driver file: pdftex.def on input line 149. +) +(c:/texlive/2021/texmf-dist/tex/latex/listings/listings.sty +\lst@mode=\count194 +\lst@gtempboxa=\box52 +\lst@token=\toks17 +\lst@length=\count195 +\lst@currlwidth=\dimen141 +\lst@column=\count196 +\lst@pos=\count197 +\lst@lostspace=\dimen142 +\lst@width=\dimen143 +\lst@newlines=\count198 +\lst@lineno=\count199 +\lst@maxwidth=\dimen144 + +(c:/texlive/2021/texmf-dist/tex/latex/listings/lstmisc.sty +File: lstmisc.sty 2020/03/24 1.8d (Carsten Heinz) +\c@lstnumber=\count266 +\lst@skipnumbers=\count267 +\lst@framebox=\box53 +) +(c:/texlive/2021/texmf-dist/tex/latex/listings/listings.cfg +File: listings.cfg 2020/03/24 1.8d listings configuration +)) +Package: listings 2020/03/24 1.8d (Carsten Heinz) + +(c:/texlive/2021/texmf-dist/tex/latex/blindtext/blindtext.sty +Package: blindtext 2012/01/06 V2.0 blindtext-Package + +(c:/texlive/2021/texmf-dist/tex/latex/tools/xspace.sty +Package: xspace 2014/10/28 v1.13 Space after command names (DPC,MH) +) +\c@blindtext=\count268 +\c@Blindtext=\count269 +\c@blind@countparstart=\count270 +\blind@countxx=\count271 +\blindtext@numBlindtext=\count272 +\blind@countyy=\count273 +\c@blindlist=\count274 +\c@blindlistlevel=\count275 +\c@blindlist@level=\count276 +\blind@listitem=\count277 +\c@blind@listcount=\count278 +\c@blind@levelcount=\count279 +\blind@mathformula=\count280 +\blind@Mathformula=\count281 +\c@blind@randomcount=\count282 +\c@blind@randommax=\count283 +\c@blind@pangramcount=\count284 +\c@blind@pangrammax=\count285 +) 
+(c:/texlive/2021/texmf-dist/tex/latex/wrapfig/wrapfig.sty +\wrapoverhang=\dimen145 +\WF@size=\dimen146 +\c@WF@wrappedlines=\count286 +\WF@box=\box54 +\WF@everypar=\toks18 +Package: wrapfig 2003/01/31 v 3.6 +) +(c:/texlive/2021/texmf-dist/tex/generic/ulem/ulem.sty +\UL@box=\box55 +\UL@hyphenbox=\box56 +\UL@skip=\skip63 +\UL@hook=\toks19 +\UL@height=\dimen147 +\UL@pe=\count287 +\UL@pixel=\dimen148 +\ULC@box=\box57 +Package: ulem 2019/11/18 +\ULdepth=\dimen149 +) + +Class scrartcl Warning: Usage of package `tocbibind' together +(scrartcl) with a KOMA-Script class is not recommended. +(scrartcl) I'd suggest to use options like `listof=totoc' +(scrartcl) or `bibliography=totoc', or commands like +(scrartcl) `\setuptoc{toc}{totoc}' instead of this package, +(scrartcl) because it breaks several KOMA-Script features of +(scrartcl) the list of figures, list of tables, bibliography, +(scrartcl) index and the running head. +(scrartcl) Nevertheless, using requested +(scrartcl) package `tocbibind' on input line 22. + +(c:/texlive/2021/texmf-dist/tex/latex/tocbibind/tocbibind.sty +Package: tocbibind 2010/10/13 v1.5k extra ToC listings +Package tocbibind Info: The document has section divisions on input line 50. + + +Package tocbibind Note: Using section or other style headings. + +) (c:/texlive/2021/texmf-dist/tex/latex/setspace/setspace.sty +Package: setspace 2011/12/19 v6.7a set line spacing +) +(c:/texlive/2021/texmf-dist/tex/latex/titling/titling.sty +Package: titling 2009/09/04 v2.1d maketitle typesetting +\thanksmarkwidth=\skip64 +\thanksmargin=\skip65 +\droptitle=\skip66 +) +(c:/texlive/2021/texmf-dist/tex/latex/l3backend/l3backend-pdftex.def +File: l3backend-pdftex.def 2022-01-12 L3 backend support: PDF output (pdfTeX) +\l__color_backend_stack_int=\count288 +\l__pdf_internal_box=\box58 +) +(./Main.aux) +\openout1 = `Main.aux'. + +LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 40. +LaTeX Font Info: ... okay on input line 40. 
+LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 40. +LaTeX Font Info: ... okay on input line 40. +LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 40. +LaTeX Font Info: ... okay on input line 40. +LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 40. +LaTeX Font Info: ... okay on input line 40. +LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 40. +LaTeX Font Info: ... okay on input line 40. +LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 40. +LaTeX Font Info: ... okay on input line 40. +LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 40. +LaTeX Font Info: ... okay on input line 40. +Package scrbase Info: activating english \contentsname on input line 40. +Package scrbase Info: activating english \listfigurename on input line 40. +Package scrbase Info: activating english \listtablename on input line 40. +Package tocbasic Info: usage of `tocbibind' detected on input line 40. + + +Package tocbasic Warning: `tocbibind' redefinition of `\listoffigures' +(tocbasic) detected. +(tocbasic) Note: Loading `tocbibind' without option `notlof' +(tocbasic) can break several features of `tocbasic'. + + +Package tocbasic Warning: `tocbibind' redefinition of `\listoftables' +(tocbasic) detected. +(tocbasic) Note: Loading `tocbibind' without option `notlot' +(tocbasic) can break several features of `tocbasic'. + +(c:/texlive/2021/texmf-dist/tex/context/base/mkii/supp-pdf.mkii +[Loading MPS to PDF converter (version 2006.09.02).] 
+\scratchcounter=\count289 +\scratchdimen=\dimen150 +\scratchbox=\box59 +\nofMPsegments=\count290 +\nofMParguments=\count291 +\everyMPshowfont=\toks20 +\MPscratchCnt=\count292 +\MPscratchDim=\dimen151 +\MPnumerator=\count293 +\makeMPintoPDFobject=\count294 +\everyMPtoPDFconversion=\toks21 +) (c:/texlive/2021/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty +Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf +Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4 +85. + +(c:/texlive/2021/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg +File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv +e +)) +\c@lstlisting=\count295 +LaTeX Font Info: External font `cmex10' loaded for size +(Font) <14.4> on input line 43. +LaTeX Font Info: External font `cmex10' loaded for size +(Font) <7> on input line 43. + [1 + +{c:/texlive/2021/texmf-var/fonts/map/pdftex/updmap/pdftex.map}] (./Main.toc +LaTeX Font Info: External font `cmex10' loaded for size +(Font) <12> on input line 1. +LaTeX Font Info: External font `cmex10' loaded for size +(Font) <8> on input line 1. +LaTeX Font Info: External font `cmex10' loaded for size +(Font) <6> on input line 1. +) +\tf@toc=\write3 +\openout3 = `Main.toc'. + + [1] + +File: Figures/OpenCV_window.png Graphic file (type png) + +Package pdftex.def Info: Figures/OpenCV_window.png used on input line 66. +(pdftex.def) Requested size: 457.8383pt x 113.81102pt. + +Overfull \hbox (9.70735pt too wide) in paragraph at lines 66--67 + [][] + [] + + +File: Figures/convolution.png Graphic file (type png) + +Package pdftex.def Info: Figures/convolution.png used on input line 91. +(pdftex.def) Requested size: 383.42163pt x 142.26378pt. +[1 + + + <./Figures/OpenCV_window.png>] + +File: Figures/Autoencoder.png Graphic file (type png) + +Package pdftex.def Info: Figures/Autoencoder.png used on input line 108. +(pdftex.def) Requested size: 190.05435pt x 142.26378pt. 
+ [2 <./Figures/Convolution.png> <./Figures/Autoencoder.png>] + +File: Figures/LossPlot.png Graphic file (type png) + +Package pdftex.def Info: Figures/LossPlot.png used on input line 136. +(pdftex.def) Requested size: 227.6217pt x 170.71652pt. + +Overfull \hbox (48.37207pt too wide) in paragraph at lines 136--137 + [][] + [] + +[3 <./Figures/LossPlot.png>] +(c:/texlive/2021/texmf-dist/tex/latex/listings/lstlang1.sty +File: lstlang1.sty 2020/03/24 1.8d listings language file +) +(c:/texlive/2021/texmf-dist/tex/latex/listings/lstlang1.sty +File: lstlang1.sty 2020/03/24 1.8d listings language file +) +LaTeX Font Info: Font shape `OT1/cmtt/bx/n' in size <11> not available +(Font) Font shape `OT1/cmtt/m/n' tried instead on input line 167. + +File: Figures/EncoderLayer.png Graphic file (type png) + +Package pdftex.def Info: Figures/EncoderLayer.png used on input line 196. +(pdftex.def) Requested size: 179.24963pt x 328.25954pt. + +File: Figures/DecoderLayer.png Graphic file (type png) + +Package pdftex.def Info: Figures/DecoderLayer.png used on input line 202. +(pdftex.def) Requested size: 179.24963pt x 328.25954pt. + + +LaTeX Warning: `h' float specifier changed to `ht'. + + +File: Figures/ResidualConnection.png Graphic file (type png) + +Package pdftex.def Info: Figures/ResidualConnection.png used on input line 224 +. +(pdftex.def) Requested size: 197.50398pt x 113.81102pt. + +Overfull \hbox (18.25435pt too wide) in paragraph at lines 224--225 + [][] + [] + + +Underfull \hbox (badness 3792) in paragraph at lines 225--225 +[][]\OT1/cmr/m/n/12 Concept of a resid-ual + [] + +[4 <./Figures/ResidualConnection.png>] +Underfull \hbox (badness 1142) in paragraph at lines 236--242 +[]\OT1/cmr/m/n/12 As men-tioned in the first sec-tion, the Jet- + [] + +[5 <./Figures/EncoderLayer.png> <./Figures/DecoderLayer.png>] + +Class scrartcl Warning: `\caption' outside float. +(scrartcl) Seems you are using `\caption' outside a float. 
+(scrartcl) Maybe you are using a package that uses `\@makecaption' + +(scrartcl) without setting `\@captype' before. +(scrartcl) Because I cannot detect the caption type, I'm using +(scrartcl) the empty one. on input line 263. + + +File: Figures/ColoredImages_compareModels.png Graphic file (type png) + +Package pdftex.def Info: Figures/ColoredImages_compareModels.png used on input + line 285. +(pdftex.def) Requested size: 296.34143pt x 369.88582pt. + +File: Figures/ColorizedImagesLossPlot_comparedModels.png Graphic file (type png +) + +Package pdftex.def Info: Figures/ColorizedImagesLossPlot_comparedModels.png us +ed on input line 303. +(pdftex.def) Requested size: 227.6217pt x 170.71652pt. + +LaTeX Warning: `h' float specifier changed to `ht'. + +[6] [7 <./Figures/ColoredImages_compareModels.png>] (./Main.bbl +Underfull \hbox (badness 1365) in paragraph at lines 4--7 +[]\OT1/cmr/m/n/12 ``Jetson Nano De-vel-oper Kit.'' https://developer.nvidia.com +/embedded/jetson- + [] + +[8 <./Figures/ColorizedImagesLossPlot_comparedModels.png>] +Underfull \hbox (badness 10000) in paragraph at lines 9--12 +[]\OT1/cmr/m/n/12 ``GeForce RTX 3070 Familiy - Specs.'' https://www.nvidia.com/ +en- + [] + + +Overfull \hbox (10.61928pt too wide) in paragraph at lines 14--17 +[]\OT1/cmr/m/n/12 ``Animation of a Con-vo-lu-tion.'' https://spinkk.github.io/s +inglekernel[]nopadding.html. + [] + + +Overfull \hbox (16.2595pt too wide) in paragraph at lines 19--23 +\OT1/cmr/m/n/12 https://en.wikipedia.org/wiki/Autoencoder#/media/File:Autoencod +er[]structure.png. 
+ [] + + +Overfull \hbox (17.87395pt too wide) in paragraph at lines 25--28 +[]\OT1/cmr/m/n/12 ``Official Ten-sor-Flow for Jet-son Nano!.'' https://forums.d +eveloper.nvidia.com/t/official- + [] + +) [9] (./Main.aux) ) +Here is how much of TeX's memory you used: + 6710 strings out of 478278 + 129479 string characters out of 5850547 + 741288 words of memory out of 5000000 + 24859 multiletter control sequences out of 15000+600000 + 475654 words of font info for 51 fonts, out of 8000000 for 9000 + 1141 hyphenation exceptions out of 8191 + 108i,8n,106p,10599b,1261s stack positions out of 5000i,500n,10000p,200000b,80000s + +Output written on Main.pdf (11 pages, 6968779 bytes). +PDF statistics: + 115 PDF objects out of 1000 (max. 8388607) + 63 compressed objects within 1 object stream + 0 named destinations out of 1000 (max. 500000) + 46 words of extra memory for PDF output out of 10000 (max. 10000000) + diff --git a/Paper/Main.lot b/Paper/Main.lot new file mode 100644 index 0000000..8cc0d0f --- /dev/null +++ b/Paper/Main.lot @@ -0,0 +1 @@ +\providecommand \tocbasic@end@toc@file {}\tocbasic@end@toc@file diff --git a/Paper/Main.pdf b/Paper/Main.pdf new file mode 100644 index 0000000..f94d799 Binary files /dev/null and b/Paper/Main.pdf differ diff --git a/Paper/Main.synctex.gz b/Paper/Main.synctex.gz new file mode 100644 index 0000000..b24b9e7 Binary files /dev/null and b/Paper/Main.synctex.gz differ diff --git a/Paper/Main.tex b/Paper/Main.tex new file mode 100644 index 0000000..15d5b14 --- /dev/null +++ b/Paper/Main.tex @@ -0,0 +1,352 @@ + +\documentclass[a4paper,12pt, listof=totoc,toc=sectionentrywithdots]{scrartcl} + +\usepackage{graphicx} + +\usepackage{color} +\usepackage{listings} +\definecolor{GrayCodeBlock}{RGB}{241,241,241} +\definecolor{BlackText}{RGB}{110,107,94} +\definecolor{RedTypename}{RGB}{182,86,17} +\definecolor{GreenString}{RGB}{96,172,57} +\definecolor{PurpleKeyword}{RGB}{184,84,212} +\definecolor{GrayComment}{RGB}{170,170,170} 
+\definecolor{GoldDocumentation}{RGB}{180,165,45} + + + +\usepackage{blindtext} +\usepackage{wrapfig} +\usepackage{ulem} +\usepackage[nottoc]{tocbibind} +\usepackage{setspace} + +\usepackage{titling} +\renewcommand\maketitlehooka{\null\mbox{}\vfill} +\renewcommand\maketitlehookd{\vfill\null} + + +\title{Colorization of Grey Images by applying a Convolutional Autoencoder on the Jetson Nano} +\date{} +\author{Tim Niklas Witte and Dennis Konkol} + + +\lstset{ +numbersep=8pt, +frame = single, +framexleftmargin=15pt, +framesep=1.5pt, framerule=1.5pt} + +\begin{document} + +\begin{titlingpage} +\maketitle +\end{titlingpage} + + + +\tableofcontents + +\pagenumbering{gobble} + + +\cleardoublepage +\pagenumbering{arabic} + +\section{Introduction} +Embedded GPUs such as the Jetson Nano provide more limited hardware resources than desktop/server GPUs. +For example, the Jetson Nano has 128 CUDA cores and 4 GB of video memory, compared to the NVIDIA GeForce RTX 3070 Ti, which has 6144 CUDA cores and 8 GB of video memory. +Inference with massive artificial neural networks (ANNs), e.g., with over 25.000.000 parameters, becomes slow on the Jetson Nano -- about 0.01 forward passes per second. +In contrast, an NVIDIA GeForce RTX 3070 Ti performs 32 forward passes per second through the same huge ANN. +This paper presents a convolutional autoencoder for grey image colorization with 300.000 parameters optimized to run on embedded GPUs. +In order to demonstrate the results during runtime on the Jetson Nano, the live grey camera stream is colorized, as shown in Figure~\ref{fig:OpenCV_window}. 
+ +\begin{figure}[h] +\centering + \includegraphics[totalheight=4cm]{Figures/OpenCV_window.png} + \caption{OpenCV window on the Jetson Nano displaying the original, grey, and colorized camera streams and the corresponding loss between the original and colorized image.} + \label{fig:OpenCV_window} +\end{figure} + +This paper is organized as follows: +The concept of a convolutional autoencoder will be covered in section 2. +Section 3 explains the necessary software and hardware setup on the Jetson Nano. +The training procedure, including the model architecture, is discussed in section 4. +Optimization techniques that allow our model to run on the Jetson Nano are presented in section 5. +In section 6, the performance of our model is evaluated by comparing the colorized images generated by our model and by a state-of-the-art ANN for grey image colorization, which has about 25.000.000 parameters. +Finally, the results are summarized in section 7. + +\section{Convolutional Autoencoder} + +\subsection{Convolutions} + +Convolutions detect features and extract these from images by applying a filter kernel, which is a weight matrix. +As shown in Figure~\ref{fig:convolution}, a convolution iterates a filter kernel over the entire image. +During each iteration, an area with the same size as the kernel is processed by an element-wise multiplication followed by summing up the products, which yields the result for this area of the image. +This area is shifted one step (depending on the stride size) further to the right in the next step. +The same processing step occurs again. + +\begin{figure}[h] +\centering + \includegraphics[totalheight=5cm]{Figures/Convolution.png} + \caption{Concept of a convolution~\cite{ConvolutionAnimation}.} + \label{fig:convolution} +\end{figure} + + +\subsection{Autoencoder} + +Autoencoders are artificial neural networks used to learn features of unlabeled data. 
+As presented in Figure~\ref{fig:autoencoder}, the encoder part compresses the data by gradually decreasing the layer size. +The resulting embedding/code is passed to the decoder part, which is responsible for +reconstructing the input. +In the decoder, the layer size increases per layer. +Overall, the input \texttt{X} and output \texttt{X'} should be the same. + +\begin{figure}[h] +\centering + \includegraphics[totalheight=5cm]{Figures/Autoencoder.png} + \caption{An Autoencoder compresses and decompresses the data~\cite{autoencoderImg}.} + \label{fig:autoencoder} +\end{figure} + +Instead of fully connected layers, a convolutional autoencoder applies convolutions in the encoder and transposed convolutions in the decoder. + +\section{Setup} + +\subsection{Software} +TensorFlow was installed following the official guide from NVIDIA~\cite{jetsonNanoTensorFlow}. +Furthermore, it is not recommended to install the current version of OpenCV via pip3 due to compatibility issues with the CSI camera. +The CSI camera, i.e., the \texttt{gstream}, can only be accessed with an OpenCV version lower than 3.3.1. +This version was installed manually by downloading the source code from the official website and compiling it~\cite{opencv}. +In addition, for speed purposes, the maximal performance mode was enabled by the command \texttt{sudo nvpmodel -m 0}. +In order to enable the Jetson Clock, the command \texttt{sudo jetson\_clocks} was used. + + +\subsection{Hardware} +The CSI camera was plugged into the corresponding slot in the Jetson Nano. +Furthermore, the HDMI display shows the OpenCV window as presented in Figure~\ref{fig:OpenCV_window}. + + +\section{Training} + +\begin{wrapfigure}{r}{.4\textwidth} + +\centering + \includegraphics[totalheight=6cm]{Figures/LossPlot.png} + \caption{Train and test loss during training.} + \label{fig:trainTestLoss} + +\end{wrapfigure} +At the beginning of training our model, we used the common RGB color space. 
+In other words, the input was the grayscale image, and the output was the RGB image. +However, we lost too much information in the picture. +The general input picture was recognizable, but with a lot of ``compression''. +The reason for this is that for one pixel, all three RGB values are responsible for the brightness of that pixel. +So it is possible to get the right color but not the correct brightness. That is why we switched to the CIE LAB color space. +Here we also have three values for each pixel: the L channel for the +`brightness' and A and B as the color channels. +The L channel is like the grayscale image for the model. +The model's output is two values, the A and B channels. +By combining the predicted A and B values with the original L values, we get the colored image. We get an overall correct image because of the kept L channel, even if the colors do not match the original +image. + +The model was trained for 13 epochs (in total: 15 hours) with the ImageNet2012 dataset. +It contains ca. 1.300.000 training images and 300.000 validation images used for test data. +As presented in Figure~\ref{fig:trainTestLoss}, the model was successfully trained to convergence because, after about ten epochs, the train loss does not change significantly ($< 0.0001$) from one epoch to the next. + + +\subsection{Model} +As shown in Listing~\ref{lst:ourModel_summary}, our convolutional autoencoder has about 300.000 parameters. +The model's memory size is about 1.2 MB ($300000 \cdot 4$ Byte). +Encoder and decoder parts of the ANN are equally balanced because they have almost the same number of parameters. 
+ +\begin{lstlisting}[language=bash, caption=Parameter amount of our model (output of \texttt{summary()} call)., label={lst:ourModel_summary}, basicstyle=\fontsize{11}{9}\selectfont\ttfamily] + Model: "autoencoder" +_______________________________________________________________ + Layer (type) Output Shape Param # +=============================================================== + encoder (Encoder) multiple 148155 + + decoder (Decoder) multiple 150145 + +=============================================================== +Total params: 298,302 +Trainable params: 297,210 +Non-trainable params: 1,092 +_______________________________________________________________ +\end{lstlisting} + + +Figures~\ref{fig:EncoderLayer} and~\ref{fig:DecoderLayer} present the structure of the layers contained in the encoder and decoder. +The encoder receives a 256x256 pixel grey image. +Due to the grey color, there is only one color channel. +Convolutions can be seen as feature extractors. +At the first convolution in the encoder (see \texttt{Conv2D\_0} in Figure~\ref{fig:EncoderLayer}), there are 75 features extracted from this grey image. +These extracted features are represented as channels (similar to color channels but not colors) called feature maps. +Figuratively speaking, a feature map could be seen as a heatmap in which the pixels belonging to the corresponding feature have a high magnitude. +Due to the stride size of 2, the size of these feature maps is halved. +A convolution operation is followed by a batch normalization layer and an activation layer (the drive is normalized before it goes into the activation function). +In the encoder this occurs four times. +With each step, the number of filters increases. 
+ +\begin{figure}[h] + \centering + \begin{minipage}[b]{0.4\textwidth} + \includegraphics[width=\textwidth]{Figures/EncoderLayer.png} + \caption{Encoder layers.} + \label{fig:EncoderLayer} + \end{minipage} + \hfill + \begin{minipage}[b]{0.4\textwidth} + \includegraphics[width=\textwidth]{Figures/DecoderLayer.png} + \caption{Decoder layers.} + \label{fig:DecoderLayer} + \end{minipage} +\end{figure} + + +The resulting embedding is passed into the decoder. +Instead of convolutions reducing the feature map size, transposed convolutions increase the feature map size by a factor of 2. +As in the encoder, each transposed convolution is followed by batch normalization and activation layers. +In the decoder this occurs four times. +With each step, the number of filters decreases. +The exception is the last transposed convolution, which is a bottleneck layer: +It decreases the number of filters from 75 to 2 (\textit{a} and \textit{b} channel) and keeps the feature map size constant (stride size = 1). + + +\section{Optimizing the model to run on the Jetson Nano} + + +\begin{wrapfigure}{r}{.4\textwidth} + +\centering + \includegraphics[totalheight=4cm]{Figures/ResidualConnection.png} + \caption{Concept of a residual connection~\cite{residualConnectionImg}.} + \label{fig:ResidualConnection} + +\end{wrapfigure} + +Residual connections in neural networks, also called skip connections, counteract the vanishing gradient problem (tiny weight adjustments~\cite{vanishingGradients}) in the backpropagation algorithm~\cite{resnet}. +As shown in Figure~\ref{fig:ResidualConnection}, the output \texttt{x} of a layer is added two layers further to the input of the third layer~\cite{resnet}. +The output \texttt{x} must be saved because it is used at a later time step. +Therefore, residual connections need a lot of GPU memory, which forces part of the other data needed for the model to be moved out of GPU memory. +To increase the FPS, our model does not have residual connections. 
+ +As mentioned in the first section, the Jetson~Nano has 128 CUDA cores. +The number of filters per layer does not exceed this number of cores. +This limitation allows TensorFlow to simply schedule each feature map calculation on a specific core during the output calculation of a layer. +In other words, no core must perform a second feature map calculation after the first one while other cores are idling. +The calculation of a previous layer must be finished before starting with the next layer. +Furthermore, limiting the number of filters reduces the model size. + +In deep learning, overparameterization often occurs: +The number of trainable parameters is much larger than the number of training examples. +As a consequence, the model tends to overfit the data~\cite{overparameterization}. +The opposite applies to our model. +So to speak, our model is ``under-parameterized'': +Because there are only 300.000 parameters for about 1.3 million training images, our model is forced to generalize as strongly as possible during training. +To achieve such generalization, the model is trained for multiple epochs (iterations over the entire training dataset). +It is assumed that such generalization leads to results similar to those of a model with a considerably larger number of parameters. +In other words, the higher costs for training a small model compared with a larger model should yield similar results, while the latency to generate the result with the smaller model is lower. +Besides, the absence of skip connections increases the chance of vanishing gradients during training. +However, multiple training epochs compensate for this problem. +To clarify, millions of tiny weight changes sum up into an effective weight adjustment. + + +\section{Evaluation: Compare with Colorful Image Colorization} + +As demonstrated in Listing~\ref{lst:theirModel_summary}, the Colorful Image Colorization model from Richard Zhang et al.
has about 25 million parameters~\cite{colorize}. +The model presented in this paper is about 80 times smaller. +Its input shape is 256x256x1, the same as that of our model. + +\begin{lstlisting}[language=bash, caption=Parameter amount of the Colorful Image Colorization model (output of \texttt{summary()} call)., label={lst:theirModel_summary}, basicstyle=\fontsize{11}{9}\selectfont\ttfamily] +Model: "ColorfulImageColorization" +_______________________________________________________________ + Layer (type) Output Shape Param # +=============================================================== +[...] + +=============================================================== +Total params: 24,793,081 +Trainable params: 24,788,345 +Non-trainable params: 4,736 +_______________________________________________________________ +\end{lstlisting} + + +Figure~\ref{fig:ColoredImages_compareModels} shows grey images colorized by the Colorful Image Colorization model~\cite{colorize} and by our model. +Our model tends to colorize the images with a grey touch, and its colors are less saturated than those of the Colorful Image Colorization model. + + + +\begin{figure} +\centering + \includegraphics[totalheight=13cm]{Figures/ColoredImages_compareModels.png} + \caption{Colorized images generated by the Colorful Image Colorization model from Richard Zhang et al. and by our model.} + \label{fig:ColoredImages_compareModels} +\end{figure} + + +Our model does regression by predicting the \textit{ab} values. +The model output shape is 256x256x2 (see \texttt{tanh\_3} in Figure~\ref{fig:DecoderLayer}). +In contrast, the model from Richard Zhang et al. applies classification: +There is a probability distribution for each pixel approximating which color it may be. +For demonstration purposes, there were 313 colors available. +As a consequence, the model output shape is 256x256x313~\cite{colorize}. +Compared to our model, the larger output shape requires a much larger (ca.
80 times) number of parameters. + + + +\begin{figure}[h] +\centering + \includegraphics[totalheight=6cm]{Figures/ColorizedImagesLossPlot_comparedModels.png} + \caption{Loss based on colorized images by the + Colorful Image Colorization model from Richard Zhang et al. and by our model.} + \label{fig:Loss_compareModels} +\end{figure} + +Considering the loss as shown in Figure~\ref{fig:Loss_compareModels}, +our model outperforms the model from Richard Zhang et al. +However, the Euclidean loss (mean squared error) $L_2(\hat{y}, y)$ for the prediction $y$ and the target (also called ground truth) $\hat{y}$ was applied: + +\[ L_2(\hat{y}, y) = \frac{1}{2} \cdot \sum_{h,w} \| y_{h,w} - \hat{y}_{h,w} \|^{2} \] + +The loss function is ambiguous for the colorization problem. +Consider the prediction $y$ for a single pixel with a loss of $d$: +There are two possible corresponding targets $\hat{y} = y \pm d$ instead of a single one. +Furthermore, consider a set of pixels. For each of these pixels, a corresponding color will be predicted. +The optimal solution is the mean of all pixels within this set. +In the case of color prediction, this averaging causes a grey bias and desaturated colors~\cite{colorize}. + + + +\section{Conclusion} + + +Our model predicts the most probable color by applying regression. +In contrast, the model proposed by Richard Zhang et al. classifies the most probable color. +Due to the one-hot encoding applied for these color classifications, over 80 times more parameters are needed than are required for our model, considering the parameter balance between hidden layers and output layers. +When comparing the colorized images generated by a classification-based ANN and by a regression-based ANN, the regression-based ANN tends to colorize images with a grey touch and desaturated colors because of an ambiguous loss function for the colorization problem. +However, the results are acceptable considering the difference in the number of parameters between the two models. 
+Furthermore, a GPU cannot fully accelerate a classification-based model because the last part of the model is a sampling process. +This process is an argmax operation over 313 possible colors (see model output shape), which runs on the CPU. +Note that transferring data from GPU to CPU could be seen as a performance bottleneck. + +Overall, our model achieves about 10 FPS on the Jetson Nano. +Running the model from Richard Zhang et al. results in less than 0.01 FPS. + + + + + + + + + + +\bibliographystyle{ieeetr} +\bibliography{Literatur} + +\end{document} + diff --git a/Paper/Main.toc b/Paper/Main.toc new file mode 100644 index 0000000..7d26a83 --- /dev/null +++ b/Paper/Main.toc @@ -0,0 +1,14 @@ +\contentsline {section}{\numberline {1}Introduction}{1}{}% +\contentsline {section}{\numberline {2}Convolutional Autoencoder}{1}{}% +\contentsline {subsection}{\numberline {2.1}Convolutions}{1}{}% +\contentsline {subsection}{\numberline {2.2}Autoencoder}{2}{}% +\contentsline {section}{\numberline {3}Setup}{3}{}% +\contentsline {subsection}{\numberline {3.1}Software}{3}{}% +\contentsline {subsection}{\numberline {3.2}Hardware}{3}{}% +\contentsline {section}{\numberline {4}Training}{3}{}% +\contentsline {subsection}{\numberline {4.1}Model}{4}{}% +\contentsline {section}{\numberline {5}Optimizing the model to run on the Jetson Nano}{4}{}% +\contentsline {section}{\numberline {6}Evaluation: Compare with Colorful Image Colorization}{6}{}% +\contentsline {section}{\numberline {7}Conclusion}{8}{}% +\contentsline {section}{References}{9}{}% +\providecommand \tocbasic@end@toc@file {}\tocbasic@end@toc@file diff --git a/Paper/_minted-Main/default-pyg-prefix.pygstyle b/Paper/_minted-Main/default-pyg-prefix.pygstyle new file mode 100644 index 0000000..e69de29 diff --git a/Paper/_minted-Main/default.pygstyle b/Paper/_minted-Main/default.pygstyle new file mode 100644 index 0000000..e69de29 diff --git a/Paper_GPU_colorization.pdf b/Paper_GPU_colorization.pdf new file mode 100644 index
0000000..f94d799 Binary files /dev/null and b/Paper_GPU_colorization.pdf differ