This commit is contained in:
Denshooter
2022-03-30 21:21:19 +02:00
62 changed files with 811 additions and 12 deletions

View File

@@ -7,14 +7,14 @@ from Decoder import *
class Autoencoder(tf.keras.Model):
def __init__(self):
super(Autoencoder, self).__init__()
self.encoder = Encoder()
self.decoder = Decoder()
self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
self.loss_function = tf.keras.losses.MeanSquaredError()
self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
self.metric_mean = tf.keras.metrics.Mean(name="loss")
@tf.function
def call(self, x, training=False):
@@ -31,16 +31,24 @@ class Autoencoder(tf.keras.Model):
gradients = tape.gradient(loss, self.trainable_variables)
self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
return loss
self.metric_mean.update_state(loss)
def test(self, test_data):
self.metric_mean.reset_states()
# test over complete test data
test_loss_aggregator = []
for input, target in test_data: # ignore label
for input, target in test_data:
prediction = self(input)
sample_test_loss = self.loss_function(target, prediction)
test_loss_aggregator.append(sample_test_loss.numpy())
loss = self.loss_function(target, prediction)
self.metric_mean.update_state(loss)
test_loss = tf.reduce_mean(test_loss_aggregator)
return test_loss
mean_loss = self.metric_mean.result()
self.metric_mean.reset_states()
return mean_loss

View File

@@ -0,0 +1,14 @@
# Network input resolution in pixels (rows x columns).
img_rows = 256
img_cols = 256
# Channel count of a colour image; not referenced by the files shown alongside this config.
channel = 3

# Training-loop settings; presumably consumed by a training script that is not shown here.
batch_size = 32
epochs = 10000
patience = 50
num_train_samples = 529202
num_valid_samples = 4268

# Number of output channels of the final softmax layer of the model.
num_classes = 313
# Side length of the square convolution kernels used by the model.
kernel = 3
weight_decay = 1e-3
# Small constant added to the predicted probabilities before taking their log.
epsilon = 1e-8
nb_neighbors = 5
# temperature parameter T
T = 0.38

Binary file not shown.

View File

@@ -0,0 +1,92 @@
import keras.backend as K
import tensorflow as tf
from keras.layers import Input, Conv2D, BatchNormalization, UpSampling2D
from keras.models import Model
from keras.regularizers import l2
from Colorful_Image_Colorization.config import img_rows, img_cols, num_classes, kernel
# Single L2 regularizer instance shared by the regularized convolutions below.
l2_reg = l2(1e-3)


def build_model():
    """Build the fully convolutional "ColorNet" model.

    The network takes a single-channel image of shape
    ``(img_rows, img_cols, 1)``. Three strided convolutions (conv1_2, conv2_2,
    conv3_3) each halve the spatial resolution, one ``UpSampling2D`` doubles it
    again, and a final 1x1 softmax convolution produces ``num_classes``
    channels per output position.

    Returns:
        The uncompiled Keras ``Model`` named "ColorNet".
    """

    def conv(tensor, filters, name, regularize=True, **extra):
        # Every hidden convolution shares kernel size, activation, padding and
        # initializer; callers only vary filters, name, strides/dilation and
        # whether the shared L2 regularizer is attached.
        if regularize:
            extra['kernel_regularizer'] = l2_reg
        layer = Conv2D(filters, (kernel, kernel), activation='relu', padding='same', name=name,
                       kernel_initializer="he_normal", **extra)
        return layer(tensor)

    input_tensor = Input(shape=(img_rows, img_cols, 1))

    # conv1: 64 filters, second layer downsamples.
    net = conv(input_tensor, 64, 'conv1_1')
    net = conv(net, 64, 'conv1_2', strides=(2, 2))
    net = BatchNormalization()(net)

    # conv2: 128 filters, second layer downsamples.
    net = conv(net, 128, 'conv2_1')
    net = conv(net, 128, 'conv2_2', strides=(2, 2))
    net = BatchNormalization()(net)

    # conv3: 256 filters; conv3_3 downsamples and is the only hidden
    # convolution built without the L2 regularizer.
    net = conv(net, 256, 'conv3_1')
    net = conv(net, 256, 'conv3_2')
    net = conv(net, 256, 'conv3_3', regularize=False, strides=(2, 2))
    net = BatchNormalization()(net)

    # conv4-conv7: three convolutions each, followed by batch normalization.
    # conv5 and conv6 use a dilation rate of 2.
    stages = (
        (4, 512, {}),
        (5, 512, {'dilation_rate': 2}),
        (6, 512, {'dilation_rate': 2}),
        (7, 256, {}),
    )
    for stage, filters, stage_kwargs in stages:
        for idx in (1, 2, 3):
            net = conv(net, filters, f'conv{stage}_{idx}', **stage_kwargs)
        net = BatchNormalization()(net)

    # conv8: upsample by two, then three 128-filter convolutions.
    net = UpSampling2D(size=(2, 2))(net)
    for idx in (1, 2, 3):
        net = conv(net, 128, f'conv8_{idx}')
    net = BatchNormalization()(net)

    # Per-position class distribution over num_classes channels.
    outputs = Conv2D(num_classes, (1, 1), activation='softmax', padding='same', name='pred')(net)

    return Model(inputs=input_tensor, outputs=outputs, name="ColorNet")
if __name__ == '__main__':
    # Script entry point: build the model on the CPU and print its layer table.
    with tf.device("/cpu:0"):
        encoder_decoder = build_model()
    # Model.summary() prints the table itself and returns None, so wrapping it
    # in print() would emit a stray "None" line after the table.
    encoder_decoder.summary()
    #plot_model(encoder_decoder, to_file='encoder_decoder.svg', show_layer_names=True, show_shapes=True)
    # The "parallel" model is just an alias of the same model object here.
    parallel_model = encoder_decoder
    parallel_model.summary()
    #plot_model(parallel_model, to_file='parallel_model.svg', show_layer_names=True, show_shapes=True)
    # Release the Keras/TensorFlow graph state held by the backend session.
    K.clear_session()

Binary file not shown.

View File

@@ -41,5 +41,4 @@ class Encoder(tf.keras.Model): # <-- Needed to make parameters trainable and to
#print(x.shape)
#print("-------------")
#exit()
return x
return x

BIN
Plots/ColoredImages_1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 MiB

BIN
Plots/ColoredImages_2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.5 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.0 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

View File

@@ -0,0 +1,32 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def main():
    """Plot the exported train and test loss curves and save them as a PNG.

    Reads the "Value" column of the two CSV exports in the working directory,
    draws both curves over their row index (labelled "Epoch"), writes
    ``TrainTestLoss.png`` and then opens the interactive plot window.
    """
    train_loss = pd.read_csv('run-.-tag-Train loss.csv', sep=',')["Value"]
    test_loss = pd.read_csv('run-.-tag-Test loss.csv', sep=',')["Value"]

    # Each curve gets its own x axis, so exports of different length (e.g. an
    # interrupted run) still plot instead of failing on a shape mismatch.
    plt.plot(np.arange(len(train_loss)), train_loss, label="Train loss", color="r")
    plt.plot(np.arange(len(test_loss)), test_loss, label="Test loss", color="b")

    plt.legend()
    plt.grid(True)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    # Save before show(): the figure may be cleared once the window is closed.
    plt.savefig("TrainTestLoss.png")
    plt.show()


if __name__ == "__main__":
    # Ctrl+C ends the script with a short message instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("KeyboardInterrupt received")

View File

@@ -0,0 +1,182 @@
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import sys
sys.path.append("..")
from Autoencoder import Autoencoder
from Training import prepare_data, getRGB
import numpy as np
import os
#from Training import prepare_data, getRGB
from Colorful_Image_Colorization.model import build_model
from Colorful_Image_Colorization.config import img_rows, img_cols
from Colorful_Image_Colorization.config import nb_neighbors, T, epsilon
import cv2 as cv
def main():
    """Compare our autoencoder with the Zhang et al. model on test images.

    Loads one batch of 32 preprocessed ImageNet validation images, colorizes
    the batch with our trained autoencoder, colorizes the first ``NUM_IMGS``
    images with the comparison model, and writes two figures to the working
    directory:

    * ``ColoredImages_compareModels.png`` - grid of input, comparison-model
      output, our output and ground truth per image.
    * ``ColorizedImagesLossPlot_comparedModels.png`` - bar chart of the
      per-image mean squared error (in RGB space) of both models.
    """
    # Create Imagenet
    labels_path = tf.keras.utils.get_file('ImageNetLabels.txt','https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt')
    # NOTE: the labels are not used below; the read is kept so the script's
    # download behaviour is unchanged, but the file handle is now closed.
    with open(labels_path) as labels_file:
        imagenet_labels = np.array(labels_file.read().splitlines())

    data_dir = '/home/timwitte/Downloads/'
    write_dir = '../imagenet'

    # Construct a tf.data.Dataset
    download_config = tfds.download.DownloadConfig(
        extract_dir=os.path.join(write_dir, 'extracted'),
        manual_dir=data_dir
    )
    download_and_prepare_kwargs = {
        'download_dir': os.path.join(write_dir, 'downloaded'),
        'download_config': download_config,
    }
    train_dataset, test_dataset = tfds.load('imagenet2012',
                                            data_dir=os.path.join(write_dir, 'data'),
                                            split=['train', 'validation'],
                                            shuffle_files=True,
                                            download=True,
                                            as_supervised=True,
                                            download_and_prepare_kwargs=download_and_prepare_kwargs)

    # 32 images -> exactly one batch after prepare_data batches by 32.
    test_dataset = test_dataset.take(32).apply(prepare_data)

    # Load our model
    model_our = Autoencoder()
    model_our.build((1, 256, 256, 1))  # need a batch size
    model_our.load_weights("../saved_models/trainied_weights_epoch_12")

    # Load model to compare
    model_weights_path = '../Colorful_Image_Colorization/model.06-2.5489.hdf5'
    model_toCompare = build_model()
    model_toCompare.load_weights(model_weights_path)

    loss_function = tf.keras.losses.MeanSquaredError()

    for img_L, img_AB_orginal in test_dataset.take(1):
        img_rgb_orginal = getRGB(img_L, img_AB_orginal)
        img_AB_reconstructed_our = model_our.predict(img_L.numpy())
        img_rgb_reconstructed_our = getRGB(img_L, img_AB_reconstructed_our)

    NUM_IMGS = 5
    fig, axs = plt.subplots(NUM_IMGS, 4)

    # Raw strings keep the backslashes of the mathtext markup without relying
    # on invalid string escape sequences.
    axs[0, 0].set_title("Input", fontsize=30)
    axs[0, 1].set_title(r"Richard Zhang $\it{et\ al.}$", fontsize=30,)
    axs[0, 2].set_title("Ours", fontsize=30)
    axs[0, 3].set_title("Ground Truth", fontsize=30)

    losses1 = []  # our model
    losses2 = []  # comparison model
    for i in range(NUM_IMGS):
        img_AB_reconstructed_toCompare = getABFromModel(model_toCompare, img_L[i].numpy())
        img_rgb_reconstructed_toCompare = getRGB(img_L[i], img_AB_reconstructed_toCompare, batch_mode=False)

        axs[i, 0].imshow(img_L[i], cmap="gray")
        axs[i, 0].set_axis_off()

        axs[i, 1].imshow(img_rgb_reconstructed_toCompare)
        axs[i, 1].set_axis_off()

        axs[i, 2].imshow(img_rgb_reconstructed_our[i])
        axs[i, 2].set_axis_off()

        axs[i, 3].imshow(img_rgb_orginal[i])
        axs[i, 3].set_axis_off()

        loss_our = loss_function(img_rgb_orginal[i], img_rgb_reconstructed_our[i])
        # Score the comparison model on its own output for this image (the
        # previous code compared against our whole batch reconstruction).
        loss_toCompare = loss_function(img_rgb_orginal[i], img_rgb_reconstructed_toCompare)

        losses1.append(loss_our)
        losses2.append(loss_toCompare)

    plt.tight_layout()
    fig.set_size_inches(20, 25)
    fig.savefig("ColoredImages_compareModels.png")

    # Reset plot
    plt.clf()
    plt.cla()
    fig = plt.figure()

    # Create bar plot
    x_axis = np.arange(NUM_IMGS)
    width = 0.2
    plt.bar(x_axis - width/2., losses2, width=width/2, label = r"Richard Zhang $\it{et\ al.}$")
    plt.bar(x_axis - width/2. + 1/float(2)*width, losses1, width=width/2, label = 'Ours')

    plt.xticks(x_axis,[f"No. {i}" for i in range(NUM_IMGS)])

    plt.title("Loss of colorized images")
    plt.xlabel("Image")
    plt.ylabel("Loss")
    plt.legend()
    plt.tight_layout()
    plt.savefig("ColorizedImagesLossPlot_comparedModels.png")
def getABFromModel(model, grey_img):
    """Predict normalised A/B colour channels for one grey image.

    Args:
        model: object with a Keras-style ``predict`` method whose output can
            be reshaped to ``(img_rows // 4 * img_cols // 4, nb_q)``.
        grey_img: a single grey image; it is shifted with ``(x + 1) / 2``
            before prediction, so presumably it is scaled to [-1, 1] and has
            shape ``(img_rows, img_cols, 1)`` - TODO confirm against caller.

    Returns:
        ``float32`` array of shape ``(img_rows, img_cols, 2)`` holding the
        rescaled A channel at index 0 and the rescaled B channel at index 1.
    """
    # code taken from https://github.com/foamliu/Colorful-Image-Colorization/blob/master/demo.py
    q_ab = np.load("../Colorful_Image_Colorization/pts_in_hull.npy")
    nb_q = q_ab.shape[0]

    batch = np.expand_dims(grey_img, axis=0)
    X_colorized = model.predict((batch + 1) / 2)

    h, w = img_rows // 4, img_cols // 4
    X_colorized = X_colorized.reshape((h * w, nb_q))

    # Reweight probas
    X_colorized = np.exp(np.log(X_colorized + epsilon) / T)
    X_colorized = X_colorized / np.sum(X_colorized, 1)[:, np.newaxis]

    # Reweighted: expected A/B value per position under the bin distribution.
    # The bin count comes from the loaded table instead of a hard-coded 313.
    q_a = q_ab[:, 0].reshape((1, nb_q))
    q_b = q_ab[:, 1].reshape((1, nb_q))
    X_a = np.sum(X_colorized * q_a, 1).reshape((h, w))
    X_b = np.sum(X_colorized * q_b, 1).reshape((h, w))

    # cv.resize's third positional parameter is `dst`, not the interpolation
    # flag, so the flag must be passed by keyword to take effect.
    # dsize is given as (width, height).
    X_a = cv.resize(X_a, (img_cols, img_rows), interpolation=cv.INTER_CUBIC)
    X_b = cv.resize(X_b, (img_cols, img_rows), interpolation=cv.INTER_CUBIC)

    # Before: -90 <=a<= 100, -110 <=b<= 110
    # After: 38 <=a<= 228, 18 <=b<= 238
    X_a = X_a + 128
    X_b = X_b + 128

    out_lab = np.zeros((img_rows, img_cols, 2), dtype=np.float32)
    out_lab[:, :, 0] = X_a
    out_lab[:, :, 1] = X_b

    # NOTE(review): these offsets/ranges (38/190 and 20/203) do not match the
    # ranges in the comment above (b: 18..238), nor the `1 + 2*(AB - 128)/255`
    # normalisation that getRGB inverts - confirm the intended scaling.
    out_lab[:, :, 0] = -1.0 + 2*(out_lab[:, :, 0] - 38.0)/190
    out_lab[:, :, 1] = -1.0 + 2*(out_lab[:, :, 1] - 20.0)/203

    return out_lab
if __name__ == "__main__":
    # Run the comparison only when executed as a script; Ctrl+C ends it with
    # a short message instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("KeyboardInterrupt received")

View File

@@ -0,0 +1,84 @@
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
sys.path.append("..")
from Autoencoder import Autoencoder
from Training import prepare_data, getRGB
def main():
    """Colorize a few test images with the trained autoencoder and plot them.

    Loads one batch of 32 preprocessed ImageNet validation images, runs the
    autoencoder restored from ``../saved_models/trainied_weights_epoch_12`` on
    it and writes ``ColoredImages.png``: a grid with the grey input, the
    colorized output and the ground truth for the first ``NUM_IMGS`` images.
    """
    labels_path = tf.keras.utils.get_file('ImageNetLabels.txt','https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt')
    # NOTE: the labels are not used below; the read is kept so the script's
    # download behaviour is unchanged, but the file handle is now closed.
    with open(labels_path) as labels_file:
        imagenet_labels = np.array(labels_file.read().splitlines())

    data_dir = '/home/timwitte/Downloads/'
    write_dir = '../imagenet'

    # Construct a tf.data.Dataset
    download_config = tfds.download.DownloadConfig(
        extract_dir=os.path.join(write_dir, 'extracted'),
        manual_dir=data_dir
    )
    download_and_prepare_kwargs = {
        'download_dir': os.path.join(write_dir, 'downloaded'),
        'download_config': download_config,
    }
    train_dataset, test_dataset = tfds.load('imagenet2012',
                                            data_dir=os.path.join(write_dir, 'data'),
                                            split=['train', 'validation'],
                                            shuffle_files=True,
                                            download=True,
                                            as_supervised=True,
                                            download_and_prepare_kwargs=download_and_prepare_kwargs)

    # 32 images -> exactly one batch after prepare_data batches by 32.
    test_dataset = test_dataset.take(32).apply(prepare_data)

    autoencoder = Autoencoder()
    autoencoder.build((1, 256, 256, 1))  # need a batch size
    autoencoder.load_weights("../saved_models/trainied_weights_epoch_12")
    autoencoder.summary()
    autoencoder.encoder.summary()
    autoencoder.decoder.summary()

    for img_L, img_AB_orginal in test_dataset.take(1):
        img_AB_reconstructed = autoencoder(img_L)
        img_rgb_orginal = getRGB(img_L, img_AB_orginal)
        img_rgb_reconstructed = getRGB(img_L, img_AB_reconstructed)

    NUM_IMGS = 5
    fig, axs = plt.subplots(NUM_IMGS, 3)
    axs[0, 0].set_title("Input", fontsize=30)
    axs[0, 1].set_title("Output", fontsize=30)
    axs[0, 2].set_title("Ground Truth", fontsize=30)

    for i in range(NUM_IMGS):
        axs[i, 0].imshow(img_L[i], cmap="gray")
        axs[i, 0].set_axis_off()

        axs[i, 1].imshow(img_rgb_reconstructed[i])
        axs[i, 1].set_axis_off()

        axs[i, 2].imshow(img_rgb_orginal[i])
        axs[i, 2].set_axis_off()

    plt.tight_layout()
    fig.set_size_inches(15, 25)
    fig.savefig("ColoredImages.png")


if __name__ == "__main__":
    # Ctrl+C ends the script with a short message instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("KeyboardInterrupt received")

View File

@@ -0,0 +1,33 @@
import tensorflow as tf
from EncoderLayers import *
from DecoderLayers import *
import sys
sys.path.append("../..")
from Colorful_Image_Colorization.model import *
def main():
    """Render architecture diagrams of the encoder, decoder and comparison model.

    Each layer stack is wrapped in a functional ``tf.keras.Model`` around a
    symbolic input so that ``plot_model`` can draw it with shapes and layer
    names. Writes ``EncoderLayer.png``, ``DecoderLayer.png`` and
    ``ModelToCompare.png`` to the working directory.
    """

    def render(model, target):
        # Shared plotting options for all three diagrams.
        tf.keras.utils.plot_model(model, show_shapes=True, show_layer_names=True, to_file=target)

    enc_stack = EncoderLayers()
    dec_stack = DecoderLayers()

    # Symbolic inputs: a grey image for the encoder, an embedding for the decoder.
    grey_input = tf.keras.Input(shape=(256, 256, 1), name="Grey image")
    encoder_model = tf.keras.Model(inputs=[grey_input], outputs=enc_stack.call(grey_input))

    embedding_input = tf.keras.Input(shape=(32, 32, 3), name="Embedding")
    decoder_model = tf.keras.Model(inputs=[embedding_input], outputs=dec_stack.call(embedding_input))

    render(encoder_model, "EncoderLayer.png")
    render(decoder_model, "DecoderLayer.png")

    # The comparison network is re-wired onto the same grey-image input.
    reference_net = build_model()
    reference_model = tf.keras.Model(inputs=[grey_input], outputs=reference_net.call(grey_input))
    render(reference_model, "ModelToCompare.png")


if __name__ == "__main__":
    # Ctrl+C ends the script with a short message instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("KeyboardInterrupt received")

Binary file not shown.

After

Width:  |  Height:  |  Size: 116 KiB

View File

@@ -0,0 +1,31 @@
import tensorflow as tf
class DecoderLayers(tf.keras.Model):
    """Decoder layer stack, kept as a plain list so it can be plotted.

    Three stride-2 transposed convolutions (105, 90 and 75 filters) are
    followed by a 1x1 transposed convolution with 2 output channels. Every
    convolution is followed by batch normalization and a tanh activation.
    """

    def __init__(self):
        super().__init__()

        stack = []
        # Upsampling stages: each one doubles the spatial resolution.
        for idx, filters in enumerate((105, 90, 75)):
            stack.append(tf.keras.layers.Conv2DTranspose(filters, kernel_size=(3,3), strides=2, padding='same', name=f"Conv2D_Trans_{idx}"))
            stack.append(tf.keras.layers.BatchNormalization(name=f"BatchNormalization_{idx}"))
            stack.append(tf.keras.layers.Activation(tf.nn.tanh, name=f"tanh_{idx}"))

        # Final 1x1 projection down to the 2 output channels.
        stack.append(tf.keras.layers.Conv2DTranspose(2, kernel_size=(1,1), strides=1, padding='same', name="Conv2D_Trans_3"))
        stack.append(tf.keras.layers.BatchNormalization(name="BatchNormalization_3"))
        stack.append(tf.keras.layers.Activation(tf.nn.tanh, name="tanh_3"))

        self.layer_list = stack

    def call(self, x):
        """Apply every layer of the stack to ``x`` in order and return the result."""
        out = x
        for step in self.layer_list:
            out = step(out)
        return out

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

View File

@@ -0,0 +1,29 @@
import tensorflow as tf
class EncoderLayers(tf.keras.Model):
    """Encoder layer stack, kept as a plain list so it can be plotted.

    Three stride-2 convolutions (75, 90 and 105 filters) are followed by a
    1x1 convolution with 3 output channels. Every convolution is followed by
    batch normalization and a tanh activation.
    """

    def __init__(self):
        super().__init__()

        stack = []
        # Downsampling stages: each one halves the spatial resolution.
        for idx, filters in enumerate((75, 90, 105)):
            stack.append(tf.keras.layers.Conv2D(filters, kernel_size=(3, 3), strides=2, padding='same', name=f"Conv2D_{idx}"))
            stack.append(tf.keras.layers.BatchNormalization(name=f"BatchNormalization_{idx}"))
            stack.append(tf.keras.layers.Activation(tf.nn.tanh, name=f"tanh_{idx}"))

        # Final 1x1 projection down to the 3-channel embedding.
        stack.append(tf.keras.layers.Conv2D(3, kernel_size=(1, 1), strides=1, padding='same', name="Conv2D_3"))
        stack.append(tf.keras.layers.BatchNormalization(name="BatchNormalization_3"))
        stack.append(tf.keras.layers.Activation(tf.nn.tanh, name="tanh_3"))

        self.layer_list = stack

    def call(self, x):
        """Apply every layer of the stack to ``x`` in order and return the result."""
        out = x
        for step in self.layer_list:
            out = step(out)
        return out

Binary file not shown.

After

Width:  |  Height:  |  Size: 329 KiB

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
Plots/TrainTestLossPlot.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,15 @@
Wall time,Step,Value
1647118261.250931,0,0.016809336841106415
1647122922.100936,1,0.013759356923401356
1647127578.618303,2,0.01362006925046444
1647132237.117106,3,0.014061697758734226
1647136894.337419,4,0.013472857885062695
1647141548.038196,5,0.01342787966132164
1647146209.292402,6,0.013368184678256512
1647150861.434495,7,0.013420150615274906
1647155517.411057,8,0.01329082902520895
1647160207.188101,9,0.013379388488829136
1647164926.587916,10,0.013525118120014668
1647169761.401568,11,0.01334059052169323
1647174430.458649,12,0.013532023876905441
1647179221.740573,13,0.01326887309551239
1 Wall time Step Value
2 1647118261.250931 0 0.016809336841106415
3 1647122922.100936 1 0.013759356923401356
4 1647127578.618303 2 0.01362006925046444
5 1647132237.117106 3 0.014061697758734226
6 1647136894.337419 4 0.013472857885062695
7 1647141548.038196 5 0.01342787966132164
8 1647146209.292402 6 0.013368184678256512
9 1647150861.434495 7 0.013420150615274906
10 1647155517.411057 8 0.01329082902520895
11 1647160207.188101 9 0.013379388488829136
12 1647164926.587916 10 0.013525118120014668
13 1647169761.401568 11 0.01334059052169323
14 1647174430.458649 12 0.013532023876905441
15 1647179221.740573 13 0.01326887309551239

View File

@@ -0,0 +1,15 @@
Wall time,Step,Value
1647118226.379471,0,0.01686934195458889
1647122884.759068,1,0.016268473118543625
1647127535.214221,2,0.013647115789353848
1647132192.879982,3,0.013552550226449966
1647136850.629965,4,0.01349611859768629
1647141510.180662,5,0.013455081731081009
1647146165.61258,6,0.01342522632330656
1647150823.542946,7,0.013399843126535416
1647155473.882963,8,0.013378930278122425
1647160163.770788,9,0.013358119875192642
1647164886.509832,10,0.013342463411390781
1647169721.204018,11,0.013329868204891682
1647174386.649681,12,0.013316545635461807
1647179177.404204,13,0.013304967433214188
1 Wall time Step Value
2 1647118226.379471 0 0.01686934195458889
3 1647122884.759068 1 0.016268473118543625
4 1647127535.214221 2 0.013647115789353848
5 1647132192.879982 3 0.013552550226449966
6 1647136850.629965 4 0.01349611859768629
7 1647141510.180662 5 0.013455081731081009
8 1647146165.61258 6 0.01342522632330656
9 1647150823.542946 7 0.013399843126535416
10 1647155473.882963 8 0.013378930278122425
11 1647160163.770788 9 0.013358119875192642
12 1647164886.509832 10 0.013342463411390781
13 1647169721.204018 11 0.013329868204891682
14 1647174386.649681 12 0.013316545635461807
15 1647179177.404204 13 0.013304967433214188

90
README.md Normal file
View File

@@ -0,0 +1,90 @@
# Colorization of Grey Images by applying a Convolutional Autoencoder on the Jetson Nano
## by Dennis Konkol and Tim Niklas Witte
This repository contains a pretrained convolutional autoencoder for the colorization of grey images.
The live camera stream is colorized in real time.
The architecture of the ANN is optimized to run on the Jetson Nano.
It has 300.000 parameters.
In total, 10 FPS can be achieved on this embedded GPU.
![Example Video](videoPresentation.gif)
## Requirements
- TensorFlow 2
- OpenCV 3.3.1
- CSI camera plugged in (see code of `live_recolor[_plot].py`)
## Model
```bash
Model: "autoencoder"
_______________________________________________________________
Layer (type) Output Shape Param #
===============================================================
encoder (Encoder) multiple 148155
decoder (Decoder) multiple 150145
===============================================================
Total params: 298,302
Trainable params: 297,210
Non-trainable params: 1,092
_______________________________________________________________
```
## Usage
### Training
Run `Training.py` to start the training of the model.
Each epoch the weights are stored into `./saved_models`.
In addition, the corresponding training statistics (train and test loss as well as a batch of colorized test images) are logged in `./test_logs`.
```bash
python3 Training.py
```
### Live colorization
Launching `live_recolor.py` opens a window as shown in the GIF at the start of this README.
Note that the CSI camera must be plugged in.
```bash
python3 live_recolor.py
```
It has the following structure:
```bash
(1) | (2) | (3) | (4)
(1) = live RGB camera image
(2) = live grey camera image
(3) = live colorized image
```
To also display a loss plot (mean squared error between `(1)` and `(3)`),
run `live_recolor_plot.py` instead.
The loss plot is shown to the right of `(3)`.
```bash
python3 live_recolor_plot.py
```
### Pretrained Model
The model was trained for 13 epochs and its weights are stored in `./saved_models`.
Note that the grey images must have a shape of `(256,256,1)`.
The following code loads the model and colorizes an image:
```python3
autoencoder = Autoencoder()
autoencoder.build((1, 256, 256, 1)) # need a batch size
autoencoder.load_weights("./saved_models/trainied_weights_epoch_12")
autoencoder.summary()
grey_img = ... # grey_img.shape = (256,256,1)
grey_img = np.expand_dims(grey_img, axis=0) # add batch dim
colorized_img = autoencoder(grey_img)
```

175
Training.py Normal file
View File

@@ -0,0 +1,175 @@
from sklearn.utils import shuffle
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np
import tqdm
from Decoder import *
import os
from Autoencoder import Autoencoder
import tensorflow_io as tfio
def getRGB(L, AB, batch_mode=True):
    """Convert normalised L and AB channels back into RGB.

    Undoes the scaling applied in ``prepare_data`` (``L/50 - 1`` and
    ``1 + 2*(AB - 128)/255``), stacks the channels into a LAB image and
    converts it with ``tfio.experimental.color.lab_to_rgb``.

    Args:
        L: normalised lightness channel; reshaped to ``(batch, 256, 256, 1)``
            in batch mode or ``(256, 256, 1)`` otherwise.
        AB: normalised A/B channels, concatenated with ``L`` on the last axis.
        batch_mode: ``True`` if the inputs carry a leading batch dimension,
            ``False`` for a single image.

    Returns:
        The result of ``lab_to_rgb`` for the reassembled LAB tensor.
    """
    # Remove normalization
    L = (L + 1)*50
    AB = ((AB - 1)*255/2)+128

    if batch_mode:
        # -1 lets TensorFlow infer the batch size, so batches that do not
        # contain exactly 32 images are handled as well.
        L = tf.reshape(L, (-1, 256, 256, 1))
        LAB = tf.concat([L, AB], 3)
    else:
        L = tf.reshape(L, (256, 256, 1))
        LAB = tf.concat([L, AB], 2)

    rgb = tfio.experimental.color.lab_to_rgb(LAB)
    return rgb
def main():
    """Train the autoencoder on ImageNet and log progress to TensorBoard.

    Loads the ``imagenet2012`` train/validation splits, preprocesses them with
    ``prepare_data`` and trains for ``num_epochs`` epochs. Before training and
    after every epoch it logs the train loss, the test loss and a fixed batch
    of recolored test images to ``test_logs/test``. After every epoch the
    weights are saved to ``./saved_models``.
    """
    labels_path = tf.keras.utils.get_file('ImageNetLabels.txt','https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt')
    # NOTE: the labels are not used below; the read is kept so the script's
    # download behaviour is unchanged, but the file handle is now closed.
    with open(labels_path) as labels_file:
        imagenet_labels = np.array(labels_file.read().splitlines())

    data_dir = '/home/timwitte/Downloads/'
    write_dir = './imagenet'

    # Construct a tf.data.Dataset
    download_config = tfds.download.DownloadConfig(
        extract_dir=os.path.join(write_dir, 'extracted'),
        manual_dir=data_dir
    )
    download_and_prepare_kwargs = {
        'download_dir': os.path.join(write_dir, 'downloaded'),
        'download_config': download_config,
    }
    train_dataset, test_dataset = tfds.load('imagenet2012',
                                            data_dir=os.path.join(write_dir, 'data'),
                                            split=['train', 'validation'],
                                            shuffle_files=True,
                                            download=True,
                                            as_supervised=True,
                                            download_and_prepare_kwargs=download_and_prepare_kwargs)

    train_dataset = train_dataset.apply(prepare_data)
    test_dataset = test_dataset.apply(prepare_data).take(500)  # take 500 batches

    autoencoder = Autoencoder()

    num_epochs = 75
    file_path = "test_logs/test"
    summary_writer = tf.summary.create_file_writer(file_path)

    # Grab one fixed test batch that is re-colorized after every epoch.
    for img_L_tensorBoard, img_AB_tensorBoard in test_dataset.take(1):
        pass

    with summary_writer.as_default():
        tf.summary.image(name="grey_images",data = img_L_tensorBoard, step=0, max_outputs=32)

        img_RBG = getRGB(img_L_tensorBoard, img_AB_tensorBoard)
        tf.summary.image(name="colored_images",data = img_RBG, step=0, max_outputs=32)

        # Log the untrained output as RGB as well, so step 0 is comparable to
        # the per-epoch "recolored_images" entries below (the raw network
        # output only holds the two AB channels).
        img_AB = autoencoder(img_L_tensorBoard)
        img_RBG = getRGB(img_L_tensorBoard, img_AB)
        tf.summary.image(name="recolored_images",data = img_RBG, step=0, max_outputs=32)

        autoencoder.summary()

        # Baseline losses before any training step.
        train_loss = autoencoder.test(train_dataset.take(100))
        tf.summary.scalar(name="Train loss", data=train_loss, step=0)

        test_loss = autoencoder.test(test_dataset)
        tf.summary.scalar(name="Test loss", data=test_loss, step=0)

        for epoch in range(num_epochs):
            print(f"Epoch {epoch}")

            for img_L, img_AB in tqdm.tqdm(train_dataset,position=0, leave=True):
                autoencoder.train_step(img_L, img_AB)

            # The metric accumulated over the epoch is logged and then reset.
            tf.summary.scalar(name="Train loss", data=autoencoder.metric_mean.result(), step=epoch+1)
            autoencoder.metric_mean.reset_states()

            test_loss = autoencoder.test(test_dataset)
            tf.summary.scalar(name="Test loss", data=test_loss, step=epoch+1)

            img_AB = autoencoder(img_L_tensorBoard)
            img_RBG = getRGB(img_L_tensorBoard, img_AB)
            tf.summary.image(name="recolored_images",data = img_RBG, step=epoch + 1, max_outputs=32)

            # save model
            autoencoder.save_weights(f"./saved_models/trainied_weights_epoch_{epoch}", save_format="tf")
def prepare_data(data):
    """Turn a dataset of (image, label) pairs into batched (L, AB) training pairs.

    Every image is resized to 256x256, scaled to [0, 1], converted from RGB to
    LAB and split into the lightness channel (network input) and the A/B
    channels (target). Both are rescaled with ``L/50 - 1`` and
    ``1 + 2*(AB - 128)/255``. The result is shuffled with a buffer of 7000,
    batched by 32 and prefetched.

    Args:
        data: ``tf.data.Dataset`` yielding ``(image, label)`` tuples.

    Returns:
        Dataset yielding ``(L, AB)`` batches, where each L has shape
        ``(256, 256, 1)`` and each AB has shape ``(256, 256, 2)``.
    """

    def to_lab_pair(img, label):
        # The label is dropped; only the image is needed.
        resized = tf.image.resize(img, [256, 256])
        # rgb_to_lab expects a float input normalized between 0 and 1.
        unit_rgb = tf.cast(resized, tf.float32) / 255.
        lab = tfio.experimental.color.rgb_to_lab(unit_rgb)

        # X = L channel (with an explicit channel axis), Y = (A, B) channels.
        lightness = tf.reshape(lab[:, :, 0], shape=(256, 256, 1))
        ab = tf.stack([lab[:, :, 1], lab[:, :, 2]], axis=2)

        # Rescale both parts for the network.
        return (lightness / 50.0) - 1., 1 + (2*(ab - 128)/255)

    pipeline = data.map(to_lab_pair)

    # shuffle, batch, prefetch
    pipeline = pipeline.shuffle(7000).batch(32)
    return pipeline.prefetch(tf.data.AUTOTUNE)
if __name__ == "__main__":
    # Start training only when executed as a script; Ctrl+C ends it with a
    # short message instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("KeyboardInterrupt received")

View File

BIN
saved_models/.DS_Store vendored

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.