Merge branch 'main' of https://github.com/Denshooter/gpu_colorization
@@ -7,14 +7,14 @@ from Decoder import *
|
||||
class Autoencoder(tf.keras.Model):
|
||||
def __init__(self):
|
||||
super(Autoencoder, self).__init__()
|
||||
|
||||
self.encoder = Encoder()
|
||||
self.decoder = Decoder()
|
||||
|
||||
self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
|
||||
|
||||
self.loss_function = tf.keras.losses.MeanSquaredError()
|
||||
|
||||
self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
|
||||
|
||||
self.metric_mean = tf.keras.metrics.Mean(name="loss")
|
||||
|
||||
@tf.function
|
||||
def call(self, x, training=False):
|
||||
@@ -31,16 +31,24 @@ class Autoencoder(tf.keras.Model):
|
||||
|
||||
gradients = tape.gradient(loss, self.trainable_variables)
|
||||
self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
|
||||
return loss
|
||||
|
||||
self.metric_mean.update_state(loss)
|
||||
|
||||
|
||||
|
||||
def test(self, test_data):
|
||||
|
||||
self.metric_mean.reset_states()
|
||||
|
||||
# test over complete test data
|
||||
test_loss_aggregator = []
|
||||
for input, target in test_data: # ignore label
|
||||
for input, target in test_data:
|
||||
prediction = self(input)
|
||||
|
||||
sample_test_loss = self.loss_function(target, prediction)
|
||||
test_loss_aggregator.append(sample_test_loss.numpy())
|
||||
loss = self.loss_function(target, prediction)
|
||||
self.metric_mean.update_state(loss)
|
||||
|
||||
test_loss = tf.reduce_mean(test_loss_aggregator)
|
||||
return test_loss
|
||||
mean_loss = self.metric_mean.result()
|
||||
self.metric_mean.reset_states()
|
||||
return mean_loss
|
||||
|
||||
|
||||
|
||||
BIN
Colorful_Image_Colorization/__pycache__/config.cpython-38.pyc
Normal file
BIN
Colorful_Image_Colorization/__pycache__/model.cpython-38.pyc
Normal file
14
Colorful_Image_Colorization/config.py
Normal file
@@ -0,0 +1,14 @@
|
||||
# Constants shared by the "Colorful Image Colorization" comparison model.

# Spatial size of the network input (rows x columns).
img_rows = 256
img_cols = 256
channel = 3

# Training schedule and dataset sizes.
batch_size = 32
epochs = 10000
patience = 50
num_train_samples = 529202
num_valid_samples = 4268

# Number of classes predicted per pixel by the softmax output layer.
num_classes = 313
# Side length of the square convolution kernels, used as (kernel, kernel).
kernel = 3
weight_decay = 1e-3
# Small constant added before taking a logarithm to avoid log(0).
epsilon = 1e-8
nb_neighbors = 5

# temperature parameter T
T = 0.38
|
||||
BIN
Colorful_Image_Colorization/model.06-2.5489.hdf5
Normal file
92
Colorful_Image_Colorization/model.py
Normal file
@@ -0,0 +1,92 @@
|
||||
import keras.backend as K
|
||||
import tensorflow as tf
|
||||
from keras.layers import Input, Conv2D, BatchNormalization, UpSampling2D
|
||||
from keras.models import Model
|
||||
from keras.regularizers import l2
|
||||
|
||||
from Colorful_Image_Colorization.config import img_rows, img_cols, num_classes, kernel
|
||||
|
||||
l2_reg = l2(1e-3)
|
||||
|
||||
|
||||
def build_model():
    """Build the "ColorNet" comparison network.

    The network takes a single-channel ``(img_rows, img_cols, 1)`` input and
    ends in a 1x1 softmax convolution with ``num_classes`` output channels.
    Three stride-2 convolutions (conv1_2, conv2_2, conv3_3) are followed by a
    single 2x up-sampling step before conv8.

    Returns:
        The (uncompiled) Keras ``Model`` named ``"ColorNet"``.
    """

    def conv(tensor, filters, name, regularize=True, **extra):
        # ReLU convolution with "same" padding and he_normal initialisation;
        # `extra` carries per-layer options such as strides or dilation_rate.
        if regularize:
            extra["kernel_regularizer"] = l2_reg
        layer = Conv2D(filters, (kernel, kernel), activation='relu', padding='same', name=name,
                       kernel_initializer="he_normal", **extra)
        return layer(tensor)

    input_tensor = Input(shape=(img_rows, img_cols, 1))

    # Stage 1: 64 filters, second convolution halves the resolution.
    x = conv(input_tensor, 64, 'conv1_1')
    x = conv(x, 64, 'conv1_2', strides=(2, 2))
    x = BatchNormalization()(x)

    # Stage 2: 128 filters, second convolution halves the resolution.
    x = conv(x, 128, 'conv2_1')
    x = conv(x, 128, 'conv2_2', strides=(2, 2))
    x = BatchNormalization()(x)

    # Stage 3: 256 filters; the strided conv3_3 carries no kernel regulariser.
    x = conv(x, 256, 'conv3_1')
    x = conv(x, 256, 'conv3_2')
    x = conv(x, 256, 'conv3_3', regularize=False, strides=(2, 2))
    x = BatchNormalization()(x)

    # Stage 4: three plain 512-filter convolutions.
    for idx in (1, 2, 3):
        x = conv(x, 512, f'conv4_{idx}')
    x = BatchNormalization()(x)

    # Stages 5 and 6: dilated 512-filter convolutions.
    for stage in (5, 6):
        for idx in (1, 2, 3):
            x = conv(x, 512, f'conv{stage}_{idx}', dilation_rate=2)
        x = BatchNormalization()(x)

    # Stage 7: three 256-filter convolutions.
    for idx in (1, 2, 3):
        x = conv(x, 256, f'conv7_{idx}')
    x = BatchNormalization()(x)

    # Stage 8: up-sample by 2, then three 128-filter convolutions.
    x = UpSampling2D(size=(2, 2))(x)
    for idx in (1, 2, 3):
        x = conv(x, 128, f'conv8_{idx}')
    x = BatchNormalization()(x)

    # Per-pixel class distribution.
    outputs = Conv2D(num_classes, (1, 1), activation='softmax', padding='same', name='pred')(x)

    return Model(inputs=input_tensor, outputs=outputs, name="ColorNet")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Construct the model with the CPU as the default device.
    with tf.device("/cpu:0"):
        encoder_decoder = build_model()
    # Model.summary() prints the table itself and returns None, so wrapping it
    # in print() would emit an extra "None" line.
    encoder_decoder.summary()
    #plot_model(encoder_decoder, to_file='encoder_decoder.svg', show_layer_names=True, show_shapes=True)

    parallel_model = encoder_decoder
    parallel_model.summary()
    #plot_model(parallel_model, to_file='parallel_model.svg', show_layer_names=True, show_shapes=True)

    K.clear_session()
|
||||
BIN
Colorful_Image_Colorization/pts_in_hull.npy
Normal file
@@ -41,5 +41,4 @@ class Encoder(tf.keras.Model): # <-- Needed to make parameters trainable and to
|
||||
#print(x.shape)
|
||||
#print("-------------")
|
||||
#exit()
|
||||
return x
|
||||
|
||||
return x
|
||||
BIN
Plots/ColoredImages_1.png
Normal file
|
After Width: | Height: | Size: 3.0 MiB |
BIN
Plots/ColoredImages_2.png
Normal file
|
After Width: | Height: | Size: 3.0 MiB |
BIN
Plots/ColoredImages_compareModels_1.png
Normal file
|
After Width: | Height: | Size: 4.5 MiB |
BIN
Plots/ColoredImages_compareModels_2.png
Normal file
|
After Width: | Height: | Size: 4.0 MiB |
BIN
Plots/ColorizedImagesLossPlot_comparedModels_1.png
Normal file
|
After Width: | Height: | Size: 19 KiB |
BIN
Plots/ColorizedImagesLossPlot_comparedModels_2.png
Normal file
|
After Width: | Height: | Size: 16 KiB |
32
Plots/CreatePlot_TrainTestLoss.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
def main():
    """Plot the exported train and test loss curves.

    Reads ``run-.-tag-Train loss.csv`` and ``run-.-tag-Test loss.csv`` from the
    current working directory, plots their ``Value`` columns over the row
    index, saves the figure as ``TrainTestLoss.png`` and shows it.
    """
    df_train_loss = pd.read_csv('run-.-tag-Train loss.csv', sep=',')
    train_loss = df_train_loss["Value"]

    df_test_loss = pd.read_csv('run-.-tag-Test loss.csv', sep=',')
    test_loss = df_test_loss["Value"]

    # Each curve gets x positions derived from its own length, so the two
    # exports do not have to contain the same number of rows.
    plt.plot(np.arange(len(train_loss)), train_loss, label="Train loss", color="r")
    plt.plot(np.arange(len(test_loss)), test_loss, label="Test loss", color="b")

    plt.legend()
    plt.grid(True)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")

    plt.savefig("TrainTestLoss.png")
    plt.show()
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
main()
|
||||
except KeyboardInterrupt:
|
||||
print("KeyboardInterrupt received")
|
||||
182
Plots/CreatePlot_compareModels.py
Normal file
@@ -0,0 +1,182 @@
|
||||
import tensorflow as tf
|
||||
import tensorflow_datasets as tfds
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
import sys
|
||||
sys.path.append("..")
|
||||
|
||||
from Autoencoder import Autoencoder
|
||||
from Training import prepare_data, getRGB
|
||||
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
|
||||
#from Training import prepare_data, getRGB
|
||||
|
||||
from Colorful_Image_Colorization.model import build_model
|
||||
from Colorful_Image_Colorization.config import img_rows, img_cols
|
||||
from Colorful_Image_Colorization.config import nb_neighbors, T, epsilon
|
||||
import cv2 as cv
|
||||
|
||||
def main():
    """Compare our autoencoder with the Zhang et al. model on ImageNet images.

    Takes one prepared batch of ImageNet validation images, colourises it with
    both models and writes two figures to the current working directory:

    * ``ColoredImages_compareModels.png`` - grid with input, both
      colourisations and the ground truth for the first ``NUM_IMGS`` images.
    * ``ColorizedImagesLossPlot_comparedModels.png`` - bar chart of the mean
      squared error of each model against the ground truth, per image.
    """
    # Raw string: "\it" and "\ " are meant for matplotlib's math text and are
    # not Python escape sequences.
    zhang_label = r"Richard Zhang $\it{et\ al.}$"

    # Create Imagenet
    labels_path = tf.keras.utils.get_file('ImageNetLabels.txt','https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt')
    # NOTE(review): the labels are read but not used further in this function.
    with open(labels_path) as labels_file:
        imagenet_labels = np.array(labels_file.read().splitlines())

    data_dir = '/home/timwitte/Downloads/'
    write_dir = '../imagenet'

    # Construct a tf.data.Dataset
    download_config = tfds.download.DownloadConfig(
        extract_dir=os.path.join(write_dir, 'extracted'),
        manual_dir=data_dir
    )
    download_and_prepare_kwargs = {
        'download_dir': os.path.join(write_dir, 'downloaded'),
        'download_config': download_config,
    }

    # Only the validation split is needed for the comparison.
    _, test_dataset = tfds.load('imagenet2012',
                                data_dir=os.path.join(write_dir, 'data'),
                                split=['train', 'validation'],
                                shuffle_files=True,
                                download=True,
                                as_supervised=True,
                                download_and_prepare_kwargs=download_and_prepare_kwargs)

    test_dataset = test_dataset.take(32).apply(prepare_data)

    # Load our model
    model_our = Autoencoder()
    model_our.build((1, 256, 256, 1)) # need a batch size
    model_our.load_weights("../saved_models/trainied_weights_epoch_12")

    # Load model to compare
    model_weights_path = '../Colorful_Image_Colorization/model.06-2.5489.hdf5'
    model_toCompare = build_model()
    model_toCompare.load_weights(model_weights_path)

    loss_function = tf.keras.losses.MeanSquaredError()

    for img_L, img_AB_orginal in test_dataset.take(1):
        img_rgb_orginal = getRGB(img_L, img_AB_orginal)

        img_AB_reconstructed_our = model_our.predict(img_L.numpy())
        img_rgb_reconstructed_our = getRGB(img_L, img_AB_reconstructed_our)

    NUM_IMGS = 5
    fig, axs = plt.subplots(NUM_IMGS, 4)

    axs[0, 0].set_title("Input", fontsize=30)
    axs[0, 1].set_title(zhang_label, fontsize=30,)
    axs[0, 2].set_title("Ours", fontsize=30)
    axs[0, 3].set_title("Ground Truth", fontsize=30)

    losses1 = []  # our model
    losses2 = []  # comparison model
    for i in range(NUM_IMGS):

        # The comparison model is evaluated one image at a time.
        img_AB_reconstructed_toCompare = getABFromModel(model_toCompare, img_L[i].numpy())
        img_rgb_reconstructed_toCompare = getRGB(img_L[i], img_AB_reconstructed_toCompare, batch_mode=False)

        axs[i, 0].imshow(img_L[i], cmap="gray")
        axs[i, 0].set_axis_off()

        axs[i, 1].imshow(img_rgb_reconstructed_toCompare)
        axs[i, 1].set_axis_off()

        axs[i, 2].imshow(img_rgb_reconstructed_our[i])
        axs[i, 2].set_axis_off()

        axs[i, 3].imshow(img_rgb_orginal[i])
        axs[i, 3].set_axis_off()

        loss_our = loss_function(img_rgb_orginal[i], img_rgb_reconstructed_our[i])
        # Score the comparison model on its own output for image i (not on
        # our model's batch), so the two bars really compare the two models.
        loss_toCompare = loss_function(img_rgb_orginal[i], img_rgb_reconstructed_toCompare)

        losses1.append(loss_our)
        losses2.append(loss_toCompare)

    plt.tight_layout()

    fig.set_size_inches(20, 25)
    fig.savefig("ColoredImages_compareModels.png")

    # Reset plot
    plt.clf()
    plt.cla()
    fig = plt.figure()

    # Create bar plot
    x_axis = np.arange(NUM_IMGS)
    width = 0.2
    plt.bar(x_axis - width/2., losses2, width=width/2, label = zhang_label)
    plt.bar(x_axis - width/2. + 1/float(2)*width, losses1, width=width/2, label = 'Ours')

    plt.xticks(x_axis,[f"No. {i}" for i in range(NUM_IMGS)])

    plt.title("Loss of colorized images")
    plt.xlabel("Image")
    plt.ylabel("Loss")

    plt.legend()
    plt.tight_layout()
    plt.savefig("ColorizedImagesLossPlot_comparedModels.png")
|
||||
|
||||
|
||||
|
||||
|
||||
def getABFromModel(model, grey_img):
    """Predict normalised AB channels for one grey image with the comparison model.

    Args:
        model: Model whose ``predict`` output can be reshaped to
            ``(img_rows // 4 * img_cols // 4, nb_q)``, where ``nb_q`` is the
            number of rows in ``pts_in_hull.npy``.
        grey_img: NumPy array holding one grey image; ``(grey_img + 1) / 2`` is
            fed to the model. Presumably shaped ``(img_rows, img_cols, 1)``
            with values in [-1, 1] - TODO confirm against the caller.

    Returns:
        ``float32`` array of shape ``(img_rows, img_cols, 2)`` with the
        rescaled A channel at index 0 and the rescaled B channel at index 1.
    """
    # code taken from https://github.com/foamliu/Colorful-Image-Colorization/blob/master/demo.py
    q_ab = np.load("../Colorful_Image_Colorization/pts_in_hull.npy")
    nb_q = q_ab.shape[0]

    grey_img = np.expand_dims(grey_img, axis=0)  # add batch dimension

    X_colorized = model.predict((grey_img+1)/2)

    h, w = img_rows // 4, img_cols // 4
    X_colorized = X_colorized.reshape((h * w, nb_q))

    # Reweight probas
    X_colorized = np.exp(np.log(X_colorized + epsilon) / T)
    X_colorized = X_colorized / np.sum(X_colorized, 1)[:, np.newaxis]

    # Reweighted
    q_a = q_ab[:, 0].reshape((1, nb_q))
    q_b = q_ab[:, 1].reshape((1, nb_q))

    # Expected A/B value per pixel under the reweighted distribution.
    X_a = np.sum(X_colorized * q_a, 1).reshape((h, w))
    X_b = np.sum(X_colorized * q_b, 1).reshape((h, w))

    # The third positional parameter of cv.resize is `dst`, so the
    # interpolation flag has to be passed by keyword to take effect.
    # dsize is given as (width, height).
    X_a = cv.resize(X_a, (img_cols, img_rows), interpolation=cv.INTER_CUBIC)
    X_b = cv.resize(X_b, (img_cols, img_rows), interpolation=cv.INTER_CUBIC)

    # Before: -90 <=a<= 100, -110 <=b<= 110
    # After: 38 <=a<= 228, 18 <=b<= 238
    X_a = X_a + 128
    X_b = X_b + 128

    out_lab = np.zeros((img_rows, img_cols, 2), dtype=np.float32)

    out_lab[:, :, 0] = X_a
    out_lab[:, :, 1] = X_b

    # NOTE(review): these offsets/ranges (38/190, 20/203) differ from the
    # ranges stated in the comment above and from the AB normalisation that
    # getRGB inverts - confirm that this rescaling is intended.
    out_lab[:, :, 0] = -1.0 + 2*(out_lab[:, :, 0] - 38.0)/190
    out_lab[:, :, 1] = -1.0 + 2*(out_lab[:, :, 1] - 20.0)/203

    return out_lab
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
main()
|
||||
except KeyboardInterrupt:
|
||||
print("KeyboardInterrupt received")
|
||||
84
Plots/CreatePlot_showImages.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import tensorflow as tf
|
||||
import tensorflow_datasets as tfds
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
import sys
|
||||
sys.path.append("..")
|
||||
from Autoencoder import Autoencoder
|
||||
from Training import prepare_data, getRGB
|
||||
|
||||
def main():
    """Save a grid of grey inputs, colourised outputs and ground-truth images.

    Loads one prepared batch of ImageNet validation images, colourises it with
    the pretrained autoencoder and writes the first ``NUM_IMGS`` triples
    (input, output, ground truth) to ``ColoredImages.png``.
    """
    labels_path = tf.keras.utils.get_file('ImageNetLabels.txt','https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt')
    # NOTE(review): the labels are read but not used further in this function.
    with open(labels_path) as labels_file:
        imagenet_labels = np.array(labels_file.read().splitlines())

    data_dir = '/home/timwitte/Downloads/'
    write_dir = '../imagenet'

    # Construct a tf.data.Dataset
    download_config = tfds.download.DownloadConfig(
        extract_dir=os.path.join(write_dir, 'extracted'),
        manual_dir=data_dir
    )
    download_and_prepare_kwargs = {
        'download_dir': os.path.join(write_dir, 'downloaded'),
        'download_config': download_config,
    }

    # Only the validation split is needed here.
    _, test_dataset = tfds.load('imagenet2012',
                                data_dir=os.path.join(write_dir, 'data'),
                                split=['train', 'validation'],
                                shuffle_files=True,
                                download=True,
                                as_supervised=True,
                                download_and_prepare_kwargs=download_and_prepare_kwargs)

    test_dataset = test_dataset.take(32).apply(prepare_data)

    autoencoder = Autoencoder()

    autoencoder.build((1, 256, 256, 1)) # need a batch size
    autoencoder.load_weights("../saved_models/trainied_weights_epoch_12")
    autoencoder.summary()

    autoencoder.encoder.summary()
    autoencoder.decoder.summary()

    for img_L, img_AB_orginal in test_dataset.take(1):

        img_AB_reconstructed = autoencoder(img_L)

        img_rgb_orginal = getRGB(img_L, img_AB_orginal)
        img_rgb_reconstructed = getRGB(img_L, img_AB_reconstructed)

    NUM_IMGS = 5
    fig, axs = plt.subplots(NUM_IMGS, 3)

    axs[0, 0].set_title("Input", fontsize=30)
    axs[0, 1].set_title("Output", fontsize=30)
    axs[0, 2].set_title("Ground Truth", fontsize=30)

    for i in range(NUM_IMGS):

        axs[i, 0].imshow(img_L[i], cmap="gray")
        axs[i, 0].set_axis_off()

        axs[i, 1].imshow(img_rgb_reconstructed[i])
        axs[i, 1].set_axis_off()

        axs[i, 2].imshow(img_rgb_orginal[i])
        axs[i, 2].set_axis_off()

    plt.tight_layout()

    fig.set_size_inches(15, 25)
    fig.savefig("ColoredImages.png")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
main()
|
||||
except KeyboardInterrupt:
|
||||
print("KeyboardInterrupt received")
|
||||
33
Plots/Layers/CreatePlot_layerStructure.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import tensorflow as tf
|
||||
|
||||
from EncoderLayers import *
|
||||
from DecoderLayers import *
|
||||
|
||||
import sys
|
||||
sys.path.append("../..")
|
||||
|
||||
from Colorful_Image_Colorization.model import *
|
||||
|
||||
def main():
    """Render the layer graphs of the encoder, decoder and comparison model.

    Writes ``EncoderLayer.png``, ``DecoderLayer.png`` and
    ``ModelToCompare.png`` to the current working directory.
    """

    def save_graph(model, filename):
        # Plot the model with layer names and tensor shapes.
        tf.keras.utils.plot_model(model,show_shapes=True, show_layer_names=True, to_file=filename)

    enc_stack = EncoderLayers()
    dec_stack = DecoderLayers()

    # Wrap the layer stacks in functional models so that they can be plotted.
    inputs = tf.keras.Input(shape=(256,256, 1), name="Grey image")
    encoder = tf.keras.Model(inputs=[inputs],outputs=enc_stack.call(inputs))

    embedding = tf.keras.Input(shape=(32,32, 3), name="Embedding")
    decoder = tf.keras.Model(inputs=[embedding],outputs=dec_stack.call(embedding))

    save_graph(encoder, "EncoderLayer.png")
    save_graph(decoder, "DecoderLayer.png")

    # Same procedure for the comparison network, fed with the grey input.
    reference_net = build_model()
    modelToCompare = tf.keras.Model(inputs=[inputs],outputs=reference_net.call(inputs))
    save_graph(modelToCompare, "ModelToCompare.png")
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
main()
|
||||
except KeyboardInterrupt:
|
||||
print("KeyboardInterrupt received")
|
||||
BIN
Plots/Layers/DecoderLayer.png
Normal file
|
After Width: | Height: | Size: 116 KiB |
31
Plots/Layers/DecoderLayers.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import tensorflow as tf
|
||||
|
||||
class DecoderLayers(tf.keras.Model):
    """Decoder layer stack: three stride-2 transposed-convolution stages and a 1x1 output stage."""

    def __init__(self):
        super().__init__()

        stack = []
        # Three stages, each: transposed convolution -> batch norm -> tanh.
        for idx, filters in enumerate((105, 90, 75)):
            stack.append(tf.keras.layers.Conv2DTranspose(filters, kernel_size=(3,3), strides=2, padding='same', name=f"Conv2D_Trans_{idx}"))
            stack.append(tf.keras.layers.BatchNormalization(name=f"BatchNormalization_{idx}"))
            stack.append(tf.keras.layers.Activation(tf.nn.tanh, name=f"tanh_{idx}"))

        # Output stage: 1x1 transposed convolution down to 2 channels.
        stack.append(tf.keras.layers.Conv2DTranspose(2, kernel_size=(1,1), strides=1, padding='same', name="Conv2D_Trans_3"))
        stack.append(tf.keras.layers.BatchNormalization(name="BatchNormalization_3"))
        stack.append(tf.keras.layers.Activation(tf.nn.tanh, name="tanh_3"))

        self.layer_list = stack

    def call(self, x):
        """Apply all layers of ``layer_list`` to ``x`` in order and return the result."""
        out = x
        for stage in self.layer_list:
            out = stage(out)
        return out
|
||||
BIN
Plots/Layers/EncoderLayer.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
29
Plots/Layers/EncoderLayers.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import tensorflow as tf
|
||||
|
||||
class EncoderLayers(tf.keras.Model):
    """Encoder layer stack: three stride-2 convolution stages and a 1x1 bottleneck stage."""

    def __init__(self):
        super().__init__()

        stack = []
        # Three stages, each: convolution -> batch norm -> tanh.
        for idx, filters in enumerate((75, 90, 105)):
            stack.append(tf.keras.layers.Conv2D(filters, kernel_size=(3, 3), strides=2, padding='same', name=f"Conv2D_{idx}"))
            stack.append(tf.keras.layers.BatchNormalization(name=f"BatchNormalization_{idx}"))
            stack.append(tf.keras.layers.Activation(tf.nn.tanh, name=f"tanh_{idx}"))

        # Bottleneck stage: 1x1 convolution down to 3 channels.
        stack.append(tf.keras.layers.Conv2D(3, kernel_size=(1, 1), strides=1, padding='same', name="Conv2D_3"))
        stack.append(tf.keras.layers.BatchNormalization(name="BatchNormalization_3"))
        stack.append(tf.keras.layers.Activation(tf.nn.tanh, name="tanh_3"))

        self.layer_list = stack

    def call(self, x):
        """Apply all layers of ``layer_list`` to ``x`` in order and return the result."""
        out = x
        for stage in self.layer_list:
            out = stage(out)
        return out
|
||||
BIN
Plots/Layers/ModelToCompare.png
Normal file
|
After Width: | Height: | Size: 329 KiB |
BIN
Plots/Layers/__pycache__/DecoderLayers.cpython-38.pyc
Normal file
BIN
Plots/Layers/__pycache__/EncoderLayers.cpython-38.pyc
Normal file
BIN
Plots/Layers/__pycache__/ModelToCompare.cpython-38.pyc
Normal file
BIN
Plots/TrainTestLossPlot.png
Normal file
|
After Width: | Height: | Size: 30 KiB |
BIN
Plots/__pycache__/DecoderLayers.cpython-38.pyc
Normal file
BIN
Plots/__pycache__/EncoderLayers.cpython-38.pyc
Normal file
15
Plots/run-.-tag-Test loss.csv
Normal file
@@ -0,0 +1,15 @@
|
||||
Wall time,Step,Value
|
||||
1647118261.250931,0,0.016809336841106415
|
||||
1647122922.100936,1,0.013759356923401356
|
||||
1647127578.618303,2,0.01362006925046444
|
||||
1647132237.117106,3,0.014061697758734226
|
||||
1647136894.337419,4,0.013472857885062695
|
||||
1647141548.038196,5,0.01342787966132164
|
||||
1647146209.292402,6,0.013368184678256512
|
||||
1647150861.434495,7,0.013420150615274906
|
||||
1647155517.411057,8,0.01329082902520895
|
||||
1647160207.188101,9,0.013379388488829136
|
||||
1647164926.587916,10,0.013525118120014668
|
||||
1647169761.401568,11,0.01334059052169323
|
||||
1647174430.458649,12,0.013532023876905441
|
||||
1647179221.740573,13,0.01326887309551239
|
||||
|
15
Plots/run-.-tag-Train loss.csv
Normal file
@@ -0,0 +1,15 @@
|
||||
Wall time,Step,Value
|
||||
1647118226.379471,0,0.01686934195458889
|
||||
1647122884.759068,1,0.016268473118543625
|
||||
1647127535.214221,2,0.013647115789353848
|
||||
1647132192.879982,3,0.013552550226449966
|
||||
1647136850.629965,4,0.01349611859768629
|
||||
1647141510.180662,5,0.013455081731081009
|
||||
1647146165.61258,6,0.01342522632330656
|
||||
1647150823.542946,7,0.013399843126535416
|
||||
1647155473.882963,8,0.013378930278122425
|
||||
1647160163.770788,9,0.013358119875192642
|
||||
1647164886.509832,10,0.013342463411390781
|
||||
1647169721.204018,11,0.013329868204891682
|
||||
1647174386.649681,12,0.013316545635461807
|
||||
1647179177.404204,13,0.013304967433214188
|
||||
|
90
README.md
Normal file
@@ -0,0 +1,90 @@
|
||||
# Colorization of Grey Images by applying a Convolutional Autoencoder on the Jetson Nano
|
||||
## by Dennis Konkol and Tim Niklas Witte
|
||||
|
||||
This repository contains a pretrained convolutional autoencoder for the colorization of grey images.
|
||||
The live camera stream is colorized in real time.
|
||||
The architecture of the ANN is optimized to run on the Jetson Nano.
|
||||
It has about 300,000 parameters.
|
||||
In total, 10 FPS can be achieved on this embedded GPU.
|
||||
|
||||

|
||||
|
||||
## Requirements
|
||||
|
||||
- TensorFlow 2
|
||||
- OpenCV 3.3.1
|
||||
- CSI camera plugged in (see the code of `live_recolor[_plot].py`)
|
||||
|
||||
## Model
|
||||
|
||||
```bash
|
||||
Model: "autoencoder"
|
||||
_______________________________________________________________
|
||||
Layer (type) Output Shape Param #
|
||||
===============================================================
|
||||
encoder (Encoder) multiple 148155
|
||||
|
||||
decoder (Decoder) multiple 150145
|
||||
|
||||
===============================================================
|
||||
Total params: 298,302
|
||||
Trainable params: 297,210
|
||||
Non-trainable params: 1,092
|
||||
_______________________________________________________________
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Training
|
||||
|
||||
Run `Training.py` to start the training of the model.
|
||||
Each epoch the weights are stored into `./saved_models`.
|
||||
In addition, the corresponding training statistics (train and test loss as well as a batch of colorized test images) are logged in `./test_logs`.
|
||||
|
||||
```bash
|
||||
python3 Training.py
|
||||
```
|
||||
|
||||
### Live colorization
|
||||
|
||||
The launch of `live_recolor_plot.py` opens a window as shown in the GIF at the start of this README.
|
||||
Note that the CSI camera must be plugged in.
|
||||
|
||||
```bash
|
||||
python3 live_recolor.py
|
||||
```
|
||||
|
||||
It has the following structure:
|
||||
|
||||
```bash
|
||||
(1) | (2) | (3) | (4)
|
||||
|
||||
(1) = live RGB camera image
|
||||
(2) = live grey camera image
|
||||
(3) = live colorized image
|
||||
```
|
||||
|
||||
To additionally display a loss plot (mean squared error between `(1)` and `(3)`),
|
||||
run `live_recolor_plot.py` instead.
|
||||
The loss plot is shown to the right of `(3)`.
|
||||
|
||||
```bash
|
||||
python3 live_recolor_plot.py
|
||||
```
|
||||
|
||||
### Pretrained Model
|
||||
|
||||
The model was trained for 13 epochs and its weights are stored in `./saved_models`.
|
||||
Note that the grey images must have a shape of `(256,256,1)`.
|
||||
The following code loads the model and colorizes an image:
|
||||
|
||||
```python3
|
||||
autoencoder = Autoencoder()
|
||||
autoencoder.build((1, 256, 256, 1)) # need a batch size
|
||||
autoencoder.load_weights("./saved_models/trainied_weights_epoch_12")
|
||||
autoencoder.summary()
|
||||
|
||||
grey_img = ... # grey_img.shape = (256,256,1)
|
||||
grey_img = np.expand_dims(grey_img, axis=0) # add batch dim
|
||||
colorized_img = autoencoder(grey_img)
|
||||
```
|
||||
175
Training.py
Normal file
@@ -0,0 +1,175 @@
|
||||
from sklearn.utils import shuffle
|
||||
import tensorflow as tf
|
||||
import tensorflow_datasets as tfds
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import tqdm
|
||||
|
||||
from Decoder import *
|
||||
|
||||
import os
|
||||
|
||||
from Autoencoder import Autoencoder
|
||||
import tensorflow_io as tfio
|
||||
|
||||
def getRGB(L, AB, batch_mode=True):
    """Convert normalised L and AB channels back into an RGB image.

    Undoes the normalisation applied by ``prepare_data``
    (``L / 50 - 1`` and ``1 + 2 * (AB - 128) / 255``), joins the channels into
    a LAB image and converts it to RGB.

    Args:
        L: Normalised L channel; reshaped to ``(batch, 256, 256, 1)`` in batch
            mode or ``(256, 256, 1)`` otherwise.
        AB: Normalised AB channels; must be concatenable with ``L`` along the
            channel axis.
        batch_mode: ``True`` if the inputs carry a leading batch dimension.

    Returns:
        The result of ``tfio.experimental.color.lab_to_rgb`` on the joined image.
    """
    # Remove normalization
    L = (L + 1)*50
    AB = ((AB - 1)*255/2)+128

    if batch_mode:
        # -1 lets TensorFlow infer the batch size, so batches of any size
        # (e.g. a smaller final batch) work, not only batches of 32.
        L = tf.reshape(L, (-1, 256,256,1))
        LAB = tf.concat([L, AB], 3)
    else:
        L = tf.reshape(L, (256,256,1))
        LAB = tf.concat([L, AB], 2)
    rgb = tfio.experimental.color.lab_to_rgb(LAB)

    return rgb
|
||||
|
||||
def main():
    """Train the colourisation autoencoder on ImageNet and log to TensorBoard.

    Before training and after every epoch, the train loss, the test loss and a
    colourised batch of test images are written to ``test_logs/test``. The
    weights are saved to ``./saved_models`` after every epoch.
    """
    labels_path = tf.keras.utils.get_file('ImageNetLabels.txt','https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt')
    # NOTE(review): the labels are read but not used further in this function.
    with open(labels_path) as labels_file:
        imagenet_labels = np.array(labels_file.read().splitlines())

    data_dir = '/home/timwitte/Downloads/'
    write_dir = './imagenet'

    # Construct a tf.data.Dataset
    download_config = tfds.download.DownloadConfig(
        extract_dir=os.path.join(write_dir, 'extracted'),
        manual_dir=data_dir
    )
    download_and_prepare_kwargs = {
        'download_dir': os.path.join(write_dir, 'downloaded'),
        'download_config': download_config,
    }

    train_dataset, test_dataset= tfds.load('imagenet2012',
                                           data_dir=os.path.join(write_dir, 'data'),
                                           split=['train', 'validation'],
                                           shuffle_files=True,
                                           download=True,
                                           as_supervised=True,
                                           download_and_prepare_kwargs=download_and_prepare_kwargs)

    train_dataset = train_dataset.apply(prepare_data)
    test_dataset = test_dataset.apply(prepare_data).take(500) # take 500 batches

    autoencoder = Autoencoder()
    num_epochs = 75

    file_path = "test_logs/test"
    summary_writer = tf.summary.create_file_writer(file_path)

    # Keep one test batch that is logged as images at every step.
    for img_L_tensorBoard, img_AB_tensorBoard in test_dataset.take(1):
        pass

    with summary_writer.as_default():

        # The L channel is normalised to [-1, 1]; float image summaries are
        # clipped to [0, 1], so map it to that range before logging.
        tf.summary.image(name="grey_images",data = (img_L_tensorBoard + 1)/2, step=0, max_outputs=32)
        img_RBG = getRGB(img_L_tensorBoard, img_AB_tensorBoard)
        tf.summary.image(name="colored_images",data = img_RBG, step=0, max_outputs=32)

        # Log the untrained model's output as RGB, exactly like the per-epoch
        # logging below (the raw network output only holds the AB channels).
        img_AB = autoencoder(img_L_tensorBoard)
        img_RBG = getRGB(img_L_tensorBoard, img_AB)
        tf.summary.image(name="recolored_images",data = img_RBG, step=0, max_outputs=32)

        autoencoder.summary()

        # Baseline losses before any training step.
        train_loss = autoencoder.test(train_dataset.take(100))
        tf.summary.scalar(name="Train loss", data=train_loss, step=0)

        test_loss = autoencoder.test(test_dataset)
        tf.summary.scalar(name="Test loss", data=test_loss, step=0)

        for epoch in range(num_epochs):

            print(f"Epoch {epoch}")

            for img_L, img_AB in tqdm.tqdm(train_dataset,position=0, leave=True):
                autoencoder.train_step(img_L, img_AB)

            tf.summary.scalar(name="Train loss", data=autoencoder.metric_mean.result(), step=epoch+1)
            autoencoder.metric_mean.reset_states()

            test_loss = autoencoder.test(test_dataset)
            tf.summary.scalar(name="Test loss", data=test_loss, step=epoch+1)

            img_AB = autoencoder(img_L_tensorBoard)
            img_RBG = getRGB(img_L_tensorBoard, img_AB)
            tf.summary.image(name="recolored_images",data = img_RBG, step=epoch + 1, max_outputs=32)

            # save model
            autoencoder.save_weights(f"./saved_models/trainied_weights_epoch_{epoch}", save_format="tf")
|
||||
|
||||
def prepare_data(data):
    """Turn an ``(image, label)`` dataset into batched ``(L, AB)`` pairs.

    Each image is resized to 256x256, scaled to [0, 1], converted to LAB and
    split into the L channel (network input) and the stacked A/B channels
    (target). Both are rescaled, then the dataset is shuffled, batched in
    groups of 32 and prefetched.

    Args:
        data: ``tf.data.Dataset`` yielding ``(image, label)`` tuples.

    Returns:
        The preprocessed ``tf.data.Dataset`` yielding ``(L, AB)`` batches.
    """

    def drop_label(img, label):
        return img

    def resize(img):
        return tf.image.resize(img, [256,256])

    def to_float32(img):
        return tf.cast(img, tf.float32)

    def to_unit_range(img):
        # tfio.experimental.color.rgb_to_lab expects its input to be a float normalized between 0 and 1.
        return (img/255.)

    def to_lab(img):
        return tfio.experimental.color.rgb_to_lab(img)

    def split_channels(img):
        # X = L channel, Y = (A,B) channels
        return (img[:, :, 0], tf.stack([img[:, :, 1], img[:, :, 2]], axis=2))

    def add_channel_axis(L, AB):
        # Give the L channel an explicit channel dimension.
        return ( tf.reshape(L, shape=(256,256,1)) , AB)

    def normalize(L, AB):
        # Same rescaling that getRGB reverses.
        return ( (L/50.0) - 1., 1 + (2*(AB - 128)/255) )

    steps = (
        drop_label,
        resize,
        to_float32,
        to_unit_range,
        to_lab,
        split_channels,
        add_channel_axis,
        normalize,
    )
    for step in steps:
        data = data.map(step)

    # shuffle, batch, prefetch
    return data.shuffle(7000).batch(32).prefetch(tf.data.AUTOTUNE)
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
main()
|
||||
except KeyboardInterrupt:
|
||||
print("KeyboardInterrupt received")
|
||||