diff --git a/Autoencoder.py b/Autoencoder.py
new file mode 100644
index 0000000..1f29067
--- /dev/null
+++ b/Autoencoder.py
@@ -0,0 +1,46 @@
+import tensorflow as tf
+import numpy as np
+
+from Encoder import *
+from Decoder import *
+
+class Autoencoder(tf.keras.Model):
+    def __init__(self):
+        super(Autoencoder, self).__init__()
+
+        self.encoder = Encoder()
+        self.decoder = Decoder()
+
+        self.loss_function = tf.keras.losses.MeanSquaredError()
+
+        self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
+
+    @tf.function
+    def call(self, x, training=False):
+        embedding = self.encoder(x, training)
+        decoded = self.decoder(embedding, training)
+        return decoded
+
+    @tf.function
+    def train_step(self, input, target):
+        with tf.GradientTape() as tape:
+            prediction = self(input, training=True)
+            loss = self.loss_function(target, prediction)
+
+        gradients = tape.gradient(loss, self.trainable_variables)
+        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
+        return loss
+
+    def test(self, test_data):
+        # average the loss over the complete test data
+        test_loss_aggregator = []
+        for input, target in test_data:  # (input, target) pairs
+            prediction = self(input)
+            sample_test_loss = self.loss_function(target, prediction)
+            test_loss_aggregator.append(sample_test_loss.numpy())
+
+        test_loss = tf.reduce_mean(test_loss_aggregator)
+        return test_loss
\ No newline at end of file
diff --git a/Decoder.py b/Decoder.py
new file mode 100644
index 0000000..3d9ffa2
--- /dev/null
+++ b/Decoder.py
@@ -0,0 +1,42 @@
+import tensorflow as tf
+
+class Decoder(tf.keras.Model):  # subclassing tf.keras.Model makes the parameters trainable and the object callable
+    def __init__(self):
+        super(Decoder, self).__init__()
+        self.layer_list = [
+
+            tf.keras.layers.Conv2DTranspose(105, kernel_size=(3,3), strides=2, padding='same'),
+            tf.keras.layers.BatchNormalization(),
+            tf.keras.layers.Activation(tf.nn.tanh),
+
+            tf.keras.layers.Conv2DTranspose(90, kernel_size=(3,3), strides=2, padding='same'),
+            tf.keras.layers.BatchNormalization(),
+            tf.keras.layers.Activation(tf.nn.tanh),
+
+            tf.keras.layers.Conv2DTranspose(75, kernel_size=(3,3), strides=2, padding='same'),
+            tf.keras.layers.BatchNormalization(),
+            tf.keras.layers.Activation(tf.nn.tanh),
+
+            # output layer: the two AB color channels
+            tf.keras.layers.Conv2DTranspose(2, kernel_size=(1,1), strides=1, padding='same'),
+            tf.keras.layers.BatchNormalization(),
+            tf.keras.layers.Activation(tf.nn.tanh),
+        ]
+
+    @tf.function
+    def call(self, x, training):
+        for layer in self.layer_list:
+            # only BatchNormalization needs to know whether we are training
+            if isinstance(layer, tf.keras.layers.BatchNormalization):
+                x = layer(x, training=training)
+            else:
+                x = layer(x)
+        return x
\ No newline at end of file
diff --git a/Encoder.py b/Encoder.py
new file mode 100644
index 0000000..fc75cc1
--- /dev/null
+++ b/Encoder.py
@@ -0,0 +1,45 @@
+import tensorflow as tf
+
+class Encoder(tf.keras.Model):  # subclassing tf.keras.Model makes the parameters trainable and the object callable
+    def __init__(self):
+        super(Encoder, self).__init__()
+        self.layer_list = [
+            # input: (256, 256, 1) grayscale L channel
+
+            tf.keras.layers.Conv2D(75, kernel_size=(3, 3), strides=2, padding='same'),
+            tf.keras.layers.BatchNormalization(),
+            tf.keras.layers.Activation(tf.nn.tanh),
+            # -> (128, 128, 75)
+
+            tf.keras.layers.Conv2D(90, kernel_size=(3, 3), strides=2, padding='same'),
+            tf.keras.layers.BatchNormalization(),
+            tf.keras.layers.Activation(tf.nn.tanh),
+            # -> (64, 64, 90)
+
+            tf.keras.layers.Conv2D(105, kernel_size=(3, 3), strides=2, padding='same'),
+            tf.keras.layers.BatchNormalization(),
+            tf.keras.layers.Activation(tf.nn.tanh),
+            # -> (32, 32, 105)
+
+            # bottleneck
+            tf.keras.layers.Conv2D(3, kernel_size=(1, 1), strides=1, padding='same'),
+            tf.keras.layers.BatchNormalization(),
+            tf.keras.layers.Activation(tf.nn.tanh),
+        ]
+
+    @tf.function
+    def call(self, x, training):
+        for layer in self.layer_list:
+            # only BatchNormalization needs to know whether we are training
+            if isinstance(layer, tf.keras.layers.BatchNormalization):
+                x = layer(x, training=training)
+            else:
+                x = layer(x)
+        return x
+
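The repository ships pretrained checkpoints under saved_models/ but no training driver. The snippet below is a minimal sketch of how Autoencoder.train_step and Autoencoder.test could be driven, assuming a tf.data pipeline that yields the normalized (L, AB) pairs produced by helper.to_LAB; train_frames, test_frames and the epoch count are placeholders and not part of this diff.

import tensorflow as tf
from Autoencoder import Autoencoder
import helper

def make_dataset(rgb_frames, batch_size=32):
    # rgb_frames: placeholder array of RGB images with shape (N, 256, 256, 3)
    ds = tf.data.Dataset.from_tensor_slices(rgb_frames)
    ds = ds.map(helper.to_LAB)                 # -> normalized (L, AB) pairs
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

autoencoder = Autoencoder()
autoencoder.build((1, 256, 256, 1))

train_ds = make_dataset(train_frames)          # train_frames / test_frames are placeholders
test_ds = make_dataset(test_frames)

for epoch in range(20):
    for L, AB in train_ds:
        train_loss = autoencoder.train_step(L, AB)
    print("epoch", epoch, "train loss", float(train_loss),
          "test loss", float(autoencoder.test(test_ds)))
    # checkpoint naming mirrors the files shipped in saved_models/
    autoencoder.save_weights("./saved_models/trainied_weights_epoch_{}".format(epoch))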
diff --git a/helper.py b/helper.py
new file mode 100644
index 0000000..bc71e36
--- /dev/null
+++ b/helper.py
@@ -0,0 +1,281 @@
+import tensorflow as tf
+import numpy as np
+import cv2 as cv
+import matplotlib.pyplot as plt
+
+# For the color conversions see https://github.com/tensorflow/io/blob/v0.24.0/tensorflow_io/python/experimental/color_ops.py#L398-L459
+# The code from tensorflow-io was copied here because tfio has no pip package for the Jetson Nano.
+def rgb_to_lab(input, illuminant="D65", observer="2", name=None):
+    """
+    Convert a RGB image to CIE LAB.
+    Args:
+      input: A 3-D (`[H, W, 3]`) or 4-D (`[N, H, W, 3]`) Tensor.
+      illuminant : {"A", "D50", "D55", "D65", "D75", "E"}, optional
+        The name of the illuminant (the function is NOT case sensitive).
+      observer : {"2", "10"}, optional
+        The aperture angle of the observer.
+      name: A name for the operation (optional).
+    Returns:
+      A 3-D (`[H, W, 3]`) or 4-D (`[N, H, W, 3]`) Tensor.
+    """
+    input = tf.convert_to_tensor(input)
+    assert input.dtype in (tf.float16, tf.float32, tf.float64)
+
+    illuminants = {
+        "A": {
+            "2": (1.098466069456375, 1, 0.3558228003436005),
+            "10": (1.111420406956693, 1, 0.3519978321919493),
+        },
+        "D50": {
+            "2": (0.9642119944211994, 1, 0.8251882845188288),
+            "10": (0.9672062750333777, 1, 0.8142801513128616),
+        },
+        "D55": {
+            "2": (0.956797052643698, 1, 0.9214805860173273),
+            "10": (0.9579665682254781, 1, 0.9092525159847462),
+        },
+        "D65": {
+            "2": (0.95047, 1.0, 1.08883),
+            "10": (0.94809667673716, 1, 1.0730513595166162),
+        },
+        "D75": {
+            "2": (0.9497220898840717, 1, 1.226393520724154),
+            "10": (0.9441713925645873, 1, 1.2064272211720228),
+        },
+        "E": {"2": (1.0, 1.0, 1.0), "10": (1.0, 1.0, 1.0)},
+    }
+    coords = tf.constant(illuminants[illuminant.upper()][observer], input.dtype)
+
+    xyz = rgb_to_xyz(input)
+
+    xyz = xyz / coords
+
+    xyz = tf.where(
+        tf.math.greater(xyz, 0.008856),
+        tf.math.pow(xyz, 1.0 / 3.0),
+        xyz * 7.787 + 16.0 / 116.0,
+    )
+
+    xyz = tf.unstack(xyz, axis=-1)
+    x, y, z = xyz[0], xyz[1], xyz[2]
+
+    # Vector scaling
+    l = (y * 116.0) - 16.0
+    a = (x - y) * 500.0
+    b = (y - z) * 200.0
+
+    return tf.stack([l, a, b], axis=-1)
+
+
+def lab_to_rgb(input, illuminant="D65", observer="2", name=None):
+    """
+    Convert a CIE LAB image to RGB.
+    Args:
+      input: A 3-D (`[H, W, 3]`) or 4-D (`[N, H, W, 3]`) Tensor.
+      illuminant : {"A", "D50", "D55", "D65", "D75", "E"}, optional
+        The name of the illuminant (the function is NOT case sensitive).
+      observer : {"2", "10"}, optional
+        The aperture angle of the observer.
+      name: A name for the operation (optional).
+    Returns:
+      A 3-D (`[H, W, 3]`) or 4-D (`[N, H, W, 3]`) Tensor.
+    """
+    input = tf.convert_to_tensor(input)
+    assert input.dtype in (tf.float16, tf.float32, tf.float64)
+
+    lab = input
+    lab = tf.unstack(lab, axis=-1)
+    l, a, b = lab[0], lab[1], lab[2]
+
+    y = (l + 16.0) / 116.0
+    x = (a / 500.0) + y
+    z = y - (b / 200.0)
+
+    z = tf.math.maximum(z, 0)
+
+    xyz = tf.stack([x, y, z], axis=-1)
+
+    xyz = tf.where(
+        tf.math.greater(xyz, 0.2068966),
+        tf.math.pow(xyz, 3.0),
+        (xyz - 16.0 / 116.0) / 7.787,
+    )
+
+    illuminants = {
+        "A": {
+            "2": (1.098466069456375, 1, 0.3558228003436005),
+            "10": (1.111420406956693, 1, 0.3519978321919493),
+        },
+        "D50": {
+            "2": (0.9642119944211994, 1, 0.8251882845188288),
+            "10": (0.9672062750333777, 1, 0.8142801513128616),
+        },
+        "D55": {
+            "2": (0.956797052643698, 1, 0.9214805860173273),
+            "10": (0.9579665682254781, 1, 0.9092525159847462),
+        },
+        "D65": {
+            "2": (0.95047, 1.0, 1.08883),
+            "10": (0.94809667673716, 1, 1.0730513595166162),
+        },
+        "D75": {
+            "2": (0.9497220898840717, 1, 1.226393520724154),
+            "10": (0.9441713925645873, 1, 1.2064272211720228),
+        },
+        "E": {"2": (1.0, 1.0, 1.0), "10": (1.0, 1.0, 1.0)},
+    }
+    coords = tf.constant(illuminants[illuminant.upper()][observer], input.dtype)
+
+    xyz = xyz * coords
+
+    return xyz_to_rgb(xyz)
+
+
+ """ + input = tf.convert_to_tensor(input) + assert input.dtype in (tf.float16, tf.float32, tf.float64) + + lab = input + lab = tf.unstack(lab, axis=-1) + l, a, b = lab[0], lab[1], lab[2] + + y = (l + 16.0) / 116.0 + x = (a / 500.0) + y + z = y - (b / 200.0) + + z = tf.math.maximum(z, 0) + + xyz = tf.stack([x, y, z], axis=-1) + + xyz = tf.where( + tf.math.greater(xyz, 0.2068966), + tf.math.pow(xyz, 3.0), + (xyz - 16.0 / 116.0) / 7.787, + ) + + illuminants = { + "A": { + "2": (1.098466069456375, 1, 0.3558228003436005), + "10": (1.111420406956693, 1, 0.3519978321919493), + }, + "D50": { + "2": (0.9642119944211994, 1, 0.8251882845188288), + "10": (0.9672062750333777, 1, 0.8142801513128616), + }, + "D55": { + "2": (0.956797052643698, 1, 0.9214805860173273), + "10": (0.9579665682254781, 1, 0.9092525159847462), + }, + "D65": { + "2": (0.95047, 1.0, 1.08883), + "10": (0.94809667673716, 1, 1.0730513595166162), + }, + "D75": { + "2": (0.9497220898840717, 1, 1.226393520724154), + "10": (0.9441713925645873, 1, 1.2064272211720228), + }, + "E": {"2": (1.0, 1.0, 1.0), "10": (1.0, 1.0, 1.0)}, + } + coords = tf.constant(illuminants[illuminant.upper()][observer], input.dtype) + + xyz = xyz * coords + + return xyz_to_rgb(xyz) + + +def rgb_to_xyz(input, name=None): + """ + Convert a RGB image to CIE XYZ. + Args: + input: A 3-D (`[H, W, 3]`) or 4-D (`[N, H, W, 3]`) Tensor. + name: A name for the operation (optional). + Returns: + A 3-D (`[H, W, 3]`) or 4-D (`[N, H, W, 3]`) Tensor. + """ + input = tf.convert_to_tensor(input) + assert input.dtype in (tf.float16, tf.float32, tf.float64) + + kernel = tf.constant( + [ + [0.412453, 0.357580, 0.180423], + [0.212671, 0.715160, 0.072169], + [0.019334, 0.119193, 0.950227], + ], + input.dtype, + ) + value = tf.where( + tf.math.greater(input, 0.04045), + tf.math.pow((input + 0.055) / 1.055, 2.4), + input / 12.92, + ) + return tf.tensordot(value, tf.transpose(kernel), axes=((-1,), (0,))) + + +def xyz_to_rgb(input, name=None): + """ + Convert a CIE XYZ image to RGB. + Args: + input: A 3-D (`[H, W, 3]`) or 4-D (`[N, H, W, 3]`) Tensor. + name: A name for the operation (optional). + Returns: + A 3-D (`[H, W, 3]`) or 4-D (`[N, H, W, 3]`) Tensor. + """ + input = tf.convert_to_tensor(input) + assert input.dtype in (tf.float16, tf.float32, tf.float64) + + # inv of: + # [[0.412453, 0.35758 , 0.180423], + # [0.212671, 0.71516 , 0.072169], + # [0.019334, 0.119193, 0.950227]] + kernel = tf.constant( + [ + [3.24048134, -1.53715152, -0.49853633], + [-0.96925495, 1.87599, 0.04155593], + [0.05564664, -0.20404134, 1.05731107], + ], + input.dtype, + ) + value = tf.tensordot(input, tf.transpose(kernel), axes=((-1,), (0,))) + value = tf.where( + tf.math.greater(value, 0.0031308), + tf.math.pow(value, 1.0 / 2.4) * 1.055 - 0.055, + value * 12.92, + ) + return tf.clip_by_value(value, 0, 1) + + +# see: https://github.com/JetsonHacksNano/CSI-Camera/blob/master/simple_camera.py +def gstreamer_pipeline( + capture_width, + capture_height, + display_width, + display_height, + framerate=10, + flip_method=0, +): + return ( + "nvarguscamerasrc ! " + "video/x-raw(memory:NVMM), " + "width=(int)%d, height=(int)%d, " + "format=(string)NV12, framerate=(fraction)%d/1 ! " + "nvvidconv flip-method=%d ! " + "video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! " + "videoconvert ! " + "video/x-raw, format=(string)BGR ! 
+def get_RGB(L, AB):
+    # undo the normalization applied in to_LAB
+    L = (L + 1)*50
+    AB = ((AB - 1)*255/2)+128
+
+    LAB = tf.concat([L, AB], 3)
+    #rgb = tfio.experimental.color.lab_to_rgb(LAB)
+    rgb = lab_to_rgb(LAB)
+
+    return rgb
+
+
+def to_LAB(frame):
+    # recast from int to float
+    gray = tf.cast(frame, tf.float32)
+    # normalize to [0, 1]
+    gray = gray/255
+
+    gray = rgb_to_lab(gray)
+    # "split" the array into the L channel and the AB channels
+    L, AB = gray[:,:,0], tf.stack([gray[:,:,1], gray[:,:,2]], axis=2)
+    L = tf.reshape(L, shape=(256,256,1))
+    # normalize to roughly [-1, 1]
+    L, AB = ((L/50.0) - 1., 1 + (2*(AB - 128)/255))
+    return L, AB
+
+
+def suitable_rgb(L, ab_img):
+    # CIE LAB color space to RGB
+    recolored_image = get_RGB(L, ab_img)
+    # remove the batch dimension
+    recolored_image = tf.squeeze(recolored_image, 0)
+    # remove the normalization and interpret as uint8
+    recolored_image = tf.cast(recolored_image*255, tf.uint8)
+    # make it a numpy array
+    recolored_image = recolored_image.numpy()
+    # convert to BGR for OpenCV
+    recolored_image = cv.cvtColor(recolored_image, cv.COLOR_RGB2BGR)
+    return recolored_image
+
+
+def create_Plot(loss_array):
+    # set the plot configuration
+    plt.grid(True)
+    plt.ylim(0, 0.07)
+    plt.xlabel('Frame')
+    plt.ylabel('Loss')
+
+    plt.plot(loss_array)
+    plt.savefig('loss.png')
+    plt.clf()
+
+
+def saturate(image):
+    hsvImg = cv.cvtColor(image, cv.COLOR_BGR2HSV)
+    # multiply by a factor to increase the saturation (clip to avoid uint8 overflow)
+    hsvImg[...,1] = np.clip(hsvImg[...,1]*1.4, 0, 255)
+    # multiply by a factor of less than 1 to reduce the brightness
+    hsvImg[...,2] = hsvImg[...,2]*0.6
+    image = cv.cvtColor(hsvImg, cv.COLOR_HSV2BGR)
+    return image
\ No newline at end of file
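to_LAB and get_RGB must stay inverses of each other (L is scaled to roughly [-1, 1] via L/50 - 1, AB via 1 + 2*(AB - 128)/255, and get_RGB undoes both). The following is a small round-trip sanity check, assuming a random RGB test image; the error bound mentioned in the comment is illustrative, not measured.

import numpy as np
import tensorflow as tf
import helper

# random RGB test image in [0, 255], the format to_LAB expects
frame = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)

# forward: RGB -> normalized (L, AB)
L, AB = helper.to_LAB(frame)

# backward: normalized (L, AB) -> RGB in [0, 1]
# (get_RGB concatenates on axis 3, so add a batch dimension first)
rgb = helper.get_RGB(tf.expand_dims(L, 0), tf.expand_dims(AB, 0))

# the round trip should roughly reproduce the input image
err = tf.reduce_max(tf.abs(rgb[0] - tf.cast(frame, tf.float32) / 255.0))
print("max round-trip error:", float(err))   # expected to stay small (float32 rounding only)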
diff --git a/live_recolor.py b/live_recolor.py
new file mode 100644
index 0000000..1cf7fe3
--- /dev/null
+++ b/live_recolor.py
@@ -0,0 +1,102 @@
+import cv2 as cv
+from Autoencoder import Autoencoder
+import helper
+import tensorflow as tf
+
+# Height and width of each frame
+HEIGHT = 256
+WIDTH = 256
+
+# Text on the pictures
+font = cv.FONT_HERSHEY_SIMPLEX
+position = (0, HEIGHT-10)
+fontScale = 1
+fontColor = (255, 0, 255)
+
+
+def load_model(weight_path):
+    autoencoder = Autoencoder()
+    # build with an explicit batch size of 1
+    autoencoder.build((1, 256, 256, 1))
+    # load the trained weights
+    autoencoder.load_weights(weight_path)
+    return autoencoder
+
+
+def addTextToFrame(frame, text):
+    return (
+        cv.putText(frame, text,
+                   position,
+                   font,
+                   fontScale,
+                   fontColor)
+    )
+
+
+def main(pipeline, autoencoder):
+    # capture the camera feed
+    if pipeline:
+        cap = cv.VideoCapture(pipeline, cv.CAP_GSTREAMER)
+    else:
+        cap = cv.VideoCapture(0)
+    # main loop
+    while True:
+        # get a frame
+        ret, frame = cap.read()
+        if not ret:
+            break
+        frame = cv.resize(frame, (256, 256))
+
+        # make a grayscale image
+        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
+        # add back the channel dimension, so concatenation works
+        three_value_gray = cv.cvtColor(gray, cv.COLOR_GRAY2BGR)
+
+        L, AB = helper.to_LAB(frame)
+
+        # add batch dimension
+        L = tf.expand_dims(L, 0)
+        # predict the AB color channels from the grayscale L channel
+        ab_img = autoencoder(L)
+
+        recolored_image = helper.suitable_rgb(L, ab_img)
+
+        # add text to the frames
+        ogFrame = addTextToFrame(frame, 'Original')
+        three_value_gray = addTextToFrame(three_value_gray, 'Grayscale')
+        recolored_image = addTextToFrame(recolored_image, 'Recolored')
+
+        # draw an FPS counter onto the last frame
+        fps = cap.get(cv.CAP_PROP_FPS)
+        cv.putText(recolored_image, "FPS: {:.2f}".format(fps), (150, 20), font, 0.5, fontColor)
+
+        # concatenate the three frames into one
+        im_h = cv.hconcat([ogFrame, three_value_gray, recolored_image])
+        # display it
+        cv.imshow("Live Recoloration", im_h)
+        # quit on ESC
+        if cv.waitKey(1) == 27:
+            break
+    # release the camera
+    cap.release()
+    cv.destroyAllWindows()
+
+
+if __name__ == '__main__':
+
+    # create the gstreamer pipeline for reading the CSI camera's data
+    pipeline = helper.gstreamer_pipeline(
+        capture_width=WIDTH,
+        capture_height=HEIGHT,
+        display_width=WIDTH,
+        display_height=HEIGHT,
+        flip_method=0
+    )
+
+    autoencoder = load_model(weight_path="./saved_models/trainied_weights_epoch_12")
+    try:
+        main(pipeline, autoencoder)
+    except KeyboardInterrupt:
+        print("[!] Exiting program . . .")
+        exit(1)
\ No newline at end of file
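live_recolor.py assumes a live camera; the same inference path can also be exercised offline on a single photo, which is convenient for checking a checkpoint without the Jetson hardware. The snippet below is a minimal sketch reusing load_model and the helpers; the input and output file names are placeholders.

import cv2 as cv
import tensorflow as tf
import helper
from live_recolor import load_model

autoencoder = load_model("./saved_models/trainied_weights_epoch_12")

# read an arbitrary image (path is a placeholder) and resize to the model's input size
frame = cv.resize(cv.imread("some_photo.jpg"), (256, 256))

# grayscale L channel in, predicted AB channels out
L, _ = helper.to_LAB(frame)
L = tf.expand_dims(L, 0)            # add the batch dimension
ab_img = autoencoder(L)

# back to a displayable BGR image
recolored = helper.suitable_rgb(L, ab_img)
cv.imwrite("recolored.png", recolored)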
diff --git a/live_recolor_plot.py b/live_recolor_plot.py
new file mode 100644
index 0000000..b364032
--- /dev/null
+++ b/live_recolor_plot.py
@@ -0,0 +1,122 @@
+import cv2 as cv
+from Autoencoder import Autoencoder
+import helper
+import tensorflow as tf
+import numpy as np
+import os
+
+# Height and width of each frame
+HEIGHT = 256
+WIDTH = 256
+
+# Text on the pictures
+font = cv.FONT_HERSHEY_SIMPLEX
+position = (0, HEIGHT-10)
+fontScale = 1
+fontColor = (255, 0, 255)
+
+
+def load_model(weight_path):
+    autoencoder = Autoencoder()
+    # build with an explicit batch size of 1
+    autoencoder.build((1, 256, 256, 1))
+    # load the trained weights
+    autoencoder.load_weights(weight_path)
+    return autoencoder
+
+
+def addTextToFrame(frame, text):
+    return (
+        cv.putText(frame, text,
+                   position,
+                   font,
+                   fontScale,
+                   fontColor)
+    )
+
+
+def main(pipeline, autoencoder):
+    # capture the camera feed
+    if pipeline:
+        cap = cv.VideoCapture(pipeline, cv.CAP_GSTREAMER)
+    else:
+        cap = cv.VideoCapture(0)
+    # rolling window with the last 31 loss values for the plot
+    loss_array = np.zeros(31)
+    # main loop
+    while True:
+        # get a frame
+        ret, frame = cap.read()
+        if frame is None:
+            break
+        frame = cv.resize(frame, (256, 256))
+
+        # make a grayscale image
+        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
+        # add back the channel dimension, so concatenation works
+        three_value_gray = cv.cvtColor(gray, cv.COLOR_GRAY2BGR)
+
+        L, AB = helper.to_LAB(frame)
+
+        # add batch dimension
+        L = tf.expand_dims(L, 0)
+        # predict the AB color channels from the grayscale L channel
+        ab_img = autoencoder(L)
+
+        recolored_image = helper.suitable_rgb(L, ab_img)
+
+        # add text to the frames
+        ogFrame = addTextToFrame(frame, 'Original')
+        three_value_gray = addTextToFrame(three_value_gray, 'Grayscale')
+        recolored_image = addTextToFrame(recolored_image, 'Recolored')
+
+        # get the loss between the true and the predicted AB channels
+        loss = autoencoder.loss_function(AB, ab_img)
+        # get only the loss value
+        loss = loss.numpy()
+
+        # append the loss to the rolling window
+        loss_array = loss_array[1:]
+        loss_array = np.append(loss_array, loss)
+
+        helper.create_Plot(loss_array)
+
+        # draw an FPS counter onto the last frame
+        fps = cap.get(cv.CAP_PROP_FPS)
+        cv.putText(recolored_image, "FPS: {:.2f}".format(fps), (150, 20), font, 0.5, fontColor)
+
+        # load the loss graph
+        loss_graph = cv.imread('loss.png')
+        # resize it to 256x256
+        loss_graph = cv.resize(loss_graph, (256, 256))
+
+        # concatenate the four frames into one
+        im_h = cv.hconcat([ogFrame, three_value_gray, recolored_image, loss_graph])
+        # display it
+        cv.imshow("Live Recoloration", im_h)
+        # quit on ESC
+        if cv.waitKey(1) == 27:
+            break
+    # delete the loss graph
+    if os.path.exists('loss.png'):
+        os.remove('loss.png')
+    # release the camera
+    cap.release()
+    cv.destroyAllWindows()
+
+
+if __name__ == '__main__':
+
+    # no gstreamer pipeline: fall back to the default webcam in main()
+    pipeline = None
+
+    autoencoder = load_model(weight_path="./saved_models/trainied_weights_epoch_12")
+    try:
+        main(pipeline, autoencoder)
+    except KeyboardInterrupt:
+        print("[!] Exiting program . . .")
+        # delete the loss graph
+        if os.path.exists('loss.png'):
+            os.remove('loss.png')
+        exit(1)
\ No newline at end of file
diff --git a/saved_models/.DS_Store b/saved_models/.DS_Store
new file mode 100644
index 0000000..7deef98
Binary files /dev/null and b/saved_models/.DS_Store differ
diff --git a/saved_models/checkpoint b/saved_models/checkpoint
new file mode 100644
index 0000000..3b3ae8b
--- /dev/null
+++ b/saved_models/checkpoint
@@ -0,0 +1,2 @@
+model_checkpoint_path: "trainied_weights_epoch_12"
+all_model_checkpoint_paths: "trainied_weights_epoch_12"
diff --git a/saved_models/trainied_weights_epoch_1.data-00000-of-00001 b/saved_models/trainied_weights_epoch_1.data-00000-of-00001
new file mode 100644
index 0000000..82f95cd
Binary files /dev/null and b/saved_models/trainied_weights_epoch_1.data-00000-of-00001 differ
diff --git a/saved_models/trainied_weights_epoch_1.index b/saved_models/trainied_weights_epoch_1.index
new file mode 100644
index 0000000..b541cdb
Binary files /dev/null and b/saved_models/trainied_weights_epoch_1.index differ
diff --git a/saved_models/trainied_weights_epoch_12.data-00000-of-00001 b/saved_models/trainied_weights_epoch_12.data-00000-of-00001
new file mode 100644
index 0000000..e7983f7
Binary files /dev/null and b/saved_models/trainied_weights_epoch_12.data-00000-of-00001 differ
diff --git a/saved_models/trainied_weights_epoch_12.index b/saved_models/trainied_weights_epoch_12.index
new file mode 100644
index 0000000..8aaed32
Binary files /dev/null and b/saved_models/trainied_weights_epoch_12.index differ
diff --git a/saved_models/trainied_weights_epoch_19.data-00000-of-00001 b/saved_models/trainied_weights_epoch_19.data-00000-of-00001
new file mode 100644
index 0000000..a25bc5d
Binary files /dev/null and b/saved_models/trainied_weights_epoch_19.data-00000-of-00001 differ
diff --git a/saved_models/trainied_weights_epoch_19.index b/saved_models/trainied_weights_epoch_19.index
new file mode 100644
index 0000000..a59234c
Binary files /dev/null and b/saved_models/trainied_weights_epoch_19.index differ
diff --git a/saved_models/trainied_weights_epoch_6.data-00000-of-00001 b/saved_models/trainied_weights_epoch_6.data-00000-of-00001
new file mode 100644
index 0000000..8f371ba
Binary files /dev/null and b/saved_models/trainied_weights_epoch_6.data-00000-of-00001 differ
diff --git a/saved_models/trainied_weights_epoch_6.index b/saved_models/trainied_weights_epoch_6.index
new file mode 100644
index 0000000..0fd28f6
Binary files /dev/null and b/saved_models/trainied_weights_epoch_6.index differ
diff --git a/short_presentation.mp4 b/short_presentation.mp4
new file mode 100644
index 0000000..2f50787
Binary files /dev/null and b/short_presentation.mp4 differ
diff --git a/short_presentation_without_text.mp4 b/short_presentation_without_text.mp4
new file mode 100644
index 0000000..d5f762b
Binary files /dev/null and b/short_presentation_without_text.mp4 differ
diff --git a/videoPresentation.gif b/videoPresentation.gif
new file mode 100644
index 0000000..755609d
Binary files /dev/null and b/videoPresentation.gif differ
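For reference, with the 256x256 defaults used in live_recolor.py, helper.gstreamer_pipeline expands to the single-line string shown below (wrapped here for readability). It is the standard JetsonHacks CSI camera pipeline and only works where GStreamer provides the nvarguscamerasrc element.

import helper

print(helper.gstreamer_pipeline(capture_width=256, capture_height=256,
                                display_width=256, display_height=256,
                                framerate=10, flip_method=0))
# nvarguscamerasrc ! video/x-raw(memory:NVMM), width=(int)256, height=(int)256,
# format=(string)NV12, framerate=(fraction)10/1 ! nvvidconv flip-method=0 !
# video/x-raw, width=(int)256, height=(int)256, format=(string)BGRx !
# videoconvert ! video/x-raw, format=(string)BGR ! appsink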