def __init__(self, layers=None, skip_connections=False, has_translator=True):
    """Build an encoder-decoder TF graph with an optional fully-connected
    "translator" bottleneck between encoder and decoder.

    Args:
        layers: optional list of layer objects (Conv2d / MaxPool2d). When
            None, a default 7x7-conv stack with two max-pools is built.
        skip_connections: forwarded to the MaxPool2d layers as
            ``skip_connection``.
        has_translator: when True, three fully-connected layers are applied
            to the flattened encoder output before decoding.

    Exposes: self.inputs, self.targets, self.is_training,
    self.segmentation_result / self.final_result, self.variables.
    """
    with tf.variable_scope('encoder-decoder'):
        if layers is None:  # PEP 8: compare to None with `is`, not `==`
            layers = []
            layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_1_1'))
            layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_1_2'))
            layers.append(MaxPool2d(kernel_size=2, name='max_1', skip_connection=skip_connections))
            layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_2_1'))
            layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_2_2'))
            layers.append(MaxPool2d(kernel_size=2, name='max_2', skip_connection=skip_connections))
            layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_3_1'))
            layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_3_2'))
            layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_3_3'))

        self.inputs = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.IMAGE_CHANNELS], name='inputs')
        self.targets = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 1], name='targets')
        self.is_training = tf.placeholder_with_default(False, [], name='is_training')
        self.description = ""
        self.layers = {}

        net = self.inputs

        # ENCODER
        # NOTE(review): is_training is hard-coded to False here instead of
        # self.is_training -- confirm this is intentional (e.g. frozen BN).
        for layer in layers:
            self.layers[layer.name] = net = layer.create_layer(net, is_training=False)

        layers.reverse()
        Conv2d.reverse_global_variables()

        # midfield ("translator"): flatten, 3 FC layers, reshape back
        if has_translator:
            o_s = net.get_shape().as_list()
            feature_len = o_s[1] * o_s[2] * o_s[3]
            net = tf.reshape(net, [-1, feature_len])
            for i in range(3):
                net = slim.fully_connected(net, feature_len, scope="fc_{}".format(i + 1))
                # NOTE(review): overwritten on every iteration, so only the
                # last FC layer's variables are kept -- confirm intended.
                self.fc_vars = tf.contrib.framework.get_variables("fc_{}".format(i + 1))
            net = tf.reshape(net, [-1, o_s[1], o_s[2], o_s[3]])

        # DECODER (the layers list was reversed above)
        layers_len = len(layers)
        for i, layer in enumerate(layers):
            if i == (layers_len - 1):
                self.segmentation_result = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name], last_layer=True, is_training=False)
            else:
                net = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name], is_training=False)

        self.final_result = self.segmentation_result
        self.variables = tf.contrib.framework.get_variables(scope='encoder-decoder')
# Experiment configuration for a single-Conv2d training script
# (custom framework: Conv2d with initWeight/zeroGrad).
act = None          # activation for the conv layer (None = linear)
optim = 'adam'      # optimizer name understood by Conv2d
lr = 1e-1           # learning rate (NOTE(review): appears unused in this chunk -- confirm)
out_fldr = './out'  # output directory

# sobel detection kernel
# my_kernel = [[[[1, 2, 1], [0, 0, 0], [-1, -2, -1]] for _ in range(in_ch)] for _ in range(out_ch)]
# my_kernel = [[[[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]] for _ in range(in_ch)] for _ in range(out_ch)]
# my_kernel = np.array(my_kernel, dtype=np.float32)

# layer init
# NOTE(review): in_ch / out_ch / k_sz / stride are not defined in this chunk --
# presumably set earlier in the file; verify before reuse.
my_conv = Conv2d(in_ch, out_ch, k_sz, stride, bias=False, padding=(0, 0), act=act, optim=optim)

# weight init: uniform in [-1, 1), shape (out_ch, in_ch, kH, kW)
my_conv.initWeight(np.random.uniform(-1, 1, size=(out_ch, in_ch, k_sz[0], k_sz[1])))

# input image loaded as grayscale (imread flag 0), then wrapped to a
# 4-D float32 array (1, 1, H, W)
my_inp = cv2.imread('F:/gitrepo/CNN/sobel.PNG', 0)
my_inp = np.array([[my_inp]]).astype(np.float32)
my_target = np.load('./target.npy').astype(np.float32)


def train(i):
    # One training step: clear accumulated gradients first.
    # (Function body may continue beyond this chunk.)
    my_conv.zeroGrad()
def __init__(self, layers=None, skip_connections=True):
    """Build an encoder-decoder graph whose final 1x1 conv regresses a
    1-channel target; trained with MSE and evaluated with MS-SSIM.

    Args:
        layers: optional encoder layer list; when None a default 3x3-conv
            stack with two max-pools is built (decoder is symmetric).
        skip_connections: forwarded to the MaxPool2d layers.

    Exposes: self.inputs, self.targets, self.feature_set,
    self.final_result, self.cost1, self.train_op, self.accuracy, self.mse.
    """
    if layers is None:  # PEP 8: compare to None with `is`, not `==`
        layers = []
        layers.append(Conv2d(kernel_size=3, strides=[1, 2, 2, 1], output_channels=64, name='conv_1_1'))
        layers.append(Conv2d(kernel_size=3, strides=[1, 1, 1, 1], output_channels=64, name='conv_1_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_1', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=3, strides=[1, 2, 2, 1], output_channels=64, name='conv_2_1'))
        layers.append(Conv2d(kernel_size=3, strides=[1, 1, 1, 1], output_channels=64, name='conv_2_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_2', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=3, strides=[1, 2, 2, 1], output_channels=64, name='conv_3_1'))
        layers.append(Conv2d(kernel_size=3, strides=[1, 1, 1, 1], output_channels=64, name='conv_3_2'))

    self.inputs = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.IMAGE_CHANNELS], name='inputs')
    self.targets = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 1], name='targets')
    self.is_training = tf.placeholder_with_default(False, [], name='is_training')
    self.description = ""
    self.layers = {}

    net = self.inputs

    # ENCODER
    for layer in layers:
        self.layers[layer.name] = net = layer.create_layer(net)
        self.description += "{}".format(layer.get_description())

    layers.reverse()
    Conv2d.reverse_global_variables()

    # midfield: flattened bottleneck features, exposed for downstream use
    self.feature_set = tf.reshape(net, [-1, net.get_shape()[1] * net.get_shape()[2] * net.get_shape()[3]])

    # DECODER (layers list is reversed above); last layer feeds a 1x1 conv
    layers_len = len(layers)
    for i, layer in enumerate(layers):
        if i == (layers_len - 1):
            net = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name], last_layer=True)
            self.final_result = slim.conv2d(net, 1, 1, scope="last_conv", activation_fn=None)
        else:
            net = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name])

    # MSE loss between reconstruction and target
    output = self.final_result
    inputv = self.targets
    mean = tf.reduce_mean(tf.square(output - inputv))
    self.cost1 = mean

    # NOTE(review): polynomial_decay receives the literal 1 as global_step,
    # so the learning rate never actually decays -- confirm intended.
    self.train_op = tf.train.AdamOptimizer(learning_rate=tf.train.polynomial_decay(0.01, 1, 1000, 0.0001)).minimize(self.cost1)

    with tf.name_scope('accuracy'):
        # "accuracy" here is the MS-SSIM score computed by a py_func wrapper
        correct_pred = tf.py_func(msssim.MultiScaleSSIM, [self.final_result, self.targets], tf.float32)
        self.accuracy = correct_pred
        self.mse = tf.reduce_mean(tf.square(self.final_result - self.targets))
        tf.summary.scalar('accuracy', self.accuracy)
    self.summaries = tf.summary.merge_all()
def __init__(self, layers=None, skip_connections=False):
    """Build a (mostly commented-down) encoder-decoder plus Recognizer-based
    perceptual losses; exposes separate MSE and recognition train ops.

    Args:
        layers: optional layer list; when None, only conv_1_1 + max_1 are
            active (the deeper layers are commented out below).
        skip_connections: forwarded to the MaxPool2d layer.
    """
    with tf.variable_scope('encoder-decoder'):
        if layers is None:  # PEP 8: compare to None with `is`, not `==`
            layers = []
            layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_1_1'))
            #layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_1_2'))
            layers.append(MaxPool2d(kernel_size=2, name='max_1', skip_connection=skip_connections))
            #layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_2_1'))
            #layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_2_2'))
            #layers.append(MaxPool2d(kernel_size=2, name='max_2', skip_connection=skip_connections))
            #layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_3_1'))
            #layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_3_2'))
            #layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_3_3'))

        self.inputs = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.IMAGE_CHANNELS], name='inputs')
        self.targets = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 1], name='targets')
        self.is_training = tf.placeholder_with_default(False, [], name='is_training')
        # epoch counter fed at run time; drives learning-rate decay below
        self.nepoch = tf.placeholder(tf.int32, [], name="nepoch")
        self.description = ""
        self.layers = {}

        net = self.inputs

        # ENCODER
        for layer in layers:
            self.layers[layer.name] = net = layer.create_layer(net)
            self.description += "{}".format(layer.get_description())

        layers.reverse()
        Conv2d.reverse_global_variables()

        # DECODER (layers list is reversed above)
        layers_len = len(layers)
        for i, layer in enumerate(layers):
            if i == (layers_len - 1):
                self.segmentation_result = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name], last_layer=True)
            else:
                net = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name])

        self.variables = tf.contrib.framework.get_variables(scope='encoder-decoder')
        self.final_result = self.segmentation_result

    # Recognizer feature extractors; AUTO_REUSE shares weights across the
    # three instances (output / input / target).
    self.rec1 = Recognizer(self.final_result, reuse=tf.AUTO_REUSE)
    self.rec2 = Recognizer(self.inputs, reuse=tf.AUTO_REUSE)
    self.rec3 = Recognizer(self.targets, reuse=tf.AUTO_REUSE)

    # MSE loss (expression removal against targets)
    output = self.segmentation_result
    inputv = self.targets
    mean = tf.reduce_mean(tf.square(output - inputv))

    # Recognition feature sets
    rec1_loss = self.rec1.modelo
    rec2_loss = self.rec2.modelo
    rec3_loss = self.rec3.modelo
    output_weight = tf.constant(3, shape=[], dtype=tf.float32)
    cost_rec1 = tf.multiply(output_weight, tf.reduce_sum(tf.reduce_mean(tf.abs(rec1_loss - rec3_loss), 0)))
    cost_rec2 = tf.reduce_sum(tf.reduce_mean(tf.abs(rec1_loss - rec2_loss), 0))
    self.cost_rec = cost_rec1 + cost_rec2
    self.cost_mse = mean

    # NOTE(review): 10e-5 == 1e-4 and 10e-7 == 1e-6; confirm these rates
    # (1e-5 / 1e-7 may have been intended).
    self.train_op_rec = tf.train.AdamOptimizer(learning_rate=tf.train.polynomial_decay(10e-5, self.nepoch, 10000, 10e-7)).minimize(self.cost_rec)
    self.train_op_mse = tf.train.AdamOptimizer(learning_rate=tf.train.polynomial_decay(10e-4, self.nepoch, 10000, 10e-5)).minimize(self.cost_mse)
def __init__(self, layers=None, per_image_standardization=True, batch_norm=True, skip_connections=True):
    """Build a deep (5-stage, 3x3-conv) convolutional autoencoder trained
    with an RMSE objective and a rounded-pixel accuracy metric.

    Args:
        layers: optional encoder layer list; default stack built when None
            (decoder is symmetric).
        per_image_standardization: normalize each input image when True.
        batch_norm: unused in this constructor (kept for interface
            compatibility).
        skip_connections: enables skip connections on max_1..max_4.
    """
    # Define network - ENCODER (decoder will be symmetric).
    s = 3  # kernel size shared by every conv layer
    if layers is None:  # PEP 8: compare to None with `is`, not `==`
        layers = []
        layers.append(Conv2d(kernel_size=s, strides=[1, 1, 1, 1], output_channels=64, name='conv_1_1'))
        layers.append(Conv2d(kernel_size=s, strides=[1, 1, 1, 1], output_channels=64, name='conv_1_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_1', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=s, strides=[1, 1, 1, 1], output_channels=64, name='conv_2_1'))
        layers.append(Conv2d(kernel_size=s, strides=[1, 1, 1, 1], output_channels=64, name='conv_2_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_2', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=s, strides=[1, 1, 1, 1], output_channels=64, name='conv_3_1'))
        layers.append(Conv2d(kernel_size=s, strides=[1, 1, 1, 1], output_channels=64, name='conv_3_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_3', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=s, strides=[1, 1, 1, 1], output_channels=64, name='conv_4_1'))
        layers.append(Conv2d(kernel_size=s, strides=[1, 1, 1, 1], output_channels=64, name='conv_4_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_4', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=s, strides=[1, 1, 1, 1], output_channels=64, name='conv_5_1'))
        layers.append(Conv2d(kernel_size=s, strides=[1, 1, 1, 1], output_channels=64, name='conv_5_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_5'))  # innermost pool: no skip connection

    self.inputs = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.IMAGE_CHANNELS], name='inputs')
    self.targets = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 1], name='targets')
    self.is_training = tf.placeholder_with_default(False, [], name='is_training')
    self.description = ""
    self.layers = {}

    if per_image_standardization:
        list_of_images_norm = tf.map_fn(tf.image.per_image_standardization, self.inputs)
        net = tf.stack(list_of_images_norm)
    else:
        net = self.inputs

    # ENCODER
    for layer in layers:
        self.layers[layer.name] = net = layer.create_layer(net)
        self.description += "{}".format(layer.get_description())
        print("Current input shape: ", net.get_shape())

    layers.reverse()
    Conv2d.reverse_global_variables()

    # DECODER: mirror each encoder layer using its recorded forward output
    for layer in layers:
        net = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name])

    self.segmentation_result = tf.sigmoid(net)
    print('segmentation_result.shape: {}, targets.shape: {}'.format(self.segmentation_result.get_shape(), self.targets.get_shape()))

    # RMSE loss (sqrt of mean squared error)
    self.cost = tf.sqrt(tf.reduce_mean(tf.square(self.segmentation_result - self.targets)))
    self.train_op = tf.train.AdamOptimizer().minimize(self.cost)

    with tf.name_scope('accuracy'):
        argmax_probs = tf.round(self.segmentation_result)  # threshold at 0.5 -> {0, 1}
        correct_pred = tf.cast(tf.equal(argmax_probs, self.targets), tf.float32)
        self.accuracy = tf.reduce_mean(correct_pred)
        tf.summary.scalar('accuracy', self.accuracy)
    self.summaries = tf.summary.merge_all()
help="Path to directory storing checkpointed model.") parser.add_argument( "input_image", default="", type=str, help="Path to image for which the segmentation should be performed.") parser.add_argument("--out", default="/tmp", type=str, help="Path to directory to store resulting image.") args = parser.parse_args() layers = [] layers.append( Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_1_1')) layers.append( Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_1_2')) layers.append(MaxPool2d(kernel_size=2, name='max_1', skip_connection=True)) layers.append( Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_2_1')) layers.append( Conv2d(kernel_size=7,
def __init__(self, layers=None, batch_norm=True, skip_connections=True, pretrain=False):
    """1-D convolutional encoder-decoder (signals of length 2048) with
    optional TICNN-pretrained encoder weights and an RMSE objective.

    Args:
        layers: optional layer list; default stack built when None.
        batch_norm: unused in this constructor (kept for interface
            compatibility).
        skip_connections: enables skip connections on max_1..max_4.
        pretrain: when True, loads constant initializers from TICNN .npy
            files at a hard-coded path.
    """
    # Define network - ENCODER (decoder will be symmetric).
    self.IMAGE_HEIGHT = 2048
    self.IMAGE_WIDTH = 1
    self.IMAGE_CHANNELS = 1
    self.is_training = tf.placeholder_with_default(False, [], name='is_training')

    if pretrain:
        # load pretrained weights from TICNN
        # NOTE(review): absolute user-specific path -- consider making this configurable
        pre_train_path = '/home/dawei/cyril/1d_conv_encoder_decoder/pretrained_TICNN/'
        W_conv1 = tf.constant(np.load(pre_train_path + 'W_conv1.npy'))
        W_conv2 = tf.constant(np.load(pre_train_path + 'W_conv2.npy'))
        W_conv3 = tf.constant(np.load(pre_train_path + 'W_conv3.npy'))
        W_conv4 = tf.constant(np.load(pre_train_path + 'W_conv4.npy'))
        W_conv5 = tf.constant(np.load(pre_train_path + 'W_conv5.npy'))
    else:
        W_conv1 = W_conv2 = W_conv3 = W_conv4 = W_conv5 = None

    if layers is None:  # PEP 8: compare to None with `is`, not `==`
        layers = []
        # wide first conv with stride 8 along the signal axis
        layers.append(Conv2d(kernel_size=64, strides=[1, 8, 1, 1], output_channels=16, name='conv_1', is_training=self.is_training, initializer=W_conv1))
        layers.append(MaxPool2d(kernel_size=[2, 1], name='max_1', skip_connection=skip_connections))
        #
        layers.append(Conv2d(kernel_size=3, strides=[1, 1, 1, 1], output_channels=32, name='conv_2', is_training=self.is_training, initializer=W_conv2))
        layers.append(MaxPool2d(kernel_size=[2, 1], name='max_2', skip_connection=skip_connections))
        #
        layers.append(Conv2d(kernel_size=3, strides=[1, 1, 1, 1], output_channels=64, name='conv_3', is_training=self.is_training, initializer=W_conv3))
        layers.append(MaxPool2d(kernel_size=[2, 1], name='max_3', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=3, strides=[1, 1, 1, 1], output_channels=64, name='conv_4', is_training=self.is_training, initializer=W_conv4))
        layers.append(MaxPool2d(kernel_size=[2, 1], name='max_4', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=3, strides=[1, 1, 1, 1], output_channels=64, name='conv_5', is_training=self.is_training, initializer=W_conv5))
        layers.append(MaxPool2d(kernel_size=[2, 1], name='max_5'))  # innermost pool: no skip connection

    self.inputs = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.IMAGE_CHANNELS], name='inputs')
    self.targets = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 1], name='targets')
    self.description = ""
    self.layers = {}

    net = self.inputs  # (None, 2048, 1, 1)

    # ENCODER
    for layer in layers:
        self.layers[layer.name] = net = layer.create_layer(net)
        self.description += "{}".format(layer.get_description())
    print("Current input shape: ", self.inputs.get_shape())

    layers.reverse()  # reverse the list of layers
    Conv2d.reverse_global_variables()

    # DECODER
    for layer in layers:
        net = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name])

    #self.segmentation_result = tf.sigmoid(net)
    self.segmentation_result = net  # raw (un-squashed) reconstruction
    print('segmentation_result.shape: {}, targets.shape: {}'.format(self.segmentation_result.get_shape(), self.targets.get_shape()))

    # RMSE loss
    self.cost = tf.sqrt(tf.reduce_mean(tf.square(self.segmentation_result - self.targets)))
    self.train_op = tf.train.AdamOptimizer(0.001).minimize(self.cost)
    tf.summary.scalar('rmse_cost', self.cost)
    self.summary = tf.summary.merge_all()
import os
import tensorflow as tf
import convolutional_autoencoder
from conv2d import Conv2d
from max_pool_2d import MaxPool2d
import numpy as np
import cv2
import matplotlib.pyplot as plt

if __name__ == '__main__':
    # Build the default encoder stack: three (conv-stride2, conv, maxpool)
    # stages, 7x7 kernels, 64 channels each. Layers are created without
    # explicit names here, unlike the model constructors elsewhere.
    # NOTE(review): this __main__ block may continue beyond this chunk.
    layers = []
    layers.append(
        Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64))
    layers.append(
        Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64))
    layers.append(MaxPool2d(kernel_size=2))
    layers.append(
        Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64))
    layers.append(
        Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64))
    layers.append(MaxPool2d(kernel_size=2))
    layers.append(
        Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64))
    layers.append(
        Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64))
    layers.append(MaxPool2d(kernel_size=2))
def __init__(self, layers=None, skip_connections=False):
    """Encoder-decoder for expression removal, trained with both an MSE
    objective and a Recognizer-feature perceptual objective.

    Args:
        layers: optional layer list; when None, a default 7x7-conv stack
            with two max-pools is built (decoder is symmetric).
        skip_connections: forwarded to the MaxPool2d layers.
    """
    with tf.variable_scope('encoder-decoder'):
        if layers is None:  # PEP 8: compare to None with `is`, not `==`
            layers = []
            layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_1_1'))
            layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_1_2'))
            layers.append(MaxPool2d(kernel_size=2, name='max_1', skip_connection=skip_connections))
            layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_2_1'))
            layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_2_2'))
            layers.append(MaxPool2d(kernel_size=2, name='max_2', skip_connection=skip_connections))
            layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_3_1'))
            layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_3_2'))
            layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_3_3'))

        self.inputs = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.IMAGE_CHANNELS], name='inputs')
        self.targets = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 1], name='targets')
        self.is_training = tf.placeholder_with_default(False, [], name='is_training')
        self.description = ""
        self.layers = {}

        net = self.inputs

        # ENCODER
        for layer in layers:
            self.layers[layer.name] = net = layer.create_layer(net)
            self.description += "{}".format(layer.get_description())

        layers.reverse()
        Conv2d.reverse_global_variables()

        # midfield (fully-connected translator) -- currently disabled
        '''old_shape = net.get_shape()
        o_s = old_shape.as_list()
        feature_len = o_s[1]*o_s[2]*o_s[3]
        net = tf.reshape(net, [-1, feature_len])
        for i in range(3):
            net = slim.fully_connected(net, feature_len, scope="fc_{}".format(i+1))
            self.fc_vars = tf.contrib.framework.get_variables("fc_{}".format(i+1))
        net = tf.reshape(net, [-1, o_s[1], o_s[2], o_s[3]])'''

        # DECODER (layers list is reversed above)
        layers_len = len(layers)
        for i, layer in enumerate(layers):
            if i == (layers_len - 1):
                self.segmentation_result = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name], last_layer=True)
            else:
                net = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name])

        self.variables = tf.contrib.framework.get_variables(scope='encoder-decoder')
        self.final_result = self.segmentation_result

    # Recognizer feature extractors; AUTO_REUSE shares weights across the
    # three instances (output / input / target).
    self.rec1 = Recognizer(self.final_result, reuse=tf.AUTO_REUSE)
    self.rec2 = Recognizer(self.inputs, reuse=tf.AUTO_REUSE)
    self.rec3 = Recognizer(self.targets, reuse=tf.AUTO_REUSE)

    # MSE loss (expression removal against targets)
    output = self.segmentation_result
    inputv = self.targets
    mean = tf.reduce_mean(tf.square(output - inputv))

    # Recognition feature sets
    rec1_loss = self.rec1.modelo
    rec2_loss = self.rec2.modelo
    rec3_loss = self.rec3.modelo
    output_weight = tf.constant(3, shape=[], dtype=tf.float32)
    cost_rec1 = tf.multiply(output_weight, tf.reduce_sum(tf.reduce_mean(tf.abs(rec1_loss - rec3_loss), 0)))
    cost_rec2 = tf.reduce_sum(tf.reduce_mean(tf.abs(rec1_loss - rec2_loss), 0))

    # Cost based on recognition: weighted MSE plus the two feature terms
    final_weight = tf.constant(10, shape=[], dtype=tf.float32)
    self.cost_rec = tf.multiply(final_weight, mean) + cost_rec1 + cost_rec2
    #self.cost_rec = tf.constant(0, dtype=tf.float32)
    self.cost_mse = mean

    # NOTE(review): polynomial_decay receives the literal 1 as global_step,
    # so these learning rates never actually decay -- confirm intended.
    self.train_op_rec = tf.train.AdamOptimizer(learning_rate=tf.train.polynomial_decay(0.00001, 1, 10000, 0.0000001)).minimize(self.cost_rec)
    self.train_op_mse = tf.train.AdamOptimizer(learning_rate=tf.train.polynomial_decay(0.0001, 1, 10000, 0.00001)).minimize(self.cost_mse)

    with tf.name_scope('accuracy'):
        # "accuracy" is the MS-SSIM score computed by a py_func wrapper
        correct_pred = tf.py_func(msssim.MultiScaleSSIM, [self.final_result, self.targets], tf.float32)
        self.accuracy = correct_pred
        self.mse = tf.reduce_mean(tf.square(self.final_result - self.targets))
        tf.summary.scalar('accuracy', self.accuracy)
    self.summaries = tf.summary.merge_all()
def __init__(self, layers=None, per_image_standardization=True, batch_norm=True, skip_connections=True):
    """Convolutional autoencoder (3-stage, 7x7 convs) regressing a
    1-channel float target with an RMSE objective.

    Args:
        layers: optional encoder layer list; default stack built when None
            (decoder is symmetric).
        per_image_standardization: normalize each input image when True.
        batch_norm: unused in this constructor (kept for interface
            compatibility).
        skip_connections: enables skip connections on max_1 / max_2.
    """
    # Define network - ENCODER (decoder will be symmetric).
    if layers is None:  # PEP 8: compare to None with `is`, not `==`
        layers = []
        layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_1_1'))
        layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_1_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_1', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_2_1'))
        layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_2_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_2', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_3_1'))
        layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_3_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_3'))  # innermost pool: no skip connection

    self.inputs = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.IMAGE_CHANNELS], name='inputs')
    # when output image has multiple dimensions, and each pixel has one of n class predictions
    #self.targets = tf.placeholder(tf.int32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.IMAGE_CHANNELS], name='targets')
    self.targets = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 1], name='targets')
    self.is_training = tf.placeholder_with_default(False, [], name='is_training')
    self.description = ""
    self.layers = {}

    if per_image_standardization:
        list_of_images_norm = tf.map_fn(tf.image.per_image_standardization, self.inputs)
        net = tf.stack(list_of_images_norm)
    else:
        net = self.inputs

    # ENCODER
    for layer in layers:
        # layer is Conv2d type, Conv2d has create_layer method;
        # the running graph output `net` feeds the next layer
        self.layers[layer.name] = net = layer.create_layer(net)
        self.description += "{}".format(layer.get_description())
        print("Current input shape: ", net.get_shape())

    # just reversing the array
    layers.reverse()
    Conv2d.reverse_global_variables()

    # DECODER
    for layer in layers:
        # use prev_layer (the recorded forward output) to reverse
        net = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name])

    self.segmentation_result = tf.sigmoid(net)

    # can't use cross entropy since output image is not an image mask,
    # rather an image with float values
    print('segmentation_result.shape: {}, targets.shape: {}'.format(self.segmentation_result.get_shape(), self.targets.get_shape()))
    # targets_as_classes = tf.reshape(self.targets, [-1, self.IMAGE_HEIGHT, self.IMAGE_WIDTH])
    #self.cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.segmentation_result, labels=targets_as_classes))

    # RMSE loss (sqrt of mean squared error)
    self.cost = tf.sqrt(tf.reduce_mean(tf.square(self.segmentation_result - self.targets)))
    self.train_op = tf.train.AdamOptimizer().minimize(self.cost)

    with tf.name_scope('accuracy'):
        argmax_probs = tf.round(self.segmentation_result)  # 0x1
        correct_pred = tf.cast(tf.equal(argmax_probs, self.targets), tf.float32)
        self.accuracy = tf.reduce_mean(correct_pred)
        tf.summary.scalar('accuracy', self.accuracy)
    self.summaries = tf.summary.merge_all()
def __init__(self, layers=None, per_image_standardization=True, batch_norm=True, skip_connections=True):
    """Convolutional autoencoder (3-stage, 7x7 convs) regressing a
    1-channel float target with an RMSE objective.

    Args:
        layers: optional encoder layer list; default stack built when None
            (decoder is symmetric).
        per_image_standardization: normalize each input image when True.
        batch_norm: unused in this constructor (kept for interface
            compatibility).
        skip_connections: enables skip connections on max_1 / max_2.
    """
    # Define network - ENCODER (decoder will be symmetric).
    if layers is None:  # PEP 8: compare to None with `is`, not `==`
        layers = []
        layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_1_1'))
        layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_1_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_1', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_2_1'))
        layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_2_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_2', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_3_1'))
        layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_3_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_3'))  # innermost pool: no skip connection

    self.inputs = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.IMAGE_CHANNELS], name='inputs')
    # when output image has multiple dimensions, and each pixel has one of n class predictions
    #self.targets = tf.placeholder(tf.int32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.IMAGE_CHANNELS], name='targets')
    self.targets = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 1], name='targets')
    self.is_training = tf.placeholder_with_default(False, [], name='is_training')
    self.description = ""
    self.layers = {}

    if per_image_standardization:
        list_of_images_norm = tf.map_fn(tf.image.per_image_standardization, self.inputs)
        net = tf.stack(list_of_images_norm)
    else:
        net = self.inputs

    # ENCODER
    for layer in layers:
        # layer is Conv2d type, Conv2d has create_layer method;
        # the running graph output `net` feeds the next layer
        self.layers[layer.name] = net = layer.create_layer(net)
        self.description += "{}".format(layer.get_description())
        print("Current input shape: ", net.get_shape())

    # just reversing the array
    layers.reverse()
    Conv2d.reverse_global_variables()

    # DECODER
    for layer in layers:
        # use prev_layer (the recorded forward output) to reverse
        net = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name])

    self.segmentation_result = tf.sigmoid(net)

    # can't use cross entropy since output image is not an image mask,
    # rather an image with float values
    print('segmentation_result.shape: {}, targets.shape: {}'.format(self.segmentation_result.get_shape(), self.targets.get_shape()))
    # targets_as_classes = tf.reshape(self.targets, [-1, self.IMAGE_HEIGHT, self.IMAGE_WIDTH])
    #self.cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.segmentation_result, labels=targets_as_classes))

    # RMSE loss (sqrt of mean squared error)
    self.cost = tf.sqrt(tf.reduce_mean(tf.square(self.segmentation_result - self.targets)))
    self.train_op = tf.train.AdamOptimizer().minimize(self.cost)

    with tf.name_scope('accuracy'):
        argmax_probs = tf.round(self.segmentation_result)  # 0x1
        correct_pred = tf.cast(tf.equal(argmax_probs, self.targets), tf.float32)
        self.accuracy = tf.reduce_mean(correct_pred)
        tf.summary.scalar('accuracy', self.accuracy)
    self.summaries = tf.summary.merge_all()
def __init__(self, layers=None, per_image_standardization=False, batch_norm=True, skip_connections=True):
    """Convolutional autoencoder trained by maximizing multi-scale SSIM
    (the train op minimizes ``1 - cost``).

    Args:
        layers: optional encoder layer list; default 5-stage 7x7-conv stack
            built when None (stages 4 and 5 have no max-pool).
        per_image_standardization: normalize each input image when True.
        batch_norm: unused in this constructor (kept for interface
            compatibility).
        skip_connections: enables skip connections on max_1..max_3.
    """
    # Define network - ENCODER (decoder will be symmetric).
    if layers is None:  # PEP 8: compare to None with `is`, not `==`
        layers = []
        layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_1_1'))
        layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_1_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_1', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_2_1'))
        layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_2_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_2', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_3_1'))
        layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_3_2'))
        layers.append(MaxPool2d(kernel_size=2, name='max_3', skip_connection=skip_connections))
        layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_4_1'))
        layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_4_2'))
        layers.append(Conv2d(kernel_size=7, strides=[1, 2, 2, 1], output_channels=64, name='conv_5_1'))
        layers.append(Conv2d(kernel_size=7, strides=[1, 1, 1, 1], output_channels=64, name='conv_5_2'))

    self.inputs = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, self.IMAGE_CHANNELS], name='inputs')
    self.targets = tf.placeholder(tf.float32, [None, self.IMAGE_HEIGHT, self.IMAGE_WIDTH, 1], name='targets')
    self.is_training = tf.placeholder_with_default(False, [], name='is_training')
    self.description = ""
    self.layers = {}

    if per_image_standardization:
        list_of_images_norm = tf.map_fn(tf.image.per_image_standardization, self.inputs)
        net = tf.stack(list_of_images_norm)
    else:
        net = self.inputs

    # ENCODER
    for layer in layers:
        self.layers[layer.name] = net = layer.create_layer(net)
        self.description += "{}".format(layer.get_description())

    layers.reverse()
    Conv2d.reverse_global_variables()

    # DECODER (layers list is reversed above)
    layers_len = len(layers)
    for i, layer in enumerate(layers):
        if i == (layers_len - 1):
            self.segmentation_result = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name], last_layer=True)
        else:
            net = layer.create_layer_reversed(net, prev_layer=self.layers[layer.name])

    # segmentation_as_classes = tf.reshape(self.y, [50 * self.IMAGE_HEIGHT * self.IMAGE_WIDTH, 1])
    # targets_as_classes = tf.reshape(self.targets, [50 * self.IMAGE_HEIGHT * self.IMAGE_WIDTH])
    # print(self.y.get_shape())
    # self.cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(segmentation_as_classes, targets_as_classes))
    print('segmentation_result.shape: {}, targets.shape: {}'.format(self.segmentation_result.get_shape(), self.targets.get_shape()))

    # MS-SSIM cost: higher is better, hence the (1 - cost) minimization below
    self.cost = self.batchmsssim(self.segmentation_result, self.targets)
    #self.cost = tf.square(self.segmentation_result - self.targets)
    self.train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(1 - self.cost)

    with tf.name_scope('accuracy'):
        # argmax_probs = tf.round(self.segmentation_result)  # 0x1
        # correct_pred = tf.cast(tf.equal(argmax_probs, self.targets), tf.float32)
        correct_pred = tf.square(self.segmentation_result - self.targets)
        # NOTE(review): despite the name, this is mean squared error,
        # not a classification accuracy.
        self.accuracy = tf.reduce_mean(correct_pred)
        tf.summary.scalar('accuracy', self.accuracy)
    self.summaries = tf.summary.merge_all()