def fcn_RESNET50_8s_crfrnn(INPUT_SIZE, nb_classes, num_crf_iterations):
    """ Returns Keras FCN-8s + CRF-RNN layer, based on the ResNet50 model definition. """
    fcn = fcn_RESNET50_8s(INPUT_SIZE, nb_classes)
    #saved_model_path = '/storage/gby/semseg/streets_weights_resnet50fcn8s_2000ep'
    saved_model_path = '/storage/cfmata/deeplab/crf_rnn/crfasrnn_keras/results/pascal_voc12/voc12_weights.500-0.66'
    #saved_model_path = '/storage/cfmata/deeplab/crf_rnn/crfasrnn_keras/results/horse_coarse/horse_coarse_weights.1000-0.35'
    fcn.load_weights(saved_model_path)

    inputs = fcn.layers[0].output
    #fcn_score = fcn.output
    fcn_score = fcn.get_layer('add_pred8_pred16_pred32').output

    # Add the CRF-RNN layer:
    height, width = INPUT_SIZE, INPUT_SIZE
    crfrnn_output = CrfRnnLayer(image_dims=(height, width),
                                num_classes=nb_classes,
                                theta_alpha=160.,
                                theta_beta=90.,
                                theta_gamma=3.,
                                num_iterations=num_crf_iterations,  # 10 for test, 5 for train
                                name='crfrnn')([fcn_score, inputs])

    model = Model(inputs=inputs, outputs=crfrnn_output, name='fcn_RESNET50_8s_crfrnn')

    # Freezing weights in lower layers (optional):
    # for layer in model.layers[:29]:  # 15, 21, 29 (overall 30 layers)
    #     layer.trainable = False

    return model
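# A minimal usage sketch for the builders above, assuming the CrfRnnLayer
# implementation from crfasrnn_keras is on the path and the hard-coded weight
# paths exist on disk. `train_generator` and the loss choice are placeholders,
# not part of this code base.
from keras.optimizers import Adam

if __name__ == '__main__':
    model = fcn_RESNET50_8s_crfrnn(INPUT_SIZE=500, nb_classes=21, num_crf_iterations=5)
    model.compile(optimizer=Adam(lr=1e-4),
                  loss='categorical_crossentropy',  # illustrative; the repo may use its own loss
                  metrics=['accuracy'])
    # model.fit_generator(train_generator, steps_per_epoch=500, epochs=30)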
def fcn_RESNET50_32s_crfrnn(INPUT_SIZE, nb_classes, num_crf_iterations):
    """ Returns Keras FCN-32s + CRF-RNN layer, based on the ResNet50 model definition. """
    fcn = fcn_RESNET50_32s(INPUT_SIZE, nb_classes)
    saved_model_path = '/storage/gby/semseg/streets_weights_resnet50fcn32s_5000ep'
    fcn.load_weights(saved_model_path)

    inputs = fcn.layers[0].output
    #fcn_score = fcn.output
    fcn_score = fcn.get_layer('score_pred32_upsample').output

    # Add the CRF-RNN layer:
    height, width = INPUT_SIZE, INPUT_SIZE
    crfrnn_output = CrfRnnLayer(image_dims=(height, width),
                                num_classes=nb_classes,
                                theta_alpha=160.,
                                theta_beta=90.,
                                theta_gamma=3.,
                                num_iterations=num_crf_iterations,  # 10 for test, 5 for train
                                name='crfrnn')([fcn_score, inputs])

    model = Model(inputs=inputs, outputs=crfrnn_output, name='fcn_RESNET50_32s_crfrnn')

    # Freezing weights in lower layers (optional):
    # for layer in model.layers[:-1]:  # 15, 21, 29 (overall 30 layers)
    #     layer.trainable = True

    return model
def fcn_VGG16_8s_crfrnn(INPUT_SIZE, nb_classes, num_crf_iterations):
    """ Returns Keras FCN-8s + CRF-RNN layer model definition. """
    fcn = fcn_VGG16_8s(INPUT_SIZE, nb_classes)
    saved_model_path = '/storage/gby/semseg/streets_weights_vgg16fcn8s_5000ep'
    #fcn.load_weights(saved_model_path)

    inputs = fcn.layers[0].output
    # Add plenty of zero padding
    #inputs = ZeroPadding2D(padding=(100, 100))(inputs)

    fcn_score = fcn.get_layer('score_7_4_3_up').output  # used to be: fcn.output
    #fcn_score = fcn.output

    # Add the CRF-RNN layer:
    height, width = INPUT_SIZE, INPUT_SIZE
    crfrnn_output = CrfRnnLayer(image_dims=(height, width),
                                num_classes=nb_classes,
                                theta_alpha=160.,
                                theta_beta=90.,  # 3.
                                theta_gamma=3.,
                                num_iterations=num_crf_iterations,  # 10 at test time, 5 at train time
                                name='crfrnn')([fcn_score, inputs])

    model = Model(inputs=inputs, outputs=crfrnn_output, name='fcn_VGG16_8s_crfrnn')

    # Optionally fix weights in lower layers (set trainable=False to freeze):
    for layer in model.layers[:28]:  # 15, 21, 29 (overall 30 layers)
        layer.trainable = True

    return model
def fcn_VGG16_8s_crfrnn(INPUT_SIZE, nb_classes, num_crf_iterations, finetune_path):
    """ Returns Keras FCN-8s + CRF-RNN layer, based on the VGG16 model definition. """
    fcn = fcn_VGG16_8s(INPUT_SIZE, nb_classes)
    if finetune_path != '':
        fcn.load_weights(finetune_path)

    inputs = fcn.layers[0].output
    # Add plenty of zero padding
    #inputs = ZeroPadding2D(padding=(100, 100))(inputs)

    #fcn_score = fcn.output
    fcn_score = fcn.get_layer('score_7_4_3_up').output

    # Add the CRF-RNN layer:
    height, width = INPUT_SIZE, INPUT_SIZE
    crfrnn_output = CrfRnnLayer(image_dims=(height, width),
                                num_classes=nb_classes,
                                theta_alpha=160.,
                                theta_beta=90.,  # 3.
                                theta_gamma=3.,
                                num_iterations=num_crf_iterations,  # 10 at test time, 5 at train time
                                name='crfrnn')([fcn_score, inputs])

    model = Model(inputs=inputs, outputs=crfrnn_output, name='fcn_VGG16_8s_crfrnn')

    # Freezing weights in lower layers (optional):
    # for layer in model.layers[:28]:  # 15, 21, 29 (overall 30 layers)
    #     layer.trainable = True

    return model
def get_crfrnn_model_def_v2():
    channels, height, width = 3, 500, 500

    # Input
    input_shape = (height, width, 3)
    img_input = Input(shape=input_shape)

    # Three parallel convolutions with growing receptive fields
    c7 = Conv2D(8, (7, 7), activation='relu', padding='same', name='conv7')(img_input)
    c9 = Conv2D(8, (9, 9), activation='relu', padding='same', name='conv9')(img_input)
    c11 = Conv2D(8, (11, 11), activation='relu', padding='same', name='conv11')(img_input)
    concat = Concatenate(axis=3)([c7, c9, c11])
    outputs = Conv2D(1, (1, 1), padding='same', name='score-fr')(concat)

    # upscore = Cropping2D(((31, 37), (31, 37)))(outputs)
    upscore = outputs

    output = CrfRnnLayer(image_dims=(height, width),
                         num_classes=1,
                         theta_alpha=160.,
                         theta_beta=3.,
                         theta_gamma=3.,
                         num_iterations=10,
                         name='crfrnn')([upscore, img_input])

    tf.summary.image('t', output)
    tf.summary.merge_all()

    model = Model(img_input, output, name='crfrnn_net')
    return model
def fcn_RESNET50_8s_crfrnn(INPUT_SIZE, nb_classes, num_crf_iterations, finetune_path, batch_size):
    # (formerly also: batch_sizes_train, batch_sizes_val, batch_sizes_total)
    """ Returns Keras FCN-8s + CRF-RNN layer, based on the ResNet50 model definition. """
    fcn = fcn_RESNET50_8s(INPUT_SIZE, nb_classes)
    if finetune_path != '':
        fcn.load_weights(finetune_path)

    inputs = fcn.layers[0].output
    #fcn_score = fcn.output
    fcn_score = fcn.get_layer('add_pred8_pred16_pred32').output

    # Add the CRF-RNN layer:
    height, width = INPUT_SIZE, INPUT_SIZE
    crfrnn_output = CrfRnnLayer(image_dims=(height, width),
                                num_classes=nb_classes,
                                theta_alpha=160.,
                                theta_beta=90.,  # 3. (original), 90 (faster)
                                theta_gamma=3.,
                                batch_size=batch_size,
                                num_iterations=num_crf_iterations,  # 10 for test, 5 for train
                                name='crfrnn')([fcn_score, inputs])

    model = Model(inputs=inputs, outputs=crfrnn_output, name='fcn_RESNET50_8s_crfrnn')

    # Freeze weights in lower layers (optional):
    for layer in model.layers[:160]:  # 181; freezing until layer pred8 (182)
        layer.trainable = False

    return model
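# The builders above freeze lower layers by slicing model.layers at a magic
# index. A hedged alternative sketch: freeze every layer before a named layer,
# so the cut point survives small architecture changes. The example layer name
# below is illustrative only.
def freeze_until(model, layer_name):
    """Set trainable=False on every layer that precedes `layer_name`."""
    trainable = False
    for layer in model.layers:
        if layer.name == layer_name:
            trainable = True
        layer.trainable = trainable
    return model

# e.g.: freeze_until(model, 'pred_8')  # hypothetical layer name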
def fcn_VGG16_32s_crfrnn(INPUT_SIZE, nb_classes, num_crf_iterations, finetune_path):
    """ Returns Keras FCN-32s + CRF-RNN layer model definition. """
    fcn = fcn_VGG16_32s(INPUT_SIZE, nb_classes)
    #saved_model_path = '/storage/gby/semseg/voc12_weights_fcn32_200ep'
    if finetune_path != '':
        fcn.load_weights(finetune_path)

    inputs = fcn.layers[0].output
    fcn_score = fcn.get_layer('score_pool7c_upsample_32').output  # used to be: fcn.output
    #fcn_score = fcn.output

    # Add the CRF-RNN layer:
    height, width = INPUT_SIZE, INPUT_SIZE
    crfrnn_output = CrfRnnLayer(image_dims=(height, width),
                                num_classes=nb_classes,
                                theta_alpha=160.,
                                theta_beta=90.,
                                theta_gamma=3.,
                                num_iterations=num_crf_iterations,  # 10 at test time, 5 at train time
                                name='crfrnn')([fcn_score, inputs])

    model = Model(inputs=inputs, outputs=crfrnn_output, name='fcn_VGG16_32s_crfrnn')
    return model
def fcn_8s_Sadeep_crfrnn(nb_classes, num_crf_iterations, batch_size):
    # (formerly also: batch_sizes_train, batch_sizes_val, batch_sizes_total)
    """ Returns Keras FCN-8s + CRF-RNN layer model definition. """
    INPUT_SIZE = 500
    fcn = fcn_8s_Sadeep(nb_classes)
    #saved_model_path = '/storage/gby/semseg/streets_weights_fcn8s_Sadeep_500ep'
    #saved_model_path = '/storage/cfmata/deeplab/crf_rnn/crfasrnn_keras/results/pascal_voc12/voc2012_sadeep_start0.80-1.18'
    saved_model_path = '/storage/cfmata/deeplab/crf_rnn/crfasrnn_keras/results/horse_coarse/horsecoarse_fcn_8s_Sadeep_is500_ep30_iou.215'
    fcn.load_weights(saved_model_path)

    inputs = fcn.layers[0].output
    #seg_input = fcn.layers[0].output
    # Add plenty of zero padding
    #inputs = ZeroPadding2D(padding=(100, 100))(inputs)

    fcn_score = fcn.get_layer('upscore').output  # used to be: fcn.output
    #fcn_score = fcn.output

    # Add the CRF-RNN layer:
    height, width = INPUT_SIZE, INPUT_SIZE
    crfrnn_output = CrfRnnLayer(image_dims=(height, width),
                                num_classes=nb_classes,
                                theta_alpha=160.,
                                theta_beta=90.,  # 3.
                                theta_gamma=3.,
                                batch_size=batch_size,
                                #batch_sizes_train=batch_sizes_train,
                                #batch_sizes_val=batch_sizes_val,
                                #batch_sizes_total=batch_sizes_total,
                                num_iterations=num_crf_iterations,  # 10 at test time, 5 at train time
                                name='crfrnn')([fcn_score, inputs])

    # Superpixel variant; set num_iterations to 0 if we do not want the CRF:
    # crfrnn_output = CrfRnnLayerSP(image_dims=(height, width),
    #                               num_classes=nb_classes,
    #                               theta_alpha=160.,
    #                               theta_beta=3.,
    #                               theta_gamma=3.,
    #                               num_iterations=0,  # 5
    #                               bil_rate=0.5,        # added for the segmentation
    #                               theta_alpha_seg=30,  # added for the segmentation
    #                               name='crfrnn')([fcn_score, inputs, seg_input])

    model = Model(inputs=inputs, outputs=crfrnn_output, name='fcn_8s_Sadeep_crfrnn')

    # Freezing weights in lower layers (optional):
    # for layer in model.layers[:29]:  # 15, 21, 29 (overall 30 layers)
    #     layer.trainable = True  # False

    return model
def fcn_8s_Sadeep_crfrnn(INPUT_SIZE, nb_classes, num_crf_iterations, finetune_path):
    """ Returns Keras FCN-8s + CRF-RNN layer model definition. """
    assert INPUT_SIZE == 500, "Error: INPUT_SIZE must be 500!"
    fcn = fcn_8s_Sadeep(INPUT_SIZE, nb_classes)
    #saved_model_path = '/storage/gby/semseg/streets_weights_fcn8s_Sadeep_500ep'
    #fcn.load_weights(saved_model_path)
    if finetune_path != '':
        fcn.load_weights(finetune_path)

    inputs = fcn.layers[0].output
    #seg_input = fcn.layers[0].output
    # Add plenty of zero padding
    #inputs = ZeroPadding2D(padding=(100, 100))(inputs)

    fcn_score = fcn.get_layer('upscore').output  # used to be: fcn.output
    #fcn_score = fcn.output

    # Add the CRF-RNN layer:
    height, width = INPUT_SIZE, INPUT_SIZE
    crfrnn_output = CrfRnnLayer(image_dims=(height, width),
                                num_classes=nb_classes,
                                theta_alpha=160.,
                                theta_beta=3.,  # 90.
                                theta_gamma=3.,
                                num_iterations=num_crf_iterations,  # 10 at test time, 5 at train time
                                name='crfrnn')([fcn_score, inputs])

    # Superpixel variant; set num_iterations to 0 if we do not want the CRF:
    # crfrnn_output = CrfRnnLayerSP(image_dims=(height, width),
    #                               num_classes=nb_classes,
    #                               theta_alpha=160.,
    #                               theta_beta=3.,
    #                               theta_gamma=3.,
    #                               num_iterations=0,  # 5
    #                               bil_rate=0.5,        # added for the segmentation
    #                               theta_alpha_seg=30,  # added for the segmentation
    #                               name='crfrnn')([fcn_score, inputs, seg_input])

    model = Model(inputs=inputs, outputs=crfrnn_output, name='fcn_8s_Sadeep_crfrnn')

    # Freezing weights in lower layers (optional):
    # for layer in model.layers[:29]:  # 15, 21, 29 (overall 30 layers)
    #     layer.trainable = True  # False

    return model
def add_deconv(self, bilinear=False):
    fusion = self.get_output('fusion')
    with tf.variable_scope('deconv') as scope:
        # Learn the upsampling filter from scratch
        if not bilinear:
            w_deconv = tf.get_variable(
                'weights', [32, 32, self.num_classes, self.num_classes],
                initializer=tf.truncated_normal_initializer(0.0, stddev=0.01))
        # Initialize with a fixed bilinear upsampling filter (still trainable)
        else:
            w_deconv = tf.get_variable(
                'weights', trainable=True,
                initializer=bilinear_upsample_weights(16, self.num_classes))

        b_deconv = tf.get_variable('biases', [self.num_classes],
                                   initializer=tf.constant_initializer(0))
        z_deconv = tf.nn.conv2d_transpose(
            fusion, w_deconv,
            [self.batch_num, self.max_size[0], self.max_size[1], self.num_classes],
            strides=[1, 16, 16, 1], padding='SAME', name='z') + b_deconv

        # Append a CRF-RNN layer on top of the deconvolution output; switch to
        # the commented assignment below to skip it.
        crf_deconv = CrfRnnLayer(image_dims=(self.max_size[0], self.max_size[1]),
                                 num_classes=self.num_classes,
                                 theta_alpha=160.,
                                 theta_beta=3.,
                                 theta_gamma=3.,
                                 num_iterations=10,
                                 name='crfrnn')([z_deconv, self.img])  # shape=(1, 640, 640, 21)

    # Add to store dicts
    self.outputs['deconv'] = crf_deconv  # <shape=(5, 640, 640, 21) dtype=float32>
    #self.outputs['deconv'] = z_deconv   # <shape=(5, 640, 640, 21) dtype=float32>
    self.layers['deconv'] = {'weights': w_deconv, 'biases': b_deconv}
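# `bilinear_upsample_weights` is referenced above but defined elsewhere; a
# standard implementation widely used in FCN codebases is sketched here for
# reference. For factor=16 it yields the 32x32 kernel expected by the deconv
# above, with each class channel upsampled independently.
import numpy as np

def bilinear_upsample_weights(factor, num_classes):
    filter_size = 2 * factor - factor % 2          # 32 when factor == 16
    if filter_size % 2 == 1:
        center = factor - 1
    else:
        center = factor - 0.5
    og = np.ogrid[:filter_size, :filter_size]
    kernel = (1 - abs(og[0] - center) / factor) * \
             (1 - abs(og[1] - center) / factor)    # separable bilinear kernel
    weights = np.zeros((filter_size, filter_size, num_classes, num_classes),
                       dtype=np.float32)
    for c in range(num_classes):
        weights[:, :, c, c] = kernel               # no cross-class mixing
    return weights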
def fcn_VGG16_8s_crfrnn(INPUT_SIZE, nb_classes, num_crf_iterations, batch_size):
    """ Returns Keras FCN-8s + CRF-RNN layer model definition. """
    fcn = fcn_VGG16_8s(INPUT_SIZE, nb_classes)
    saved_model_path = '/storage/cfmata/deeplab/crf_rnn/crfasrnn_keras/results/results/pascal_voc12/voc2012_sadeep_start0.80-1.18'
    fcn.load_weights(saved_model_path)

    inputs = fcn.layers[0].output
    # Add plenty of zero padding
    #inputs = ZeroPadding2D(padding=(100, 100))(inputs)

    fcn_score = fcn.get_layer('score_7_4_3_up').output  # used to be: fcn.output
    #fcn_score = fcn.output

    # Add the CRF-RNN layer:
    height, width = INPUT_SIZE, INPUT_SIZE
    crfrnn_output = CrfRnnLayer(image_dims=(height, width),
                                num_classes=nb_classes,
                                theta_alpha=160.,
                                theta_beta=90.,  # 3.
                                theta_gamma=3.,
                                num_iterations=num_crf_iterations,  # 10 at test time, 5 at train time
                                batch_size=batch_size,
                                name='crfrnn')([fcn_score, inputs])

    model = Model(inputs=inputs, outputs=crfrnn_output, name='fcn_VGG16_8s_crfrnn')

    # Freezing weights in lower layers (optional):
    # for layer in model.layers[:28]:  # 15, 21, 29 (overall 30 layers)
    #     layer.trainable = True

    return model
def main(argv=None):
    keep_probability = tf.placeholder(tf.float32, name="keep_probability")
    image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3], name="input_image")
    annotation = tf.placeholder(tf.int32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], name="annotation")
    FM_pl = tf.placeholder(tf.float32, [])
    total_acc_pl = tf.placeholder(tf.float32, [])
    acc_pl = tf.placeholder(tf.float32, [])
    iu_pl = tf.placeholder(tf.float32, [])
    fwavacc_pl = tf.placeholder(tf.float32, [])
    # is_training = tf.placeholder('bool')

    with tf.Session() as sess:
        saver = tf.train.import_meta_graph('log99/model.ckpt-144270.meta')
        ckpt = tf.train.latest_checkpoint(FLAGS.logs_dir)
        if ckpt:
            saver.restore(sess, ckpt)
            print("Model restored...")

    graph = tf.get_default_graph()
    upscore = graph.get_tensor_by_name("up:0")

    logits = CrfRnnLayer(image_dims=(256, 256),
                         num_classes=2,
                         theta_alpha=160.,
                         theta_beta=3.,
                         theta_gamma=3.,
                         num_iterations=5,
                         name='crfrnn')([upscore, image])
    pred_annotation = tf.argmax(logits, axis=3)

    # loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
    #                                                                       labels=tf.squeeze(annotation, axis=[3]),
    #                                                                       name="entropy")))
    # loss_summary = tf.summary.scalar("entropy", loss)
    # trainable_var = tf.trainable_variables()
    # S_vars = [svar for svar in tf.trainable_variables() if 'weight' in svar.name]
    # l2 = tf.add_n([tf.nn.l2_loss(var) for var in S_vars])
    # loss = loss + l2 * FLAGS.weight_decay
    # train_op = tf.train.MomentumOptimizer(FLAGS.learning_rate, 0.9).minimize(loss + l2 * FLAGS.weight_decay)
    # train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss + l2 * FLAGS.weight_decay)
    # train_op = train(loss, trainable_var)

    """ median-frequency re-weighting """
    # class_weights = np.array([
    #     0.5501,
    #     5.4915
    # ])
    # loss = tf.reduce_mean((tf.nn.weighted_cross_entropy_with_logits(logits=logits,
    #                                                                 targets=tf.one_hot(tf.squeeze(annotation, axis=[3]), depth=num_classes),
    #                                                                 pos_weight=class_weights,
    #                                                                 name="entropy")))

    loss = LOSS.loss(logits,
                     tf.one_hot(tf.squeeze(annotation, axis=[3]), depth=num_classes),
                     num_classes,
                     head=None)
    regularization_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    t_loss = loss + regularization_loss

    loss_summary = tf.summary.scalar("entropy", loss)
    FM_summary = tf.summary.scalar('FM', FM_pl)
    acc_total_summary = tf.summary.scalar("total_acc", total_acc_pl)
    acc_summary = tf.summary.scalar("acc", acc_pl)
    iu_summary = tf.summary.scalar("iu", iu_pl)
    fwavacc_summary = tf.summary.scalar("fwavacc", fwavacc_pl)

    train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)
    #train_op = tf.train.MomentumOptimizer(FLAGS.learning_rate, 0.99).minimize(t_loss)
    #train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(t_loss)

    summary_op = tf.summary.merge_all()

    train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
    print(len(train_records))
    print(len(valid_records))

    print("Setting up dataset reader")
    image_options = {'resize': False, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records, image_options)
    validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)

    sess = tf.Session(config=config)
    saver = tf.train.Saver(max_to_keep=3)

    # Create two summary writers to show training loss and validation loss in
    # the same graph; this requires two folders 'train' and 'validation'
    # inside FLAGS.logs_dir.
    graph_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/graph', sess.graph)
    train_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/train')
    validation_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/validation')

    sess.run(tf.global_variables_initializer())

    if FLAGS.mode == "train":
        for itr in range(1, MAX_ITERATION):
            train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size)
            feed_dict = {image: train_images,
                         annotation: train_annotations,
                         keep_probability: 0.5}
            sess.run(train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                train_loss, summary_str = sess.run([loss, loss_summary], feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                train_writer.add_summary(summary_str, itr)

            if itr % 210 == 0:
                valid_images, valid_annotations = validation_dataset_reader.get_records()
                val_count = 0
                total_loss = 0
                hist = np.zeros((num_classes, num_classes))
                fm = 0
                for i in range(1, 21):
                    val_images = valid_images[val_count:val_count + val_batch_size]
                    val_annotations = valid_annotations[val_count:val_count + val_batch_size]
                    val_loss, val_pred_dense = sess.run([loss, logits],
                                                        feed_dict={image: val_images,
                                                                   annotation: val_annotations,
                                                                   keep_probability: 1.0})
                    total_loss = total_loss + val_loss
                    val_count = val_count + val_batch_size
                    hist += get_hist(val_pred_dense, val_annotations)
                    fm += get_FM(val_pred_dense, val_annotations)

                valid_loss = total_loss / 20
                FM = fm / (20 * val_batch_size)
                acc_total = np.diag(hist).sum() / hist.sum()
                acc = np.diag(hist) / hist.sum(1)
                iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
                freq = hist.sum(1) / hist.sum()

                # summary_st = sess.run(summary_op, feed_dict=feed_dict)
                summary_sva = sess.run(loss_summary, feed_dict={loss: valid_loss})
                summary_FM = sess.run(FM_summary, feed_dict={FM_pl: FM})
                summary_acc_total = sess.run(acc_total_summary, feed_dict={total_acc_pl: acc_total})
                summary_acc = sess.run(acc_summary, feed_dict={acc_pl: np.nanmean(acc)})
                summary_iu = sess.run(iu_summary, feed_dict={iu_pl: np.nanmean(iu)})
                summary_fwavacc = sess.run(fwavacc_summary,
                                           feed_dict={fwavacc_pl: (freq[freq > 0] * iu[freq > 0]).sum()})

                print("Step: %d, Valid_loss:%g" % (itr, valid_loss))
                print(" >>> Step: %d, f1_score:%g" % (itr, FM))
                # overall accuracy
                print(" >>> Step: %d, overall accuracy:%g" % (itr, acc_total))
                print(" >>> Step: %d, mean accuracy:%g" % (itr, np.nanmean(acc)))
                print(" >>> Step: %d, mean IU:%g" % (itr, np.nanmean(iu)))
                print(" >>> Step: %d, fwavacc:%g" % (itr, (freq[freq > 0] * iu[freq > 0]).sum()))

                # validation_writer.add_summary(summary_st, step)
                validation_writer.add_summary(summary_sva, itr)
                validation_writer.add_summary(summary_FM, itr)
                validation_writer.add_summary(summary_acc_total, itr)
                validation_writer.add_summary(summary_acc, itr)
                validation_writer.add_summary(summary_iu, itr)
                validation_writer.add_summary(summary_fwavacc, itr)

                saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)

                va_images, va_annotations = validation_dataset_reader.get_random_batch(1)
                pred = sess.run(pred_annotation, feed_dict={image: va_images,
                                                            annotation: va_annotations,
                                                            keep_probability: 1.0})
                va_annotations = np.squeeze(va_annotations, axis=3)
                # pred = np.squeeze(pred, axis=3)
                pred = pred * 255
                va_annotations = va_annotations * 255
                for it in range(1):
                    utils.save_image(va_images[it].astype(np.uint8), FLAGS.logs_dir,
                                     name="inp_" + str(5 + it))
                    utils.save_image(va_annotations[it].astype(np.uint8), FLAGS.logs_dir,
                                     name="gt_" + str(5 + it))
                    utils.save_image(pred[it].astype(np.uint8), FLAGS.logs_dir,
                                     name="pred_" + str(5 + it))
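# `get_hist` and `get_FM` are imported from the evaluation utilities; a common
# implementation of the confusion-matrix accumulator behind the IoU numbers
# above (the FCN "fast_hist" idiom) is sketched here, assuming the module-level
# `num_classes`. `get_FM` is assumed to compute a per-image F-measure and is
# not reproduced.
import numpy as np

def fast_hist(labels, preds, n):
    # n x n confusion matrix over valid labels only
    k = (labels >= 0) & (labels < n)
    return np.bincount(n * labels[k].astype(int) + preds[k],
                       minlength=n ** 2).reshape(n, n)

def get_hist(logits, annotations, n=num_classes):
    preds = np.argmax(logits, axis=3).flatten()
    labels = annotations.flatten()
    return fast_hist(labels, preds, n)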
def build(self, rgb, num_classes=20, keep_probability=1.0, random_init_fc8=False, debug=False):
    """
    Build the VGG model using loaded weights.

    Parameters
    ----------
    rgb: image batch tensor
        Image in RGB shape, scaled to the interval [0, 255].
    num_classes: int
        How many classes should be predicted (by fc8).
    random_init_fc8: bool
        Whether to initialize the fc8 layer randomly.
        Finetuning is required in this case.
    debug: bool
        Whether to print additional debug information.
    """
    # Convert RGB to BGR
    with tf.name_scope('Processing'):
        red, green, blue = tf.split(rgb, 3, 3)
        # assert red.get_shape().as_list()[1:] == [224, 224, 1]
        # assert green.get_shape().as_list()[1:] == [224, 224, 1]
        # assert blue.get_shape().as_list()[1:] == [224, 224, 1]
        bgr = tf.concat([
            blue - VGG_MEAN[0],
            green - VGG_MEAN[1],
            red - VGG_MEAN[2],
        ], 3)

        if debug:
            bgr = tf.Print(bgr, [tf.shape(bgr)],
                           message='Shape of input image: ',
                           summarize=4, first_n=1)

    self.conv1_1 = self._conv_layer(bgr, "conv1_1")
    self.conv1_2 = self._conv_layer(self.conv1_1, "conv1_2")
    self.pool1 = self._max_pool(self.conv1_2, 'pool1', debug)

    self.conv2_1 = self._conv_layer(self.pool1, "conv2_1")
    self.conv2_2 = self._conv_layer(self.conv2_1, "conv2_2")
    self.pool2 = self._max_pool(self.conv2_2, 'pool2', debug)

    self.conv3_1 = self._conv_layer(self.pool2, "conv3_1")
    self.conv3_2 = self._conv_layer(self.conv3_1, "conv3_2")
    self.conv3_3 = self._conv_layer(self.conv3_2, "conv3_3")
    self.pool3 = self._max_pool(self.conv3_3, 'pool3', debug)

    self.conv4_1 = self._conv_layer(self.pool3, "conv4_1")
    self.conv4_2 = self._conv_layer(self.conv4_1, "conv4_2")
    self.conv4_3 = self._conv_layer(self.conv4_2, "conv4_3")
    self.pool4 = self._max_pool(self.conv4_3, 'pool4', debug)

    self.conv5_1 = self._conv_layer(self.pool4, "conv5_1")
    self.conv5_2 = self._conv_layer(self.conv5_1, "conv5_2")
    self.conv5_3 = self._conv_layer(self.conv5_2, "conv5_3")
    self.pool5 = self._max_pool(self.conv5_3, 'pool5', debug)

    self.fc6 = self._fc_layer(self.pool5, "fc6")
    self.fc6 = tf.nn.dropout(self.fc6, keep_probability)

    self.fc7 = self._fc_layer(self.fc6, "fc7")
    self.fc7 = tf.nn.dropout(self.fc7, keep_probability)

    if random_init_fc8:
        self.score_fr = self._score_layer(self.fc7, "score_fr", num_classes)
    else:
        self.score_fr = self._fc_layer(self.fc7, "score_fr",
                                       num_classes=num_classes, relu=False)

    self.pred = tf.argmax(self.score_fr, axis=3)

    self.upscore = self._upscore_layer(self.score_fr,
                                       shape=tf.shape(bgr),
                                       num_classes=num_classes,
                                       debug=debug, name='up',
                                       ksize=64, stride=32)

    # Refine the 32x-upsampled scores with a CRF-RNN layer (dimensions and
    # class count are hard-coded for the 256x256 two-class setup used here)
    self.upscore = CrfRnnLayer(image_dims=(256, 256),
                               num_classes=2,
                               theta_alpha=160.,
                               theta_beta=3.,
                               theta_gamma=3.,
                               num_iterations=5,
                               name='crfrnn')([self.upscore, bgr])

    self.pred_up = tf.argmax(self.upscore, axis=3)
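# A numpy mirror of the in-graph preprocessing above, useful for preparing raw
# images offline; the mean values match the BGR-ordered VGG_MEAN used in
# build(). This sketch complements, not replaces, the 'Processing' scope.
import numpy as np

def preprocess_bgr(rgb_image):
    """rgb_image: HxWx3 array in RGB order, scaled to [0, 255]."""
    vgg_mean_bgr = np.array([103.939, 116.779, 123.68], dtype=np.float32)
    bgr = rgb_image[..., ::-1].astype(np.float32)  # RGB -> BGR
    return bgr - vgg_mean_bgr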
def add_crf_double_stream_inputsize_128_remapfactor_16(self, load_pretrained_weights):
    # load pretrained model
    # model_pretrained = self.w_net_siamese_aligned_images_custom_segmentation_pretrained_unets_more_subtractions_stronger_remapping()
    model_pretrained = self.double_stream_6_subs_64_filters_remapfactor_32()
    if load_pretrained_weights:
        model_pretrained.load_weights(self.args['pretrained_model_file'])

    # The input to the crf_rnn block has to have the following properties:
    #  - RGB image (range [0, 255]) -> invert the VGG16 preprocessing
    #  - shape: [1, H, W, num_classes] -> in our case, num_classes == 2 since
    #    we have "change" and "no change"
    #    - Class 0: Background
    #    - Class 1: Foreground -> background + foreground values have to sum up to 1

    # map values from [0, 1] to [-8, 8]
    remap_factor = 16
    seg_positive_reduced_normalized_mult = Lambda(
        lambda x: tf.math.multiply_no_nan(x, remap_factor),
        name='seg_process_6')(model_pretrained.get_layer('seg_process_5').output)
    seg_positive_reduced_normalized_remapped = Lambda(
        lambda x: tf.math.subtract(x, remap_factor / 2),
        name='seg_process_7')(seg_positive_reduced_normalized_mult)
    foreground_sigmoid = Activation(
        activation='sigmoid',
        name='seg_process_sigmoid')(seg_positive_reduced_normalized_remapped)

    # background is simply 1 - foreground
    ones_matrix = Lambda(lambda x: tf.ones_like(x),
                         name='seg_process_ones_matrix')(foreground_sigmoid)
    background_sigmoid = Subtract(name='seg_process_background')([ones_matrix, foreground_sigmoid])
    # background_sigmoid = Lambda(lambda x: tf.math.subtract(1, x), name='seg_process_background')(foreground_sigmoid)

    # concatenate background and foreground
    crf_rnn_unary = concatenate([background_sigmoid, foreground_sigmoid],
                                axis=3, name='seg_process_concat')

    # Transform the input image back to RGB color space (de-process it:
    # BGR -> RGB, add the ImageNet mean).
    # TODO: this deprocessing is only correct if used with VGG16 preprocessing!
    # image_current_deprocessed = Lambda(lambda x: concatenate([np.expand_dims(x[:, :, :, 2], axis=3), np.expand_dims(x[:, :, :, 1], axis=3), np.expand_dims(x[:, :, :, 0], axis=3)], axis=3, name='seg_process_deprocess_1'))(image_current)
    channel_r = Lambda(lambda x: tf.expand_dims(x[:, :, :, 2], axis=3),
                       name='seg_process_channel_r')(model_pretrained.get_layer('image_current').input)
    channel_g = Lambda(lambda x: tf.expand_dims(x[:, :, :, 1], axis=3),
                       name='seg_process_channel_g')(model_pretrained.get_layer('image_current').input)
    channel_b = Lambda(lambda x: tf.expand_dims(x[:, :, :, 0], axis=3),
                       name='seg_process_channel_b')(model_pretrained.get_layer('image_current').input)
    image_current_deprocessed = concatenate([channel_r, channel_g, channel_b],
                                            axis=3, name='seg_process_cat')
    imagenet_mean = [103.939, 116.779, 123.68]
    # Add the mean as a broadcast constant (the mean list itself is not a
    # tensor, so it is closed over rather than passed as a layer input).
    image_current_deprocessed = Lambda(
        lambda x: tf.math.add(x, imagenet_mean),
        name='seg_process_deprocess_mean')(image_current_deprocessed)

    # Upscale both the image and the unary; this should help the CRF to better
    # segment fine structures. Images are upsampled to 512x512 (assuming that
    # the input is 128x128).
    image_current_deprocessed_upscaled = UpSampling2D(
        size=4, interpolation='bilinear',
        name='seg_process_upsample_1')(image_current_deprocessed)
    crf_rnn_unary_upscaled = UpSampling2D(
        size=4, interpolation='bilinear',
        name='seg_process_upsample_2')(crf_rnn_unary)

    # apply crf
    crf_rnn = CrfRnnLayer(image_dims=(512, 512),
                          num_classes=2,
                          theta_alpha=160.,
                          theta_beta=3.,
                          theta_gamma=3.,
                          num_iterations=5,
                          name='crfrnn')([crf_rnn_unary_upscaled, image_current_deprocessed_upscaled])

    # downsample the crf_rnn layer output again to 128x128
    crf_rnn_downsampled = AveragePooling2D(pool_size=4, strides=4, padding='same',
                                           data_format='channels_last')(crf_rnn)

    # apply softmax function
    crf_rnn_softmax = Softmax(axis=-1, name='crf_softmax')(crf_rnn_downsampled)

    # extract the foreground channel of the crf_rnn output
    # channel 0: background
    # channel 1: foreground
    crf_rnn_channel_1 = Lambda(lambda x: x[:, :, :, 1],
                               name='crf_rnn_channel_1')(crf_rnn_softmax)
    crf_rnn_channel_1 = Lambda(lambda x: x[:, :, :, np.newaxis],
                               name='crf_rnn_channel_1_newaxis')(crf_rnn_channel_1)

    image_current_seg_mult = model_pretrained.get_layer('seg_process_multiply')(
        [crf_rnn_channel_1, model_pretrained.get_layer('image_current').input])
    classifier_output = model_pretrained.get_layer('vgg16')(image_current_seg_mult)
    gav = model_pretrained.get_layer('classify_global_avg_pool')(classifier_output)
    dense1 = model_pretrained.get_layer('classify_dense1')(gav)

    # combine the two streams and apply two more fc layers
    cat_streams = model_pretrained.get_layer('cat_streams')(
        [dense1, model_pretrained.get_layer('stream1_classify_dense1').output])
    dense2 = ReLU()(model_pretrained.get_layer('classify_dense2')(cat_streams))
    dense3 = model_pretrained.get_layer('classify_dense3')(dense2)
    output_feature_comparison = model_pretrained.get_layer('output_activation')(dense3)

    model = Model(inputs=[model_pretrained.get_layer('image_previous').input,
                          model_pretrained.get_layer('image_current').input],
                  outputs=output_feature_comparison)
    return model
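# A quick numeric check of the remapping above, assuming unary inputs in [0, 1]:
# x * 16 - 8 maps [0, 1] onto [-8, 8], so the sigmoid saturates near 0/1 at the
# extremes, and the background channel is exactly 1 - foreground, which is the
# sum-to-one form the CrfRnnLayer expects.
import numpy as np

def remap_check(x, remap_factor=16):
    z = x * remap_factor - remap_factor / 2.0   # [0, 1] -> [-8, 8]
    fg = 1.0 / (1.0 + np.exp(-z))               # sigmoid
    return fg, 1.0 - fg

fg, bg = remap_check(np.array([0.0, 0.5, 1.0]))
# fg ~= [0.0003, 0.5, 0.9997]; fg + bg == 1 everywhere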
def deeplabv_crf(input_tensor=None, input_shape=(224, 224, 3), OS=16):
    """ Instantiates the Deeplabv3+ architecture.

    Optionally loads weights pre-trained on PASCAL VOC. This model is available
    for TensorFlow only, and can only be used with inputs following the
    TensorFlow data format `(width, height, channels)`.

    # Arguments
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: shape of input image, format HxWxC.
            The PASCAL VOC model was trained on (512, 512, 3) images.
        OS: determines input_shape/feature_extractor_output ratio. One of {8, 16}.
            Used only for the xception backbone.

    # Returns
        A Keras model instance.

    # Raises
        RuntimeError: If attempting to run this model with a backend that does
            not support separable convolutions.
        ValueError: in case of invalid argument for `weights` or `backbone`.
    """
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    if OS == 8:
        entry_block3_stride = 1
        middle_block_rate = 2  # ! Not mentioned in paper, but required
        exit_block_rates = (2, 4)
        atrous_rates = (12, 24, 36)
    else:
        entry_block3_stride = 2
        middle_block_rate = 1
        exit_block_rates = (1, 2)
        atrous_rates = (6, 12, 18)

    x = Conv2D(32, (3, 3), strides=(2, 2), name='entry_flow_conv1_1',
               use_bias=False, padding='same')(img_input)
    x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
    x = Activation('relu')(x)

    x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
    x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
    x = Activation('relu')(x)

    x = _xception_block(x, [128, 128, 128], 'entry_flow_block1',
                        skip_connection_type='conv', stride=2,
                        depth_activation=False)
    x, skip1 = _xception_block(x, [256, 256, 256], 'entry_flow_block2',
                               skip_connection_type='conv', stride=2,
                               depth_activation=False, return_skip=True)
    x = _xception_block(x, [728, 728, 728], 'entry_flow_block3',
                        skip_connection_type='conv', stride=entry_block3_stride,
                        depth_activation=False)
    for i in range(16):
        x = _xception_block(x, [728, 728, 728], 'middle_flow_unit_{}'.format(i + 1),
                            skip_connection_type='sum', stride=1,
                            rate=middle_block_rate, depth_activation=False)

    x = _xception_block(x, [728, 1024, 1024], 'exit_flow_block1',
                        skip_connection_type='conv', stride=1,
                        rate=exit_block_rates[0], depth_activation=False)
    x = _xception_block(x, [1536, 1536, 2048], 'exit_flow_block2',
                        skip_connection_type='none', stride=1,
                        rate=exit_block_rates[1], depth_activation=True)

    # end of feature extractor

    # branching for Atrous Spatial Pyramid Pooling
    # Image Feature branch
    #out_shape = int(np.ceil(input_shape[0] / OS))
    b4 = AveragePooling2D(pool_size=(int(np.ceil(input_shape[0] / OS)),
                                     int(np.ceil(input_shape[1] / OS))))(x)
    b4 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation('relu')(b4)
    b4 = BilinearUpsampling((int(np.ceil(input_shape[0] / OS)),
                             int(np.ceil(input_shape[1] / OS))))(b4)

    # simple 1x1
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation('relu', name='aspp0_activation')(b0)

    # there are only 2 branches in mobilenetV2; not sure why
    # rate = 6 (12)
    b1 = SepConv_BN(x, 256, 'aspp1', rate=atrous_rates[0],
                    depth_activation=True, epsilon=1e-5)
    # rate = 12 (24)
    b2 = SepConv_BN(x, 256, 'aspp2', rate=atrous_rates[1],
                    depth_activation=True, epsilon=1e-5)
    # rate = 18 (36)
    b3 = SepConv_BN(x, 256, 'aspp3', rate=atrous_rates[2],
                    depth_activation=True, epsilon=1e-5)

    # concatenate ASPP branches & project
    x = Concatenate()([b4, b0, b1, b2, b3])
    x = Conv2D(256, (1, 1), padding='same', use_bias=False, name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)

    # DeepLab v.3+ decoder
    # Feature projection
    # x4 (x2) block
    x = BilinearUpsampling(output_size=(int(np.ceil(input_shape[0] / 4)),
                                        int(np.ceil(input_shape[1] / 4))))(x)
    dec_skip1 = Conv2D(48, (1, 1), padding='same', use_bias=False,
                       name='feature_projection0')(skip1)
    dec_skip1 = BatchNormalization(name='feature_projection0_BN', epsilon=1e-5)(dec_skip1)
    dec_skip1 = Activation('relu')(dec_skip1)
    x = Concatenate()([x, dec_skip1])
    x = SepConv_BN(x, 256, 'decoder_conv0', depth_activation=True, epsilon=1e-5)
    x = SepConv_BN(x, 256, 'decoder_conv1', depth_activation=True, epsilon=1e-5)

    # you can use it with an arbitrary number of classes
    last_layer_name = 'custom_logits_semantic'
    x = Conv2D(2, (1, 1), padding='same', name=last_layer_name)(x)
    x = BilinearUpsampling(output_size=(input_shape[0], input_shape[1]))(x)

    output = CrfRnnLayer(image_dims=(224, 224),
                         num_classes=2,
                         theta_alpha=160.,
                         theta_beta=3.,
                         theta_gamma=3.,
                         num_iterations=10,
                         name='crfrnn')([x, img_input])

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, output, name='deeplabv3')

    # load weights
    model.load_weights(WEIGHTS_PATH_X, by_name=True)

    return model
def get_crfrnn_model_def(nClasses, optimizer=None, input_height=360, input_width=480):
    """ Returns a Keras CRF-RNN model definition.

    The CrfRnnLayer and the output dimensions below are hard-coded for
    32 x 32 score maps; adjust them when changing the input size.
    """
    channels, height, width = 3, input_height, input_width

    # Input
    input_shape = (height, width, 3)
    img_input = Input(shape=input_shape)

    kernel = 3
    filter_size = 64
    pad = 1
    pool_size = 2

    # Add plenty of zero padding
    x = ZeroPadding2D(padding=(pad, pad))(img_input)

    # VGG-16-style convolution block 1
    x = Conv2D(filter_size, (kernel, kernel), padding='valid', name='conv1_1')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((pool_size, pool_size), name='pool1')(x)

    # VGG-16-style convolution block 2
    x = Conv2D(128, (kernel, kernel), padding='valid', name='conv1_2')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((pool_size, pool_size), name='pool2')(x)

    # VGG-16-style convolution block 3
    x = Conv2D(256, (kernel, kernel), padding='valid', name='conv1_3')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((pool_size, pool_size), name='pool3')(x)
    pool3 = x

    # VGG-16-style convolution block 4
    x = Conv2D(512, (kernel, kernel), padding='valid', name='conv1_4')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((pool_size, pool_size), name='pool4')(x)
    pool4 = x

    # decoder
    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Conv2D(512, (kernel, kernel), padding='valid', name='conv2_1')(x)
    x = BatchNormalization()(x)
    x = UpSampling2D((pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Conv2D(256, (kernel, kernel), padding='valid', name='conv2_2')(x)
    x = BatchNormalization()(x)
    x = UpSampling2D((pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Conv2D(128, (kernel, kernel), padding='valid', name='conv2_3')(x)
    x = BatchNormalization()(x)
    x = UpSampling2D((pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Conv2D(filter_size, (kernel, kernel), padding='valid', name='conv2_4')(x)
    x = BatchNormalization()(x)

    x = Conv2D(nClasses, (1, 1), padding='valid', name='conv3_1')(x)
    #x = Conv2D(100, (kernel, kernel), padding='valid')(x)
    #out_height = x.shape[1]
    #out_width = x.shape[2]
    #x = Reshape((nClasses, 32 * 32), input_shape=(32, 32, nClasses))(x)
    #x = Permute((2, 1))(x)
    #x = Activation('softmax')(x)
    print(x)
    #x = UpSampling2D(size=(4, 4))(x)

    output = CrfRnnLayer(image_dims=(32, 32),
                         num_classes=nClasses,
                         theta_alpha=160.,
                         theta_beta=3.,
                         theta_gamma=3.,
                         num_iterations=10,
                         name='crfrnn')([x, img_input])

    # Build the model
    model = Model(img_input, output, name='crfrnn_net')
    model.outputHeight = 32
    model.outputWidth = 32

    return model
def getmodel():
    saved_model_path = 'crfrnn_keras_model.h5'
    f = h5py.File(saved_model_path, 'r')
    # Exploration of the weight-file layout (kept for reference):
    #a = list(f.keys())
    #print(a)
    #for i in range(len(a)):
    #    b = list(f[a[i]])
    #    print(b)
    #    if len(b) > 0:
    #        c = list(f[a[i]][b[0]])
    #        print(c)
    #        if len(c) > 0:
    #            for j in range(len(c)):
    #                d = list(f[a[i]][b[0]][c[j]])
    #                print('size of d')
    #                print(len(d))
    #                #print(d)

    input_shape = (height, width, 3)
    img_input = Input(shape=input_shape)
    x = ZeroPadding2D(padding=(100, 100))(img_input)

    # Pre-processing convolution block 1
    x = Conv2D(3, (3, 3), activation='relu', padding='valid', name='convpre1_1')(x)
    x = Conv2D(3, (3, 3), activation='relu', padding='same', name='convpre1_2')(x)
    x = MaxPooling2D((2, 2), strides=(1, 1), name='poolpre1')(x)

    # Pre-processing convolution block 2
    x = Conv2D(3, (3, 3), activation='relu', padding='same', name='convpre2_1')(x)
    x = Conv2D(3, (3, 3), activation='relu', padding='same', name='convpre2_2')(x)
    x = MaxPooling2D((2, 2), strides=(1, 1), name='poolpre2', padding='same')(x)

    # VGG-16 convolution block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='valid', name='conv1_1')(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(x)

    # VGG-16 convolution block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_1')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool2', padding='same')(x)

    # VGG-16 convolution block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_1')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool3', padding='same')(x)
    pool3 = x

    # VGG-16 convolution block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool4', padding='same')(x)
    pool4 = x

    # VGG-16 convolution block 5
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool5', padding='same')(x)

    # Fully-connected layers converted to convolution layers
    x = Conv2D(4096, (7, 7), activation='relu', padding='valid', name='fc6')(x)
    x = Dropout(0.5)(x)
    x = Conv2D(4096, (1, 1), activation='relu', padding='valid', name='fc7')(x)
    x = Dropout(0.5)(x)
    x = Conv2D(5, (1, 1), padding='valid', name='score-fr')(x)

    score2 = Conv2DTranspose(5, (3, 3), strides=2, name='score2')(x)

    # Skip connections from pool4
    score_pool4 = Conv2D(5, (1, 1), name='score-pool4')(pool4)
    score_pool4c = Cropping2D((5, 5))(score_pool4)
    score_fused = Add()([score2, score_pool4c])
    score4 = Conv2DTranspose(5, (5, 5), strides=2, name='score4', use_bias=False)(score_fused)

    # Skip connections from pool3
    score_pool3 = Conv2D(5, (1, 1), name='score-pool3')(pool3)
    score_pool3c = Cropping2D((8, 8))(score_pool3)

    # Fuse things together
    score_final = Add()([score4, score_pool3c])

    # Final up-sampling and cropping
    upsample = Conv2DTranspose(5, (16, 16), strides=8, name='upsample', use_bias=False)(score_final)
    upscore = Cropping2D(((39, 37), (39, 37)))(upsample)

    output = CrfRnnLayer(image_dims=(height, width),
                         num_classes=5,
                         theta_alpha=160.,
                         theta_beta=3.,
                         theta_gamma=3.,
                         num_iterations=10,
                         name='crfrnn')([upscore, img_input])
    #output = upscore

    # Build the model
    model = Model(img_input, output, name='crfrnn_net')
    #model = Model(input=img_input, output=x)
    print(model.summary())

    # Transfer learning, following
    # https://medium.com/@14prakash/transfer-learning-using-keras-d804b2e04ef8
    layer_names = [layer.name for layer in model.layers]
    print(layer_names)
    last = layer_names.index('score-fr')
    for i in range(8, last - 2):
    #for i in range(8, last):
        name = layer_names[i]
        # Keras auto-names Dropout layers (dropout_1, dropout_2, ...); map them
        # back to the dropout names used in the saved weight file.
        dropname = name.find('dropout')
        if dropname != -1:
            print(name)
            nameindex = int(name[8])
            print(nameindex)
            if nameindex > 2 and nameindex % 2 != 0:
                nameindex = 1
            if nameindex > 2 and nameindex % 2 == 0:
                nameindex = 2
            print('change dropout layer names')
            name = name[:8] + str(nameindex)
        c = list(f[name])
        #model.layers[i].trainable = False
        if len(c) > 0:
            print(c)
            d = list(f[name][c[0]])
            print(d)
            # d is sorted alphabetically, so d[1] is the kernel and d[0] the
            # bias; set_weights expects [kernel, bias]
            weight = [f[name][c[0]][d[1]], f[name][c[0]][d[0]]]
            weight = np.asarray(weight)
            print(i)
            model.layers[i].set_weights(weight)
            #print(weight[0].shape)
            #print(weight[1].shape)
            #test = model.layers[i].get_weights()
            #test = np.array(test)
            #print(test.shape)

    #for i in range(5, last):
    #    model.layers[i].trainable = False

    model.compile(loss="categorical_crossentropy",
                  optimizer=optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                                            epsilon=None, decay=0.0, amsgrad=False),
                  metrics=["accuracy", iou_loss_core])
    #optimizer=optimizers.SGD(lr=0.0001, momentum=0.9), metrics=["accuracy"]
    return model
def get_crfrnn_model_def():
    """ Returns Keras CRF-RNN model definition.

    Currently, only 500 x 500 images are supported. However, one can get this
    to work with different image sizes by adjusting the parameters of the
    Cropping2D layers below.
    """
    channels, height, width = 3, 500, 500

    # Input
    input_shape = (height, width, 3)
    img_input = Input(shape=input_shape)

    # Add plenty of zero padding
    x = ZeroPadding2D(padding=(100, 100))(img_input)

    # VGG-16 convolution block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='valid', name='conv1_1')(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(x)

    # VGG-16 convolution block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_1')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool2', padding='same')(x)

    # VGG-16 convolution block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_1')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool3', padding='same')(x)
    pool3 = x

    # VGG-16 convolution block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool4', padding='same')(x)
    pool4 = x

    # VGG-16 convolution block 5
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='pool5', padding='same')(x)

    # Fully-connected layers converted to convolution layers
    x = Conv2D(4096, (7, 7), activation='relu', padding='valid', name='fc6')(x)
    x = Dropout(0.5)(x)
    x = Conv2D(4096, (1, 1), activation='relu', padding='valid', name='fc7')(x)
    x = Dropout(0.5)(x)
    x = Conv2D(21, (1, 1), padding='valid', name='score-fr')(x)

    # Deconvolution
    score2 = Conv2DTranspose(21, (4, 4), strides=2, name='score2')(x)

    # Skip connections from pool4
    score_pool4 = Conv2D(21, (1, 1), name='score-pool4')(pool4)
    score_pool4c = Cropping2D((5, 5))(score_pool4)
    score_fused = Add()([score2, score_pool4c])
    score4 = Conv2DTranspose(21, (4, 4), strides=2, name='score4', use_bias=False)(score_fused)

    # Skip connections from pool3
    score_pool3 = Conv2D(21, (1, 1), name='score-pool3')(pool3)
    score_pool3c = Cropping2D((9, 9))(score_pool3)

    # Fuse things together
    score_final = Add()([score4, score_pool3c])

    # Final up-sampling and cropping
    upsample = Conv2DTranspose(21, (16, 16), strides=8, name='upsample', use_bias=False)(score_final)
    upscore = Cropping2D(((31, 37), (31, 37)))(upsample)

    output = CrfRnnLayer(image_dims=(height, width),
                         num_classes=21,
                         theta_alpha=160.,
                         theta_beta=3.,
                         theta_gamma=3.,
                         num_iterations=10,
                         name='crfrnn')([upscore, img_input])

    # Build the model
    model = Model(img_input, output, name='crfrnn_net')
    return model
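# Where the Cropping2D offsets above come from: with 500x500 inputs and 100 px
# of zero padding, the valid convolutions and strided pools shrink the feature
# maps as traced below, and the final (31, 37) crop recovers exactly 500 px.
# A sketch of the size bookkeeping (ceil division because the strided pools
# after pool1 use padding='same'):
size = 500 + 2 * 100            # ZeroPadding2D(100): 700
size = size - 2                 # conv1_1 is padding='valid': 698
size = size // 2                # pool1 (valid): 349
for _ in range(4):              # pool2..pool5 use padding='same'
    size = (size + 1) // 2      # ceil division: 175, 88, 44, 22
size = size - 6                 # fc6 is a 7x7 valid conv: 16
size = (size - 1) * 2 + 4       # score2 deconv: 34 == pool4 (44) - 2 * 5
size = (size - 1) * 2 + 4       # score4 deconv: 70 == pool3 (88) - 2 * 9
size = (size - 1) * 8 + 16      # final upsample deconv: 568
assert size - (31 + 37) == 500  # Cropping2D(((31, 37), (31, 37))) restores 500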
def segnet(nClasses, optimizer=None, input_height=360, input_width=480):
    kernel = 3
    filter_size = 64
    pad = 1
    pool_size = 2

    img_input = Input(shape=(input_height, input_width, 3))

    # encoder
    x = ZeroPadding2D(padding=(pad, pad))(img_input)
    x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    l1 = x
    x = MaxPooling2D(pool_size=(pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(128, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    l2 = x
    x = MaxPooling2D(pool_size=(pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(256, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    l3 = x
    x = MaxPooling2D(pool_size=(pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(512, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    l4 = x
    x = Activation('relu')(x)

    # decoder
    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(512, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Add()([l4, x])
    x = UpSampling2D(size=(pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(256, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Add()([l3, x])
    x = UpSampling2D(size=(pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(128, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Add()([l2, x])
    x = UpSampling2D(size=(pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Add()([l1, x])

    x = Convolution2D(nClasses, (1, 1), padding='valid')(x)
    beforeCrfRNN = x

    out = CrfRnnLayer(image_dims=(input_height, input_width),
                      num_classes=nClasses,
                      theta_alpha=160.,
                      theta_beta=3.,
                      theta_gamma=3.,
                      num_iterations=5,
                      name='crfrnn')([x, img_input])

    a = Model(inputs=img_input, outputs=out)
    a.outputHeight = a.output_shape[1]
    a.outputWidth = a.output_shape[2]

    out = Reshape((a.outputHeight * a.outputWidth, nClasses),
                  input_shape=(nClasses, a.outputHeight, a.outputWidth))(out)
    out = Activation('softmax')(out)

    model = Model(inputs=img_input, outputs=out)
    model.outputHeight = a.outputHeight
    model.outputWidth = a.outputWidth

    print(beforeCrfRNN.shape)
    print(img_input.shape)
    print(out.shape)
    print(x.shape)

    model.compile(loss=penalized_loss(bottleNeckFeatures=l4),
                  optimizer="adadelta", metrics=['accuracy'])
    return model
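# `penalized_loss` is defined elsewhere in this repo; only the closure pattern
# it relies on is sketched here, under the assumption that it adds a penalty
# computed from the bottleneck features to the usual cross-entropy. The penalty
# form below is illustrative, not the repo's actual definition.
from keras import backend as K

def penalized_loss_sketch(bottleNeckFeatures, weight=1e-4):
    penalty = weight * K.mean(K.square(bottleNeckFeatures))  # hypothetical term
    def loss(y_true, y_pred):
        return K.categorical_crossentropy(y_true, y_pred) + penalty
    return loss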
def build_network(self):
    # Set up CRF dimensions and iteration counts based on the rescale flags
    if self.rescale075:
        if self.phase:
            # If training and rescaling, scale crf dimensions and rgb images
            # appropriately, using the cropped size as default
            crf_dims = (int(self.image_height * 0.75), int(self.image_width * 0.75))
            self.raw_images = tf.image.resize_images(self.inputs, [crf_dims[0], crf_dims[1]])
            num_iter = 5
        else:
            # If testing and rescaling, use the original image dimensions as default
            inputs_shape = tf.shape(self.inputs)
            image_height, image_width = inputs_shape[1], inputs_shape[2]
            #crf_dims = (int(image_height * 0.75), int(image_width * 0.75))
            crf_dims = (self.image_height, self.image_width)
            #self.raw_images = tf.image.resize_images(self.inputs, [crf_dims[0], crf_dims[1]])
            num_iter = 10
    elif self.rescale05:
        if self.phase:
            crf_dims = (int(self.image_height * 0.5), int(self.image_width * 0.5))
            self.raw_images = tf.image.resize_images(
                self.inputs, [int(self.image_height * 0.5), int(self.image_width * 0.5)])
            num_iter = 5
        else:
            inputs_shape = tf.shape(self.inputs)
            image_height, image_width = inputs_shape[1], inputs_shape[2]
            #crf_dims = (int(image_height * 0.5), int(image_width * 0.5))
            crf_dims = (self.image_height, self.image_width)
            #self.raw_images = tf.image.resize_images(self.inputs, [crf_dims[0], crf_dims[1]])
            num_iter = 10
    else:
        if self.phase:
            self.raw_images = self.inputs
            crf_dims = (self.image_height, self.image_width)
            num_iter = 5
        else:
            inputs_shape = tf.shape(self.inputs)
            image_height, image_width = inputs_shape[1], inputs_shape[2]
            #crf_dims = (image_height, image_width)
            crf_dims = (self.image_height, self.image_width)
            self.raw_images = self.inputs
            num_iter = 10

    self.encoding = self.build_encoder()
    self.decoding = self.build_decoder(self.encoding)

    if self.phase:
        # use during training
        self.resized_decoding = tf.image.resize_bilinear(self.decoding, [crf_dims[0], crf_dims[1]])
        self.raw_inputs = self.raw_images
        if self.sp_inputs is not None:
            self.superpixels = tf.image.resize_bilinear(self.sp_inputs, [crf_dims[0], crf_dims[1]])
            #self.superpixels = self.sp_inputs
        else:
            self.superpixels = None
    else:
        # use during testing
        self.resized_decoding = tf.image.resize_bilinear(
            self.decoding, [self.image_height, self.image_width])
        self.raw_inputs = tf.image.resize_bilinear(
            self.inputs, [self.image_height, self.image_width])
        if self.sp_inputs is not None:
            #self.superpixels = self.sp_inputs
            self.superpixels = tf.image.resize_bilinear(
                self.sp_inputs, [self.image_height, self.image_width])
        else:
            self.superpixels = None

    self.resized_raw = tf.image.resize_bilinear(
        self.inputs, [self.image_height, self.image_width])  # use during testing

    # num_iter computed above: 5 at train time, 10 at test time
    if self.crf_type == 'crf':
        self.outputs = CrfRnnLayer(image_dims=crf_dims,
                                   num_classes=self.num_classes,
                                   theta_alpha=160.,
                                   theta_beta=90.,
                                   theta_gamma=3.,
                                   num_iterations=num_iter,
                                   name='crfrnn')([self.resized_decoding, self.raw_inputs])
    elif self.crf_type == 'crfSP':
        self.outputs = CrfRnnLayerSP(image_dims=crf_dims,
                                     num_classes=self.num_classes,
                                     theta_alpha=160.,
                                     theta_beta=90.,
                                     theta_gamma=3.,
                                     num_iterations=num_iter,
                                     batch_size=1,
                                     name='crfrnn')([self.resized_decoding,
                                                     self.raw_inputs,
                                                     self.superpixels])
    elif self.crf_type == 'crfSPAT':
        self.outputs = CrfRnnLayerSPAT(image_dims=crf_dims,
                                       num_classes=self.num_classes,
                                       theta_alpha=160.,
                                       theta_beta=90.,
                                       theta_gamma=3.,
                                       num_iterations=num_iter,
                                       batch_size=1,
                                       name='crfrnn')([self.resized_decoding,
                                                       self.raw_inputs,
                                                       self.superpixels])
    elif self.crf_type == 'crfSPIO':
        self.outputs = CrfRnnLayerSPIO(image_dims=crf_dims,
                                       num_classes=self.num_classes,
                                       theta_alpha=160.,
                                       theta_beta=90.,
                                       theta_gamma=3.,
                                       num_iterations=num_iter,
                                       batch_size=1,
                                       name='crfrnn')([self.resized_decoding,
                                                       self.raw_inputs,
                                                       self.superpixels])
    elif self.crf_type == 'crfALL':
        self.outputs = CrfRnnLayerAll(image_dims=crf_dims,
                                      num_classes=self.num_classes,
                                      theta_alpha=160.,
                                      theta_beta=90.,
                                      theta_gamma=3.,
                                      num_iterations=num_iter,
                                      batch_size=1,
                                      name='crfrnn')([self.resized_decoding,
                                                      self.raw_inputs,
                                                      self.superpixels])
    else:
        self.outputs = self.decoding
def segnet_crf_res(nClasses, optimizer=None, input_height=360, input_width=480):
    kernel = 3
    filter_size = 64
    pad = 1
    pool_size = 2

    img_input = Input(shape=(input_height, input_width, 3))

    # encoder
    x = ZeroPadding2D(padding=(pad, pad))(img_input)
    x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    l1 = x
    x = MaxPooling2D(pool_size=(pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(128, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    l2 = x
    x = MaxPooling2D(pool_size=(pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(256, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    l3 = x
    x = MaxPooling2D(pool_size=(pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(512, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    l4 = x
    x = Activation('relu')(x)

    # decoder
    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(512, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Add()([l4, x])
    x = UpSampling2D(size=(pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(256, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Add()([l3, x])
    x = UpSampling2D(size=(pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(128, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Add()([l2, x])
    x = UpSampling2D(size=(pool_size, pool_size))(x)

    x = ZeroPadding2D(padding=(pad, pad))(x)
    x = Convolution2D(filter_size, (kernel, kernel), padding='valid')(x)
    x = BatchNormalization()(x)
    x = Add()([l1, x])

    x = Convolution2D(nClasses, (1, 1), padding='valid')(x)

    out = CrfRnnLayer(image_dims=(input_height, input_width),
                      num_classes=nClasses,
                      theta_alpha=160.,
                      theta_beta=3.,
                      theta_gamma=3.,
                      num_iterations=5,
                      name='crfrnn')([x, img_input])
    # out = x

    a = Model(inputs=img_input, outputs=out)
    a.outputHeight = a.output_shape[1]
    a.outputWidth = a.output_shape[2]

    out = Reshape((a.outputHeight * a.outputWidth, nClasses),
                  input_shape=(nClasses, a.outputHeight, a.outputWidth))(out)
    out = Activation('softmax')(out)

    model = Model(inputs=img_input, outputs=out)
    model.outputHeight = a.outputHeight
    model.outputWidth = a.outputWidth
    return model
def create_model(img_shape, num_class):
    concat_axis = 3

    # input
    inputs = Input(shape=img_shape)

    # Unet convolution block 1
    conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
    print("conv1 shape:", conv1.shape)
    conv1 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
    print("conv1 shape:", conv1.shape)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    print("pool1 shape:", pool1.shape)

    # Unet convolution block 2
    conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool1)
    print("conv2 shape:", conv2.shape)
    conv2 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
    print("conv2 shape:", conv2.shape)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    print("pool2 shape:", pool2.shape)

    # Unet convolution block 3
    conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool2)
    print("conv3 shape:", conv3.shape)
    conv3 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
    print("conv3 shape:", conv3.shape)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    print("pool3 shape:", pool3.shape)

    # Unet convolution block 4
    conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool3)
    print("conv4 shape:", conv4.shape)
    conv4 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
    print("conv4 shape:", conv4.shape)
    # drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
    print("pool4 shape:", pool4.shape)

    # Unet convolution block 5
    conv5 = layers.Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool4)
    print("conv5 shape:", conv5.shape)
    conv5 = layers.Conv2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
    print("conv5 shape:", conv5.shape)
    # drop5 = Dropout(0.5)(conv5)

    # Unet up-sampling block 1; concatenation with crop_conv4
    up6 = Conv2D(512, 2, activation='relu', padding='same', kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv5))
    print("up6 shape:", up6.shape)
    ch, cw = get_crop_shape(conv4, up6)
    crop_conv4 = Cropping2D(cropping=(ch, cw))(conv4)
    print("crop_conv4 shape:", crop_conv4.shape)
    merge6 = concatenate([crop_conv4, up6], axis=3)
    print("merge6 shape:", merge6.shape)
    conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge6)
    print("conv6 shape:", conv6.shape)
    conv6 = Conv2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
    print("conv6 shape:", conv6.shape)

    # Unet up-sampling block 2; concatenation with crop_conv3
    up7 = Conv2D(256, 2, activation='relu', padding='same', kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv6))
    print("up7 shape:", up7.shape)
    ch, cw = get_crop_shape(conv3, up7)
    crop_conv3 = Cropping2D(cropping=(ch, cw))(conv3)
    print("crop_conv3 shape:", crop_conv3.shape)
    merge7 = concatenate([crop_conv3, up7], axis=3)
    print("merge7 shape:", merge7.shape)
    conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge7)
    print("conv7 shape:", conv7.shape)
    conv7 = Conv2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
    print("conv7 shape:", conv7.shape)

    # Unet up-sampling block 3; concatenation with crop_conv2
    up8 = Conv2D(128, 2, activation='relu', padding='same', kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv7))
    print("up8 shape:", up8.shape)
    ch, cw = get_crop_shape(conv2, up8)
    crop_conv2 = Cropping2D(cropping=(ch, cw))(conv2)
    print("crop_conv2 shape:", crop_conv2.shape)
    merge8 = concatenate([crop_conv2, up8], axis=3)
    print("merge8 shape:", merge8.shape)
    conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge8)
    print("conv8 shape:", conv8.shape)
    conv8 = Conv2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
    print("conv8 shape:", conv8.shape)

    # Unet up-sampling block 4; concatenation with crop_conv1
    up9 = Conv2D(64, 2, activation='relu', padding='same', kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv8))
    print("up9 shape:", up9.shape)
    ch, cw = get_crop_shape(conv1, up9)
    crop_conv1 = Cropping2D(cropping=(ch, cw))(conv1)
    print("crop_conv1 shape:", crop_conv1.shape)
    merge9 = concatenate([crop_conv1, up9], axis=3)
    print("merge9 shape:", merge9.shape)
    conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge9)
    print("conv9 shape:", conv9.shape)
    conv9 = Conv2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    print("conv9 shape:", conv9.shape)
    conv9 = Conv2D(num_class, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    print("conv9 shape:", conv9.shape)

    ch, cw = get_crop_shape(inputs, conv9)
    conv9 = ZeroPadding2D(padding=((ch[0], ch[1]), (cw[0], cw[1])))(conv9)
    print("conv9 shape:", conv9.shape)
    # conv10 = Conv2D(num_class, (1, 1))(conv9)
    # print("conv10 shape:", conv10.shape)

    # Add the CRF-RNN layer (image_dims takes the spatial dimensions only)
    output = CrfRnnLayer(image_dims=img_shape[:2],
                         num_classes=num_class,
                         theta_alpha=160.,
                         theta_beta=3.,
                         theta_gamma=3.,
                         num_iterations=10,
                         name='crfrnn')([conv9, inputs])
    print("output shape", output.shape)

    model = Model(inputs=inputs, outputs=output)
    return model
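# `get_crop_shape` is used above but defined elsewhere; it is assumed to return
# the Cropping2D amounts that shrink `target` to `refer`'s spatial size. A
# sketch of the usual implementation (static shapes, channels_last) follows;
# with 'same' padding everywhere the crops are often (0, 0), but odd sizes
# after pooling make them necessary.
def get_crop_shape(target, refer):
    # height (axis 1) and width (axis 2) deltas, split asymmetrically when odd
    ch = int(target.get_shape()[1]) - int(refer.get_shape()[1])
    cw = int(target.get_shape()[2]) - int(refer.get_shape()[2])
    assert ch >= 0 and cw >= 0
    return (ch // 2, ch - ch // 2), (cw // 2, cw - cw // 2)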