    # Skip connection followed by 2x upsampling on layer 4.
    input = tf.layers.conv2d(vgg_layer4_out, num_classes, 1, strides=(1, 1), padding='same',
                             kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                             kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    input = tf.add(input, output)
    output = tf.layers.conv2d_transpose(input, num_classes, 4, strides=(2, 2), padding='same',
                                        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))

    # Skip connection followed by 8x upsampling on layer 3.
    input = tf.layers.conv2d(vgg_layer3_out, num_classes, 1, strides=(1, 1), padding='same',
                             kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                             kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    input = tf.add(input, output)
    nn_last_layer = tf.layers.conv2d_transpose(input, num_classes, 32, strides=(8, 8), padding='same',
                                               kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    return nn_last_layer

tests.test_layers(layers)


def optimize(nn_last_layer, correct_label, learning_rate, num_classes):
    """
    Build the TensorFlow loss and optimizer operations.
    :param nn_last_layer: TF Tensor of the last layer in the neural network
    :param correct_label: TF Placeholder for the correct label image
    :param learning_rate: TF Placeholder for the learning rate
    :param num_classes: Number of classes to classify
    :return: Tuple of (logits, train_op, cross_entropy_loss)
    """
    # Flatten the 4-D network output and labels to 2-D so each row is one pixel.
    logits = tf.reshape(nn_last_layer, (-1, num_classes))
    correct_label = tf.reshape(correct_label, (-1, num_classes))
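    # A minimal sketch of how optimize() typically continues from here (an
    # assumption; the original fragment stops at the reshapes): softmax
    # cross-entropy over the flattened logits, minimized with Adam.
    cross_entropy_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=correct_label))
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy_loss)
    return logits, train_op, cross_entropy_loss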
def perform_tests():
    tests.test_for_kitti_dataset(data_dir)
    tests.test_load_vgg(load_vgg, tf)
    tests.test_layers(layers)
    tests.test_optimize(optimize)
    tests.test_train_nn(train_nn)
                                         strides=(2, 2), padding='same',
                                         kernel_regularizer=regularizer,
                                         kernel_initializer=initializer)
    output3 = tf.layers.conv2d_transpose(output2, num_classes, 4, strides=(2, 2), padding='same',
                                         kernel_regularizer=regularizer,
                                         kernel_initializer=initializer)
    return output3

tests.test_layers(layers)


def optimize(nn_last_layer, correct_label, learning_rate, num_classes):
    """
    Build the TensorFlow loss and optimizer operations.
    :param nn_last_layer: TF Tensor of the last layer in the neural network
    :param correct_label: TF Placeholder for the correct label image
    :param learning_rate: TF Placeholder for the learning rate
    :param num_classes: Number of classes to classify
    :return: Tuple of (logits, train_op, cross_entropy_loss)
    """
    # Flatten the network output and labels to 2-D so each row is one pixel.
    logits = tf.reshape(nn_last_layer, (-1, num_classes))
    labels = tf.reshape(correct_label, (-1, num_classes))
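    # Note (an assumption beyond this fragment): kernel_regularizer only collects
    # the L2 penalties into tf.GraphKeys.REGULARIZATION_LOSSES; they do not affect
    # training unless added to the loss. A minimal sketch of folding them in:
    cross_entropy_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    total_loss = cross_entropy_loss + tf.add_n(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
    return logits, train_op, total_loss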
                                   vgg_layer7_out, num_classes, one_by_one_channels, l2_scale)
    # graph_out = layers_dropout(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out,
    #                            vgg_keep_prob, num_classes, one_by_one_channels)
    # graph_out = layers_deep(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out,
    #                         vgg_keep_prob, num_classes, one_by_one_channels)

    labels = tf.placeholder(tf.bool, name='correct_label')
    learning_rate = tf.placeholder(tf.float32, name='learning_rate')
    logits, train_op, cross_entropy_loss = optimize(graph_out, labels, learning_rate)

    # Train NN using the train_nn function
    train_nn(sess, epochs, batch_size, get_batches_fn, train_op, cross_entropy_loss,
             vgg_input, labels, vgg_keep_prob, learning_rate)

    # Save inference data using helper.save_inference_samples
    helper.save_inference_samples(runs_dir, data_dir, sess, image_shape, logits,
                                  vgg_keep_prob, vgg_input, num_classes)

    # OPTIONAL: Apply the trained model to a video


tests.test_load_vgg(load_vgg, tf)
tests.test_layers(layers_regularizer, num_classes)
tests.test_optimize(optimize, num_classes)
tests.test_train_nn(train_nn)


if __name__ == '__main__':
    run()
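# Note on the run() fragment above (an assumption, since this variant's optimize()
# is not shown): correct_label is declared tf.bool, so the loss code presumably
# casts it to float before computing the cross-entropy, e.g.:
#
#     labels = tf.cast(tf.reshape(correct_label, (-1, num_classes)), tf.float32)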
def run_tests():
    tests.test_layers(layers)
    tests.test_optimize(optimize)
    tests.test_for_kitti_dataset(DATA_DIRECTORY)
    tests.test_train_nn(train_nn)
def train(epochs: int = None, save_model_freq: int = None, batch_size: int = None,
          learning_rate: float = None, keep_prob: float = None, dataset: str = None):
    """
    Performs the FCN training from beginning to end, that is, downloads the required
    datasets and pretrained models, constructs the FCN architecture, trains it, and
    saves the trained model.
    :param epochs: number of epochs for training
    :param save_model_freq: save the model every save_model_freq epochs
    :param batch_size: batch size for training
    :param learning_rate: learning rate for training
    :param keep_prob: keep probability for dropout layers during training
    :param dataset: dataset name
    """
    if None in [epochs, save_model_freq, batch_size, learning_rate, keep_prob, dataset]:
        raise ValueError('some parameters were not specified for function "%s"' % train.__name__)

    dataset = DATASETS[dataset]

    if not os.path.exists(dataset.data_root_dir):
        os.makedirs(dataset.data_root_dir)

    # Download the Kitti Road dataset
    helper.maybe_download_dataset_from_yandex_disk(dataset)

    # Download the pretrained VGG model
    helper.maybe_download_pretrained_vgg_from_yandex_disk(dataset.data_root_dir)

    # Run tests to check that the environment is ready to execute the semantic
    # segmentation pipeline
    if dataset.name == 'kitti_road':
        tests.test_for_kitti_dataset(dataset.data_root_dir)
    tests.test_load_vgg(load_vgg, tf)
    tests.test_layers(layers)
    tests.test_optimize(optimize)
    tests.test_train_nn(train_nn, dataset)

    # TODO: Train and infer on the Cityscapes dataset instead of the Kitti dataset.
    # https://www.cityscapes-dataset.com/
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        # Path to the vgg model
        vgg_path = os.path.join(dataset.data_root_dir, 'vgg')

        # Create a function to get batches
        get_batches_fn = helper.gen_batch_function(dataset.data_training_dir, dataset.image_shape)

        # TODO: Augment images for better results
        # https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        image_input_tensor, keep_prob_tensor, layer3_out_tensor, layer4_out_tensor, layer7_out_tensor = \
            load_vgg(sess, vgg_path)
        output_layer_tensor = layers(layer3_out_tensor, layer4_out_tensor, layer7_out_tensor,
                                     dataset.num_classes)

        correct_label_tensor = tf.placeholder(tf.float32, (None, None, None, dataset.num_classes))
        learning_rate_tensor = tf.placeholder(tf.float32)
        logits_tensor, train_op_tensor, cross_entropy_loss_tensor, softmax_tensor = \
            optimize(output_layer_tensor, correct_label_tensor, learning_rate_tensor,
                     dataset.num_classes)
        iou_tensor, iou_op_tensor = mean_iou(softmax_tensor, correct_label_tensor,
                                             dataset.num_classes)

        train_nn(sess, dataset, epochs, save_model_freq, batch_size, learning_rate, keep_prob,
                 get_batches_fn, train_op_tensor, cross_entropy_loss_tensor, image_input_tensor,
                 correct_label_tensor, keep_prob_tensor, learning_rate_tensor, iou_tensor,
                 iou_op_tensor)

        save_model(sess, 'fcn8-final', dataset, epochs=epochs, batch_size=batch_size,
                   learning_rate=learning_rate, keep_prob=keep_prob)
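# mean_iou() and save_model() are referenced above but not defined in this excerpt.
# Minimal sketches under stated assumptions: softmax is the flattened
# (-1, num_classes) tensor returned by optimize(), and checkpoints go under
# dataset.data_root_dir. Not the original implementations.

def mean_iou(softmax, correct_label, num_classes):
    # Convert per-pixel class probabilities and one-hot labels to class ids.
    predictions = tf.argmax(softmax, axis=-1)
    labels = tf.reshape(tf.argmax(correct_label, axis=-1), [-1])
    # tf.metrics.mean_iou returns (metric, update_op); run the update op on each
    # batch before reading the metric, and initialize local variables first.
    return tf.metrics.mean_iou(labels, predictions, num_classes)


def save_model(sess, name, dataset, **hyperparams):
    # Persist the current graph variables with tf.train.Saver; the hyperparameters
    # are only recorded in the checkpoint file name here.
    saver = tf.train.Saver()
    suffix = '-'.join('%s_%s' % kv for kv in sorted(hyperparams.items()))
    saver.save(sess, os.path.join(dataset.data_root_dir, '%s-%s' % (name, suffix)))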