def run():
    """Build the VGG-based network, train it, and save inference samples.

    Relies on names defined outside this function: ``data_dir``,
    ``runs_dir``, ``args`` (its ``training_epochs`` and ``batch_size``
    attributes), ``join``, ``gen_batch_function``, ``load_vgg``, ``layers``,
    ``optimize``, ``train_nn`` and ``save_inference_samples``.
    """
    num_classes = 2
    image_shape = (160, 576)
    image_h, image_w = image_shape

    with tf.Session() as sess:
        # Locations of the pretrained model and of the training images.
        vgg_path = join(data_dir, 'vgg')
        training_dir = join(data_dir, 'data_road/training')
        batch_generator = gen_batch_function(training_dir, image_shape)

        # Pretrained VGG tensors, then the layers stacked on top of them.
        image_input, keep_prob, layer3, layer4, layer7 = load_vgg(
            sess, vgg_path)
        output = layers(layer3, layer4, layer7, num_classes)

        # Values fed at training time.
        labels = tf.placeholder(
            tf.float32, shape=[None, image_h, image_w, num_classes])
        learning_rate = tf.placeholder(tf.float32, shape=[])

        logits, train_op, cross_entropy_loss = optimize(
            output, labels, learning_rate, num_classes)

        train_nn(sess, args.training_epochs, args.batch_size,
                 batch_generator, train_op, cross_entropy_loss,
                 image_input, labels, keep_prob, learning_rate)

        save_inference_samples(runs_dir, data_dir, sess, image_shape,
                               logits, keep_prob, image_input)
def run():
    """Check the dataset, fetch VGG, train the network and save samples."""
    # Fixed configuration for this run.
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    epochs = 50
    batch_size = 5

    tests.test_for_kitti_dataset(data_dir)
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # Placeholders fed by train_nn.
        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # Graph construction: pretrained VGG -> added layers -> training ops.
        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(
            sess, vgg_path)
        nn_last_layer = layers(layer3, layer4, layer7, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)
def run():
    """Train the network, save inference samples, then checkpoint the model.

    The checkpoint is written to ``./model/semantic_segmentation_model.ckpt``.
    The ``./model`` directory is created when missing so that the save at the
    very end of a full training run cannot fail on a missing parent directory.
    """
    print("Run")
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    batch_size = 5
    epochs = 10
    checkpoint_prefix = "./model/semantic_segmentation_model.ckpt"

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        vgg_path = os.path.join(data_dir, 'vgg')
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # Build NN using load_vgg, layers, and optimize function
        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)
        layer_output = layers(layer3_out, layer4_out, layer7_out, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            layer_output, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)

        # Save the variables to disk. The parent directory has to exist
        # before Saver.save is called, so create it on first use.
        print("Saving model...")
        checkpoint_dir = os.path.dirname(checkpoint_prefix)
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver()
        save_path = saver.save(sess, checkpoint_prefix)
        print("Model saved in path: %s" % save_path)
def run():
    """Run the full pipeline: dataset check, VGG download, train, inference."""
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    epochs = 50
    batch_size = 8

    tests.test_for_kitti_dataset(data_dir)
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # Unnamed placeholders for the labels and the learning rate.
        correct_label = tf.placeholder(
            tf.int32, (None, None, None, num_classes))
        learning_rate = tf.placeholder(tf.float32)

        # Assemble the network and its training ops.
        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(
            sess, vgg_path)
        layer_output = layers(layer3, layer4, layer7, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            layer_output, correct_label, learning_rate, num_classes)

        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)
def run():
    """Train for 30 epochs with batches of 16 and save inference samples."""
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    epochs = 30
    batch_size = 16

    tests.test_for_kitti_dataset(data_dir)
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(
            sess, vgg_path)
        nn_last_layer = layers(layer3, layer4, layer7, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)
def run():
    """Train for 15 epochs with batches of 5 and save inference samples.

    The label placeholder is float32 and pinned to the height and width in
    ``image_shape``.
    """
    num_classes = 2
    image_shape = (160, 576)
    height, width = image_shape
    data_dir = './data'
    runs_dir = './runs'
    epochs = 15
    batch_size = 5

    tests.test_for_kitti_dataset(data_dir)
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        learning_rate = tf.placeholder(tf.float32, name="learning_rate")
        correct_label = tf.placeholder(
            tf.float32, [None, height, width, num_classes],
            name="correct_label")

        # First value returned by load_vgg is the image input tensor.
        input_weight, keep_prob, layer_3, layer_4, layer_7 = load_vgg(
            sess, vgg_path)
        output_layer = layers(layer_3, layer_4, layer_7, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            output_layer, correct_label, learning_rate, num_classes)

        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_weight, correct_label, keep_prob,
                 learning_rate)

        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_weight)
def run():
    """Build the model from module-level settings, train it, save samples.

    All configuration comes from names defined outside this function:
    ``training_dir``, ``IMAGE_SHAPE``, ``vgg_path``, ``NUMBER_OF_CLASSES``,
    ``correct_label``, ``learning_rate``, ``EPOCHS``, ``BATCH_SIZE``,
    ``runs_dir`` and ``data_dir``. The pretrained VGG download is not
    triggered from here.
    """
    get_batches_fn = helper.gen_batch_function(training_dir, IMAGE_SHAPE)

    with tf.Session() as session:
        # Input, keep-probability and three intermediate tensors from VGG.
        image_input, keep_prob, layer3, layer4, layer7 = load_vgg(
            session, vgg_path)

        # Network produced by adding the extra layers on top of VGG.
        model_output = layers(layer3, layer4, layer7, NUMBER_OF_CLASSES)

        # Logits, the training op and the loss that training minimises.
        logits, train_op, cross_entropy_loss = optimize(
            model_output, correct_label, learning_rate, NUMBER_OF_CLASSES)

        # Initialise global variables first, then local ones.
        global_init = tf.global_variables_initializer()
        session.run(global_init)
        local_init = tf.local_variables_initializer()
        session.run(local_init)

        print("Model build successful KK, starting training")

        train_nn(session, EPOCHS, BATCH_SIZE, get_batches_fn, train_op,
                 cross_entropy_loss, image_input, correct_label, keep_prob,
                 learning_rate)

        # Run the model on the test images and save each output image.
        helper.save_inference_samples(runs_dir, data_dir, session,
                                      IMAGE_SHAPE, logits, keep_prob,
                                      image_input)

        print("All done!")
def run():
    """Train for 50 epochs with batches of 5 and save inference samples."""
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    epochs = 50
    batch_size = 5

    tests.test_for_kitti_dataset(data_dir)
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # Placeholders fed during training.
        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # Network and training ops.
        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(
            sess, vgg_path)
        nn_last_layer = layers(layer3, layer4, layer7, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)
def run():
    """Train for 8 epochs with batches of 8 and save inference samples.

    The class count is taken from ``num_classes`` everywhere instead of
    repeating the literal ``2``, so the network, the label placeholder and
    the loss cannot drift apart if the value is changed.
    """
    num_classes = 2
    image_shape = (160, 576)  # KITTI dataset uses 160x576 images
    data_dir = 'data'
    runs_dir = 'runs'
    epochs = 8
    batch_size = 8

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # Build NN using load_vgg, layers, and optimize function
        image_input, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)
        output_layer = layers(layer3_out, layer4_out, layer7_out, num_classes)

        correct_label = tf.placeholder(
            dtype=tf.float32, shape=(None, None, None, num_classes),
            name='correct_label')
        learning_rate = tf.placeholder(dtype=tf.float32, name='learning_rate')

        logits, train_op, cross_entropy_loss = optimize(
            output_layer, correct_label, learning_rate, num_classes)

        sess.run(tf.global_variables_initializer())

        # Train NN using the train_nn function
        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, image_input, correct_label, keep_prob,
                 learning_rate)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, image_input)
def run():
    """Train for 50 epochs with batches of 5 and save inference samples."""
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    epochs = 50
    batch_size = 5

    tests.test_for_kitti_dataset(data_dir)
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # Unnamed placeholders for the labels and the learning rate.
        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes])
        learning_rate = tf.placeholder(tf.float32)

        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(
            sess, vgg_path)
        output = layers(layer3, layer4, layer7, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            output, correct_label, learning_rate, num_classes)

        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)
def run():
    """Train (or only apply) the CNN classifier and save inference samples.

    With ``load_trained_weights`` False the model is trained on 75% of the
    images and the trained model is saved into the directory returned by
    ``helper.save_inference_samples``. With it True, no training happens and
    the training proportion is 0.0, so every image lands in the second list
    returned by ``helper.get_split_image_paths``.
    """
    load_trained_weights = False  # False to train, True for inference only
    img_type = "both"  # "sim", "real" or "both"
    save_model_name = "both_full_frame_model.ckpt"  # used only when training
    runs_dir = "../ros/src/tl_detector/runs"

    # Share of images used for training; the rest is used for validation.
    proportion_train = 0.0 if load_trained_weights else 0.75

    # Further hyperparameters are set inside train_nn().
    epochs = 10
    batch_size = 1

    training_image_paths, validation_image_paths = (
        helper.get_split_image_paths(proportion_train, img_type,
                                     '../data/training_images'))

    with tf.Session() as sess:
        # The model is built by a separate class.
        cnn_model = cnn_classifier_model.CnnClassifierModel(
            sess, load_trained_weights)

        get_batches_fn = helper.gen_batch_function(
            training_image_paths, cnn_model.image_shape,
            cnn_model.num_classes)

        training_this_run = not load_trained_weights
        if training_this_run:
            print("Training model...")
            train_nn(sess, epochs, batch_size, get_batches_fn, cnn_model)

        run_output_dir = helper.save_inference_samples(
            runs_dir, validation_image_paths, sess, cnn_model.image_shape,
            cnn_model.keep_prob, cnn_model.logits, cnn_model.image_input)

        # Persist the model only when it was trained during this run.
        if training_this_run:
            target = os.path.join(run_output_dir, save_model_name)
            save_path = cnn_model.saver.save(sess, target)
            print("Saved TensorFlow model in %s\n" % save_path)
        else:
            print("Didn't save model because we loaded the weights from disk this time")
def run():
    """Train for 15 epochs with batches of 8 and save inference samples."""
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'

    tests.test_for_kitti_dataset(data_dir)

    epochs = 15
    batch_size = 8

    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # Network first, then the placeholders consumed by optimize().
        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(
            sess, vgg_path)
        output = layers(layer3, layer4, layer7, num_classes)

        correct_label = tf.placeholder(
            tf.float32, shape=[None, None, None, num_classes])
        learning_rate = tf.placeholder(tf.float32)

        logits, train_op, cross_entropy_loss = optimize(
            output, correct_label, learning_rate, num_classes)

        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)
def cont():
    """Rebuild the network, restore a checkpoint, and train 10 more epochs.

    Reads ``data_dir``, ``runs_dir``, ``IMAGE_SHAPE``, ``num_classes``,
    ``BATCH_SIZE`` and ``MODEL_DIR`` from outside this function.
    """
    with tf.Session() as sess:
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, IMAGE_SHAPE)

        image_in, keep_prob, layer3, layer4, layer7 = load_vgg(sess, vgg_path)
        output = layers(layer3, layer4, layer7, num_classes)

        correct_label = tf.placeholder(
            dtype=tf.float32, shape=(None, None, None, num_classes))
        learning_rate = tf.placeholder(dtype=tf.float32)

        logits, train_op, cross_entropy_loss = optimize(
            output, correct_label, learning_rate, num_classes)

        sess.run(tf.global_variables_initializer())

        # NOTE(review): the meta graph is imported after the network has
        # already been built in this graph -- confirm that the variables
        # restored here are the ones train_op actually updates.
        new_saver = tf.train.import_meta_graph(
            './models_3col/epoch_199.ckpt.meta')
        latest = tf.train.latest_checkpoint('./models_3col/')
        new_saver.restore(sess, latest)

        # Saver handed to train_nn together with MODEL_DIR.
        saver = tf.train.Saver()

        train_nn(sess, 10, BATCH_SIZE, get_batches_fn, train_op,
                 cross_entropy_loss, image_in, correct_label, keep_prob,
                 learning_rate, saver, MODEL_DIR)

        helper.save_inference_samples(runs_dir, data_dir, sess, IMAGE_SHAPE,
                                      logits, keep_prob, image_in,
                                      num_classes)
def run():
    """Train for 100 epochs with batches of 32 and save inference samples.

    The label placeholder is float32 and pinned to the height and width in
    ``image_shape``.
    """
    num_classes = 2
    image_shape = (160, 576)  # KITTI dataset uses 160x576 images
    height, width = image_shape
    data_dir = './data'
    runs_dir = './runs'
    epochs = 100
    batch_size = 32

    tests.test_for_kitti_dataset(data_dir)
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        image_input, keep_prob, layer3, layer4, layer7 = load_vgg(
            sess, vgg_path)
        model = layers(layer3, layer4, layer7, num_classes)

        labels = tf.placeholder(
            tf.float32, shape=[None, height, width, num_classes])
        learning_rate = tf.placeholder(tf.float32, shape=[])

        logits, training_operation, cross_entropy_loss = optimize(
            model, labels, learning_rate, num_classes)

        train_nn(sess, epochs, batch_size, get_batches_fn,
                 training_operation, cross_entropy_loss, image_input, labels,
                 keep_prob, learning_rate)

        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, image_input)
def run(self):
    """Run the whole pipeline: checks, graph build, training, output, save.

    Stores the two placeholders on ``self`` as ``learning_rate_holder`` and
    ``correct_label_holder``, and reads ``self.logits``, ``self.keep_prob``
    and ``self.input_image`` when saving the inference samples.
    """
    # Environment and data prerequisites.
    helper.check_compatibility()
    tests.test_for_kitti_dataset(self.data_dir)
    helper.maybe_download_pretrained_vgg(self.data_dir)

    # NOTE(review): get_batches_fn is not used anywhere else in this method.
    get_batches_fn = helper.gen_batch_function(self.training_path,
                                               self.image_shape)

    with tf.Session() as session:
        # Placeholders for the learning rate and the labels.
        self.learning_rate_holder = tf.placeholder(dtype=tf.float32)
        label_shape = (None, None, None, self.num_classes)
        self.correct_label_holder = tf.placeholder(dtype=tf.float32,
                                                   shape=label_shape)

        # Network definition followed by the training ops.
        self.load_vgg(session)
        self.layers()
        self.optimize_cross_entropy()

        session.run(tf.global_variables_initializer())

        self.train(session)

        helper.save_inference_samples(self.runs_dir, self.data_dir, session,
                                      self.image_shape, self.logits,
                                      self.keep_prob, self.input_image)

        self.save_model(session)
def run():
    """Train, save inference samples, and compose them into a video.

    The video is written to ``segmentation.mp4`` at 15 frames per second from
    the directory returned by ``helper.save_inference_samples``.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'

    tests.test_for_kitti_dataset(data_dir)
    helper.maybe_download_pretrained_vgg(data_dir)

    # Hyperparameters
    epochs = 20
    batch_size = 5

    with tf.Session() as sess:
        vgg_path = os.path.join(data_dir, 'vgg')
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        input_image, keep_prob, layer3, layer4, layer7 = load_vgg(
            sess, vgg_path)
        layer_output = layers(layer3, layer4, layer7, num_classes)

        correct_label = tf.placeholder(
            dtype=tf.float32, shape=(None, None, None, num_classes))
        learning_rate = tf.placeholder(dtype=tf.float32)

        logits, train_op, cross_entropy_loss = optimize(
            layer_output, correct_label, learning_rate, num_classes)

        sess.run(tf.global_variables_initializer())

        train_nn(
            sess=sess,
            epochs=epochs,
            batch_size=batch_size,
            get_batches_fn=get_batches_fn,
            train_op=train_op,
            cross_entropy_loss=cross_entropy_loss,
            input_image=input_image,
            correct_label=correct_label,
            keep_prob=keep_prob,
            learning_rate=learning_rate,
        )

        output_dir = helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            input_image)

        # Turn the saved frames into a silent video.
        video_file = 'segmentation.mp4'
        frames = ImageSequenceClip(output_dir, fps=15)
        frames.write_videofile(video_file, audio=False)
def run():
    """Train the network, log to CSV, and export final samples and a video.

    Reads ``data_dir``, ``runs_dir``, ``image_shape``, ``num_classes`` and
    ``csv_filename`` from outside this function. The CSV file is truncated
    and given a header row before training starts.
    """
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # Build NN using load_vgg, layers, and optimize function
        epochs = 50
        batch_size = 4

        learning_rate = tf.placeholder(tf.float32)
        correct_label = tf.placeholder(
            tf.float32, [None, image_shape[0], image_shape[1], num_classes])

        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)
        layer_output = layers(layer3_out, layer4_out, layer7_out, num_classes)
        logits, train_optimizer, cross_entropy_loss = optimize(
            layer_output, correct_label, learning_rate, num_classes)

        # Start a fresh CSV log with its header row. The context manager
        # closes the file even if the write raises.
        str_line = "%s,%s,%.5s\n" % ('Number', 'Epoch', 'loss')
        with open(csv_filename, 'w') as f:
            f.write(str_line)

        # Train NN using the train_nn function
        train_nn(sess, epochs, batch_size, get_batches_fn, train_optimizer,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate, logits)

        # Save inference data using helper.save_inference_samples
        output_path = helper.save_inference_samples(
            runs_dir, data_dir, sess, image_shape, logits, keep_prob,
            input_image, 'final_result_')

        # Build a video from the saved samples.
        helper.save_video(output_path, 'final_result.avi', 5, image_shape)
def train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
             cross_entropy_loss, input_image, correct_label, keep_prob,
             learning_rate, logits):
    """
    Train neural network and print out the loss during training.

    Besides training, each epoch appends its sample-weighted mean loss to
    ``csv_filename`` when that file already exists. On every fifth epoch
    after the first (5, 10, ...), inference samples and a video are saved.
    ``csv_filename``, ``data_dir``, ``runs_dir`` and ``image_shape`` are read
    from outside this function.

    :param sess: TF Session
    :param epochs: Number of epochs
    :param batch_size: Batch size
    :param get_batches_fn: Function to get batches of training data.  Call using get_batches_fn(batch_size)
    :param train_op: TF Operation to train the neural network
    :param cross_entropy_loss: TF Tensor for the amount of loss
    :param input_image: TF Placeholder for input images
    :param correct_label: TF Placeholder for label images
    :param keep_prob: TF Placeholder for dropout keep probability
    :param learning_rate: TF Placeholder for learning rate
    :param logits: TF Tensor of the logits, passed on to helper.save_inference_samples
    """
    sess.run(tf.global_variables_initializer())

    for epoch in range(epochs):
        epoch_tag = str(epoch).zfill(2)
        total_training_loss = 0
        num_samples = 0

        for batch, (image, label) in enumerate(get_batches_fn(batch_size)):
            loss, _ = sess.run(
                [cross_entropy_loss, train_op],
                feed_dict={input_image: image,
                           correct_label: label,
                           keep_prob: 0.8,
                           learning_rate: 1e-4}
            )
            print('Epoch:', epoch_tag,
                  ' batch:', str(batch).zfill(2),
                  ' batch size:', len(label),
                  ' training loss: ', loss)
            # Weight by batch length so a shorter final batch does not skew
            # the epoch mean.
            total_training_loss += loss * len(label)
            num_samples += len(label)

        # An epoch that produced no batches has no defined mean loss; report
        # NaN instead of raising ZeroDivisionError.
        if num_samples:
            epoch_training_loss = total_training_loss / num_samples
        else:
            epoch_training_loss = float('nan')
        print('Epoch (summary):', epoch_tag,
              ' training loss: ', epoch_training_loss)

        # Append to the CSV log only when it already exists.
        if os.path.exists(csv_filename):
            str_line = "%i,%s,%.8f\n" % (epoch, 'Epoch' + epoch_tag,
                                         epoch_training_loss)
            with open(csv_filename, 'a') as f:
                f.write(str_line)

        # Periodic snapshot of the inference output plus a video.
        if data_dir is not None and epoch > 0 and (epoch % 5) == 0:
            output_path = helper.save_inference_samples(
                runs_dir, data_dir, sess, image_shape, logits, keep_prob,
                input_image, 'epoch_' + epoch_tag + '_')
            helper.save_video(output_path, 'epoch_' + epoch_tag + '.avi', 5,
                              image_shape)
            print('Generating video for Eposh: ', epoch)
def run():
    """Train for 48 epochs, save samples, export the graph, and checkpoint.

    Inference samples go to ``./runs<epochs>``. The graph is written for
    TensorBoard to ``/home/ubuntu/graph``, and the writer is closed so the
    event file is flushed to disk before the function returns.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # TF placeholders
        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # Getting layers from vgg.
        input_image, keep_prob, vgg_layer3_out, vgg_layer4_out, vgg_layer7_out = load_vgg(
            sess, vgg_path)
        nn_last_layer = layers(vgg_layer3_out, vgg_layer4_out,
                               vgg_layer7_out, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        epochs = 48
        batch_size = 5
        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        # Save inference data; the output directory is suffixed with the
        # epoch count.
        runs_dir = runs_dir + str(epochs)
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)

        # Write the graph for TensorBoard. `graph=` replaces the deprecated
        # `graph_def=` argument, and close() flushes the event file.
        summary_writer = tf.summary.FileWriter('/home/ubuntu/graph',
                                               graph=sess.graph)
        summary_writer.close()

        # Save model weights to disk.
        # NOTE(review): this path is at the filesystem root -- confirm that
        # is intended rather than a path relative to the working directory.
        saver = tf.train.Saver()
        model_path = "/model_{}.ckpt".format(str(epochs))
        save_path = saver.save(sess, model_path)
        print("Model saved in file: %s" % save_path)
def run():
    """Train for 30 epochs, checkpoint, save samples and write a video.

    ``BATCH_SIZE`` and ``p_keep`` are module-level values that can be
    overridden from the command line: ``sys.argv[1]`` sets the batch size and
    ``sys.argv[2]`` the keep probability. The checkpoint is written under
    ``./runs/``, which is created first because the checkpoint is saved
    before ``helper.save_inference_samples`` runs.
    """
    global p_keep, r_learning, BATCH_SIZE
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    epochs = 30

    # Optional command-line overrides.
    if len(sys.argv) > 1:
        BATCH_SIZE = int(sys.argv[1])
    if len(sys.argv) > 2:
        p_keep = float(sys.argv[2])

    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    tf.reset_default_graph()
    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # Build NN using load_vgg, layers, and optimize function
        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)
        # NOTE(review): the tensor returned by tf.Print is discarded, so this
        # call does not print anything when the graph runs.
        tf.Print(keep_prob, [keep_prob])

        final_out = layers(layer3_out, layer4_out, layer7_out, num_classes)
        correct_label = tf.placeholder(
            tf.float32, (None, None, None, num_classes))
        learning_rate = tf.placeholder(tf.float32)
        logits, train_op, cross_entropy_loss = optimize(
            final_out, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        sess.run(tf.global_variables_initializer())
        train_nn(sess, epochs, BATCH_SIZE, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        # Checkpoint the trained model. The target directory has to exist
        # before Saver.save is called.
        checkpoint_prefix = "./runs/Batch%d_Pkeep%f.ckpt" % (BATCH_SIZE,
                                                             p_keep)
        checkpoint_dir = os.path.dirname(checkpoint_prefix)
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver()
        saver.save(sess, checkpoint_prefix)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)

        # Apply the trained model to a video
        video_name = 'dataroad_marked.mp4'
        helper.gen_test_output_video(data_dir, sess, image_shape, logits,
                                     keep_prob, input_image, video_name)
        print('Writing video Finished.')
def run():
    """Build the network, train it, save a checkpoint, and write inference samples.

    Relies on module-level configuration defined elsewhere in the file
    (``data_dir``, ``vgg_dir``, ``num_classes``, ``image_shape``, ``epochs``,
    ``batch_size`` and the two training-image glob paths).
    """
    runs_dir = './runs'

    print("\n\nTesting for datatset presence......")
    tests.test_looking_for_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(vgg_dir)

    with tf.Session() as sess:
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            glob_trainig_images_path, glob_labels_trainig_image_path,
            image_shape)

        # TF placeholders
        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # Build NN using load_vgg, layers, and optimize function
        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_dir)
        layer_output = layers(layer3_out, layer4_out, layer7_out, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            layer_output, correct_label, learning_rate, num_classes)

        # Debug dump of the objects later handed to the inference helper,
        # each preceded by a separator line.
        separator = 100 * '*'
        for item in (image_shape, logits, keep_prob, input_image):
            print(separator)
            print(item)
        print(separator)

        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        folderToSaveModel = "model"

        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()
        for i, var in enumerate(saver._var_list):
            print('Var {}: {}'.format(i, var))

        # Create the checkpoint folder itself; the previous code passed an
        # unrelated name (`path`) to makedirs.
        if not os.path.exists(folderToSaveModel):
            os.makedirs(folderToSaveModel)

        pathSaveModel = os.path.join(folderToSaveModel, "model.ckpt")
        pathSaveModel = saver.save(sess, pathSaveModel)
        print(colored("Model saved in path: {}".format(pathSaveModel), 'green'))

        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)
def run():
    """Train, test, or run video inference, selected by ``FLAGS.mode``.

    mode 0 -- build the network on top of VGG, train it, save a checkpoint at
              ``model_path + FLAGS.model`` and write inference samples.
    mode 1 -- restore the saved model, write inference samples, then exit the
              process.
    mode 2 -- restore the saved model and write a copy of the input video with
              the segmentation overlaid on every frame.

    Any other mode opens the session and does nothing.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    model_path = './teeekay/'
    model = FLAGS.model
    # Experiment log kept from earlier runs:
    #   mdl9 15 * 500 at lr = 0.000015
    #   mdl8 15 * 500 images at lr = 0.000025
    #   mdl7 5 * 500 images at lr = 0.00001
    #   mdl6 with cityscapes data
    #   mdl4 100 at 0.00001, mdl3 50 at .00003, mdl2 25 at .00005

    print ("model = '{ssmodel}'".format(ssmodel=model))

    with tf.name_scope("data"):
        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

    Kepochs = FLAGS.epochs
    Kbatch_size = FLAGS.batch_size
    KLearningRate = FLAGS.learn_rate
    Kl2_regularization_rate = FLAGS.l2_regularization_rate

    print("Kepochs ={}, Kbatch_size= {}, KLearningRate={:3.6f}, Kl2_regularization_rate ={:3.6f} Model name ={}"
          .format(Kepochs, Kbatch_size, KLearningRate, Kl2_regularization_rate, model_path+model))

    # Download pretrained vgg model
    print("helper.maybe_download_pretrained_vgg({})".format(data_dir))
    helper.maybe_download_pretrained_vgg(data_dir)

    # Path to vgg model
    vgg_path = os.path.join(data_dir, 'vgg')

    # Create function to get batches. The generator is pointed at data_dir
    # itself (the original note says this was a test with cityscapes data)
    # rather than at data_road/training.
    get_batches_fn = helper.gen_batch_function(data_dir, image_shape)
    print("get_batches_fn = {}".format(get_batches_fn))

    config = tf.ConfigProto()
    # Must be set on the session config; assigning it on the `tf` module has
    # no effect on the session.
    config.log_device_placement = True
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.8

    def restore_model(sess):
        """Restore the saved graph and weights; return (image input, keep_prob, logits)."""
        saver = tf.train.import_meta_graph(model_path+model+'.meta')
        saver.restore(sess, tf.train.latest_checkpoint(model_path))
        graph = tf.get_default_graph()
        img_input = graph.get_tensor_by_name('image_input:0')
        keep_prob = graph.get_tensor_by_name('keep_prob:0')
        last_layer = graph.get_tensor_by_name('decoder/last_layer:0')
        logits = tf.reshape(last_layer, (-1, num_classes))
        return img_input, keep_prob, logits

    def process_frame(sess, softmax_op, keep_prob, image_pl, frame, frame_shape, image_shape):
        """
        Generate output using the video frames
        :param sess: TF session
        :param softmax_op: TF Tensor holding softmax of the logits (built once by the caller)
        :param keep_prob: TF Placeholder for the dropout keep probability
        :param image_pl: TF Placeholder for the image placeholder
        :param frame: image frame in
        :param frame_shape: Tuple - Shape of frame coming in and going out
        :param image_shape: Tuple - Shape of image used in TF model
        :return: np.array of video frame image with superimposed semantic segmentation
        """
        softmax_criteria = 0.5
        softmax_criteria1 = 0.45
        softmax_criteria2 = 0.4
        height, width = image_shape[0], image_shape[1]

        # resize to shape used in model
        img_resized = scipy.misc.imresize(frame, image_shape, interp='lanczos')
        # inference with no dropout
        im_softmax = sess.run(
            [softmax_op],
            {keep_prob: 1.0, image_pl: [img_resized]})
        # keep the class-1 column and reshape to image dimensions
        im_softmax = im_softmax[0][:, 1].reshape(height, width)

        # Three confidence bands: above 0.5, (0.45, 0.5], and (0.4, 0.45].
        segmentation = (im_softmax > softmax_criteria).reshape(height, width, 1)
        segmentation1 = (np.logical_and(im_softmax <= softmax_criteria,
                                        im_softmax > softmax_criteria1)).reshape(height, width, 1)
        segmentation2 = (np.logical_and(im_softmax <= softmax_criteria1,
                                        im_softmax > softmax_criteria2)).reshape(height, width, 1)

        # Green RGBA masks; lower-confidence bands are darker and more transparent.
        mask = np.dot(segmentation, np.array([[0, 255, 0, 127]]))
        mask1 = np.dot(segmentation1, np.array([[0, 225, 0, 63]]))
        mask2 = np.dot(segmentation2, np.array([[0, 200, 0, 31]]))
        mask = scipy.misc.toimage(mask, mode="RGBA")
        mask1 = scipy.misc.toimage(mask1, mode="RGBA")
        mask2 = scipy.misc.toimage(mask2, mode="RGBA")

        # Scale the masks back up to the size of the incoming frame.
        mask_resized = scipy.misc.imresize(mask, frame_shape, mode="RGBA")
        mask_resized1 = scipy.misc.imresize(mask1, frame_shape, mode="RGBA")
        mask_resized2 = scipy.misc.imresize(mask2, frame_shape, mode="RGBA")
        mask_resized = scipy.misc.toimage(mask_resized, mode="RGBA")
        mask_resized1 = scipy.misc.toimage(mask_resized1, mode="RGBA")
        mask_resized2 = scipy.misc.toimage(mask_resized2, mode="RGBA")

        # Paste lowest-confidence band first so stronger bands end up on top.
        frame_im = scipy.misc.toimage(frame)
        frame_im.paste(mask_resized2, box=None, mask=mask_resized2)
        frame_im.paste(mask_resized1, box=None, mask=mask_resized1)
        frame_im.paste(mask_resized, box=None, mask=mask_resized)

        return np.array(frame_im)

    with tf.Session(config = config) as sess:
        if FLAGS.mode == 0:
            # Train the model
            print("load_vgg")
            input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(sess, vgg_path)

            with tf.name_scope("data"):
                tf.summary.image('input_images', input_image, max_outputs=3)

            print("layers")
            last_layer = layers(layer3_out, layer4_out, layer7_out, num_classes)

            print("optimize")
            logits, train_op, combined_loss, iou_obj = optimize(
                last_layer, correct_label, learning_rate, num_classes, iou_test=True)

            print("Train!")
            initialized = tf.global_variables_initializer()
            sess.run(initialized)

            train_nn(sess, Kepochs, Kbatch_size, get_batches_fn, train_op, combined_loss,
                     input_image, correct_label, keep_prob, learning_rate,
                     Kl2_regularization_rate, lr=KLearningRate, iou_obj=iou_obj)

            # Save model result
            saver = tf.train.Saver()
            save_path = saver.save(sess, model_path+model)
            print("\nSaved model at {}.".format(save_path))
            print("Kepochs ={}, Kbatch_size= {}, KLearningRate={:3.6f}, Kl2_regularization_rate ={:3.6f} Model name ={}"
                  .format(Kepochs, Kbatch_size, KLearningRate, Kl2_regularization_rate, model_path+model))

            print("saving samples")
            helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                          logits, keep_prob, input_image)

        elif FLAGS.mode == 1:
            # Run inference on images from kitti dataset
            img_input, keep_prob, logits = restore_model(sess)

            # Process test images
            print("saving samples")
            helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                          logits, keep_prob, img_input)
            exit()

        elif FLAGS.mode == 2:
            # Run inference on Video file
            cap = imageio.get_reader('./video/NeighborhoodStreet.mov')
            out = None
            try:
                md = cap.get_meta_data()
                fps = float(md['fps'])
                framewidth = int(md['size'][0])
                frameheight = int(md['size'][1])
                framecount = int(md['nframes'])
                frame_shape = (frameheight, framewidth)
                print("Video opened with framecount of {:4,d}, dimensions ({:4d},{:4d}), and speed of {:3.03f} fps."
                      .format(framecount, framewidth, frameheight, fps))

                # Load saved model
                img_input, keep_prob, logits = restore_model(sess)
                # Build the softmax op once; creating it per frame would add
                # a new node to the graph for every frame processed.
                softmax_op = tf.nn.softmax(logits)

                fileruntime = datetime.datetime.now().strftime("%Y%m%d%H:%M:%S")
                outfilename = './video/ss_video_output_' + model + fileruntime + '.mp4'
                out = imageio.get_writer(outfilename, fps=fps)

                frames = 0
                for frame in cap:
                    frames += 1
                    # Stop once the frame count reported by the metadata is exceeded.
                    if frames > framecount:
                        print("\nClosed video after passing expected framecount of {}".format(frames-1))
                        break
                    out_frame = process_frame(sess, softmax_op, keep_prob, img_input,
                                              frame, frame_shape, image_shape)
                    out.append_data(out_frame)
                    print("Frames: {0:02d}, Seconds: {1:03.03f}".format(frames, frames/fps), end='\r')

                print("finished processing video - output video is {}".format(outfilename))
            finally:
                # Release the reader/writer even if a frame fails to process.
                cap.close()
                if out is not None:
                    out.close()
def run():
    """Train or restore the network, then run inference on images or a video.

    Behaviour is driven by names defined elsewhere in the file: ``training``
    selects train-and-save versus restore, ``video`` selects the video
    pipeline versus the image samples, and ``epochs`` / ``batch_size`` are
    passed to ``train_nn``.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    model_dir = './data/model/model.ckpt'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes])
        learning_rate = tf.placeholder(tf.float32)

        input_image, keep_prob, vgg_layer3_out, vgg_layer4_out, vgg_layer7_out = load_vgg(
            sess, vgg_path)
        nn_last_layer = layers(vgg_layer3_out, vgg_layer4_out,
                               vgg_layer7_out, num_classes)
        logits, train_op, cross_entropy_loss, iou_obj = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        tf_saver = tf.train.Saver(max_to_keep=5)

        if training:
            train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                     cross_entropy_loss, input_image, correct_label,
                     keep_prob, learning_rate, iou_obj)
            # Create the checkpoint's parent directory first so the trained
            # weights are not lost to a failed save.
            checkpoint_parent = os.path.dirname(model_dir)
            if not os.path.isdir(checkpoint_parent):
                os.makedirs(checkpoint_parent)
            tf_saver.save(sess, model_dir)
            print("Model saved to: ", model_dir)
        else:
            tf_saver.restore(sess, model_dir)
            print("Model restored from: ", model_dir)

        if video:
            input_vid = "./videoinput.mp4"
            video_pipeline(input_vid, runs_dir, sess, image_shape, logits,
                           keep_prob, input_image)
        else:
            helper.save_inference_samples(runs_dir, data_dir, sess,
                                          image_shape, logits, keep_prob,
                                          input_image)
def run():
    """Build the network and optionally restore, train, run inference and save it.

    The steps are gated by flags defined elsewhere in the file: ``load_net``
    (restore a checkpoint and, later, write inference samples), ``train_net``
    (call ``train_nn``) and ``save_net`` (write a checkpoint).

    ``image_shape``, ``sess``, ``logits``, ``keep_prob`` and ``input_image``
    are declared global, so the values assigned here are visible at module
    level after the call.

    NOTE(review): the nesting of the trailing ``if`` statements was
    reconstructed from whitespace-collapsed text; they are laid out as
    siblings inside the session block -- confirm against the original file.
    """
    global image_shape, sess, logits, keep_prob, input_image

    num_classes = 2
    image_shape = (160, 576)
    data_dir = '/data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pre-trained vgg model (currently disabled)
    #helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the Kitti dataset.
    # You'll need a GPU with at least 10 teraFLOPS to train on.
    #  https://www.cityscapes-dataset.com/

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # OPTIONAL: Augment Images for better results
        #  https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        # Build NN using load_vgg, layers, and optimize function
        epochs = 30
        batch_size = 8

        learning_rate = tf.placeholder(tf.float32)
        correct_label = tf.placeholder(tf.int32, [None, None, None, num_classes])

        input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)
        layer_output = layers(layer3_out, layer4_out, layer7_out, num_classes)
        logits, optimizer, cross_entropy_loss = optimize(
            layer_output, correct_label, learning_rate, num_classes)

        # The saver is created before initialisation so a checkpoint restored
        # below overwrites the freshly initialised variable values.
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())

        if (load_net):
            # Look up the most recent checkpoint recorded in ./save_network.
            checkpoint = tf.train.get_checkpoint_state("save_network")
            if checkpoint and checkpoint.model_checkpoint_path:
                saver.restore(sess, checkpoint.model_checkpoint_path)
                print("Successfully loaded:", checkpoint.model_checkpoint_path)
            else:
                print("Could not find old network weights")

        # Train NN using the train_nn function
        if (train_net):
            train_nn(sess, epochs, batch_size, get_batches_fn, optimizer,
                     cross_entropy_loss, input_image, correct_label, keep_prob,
                     learning_rate)

        # Save inference data using helper.save_inference_samples.
        # Note this is gated on load_net, not train_net.
        # saver.restore(sess, tf.train.latest_checkpoint("./save_network"))
        if (load_net):
            helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                          logits, keep_prob, input_image)

        if (save_net):
            # save_path is not used afterwards.
            save_path = saver.save(sess, "./save_network/model_60_8")
def run():
    """Train the segmentation network on the KITTI road data and write samples.

    Two in-function toggles control the flow: ``train_model`` (set to True)
    gates building, training and sample generation, and ``save_model`` (set
    to False) gates writing a checkpoint under ./models.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'

    tests.test_for_kitti_dataset(data_dir)

    # Fetch the pretrained VGG weights if they are not present yet.
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        train_model = True
        if train_model:
            # Locations of the VGG model and the training images.
            vgg_path = os.path.join(data_dir, 'vgg')
            training_dir = os.path.join(data_dir, 'data_road/training')

            # Batch generator for the training set.
            get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

            epochs = 40
            batch_size = 6

            # Placeholders
            label_shape = [None, None, None, num_classes]
            correct_label = tf.placeholder(tf.int32, label_shape,
                                           name='correct_label')
            learning_rate = tf.placeholder(tf.float32, name='learning_rate')

            # Encoder (VGG) -> decoder (layers) -> loss/optimizer (optimize).
            input_image, keep_prob, vgg3, vgg4, vgg7 = load_vgg(sess, vgg_path)
            decoder_out = layers(vgg3, vgg4, vgg7, num_classes)
            logits, train_op, cross_entropy_loss = optimize(
                decoder_out, correct_label, learning_rate, num_classes)

            # Train the network.
            train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                     cross_entropy_loss, input_image, correct_label,
                     keep_prob, learning_rate)

            # Write the inference samples.
            helper.save_inference_samples(runs_dir, data_dir, sess,
                                          image_shape, logits, keep_prob,
                                          input_image)

        save_model = False
        if save_model:
            save_file = './models/semantic-segmantation-model.ckpt'
            saver = tf.train.Saver()
            print("Saving model...")
            saver.save(sess, save_file)
            print("Saving model finished...")
def run():
    """Train the network, save it as a checkpoint and a SavedModel, then write samples.

    Configuration comes from module-level constants (``DATA_DIR``,
    ``RUNS_DIR``, ``IMAGE_SHAPE``, ``NUM_CLASSES``, ``EPOCHS``,
    ``BATCH_SIZE``, ``IOU_ENABLED``). When ``IOU_ENABLED`` is set,
    ``optimize`` is asked for an extra IoU object that is forwarded to
    ``train_nn``; otherwise ``None`` is forwarded.
    """
    tests.test_for_kitti_dataset(DATA_DIR)

    # Fetch the pretrained VGG weights if needed.
    helper.maybe_download_pretrained_vgg(DATA_DIR)

    printStatistics()

    with tf.Session() as sess:
        # Locations of the VGG model and of the training images.
        vgg_path = os.path.join(DATA_DIR, 'vgg')
        training_dir = os.path.join(DATA_DIR, 'data_road/training')

        # Batch generator for the training set.
        get_batches_fn = helper.gen_batch_function(training_dir, IMAGE_SHAPE)

        # Build NN using load_vgg, layers, and optimize function
        print("Load VGG model...")
        input_image, keep_prob, layer_3, layer_4, layer_7 = load_vgg(
            sess, vgg_path)
        layer_output = layers(layer_3, layer_4, layer_7, NUM_CLASSES)

        label = tf.placeholder(tf.int32,
                               shape=[None, None, None, NUM_CLASSES])
        learning_rate = tf.placeholder(tf.float32)

        # optimize() returns one extra value (the IoU object) when IoU is on.
        if IOU_ENABLED:
            logits, train_op, cross_entropy_loss, iou_obj, accuracy_op = optimize(
                layer_output, label, learning_rate, NUM_CLASSES,
                iou_enabled=IOU_ENABLED)
        else:
            logits, train_op, cross_entropy_loss, accuracy_op = optimize(
                layer_output, label, learning_rate, NUM_CLASSES)
            iou_obj = None

        # Train NN using the train_nn function
        print("Start training...")
        train_nn(sess, EPOCHS, BATCH_SIZE, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, label, keep_prob,
                 learning_rate, accuracy_op, iou_obj)

        # Save the trained model as a checkpoint
        print("Save trained model...")
        checkpoint_saver = tf.train.Saver()
        checkpoint_saver.save(sess, './runs/semantic_segmentation_model.ckpt')

        # Export a SavedModel, replacing any export left by a previous run.
        print("Saving the model")
        cwd_entries = os.listdir(os.getcwd())
        if "saved_model" in cwd_entries:
            shutil.rmtree("./saved_model")
        builder = tf.saved_model.builder.SavedModelBuilder("./saved_model")
        builder.add_meta_graph_and_variables(sess, ["vgg16"])
        builder.save()

        # Save inference data using helper.save_inference_samples
        print("Save inference samples...")
        helper.save_inference_samples(RUNS_DIR, DATA_DIR, sess, IMAGE_SHAPE,
                                      logits, keep_prob, input_image)
def run():
    """Command-line entry point: optionally restore, train, and run inference.

    Flags:
        --train          run the training and save a checkpoint afterwards.
        --restore PATH   restore weights from a checkpoint before anything else.
        --video PATH     segment the given video and write ``<name>_output<ext>``
                         next to it; without this flag the test images are
                         processed instead.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', action='store_true',
                        help='Run the training')
    parser.add_argument('--restore', type=str, nargs='?',
                        help='Restore from a checkpoint')
    parser.add_argument('--video', type=str, nargs='?',
                        help='Run segmentation on a video')
    args = parser.parse_args()

    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # Create function to get batches
    get_batches_fn = helper.gen_batch_function(
        os.path.join(data_dir, 'data_road/training'), IMAGE_SHAPE)

    # Path to vgg model
    vgg_path = os.path.join(data_dir, 'vgg')

    with tf.Session() as sess:
        # Input, keep probability and the three tapped layers of the vgg model
        image_input, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)

        # Decoder added on top of the given vgg model
        model_output = layers(layer3_out, layer4_out, layer7_out, NUM_CLASSES)

        # placeholders
        correct_label = tf.placeholder(
            tf.float32, [None, IMAGE_SHAPE[0], IMAGE_SHAPE[1], NUM_CLASSES])
        learning_rate = tf.placeholder(tf.float32)

        # Output logits, training operation and loss operation
        logits, train_op, cross_entropy_loss = optimize(
            model_output, correct_label, learning_rate, NUM_CLASSES)

        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        saver = tf.train.Saver()

        if args.restore:
            saver.restore(sess, args.restore)
            print('Model restored from {0}'.format(args.restore))

        if args.train:
            # Train the neural network
            train_nn(sess, EPOCHS, BATCH_SIZE, get_batches_fn, train_op,
                     cross_entropy_loss, image_input, correct_label,
                     keep_prob, learning_rate)

            pathlib.Path(CHECKPOINT_FOLDER).mkdir(parents=True, exist_ok=True)
            save_path = saver.save(sess, CHECKPOINT_PREFIX)
            print("Model saved in path: {}".format(save_path))

        if args.video:
            from moviepy.editor import VideoFileClip

            def process_image(image):
                return helper.process_image(sess, logits, keep_prob,
                                            image_input, image, IMAGE_SHAPE)

            clip = VideoFileClip(args.video)
            white_clip = clip.fl_image(process_image)

            # splitext only treats a dot in the final path component as the
            # extension separator, so extension-less names and dotted
            # directory names are handled ("a.mp4" -> "a_output.mp4").
            stem, extension = os.path.splitext(args.video)
            output_file = stem + '_output' + extension
            white_clip.write_videofile(output_file, audio=False)
        else:
            # Run the model with the test images and save each painted output image
            helper.save_inference_samples(runs_dir, data_dir, sess,
                                          IMAGE_SHAPE, logits, keep_prob,
                                          image_input)

        print("All done!")
def run():
    """Train the 13-class FCN or score a frozen graph, chosen by ``sys.argv[1]``.

    ``train`` (the default when no argument is given) builds and trains the
    network, writes the graph definition and a checkpoint under ./fcn8,
    produces inference samples and scores them. ``test`` expects the path of
    a serialized graph as ``sys.argv[2]``, imports it, produces inference
    samples and scores them. Any other mode prints an error message.
    """
    num_classes = 13
    image_shape = (608, 800)
    data_dir = '../data'
    runs_dir = './runs'
    epochs = 10
    batch_size = 16

    mode = sys.argv[1].lower() if len(sys.argv) > 1 else "train"
    print("Current mode: ", mode)

    # Fetch the pretrained VGG weights if needed.
    helper.maybe_download_pretrained_vgg(data_dir)

    LOGDIR = os.path.join('./data', 'fcn8_log')
    ground_truth_dir = os.path.join(data_dir, 'Test', 'CameraSeg')

    if mode == "train":
        with tf.Session() as sess:
            # Locations of the VGG model and the training data.
            vgg_path = os.path.join(data_dir, 'vgg')
            train_dir = os.path.join(data_dir, 'Train')

            # Batch generator for the training set.
            get_batches_fn = helper.gen_batch_function(train_dir, image_shape)

            # Encoder (VGG) -> decoder (layers) -> loss/optimizer (optimize).
            input_image, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
                sess, vgg_path)
            layer_output = layers(layer3_out, layer4_out, layer7_out,
                                  num_classes)

            label_shape = (None, image_shape[0], image_shape[1], num_classes)
            correct_label = tf.placeholder(tf.int32, label_shape,
                                           name='correct_label')
            learning_rate = tf.placeholder(tf.float32, name='learning_rate')

            logits, train_op, cross_entropy_loss = optimize(
                layer_output, correct_label, learning_rate, num_classes)

            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(LOGDIR, graph=sess.graph)

            # Train the network.
            train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                     cross_entropy_loss, input_image, correct_label,
                     keep_prob, learning_rate, train_writer, merged,
                     logits=logits)

            # Persist the graph in binary form (the original note says the
            # text form makes freeze_graph run out of memory).
            tf.train.write_graph(sess.graph_def, './fcn8', 'base_graph.pb',
                                 as_text=False)
            print("Model graph saved in path: ./fcn8/base_graph.pb")

            weight_saver = tf.train.Saver()
            save_path = weight_saver.save(sess, "./fcn8/ckpt")
            print("Model weights saved in path: %s" % save_path)

            # Time the generation of the inference samples, then score them.
            t0 = time.time()
            output_dir = helper.save_inference_samples(
                runs_dir, data_dir, sess, image_shape, logits, keep_prob,
                input_image)
            duration = time.time() - t0
            print("Run complete, time taken = {0}".format(duration))

            helper.calculate_score(ground_truth_dir, output_dir)
        return

    if mode != "test":
        print("Command unrecognized.")
        return

    # --- test mode ---
    if len(sys.argv) < 3:
        print("main.py test <graph location>")
        return

    graph_file = sys.argv[2]

    use_xla = False
    config = tf.ConfigProto()
    if use_xla:
        jit_level = tf.OptimizerOptions.ON_1
        config.graph_options.optimizer_options.global_jit_level = jit_level

    with tf.Session(graph=tf.Graph(), config=config) as sess:
        graph_def = tf.GraphDef()
        graph = sess.graph

        # Read the serialized graph and import it without a name prefix.
        with tf.gfile.Open(graph_file, 'rb') as f:
            serialized = f.read()
            graph_def.ParseFromString(serialized)
        tf.import_graph_def(graph_def, name='')

        x = graph.get_tensor_by_name('input_2:0')
        out = graph.get_tensor_by_name('output_node0:0')

        # Time the generation of the inference samples, then score them.
        t0 = time.time()
        output_dir = helper.save_inference_samples_2(
            runs_dir, data_dir, sess, image_shape, out, None, x)
        duration = time.time() - t0
        print("Run complete, time taken = {0}".format(duration))

        helper.calculate_score(ground_truth_dir, output_dir)
adam = Adam(lr=LEARNING_RATE) model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy']) model.fit(x, y, batch_size=BATCH_SIZE, epochs=EPOCHS) model.save('trained_model' + str(time.time()) + '.h5') # score = model.evaluate(x_test, y_test, batch_size=32) # features = model.predict(x) if __name__ == "__main__": if len(sys.argv) == 2: if sys.argv[1] == '-e': model_filename = 'trained_model.h5' model_path = os.path.join('.', model_filename) helper.save_inference_samples(RUNS_DIRECTORY, DATA_DIRECTORY, IMAGE_SHAPE, model_path) else: print("continue traning...") model_filename = sys.argv[1] model_path = os.path.join('.', model_filename) if not os.path.exists(model_path): print("Model not found!") else: run(load_model(model_path)) else: run()
def run():
    """
    Run main semantic segmentation program.

    The mode comes from the parsed command-line arguments:
        0 -- build the network on VGG, train it and save it under ./duffnet/.
        1 -- restore the saved network and write the inference samples.
        2 -- restore the saved network and write a segmented copy of
             ./video/project_video.mp4.
    Any other mode prints an error message.
    """
    print('\nStarting run...')
    args = parse_args()

    # Basic parameters
    kNumClasses = 2  # "road" or "not road"
    kImageShape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    model_path = './duffnet/'
    model_name = 'duffnet'

    # Hyperparameters
    epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = args.learn_rate

    # TensorFlow placeholders
    correct_label = tf.placeholder(tf.bool, [None, None, None, kNumClasses])

    # Check data set validity
    print('\nTesting Kitti dataset...')
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained VGG model if necessary
    helper.maybe_download_pretrained_vgg(data_dir)

    # Path to VGG model
    vgg_path = os.path.join(data_dir, 'vgg')
    data_folder = os.path.join(data_dir, 'data_road/training')

    # Create generator function to get batches for training
    get_batches_fn = helper.gen_batch_function(data_folder, kImageShape)

    def load_saved_model(sess):
        """Restore graph and weights; return (image input, keep_prob, logits)."""
        saver = tf.train.import_meta_graph(model_path + model_name + '.meta')
        saver.restore(sess, tf.train.latest_checkpoint(model_path))
        graph = tf.get_default_graph()
        img_input = graph.get_tensor_by_name('image_input:0')
        keep_prob = graph.get_tensor_by_name('keep_prob:0')
        fcn8s_out = graph.get_tensor_by_name('fcn8s_out:0')
        logits = tf.reshape(fcn8s_out, (-1, kNumClasses))
        return img_input, keep_prob, logits

    # Start TensorFlow session
    with tf.Session() as sess:

        ### Train new network ###
        if args.mode == 0:
            # Build NN using load_vgg, layers, and optimize function
            img_input, keep_prob, vgg3, vgg4, vgg7 = load_vgg(sess, vgg_path)
            fcn8s_out = layers(vgg3, vgg4, vgg7, kNumClasses)
            logits, train_op, loss = optimize(fcn8s_out, correct_label,
                                              learning_rate, kNumClasses)

            # Train NN using the train_nn function
            train_nn(sess, epochs, batch_size, get_batches_fn, train_op, loss,
                     img_input, correct_label, keep_prob, learning_rate)

            # Save model result
            saver = tf.train.Saver()
            saver.save(sess, model_path + model_name)
            print("\nModel saved.")

        ### Test network ###
        elif args.mode == 1:
            # Load saved model
            img_input, keep_prob, logits = load_saved_model(sess)

            # Process test images
            helper.save_inference_samples(runs_dir, data_dir, sess,
                                          kImageShape, logits, keep_prob,
                                          img_input)

        ### Process video ###
        elif args.mode == 2:
            # Load saved model and build the inference op once. Doing the
            # tensor lookups and creating reshape/softmax ops inside the
            # per-frame callback would add new graph nodes for every frame.
            img_input, keep_prob, logits = load_saved_model(sess)
            softmax_op = tf.nn.softmax(logits)

            def process_frame(img):
                # Input image is a Numpy array, resize it to match NN input dimensions
                img_orig_size = (img.shape[0], img.shape[1])
                img_resized = scipy.misc.imresize(img, kImageShape)

                # Process image with NN (dropout disabled)
                img_softmax = sess.run([softmax_op], {
                    keep_prob: 1.0,
                    img_input: [img_resized]
                })

                # Keep the class-1 column and reshape to 2D image dimensions
                img_softmax = img_softmax[0][:, 1].reshape(
                    kImageShape[0], kImageShape[1])

                # Threshold softmax probability to a binary road judgement (>50%)
                segmentation = (img_softmax > 0.5).reshape(
                    kImageShape[0], kImageShape[1], 1)

                # Apply road judgement to the resized image as a green RGBA mask
                mask = np.dot(segmentation, np.array([[0, 255, 0, 127]]))
                mask = scipy.misc.toimage(mask, mode="RGBA")
                street_img = Image.fromarray(img_resized)
                street_img.paste(mask, box=None, mask=mask)

                # Resize image back to original dimensions
                street_img_resized = scipy.misc.imresize(
                    street_img, img_orig_size)

                # Output image as a Numpy array
                return np.array(street_img_resized)

            # Process video frames
            video_outfile = './video/project_video_out.mp4'
            video = VideoFileClip('./video/project_video.mp4')
            video_out = video.fl_image(process_frame)
            video_out.write_videofile(video_outfile, audio=False)

        else:
            print('Error: Invalid mode selected.')
def run():
    """Train the network, export it, write inference samples and a segmented video.

    After training, the graph is frozen (output node ``adam_logit``) and
    written to ``runs_dir`` together with a checkpoint. The trained session
    is then used to segment ``project_video.mp4`` frame by frame.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    with tf.Session() as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # Build NN using load_vgg, layers, and optimize function
        correct_label = tf.placeholder(
            tf.int32, [None, None, None, num_classes], name='correct_label')
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        input_image, keep_prob, vgg_layer3_out, vgg_layer4_out, vgg_layer7_out = load_vgg(
            sess, vgg_path)
        nn_last_layer = layers(vgg_layer3_out, vgg_layer4_out,
                               vgg_layer7_out, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        # Train NN using the train_nn function
        epochs = 1000
        batch_size = 20

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, input_image, correct_label, keep_prob,
                 learning_rate)

        # Export the graph for inference programs
        output_node_names = 'adam_logit'
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,  # The session is used to retrieve the weights
            tf.get_default_graph().as_graph_def(),  # The graph_def is used to retrieve the nodes
            output_node_names.split(","))  # The output node names select the useful nodes
        print("output_node: {}".format(output_node_names.split(",")))

        saver = tf.train.Saver()
        saver.save(sess, runs_dir + '/fcn')
        tf.train.write_graph(output_graph_def, '',
                             runs_dir + '/frozen_graph.pb', False)

        # Save inference data using helper.save_inference_samples
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, input_image)

        # Apply the trained model to a video.
        # Each entry: [input file, output file, (width, height) of the output].
        flist = [
            ['project_video.mp4', 'project_video_fcn8_100.mp4', (640, 360)]]

        # Build the softmax op once; creating it per frame would add a node
        # to the graph for every frame processed.
        softmax_op = tf.nn.softmax(logits)

        for files in flist:
            print('file: ' + files[0] + ' -> ' + files[1], flush=True)
            clip1 = VideoFileClip(files[0])
            save_size = files[2]

            fps = 30
            fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V')
            video_out = cv2.VideoWriter(files[1], int(fourcc), fps, save_size)
            try:
                for frame in clip1.iter_frames():
                    # The frame is converted to BGR channel order for the
                    # OpenCV writer. NOTE(review): the converted frame is also
                    # what gets fed to the network -- confirm the channel
                    # order matches the one used for the training images.
                    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                    image = scipy.misc.imresize(frame, image_shape)

                    # Inference with dropout disabled.
                    im_softmax = sess.run(
                        [softmax_op],
                        {keep_prob: 1.0, input_image: [image]})
                    im_softmax = im_softmax[0][:, 1].reshape(
                        image_shape[0], image_shape[1])
                    segmentation = (im_softmax > 0.5).reshape(
                        image_shape[0], image_shape[1], 1)

                    # Paste a green RGBA mask over the resized frame.
                    mask = np.dot(segmentation, np.array([[0, 255, 0, 127]]))
                    mask = scipy.misc.toimage(mask, mode="RGBA")
                    street_im = scipy.misc.toimage(image)
                    street_im.paste(mask, box=None, mask=mask)

                    result = np.array(street_im)
                    result2 = cv2.resize(result, (save_size[0], save_size[1]))
                    video_out.write(result2)
            finally:
                # Explicitly finalize the output file instead of relying on
                # garbage collection of the writer object.
                video_out.release()
def run():
    """Build the FCN, then train it or restore a checkpoint, and run inference.

    Training happens when the module-level ``training`` flag is truthy or when
    no ``checkpoint`` file exists in the current directory; otherwise the
    latest checkpoint is restored. In both cases inference samples are then
    written with ``helper.save_inference_samples``.

    Side effect: sets the module-level ``save`` flag to ``True``.
    """
    global save

    num_classes = 2
    image_shape = (160, 576)
    epochs = 40
    batch_size = 20
    data_dir = './data'
    runs_dir = './runs'
    separator = "\n ===================================================== "

    # Location of the pretrained VGG model.
    vgg_path = os.path.join(data_dir, 'vgg')
    tests.test_for_kitti_dataset(data_dir)

    # Fetch the pretrained VGG model if it is not there yet.
    helper.maybe_download_pretrained_vgg(data_dir)

    # Switch the module-level flag on.
    save = True

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. You'll need a GPU with at least 10 teraFLOPS to train on.
    # https://www.cityscapes-dataset.com/

    with tf.Session() as sess:
        # Batch generator over the training images.
        print("Create function to get batches")
        training_dir = os.path.join(data_dir, 'data_road/training')
        get_batches_fn = helper.gen_batch_function(training_dir, image_shape)

        # OPTIONAL: Augment Images for better results
        # https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        # Assemble the network from load_vgg, layers and optimize.
        print("Loading the VGG16 model... ")
        vgg_tensors = load_vgg(sess, vgg_path)
        (vgg_input_tensor, vgg_keep_prob_tensor, vgg_layer3_out_tensor,
         vgg_layer4_out_tensor, vgg_layer7_out_tensor) = vgg_tensors
        print("VGG16 model loaded !")

        print("Creating the layers ... ")
        deconv3 = layers(vgg_layer3_out_tensor, vgg_layer4_out_tensor,
                         vgg_layer7_out_tensor, num_classes)
        print("Layers created !")

        print(
            "Creating the placeholder for labels and variable for learning rate ..."
        )
        label_shape = (None, None, None, num_classes)
        correct_label = tf.placeholder(dtype=tf.float32, shape=label_shape)
        # Fed as a placeholder to avoid the
        # "can not convert float to tensor" error.
        learning_rate = tf.placeholder(dtype=tf.float32)
        print("Placeholders created !")

        print(
            "Creating the logits, training operation and loss function for the network ..."
        )
        logits, train_op, cross_entropy_loss = optimize(
            deconv3, correct_label, learning_rate, num_classes)
        print("Logits, training operation and loss function created !")

        sess.run(tf.global_variables_initializer())
        print("Variables initialized !")

        # Restore only when training was not requested and a checkpoint
        # exists; in every other case train from scratch.
        if not training and os.path.exists('checkpoint'):
            tf.train.Saver().restore(sess, tf.train.latest_checkpoint(''))
        else:
            print(separator)
            print(" Training ....")
            train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                     cross_entropy_loss, vgg_input_tensor, correct_label,
                     vgg_keep_prob_tensor, learning_rate)
            print(" Training completed !")

        # Both paths end by writing the inference samples.
        print(separator)
        print(" Testing ....")
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, vgg_keep_prob_tensor,
                                      vgg_input_tensor)
# Inference-only script section: rebuild the network, restore trained weights
# from the checkpoint given on the command line, and write inference samples.
parser.add_argument('model', type=str,
                    help='path to the trained model checkpoint to restore')
args = parser.parse_args()

num_classes = 2
image_shape = (160, 576)  # KITTI dataset uses 160x576 images
data_dir = './data'
runs_dir = './runs'
vgg_path = os.path.join(data_dir, 'vgg')

# reset graph
tf.reset_default_graph()

with tf.Session() as sess:
    # No initializer is run here: the previous initialize-all-variables call
    # executed before any variable existed, so it did nothing. The
    # Saver.restore call below assigns the checkpoint values to the variables.

    # load model
    input_image, keep_prob, layer3, layer4, layer7 = load_vgg(sess, vgg_path)
    layer_output = layers(layer3, layer4, layer7, num_classes)
    # Flatten to one row per pixel with num_classes columns.
    logits = tf.reshape(layer_output, (-1, num_classes))

    # restore variables
    saver = tf.train.Saver()
    saver.restore(sess, args.model)

    print('inference on test images...')
    helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                  logits, keep_prob, input_image)
def run():
    """Train the FCN on the KITTI road data, save samples, and segment a video.

    Builds the network with ``load_vgg``, ``layers`` and ``optimize``, trains
    it with ``train_nn``, writes inference samples through
    ``helper.save_inference_samples``, and finally overlays the segmentation
    on every frame of ``./data/harder_challenge_video.mp4``, writing the
    result to ``./data/segmented_project_video.mp4``.
    """
    num_classes = 2
    image_shape = (160, 576)
    data_dir = './data'
    runs_dir = './runs'
    tests.test_for_kitti_dataset(data_dir)

    # Download pretrained vgg model
    helper.maybe_download_pretrained_vgg(data_dir)

    # OPTIONAL: Train and Inference on the cityscapes dataset instead of the
    # Kitti dataset. You'll need a GPU with at least 10 teraFLOPS to train on.
    # https://www.cityscapes-dataset.com/

    USE_GPU = True
    config = tf.ConfigProto(device_count={'GPU': 1 if USE_GPU else 0})

    with tf.Session(config=config) as sess:
        # Path to vgg model
        vgg_path = os.path.join(data_dir, 'vgg')
        # Create function to get batches
        get_batches_fn = helper.gen_batch_function(
            os.path.join(data_dir, 'data_road/training'), image_shape)

        # OPTIONAL: Augment Images for better results
        # https://datascience.stackexchange.com/questions/5224/how-to-prepare-augment-images-for-neural-network

        # Building NN using load_vgg, layers, and optimize function
        correct_label = tf.placeholder(tf.float32)
        learning_rate = tf.placeholder(tf.float32)

        # Reuse vgg_path instead of rebuilding the same path a second time.
        image_input, keep_prob, layer3_out, layer4_out, layer7_out = load_vgg(
            sess, vgg_path)
        nn_last_layer = layers(layer3_out, layer4_out, layer7_out, num_classes)
        logits, train_op, cross_entropy_loss = optimize(
            nn_last_layer, correct_label, learning_rate, num_classes)

        # Training NN using the train_nn function
        # Author's note: with 20 epochs the average loss per image in epoch 20
        # was 0.1744.
        epochs = 30
        batch_size = 1
        train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
                 cross_entropy_loss, image_input, correct_label, keep_prob,
                 learning_rate)

        # Saving inference data using helper.save_inference_samples
        helper.save_inference_samples(runs_dir, data_dir, sess, image_shape,
                                      logits, keep_prob, image_input)

        # OPTIONAL: Applying the trained model to a video
        # Build the softmax op once. Creating it inside the per-frame callback
        # would add a new node to the graph for every frame.
        softmax_op = tf.nn.softmax(logits)

        def process_videoimage(original_image,
                               sess=sess,
                               image_shape=image_shape,
                               softmax_op=softmax_op,
                               keep_prob=keep_prob,
                               image_input=image_input):
            """Return ``original_image`` with the segmentation mask overlaid.

            The frame is resized to ``image_shape`` for inference, and the
            painted result is resized back to the frame's original shape.
            """
            original_image_shape = original_image.shape
            image = scipy.misc.imresize(original_image, image_shape)
            im_softmax = sess.run([softmax_op], {
                keep_prob: 1.0,
                image_input: [image]
            })
            # Column 1 of the softmax output, reshaped to the image.
            im_softmax = im_softmax[0][:, 1].reshape(image_shape[0],
                                                     image_shape[1])
            segmentation = (im_softmax > 0.5).reshape(image_shape[0],
                                                      image_shape[1], 1)
            # Pixels above the threshold become RGBA (0, 255, 0, 127).
            mask = np.dot(segmentation, np.array([[0, 255, 0, 127]]))
            mask = scipy.misc.toimage(mask, mode="RGBA")
            street_im = scipy.misc.toimage(image)
            street_im.paste(mask, box=None, mask=mask)
            return np.array(
                scipy.misc.imresize(street_im, original_image_shape))

        clip1 = VideoFileClip("./data/harder_challenge_video.mp4")
        # NOTE: fl_image expects a function that takes and returns color images.
        white_clip = clip1.fl_image(process_videoimage)
        white_clip.write_videofile("./data/segmented_project_video.mp4",
                                   audio=False)