np.stack(input_image_batch, axis=1)) output_image_batch = np.squeeze( np.stack(output_image_batch, axis=1)) # Do the training _, current = sess.run([opt, loss], feed_dict={ input: input_image_batch, output: output_image_batch }) current_losses.append(current) cnt = cnt + args.batch_size if cnt % 20 == 0: string_print = "Epoch = %d Count = %d Current = %.2f Time = %.2f" % ( epoch, cnt, current, time.time() - st) utils.LOG(string_print) mean_loss = np.mean(current_losses) avg_loss_per_epoch.append(mean_loss) # Create directories if needed if not os.path.isdir("%s/%04d" % ("checkpoints2", epoch)): os.makedirs("%s/%04d" % ("checkpoints2", epoch)) saver.save(sess, model_checkpoint_name) saver.save(sess, "%s/%04d/model.ckpt" % ("checkpoints2", epoch)) target = open("%s/%04d/val_scores.txt" % ("checkpoints2", epoch), 'w') target.write( "val_name, avg_accuracy, precision, recall, f1 score, mean iou %s\n" % (class_names_string))
np.stack(input_image_batch, axis=1)) output_image_batch = np.squeeze( np.stack(output_image_batch, axis=1)) # Do the training _, current = sess.run([opt, loss], feed_dict={ net_input: input_image_batch, net_output: output_image_batch }) current_losses.append(current) cnt = cnt + args.batch_size if cnt % 20 == 0: string_print = "Epoch = %d Count = %d Current_Loss = %.4f Time = %.2f" % ( epoch, cnt, current, time.time() - st) utils.LOG(string_print) st = time.time() mean_loss = np.mean(current_losses) avg_loss_per_epoch.append(mean_loss) # Create directories if needed if not os.path.isdir("%s/%04d" % ("checkpoints", epoch)): os.makedirs("%s/%04d" % ("checkpoints", epoch)) # Save latest checkpoint to same file name print("Saving latest checkpoint") saver.save(sess, model_checkpoint_name) if val_indices != 0 and epoch % args.checkpoint_step == 0: print("Saving checkpoint for this epoch")
def _build_train_batch(batch_ids):
    """Load, augment and stack the training samples selected by *batch_ids*.

    Reads the module-level ``dataset``, ``helpers``, ``train_input_names``,
    ``train_output_names`` and ``label_values``.

    Args:
        batch_ids: indices into ``train_input_names`` / ``train_output_names``.

    Returns:
        A ``(inputs, labels)`` pair of float32 arrays whose leading axis is the
        batch axis. Inputs are scaled by 1/255; labels are the output of
        ``helpers.one_hot_it``.
    """
    input_images = []
    output_images = []
    for sample_id in batch_ids:
        input_image = dataset.load_image(train_input_names[sample_id])
        output_image = dataset.load_image(train_output_names[sample_id])

        h, w, _ = input_image.shape
        new_h, new_w = dataset.getTrainSize(h, w)

        # NOTE: do not crop/resize with TensorFlow ops (e.g.
        # tf.image.crop_to_bounding_box(...).eval()) in here: every call adds
        # new tensors to the graph, which leaks memory over the training run.
        with tf.device('/cpu:0'):
            input_image, output_image = dataset.data_augmentation(
                input_image, output_image, new_h, new_w)

        # Prep the data. Make sure the labels are in one-hot format.
        input_image = np.float32(input_image) / 255.0
        output_image = np.float32(
            helpers.one_hot_it(label=output_image, label_values=label_values))

        input_images.append(np.expand_dims(input_image, axis=0))
        output_images.append(np.expand_dims(output_image, axis=0))

    # Every sample already carries a leading batch axis of size 1, so joining
    # along axis 0 yields the batch directly. Unlike squeeze(stack(...)), this
    # needs no batch_size == 1 special case and never drops a genuine size-1
    # dimension.
    return (np.concatenate(input_images, axis=0),
            np.concatenate(output_images, axis=0))


def _validate_epoch(epoch, val_indices, epoch_dir):
    """Score the network on the chosen validation images for one epoch.

    Writes ``val_scores.csv`` plus one ``*_pred.png`` / ``*_gt.png`` pair per
    image into *epoch_dir*, and prints the averaged metrics.

    Args:
        epoch: zero-based epoch number (used in the printed summary).
        val_indices: non-empty list of indices into ``val_input_names``.
        epoch_dir: existing directory that receives the CSV and the images.

    Returns:
        The mean of the per-image accuracies returned by
        ``utils.evaluate_segmentation``.
    """
    scores_list = []
    class_scores_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    iou_list = []

    # The context manager guarantees the CSV is flushed and closed even when
    # evaluating one of the images raises.
    with open(epoch_dir + "/val_scores.csv", 'w') as target:
        target.write(
            "val_name, avg_accuracy, precision, recall, f1 score, mean iou, %s\n"
            % (class_names_string))

        # Do the validation on a small set of validation images.
        for ind in val_indices:
            input_image = dataset.load_image(val_input_names[ind])
            output_image = dataset.load_image(val_output_names[ind])
            h, w, _ = input_image.shape
            new_h, new_w = dataset.getTrainSize(h, w)
            input_image, output_image = utils.random_crop(
                input_image, output_image, new_h, new_w)

            input_image = np.expand_dims(np.float32(input_image),
                                         axis=0) / 255.0
            gt = helpers.reverse_one_hot(
                helpers.one_hot_it(output_image, label_values))

            output_image = sess.run(network,
                                    feed_dict={net_input: input_image})
            output_image = np.array(output_image[0, :, :, :])
            output_image = helpers.reverse_one_hot(output_image)
            out_vis_image = helpers.colour_code_segmentation(
                output_image, label_values)

            accuracy, class_accuracies, prec, rec, f1, iou = \
                utils.evaluate_segmentation(pred=output_image, label=gt,
                                            num_classes=num_classes)

            file_name = utils.filepath_to_name(val_input_names[ind])
            target.write("%s, %f, %f, %f, %f, %f" %
                         (file_name, accuracy, prec, rec, f1, iou))
            for item in class_accuracies:
                target.write(", %f" % (item))
            target.write("\n")

            scores_list.append(accuracy)
            class_scores_list.append(class_accuracies)
            precision_list.append(prec)
            recall_list.append(rec)
            f1_list.append(f1)
            iou_list.append(iou)

            gt = helpers.colour_code_segmentation(gt, label_values)

            file_name = os.path.basename(val_input_names[ind])
            file_name = os.path.splitext(file_name)[0]
            cv2.imwrite(
                epoch_dir + "/%s_pred.png" % (file_name),
                cv2.cvtColor(np.uint8(out_vis_image), cv2.COLOR_RGB2BGR))
            cv2.imwrite(
                epoch_dir + "/%s_gt.png" % (file_name),
                cv2.cvtColor(np.uint8(gt), cv2.COLOR_RGB2BGR))

    avg_score = np.mean(scores_list)
    class_avg_scores = np.mean(class_scores_list, axis=0)
    avg_precision = np.mean(precision_list)
    avg_recall = np.mean(recall_list)
    avg_f1 = np.mean(f1_list)
    avg_iou = np.mean(iou_list)

    print("\nAverage validation accuracy for epoch # %04d = %f" %
          (epoch, avg_score))
    print("Average per class validation accuracies for epoch # %04d:" %
          (epoch))
    for index, item in enumerate(class_avg_scores):
        print("%s = %f" % (class_names_list[index], item))
    print("Validation precision = ", avg_precision)
    print("Validation recall = ", avg_recall)
    print("Validation F1 score = ", avg_f1)
    print("Validation IoU score = ", avg_iou)

    return avg_score


def train():
    """Train the segmentation network, checkpointing and validating as it goes.

    Builds the softmax cross-entropy loss (optionally class-weighted when
    ``cfg.class_balancing`` is set) and an Adam optimizer, initializes the
    variables, then runs ``cfg.num_epochs`` epochs over a fresh random
    permutation of the training set. After every epoch the latest checkpoint
    is saved; every ``cfg.checkpoint_step`` epochs an epoch-specific checkpoint
    is saved; every ``cfg.validation_step`` epochs the fixed validation subset
    is scored. Finally the accuracy and loss curves are drawn.

    All inputs come from module-level state (``cfg``, ``sess``, ``saver``,
    ``network``, ``net_input``, ``net_output``, the file-name lists, ...).
    """
    if cfg.class_balancing:
        print("Computing class weights for trainlabel ...")
        class_weights = utils.compute_class_weights(
            labels_dir=train_output_names, label_values=label_values)
        # Collapse the class axis of the one-hot labels into one weight per
        # pixel, matching the shape of the per-pixel cross-entropy below.
        weights = tf.reduce_sum(class_weights * net_output, axis=-1)
        unweighted_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=network, labels=net_output)
        # Use the per-pixel weights: multiplying by the raw per-class vector
        # would broadcast it against a spatial axis of the loss map.
        losses = unweighted_loss * weights
    else:
        losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=network, labels=net_output)
    loss = tf.reduce_mean(losses)

    opt = tf.train.AdamOptimizer(cfg.lr).minimize(
        loss, var_list=tf.trainable_variables())

    sess.run(tf.global_variables_initializer())

    utils.count_params()

    # If a pre-trained ResNet is required, load the weights.
    # This must be done AFTER the variables are initialized with
    # sess.run(tf.global_variables_initializer()).
    if init_fn is not None:
        init_fn(sess)

    avg_scores_per_epoch = []
    validated_epochs = []  # epochs at which avg_scores_per_epoch got an entry
    avg_loss_per_epoch = []

    # Which validation images do we want?
    num_vals = min(cfg.num_val_images, len(val_input_names))

    # Set random seed to make sure models are validated on the same
    # validation images, so results of different models can be compared.
    random.seed(16)
    val_indices = random.sample(range(0, len(val_input_names)), num_vals)

    # Do the training here.
    for epoch in range(0, cfg.num_epochs):
        current_losses = []
        cnt = 0

        # Equivalent to shuffling.
        id_list = np.random.permutation(len(train_input_names))
        # Trailing samples that do not fill a whole batch are dropped.
        num_iters = len(id_list) // cfg.batch_size

        st = time.time()
        epoch_st = time.time()
        for i in range(num_iters):
            batch_ids = id_list[i * cfg.batch_size:(i + 1) * cfg.batch_size]
            input_image_batch, output_image_batch = _build_train_batch(
                batch_ids)

            # Do the training.
            _, current = sess.run([opt, loss],
                                  feed_dict={
                                      net_input: input_image_batch,
                                      net_output: output_image_batch
                                  })
            current_losses.append(current)
            cnt = cnt + cfg.batch_size
            if cnt % 20 == 0:
                string_print = "Epoch = %d Count = %d Current_Loss = %.4f Time = %.2f" % (
                    epoch, cnt, current, time.time() - st)
                utils.LOG(string_print)
                st = time.time()

        mean_loss = np.mean(current_losses)
        avg_loss_per_epoch.append(mean_loss)

        # Create directories if needed.
        epoch_dir = cfg.base_dir + "%s/%s/%04d" % ("checkpoints", cfg.model,
                                                   epoch)
        if not os.path.isdir(epoch_dir):
            os.makedirs(epoch_dir)

        # Save latest checkpoint to same file name.
        print("Saving latest checkpoint")
        saver.save(sess, model_checkpoint_name)

        # The former extra test `val_indices != 0` compared a list with an int
        # and was therefore always true; dropping it keeps the behaviour.
        if epoch % cfg.checkpoint_step == 0:
            print("Saving checkpoint for this epoch")
            saver.save(sess, epoch_dir + "/model.ckpt")

        # Skip validation when there is nothing to validate on: averaging
        # empty score lists yields NaN and breaks the per-class report.
        if val_indices and epoch % cfg.validation_step == 0:
            print("Performing validation")
            avg_score = _validate_epoch(epoch, val_indices, epoch_dir)
            avg_scores_per_epoch.append(avg_score)
            validated_epochs.append(epoch)

        epoch_time = time.time() - epoch_st
        remain_time = epoch_time * (cfg.num_epochs - 1 - epoch)
        minutes, seconds = divmod(remain_time, 60)
        hours, minutes = divmod(minutes, 60)
        if seconds != 0:
            train_time = "Remaining training time = %d hours %d minutes %d seconds\n" % (
                hours, minutes, seconds)
        else:
            train_time = "Remaining training time : Training completed.\n"
        utils.LOG(train_time)

    # NOTE(review): the curves are drawn once, after the final epoch, because
    # the loss x axis spans every epoch -- confirm this is the intended
    # placement. Accuracy is plotted against the epochs that were actually
    # validated so x and y always have the same length (assumes
    # utils.drawLine takes matching x/y sequences -- TODO confirm).
    if avg_scores_per_epoch:
        utils.drawLine(validated_epochs,
                       avg_scores_per_epoch,
                       cfg.base_dir + 'checkpoints/' + cfg.model +
                       '/accuracy_vs_epochs.png',
                       title='Average validation accuracy vs epochs',
                       xlabel='Epoch',
                       ylabel='Avg. val. accuracy')

    utils.drawLine(range(cfg.num_epochs),
                   avg_loss_per_epoch,
                   cfg.base_dir + 'checkpoints/' + cfg.model +
                   '/loss_vs_epochs.png',
                   title='Average loss vs epochs',
                   xlabel='Epoch',
                   ylabel='Current loss')