def Validation(cf, sess, sb):
    """Evaluate the model on the validation set and log the results.

    Builds a ``Data_loader`` over the validation image / ground-truth
    folders named in ``cf``, runs every batch with ``simb_is_training`` fed
    as ``False``, prints the mean loss, mIoU, mAcc and elapsed time, and
    writes the 'validation' summary through the writer returned by
    ``sb.tensorBoard.save``.

    Args:
        cf: configuration object. Reads ``exp_folder``, ``log_path``,
            ``valid_dataset_path``, ``valid_folder_names``,
            ``valid_samples``, ``size_image_valid``, ``valid_batch_size``,
            ``num_classes`` and ``labels``.
        sess: session object used for every ``run`` call.
        sb: object exposing ``model``, ``loss_fun`` and ``tensorBoard``.
    """
    val_time = time.time()
    val_writer = sb.tensorBoard.save(
        cf.exp_folder + cf.log_path + 'validation/', sess)

    valid_image_path = os.path.join(cf.valid_dataset_path,
                                    cf.valid_folder_names[0])
    valid_gt_path = os.path.join(cf.valid_dataset_path,
                                 cf.valid_folder_names[1])
    valid_set = Data_loader(cf, valid_image_path, cf.valid_samples,
                            cf.size_image_valid, valid_gt_path)
    valid_set.Load_dataset(cf.valid_batch_size)

    valid_stats = Statistics(cf.valid_batch_size, sb)
    tf.summary.scalar("Mean_IoU/validation", valid_stats.mean_IoU,
                      collections=['validation'])
    tf.summary.scalar("Mean_Acc/validation", valid_stats.accuracy_class,
                      collections=['validation'])

    valid_loss_batch = np.zeros(valid_set.num_batches, dtype=np.float32)
    # Accumulate the per-batch confusion matrices, as Train() does.  The
    # previous code read index 3 of the fetch list (update_acc_class) and
    # kept only the last batch before averaging.
    conf_mat = np.zeros((cf.num_classes, cf.num_classes), dtype=np.float32)
    sess.run(valid_stats.running_vars_initializer)

    prog_bar = ProgressBar(valid_set.num_batches)
    for i in range(valid_set.num_batches):
        batch_x, batch_y = valid_set.Next_batch(cf.valid_batch_size)
        feed_dict = {
            sb.model.simb_image: batch_x,
            sb.model.simb_gt: batch_y,
            sb.model.simb_is_training: False
        }
        simbol_list = [
            sb.loss_fun,
            sb.model.annotation_pred,
            valid_stats.update_IoU,
            valid_stats.update_acc_class,
            valid_stats.conf_matrix_batch
        ]
        sess_return = sess.run(simbol_list, feed_dict)
        valid_loss_batch[i] = sess_return[0]
        conf_mat += sess_return[4]
        prog_bar.update()

    # Average over batches; max() keeps an empty dataset from dividing by 0.
    conf_mat = conf_mat / max(valid_set.num_batches, 1)
    img_conf_mat = confm_metrics2image(conf_mat, cf.labels)
    img_conf_mat = tf.expand_dims(img_conf_mat, 0)
    tf.summary.image("conf_mat/validation", img_conf_mat, max_outputs=2,
                     collections=['validation'])

    summary_op_val = sb.tensorBoard.set_up('validation')
    mIoU_valid, mAcc_valid, sammary_val = sess.run(
        [valid_stats.mean_IoU, valid_stats.accuracy_class, summary_op_val])

    val_time = time.time() - val_time
    print("\t Loss: %g, mIoU: %g, mAcc: %g, Time: %ds" %
          (np.mean(np.asarray(valid_loss_batch)), mIoU_valid, mAcc_valid,
           val_time))
    val_writer.add_summary(sammary_val)
def start(self, valid_set, valid_loader, mode='Validation', epoch=None,
          global_bar=None, save_folder=None):
    """Run a validation pass, compute its statistics and write them out.

    Args:
        valid_set: dataset object; ``num_images`` is read to size the
            progress bar.
        valid_loader: batch iterable forwarded to ``validation_loop``.
        mode: label used in the progress message and to pick the JSON
            target: 'Epoch Validation', 'Validation' or 'Test'.  Any other
            value skips the JSON write.
        epoch: epoch index forwarded to the loop, ``save_stats`` and the
            logger.
        global_bar: optional outer progress bar forwarded to the loop.
        save_folder: forwarded to ``validation_loop`` only when
            ``cf.problem_type == 'detection'``.
    """
    confm_list = np.zeros((self.cf.num_classes, self.cf.num_classes))
    self.val_loss = AverageMeter()

    # Initialize epoch progress bar
    val_num_batches = math.ceil(
        valid_set.num_images / float(self.cf.valid_batch_size))
    prev_msg = '\n' + mode + ' estimated time...\n'
    bar = ProgressBar(val_num_batches, lenBar=20)
    bar.set_prev_msg(prev_msg)
    bar.update(show=False)

    # Validate model: the last positional argument is the output folder
    # for detection and the confusion-matrix accumulator otherwise.
    if self.cf.problem_type == 'detection':
        self.validation_loop(epoch, valid_loader, valid_set, bar,
                             global_bar, save_folder)
    else:
        self.validation_loop(epoch, valid_loader, valid_set, bar,
                             global_bar, confm_list)

    # Compute stats
    self.compute_stats(np.asarray(self.stats.val.conf_m), self.val_loss)

    # Save stats
    self.save_stats(epoch)
    if mode == 'Epoch Validation':
        # Write the validation stats, matching the other two branches; the
        # previous code dumped self.stats.train into the valid_epoch file.
        self.logger_stats.write_stat(
            self.stats.val, epoch,
            os.path.join(self.cf.train_json_path,
                         'valid_epoch_' + str(epoch) + '.json'))
    elif mode == 'Validation':
        self.logger_stats.write_stat(self.stats.val, epoch,
                                     self.cf.val_json_file)
    elif mode == 'Test':
        self.logger_stats.write_stat(self.stats.val, epoch,
                                     self.cf.test_json_file)
def start(self, criterion, valid_set, valid_loader, epoch=None,
          global_bar=None):
    """Validate the network over ``valid_loader`` and store the statistics.

    For every batch the network output is scored with ``criterion`` and a
    confusion matrix is accumulated; ``self.stats.val`` is refreshed after
    each batch, then ``compute_stats`` and ``save_stats`` are called once
    at the end.

    Args:
        criterion: callable applied to ``(outputs, gts)``; ``.data[0]`` of
            its result is used as the batch loss.
        valid_set: dataset object; ``num_images`` sizes the progress bar.
        valid_loader: iterable yielding ``(inputs, gts)`` batches.
        epoch: epoch index forwarded to ``save_stats``.
        global_bar: optional outer progress bar forwarded to ``update_msg``.
    """
    confm_list = np.zeros((self.cf.num_classes, self.cf.num_classes))
    val_loss = AverageMeter()

    # Initialize epoch progress bar
    val_num_batches = math.ceil(
        valid_set.num_images / float(self.cf.valid_batch_size))
    prev_msg = '\nValidation estimated time...\n'
    bar = ProgressBar(val_num_batches, lenBar=20)
    bar.set_prev_msg(prev_msg)
    bar.update(show=False)

    # Validate model
    for vi, data in enumerate(valid_loader):
        # Read data
        inputs, gts = data
        n_images, w, h, c = inputs.size()
        inputs = Variable(inputs, volatile=True).cuda()
        gts = Variable(gts, volatile=True).cuda()

        # Predict model
        outputs = self.model.net(inputs)
        predictions = outputs.data.max(1)[1].cpu().numpy()

        # Compute batch stats
        val_loss.update(criterion(outputs, gts).data[0] / n_images,
                        n_images)
        confm = compute_confusion_matrix(predictions,
                                         gts.cpu().data.numpy(),
                                         self.cf.num_classes,
                                         self.cf.void_class)
        # Element-wise sum.  map(operator.add, ...) returns a lazy iterator
        # on Python 3, which np.asarray() cannot turn into a matrix.
        confm_list = np.add(confm_list, confm)

        # Save epoch stats
        self.stats.val.conf_m = confm_list
        self.stats.val.loss = val_loss.avg / (w * h * c)

        # Update messages
        self.update_msg(bar, global_bar)

    # Compute stats
    self.compute_stats(np.asarray(self.stats.val.conf_m), val_loss)

    # Save stats
    self.save_stats(epoch)
def Predict(cf, sess, sb):
    """Run the model over the test images and save each batch of predictions.

    Args:
        cf: configuration object. Reads ``test_dataset_path``,
            ``test_folder_names``, ``test_samples``, ``resize_image_test``,
            ``test_batch_size`` and ``predict_output``.
        sess: session object used to evaluate ``annotation_pred``.
        sb: object exposing ``model`` with ``simb_image``,
            ``simb_is_training`` and ``annotation_pred``.
    """
    started_at = time.time()

    # Only the image folder is used here; no ground truth is loaded.
    images_dir = os.path.join(cf.test_dataset_path, cf.test_folder_names[0])
    test_set = Data_loader(cf, images_dir, cf.test_samples,
                           cf.resize_image_test)
    test_set.Load_dataset(cf.test_batch_size)

    progress = ProgressBar(test_set.num_batches)
    for _ in range(test_set.num_batches):
        images, names = test_set.Next_batch_pred(cf.test_batch_size)
        inputs = {
            sb.model.simb_image: images,
            sb.model.simb_is_training: False
        }
        (predictions,) = sess.run([sb.model.annotation_pred], inputs)
        save_prediction(cf.predict_output, predictions, names)
        progress.update()

    elapsed = time.time() - started_at
    print("\t Time: %ds" % (elapsed))
def start(self, train_loader, train_set, valid_set=None, valid_loader=None):
    """Train from ``self.curr_epoch`` to ``self.cf.epochs``, validating each epoch.

    Per epoch: reshuffle the training indexes, run ``training_loop``,
    compute and log the training statistics, call ``validate_epoch``, step
    the scheduler (when the model has one) on the validation loss, and save
    the model.  Returns early as soon as ``self.stop`` is truthy.  When
    ``cf.epochs == 0`` the model is saved without any training.

    Args:
        train_loader: batch iterable forwarded to ``training_loop``.
        train_set: dataset object; provides ``num_images`` and
            ``update_indexes``.
        valid_set: optional validation dataset; ``num_images`` is read
            when it is given.
        valid_loader: optional loader forwarded to ``validate_epoch``.
    """
    batch_size = float(self.cf.train_batch_size)
    self.train_num_batches = math.ceil(train_set.num_images / batch_size)
    if valid_set is None:
        self.val_num_batches = 0
    else:
        self.val_num_batches = math.ceil(
            valid_set.num_images / float(self.cf.valid_batch_size))

    # Early stopping controller (None when disabled)
    stopper = Early_Stopping(self.cf) if self.cf.early_stopping else None

    # Global bar spans every remaining train + validation batch
    epochs_left = self.cf.epochs + 1 - self.curr_epoch
    batches_per_epoch = self.train_num_batches + self.val_num_batches
    self.global_bar = ProgressBar(epochs_left * batches_per_epoch, lenBar=20)
    self.global_bar.set_prev_msg('\nTotal estimated training time...\n')

    for epoch in range(self.curr_epoch, self.cf.epochs + 1):
        # Reshuffle the training data for this epoch
        train_set.update_indexes()

        # Epoch header in the log
        epoch_time = time.time()
        self.logger_stats.write(
            '\t ------ Epoch: ' + str(epoch) + ' ------ \n')

        # Per-epoch progress bar
        self.msg.accum_str = '\n\nEpoch %d/%d estimated time...\n' % (
            epoch, self.cf.epochs)
        epoch_bar = ProgressBar(self.train_num_batches, lenBar=20)
        epoch_bar.update(show=False)

        # Fresh accumulators
        self.stats.epoch = epoch
        self.train_loss = AverageMeter()
        n_classes = self.cf.num_classes
        self.confm_list = np.zeros((n_classes, n_classes))

        # One pass over the training data
        self.training_loop(epoch, train_loader, epoch_bar)

        # Training statistics for the epoch
        self.stats.train.conf_m = self.confm_list
        self.compute_stats(np.asarray(self.confm_list), self.train_loss)
        self.save_stats_epoch(epoch)
        train_json = os.path.join(self.cf.train_json_path,
                                  'train_epoch_' + str(epoch) + '.json')
        self.logger_stats.write_stat(self.stats.train, epoch, train_json)

        # Validation for the epoch
        self.validate_epoch(valid_set, valid_loader, stopper, epoch,
                            self.global_bar)

        # Scheduler is driven by the validation loss
        if self.model.scheduler is not None:
            self.model.scheduler.step(self.stats.val.loss)

        # Save the model; record best stats when save() reports a new best
        new_best = self.model.save(self.stats)
        if new_best:
            self.logger_stats.write_best_stats(self.stats, epoch,
                                               self.cf.best_json_file)

        # Refresh the displayed values
        self.update_messages(epoch, epoch_time, new_best)

        if self.stop:
            return

    # Nothing was trained: still persist the model
    if self.cf.epochs == 0:
        self.model.save_model()
def start(self, criterion, optimizer, train_loader, train_set,
          valid_set=None, valid_loader=None, scheduler=None):
    """Train from ``self.curr_epoch`` to ``self.cf.epochs``, validating each epoch.

    Args:
        criterion: loss callable applied to ``(outputs, labels)``.
        optimizer: optimizer; ``zero_grad`` and ``step`` are called once
            per batch.
        train_loader: iterable yielding ``(inputs, labels)`` batches.
        train_set: dataset object; provides ``num_images`` and
            ``update_indexes``.
        valid_set: optional validation dataset; ``num_images`` is read
            when it is given.
        valid_loader: optional loader forwarded to ``validate_epoch``.
        scheduler: optional scheduler stepped with the validation loss
            after every epoch.
    """
    train_num_batches = math.ceil(
        train_set.num_images / float(self.cf.train_batch_size))
    val_num_batches = 0 if valid_set is None else math.ceil(
        valid_set.num_images / float(self.cf.valid_batch_size))

    # Define early stopping control
    if self.cf.early_stopping:
        early_Stopping = Early_Stopping(self.cf)
    else:
        early_Stopping = None

    prev_msg = '\nTotal estimated training time...\n'
    global_bar = ProgressBar(
        (self.cf.epochs + 1 - self.curr_epoch) *
        (train_num_batches + val_num_batches), lenBar=20)
    global_bar.set_prev_msg(prev_msg)

    # Train process
    for epoch in range(self.curr_epoch, self.cf.epochs + 1):
        # Shuffle train data
        train_set.update_indexes()

        # Initialize logger
        epoch_time = time.time()
        self.logger_stats.write(
            '\t ------ Epoch: ' + str(epoch) + ' ------ \n')

        # Initialize epoch progress bar.  The denominator is the configured
        # last epoch; the previous "epochs + 1 - curr_epoch" printed e.g.
        # "Epoch 5/6" when resuming at epoch 5 of 10.
        self.msg.accum_str = '\n\nEpoch %d/%d estimated time...\n' % (
            epoch, self.cf.epochs)
        epoch_bar = ProgressBar(train_num_batches, lenBar=20)
        epoch_bar.update(show=False)

        # Initialize stats
        train_loss = AverageMeter()
        confm_list = np.zeros((self.cf.num_classes, self.cf.num_classes))

        # Train epoch
        for i, data in enumerate(train_loader):
            # Read Data
            inputs, labels = data
            N, w, h, c = inputs.size()
            inputs = Variable(inputs).cuda()
            labels = Variable(labels).cuda()

            # Predict model
            optimizer.zero_grad()
            outputs = self.model.net(inputs)
            predictions = outputs.data.max(1)[1].cpu().numpy()

            # Compute gradients
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Compute batch stats
            train_loss.update(loss.data[0], N)
            confm = compute_confusion_matrix(predictions,
                                             labels.cpu().data.numpy(),
                                             self.cf.num_classes,
                                             self.cf.void_class)
            # Element-wise sum.  map(operator.add, ...) returns a lazy
            # iterator on Python 3, which np.asarray() cannot turn into a
            # matrix.
            confm_list = np.add(confm_list, confm)
            self.stats.train.loss = train_loss.avg / (w * h * c)

            # Save stats
            self.save_stats_batch((epoch - 1) * train_num_batches + i)

            # Update epoch messages
            self.update_epoch_messages(epoch_bar, global_bar,
                                       train_num_batches, epoch, i)

        # Save stats
        self.stats.train.conf_m = confm_list
        self.compute_stats(np.asarray(confm_list), train_loss)
        self.save_stats_epoch(epoch)

        # Validate epoch
        self.validate_epoch(valid_set, valid_loader, criterion,
                            early_Stopping, epoch, global_bar)

        # Update scheduler
        if scheduler is not None:
            scheduler.step(self.stats.val.loss)

        # Saving model if needed
        self.model.net.save(self.stats)

        # Update display values
        self.update_messages(epoch, epoch_time)

        if self.stop:
            return

    # Save model without training
    if self.cf.epochs == 0:
        self.model.save_model(self.model.net)
def Train(cf, sess, sb):
    """Train the model for up to ``cf.epochs`` epochs with optional validation.

    Each epoch runs every training batch through ``sb.train_op``, logs the
    mean loss / mIoU / mAcc and a confusion-matrix image, optionally
    evaluates the validation set (when ``cf.valid_samples_epoch > 0``),
    saves the model through ``sb.model.modelIO.Save`` and, when
    ``cf.early_stopping`` is set, asks the early-stopping helper whether
    to stop.

    Args:
        cf: configuration object (dataset paths, batch sizes, epochs,
            ``num_classes``, ``labels``, early-stopping settings, ...).
        sess: session object used for every ``run`` call.
        sb: object exposing ``model``, ``train_op``, ``loss_fun`` and
            ``tensorBoard``.
    """
    # Path definitions
    train_image_path = os.path.join(cf.train_dataset_path,
                                    cf.train_folder_names[0])
    train_gt_path = os.path.join(cf.train_dataset_path,
                                 cf.train_folder_names[1])
    valid_image_path = os.path.join(cf.valid_dataset_path,
                                    cf.valid_folder_names[0])
    valid_gt_path = os.path.join(cf.valid_dataset_path,
                                 cf.valid_folder_names[1])

    # Training dataset set up
    train_set = Data_loader(cf, train_image_path, cf.train_samples,
                            cf.size_image_train, train_gt_path)
    train_set.Load_dataset(cf.train_batch_size)

    # Validation dataset set up
    valid_set = Data_loader(cf, valid_image_path, cf.valid_samples_epoch,
                            cf.size_image_valid, valid_gt_path)
    valid_set.Load_dataset(cf.valid_batch_size)

    # Simbol creation for metrics and statistics
    train_stats = Statistics(cf.train_batch_size, sb)
    valid_stats = Statistics(cf.valid_batch_size, sb)

    # More summary information to add
    tf.summary.scalar("Mean_IoU/train", train_stats.mean_IoU,
                      collections=['train'])
    tf.summary.scalar("Mean_Acc/train", train_stats.accuracy_class,
                      collections=['train'])
    tf.summary.scalar("Mean_IoU/train_valid", valid_stats.mean_IoU,
                      collections=['train_valid'])
    tf.summary.scalar("Mean_Acc/train_valid", valid_stats.accuracy_class,
                      collections=['train_valid'])
    train_writer = sb.tensorBoard.save(
        cf.exp_folder + cf.log_path + 'train/', sess)
    val_writer = sb.tensorBoard.save(
        cf.exp_folder + cf.log_path + 'train_valid/', sess)

    # Early stopping
    if cf.early_stopping:
        e_stop = Early_Stopping(cf.patience)

    # Training
    feed_dict = []
    stop = False
    epoch = 1

    # Epoch loop
    while epoch < cf.epochs + 1 and not stop:
        epoch_time = time.time()
        if cf.shuffle:
            train_set.Shuffle()
            valid_set.Shuffle()
        loss_per_batch = np.zeros(train_set.num_batches, dtype=np.float32)
        conf_mat = np.zeros((cf.num_classes, cf.num_classes),
                            dtype=np.float32)

        # initialize/reset the running variables
        sess.run(train_stats.running_vars_initializer)

        # Progress bar
        prog_bar = ProgressBar(train_set.num_batches)

        # Dataset batch loop
        for i in range(train_set.num_batches):
            batch_x, batch_y = train_set.Next_batch(cf.train_batch_size,
                                                    crop=True)
            feed_dict = {
                sb.model.simb_image: batch_x,
                sb.model.simb_gt: batch_y,
                sb.model.simb_is_training: True
            }
            simbol_list = [
                sb.train_op, sb.loss_fun, sb.model.annotation_pred,
                train_stats.update_IoU, train_stats.update_acc_class,
                train_stats.conf_matrix_batch
            ]
            sess_return = sess.run(simbol_list, feed_dict)
            loss_per_batch[i] = sess_return[1]
            conf_mat += sess_return[5]
            prog_bar.update()

        # Epoch train summary info
        # NOTE(review): a new image-summary op is created every epoch, so
        # the graph appears to grow with the epoch count -- confirm.
        conf_mat = conf_mat / train_set.num_batches
        img_conf_mat = confm_metrics2image(conf_mat, cf.labels)
        img_conf_mat = tf.expand_dims(img_conf_mat, 0)
        tf.summary.image("conf_mat/train", img_conf_mat, max_outputs=2,
                         collections=['train'])
        train_mLoss = np.mean(np.asarray(loss_per_batch))
        summary_op_train = sb.tensorBoard.set_up('train')
        mIoU_train, mAcc_train, summary_train = sess.run([
            train_stats.mean_IoU, train_stats.accuracy_class,
            summary_op_train
        ], feed_dict)
        train_set.Reset_Offset()

        # Validation in train
        if cf.valid_samples_epoch > 0:
            conf_mat = np.zeros((cf.num_classes, cf.num_classes),
                                dtype=np.float32)
            valid_loss_batch = np.zeros(valid_set.num_batches,
                                        dtype=np.float32)
            sess.run(valid_stats.running_vars_initializer)
            for i in range(valid_set.num_batches):
                batch_x, batch_y = valid_set.Next_batch(cf.valid_batch_size)
                feed_dict = {
                    sb.model.simb_image: batch_x,
                    sb.model.simb_gt: batch_y,
                    sb.model.simb_is_training: False
                }
                simbol_list = [
                    sb.loss_fun, sb.model.annotation_pred,
                    valid_stats.update_IoU, valid_stats.update_acc_class,
                    valid_stats.conf_matrix_batch
                ]
                sess_return = sess.run(simbol_list, feed_dict)
                valid_loss_batch[i] = sess_return[0]
                conf_mat += sess_return[4]
            # Average over the validation batches that were summed above;
            # the previous code divided by the number of training batches.
            conf_mat = conf_mat / valid_set.num_batches
            img_conf_mat = confm_metrics2image(conf_mat, cf.labels)
            img_conf_mat = tf.expand_dims(img_conf_mat, 0)
            tf.summary.image("conf_mat/train_valid", img_conf_mat,
                             max_outputs=2, collections=['train_valid'])
            summary_op_val = sb.tensorBoard.set_up('train_valid')
            mIoU_valid, mAcc_valid, sammary_val = sess.run([
                valid_stats.mean_IoU, valid_stats.accuracy_class,
                summary_op_val
            ])
            valid_mLoss = np.mean(np.asarray(valid_loss_batch))
            valid_set.Reset_Offset()

        # Screen display
        train_writer.add_summary(summary_train, epoch)
        if cf.valid_samples_epoch > 0:
            # The validation summary only exists when validation ran; the
            # unguarded call raised NameError with validation disabled.
            val_writer.add_summary(sammary_val, epoch)
        epoch_time = time.time() - epoch_time
        print("Epoch: %d, Time: %ds \n\t Train_loss: %g, mIoU: %g, mAcc: %g"
              % (epoch, epoch_time, train_mLoss, mIoU_train, mAcc_train))
        if cf.valid_samples_epoch > 0:
            print("\t Valid_loss: %g, mIoU: %g, mAcc: %g" %
                  (valid_mLoss, mIoU_valid, mAcc_valid))
            sb.model.modelIO.Save(cf, sess, train_mLoss, mIoU_train,
                                  mAcc_train, valid_mLoss, mIoU_valid,
                                  mAcc_valid)
            if cf.early_stopping:
                stop = e_stop.Check(cf.save_condition, train_mLoss,
                                    mIoU_train, mAcc_train, valid_mLoss,
                                    mIoU_valid, mAcc_valid)
        else:
            sb.model.modelIO.Save(cf, sess, train_mLoss, mIoU_train,
                                  mAcc_train)
            if cf.early_stopping:
                stop = e_stop.Check(cf.save_condition, train_mLoss,
                                    mIoU_train, mAcc_train)
        epoch += 1