def test(self):
    self.load_model()
    self.network.eval()
    self.test_loss_metric.reset(0)
    self.test_acc_metric.reset(0)

    y_pred = np.array([])
    y_true = np.array([])

    with torch.no_grad():
        for i, (img, depth_map, label) in enumerate(self.valloader):
            img, depth_map, label = img.to(self.device), depth_map.to(
                self.device), label.to(self.device)
            net_depth_map, _, _, _, _, _ = self.network(img)
            loss = self.criterion(net_depth_map, depth_map)

            # Turn the predicted and ground-truth depth maps into per-sample predictions
            preds, score = predict(net_depth_map)
            targets, _ = predict(depth_map)
            y_pred = np.append(y_pred, preds.to('cpu').numpy())
            y_true = np.append(y_true, label.to('cpu').numpy())
            accuracy = calc_accuracy(preds, targets)

            # Update metrics
            self.test_loss_metric.update(loss.item())
            self.test_acc_metric.update(accuracy)

    utils.print_metrics(y_true, y_pred)
    return y_pred
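
# ---------------------------------------------------------------------------
# `predict` and `calc_accuracy` are imported from the project's utilities and
# are not defined in this file. The sketch below is only an assumption of
# their behaviour (threshold the mean of each predicted depth map to obtain a
# binary decision per sample), not the repository's actual implementation.
# ---------------------------------------------------------------------------
import torch

def predict_sketch(depth_map, threshold=0.5):
    # Hypothetical: reduce a (B, 1, H, W) or (B, H, W) depth map to one score
    # per sample and a binary prediction.
    with torch.no_grad():
        score = depth_map.flatten(start_dim=1).mean(dim=1)
        preds = (score >= threshold).long()
    return preds, score

def calc_accuracy_sketch(preds, targets):
    # Hypothetical: fraction of predictions that match the targets.
    return (preds == targets).float().mean().item()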
def test_vae(args, model, data_path, fold, gpu, dicts, data_loader):
    filename = data_path.replace('train', fold)
    print('file for evaluation: %s' % filename)
    num_labels = len(dicts['ind2c'])

    y, yhat, yhat_raw, hids, losses = [], [], [], [], []

    model.eval()

    # data loader
    data_iter = iter(data_loader)
    num_iter = len(data_loader)
    for i in range(num_iter):
        with torch.no_grad():
            if args.model.find("bert") != -1:
                inputs_id, segments, masks, labels = next(data_iter)
                inputs_id, segments, masks, labels = torch.LongTensor(inputs_id), torch.LongTensor(segments), \
                                                     torch.LongTensor(masks), torch.FloatTensor(labels)
                if gpu >= 0:
                    inputs_id, segments, masks, labels = inputs_id.cuda(gpu), segments.cuda(gpu), \
                                                         masks.cuda(gpu), labels.cuda(gpu)
                output, loss = model(inputs_id, segments, masks, labels)
            else:
                inputs_id, labels, text_inputs = next(data_iter)
                inputs_id, labels = torch.LongTensor(inputs_id), torch.FloatTensor(labels)
                if gpu >= 0:
                    inputs_id, labels, text_inputs = inputs_id.cuda(gpu), labels.cuda(gpu), text_inputs.cuda(gpu)
                output, loss = model(inputs_id, labels, text_inputs)
                # combine reconstruction and KL terms returned by the VAE model
                loss = loss[0] + 0.001 * loss[1]

            output = torch.sigmoid(output)
            output = output.data.cpu().numpy()
            losses.append(loss.item())
            target_data = labels.data.cpu().numpy()

            yhat_raw.append(output)
            output = np.round(output)
            y.append(target_data)
            yhat.append(output)

    y = np.concatenate(y, axis=0)
    yhat = np.concatenate(yhat, axis=0)
    yhat_raw = np.concatenate(yhat_raw, axis=0)

    k = 5 if num_labels == 50 else [8, 15]
    metrics = all_metrics(yhat, y, k=k, yhat_raw=yhat_raw)
    print_metrics(metrics)
    metrics['loss_%s' % fold] = np.mean(losses)
    return metrics
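
# ---------------------------------------------------------------------------
# `all_metrics` / `print_metrics` come from the project's evaluation module.
# The snippet below is only an illustrative sketch of what the precision@k
# choice above means (k = 5 for the 50-label setting, k = 8 and 15 otherwise):
# for each document, take the k highest-scoring labels and count how many of
# them are gold labels. It is not that module's code.
# ---------------------------------------------------------------------------
import numpy as np

def precision_at_k_sketch(yhat_raw, y, k):
    topk = np.argsort(yhat_raw, axis=1)[:, -k:]              # indices of the k highest scores per row
    hits = np.take_along_axis(y, topk, axis=1).sum(axis=1)   # how many of them are true labels
    return float(np.mean(hits / k))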
def train_one_epoch(self, epoch):
    print(f'Epoch: {epoch}')
    self.network.train()
    self.train_loss_metric.reset(epoch)
    self.train_acc_metric.reset(epoch)

    y_pred = np.array([])
    y_true = np.array([])

    for i, (img, depth_map, label) in enumerate(self.trainloader):
        img, depth_map, label = img.to(self.device), depth_map.to(
            self.device), label.to(self.device)
        net_depth_map, _, _, _, _, _ = self.network(img)

        self.optimizer.zero_grad()
        loss = self.criterion(net_depth_map, depth_map)
        loss.backward()
        self.optimizer.step()

        # Turn the predicted and ground-truth depth maps into per-sample predictions
        preds, _ = predict(net_depth_map)
        targets, _ = predict(depth_map)
        y_pred = np.append(y_pred, preds.to('cpu').numpy())
        y_true = np.append(y_true, label.to('cpu').numpy())
        accuracy = calc_accuracy(preds, targets)

        # Update metrics
        self.train_loss_metric.update(loss.item())
        self.train_acc_metric.update(accuracy)

    utils.print_metrics(y_true, y_pred)
    return self.train_acc_metric.avg, self.train_loss_metric.avg
def main():
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Arguments
    parser = argparse.ArgumentParser(description='Monocular Depth')
    parser.add_argument('--backbone', default='PNASNet5Large', type=str)
    parser.add_argument('--batch_size', default=4, type=int, help='batch size')
    parser.add_argument('--save_prediction', default=True, type=bool)
    parser.add_argument('--get_metric', default=True, type=bool)
    args = parser.parse_args()

    # dataset to use
    test_dataset_use = {'NYUv2_test': True}

    # image size
    original_image_size = [480, 640]
    input_image_size = [288, 384]

    # interpolation function / relu
    interpolate_bicubic_fullsize = nn.Upsample(size=original_image_size, mode='bicubic')
    interpolate_bicubic_inputsize = nn.Upsample(size=input_image_size, mode='bicubic')
    relu = nn.ReLU()

    # Create model
    model = nn.DataParallel(create_model(args.backbone).half())
    print('Model created.')

    # loading testing data
    batch_size = args.batch_size
    test_loader, num_test_data = getTestingData(batch_size, test_dataset_use)

    # model path
    model_path = 'models'
    model_name = 'PNAS_model.pth'
    pred_path = 'prediction/PNAS_model'

    # prediction path
    if not os.path.isdir(pred_path):
        os.makedirs(pred_path)

    # Start testing
    N = len(test_loader)
    end = time.time()
    total_time = time.time() - end

    # load
    model.load_state_dict(torch.load(model_path + '/' + model_name))
    model.eval()
    print(model_path + '/' + model_name)

    test_metrics = np.zeros((num_test_data, 8))  # 8 metrics

    for i, sample_batched in enumerate(test_loader):
        # Prepare sample and target
        image = torch.autograd.Variable(sample_batched['image'].cuda()).half()
        depth_gt = torch.autograd.Variable(
            sample_batched['depth'].cuda(non_blocking=True))
        # center crop used for the NYUv2 evaluation
        depth_gt_for_metric = depth_gt[:, :, 0 + 20:480 - 20, 0 + 24:640 - 24].cuda()
        current_batch_size = depth_gt.size(0)

        # Predict
        image_input = interpolate_bicubic_inputsize(image)
        depth_pred_for_loss = model(image_input)
        depth_pred_for_loss = (relu(depth_pred_for_loss - 0.0001) + 0.0001).float()
        depth_pred_full = interpolate_bicubic_fullsize(depth_pred_for_loss)
        depth_pred_for_metric = (
            relu(depth_pred_full[:, :, 0 + 20:480 - 20, 0 + 24:640 - 24] - 0.0001) + 0.0001).cuda()

        # save prediction
        if args.save_prediction:
            for index_test in range(i * batch_size + 1,
                                    i * batch_size + current_batch_size + 1):
                pred2png(
                    depth_pred_full[index_test - (i * batch_size + 1), 0, :, :].cpu().detach().numpy(),
                    pred_path, index_test)

        # compute metric
        if args.get_metric:
            rmse, rmse_log, abs_rel, sqr_rel, log10, delta1, delta2, delta3 \
                = compute_multi_metric(depth_pred_for_metric, depth_gt_for_metric)
            test_metrics = get_metric_1batch(batch_size, current_batch_size, i,
                                             num_test_data, test_metrics,
                                             rmse, rmse_log, abs_rel, sqr_rel,
                                             log10, delta1, delta2, delta3)

        # Measure elapsed time
        total_time = total_time + (time.time() - end)
        end = time.time()

        # Print to console
        if i == 0 or i % 20 == 19 or i == N - 1:
            print('Evaluation - ', str(i + 1).zfill(5), '/', str(N).zfill(5),
                  ' Time: ', str('%10.2f' % total_time), 's')

    if args.get_metric:
        test_metrics_mean = test_metrics.mean(axis=0)
        print_metrics(test_metrics_mean)

        # save metrics
        dataframe = pd.DataFrame(test_metrics)
        dataframe.to_csv(model_path + "/metrics" + ".csv", header=False, index=False)
        dataframe = pd.DataFrame(test_metrics_mean)
        dataframe.to_csv(model_path + "/metrics_mean" + ".csv", header=False, index=False)

    print('------------------------ FINISH -------------------------')
    print('---------------------------------------------------------')
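
# ---------------------------------------------------------------------------
# `compute_multi_metric` and `get_metric_1batch` are the project's evaluation
# helpers. The sketch below only restates, under the usual monocular-depth
# definitions, two of the eight reported numbers (RMSE and the delta1
# threshold accuracy) for one prediction/ground-truth pair; it is not the
# project's implementation.
# ---------------------------------------------------------------------------
import torch

def rmse_and_delta1_sketch(pred, gt):
    rmse = torch.sqrt(torch.mean((pred - gt) ** 2))        # root mean squared error
    ratio = torch.max(pred / gt, gt / pred)                 # per-pixel max ratio
    delta1 = torch.mean((ratio < 1.25).float())             # share of pixels within 1.25x
    return rmse.item(), delta1.item()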
def main():
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Arguments
    parser = argparse.ArgumentParser(
        description='Multi-Loss Rebalancing Algorithm for Monocular Depth Estimation')
    parser.add_argument('--backbone', default='PNASNet5Large', type=str,
                        help='DenseNet161 (bs12) / PNASNet5Large (bs6)')
    parser.add_argument('--decoder_scale', default=1024, type=int,
                        help='valid for PNASNet5Large')
    parser.add_argument('--epochs', default=20, type=int,
                        help='number of total epochs to run')
    parser.add_argument('--lr', '--learning-rate', default=0.0001, type=float,
                        help='initial learning rate')
    parser.add_argument('--bs', default=8, type=int, help='batch size')
    parser.add_argument('--weight_initialization', default=True, type=bool)
    parser.add_argument('--weight_rebalancing', default=True, type=bool)
    parser.add_argument('--num_weight_rebalancing_per_epoch', default=4, type=int)
    parser.add_argument('--num_save_per_epoch', default=4, type=int)
    parser.add_argument('--lambda_for_adjust_start', default=3, type=float)
    parser.add_argument('--lambda_for_adjust_slope', default=-1.5, type=float)
    parser.add_argument('--lambda_for_adjust_min', default=-3, type=float)
    # parser.add_argument('--train_dataset_path', default='dataset/train_reduced05.zip', type=str)
    # parser.add_argument('--train_dataset_csv_list', default='train_reduced05/train.csv', type=str)
    parser.add_argument('--train_dataset_path', default='dataset/train795.zip', type=str)
    parser.add_argument('--train_dataset_csv_list', default='train795/train.csv', type=str)
    args = parser.parse_args()

    # image size
    original_image_size = [480, 640]
    input_image_size = [288, 384]

    # interpolation function / relu
    interpolate_bicubic_fullsize = nn.Upsample(size=original_image_size, mode='bicubic')
    relu = nn.ReLU()

    # create model
    model = network_model.create_model(args.backbone, args.decoder_scale)
    print('Summary: All Network')
    print(utils_utils.get_model_summary(
        model,
        torch.rand(1, 3, input_image_size[0], input_image_size[1]).cuda(),
        verbose=True))
    print('Model created.')

    # Training parameters
    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    batch_size = args.bs

    # loading training data
    train_loader, num_train_data = utils_get_data.getTrainingData(
        batch_size, args.train_dataset_path, args.train_dataset_csv_list)

    # Model path
    model_path = utils_utils.make_model_path(args.backbone, args.decoder_scale, batch_size)

    # train scores
    train_scores = np.zeros((num_train_data, 78))   # 78 scores
    train_metrics = np.zeros((num_train_data, 8))   # 8 metrics

    # loss term
    loss_weights = utils_multi_loss.get_loss_weights()
    loss_initialize_scale = utils_multi_loss.get_loss_initialize_scale()
    loss_valid = np.array(loss_weights) > 0

    # save path
    savePath = model_path + '/weight/loss_weights.csv'
    dataframe = pd.DataFrame(loss_weights)
    dataframe.to_csv(savePath, header=False, index=False)

    # weight rebalancing arguments
    weight_initialization = args.weight_initialization
    weight_rebalancing = args.weight_rebalancing
    weight_initialization_done = False
    last_rebalancing_iter = 0
    previous_total_loss = 0
    previous_loss = 0

    # iterations per epoch
    iter_per_epoch = len(train_loader)

    # save/rebalancing iterations
    iter_list_save = utils_utils.get_notable_iter(
        iter_per_epoch, num_per_epoch=args.num_save_per_epoch)
    iter_list_rebalancing = utils_utils.get_notable_iter(
        iter_per_epoch, num_per_epoch=args.num_weight_rebalancing_per_epoch)

    # mixed precision + DataParallel
    if APEX_AVAILABLE:
        use_amp = True
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level="O2",
                                          keep_batchnorm_fp32=True,
                                          loss_scale="dynamic")
    else:
        use_amp = False

    model = nn.DataParallel(model)

    try:
        # try to load epoch01_iter00000
        model_name = "model/epoch01_iter00000.pth"
        model.load_state_dict(torch.load(model_name))
        print('LOAD MODEL ', model_name)
    except:
        # nothing to resume from: save the freshly initialized model instead
        print('THERE IS NO MODEL TO LOAD')
        model_name = model_path + "/model/epoch" + str(0 + 1).zfill(2) + '_iter' + str(0).zfill(5) + ".pth"
        print('SAVE MODEL:' + model_path)
        torch.save(model.state_dict(), model_name)

    # Start training...
    for epoch in range(args.epochs):
        print('---------------------------------------------------------')
        print('-------------- TRAINING OF EPOCH ' + str(0 + epoch + 1).zfill(2) + ' START ----------------')

        end = time.time()

        # Switch to train mode
        model.train()

        # train parameter
        current_lambda_for_adjust = max(
            args.lambda_for_adjust_start + epoch * args.lambda_for_adjust_slope,
            args.lambda_for_adjust_min)

        for i, sample_batched in enumerate(train_loader):
            optimizer.zero_grad()

            # Prepare sample and target
            image = torch.autograd.Variable(sample_batched['image'].cuda())
            depth_gt = torch.autograd.Variable(
                sample_batched['depth'].cuda(non_blocking=True))

            # depth gt
            depth_gt_input = depth_gt
            depth_gt_full = interpolate_bicubic_fullsize(depth_gt_input)
            depth_gt_for_loss = depth_gt_input
            depth_gt_for_loss = depth_gt_for_loss.cuda()
            depth_gt_for_metric = (
                relu(depth_gt_full[:, :, 0 + 20:480 - 20, 0 + 24:640 - 24] - 0.0001) + 0.0001)

            # Predict
            image_input = image
            depth_pred_for_loss = model(image_input).cuda()
            depth_pred_full = interpolate_bicubic_fullsize(depth_pred_for_loss)
            depth_pred_for_metric = (
                relu(depth_pred_full[:, :, 0 + 20:480 - 20, 0 + 24:640 - 24] - 0.0001) + 0.0001)

            # current batch size
            current_batch_size = depth_gt_for_loss.size(0)

            # compute loss
            losses = utils_multi_loss.compute_multi_loss(
                depth_pred_for_loss, depth_gt_for_loss, loss_valid)

            # compute iter loss & train_scores
            loss, l_custom, train_scores = utils_multi_loss.get_loss_1batch(
                batch_size, current_batch_size, i, num_train_data,
                loss_weights, train_scores, losses)

            metrics = utils_multi_loss.compute_multi_metric(
                depth_pred_for_metric, depth_gt_for_metric)
            train_metrics = utils_multi_loss.get_metric_1batch(
                batch_size, current_batch_size, i, num_train_data,
                train_metrics, metrics)

            # Update
            if use_amp:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()

            # Measure elapsed time
            end = time.time()

            # Log progress
            if (i + 1) % 100 == 0 or (i + 1) == iter_per_epoch:
                if epoch == 0:
                    train_scores_mean = train_scores[0:(i + 1) * batch_size].mean(axis=0)
                    train_metrics_mean = train_metrics[0:(i + 1) * batch_size].mean(axis=0)
                else:
                    train_scores_mean = train_scores.mean(axis=0)
                    train_metrics_mean = train_metrics.mean(axis=0)

                # Print to console
                print('Epoch: [{0}][{1}/{2}]\t'.format(epoch, i + 1, iter_per_epoch))
                utils_utils.print_metrics(train_metrics_mean)
                utils_utils.print_scores(train_scores_mean)

            if (i + 1) == 1 or (i + 1) % 1000 == 0 or (i + 1) == iter_per_epoch:
                savePath = model_path + "/current" + ".csv"
                dataframe = pd.DataFrame(train_scores)
                dataframe.to_csv(savePath, header=False, index=False)

            if i in iter_list_save:
                model_name = model_path + "/model/epoch" + str(epoch + 1).zfill(2) + '_iter' + str(i).zfill(5) + ".pth"
                # save model
                print('SAVE MODEL:' + model_path + '/model')
                torch.save(model.state_dict(), model_name)

            if i in iter_list_rebalancing:
                temp_train_scores_mean = train_scores[
                    last_rebalancing_iter * batch_size:(i + 1) * batch_size, :].mean(axis=0)
                total_loss = np.sum(temp_train_scores_mean * loss_weights)
                if weight_initialization and not weight_initialization_done:
                    # initialize the loss weights from the chosen per-term scales
                    for index_loss in range(len(loss_valid)):
                        if loss_valid[index_loss] == 1:
                            loss_weights[index_loss] = (
                                total_loss * loss_initialize_scale[index_loss]
                            ) / temp_train_scores_mean[index_loss]
                        else:
                            loss_weights[index_loss] = 0

                    # save previous record
                    weight_initialization_done = True
                    previous_total_loss = np.sum(temp_train_scores_mean * loss_weights)
                    previous_loss = temp_train_scores_mean

                elif weight_rebalancing and (weight_initialization_done or not weight_initialization):
                    temp_train_scores_mean = train_scores[
                        last_rebalancing_iter * batch_size:(i + 1) * batch_size, :].mean(axis=0)
                    total_loss = np.sum(temp_train_scores_mean * loss_weights)
                    previous_loss_weights = np.array(loss_weights)

                    if previous_total_loss > 0:
                        for index_loss in range(len(loss_valid)):
                            if loss_valid[index_loss] == 1:
                                adjust_term = 1 + current_lambda_for_adjust * (
                                    (total_loss / previous_total_loss) *
                                    (previous_loss[index_loss] / temp_train_scores_mean[index_loss]) - 1)
                                adjust_term = min(max(adjust_term, 1.0 / 2.0), 2.0 / 1.0)
                                loss_weights[index_loss] = previous_loss_weights[index_loss] * adjust_term
                            else:
                                loss_weights[index_loss] = 0

                    # save previous record
                    previous_total_loss = np.sum(temp_train_scores_mean * loss_weights)
                    previous_loss = temp_train_scores_mean

                # save - loss weights
                savePath = model_path + "/weight/weight" + str(epoch + 1).zfill(2) + '_iter' + str(i).zfill(5) + ".csv"
                dataframe = pd.DataFrame(loss_weights)
                dataframe.to_csv(savePath, header=False, index=False)

                last_rebalancing_iter = (i + 1) % iter_per_epoch

        # save - each image train score
        savePath = model_path + "/score/train_epoch" + str(0 + epoch + 1).zfill(2) + ".csv"
        dataframe = pd.DataFrame(train_scores)
        dataframe.to_csv(savePath, header=False, index=False)

        # save - train mean score
        savePath = model_path + "/score/train_mean_epoch" + str(0 + epoch + 1).zfill(2) + ".csv"
        dataframe = pd.DataFrame(train_scores_mean)
        dataframe.to_csv(savePath, header=False, index=False)

        # save - each image train metric
        savePath = model_path + "/metric/train_epoch" + str(0 + epoch + 1).zfill(2) + ".csv"
        dataframe = pd.DataFrame(train_metrics)
        dataframe.to_csv(savePath, header=False, index=False)

        # save - train mean metric
        savePath = model_path + "/metric/train_mean_epoch" + str(0 + epoch + 1).zfill(2) + ".csv"
        dataframe = pd.DataFrame(train_metrics_mean)
        dataframe.to_csv(savePath, header=False, index=False)

        print('-------------- TRAINING OF EPOCH ' + str(0 + epoch + 1).zfill(2) + ' FINISH ---------------')
        print('---------------------------------------------------------')
        print(' ')
        print(' ')
        print(' ')
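
# ---------------------------------------------------------------------------
# The weight update inside the rebalancing branch above can be read in
# isolation as the following self-contained restatement of the same
# arithmetic (a reference sketch, not an extra step of the training script):
# every valid loss term is scaled by how much slower it decreased than the
# weighted total, with the adjustment clipped to [1/2, 2].
# ---------------------------------------------------------------------------
import numpy as np

def rebalance_sketch(loss_weights, loss_valid, prev_loss, cur_loss, prev_total, cur_total, lam):
    new_weights = np.array(loss_weights, dtype=float)
    for j in range(len(loss_valid)):
        if loss_valid[j] and prev_total > 0:
            adjust = 1 + lam * ((cur_total / prev_total) * (prev_loss[j] / cur_loss[j]) - 1)
            adjust = min(max(adjust, 1.0 / 2.0), 2.0)        # clip the adjustment
            new_weights[j] = loss_weights[j] * adjust
        elif not loss_valid[j]:
            new_weights[j] = 0.0                             # disabled loss terms stay at zero
    return new_weights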
def run_single_step(self, epoch):
    loss_train, loss_valid = [], []

    for phase in ['train', 'valid']:
        if phase == 'train':
            self.model.train()
        else:
            self.model.eval()

        metrics = defaultdict(float)
        samples = 0

        for batch_i, sample_batch in enumerate(self.dataloader[phase]):
            log_str = "---- [Phase %s][Epoch %d/%d, Batch %d/%d] ----" % \
                      (phase, epoch, self.config.epochs, batch_i, len(self.dataloader[phase]))
            print(log_str)

            x, y_true = sample_batch
            x, y_true = x.float(), y_true.float()
            x, y_true = x.to(self.device), y_true.to(self.device)

            self.optimizer.zero_grad()

            # forward
            with torch.set_grad_enabled(phase == 'train'):
                y_pred = self.model(x)
                _loss = self.loss.forward(y_pred, y_true, metrics=metrics)

                if phase == 'valid':
                    loss_valid.append(_loss.item())

                # backward and optimize only in the training phase
                if phase == 'train':
                    loss_train.append(_loss.item())
                    _loss.backward()
                    self.optimizer.step()

            samples += x.size(0)

        print_metrics(metrics, samples, phase, epoch)
        epoch_loss = metrics['loss'] / samples

        # tensorboard
        if phase == 'train':
            self.logger.scalar_summary('Loss/train', np.mean(loss_train), epoch)
        else:
            self.logger.scalar_summary('Loss/valid', np.mean(loss_valid), epoch)

            # keep the weights with the best validation loss seen so far
            if epoch_loss < self.best_valid_loss:
                print('saving best model with {:4f} better than {:4f}'.format(
                    epoch_loss, self.best_valid_loss))
                self.best_valid_loss = epoch_loss
                self.best_model_wts = copy.deepcopy(self.model.state_dict())

    if epoch % self.config.save_step == 0:
        torch.save(
            self.model.state_dict(),
            os.path.join(self.config.save_folder, "model-unet-epo{}.pth".format(epoch)))
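
# ---------------------------------------------------------------------------
# `print_metrics(metrics, samples, phase, epoch)` is assumed to come from the
# project's utilities. The sketch below only assumes it averages the per-batch
# sums accumulated in `metrics` over the number of processed samples and
# prints them with the phase/epoch tag; it is not the actual helper.
# ---------------------------------------------------------------------------
def print_metrics_sketch(metrics, samples, phase, epoch):
    line = ", ".join("{}: {:.4f}".format(k, v / samples) for k, v in sorted(metrics.items()))
    print("[{}] epoch {}: {}".format(phase, epoch, line))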