def fit(self, envs, num_iterations, callback=False):
    """Train with a gradient-variance penalty: each environment's gradient is
    pulled toward the gradient of the averaged loss by penalizing the squared
    per-parameter deviation.

    Args:
        envs: dict whose envs["train"]["envs"] is a list of (inputs, targets).
        num_iterations: number of epochs over the environment list.
        callback: when True, report errors via utils.compute_errors each epoch.
    """
    for epoch in range(num_iterations):
        per_env_losses = []
        per_env_grads = []
        for inputs, targets in envs["train"]["envs"]:
            env_loss = self.loss(self.network(inputs), targets)
            per_env_losses.append(env_loss)
            per_env_grads.append(
                grad(env_loss, self.parameters(), create_graph=True))

        # Loss and gradient of the pooled (averaged) objective.
        mean_loss = sum(per_env_losses) / len(per_env_losses)
        mean_grad = grad(mean_loss, self.parameters(), create_graph=True)

        # Penalty: squared deviation of every environment gradient from the
        # mean gradient, summed over parameters and environments.
        penalty = 0
        for env_grad in per_env_grads:
            penalty = penalty + sum(
                (g - g_bar).pow(2).sum()
                for g, g_bar in zip(env_grad, mean_grad))

        self.optimizer.zero_grad()
        total_objective = mean_loss + self.hparams['penalty'] * penalty
        total_objective.backward()
        self.optimizer.step()

        if callback:
            # compute errors
            utils.compute_errors(self, envs)
def fit(self, envs, num_iterations, callback=False):
    """IRM-style training: minimize a convex mix of the averaged environment
    loss and a penalty on the per-environment gradients w.r.t. the dummy
    parameters (version 1: squared-gradient-norm penalty).

    Args:
        envs: dict whose envs["train"]["envs"] is a list of (x, y) pairs.
        num_iterations: number of epochs over the environment list.
        callback: when True, report errors via utils.compute_errors each epoch.

    Raises:
        NotImplementedError: for any penalty version other than 1.
    """
    for epoch in range(num_iterations):
        env_losses = [
            self.loss(self.network(x), y) for x, y in envs["train"]["envs"]
        ]
        env_grads = [
            grad(env_loss, self.net_dummies, create_graph=True)
            for env_loss in env_losses
        ]

        # Average loss across envs, and its gradient w.r.t. the dummies
        # (the average gradient is computed for parity with other penalty
        # versions; the version-1 penalty does not consume it).
        mean_loss = sum(env_losses) / len(env_losses)
        mean_grads = grad(mean_loss, self.net_dummies, create_graph=True)

        penalty = 0
        for grads_this_env in env_grads:
            for g_env, g_mean in zip(grads_this_env, mean_grads):
                if self.version != 1:
                    raise NotImplementedError
                penalty = penalty + g_env.pow(2).sum()

        lam = self.hparams["irm_lambda"]
        objective = (1 - lam) * mean_loss + lam * penalty

        self.optimizer.zero_grad()
        objective.backward()
        self.optimizer.step()

        if callback:
            # compute errors
            utils.compute_errors(self, envs)
def full_weighted_cv(X, y, Ds, lambda_gtv=np.linspace(.1, 1, 10), lambda_lasso=None, t=50, auto_cv=True, alpha=.9, k=5):
    """Grid-search graph-total-variation (GTV) and lasso penalties on a
    temporal train/test split, optionally with exponential recency weighting.

    Args:
        X, y: design matrix and response vector.
        Ds: dict mapping method name -> difference/penalty matrix D.
        lambda_gtv: iterable of GTV penalty values to scan.
        lambda_lasso: iterable of lasso penalties (used when auto_cv=False).
        t: split index passed to temporal_split.
        auto_cv: when True, choose the lasso penalty by 5-fold cvglmnet CV;
            otherwise scan lambda_lasso explicitly.
        alpha: exponential decay factor; alpha < 1 enables recency weighting.
        k: unused here; kept for interface compatibility.

    Returns:
        pd.DataFrame with one row per (method, lambda_tv, lambda_1) combination
        and train/test MSE and R^2 columns.
    """
    errors = []
    X_train, X_test, y_train, y_test = temporal_split(X, y, t)
    if alpha < 1:
        # Recency weighting: scale rows by sqrt(alpha^(n-i)) so weighted least
        # squares becomes an ordinary least-squares problem. (Loop variable
        # renamed from `t` to avoid shadowing the split parameter.)
        n = X_train.shape[0]
        weights = np.array([alpha**(n - i) for i in np.arange(1, n + 1)])
        X_train = X_train * np.sqrt(weights.reshape(-1, 1))
        y_train = y_train * np.sqrt(weights)
    n, p = X_train.shape
    # test errors
    for l1 in lambda_gtv:
        for m in Ds:
            D = Ds[m]
            if auto_cv:
                XD, bigY, invD = augmented_system_lasso(X_train, y_train, D, l1, 0, l1_only=True)
                fit = cvglmnet(x=XD, y=bigY, family='gaussian', ptype='mse', nfolds=5)
                b = cvglmnetCoef(fit, s='lambda_min')
                l3 = fit['lambda_min'][0]
                # Flatten the (p+1, 1) coefficient column, drop the intercept,
                # then map back through invD to the original coordinates.
                beta = invD @ b.reshape(b.shape[0])[1:]
                mset, r2t = compute_errors(y_train, X_train @ beta)
                mse, r2 = compute_errors(y_test, X_test @ beta)
                errors.append([m, l1, l3, mset, r2t, mse, r2])
            else:
                for l3 in lambda_lasso:
                    XD, bigY, invD = augmented_system_lasso(X_train, y_train, D, l1 / l3, 0, l1_only=True)
                    #XD, bigY, invD = epsilon_system_lasso(X_train, y_train, D, l1)
                    fit = glmnet(x=XD, y=bigY)
                    # np.array([...], dtype=np.float64) replaces the removed
                    # scipy.float64 alias (NumPy scalar aliases were dropped
                    # from the SciPy namespace).
                    b = glmnetCoef(fit, s=np.array([l3], dtype=np.float64), exact=False)
                    beta = invD @ b.reshape(b.shape[0])[1:]
                    mset, r2t = compute_errors(y_train, X_train @ beta)
                    mse, r2 = compute_errors(y_test, X_test @ beta)
                    errors.append([m, l1, l3, mset, r2t, mse, r2])
    df = pd.DataFrame(errors, columns=['method', 'lambda_tv', 'lambda_1', 'train_mse', 'train_r2', 'test_mse', 'test_r2'])
    return df
def fit(self, envs, num_iterations, callback=False):
    """ERM training: pool every training environment into one dataset and
    minimize the average loss over it.

    Args:
        envs: dict whose envs["train"]["envs"] is a list of (x, y) tensors.
        num_iterations: number of gradient steps on the pooled data.
        callback: when True, report errors via utils.compute_errors each step.
    """
    # Concatenate all environments once, up front.
    inputs, targets = zip(*envs["train"]["envs"])
    pooled_x = torch.cat(list(inputs))
    pooled_y = torch.cat(list(targets))
    for epoch in range(num_iterations):
        self.optimizer.zero_grad()
        erm_loss = self.loss(self.network(pooled_x), pooled_y)
        erm_loss.backward()
        self.optimizer.step()
        if callback:
            # compute errors
            utils.compute_errors(self, envs)
def fit(self, envs, num_iterations, callback=False):
    """Train by handing per-environment losses to self.mask_step (AND-mask
    style update), with hyperparameters drawn from self.hparams.

    Args:
        envs: dict whose envs["train"]["envs"] is a list of (x, y) pairs.
        num_iterations: number of mask_step updates to run.
        callback: when True, report errors via utils.compute_errors each step.
    """
    for epoch in range(num_iterations):
        per_env_losses = []
        for x, y in envs["train"]["envs"]:
            per_env_losses.append(self.loss(self.network(x), y))
        self.mask_step(
            per_env_losses,
            list(self.parameters()),
            tau=self.hparams["tau"],
            wd=self.hparams["wd"],
            lr=self.hparams["lr"],
        )
        if callback:
            # compute errors
            utils.compute_errors(self, envs)
def main():
    """Entry point for depth-estimation training.

    Parses a YAML config, creates a fresh experiment log directory, optionally
    restores weights from a previous checkpoint, builds concatenated train/val
    dataloaders, and runs the SSIM+L1 training loop with TensorBoard logging
    and periodic checkpointing.
    """
    # Arguments
    parser = argparse.ArgumentParser(description='High Quality Monocular Depth Estimation via Transfer Learning')
    parser.add_argument('-c', '--configFile', required=True, help='Path to config yaml file', metavar='path/to/config')
    args = parser.parse_args()

    CONFIG_FILE_PATH = args.configFile
    with open(CONFIG_FILE_PATH) as fd:
        # NOTE(review): oyaml.load without an explicit Loader is deprecated and
        # unsafe on untrusted input — confirm the config source is trusted.
        config_yaml = oyaml.load(fd)  # Returns an ordered dict. Used for printing
    config = AttrDict(config_yaml)
    print(colored('Config being used for training:\n{}\n\n'.format(oyaml.dump(config_yaml)), 'green'))

    # Create a new directory to save logs: exp-001, exp-002, ... continuing
    # from the highest existing run id.
    runs = sorted(glob.glob(os.path.join(config.train.logsDir, 'exp-*')))
    prev_run_id = int(runs[-1].split('-')[-1]) if runs else 0
    MODEL_LOG_DIR = os.path.join(config.train.logsDir, 'exp-{:03d}'.format(prev_run_id + 1))
    CHECKPOINT_DIR = os.path.join(MODEL_LOG_DIR, 'checkpoints')
    os.makedirs(CHECKPOINT_DIR)
    print('Saving logs to folder: ' + colored('"{}"'.format(MODEL_LOG_DIR), 'blue'))

    # Save a copy of config file in the logs
    shutil.copy(CONFIG_FILE_PATH, os.path.join(MODEL_LOG_DIR, 'config.yaml'))

    # Create a tensorboard object and Write config to tensorboard
    writer = SummaryWriter(MODEL_LOG_DIR, comment='create-graph')
    string_out = io.StringIO()
    oyaml.dump(config_yaml, string_out, default_flow_style=False)
    config_str = string_out.getvalue().split('\n')
    string = ''
    for line in config_str:
        string = string + ' ' + line + '\n\r'
    writer.add_text('Config', string, global_step=None)

    # Create model
    model = Model()
    print('Model created.')

    # to continue training from a checkpoint
    if config.train.continueTraining:
        print('Transfer Learning enabled. Model State to be loaded from a prev checkpoint...')
        if not os.path.isfile(config.train.pathPrevCheckpoint):
            raise ValueError('Invalid path to the given weights file for transfer learning.\
 The file {} does not exist'.format(config.train.pathPrevCheckpoint))
        CHECKPOINT = torch.load(config.train.pathPrevCheckpoint, map_location='cpu')
        if 'model_state_dict' in CHECKPOINT:
            # Newer weights file with various dicts
            print(colored('Continuing training from checkpoint...Loaded data from checkpoint:', 'green'))
            print('Config Used to train Checkpoint:\n', oyaml.dump(CHECKPOINT['config']), '\n')
            print('From Checkpoint: Last Epoch Loss:', CHECKPOINT['epoch_loss'], '\n\n')
            model.load_state_dict(CHECKPOINT['model_state_dict'])
        elif 'state_dict' in CHECKPOINT:
            # reading original authors checkpoints: drop the final prediction
            # layers so our own heads replace them (loaded with strict=False).
            if config.train.model != 'rednet':
                # original author deeplab checkpoint
                CHECKPOINT['state_dict'].pop('decoder.last_conv.8.weight')
                CHECKPOINT['state_dict'].pop('decoder.last_conv.8.bias')
            else:
                # rednet checkpoint
                # print(CHECKPOINT['state_dict'].keys())
                CHECKPOINT['state_dict'].pop('final_deconv.weight')
                CHECKPOINT['state_dict'].pop('final_deconv.bias')
                CHECKPOINT['state_dict'].pop('out5_conv.weight')
                CHECKPOINT['state_dict'].pop('out5_conv.bias')
                CHECKPOINT['state_dict'].pop('out4_conv.weight')
                CHECKPOINT['state_dict'].pop('out4_conv.bias')
                CHECKPOINT['state_dict'].pop('out3_conv.weight')
                CHECKPOINT['state_dict'].pop('out3_conv.bias')
                CHECKPOINT['state_dict'].pop('out2_conv.weight')
                CHECKPOINT['state_dict'].pop('out2_conv.bias')
            model.load_state_dict(CHECKPOINT['state_dict'], strict=False)
        else:
            # Old checkpoint containing only model's state_dict()
            model.load_state_dict(CHECKPOINT)

    # Enable Multi-GPU training
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    if torch.cuda.device_count() > 1:
        print('Multiple GPUs being used, can\'t save model graph to Tensorboard')
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Training parameters
    optimizer = torch.optim.Adam(model.parameters(), config.train.optimAdam.learningRate)
    batch_size = config.train.batchSize
    prefix = 'densenet_' + str(batch_size)

    # Load data: each configured dataset becomes one dataset object; they are
    # concatenated into a single loader per split.
    train_loader_list = []
    test_loader_list = []
    for dataset in config.train.datasetsTrain:
        train_data = getTrainingTestingData('rgb', 'train', dataset.images, dataset.labels)
        train_loader_list.append(train_data)
    for dataset in config.train.datasetsVal:
        print(dataset.images)
        test_data = getTrainingTestingData('rgb', 'eval', dataset.images, dataset.labels)
        test_loader_list.append(test_data)
    train_loader = DataLoader(torch.utils.data.ConcatDataset(train_loader_list), batch_size, num_workers=config.train.numWorkers, shuffle=True, drop_last=True, pin_memory=True)
    test_loader = DataLoader(torch.utils.data.ConcatDataset(test_loader_list), batch_size, num_workers=config.train.numWorkers, shuffle=False, drop_last=True, pin_memory=True)
    print(len(torch.utils.data.ConcatDataset(train_loader_list)))
    print(len(train_loader))
    print(len(test_loader))

    # Create a tensorboard object and Write config to tensorboard
    # NOTE(review): this re-creates the SummaryWriter built above for the same
    # log dir — confirm whether the second instance is intentional.
    writer = SummaryWriter(MODEL_LOG_DIR, comment='create-graph')

    # Loss
    l1_criterion = nn.L1Loss()

    total_iter_num = 0
    # Start training...
    for epoch in range(config.train.numEpochs):
        batch_time = AverageMeter()
        losses = AverageMeter()
        N = len(train_loader)

        # Log the current Epoch Number
        writer.add_scalar('data/Epoch Number', epoch, total_iter_num)

        # Switch to train mode
        model.train()
        end = time.time()

        running_loss = 0.0
        for i, sample_batched in enumerate(train_loader):
            optimizer.zero_grad()
            total_iter_num += 1

            # Prepare sample and target
            image = torch.autograd.Variable(sample_batched['image'].cuda())
            depth = torch.autograd.Variable(sample_batched['depth'].cuda(non_blocking=True))

            # Normalize depth
            depth_n = DepthNorm( depth )

            # Predict
            output = model(image)

            # Compute the loss: weighted sum of SSIM (clamped to [0, 1]) and L1.
            l_depth = l1_criterion(output, depth_n)
            l_ssim = torch.clamp((1 - ssim(output, depth_n, val_range = 1000.0 / 10.0)) * 0.5, 0, 1)
            loss = (1.0 * l_ssim) + (0.1 * l_depth)

            # Update step
            losses.update(loss.data.item(), image.size(0))
            loss.backward()
            optimizer.step()

            # statistics
            running_loss += loss.item()

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            eta = str(datetime.timedelta(seconds=int(batch_time.val*(N - i))))

            # Log progress
            niter = epoch*N+i
            if i % 5 == 0:
                # Print to console
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.sum:.3f})\t'
                      'ETA {eta}\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})'
                      .format(epoch, i, N, batch_time=batch_time, loss=losses, eta=eta))
                # Log to tensorboard
                writer.add_scalar('Train/Loss', losses.val, niter)
            if i % 50 == 0:
                LogProgress(model, writer, test_loader, niter)

        # Log Epoch Loss
        epoch_loss = running_loss / (len(train_loader))
        writer.add_scalar('data/Train Epoch Loss', epoch_loss, total_iter_num)
        print('\nTrain Epoch Loss: {:.4f}'.format(epoch_loss))
        # NOTE(review): metrics here are computed on the LAST training batch
        # only — confirm that is the intended epoch summary.
        metrics = compute_errors(depth_n, output)
        print(metrics)
        for keys, values in metrics.items():
            print(str(keys) + ':' + str(values))

        # Record epoch's intermediate results
        LogProgress(model, writer, test_loader, niter)
        writer.add_scalar('Train/Loss.avg', losses.avg, epoch)

        # Save the model checkpoint every N epochs
        if (epoch % config.train.saveModelInterval) == 0:
            filename = os.path.join(CHECKPOINT_DIR, 'checkpoint-epoch-{:04d}.pth'.format(epoch))
            if torch.cuda.device_count() > 1:
                model_params = model.module.state_dict()  # Saving nn.DataParallel model
            else:
                model_params = model.state_dict()
            torch.save(
                {
                    'model_state_dict': model_params,
                    'optimizer_state_dict': optimizer.state_dict(),
                    'epoch': epoch,
                    'total_iter_num': total_iter_num,
                    'epoch_loss': epoch_loss,
                    'config': config_yaml
                }, filename)
l_ssim = torch.clamp( (1 - ssim(outputs, depth_n, val_range=1000.0 / 10.0)) * 0.5, 0, 1) loss = (1.0 * l_ssim) + (0.1 * l_depth) running_loss += loss.item() # Save output images, one at a time, to results inputs_tensor = image.detach().cpu() output_tensor = outputs.detach().cpu() label_tensor = depth_n.detach().cpu() depth_metric = depth * (config.train.max_depth / 1000.0) outputs_tmp = DepthNorm(outputs) outputs_metric = outputs_tmp * (config.train.max_depth / 1000.0) metrics = compute_errors(depth_metric, outputs_metric) # print(metrics) for keys, values in metrics.items(): print(str(keys) + ': ' + str(values)) # Extract each tensor within batch and save results for iii, sample_batched in enumerate( zip(inputs_tensor, output_tensor, label_tensor)): input, output, label = sample_batched if key == 'real': RESULTS_DIR = config.eval.resultsDirReal else: RESULTS_DIR = config.eval.resultsDirSynthetic result_path = os.path.join(
def trainAndVal(loader, model, l1_criterion, optimizer=None):
    """Run one pass over `loader`.

    With an optimizer the model is put in train mode and updated per batch;
    with optimizer=None the model is evaluated and depth-error metrics are
    printed at the end. Returns the running average of the combined
    SSIM+L1 loss.

    Args:
        loader: DataLoader yielding dicts with 'image' and 'depth' tensors.
        model: network mapping image batch -> depth prediction.
        l1_criterion: L1 loss module.
        optimizer: optimizer for training, or None for validation.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    rsmes = AverageMeter()
    if (optimizer):
        # switch to train mode
        model.train()
        print('Train', flush=True)
    else:
        # switch to evaluate mode
        model.eval()
        print('Val', flush=True)
    N = len(loader)
    end = time.time()
    start = end
    if (optimizer is None):
        # accumulate predictions/targets for end-of-epoch metrics
        predictions = []
        testSetDepths = []
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # process epoch
    for i, sample_batched in enumerate(loader):
        # Prepare sample and target
        image = sample_batched['image'].to(device)
        depth = sample_batched['depth'].to(device)
        # Normalize depth
        depth_n = DepthNorm(depth)
        # Predict
        output = model(image)
        # Compute the loss: clamped SSIM term plus weighted L1 term
        l_depth = l1_criterion(output, depth_n)
        l_ssim = torch.clamp(
            (1 - ssim(output, depth_n, val_range=1000.0 / 10.0)) * 0.5, 0, 1)
        loss = (1.0 * l_ssim) + (0.1 * l_depth)
        # measure accuracy and record loss
        losses.update(loss.data, image.size(0))
        rmse = (depth_n.data.cpu() - output.data.cpu())**2
        rmse = np.sqrt(rmse.mean())
        rsmes.update(rmse, image.size(0))
        if (optimizer):
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        eta = str(datetime.timedelta(seconds=int(batch_time.avg * (N - i))))
        total = str(
            datetime.timedelta(seconds=int((time.time() - start) +
                                           batch_time.avg * (N - i))))
        # NOTE(review): minDepth/maxDepth are assigned but never used here —
        # confirm whether they were meant to clamp predictions.
        minDepth = 10
        maxDepth = 1000
        if (optimizer is None):
            predictions.append(output.squeeze().data.cpu())
            testSetDepths.append(depth_n.squeeze().data.cpu())
        if i % 5 == 0:
            # Text progress bar plus running timing/loss statistics.
            p = 100 * i / N
            bar = "[%-10s] %d%%" % ('=' * int(p * 10 / 100) + '.' *
                                    (10 - int(p * 10 / 100)), p)
            print('[{0}/{1}] {2} - '
                  'Batch Time: {batch_time.val:.2f} ({batch_time.avg:.2f}) '
                  'ETA: {eta}/{total} '
                  'Loss: {loss.val:.3f} ({loss.avg:.3f}) '
                  'RSME: {rsme.val:.3f} ({rsme.avg:.3f})'.format(
                      i, N, bar, batch_time=batch_time, eta=eta, total=total,
                      loss=losses, rsme=rsmes), flush=True)
        # NOTE(review): this break stops after the FIRST batch every call —
        # looks like a debugging/smoke-test leftover; confirm before removing.
        break
    if (optimizer is None):
        # Stack all collected batches and report the standard depth metrics.
        predictions = np.vstack(predictions)
        testSetDepths = np.vstack(testSetDepths)
        e = compute_errors(predictions, testSetDepths)
        print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(
            'a1', 'a2', 'a3', 'rel', 'rms', 'log_10'))
        print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".
              format(e[0], e[1], e[2], e[3], e[4], e[5]))
    return losses.avg
def validate_depth_with_gt(val_loader, disp_net, criterion, epoch, logger, tb_writer, global_vars_dict=None):
    """Validate disp_net against ground-truth depth.

    Converts predicted disparity to depth (255 / disparity), accumulates the
    standard depth error metrics, and periodically writes sample images and
    histograms to TensorBoard.

    Args:
        val_loader: loader yielding (tgt_img, depth_gt) batches.
        disp_net: disparity prediction network.
        criterion: unused here (kept for interface compatibility).
        epoch: current epoch; epoch 0 triggers the pre-training baseline logging.
        logger: terminal logger (used when args.log_terminal).
        tb_writer: TensorBoard SummaryWriter.
        global_vars_dict: dict providing 'device', 'args' and the running
            'n_iter_val_depth' counter (updated in place).

    Returns:
        (error_names, errors): metric names and their AverageMeter.
    """
    device = global_vars_dict['device']
    args = global_vars_dict['args']
    n_iter_val_depth = global_vars_dict['n_iter_val_depth']

    # Convert fractional sample positions into integer batch indices.
    show_samples = copy.deepcopy(args.show_samples)
    for i in range(len(show_samples)):
        show_samples[i] *= len(val_loader)
        show_samples[i] = show_samples[i] // 1

    batch_time = AverageMeter()
    error_names = ['abs_diff', 'abs_rel', 'sq_rel', 'a1', 'a2', 'a3']
    errors = AverageMeter(i=len(error_names), precision=3)

    # switch to evaluate mode
    disp_net.eval()

    end = time.time()
    fig = plt.figure(1, figsize=(8, 6))

    #criterion = MaskedL1Loss().to(device)  # L1 loss is easy to optimize
    for i, (tgt_img, depth_gt) in enumerate(val_loader):
        tgt_img = tgt_img.to(device)  # BCHW
        depth_gt = depth_gt.to(device)

        output_disp = disp_net(tgt_img)  # BCHW

        if args.spatial_normalize:
            output_disp = spatial_normalize(output_disp)
        output_depth = 255 / output_disp

        #err = compute_errors2(depth_gt.data.squeeze(1), output_depth.data.squeeze(1))
        err = compute_errors(gt=depth_gt.data.squeeze(1),
                             pred=output_depth.data.squeeze(1),
                             crop=False)

        ver_gt = VGSmap(depth_gt)
        ver_pre = VGSmap(output_depth)

        errors.update(err)
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        fig = plt.figure(1, figsize=(8, 6))

        if args.img_freq > 0 and i in show_samples:  # output_writers list(3)
            if epoch == 0:  # validation before training, to baseline the network
                #1.img
                # Runs only once; note for ref_imgs axis 0 is the batch index
                # and axis 1 indexes the list of adjacent frames!
                tb_writer.add_image('epoch 0 Input/sample{}'.format(i), tensor2array(tgt_img[0]), 0)
                tb_writer.add_image('epoch 0 depth_gt/sample{}'.format(i), tensor2array(depth_gt[0], colormap='bone'), 0)
                tb_writer.add_image(
                    'Depth Output/sample{}'.format(i),
                    tensor2array(output_depth[0], max_value=None, colormap='bone'), 0)
                plt.hist(tensor2array(depth_gt[0], colormap='bone').flatten() * 256, 256, [0, 256], color='r')
                tb_writer.add_figure(tag='histogram_gt/sample{}'.format(i), figure=fig, global_step=0)
            else:
                #2.disp
                # tensor disp_to_show: [1,h,w], 0.5~3.1~10
                #disp2show = tensor2array(output_disp[0], max_value=None,colormap='bone')
                depth2show = tensor2array(output_depth[0], max_value=None, colormap='bone')
                #tb_writer.add_image('Disp Output/sample{}'.format(i), disp2show, epoch)
                tb_writer.add_image('Depth Output/sample{}'.format(i), depth2show, epoch)
                #add_figure
                plt.hist(depth2show.flatten() * 256, 256, [0, 256], color='r')
                tb_writer.add_figure(tag='histogram_sample/sample{}'.format(i), figure=fig, global_step=epoch)

        # add scalar
        if args.scalar_freq > 0 and n_iter_val_depth % args.scalar_freq == 0:
            pass
            #h_loss = HistgramLoss()(tgt_img, depth_gt)
            #tb_writer.add_scalar('batch/val_h_loss', h_loss, n_iter_val_depth)
            #tb_writer.add_scalar('batch/' + error_names[1], errors.val[1], n_iter_val_depth)
            #tb_writer.add_scalar('batch/' + error_names[2], errors.val[2], n_iter_val_depth)
            #tb_writer.add_scalar('batch/' + error_names[3], errors.val[3], n_iter_val_depth)
            #tb_writer.add_scalar('batch/' + error_names[4], errors.val[4], n_iter_val_depth)
            #tb_writer.add_scalar('batch/' + error_names[5], errors.val[5], n_iter_val_depth)

        if args.log_terminal:
            logger.valid_logger_update(batch=i, time=batch_time, names=error_names, values=errors)

        n_iter_val_depth += 1
    #end for

    #if args.log_terminal:
    #    logger.valid_bar.update(len(val_loader))

    global_vars_dict['n_iter_val_depth'] = n_iter_val_depth
    return error_names, errors
_, pose3d_out_, pose3d_gt_, loss_, image_, pose2d_gt_ = sess.run([ train_op, pose3d_out, pose3d_gt, loss, image, sample['pose2d_crop'] ]) # Display training status epoch_cur = i * opt.batch_size // meta_info.NUM_SAMPLES_H36 iter_cur = (i * opt.batch_size) % meta_info.NUM_SAMPLES_H36 t.set_postfix(epoch=epoch_cur, iter_percent="%d %%" % (iter_cur / float(meta_info.NUM_SAMPLES_H36) * 100), loss='%.3f' % loss_) # Log numerical reuslts if i % opt.freq_log == 0: mpjpe_, pa_mpjpe_ = compute_errors(pose3d_out_, pose3d_gt_) log(tag='train/loss', step=i, writer=summary_writer, value=loss_) log(tag='train/mpjpe', step=i, writer=summary_writer, value=mpjpe_) log(tag='train/pa_mpjpe', step=i, writer=summary_writer, value=pa_mpjpe_) # Log visual reuslts if i % opt.freq_display == 0:
def validate(args, model, test_loader, criterion_ueff, epoch, epochs, device='cpu'):
    """Validate an adaptive-bins depth model on test_loader.

    Computes the dense loss (SI-style, via criterion_ueff), then clips and
    masks predictions per the min/max eval depths — optionally with the Garg
    or Eigen crop — and accumulates utils.compute_errors metrics.

    Args:
        args: namespace with min_depth, min/max_depth_eval, garg_crop,
            eigen_crop, dataset, and distributed-rank info.
        model: network returning (bins, pred) for an image batch.
        test_loader: validation loader yielding dicts with 'image'/'depth'.
        criterion_ueff: dense depth loss.
        epoch, epochs: current/total epoch, for the progress-bar caption.
        device: device to move batches to.

    Returns:
        (metrics dict, RunningAverage of the dense loss).
    """
    with torch.no_grad():
        val_si = RunningAverage()
        # val_bins = RunningAverage()
        metrics = utils.RunningAverageDict()
        # Only rank zero shows a tqdm progress bar.
        for batch in tqdm(test_loader, desc=f"Epoch: {epoch + 1}/{epochs}. Loop: Validation" ) if is_rank_zero(args) else test_loader:
            img = batch['image'].to(device)
            depth = batch['depth'].to(device)
            if 'has_valid_depth' in batch:
                if not batch['has_valid_depth']:
                    continue  # skip samples without usable ground truth
            depth = depth.squeeze().unsqueeze(0).unsqueeze(0)
            bins, pred = model(img)

            mask = depth > args.min_depth
            l_dense = criterion_ueff(pred, depth, mask=mask.to(torch.bool), interpolate=True)
            val_si.append(l_dense.item())

            # Resize the prediction to the ground-truth resolution, then clip
            # to the eval range and sanitize inf/nan values.
            pred = nn.functional.interpolate(pred, depth.shape[-2:], mode='bilinear', align_corners=True)
            pred = pred.squeeze().cpu().numpy()
            pred[pred < args.min_depth_eval] = args.min_depth_eval
            pred[pred > args.max_depth_eval] = args.max_depth_eval
            pred[np.isinf(pred)] = args.max_depth_eval
            pred[np.isnan(pred)] = args.min_depth_eval

            gt_depth = depth.squeeze().cpu().numpy()
            valid_mask = np.logical_and(gt_depth > args.min_depth_eval, gt_depth < args.max_depth_eval)

            # Optional evaluation crops (standard Garg / Eigen protocols).
            if args.garg_crop or args.eigen_crop:
                gt_height, gt_width = gt_depth.shape
                eval_mask = np.zeros(valid_mask.shape)

                if args.garg_crop:
                    eval_mask[int(0.40810811 * gt_height):int(0.99189189 * gt_height),
                              int(0.03594771 * gt_width):int(0.96405229 * gt_width)] = 1

                elif args.eigen_crop:
                    if args.dataset == 'kitti':
                        eval_mask[int(0.3324324 * gt_height):int(0.91351351 * gt_height),
                                  int(0.0359477 * gt_width):int(0.96405229 * gt_width)] = 1
                    else:
                        eval_mask[45:471, 41:601] = 1
                valid_mask = np.logical_and(valid_mask, eval_mask)

            metrics.update(
                utils.compute_errors(gt_depth[valid_mask], pred[valid_mask]))

        return metrics.get_value(), val_si
def train():
    """ Runs data processing scripts to turn raw data from (../raw) into
        cleaned data ready to be analyzed (saved in ../processed).
    """
    # NOTE(review): the docstring above looks copy-pasted from a data-processing
    # template — this function actually trains an ST-ResNet on BikeNYC with a
    # train/val split, early stopping, checkpointing, and optional HA
    # benchmarks. Confirm and update.
    logger = logging.getLogger(__name__)
    logger.info('training...')

    # data loader
    train_dataset = make_dataloader(dataset_name='bikenyc', mode='train', len_closeness=len_closeness, len_period=len_period, len_trend=len_trend)

    # Creating data indices for training and validation splits:
    dataset_size = len(train_dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(validation_split * dataset_size))
    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]
    val_timestamps = [
        train_dataset.timestamp_train[i] for i in indices[:split]
    ]
    val_Y = [train_dataset.Y_data[i] for i in indices[:split]]
    print('training size:', len(train_indices))
    print('val size:', len(val_indices))

    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)
    training_generator = data.DataLoader(train_dataset, **params, sampler=train_sampler)
    val_generator = data.DataLoader(train_dataset, **params, sampler=valid_sampler)

    # Total iterations
    total_iters = np.ceil(len(train_indices) / batch_size) * epoch_nums

    # model: three branches (closeness / period / trend) plus external features
    model = stresnet((len_closeness, nb_flow, map_height, map_width),
                     (len_period, nb_flow, map_height, map_width),
                     (len_trend, nb_flow, map_height, map_width),
                     external_dim=8, nb_residual_unit=nb_residual_unit)

    if LOAD_INITIAL:
        logger.info('\tload initial_checkpoint = %s\n' % initial_checkpoint)
        model.load_state_dict(
            torch.load(initial_checkpoint, map_location=lambda storage, loc: storage))

    #model.apply(weight_init)

    # Loss and optimizer
    loss_fn = nn.MSELoss()  # nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    loss_fn.to(device)

    # Train the model
    es = EarlyStopping(patience=early_stop_patience, mode='min', model=model, save_path=checkpoint_dir + '/%s/model.best.pth' % (model_name))
    for e in range(epoch_nums):
        for i, (X_c, X_p, X_t, X_meta, Y_batch) in enumerate(training_generator):
            #epoch = i * batch_size / len(train_loader)
            # Move tensors to the configured device
            X_c = X_c.type(torch.FloatTensor).to(device)
            X_p = X_p.type(torch.FloatTensor).to(device)
            X_t = X_t.type(torch.FloatTensor).to(device)
            X_meta = X_meta.type(torch.FloatTensor).to(device)
            #print(X_meta[0])
            Y_batch = Y_batch.type(torch.FloatTensor).to(device)

            # Forward pass
            outputs = model(X_c, X_p, X_t, X_meta)
            #print(outputs[0])
            loss = loss_fn(
                outputs.reshape(len(outputs), map_width, map_height), Y_batch)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        its = np.ceil(len(train_indices) / batch_size) * (e + 1)  # iterations at specific epochs
        print('Epoch [{}/{}], step [{}/{}], Loss: {:.4f}'.format(
            e + 1, epoch_nums, its, total_iters, loss.item()))

        # valid after each training epoch
        val_loss = valid(model, val_generator, compute_errors, device)

        if es.step(val_loss):
            print('early stopped! With val loss:', val_loss)
            break  # early stop criterion is met, we can stop now

        if e in epoch_save:
            # Periodic snapshot of both model weights and optimizer state.
            torch.save(model.state_dict(), checkpoint_dir + '/%s/%08d_model.pth' % (model_name, e))
            torch.save(
                {
                    'optimizer': optimizer.state_dict(),
                    'iter': its,
                    'epoch': e,
                }, checkpoint_dir + '/%s/%08d_optimizer.pth' % (model_name, e))
            logger.info(checkpoint_dir + '/%s/%08d_model.pth' % (model_name, e) + ' saved!')

    # Final pass over the training generator to report aggregate errors.
    rmse_list = []
    mse_list = []
    mae_list = []
    for i, (X_c, X_p, X_t, X_meta, Y_batch) in enumerate(training_generator):
        # Move tensors to the configured device
        X_c = X_c.type(torch.FloatTensor).to(device)
        X_p = X_p.type(torch.FloatTensor).to(device)
        X_t = X_t.type(torch.FloatTensor).to(device)
        X_meta = X_meta.type(torch.FloatTensor).to(device)
        #Y_batch = Y_batch.type(torch.FloatTensor).to(device)

        # Forward pass
        outputs = model(X_c, X_p, X_t, X_meta)  #.cpu().data.numpy()
        mse, mae, rmse = compute_errors(
            outputs.cpu().data.numpy(), Y_batch.data.numpy()
        )  #original version, bug has appeared where shape is x,1,32,32 ratehr than x,32,32? this did not happen 3 weeks ago...
        # mse, mae, rmse = compute_errors(outputs.reshape(len(outputs),map_width, map_height), Y_batch.data.numpy())
        rmse_list.append(rmse)
        mse_list.append(mse)
        mae_list.append(mae)

    rmse = np.mean(rmse_list)
    mse = np.mean(mse_list)
    mae = np.mean(mae_list)

    # Real-scale RMSE undoes the min-max normalization via the mmn scaler.
    print('Training mse: %.6f mae: %.6f rmse (norm): %.6f, rmse (real): %.6f' %
          (mse, mae, rmse, rmse * (train_dataset.mmn._max - train_dataset.mmn._min) / 2. * m_factor))

    if COMPARE_TO_HA:
        print("Preparing Benchmark Scores, this may take a few minutes.....")
        # return compare_to_ha(compute_errors, val_timestamps, val_Y, train_dataset.mmn)
        mse_benchmark, mae_benchmark, rmse_benchmark = compare_to_simple_ha(
            compute_errors, val_timestamps, val_Y, train_dataset.mmn)
        print(
            'Simple HA Benchmark mse: %.6f mae: %.6f rmse (norm): %.6f, rmse (real): %.6f'
            % (mse_benchmark, mae_benchmark, rmse_benchmark, rmse_benchmark *
               (train_dataset.mmn._max - train_dataset.mmn._min) / 2. * m_factor))
        mse_benchmark, mae_benchmark, rmse_benchmark = compare_to_tuned_ha(
            compute_errors, val_timestamps, val_Y, train_dataset.mmn)
        print(
            'Tuned HA Benchmark mse: %.6f mae: %.6f rmse (norm): %.6f, rmse (real): %.6f'
            % (mse_benchmark, mae_benchmark, rmse_benchmark, rmse_benchmark *
               (train_dataset.mmn._max - train_dataset.mmn._min) / 2. * m_factor))
def eval_depth(self):
    """Evaluate predicted depth against ground truth.

    Runs the network over self.img_paths, converts disparity to depth, then
    applies the standard KITTI-style evaluation: Garg crop, per-image median
    scaling, clipping to [MIN_DEPTH, MAX_DEPTH], and compute_errors metrics,
    printed as a LaTeX-friendly table.
    """
    pred_depths = []
    pred_disps = []
    errors = []
    ratios = []

    # Predict
    print('doing evaluation...')
    for i, img_path in enumerate(self.img_paths):
        img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.params.input_w, self.params.input_h))
        img = tf.expand_dims(
            tf.convert_to_tensor(img, tf.float32) / 255., 0)
        outputs = self.val_step(img)
        _, depth = disp_to_depth(outputs['disparity0'], min_depth=MIN_DEPTH, max_depth=MAX_DEPTH)
        # Scale by 0.54 — presumably the KITTI stereo baseline in meters;
        # TODO confirm against the training-time scaling.
        depth *= 0.54
        pred_depths.append(depth.numpy())
        pred_disps.append(np.squeeze(outputs['disparity0'].numpy()))

    for i in range(len(pred_depths)):
        gt_depth = self.gt_depths[i]
        gt_height, gt_width = gt_depth.shape[:2]
        pred_depth = pred_depths[i][0]
        pred_depth = cv2.resize(pred_depth, (gt_width, gt_height))

        mask = np.logical_and(gt_depth > MIN_DEPTH, gt_depth < MAX_DEPTH)
        # Garg crop: restrict evaluation to the standard image sub-window.
        crop = np.array([
            0.40810811 * gt_height, 0.99189189 * gt_height,
            0.03594771 * gt_width, 0.96405229 * gt_width
        ]).astype(np.int32)
        crop_mask = np.zeros(mask.shape)
        crop_mask[crop[0]:crop[1], crop[2]:crop[3]] = 1
        mask = np.logical_and(mask, crop_mask)

        pred_depth = pred_depth[mask]
        gt_depth = gt_depth[mask]

        # Median scaling
        ratio = np.median(gt_depth) / np.median(pred_depth)
        ratios.append(ratio)
        pred_depth *= ratio

        # Clip predictions into the valid depth range before scoring.
        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH
        errors.append(compute_errors(gt_depth, pred_depth))

    ratios = np.array(ratios)
    med = np.median(ratios)
    print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(
        med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)
    print("\n " + ("{:>8} | " * 7).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f} " * 7).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!\n")