def train(train_loader, model, criterion, optimizer, epoch, logger):
    """Run one training pass over ``train_loader`` and log metrics.

    For every batch the model is run forward, the loss between the ordinal
    prediction and the SID-discretized target is back-propagated, and the
    optimizer takes one step.  Every ``args.print_freq`` batches a progress
    line is printed and the current batch's metrics are sent to ``logger``.

    Relies on the module-level names ``args`` and ``output_directory``.

    Args:
        train_loader: Sized iterable yielding ``(input, target)`` batches;
            both elements are moved to the GPU with ``.cuda()``.
        model: Callable returning the pair ``(pred_d, pred_ord)``.
        criterion: Loss callable applied to ``(pred_ord, target_c)``.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        epoch: Epoch index; used in the progress line and to derive the
            global step passed to ``logger``.
        logger: Object exposing ``add_scalar(tag, value, step)``.

    Returns:
        None.
    """
    # (logger tag, Result attribute) pairs, in the order they are written.
    scalar_fields = (
        ('Train/RMSE', 'rmse'),
        ('Train/rml', 'absrel'),
        ('Train/Log10', 'lg10'),
        ('Train/Delta1', 'delta1'),
        ('Train/Delta2', 'delta2'),
        ('Train/Delta3', 'delta3'),
    )
    progress_line = (
        'Train Epoch: {0} [{1}/{2}]\t'
        't_Data={data_time:.3f}({average.data_time:.3f}) '
        't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
        'Loss={Loss:.5f} '
        'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
        'RML={result.absrel:.2f}({average.absrel:.2f}) '
        'Log10={result.lg10:.3f}({average.lg10:.3f}) '
        'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
        'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
        'Delta3={result.delta3:.3f}({average.delta3:.3f})'
    )

    average_meter = AverageMeter()
    model.train()  # switch to train mode
    tick = time.time()
    batch_num = len(train_loader)

    for batch_idx, (images, depth_gt) in enumerate(train_loader):
        images = images.cuda()
        depth_gt = depth_gt.cuda()
        # Wait for the transfers so data_time covers loading + upload.
        torch.cuda.synchronize()
        data_time = time.time() - tick

        # Forward / backward / optimizer step, timed as gpu_time.
        tick = time.time()
        # NOTE(review): PyTorch documents anomaly detection as a debugging
        # aid that slows execution -- confirm it should stay enabled here.
        with torch.autograd.detect_anomaly():
            # @wx: note that the model returns two outputs.
            pred_d, pred_ord = model(images)
            # Discretize the ground truth using SID.
            target_c = utils.get_labels_sid(args, depth_gt)
            loss = criterion(pred_ord, target_c)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        torch.cuda.synchronize()
        gpu_time = time.time() - tick

        # Measure accuracy on this batch and fold it into the running average.
        result = Result()
        depth = utils.get_depth_sid(args, pred_d)
        result.evaluate(depth.data, depth_gt.data)
        average_meter.update(result, gpu_time, data_time, images.size(0))
        tick = time.time()

        batches_done = batch_idx + 1
        if batches_done % args.print_freq != 0:
            continue

        print('=> output: {}'.format(output_directory))
        print(progress_line.format(
            epoch, batches_done, batch_num,
            data_time=data_time,
            gpu_time=gpu_time,
            Loss=loss.item(),
            result=result,
            average=average_meter.average()))

        current_step = epoch * batch_num + batch_idx
        for tag, field in scalar_fields:
            logger.add_scalar(tag, getattr(result, field), current_step)

    # Computed after the loop; not used or returned by this function.
    avg = average_meter.average()
def train(train_loader, model, criterion, optimizer, epoch, logger, device,
          opts, iteration):
    """Run one (possibly resumed) training pass over ``train_loader``.

    Batches whose index is below ``iteration`` are skipped.  For each
    remaining batch the model is run forward, the loss between the ordinal
    prediction and the SID-discretized target is back-propagated, and the
    optimizer takes one step.  A checkpoint is saved every
    ``opts.save_freq`` batches, and every ``opts.print_freq`` batches a
    progress line is printed and the batch metrics are sent to ``logger``.

    Relies on the module-level names ``best_result`` and
    ``output_directory``.

    Args:
        train_loader: Sized iterable yielding ``(input, target)`` batches.
        model: Callable returning the pair ``(pred_d, pred_ord)``.
        criterion: Loss callable applied to ``(pred_ord, target_c)``.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        epoch: Epoch index; used in the progress line, the checkpoint and
            the global step passed to ``logger``.
        logger: Object exposing ``add_scalar(tag, value, step)``.
        device: Target passed to ``.to(...)`` for each batch and forwarded
            to the ``utils`` SID helpers.
        opts: Options object; ``save_freq`` and ``print_freq`` are read
            here and the object is stored in the checkpoint under ``args``.
        iteration: Number of leading batches to skip.

    Returns:
        None.
    """
    # (logger tag, Result attribute) pairs, in the order they are written.
    scalar_fields = (
        ('Train/RMSE', 'rmse'),
        ('Train/rml', 'absrel'),
        ('Train/Log10', 'lg10'),
        ('Train/Delta1', 'delta1'),
        ('Train/Delta2', 'delta2'),
        ('Train/Delta3', 'delta3'),
    )
    progress_line = (
        'Train Epoch: {0} [{1}/{2}]\t'
        't_Data={data_time:.3f}({average.data_time:.3f}) '
        't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
        'Loss={Loss:.5f} '
        'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
        'RML={result.absrel:.2f}({average.absrel:.2f}) '
        'Log10={result.lg10:.3f}({average.lg10:.3f}) '
        'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
        'Delta2={result.delta2:.3f}({average.delta2:.3f}) '
        'Delta3={result.delta3:.3f}({average.delta3:.3f})'
    )

    average_meter = AverageMeter()
    model.train()  # switch to train mode
    tick = time.time()
    batch_num = len(train_loader)
    cuda_available = torch.cuda.is_available()

    for batch_idx, (images, depth_gt) in enumerate(train_loader):
        # Skip the batches below the requested starting iteration.
        if batch_idx < iteration:
            continue

        images = images.to(device)
        depth_gt = depth_gt.to(device)
        if cuda_available:
            # Wait for the transfers so data_time covers loading + upload.
            torch.cuda.synchronize()
        data_time = time.time() - tick

        # Forward / backward / optimizer step, timed as gpu_time.
        tick = time.time()
        # NOTE(review): PyTorch documents anomaly detection as a debugging
        # aid that slows execution -- confirm it should stay enabled here.
        with torch.autograd.detect_anomaly():
            pred_d, pred_ord = model(images)
            # Discretize the ground truth using SID.
            target_c = utils.get_labels_sid(opts, depth_gt, device)
            loss = criterion(pred_ord, target_c)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if cuda_available:
            torch.cuda.synchronize()
        gpu_time = time.time() - tick

        # Measure accuracy on this batch and fold it into the running average.
        result = Result()
        depth = utils.get_depth_sid(opts, pred_d, device)
        result.evaluate(depth.data, depth_gt.data)
        average_meter.update(result, gpu_time, data_time, images.size(0))
        tick = time.time()

        batches_done = batch_idx + 1

        if batches_done % opts.save_freq == 0:
            # 'iteration' records how many batches of this epoch are done.
            checkpoint_state = {
                'args': opts,
                'epoch': epoch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
                'iteration': batches_done,
            }
            utils.save_checkpoint(checkpoint_state, False, epoch,
                                  output_directory)

        if batches_done % opts.print_freq == 0:
            print(progress_line.format(
                epoch, batches_done, batch_num,
                data_time=data_time,
                gpu_time=gpu_time,
                Loss=loss.item(),
                result=result,
                average=average_meter.average()))

            current_step = epoch * batch_num + batch_idx
            for tag, field in scalar_fields:
                logger.add_scalar(tag, getattr(result, field), current_step)
            print("GPU:", torch.cuda.memory_allocated())

        # NOTE(review): confirm the cache release is meant to run on every
        # batch rather than only when progress is reported.
        if cuda_available:
            torch.cuda.empty_cache()

    # Computed after the loop; not used or returned by this function.
    avg = average_meter.average()