def main_worker(gpu, save_dir, args):
    """Single-GPU training worker: HyperRegression on the toy ExampleData set.

    Args:
        gpu: CUDA device index this worker trains on.
        save_dir: directory receiving checkpoints and visualization images.
        args: parsed command-line namespace (batch_size, epochs, log/viz/save
            frequencies, resume options, ...).
    """
    # basic setup
    cudnn.benchmark = True
    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    model = HyperRegression(args)
    torch.cuda.set_device(args.gpu)
    model = model.cuda(args.gpu)

    # optimizer and (optional) checkpoint resume
    start_epoch = 0
    optimizer = model.make_optimizer(args)
    if args.resume_checkpoint is None and os.path.exists(
            os.path.join(save_dir, 'checkpoint-latest.pt')):
        # no checkpoint given explicitly -> fall back to the latest one
        args.resume_checkpoint = os.path.join(save_dir, 'checkpoint-latest.pt')
    if args.resume_checkpoint is not None:
        if args.resume_optimizer:
            model, optimizer, start_epoch = resume(
                args.resume_checkpoint, model, optimizer,
                strict=(not args.resume_non_strict))
        else:
            model, _, start_epoch = resume(
                args.resume_checkpoint, model, optimizer=None,
                strict=(not args.resume_non_strict))
        print('Resumed from: ' + args.resume_checkpoint)

    # main training loop
    start_time = time.time()
    point_nats_avg_meter = AverageValueMeter()
    if args.distributed:
        print("[Rank %d] World size : %d" % (args.rank, dist.get_world_size()))
    print("Start epoch: %d End epoch: %d" % (start_epoch, args.epochs))
    for epoch in range(start_epoch, args.epochs):
        print("Epoch starts:")
        # NOTE(review): dataset + loader are rebuilt every epoch, as in the
        # original; hoist out of the loop if ExampleData is deterministic —
        # TODO confirm.
        data = ExampleData()
        train_loader = torch.utils.data.DataLoader(dataset=data,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=0,
                                                   pin_memory=True)
        for bidx, batch in enumerate(train_loader):  # renamed from `data`: no shadowing
            x, y = batch
            x = x.float().to(args.gpu).unsqueeze(1)
            y = y.float().to(args.gpu).unsqueeze(1).unsqueeze(2)
            step = bidx + len(train_loader) * epoch
            model.train()
            # the model performs its own optimizer step internally and returns
            # the reconstruction likelihood in nats
            recon_nats = model(x, y, optimizer, step, None)
            point_nats_avg_meter.update(recon_nats.item())
            if step % args.log_freq == 0:
                duration = time.time() - start_time
                start_time = time.time()
                print(
                    "[Rank %d] Epoch %d Batch [%2d/%2d] Time [%3.2fs] PointNats %2.5f"
                    % (args.rank, epoch, bidx, len(train_loader), duration,
                       point_nats_avg_meter.avg))

        # save visualizations
        kk = 3
        if (epoch + 1) % args.viz_freq == 0:
            model.eval()
            with torch.no_grad():  # inference only; no autograd graph needed
                x = torch.from_numpy(np.linspace(0, kk, num=100)).float().to(
                    args.gpu).unsqueeze(1)
                _, y = model.decode(x, 100)
            x = x.cpu().detach().numpy()
            y = y.cpu().detach().numpy()
            # repeat each x value once per decoded hypothesis so x/y align
            x = np.expand_dims(x, 1).repeat(100, axis=1).flatten()
            y = y.flatten()
            fig, _ = plt.subplots(1, 1, figsize=(12, 12))
            plt.xlim([0, kk])
            plt.ylim([-2, 2])
            plt.scatter(x, y)
            plt.savefig(
                os.path.join(
                    save_dir, 'images',
                    'tr_vis_sampled_epoch%d-gpu%s.png' % (epoch, args.gpu)))
            # close (not just clear) the figure: plt.clf() alone leaks one
            # open figure per visualization epoch
            plt.close(fig)

        if (epoch + 1) % args.save_freq == 0:
            save(model, optimizer, epoch + 1,
                 os.path.join(save_dir, 'checkpoint-%d.pt' % epoch))
            save(model, optimizer, epoch + 1,
                 os.path.join(save_dir, 'checkpoint-latest.pt'))
def main_worker(gpu, save_dir, ngpus_per_node, args):
    """Per-process training worker for PointFlow (optionally distributed).

    Sets up (distributed) devices, TensorBoard logging, datasets, the LR
    scheduler, then runs the train loop with periodic visualization and
    checkpointing.

    Args:
        gpu: local GPU index for this process (or None for DataParallel).
        save_dir: directory receiving checkpoints and visualization images.
        ngpus_per_node: GPUs per node; used to derive the global rank and to
            split the batch size across processes.
        args: parsed command-line namespace.
    """
    # basic setup
    cudnn.benchmark = True
    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        # global rank = node rank * gpus-per-node + local gpu index
        args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # TensorBoard writer only on the rank-0 process of each node
    if args.log_name is not None:
        log_dir = "runs/%s" % args.log_name
    else:
        log_dir = "runs/time-%d" % time.time()
    if not args.distributed or (args.rank % ngpus_per_node == 0):
        writer = SummaryWriter(logdir=log_dir)
    else:
        writer = None

    if not args.use_latent_flow:  # auto-encoder only
        args.prior_weight = 0
        args.entropy_weight = 0

    # multi-GPU setup
    model = PointFlow(args)
    if args.distributed:  # multiple processes, single GPU per process
        if args.gpu is not None:
            def _transform_(m):
                return nn.parallel.DistributedDataParallel(
                    m, device_ids=[args.gpu], output_device=args.gpu,
                    check_reduction=True)

            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            model.multi_gpu_wrapper(_transform_)
            # each process sees only its share of the global batch
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = 0
        else:
            assert 0, "DistributedDataParallel constructor should always set the single device scope"
    elif args.gpu is not None:  # single process, single GPU
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:  # single process, multiple GPUs (DataParallel)
        def _transform_(m):
            return nn.DataParallel(m)

        model = model.cuda()
        model.multi_gpu_wrapper(_transform_)

    # resume checkpoints
    start_epoch = 0
    optimizer = model.make_optimizer(args)
    if args.resume_checkpoint is None and os.path.exists(
            os.path.join(save_dir, 'checkpoint-latest.pt')):
        # no checkpoint given explicitly -> fall back to the latest one
        args.resume_checkpoint = os.path.join(save_dir, 'checkpoint-latest.pt')
    if args.resume_checkpoint is not None:
        if args.resume_optimizer:
            model, optimizer, start_epoch = resume(
                args.resume_checkpoint, model, optimizer,
                strict=(not args.resume_non_strict))
        else:
            model, _, start_epoch = resume(
                args.resume_checkpoint, model, optimizer=None,
                strict=(not args.resume_non_strict))
        print('Resumed from: ' + args.resume_checkpoint)

    # initialize datasets and loaders
    tr_dataset = MyDataset(args.data_dir, istest=False)
    te_dataset = MyDataset(args.data_dir, istest=True)
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            tr_dataset)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(
        dataset=tr_dataset, batch_size=args.batch_size,
        shuffle=(train_sampler is None), num_workers=0, pin_memory=True,
        sampler=train_sampler, drop_last=True, worker_init_fn=init_np_seed)
    test_loader = torch.utils.data.DataLoader(
        dataset=te_dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=0, pin_memory=True, drop_last=False,
        worker_init_fn=init_np_seed)

    # save dataset statistics
    # if not args.distributed or (args.rank % ngpus_per_node == 0):
    #     np.save(os.path.join(save_dir, "train_set_mean.npy"), tr_dataset.all_points_mean)
    #     np.save(os.path.join(save_dir, "train_set_std.npy"), tr_dataset.all_points_std)
    #     np.save(os.path.join(save_dir, "train_set_idx.npy"), np.array(tr_dataset.shuffle_idx))
    #     np.save(os.path.join(save_dir, "val_set_mean.npy"), te_dataset.all_points_mean)
    #     np.save(os.path.join(save_dir, "val_set_std.npy"), te_dataset.all_points_std)
    #     np.save(os.path.join(save_dir, "val_set_idx.npy"), np.array(te_dataset.shuffle_idx))

    # load classification dataset if needed
    if args.eval_classification:
        from datasets import get_clf_datasets

        def _make_data_loader_(dataset):
            return torch.utils.data.DataLoader(
                dataset=dataset, batch_size=args.batch_size, shuffle=False,
                num_workers=0, pin_memory=True, drop_last=False,
                worker_init_fn=init_np_seed)

        clf_datasets = get_clf_datasets(args)
        clf_loaders = {
            k: [_make_data_loader_(ds) for ds in ds_lst]
            for k, ds_lst in clf_datasets.items()
        }
    else:
        clf_loaders = None

    # initialize the learning rate scheduler
    if args.scheduler == 'exponential':
        scheduler = optim.lr_scheduler.ExponentialLR(optimizer, args.exp_decay)
    elif args.scheduler == 'step':
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=args.epochs // 2,
                                              gamma=0.1)
    elif args.scheduler == 'linear':
        def lambda_rule(ep):
            # constant for the first half of training, then linear decay to 0
            lr_l = 1.0 - max(0, ep - 0.5 * args.epochs) / float(
                0.5 * args.epochs)
            return lr_l

        scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                                lr_lambda=lambda_rule)
    else:
        assert 0, "args.schedulers should be either 'exponential' or 'linear'"

    # main training loop
    start_time = time.time()
    entropy_avg_meter = AverageValueMeter()
    latent_nats_avg_meter = AverageValueMeter()
    point_nats_avg_meter = AverageValueMeter()
    if args.distributed:
        print("[Rank %d] World size : %d" % (args.rank, dist.get_world_size()))

    print("Start epoch: %d End epoch: %d" % (start_epoch, args.epochs))
    for epoch in range(start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # adjust the learning rate
        if (epoch + 1) % args.exp_decay_freq == 0:
            scheduler.step(epoch=epoch)
            if writer is not None:
                writer.add_scalar('lr/optimizer', scheduler.get_lr()[0], epoch)

        # train for one epoch
        for bidx, data in enumerate(train_loader):
            idx_batch, tr_batch, te_batch = data['idx'], data[
                'train_points'], data['test_points']
            step = bidx + len(train_loader) * epoch
            model.train()
            inputs = tr_batch.cuda(args.gpu, non_blocking=True)
            # the model performs its own optimizer step internally
            out = model(inputs, optimizer, step, writer)
            entropy, prior_nats, recon_nats = out['entropy'], out[
                'prior_nats'], out['recon_nats']
            entropy_avg_meter.update(entropy)
            point_nats_avg_meter.update(recon_nats)
            latent_nats_avg_meter.update(prior_nats)
            if step % args.log_freq == 0:
                duration = time.time() - start_time
                start_time = time.time()
                print(
                    "[Rank %d] Epoch %d Batch [%2d/%2d] Time [%3.2fs] Entropy %2.5f LatentNats %2.5f PointNats %2.5f"
                    % (args.rank, epoch, bidx, len(train_loader), duration,
                       entropy_avg_meter.avg, latent_nats_avg_meter.avg,
                       point_nats_avg_meter.avg))

        # evaluate on the validation set
        # if not args.no_validation and (epoch + 1) % args.val_freq == 0:
        #     from utils import validate
        #     validate(test_loader, model, epoch, writer, save_dir, args, clf_loaders=clf_loaders)

        # save visualizations
        if (epoch + 1) % args.viz_freq == 0:
            # reconstructions
            model.eval()
            samples = model.reconstruct(inputs)
            results = []
            for idx in range(min(10, inputs.size(0))):
                res = visualize_point_clouds(samples[idx], inputs[idx], idx)
                results.append(res)
            res = np.concatenate(results, axis=1)
            # NOTE(review): scipy.misc.imsave is removed in modern SciPy;
            # consider migrating to imageio.imwrite.
            scipy.misc.imsave(
                os.path.join(
                    save_dir, 'images',
                    'tr_vis_conditioned_epoch%d-gpu%s.png' %
                    (epoch, args.gpu)), res.transpose((1, 2, 0)))
            if writer is not None:
                writer.add_image('tr_vis/conditioned', torch.as_tensor(res),
                                 epoch)

            # samples
            if args.use_latent_flow:
                num_samples = min(10, inputs.size(0))
                num_points = inputs.size(1)
                _, samples = model.sample(num_samples, num_points)
                results = []
                for idx in range(num_samples):
                    res = visualize_point_clouds(samples[idx], inputs[idx],
                                                 idx)
                    results.append(res)
                res = np.concatenate(results, axis=1)
                # BUGFIX: this previously wrote tr_vis_conditioned_... and
                # silently overwrote the reconstruction image above; the
                # sampled visualization gets its own filename (matching the
                # 'tr_vis/sampled' TensorBoard tag below).
                scipy.misc.imsave(
                    os.path.join(
                        save_dir, 'images',
                        'tr_vis_sampled_epoch%d-gpu%s.png' %
                        (epoch, args.gpu)), res.transpose((1, 2, 0)))
                if writer is not None:
                    writer.add_image('tr_vis/sampled', torch.as_tensor(res),
                                     epoch)

        # save checkpoints (rank-0 process of each node only)
        if not args.distributed or (args.rank % ngpus_per_node == 0):
            if (epoch + 1) % args.save_freq == 0:
                save(model, optimizer, epoch + 1,
                     os.path.join(save_dir, 'checkpoint-%d.pt' % epoch))
                save(model, optimizer, epoch + 1,
                     os.path.join(save_dir, 'checkpoint-latest.pt'))
# NOTE(review): fragment of a training epoch — the enclosing function and batch
# loop are outside this view; indentation restored by convention, verify
# against the full file.
extrinsic = extrinsic.cuda()
# split the 4x4(?) camera extrinsic into rotation and translation —
# assumes extrinsic is (batch, 3+, 4); TODO confirm
R = extrinsic[:, 0:3, 0:3]
t = extrinsic[:, 0:3, 3].unsqueeze(1)
# Forward pass: predict rotation and translation from the image
R_pred, t_pred = network(image)
# Loss computation: compare the point cloud transformed by the predicted
# pose against the same cloud transformed by the ground-truth pose
xyz_rot = transformation(xyz, R_pred, t_pred)
xyz_rot_gt = transformation(xyz, R, t)
batch_loss = loss(xyz_rot, xyz_rot_gt)
batch_loss.backward()
train_loss.update(batch_loss.item())
torch.nn.utils.clip_grad_norm_(network.parameters(), 0.1)  # Clip gradients
optimizer.step()  # gradient update
print('[%d: %d/%d] train loss: %f' %
      (epoch, i, len_dataset / opt.batch_size, batch_loss.item()))

# VALIDATION — presumably dedented to epoch level in the original file
test_loss.reset()
network.eval()
with torch.no_grad():  # no gradients needed for evaluation
    for i, data in enumerate(dataloader_test, 0):
        # Load data (body continues beyond this view)
        points, image, _, extrinsic, _ = data
def main_worker(gpu, save_dir, ngpus_per_node, args):
    """Single-GPU training worker: HyperRegression on the SDD trajectory data.

    Args:
        gpu: CUDA device index this worker trains on.
        save_dir: directory receiving checkpoints and visualization images.
        ngpus_per_node: unused here; kept for signature compatibility with the
            multiprocessing spawn entry point.
        args: parsed command-line namespace.
    """
    # basic setup
    cudnn.benchmark = True
    normalize = False
    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    model = HyperRegression(args)
    torch.cuda.set_device(args.gpu)
    model = model.cuda(args.gpu)

    # optimizer and (optional) checkpoint resume
    start_epoch = 0
    optimizer = model.make_optimizer(args)
    if args.resume_checkpoint is None and os.path.exists(
            os.path.join(save_dir, 'checkpoint-latest.pt')):
        # no checkpoint given explicitly -> fall back to the latest one
        args.resume_checkpoint = os.path.join(save_dir, 'checkpoint-latest.pt')
    if args.resume_checkpoint is not None:
        if args.resume_optimizer:
            model, optimizer, start_epoch = resume(
                args.resume_checkpoint, model, optimizer,
                strict=(not args.resume_non_strict))
        else:
            model, _, start_epoch = resume(
                args.resume_checkpoint, model, optimizer=None,
                strict=(not args.resume_non_strict))
        print('Resumed from: ' + args.resume_checkpoint)

    # initialize the learning rate scheduler
    if args.scheduler == 'exponential':
        scheduler = optim.lr_scheduler.ExponentialLR(optimizer, args.exp_decay)
    elif args.scheduler == 'step':
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=args.epochs // 2,
                                              gamma=0.1)
    elif args.scheduler == 'linear':
        def lambda_rule(ep):
            # constant for the first half of training, then linear decay to 0
            lr_l = 1.0 - max(0, ep - 0.5 * args.epochs) / float(
                0.5 * args.epochs)
            return lr_l

        scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                                lr_lambda=lambda_rule)
    else:
        assert 0, "args.schedulers should be either 'exponential' or 'linear'"

    # main training loop
    start_time = time.time()
    # (removed two AverageValueMeters that were never read: entropy/latent nats)
    point_nats_avg_meter = AverageValueMeter()
    if args.distributed:
        print("[Rank %d] World size : %d" % (args.rank, dist.get_world_size()))

    print("Start epoch: %d End epoch: %d" % (start_epoch, args.epochs))
    data = SDDData(split='train', normalize=normalize, root=args.data_dir)
    data_test = SDDData(split='test', normalize=normalize, root=args.data_dir)
    train_loader = torch.utils.data.DataLoader(dataset=data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=0,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(dataset=data_test,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=0,
                                              pin_memory=True)
    for epoch in range(start_epoch, args.epochs):
        # adjust the learning rate
        if (epoch + 1) % args.exp_decay_freq == 0:
            scheduler.step(epoch=epoch)

        # train for one epoch
        print("Epoch starts:")
        for bidx, batch in enumerate(train_loader):
            x, y = batch
            x = x.float().to(args.gpu)
            y = y.float().to(args.gpu).unsqueeze(1)
            # replicate each target 20x and jitter with unit Gaussian noise
            y = y.repeat(1, 20, 1)
            y += torch.randn(y.shape[0], y.shape[1], y.shape[2]).to(args.gpu)
            step = bidx + len(train_loader) * epoch
            model.train()
            # the model performs its own optimizer step internally and returns
            # the reconstruction likelihood in nats
            recon_nats = model(x, y, optimizer, step, None)
            point_nats_avg_meter.update(recon_nats.item())
            if step % args.log_freq == 0:
                duration = time.time() - start_time
                start_time = time.time()
                print(
                    "[Rank %d] Epoch %d Batch [%2d/%2d] Time [%3.2fs] PointNats %2.5f"
                    % (args.rank, epoch, bidx, len(train_loader), duration,
                       point_nats_avg_meter.avg))

        # save visualizations
        if (epoch + 1) % args.viz_freq == 0:
            model.eval()
            with torch.no_grad():  # inference only; no autograd graph needed
                # `tbidx` (not `bidx`) so the training-loop counter above is
                # not shadowed
                for tbidx, batch in enumerate(test_loader):
                    x, _ = batch
                    x = x.float().to(args.gpu)
                    _, y_pred = model.decode(x, 100)
                    y_pred = y_pred.cpu().detach().numpy().squeeze()
                    # test loader is unshuffled with batch_size=1, so tbidx
                    # indexes the test sequence directly
                    testing_sequence = data_test.dataset.scenes[
                        data_test.test_id].sequences[tbidx]
                    objects_list = [
                        decode_obj(testing_sequence.objects[k],
                                   testing_sequence.id) for k in range(3)
                    ]
                    objects = np.stack(objects_list, axis=0)
                    gt_object = decode_obj(testing_sequence.objects[-1],
                                           testing_sequence.id)
                    drawn_img_hyps = draw_hyps(testing_sequence.imgs[-1],
                                               y_pred, gt_object, objects,
                                               normalize)
                    cv2.imwrite(
                        os.path.join(
                            save_dir, 'images',
                            str(tbidx) + '-' + str(epoch) + '-hyps.jpg'),
                        drawn_img_hyps)

        if (epoch + 1) % args.save_freq == 0:
            save(model, optimizer, epoch + 1,
                 os.path.join(save_dir, 'checkpoint-%d.pt' % epoch))
            save(model, optimizer, epoch + 1,
                 os.path.join(save_dir, 'checkpoint-latest.pt'))
# NOTE(review): fragment of a training step — the enclosing loop/function is
# outside this view; indentation restored by convention.
# Average the masked per-batch Chamfer term (dist1 direction) over the batch.
mean_dist1 = 0
for k in range(B_size):
    dist1_per_batch = dist1[k]
    mask_per_batch = mask_per_pts[k, :]
    mean_dist1 += torch.mean(dist1_per_batch[mask_per_batch]) / B_size
# Finally here is the final loss computation:
# both sides of Chamfer + BCE on occupancy grids
loss_ch = torch.mean(dist2) + mean_dist1
loss_occ = F.binary_cross_entropy(occupancy, target_occupancy)
# occupancy BCE weighted 100x relative to Chamfer
loss_net = loss_ch + 100.0 * loss_occ
loss_net.backward()
optimizer.step()  # gradient update
total_train_loss.update(loss_net.item())
chd_train_loss.update(loss_ch.item())
occ_train_loss.update(loss_occ.item())
# VISUALIZE
# NOTE(review): for non-negative i, `i % 200 <= 0` is just `i % 200 == 0`,
# i.e. dump point clouds every 200 iterations.
if i % 200 <= 0:
    print("Storing to file...")
    save_pointcloud(points[0].data.cpu(),
                    os.path.join(output_folder, f'train_GT_{epoch}_{i}.ply'))
    save_pointcloud(points_flat[0][mask_per_pts[0]].data.cpu(),
                    os.path.join(output_folder, f'train_output_{epoch}_{i}.ply'))
    save_image(img[0],
               os.path.join(output_folder, f'train_input_{epoch}_{i}.png'))
print('[%d: %d/%d] Train Chamfer Loss: %f, Train Occupancy Loss: %f ' % (
    epoch, i, len_dataset / opt.batch_size, loss_ch.item(), loss_occ.item()))
# Prevent from reaching 0 (otherwise cannot take log) z_fake = torch.clamp(z_fake, min=0.001, max=1.) # Compute losses depth_loss = depth_criterion(z_fake, z) grad_real, grad_fake = imgrad_yx(z), imgrad_yx(z_fake) grad_loss = grad_criterion(grad_fake, grad_real) * grad_factor * (epoch>3) normal_loss = normal_criterion(grad_fake, grad_real) * normal_factor * (epoch>7) loss = depth_loss + grad_loss + normal_loss loss.backward() optimizer.step() # gradient update train_total.update(loss.item()) train_logRMSE.update(depth_loss.item()) train_grad.update(grad_loss.item()) train_normal.update(normal_loss.item()) # Print info print("[epoch %2d][iter %4d] loss: %.4f , RMSElog: %.4f , grad_loss: %.4f , normal_loss: %.4f" \ % (epoch, i, loss, depth_loss, grad_loss, normal_loss)) # VISUALIZE if i == 0: for idx in [0, img.shape[0]-1]: save_image(img[idx], os.path.join(output_folder, f'train_input_{epoch}_{idx}.png')) save_image(z[idx], os.path.join(output_folder, f'train_GT_{epoch}_{idx}.png')) save_image(z_fake[idx], os.path.join(output_folder, f'train_pred_{epoch}_{idx}.png'))
# Then map back to the scaling used in DISN scale = T[0, 0, 0] pointsReconstructed = pointsReconstructed / scale gt_points = gt_points / scale points_flat = pointsReconstructed.reshape(B_size, -1, 3) # In case the output is empty, just randomly put 100 points if points_flat.shape[1] == 0: print(f'Error: for shape {mesh_name}, the output is empty') points_flat = torch.rand(1, 100, 3).cuda() - 0.5 ##### f-score computation f_score_value = test_f_score(points_flat, gt_points, opt.pts_for_fscore).item() overall_f_score_5_percent.update(f_score_value) per_cat_f_score_5_percent[cat].update(f_score_value) ##### Chamfer loss chd_value = test_chamfer(points_flat, gt_points, opt.pts_for_chd).item() overall_chd_loss.update(chd_value) per_cat_chd_loss[cat].update(chd_value) ##### IoU computation iou_value = test_shellIoU(points_flat, gt_points, opt.pts_for_IoU).item() overall_iou_loss.update(iou_value) per_cat_iou_loss[cat].update(iou_value) # Save output point clouds for the first 10 objects per category