def main_worker_eval(worker_id, args):
    device = torch.device("cuda:%d" % worker_id)
    cfg = setup(args)

    # build test set
    test_loader = build_data_loader(
        cfg, get_dataset_name(cfg), "test", multigpu=False
    )
    test_loader.dataset.set_depth_only(True)
    test_loader.collate_fn = default_collate
    logger.info("test - %d" % len(test_loader))

    # load checkpoint and build model
    if cfg.MODEL.CHECKPOINT == "":
        raise ValueError("Invalid checkpoint provided")
    logger.info("Loading model from checkpoint: %s" % (cfg.MODEL.CHECKPOINT))
    cp = torch.load(PathManager.get_local_path(cfg.MODEL.CHECKPOINT))
    state_dict = clean_state_dict(cp["best_states"]["model"])
    model = MVSNet(cfg.MODEL.MVSNET)
    model.load_state_dict(state_dict)
    logger.info("Model loaded")
    model.to(device)

    eval_split = "test"
    test_metrics, test_preds = evaluate_split_depth(
        model, test_loader, prefix="%s_" % eval_split
    )
    str_out = "Results on %s: " % eval_split
    for k, v in test_metrics.items():
        str_out += "%s %.4f " % (k, v)
    logger.info(str_out)
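
# `clean_state_dict` is called on every checkpoint load in this code but is
# defined elsewhere. A minimal sketch, assuming its job is to strip the
# "module." prefix that torch.nn.parallel.DistributedDataParallel prepends
# to parameter names when a wrapped model is saved:
from collections import OrderedDict

def clean_state_dict(state_dict):
    cleaned = OrderedDict()
    for k, v in state_dict.items():
        # "module.backbone.conv1.weight" -> "backbone.conv1.weight"
        cleaned[k[len("module."):] if k.startswith("module.") else k] = v
    return cleaned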
def main_worker_eval(worker_id, args):
    device = torch.device("cuda:%d" % worker_id)
    cfg = setup(args)

    # build test set
    test_loader = build_data_loader(
        cfg, get_dataset_name(cfg), "test", multigpu=False, num_workers=8
    )
    logger.info("test - %d" % len(test_loader))

    # load checkpoint and build model
    if cfg.MODEL.CHECKPOINT == "":
        raise ValueError("Invalid checkpoint provided")
    logger.info("Loading model from checkpoint: %s" % (cfg.MODEL.CHECKPOINT))
    cp = torch.load(PathManager.get_local_path(cfg.MODEL.CHECKPOINT))
    state_dict = clean_state_dict(cp["best_states"]["model"])
    model = build_model(cfg)
    model.load_state_dict(state_dict)
    logger.info("Model loaded")
    model.to(device)

    wandb.init(project="MeshRCNN", config=cfg, name="meshrcnn-eval")

    if args.eval_p2m:
        evaluate_test_p2m(model, test_loader)
    else:
        evaluate_test(model, test_loader)
def main_worker_eval(worker_id, args):
    device = torch.device("cuda:%d" % worker_id)
    cfg = setup(args)

    # build test set
    test_loader = build_data_loader(
        cfg, get_dataset_name(cfg), "test", multigpu=False
    )
    logger.info("test - %d" % len(test_loader))

    # load checkpoint and build model
    if cfg.MODEL.CHECKPOINT == "":
        raise ValueError("Invalid checkpoint provided")
    logger.info("Loading model from checkpoint: %s" % (cfg.MODEL.CHECKPOINT))
    cp = torch.load(PathManager.get_local_path(cfg.MODEL.CHECKPOINT))
    state_dict = clean_state_dict(cp["best_states"]["model"])
    model = build_model(cfg)
    model.load_state_dict(state_dict)
    logger.info("Model loaded")
    model.to(device)

    def disable_running_stats(model):
        # Recursively switch every BatchNorm layer to batch statistics
        # by turning off its running-stats tracking
        if type(model).__name__.startswith("BatchNorm"):
            model.track_running_stats = False
        else:
            for m in model.children():
                disable_running_stats(m)

    # disable_running_stats(model)

    # quick evaluation pass capped at 100 predictions
    val_loader = build_data_loader(
        cfg, get_dataset_name(cfg), "test", multigpu=False
    )
    logger.info("val - %d" % len(val_loader))
    test_metrics = evaluate_vox(model, val_loader, max_predictions=100)
    str_out = "Results on test: "
    for k, v in test_metrics.items():
        str_out += "%s %.4f " % (k, v)
    logger.info(str_out)

    # full pass that also writes predictions to disk
    prediction_dir = os.path.join(cfg.OUTPUT_DIR, "predictions")
    test_metrics = evaluate_vox(model, test_loader, prediction_dir)
    print(test_metrics)
    str_out = "Results on test: "
    for k, v in test_metrics.items():
        str_out += "%s %.4f " % (k, v)
    logger.info(str_out)
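
# What disable_running_stats aims at, shown in isolation: a BatchNorm layer
# that keeps no running mean/var normalizes with the current batch's
# statistics even in eval() mode. A minimal sketch (layer sizes are
# illustrative):
import torch

bn = torch.nn.BatchNorm2d(8, track_running_stats=False)
bn.eval()
x = torch.randn(4, 8, 16, 16)
y = bn(x)  # normalized with this batch's mean/var, not running estimates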
def __init__(self, cfg, output_dir="./vis"):
    """
    Args:
        cfg (CfgNode):
    """
    self.predictor = VoxMeshHead(cfg)

    # Load pretrained weights into the model
    cp = torch.load(PathManager.get_local_path(cfg.MODEL.CHECKPOINT))
    state_dict = clean_state_dict(cp["best_states"]["model"])
    self.predictor.load_state_dict(state_dict)

    os.makedirs(output_dir, exist_ok=True)
    self.output_dir = output_dir
def __init__(self, cfg, checkpoint_lp_model, output_dir="./vis"):
    """
    Args:
        cfg (CfgNode):
    """
    self.predictor = VoxMeshHead(cfg)
    self.device = torch.device("cuda")

    # Load pretrained weights into the model
    cp = torch.load(PathManager.get_local_path(cfg.MODEL.CHECKPOINT))
    state_dict = clean_state_dict(cp["best_states"]["model"])
    self.predictor.load_state_dict(state_dict)

    # Load the trained loss-prediction module
    self.loss_predictor = LossPredictionModule()
    state_dict = torch.load(checkpoint_lp_model, map_location="cuda:0")
    self.loss_predictor.load_state_dict(state_dict)

    self.predictor.to(self.device)
    self.loss_predictor.to(self.device)

    os.makedirs(output_dir, exist_ok=True)
    self.output_dir = output_dir
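
# Hypothetical usage of the constructor above; `Visualizer` stands in for
# the enclosing class name, and the checkpoint path is illustrative only:
#
#   vis = Visualizer(cfg, checkpoint_lp_model="lp_module.pt", output_dir="./vis")
#
# The pairing of the VoxMeshHead predictor with a separately trained
# LossPredictionModule suggests that predicted per-sample loss is visualized
# alongside the mesh predictions.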
def __init__(self, cfg):
    super(MVSNet, self).__init__()
    self.freeze_cv = cfg.FREEZE
    self.input_image_size = cfg.INPUT_IMAGE_SIZE
    # Evenly spaced depth hypotheses for the plane sweep
    self.depth_values = cfg.MIN_DEPTH \
        + (torch.arange(cfg.NUM_DEPTHS, dtype=torch.float32)
           * cfg.DEPTH_INTERVAL)
    # 4x4 homogeneous camera intrinsics; the last row is [0, 0, 0, 1] so
    # the matrix stays invertible for the plane-sweep warping
    self.intrinsics = torch.tensor(
        [[cfg.FOCAL_LENGTH[0], 0, cfg.PRINCIPAL_POINT[0], 0],
         [0, cfg.FOCAL_LENGTH[1], cfg.PRINCIPAL_POINT[1], 0],
         [0, 0, 1, 0],
         [0, 0, 0, 1]],
        dtype=torch.float32)

    self.feature = VGG16P2M()
    self.cost_regularization = CostRegNet(cfg.FEATURES_LIST)
    self.features_dim = np.sum(self.cost_regularization.features_list)

    if cfg.CHECKPOINT:
        print("==> Loading MVSNet checkpoint:", cfg.CHECKPOINT)
        state_dict = torch.load(cfg.CHECKPOINT)
        if "model" in state_dict:
            state_dict = state_dict["model"]
        elif "best_states" in state_dict \
                and "model" in state_dict["best_states"]:
            state_dict = clean_state_dict(
                state_dict["best_states"]["model"])
        self.load_state_dict(state_dict)

    # Optionally freeze the cost-volume weights
    for param in self.parameters():
        param.requires_grad = not self.freeze_cv
    print("==> cost volume weights require_grad is:", not self.freeze_cv)
    print("==> number of cost volume features:",
          self.cost_regularization.features_list, self.features_dim)
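
# A numeric sketch of the depth hypotheses built above (MIN_DEPTH,
# DEPTH_INTERVAL, and NUM_DEPTHS here are illustrative values, not this
# project's actual config):
import torch

min_depth, depth_interval, num_depths = 0.5, 0.05, 48
depth_values = min_depth \
    + torch.arange(num_depths, dtype=torch.float32) * depth_interval
# tensor([0.5000, 0.5500, ..., 2.8500]): 48 evenly spaced fronto-parallel
# depth planes, one cost-volume slice per hypothesis.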
def main_worker(worker_id, args):
    distributed = False
    if args.num_gpus > 1:
        distributed = True
        dist.init_process_group(backend="NCCL",
                                init_method=args.dist_url,
                                world_size=args.num_gpus,
                                rank=worker_id)
        torch.cuda.set_device(worker_id)
    device = torch.device("cuda:%d" % worker_id)
    cfg = setup(args)

    # data loaders
    loaders = setup_loaders(cfg)
    for split_name, loader in loaders.items():
        logger.info("%s - %d" % (split_name, len(loader)))

    # build the model
    model = build_model(cfg)
    model.to(device)
    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[worker_id],
            output_device=worker_id,
            check_reduction=True,
            broadcast_buffers=False,
        )

    optimizer = build_optimizer(cfg, model)
    cfg.SOLVER.COMPUTED_MAX_ITERS = cfg.SOLVER.NUM_EPOCHS * len(loaders["train"])
    scheduler = build_lr_scheduler(cfg, optimizer)

    loss_fn_kwargs = {
        "chamfer_weight": cfg.MODEL.MESH_HEAD.CHAMFER_LOSS_WEIGHT,
        "normal_weight": cfg.MODEL.MESH_HEAD.NORMALS_LOSS_WEIGHT,
        "edge_weight": cfg.MODEL.MESH_HEAD.EDGE_LOSS_WEIGHT,
        "voxel_weight": cfg.MODEL.VOXEL_HEAD.LOSS_WEIGHT,
        "gt_num_samples": cfg.MODEL.MESH_HEAD.GT_NUM_SAMPLES,
        "pred_num_samples": cfg.MODEL.MESH_HEAD.PRED_NUM_SAMPLES,
    }
    loss_fn = MeshLoss(**loss_fn_kwargs)

    checkpoint_path = os.path.join(cfg.OUTPUT_DIR, "checkpoint.pt")
    cp = Checkpoint(checkpoint_path)
    if len(cp.restarts) == 0:
        # We are starting from scratch, so store some initial data in cp
        iter_per_epoch = len(loaders["train"])
        cp.store_data("iter_per_epoch", iter_per_epoch)
    else:
        logger.info("Loading model state from checkpoint")
        model.load_state_dict(cp.latest_states["model"])
        optimizer.load_state_dict(cp.latest_states["optim"])
        scheduler.load_state_dict(cp.latest_states["lr_scheduler"])

    # Use pretrained voxmesh weights if supplied
    if cfg.MODEL.CHECKPOINT != "":
        saved_weights = torch.load(
            PathManager.get_local_path(cfg.MODEL.CHECKPOINT))
        logger.info("Loading model from checkpoint: %s" % (cfg.MODEL.CHECKPOINT))
        state_dict = clean_state_dict(saved_weights["best_states"]["model"])
        model.load_state_dict(state_dict)

    training_loop(cfg, cp, model, optimizer, scheduler, loaders, device, loss_fn)
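
# How a per-GPU worker like this is typically launched (a sketch, not
# necessarily this repo's entry point): one process per GPU via
# torch.multiprocessing.spawn, which passes the process index as worker_id.
import torch.multiprocessing as mp

def main(args):
    if args.num_gpus > 1:
        mp.spawn(main_worker, nprocs=args.num_gpus, args=(args,))
    else:
        main_worker(0, args)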
def main_worker(worker_id, args):
    distributed = False
    if args.num_gpus > 1:
        distributed = True
        dist.init_process_group(
            backend="NCCL",
            init_method=args.dist_url,
            world_size=args.num_gpus,
            rank=worker_id,
        )
        torch.cuda.set_device(worker_id)
    device = torch.device("cuda:%d" % worker_id)
    cfg = setup(args)

    # data loaders
    loaders = setup_loaders(cfg)
    for split_name, loader in loaders.items():
        logger.info("%s - %d" % (split_name, len(loader)))

    # build the model
    model = build_model(cfg)
    model.to(device)
    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[worker_id],
            output_device=worker_id,
            check_reduction=True,
            broadcast_buffers=False,
        )

    optimizer = build_optimizer(cfg, model)
    cfg.SOLVER.COMPUTED_MAX_ITERS = cfg.SOLVER.NUM_EPOCHS * len(loaders["train"])
    scheduler = build_lr_scheduler(cfg, optimizer)

    loss_fn_kwargs = {
        "chamfer_weight": cfg.MODEL.MESH_HEAD.CHAMFER_LOSS_WEIGHT,
        "normal_weight": cfg.MODEL.MESH_HEAD.NORMALS_LOSS_WEIGHT,
        "edge_weight": cfg.MODEL.MESH_HEAD.EDGE_LOSS_WEIGHT,
        "voxel_weight": cfg.MODEL.VOXEL_HEAD.LOSS_WEIGHT,
        "gt_num_samples": cfg.MODEL.MESH_HEAD.GT_NUM_SAMPLES,
        "pred_num_samples": cfg.MODEL.MESH_HEAD.PRED_NUM_SAMPLES,
    }
    loss_fn = MeshLoss(**loss_fn_kwargs)

    checkpoint_path = os.path.join(cfg.OUTPUT_DIR, "checkpoint.pt")
    cp = Checkpoint(checkpoint_path)
    if len(cp.restarts) == 0:
        # We are starting from scratch, so store some initial data in cp
        iter_per_epoch = len(loaders["train"])
        cp.store_data("iter_per_epoch", iter_per_epoch)
    else:
        logger.info("Loading model state from checkpoint")
        model.load_state_dict(cp.latest_states["model"])
        optimizer.load_state_dict(cp.latest_states["optim"])
        scheduler.load_state_dict(cp.latest_states["lr_scheduler"])

    if cfg.PRETRAINED_MODEL2 != "":
        # Initialize from a trained Mesh R-CNN model
        checkpoint = torch.load(cfg.PRETRAINED_MODEL2)
        checkpoint1 = clean_state_dict(checkpoint["best_states"]["model"])
        # Because the output voxel size differs (32 vs 48), the mesh head
        # bottleneck weights have incompatible shapes and cannot be loaded
        del checkpoint1["mesh_head.stages.0.bottleneck.weight"]
        del checkpoint1["mesh_head.stages.1.bottleneck.weight"]
        del checkpoint1["mesh_head.stages.2.bottleneck.weight"]
        model.load_state_dict(checkpoint1, strict=False)

    training_loop(cfg, cp, model, optimizer, scheduler, loaders, device, loss_fn)
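
# A more general alternative to hard-coding the three bottleneck keys above
# (a sketch, not part of this repo): drop every entry whose shape disagrees
# with the target model before the strict=False load.
def drop_mismatched_keys(model, state_dict):
    own = model.state_dict()
    return {
        k: v for k, v in state_dict.items()
        if k in own and own[k].shape == v.shape
    }

# e.g.
#   checkpoint1 = drop_mismatched_keys(model, checkpoint1)
#   model.load_state_dict(checkpoint1, strict=False)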