def test_createall(self):
    for type_file in self.model_type_files:
        associated_task = type_file.split("/")[-2]
        models_config = OmegaConf.load(type_file)
        models_config = OmegaConf.merge(models_config, self.data_config)
        models_config.update("data.task", associated_task)
        for model_name in models_config.models.keys():
            print(model_name)
            if model_name not in ["MyTemplateModel"]:  # skip the template entry
                models_config.update("model_name", model_name)
                instantiate_model(models_config, MockDatasetGeometric(6))
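# Hedged sketch of the minimal config shape that instantiate_model() is fed in
# test_createall above. The keys are inferred from the test itself; the
# "SomeModel" entry and its options are purely illustrative.
from omegaconf import OmegaConf

sketch_config = OmegaConf.create(
    {
        "models": {"SomeModel": {"conv_type": "dense"}},  # hypothetical model entry
        "model_name": "SomeModel",
        "data": {"task": "segmentation"},
    }
)
# instantiate_model(sketch_config, MockDatasetGeometric(6)) would then resolve
# "SomeModel" against the models section, as the loop above does for each entry.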
def test_pointnet2ms(self):
    params = load_model_config("segmentation", "pointnet2", "pointnet2ms")
    dataset = MockDatasetGeometric(5)
    model = instantiate_model(params, dataset)
    model.set_input(dataset[0])
    model.forward()
    model.backward()
def test_kpconv(self):
    params = load_model_config("segmentation", "kpconv", "SimpleKPConv")
    dataset = MockDatasetGeometric(5)
    model = instantiate_model(params, dataset)
    model.set_input(dataset[0])
    model.forward()
    model.backward()
def test_model_ckpt_using_pointnet2ms(self):
    # Create a checkpoint
    name = "model"
    self.run_path = os.path.join(DIR, "checkpt")
    print(self.run_path)
    if not os.path.exists(self.run_path):
        os.makedirs(self.run_path)
    model_checkpoint = ModelCheckpoint(self.run_path, name, "test", run_config=self.config, resume=False)
    dataset = MockDatasetGeometric(5)
    model = instantiate_model(self.config, dataset)
    model.set_input(dataset[0], "cpu")
    model.instantiate_optimizers(self.config)
    mock_metrics = {"current_metrics": {"acc": 12}, "stage": "test", "epoch": 10}
    model_checkpoint.save_best_models_under_current_metrics(model, mock_metrics)

    # Load checkpoint and initialize model
    model_checkpoint = ModelCheckpoint(self.run_path, name, "test", self.config, resume=True)
    model2 = model_checkpoint.create_model(dataset, weight_name="acc")

    # Optimizer and schedulers must be restored identically
    self.assertEqual(str(model.optimizer.__class__.__name__), str(model2.optimizer.__class__.__name__))
    self.assertEqual(model.optimizer.defaults, model2.optimizer.defaults)
    self.assertEqual(model.schedulers["lr_scheduler"].state_dict(), model2.schedulers["lr_scheduler"].state_dict())
    self.assertEqual(model.schedulers["bn_scheduler"].state_dict(), model2.schedulers["bn_scheduler"].state_dict())

    shutil.rmtree(self.run_path)
    remove(os.path.join(ROOT, "{}.pt".format(name)))
    remove(os.path.join(DIR, "{}.pt".format(name)))
def create_model(self, dataset, weight_name=Checkpoint._LATEST):
    if self.is_empty:
        raise ValueError("Checkpoint is empty")
    run_config = copy.deepcopy(self._checkpoint.run_config)
    model = instantiate_model(run_config, dataset)
    self._initialize_model(model, weight_name)
    return model
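# Hedged usage sketch of create_model(): the load half of the round trip
# exercised in test_model_ckpt_using_pointnet2ms above. Assumes a checkpoint
# was previously written under run_path; all names here are illustrative.
run_path = os.path.join(DIR, "checkpt")
dataset = MockDatasetGeometric(5)
run_config = ...  # the OmegaConf run config used when the checkpoint was written
checkpoint = ModelCheckpoint(run_path, "model", "test", run_config, resume=True)
model = checkpoint.create_model(dataset, weight_name="acc")  # raises ValueError if the checkpoint is empty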
def test_largekpconv(self):
    params = load_model_config("segmentation", "kpconv", "KPConvPaper")
    params.update("data.use_category", True)
    params.update("data.first_subsampling", 0.02)
    dataset = MockDatasetGeometric(5)
    model = instantiate_model(params, dataset)
    model.set_input(dataset[0])
    model.forward()
    model.backward()
def test_kpconvpretransform(self):
    params = load_model_config("segmentation", "kpconv", "SimpleKPConv")
    dataset = MockDatasetGeometric(5)
    model = instantiate_model(params, dataset)
    model.eval()
    dataset_transform = MockDatasetGeometric(5)
    dataset_transform.set_strategies(model)
    model.set_input(dataset[0])
    model.forward()
    model.get_output()
    # Precomputing the multiscale strategies should leave the raw positions untouched
    torch.testing.assert_allclose(dataset_transform[0].pos, dataset[0].pos)
def main(cfg):
    OmegaConf.set_struct(cfg, False)  # This allows getattr and hasattr methods to function correctly
    if cfg.pretty_print:
        print(cfg.pretty())

    set_debugging_vars_to_global(cfg.debugging)

    # Get device
    device = torch.device("cuda" if (torch.cuda.is_available() and cfg.training.cuda) else "cpu")
    log.info("DEVICE : {}".format(device))

    # Enable CUDNN BACKEND
    torch.backends.cudnn.enabled = cfg.training.enable_cudnn

    dataset = instantiate_dataset(cfg.data)
    model = instantiate_model(cfg, dataset)
    log.info(model)
    log.info("Model size = %i", sum(param.numel() for param in model.parameters() if param.requires_grad))

    # Set dataloaders
    dataset.create_dataloaders(
        model,
        cfg.training.batch_size,
        cfg.training.shuffle,
        cfg.training.num_workers,
        cfg.training.precompute_multi_scale,
    )
    log.info(dataset)

    # Run training / evaluation
    model = model.to(device)
    measurement_name = "{}_{}".format(cfg.model_name, dataset.__class__.__name__)
    run(cfg, model, dataset, device, measurement_name)
def test_runall(self):
    def is_known_to_fail(model_name):
        forward_failing = ["MinkUNet_WIP", "pointcnn", "RSConv_4LD", "RSConv_2LD", "randlanet"]
        for failing in forward_failing:
            if failing.lower() in model_name.lower():
                return True
        return False

    def get_dataset(conv_type):
        features = 2
        if conv_type.lower() == "dense":
            return MockDataset(features, num_points=2048)
        if conv_type.lower() == "sparse":
            return MockDatasetGeometric(features, transform=ToSparseInput(0.01), num_points=1024)
        return MockDatasetGeometric(features)

    for type_file in self.model_type_files:
        associated_task = type_file.split("/")[-2]
        models_config = OmegaConf.load(type_file)
        models_config = OmegaConf.merge(models_config, self.data_config)
        models_config.update("data.task", associated_task)
        for model_name in models_config.models.keys():
            with self.subTest(model_name):
                if not is_known_to_fail(model_name):
                    models_config.update("model_name", model_name)
                    dataset = get_dataset(models_config.models[model_name].conv_type)
                    model = instantiate_model(models_config, dataset)
                    model.set_input(dataset[0], device)
                    try:
                        model.forward()
                        model.backward()
                    except Exception as e:
                        print("Model failing:")
                        print(model)
                        raise e
def test_accumulated_gradient(self):
    params = load_model_config("segmentation", "pointnet2", "pointnet2ms")
    config_training = OmegaConf.load(os.path.join(DIR, "test_config/training_config.yaml"))
    dataset = MockDatasetGeometric(5)
    model = instantiate_model(params, dataset)
    model.instantiate_optimizers(config_training)
    model.set_input(dataset[0])
    expected_make_optimizer_step = [False, False, True, False, False, True, False, False, True, False]
    expected_contains_grads = [False, True, True, False, True, True, False, True, True, False]
    make_optimizer_steps = []
    contains_grads = []
    for epoch in range(10):
        model.forward()

        make_optimizer_step = model.manage_optimizer_zero_grad()  # accumulates gradients if the option is enabled
        make_optimizer_steps.append(make_optimizer_step)
        grad_ = model._modules["lin1"].weight.grad
        if grad_ is not None:
            contains_grads.append((grad_.sum() != 0).item())
        else:
            contains_grads.append(False)

        model.backward()  # calculate gradients

        if make_optimizer_step:
            model._optimizer.step()  # update parameters

    self.assertEqual(contains_grads, expected_contains_grads)
    self.assertEqual(make_optimizer_steps, expected_make_optimizer_step)
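# Hedged sketch (not the repo's actual implementation) of the behaviour the
# test above expects from manage_optimizer_zero_grad(), assuming a hypothetical
# `accumulated_gradient` option of 3: gradients are zeroed only at the start of
# each accumulation window, so three forward/backward passes accumulate before
# every optimizer step. This reproduces the expected_* patterns in the test.
class GradientAccumulator:
    def __init__(self, optimizer, accumulated_gradient=3):
        self._optimizer = optimizer
        self._accumulated_gradient = accumulated_gradient
        self._count = 0

    def manage_optimizer_zero_grad(self):
        self._count += 1
        if self._count % self._accumulated_gradient == 0:
            return True  # caller should step the optimizer after backward()
        if self._count % self._accumulated_gradient == 1:
            self._optimizer.zero_grad()  # start of a fresh accumulation window
        return False

# Typical call pattern inside a training loop (mirrors the test above):
#     make_step = accumulator.manage_optimizer_zero_grad()
#     loss.backward()
#     if make_step:
#         optimizer.step()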
def main(cfg):
    OmegaConf.set_struct(cfg, False)  # This allows getattr and hasattr methods to function correctly
    if cfg.pretty_print:
        print(cfg.pretty())

    # Get device
    device = torch.device("cuda" if (torch.cuda.is_available() and cfg.training.cuda) else "cpu")
    log.info("DEVICE : {}".format(device))

    # Enable CUDNN BACKEND
    torch.backends.cudnn.enabled = cfg.training.enable_cudnn

    # Start Wandb if public
    launch_wandb(cfg, cfg.wandb.public and cfg.wandb.log)

    # Checkpoint
    checkpoint = ModelCheckpoint(
        cfg.training.checkpoint_dir,
        cfg.model_name,
        cfg.training.weight_name,
        run_config=cfg,
        resume=bool(cfg.training.checkpoint_dir),
    )

    # Create model and datasets
    if not checkpoint.is_empty:
        dataset = instantiate_dataset(checkpoint.data_config)
        model = checkpoint.create_model(dataset, weight_name=cfg.training.weight_name)
    else:
        dataset = instantiate_dataset(cfg.data)
        model = instantiate_model(cfg, dataset)
    model.instantiate_optimizers(cfg)
    log.info(model)
    model.log_optimizers()
    log.info("Model size = %i", sum(param.numel() for param in model.parameters() if param.requires_grad))

    # Set dataloaders
    dataset.create_dataloaders(
        model,
        cfg.training.batch_size,
        cfg.training.shuffle,
        cfg.training.num_workers,
        cfg.training.precompute_multi_scale,
    )
    log.info(dataset)

    # Choose selection stage
    selection_stage = getattr(cfg, "selection_stage", "")
    checkpoint.selection_stage = dataset.resolve_saving_stage(selection_stage)
    tracker: BaseTracker = dataset.get_tracker(model, dataset, cfg.wandb.log, cfg.tensorboard.log)

    launch_wandb(cfg, not cfg.wandb.public and cfg.wandb.log)

    # Run training / evaluation
    model = model.to(device)
    visualizer = Visualizer(cfg.visualization, dataset.num_batches, dataset.batch_size, os.getcwd())
    run(cfg, model, dataset, device, tracker, checkpoint, visualizer)