def hydra_main(cfg: DictConfig) -> None:
    """Training entry point: configure logging, tag the run with a wandb
    id, then either submit a slurm job or train in-process.
    """
    # Set up python logging; only rank zero adjusts the level to avoid
    # duplicate reconfiguration under distributed launches.
    logger = logging.getLogger()
    if is_rank_zero():
        logger.setLevel(cfg.log_level)
    logging.info(OmegaConf.to_yaml(cfg))

    # Generate the wandb run id up front so a requeued/resumed slurm job
    # attaches to the same run.
    wandb_version = wandb.util.generate_id()
    add_wandb_version(cfg, wandb_version)

    if cfg.cluster.name == 'slurm':
        slurm_dir = Path.cwd() / 'slurm'
        # FIX: bare mkdir() raised FileExistsError when resubmitting from a
        # working directory that already has a slurm/ folder; tolerate an
        # existing directory and create missing parents.
        slurm_dir.mkdir(parents=True, exist_ok=True)
        logging.info(f'Slurm logs: {slurm_dir}')
        executor = submitit.AutoExecutor(slurm_dir)
        executor.update_parameters(
            slurm_gpus_per_node=cfg.cluster.gpus_per_node,
            slurm_nodes=cfg.cluster.nodes,
            # one task per GPU (standard DDP layout).
            slurm_ntasks_per_node=cfg.cluster.gpus_per_node,
            slurm_cpus_per_task=cfg.cluster.cpus_per_task,
            slurm_time=cfg.cluster.time,
            slurm_additional_parameters={
                'constraint': 'gpu',
                'account': cfg.cluster.account,
                'requeue': True
            })
        job = executor.submit(train, cfg=cfg)
        logging.info(f'submitted job {job.job_id}.')
    else:
        train(cfg)
def hydra_main(cfg: DictConfig) -> None:
    """Evaluation entry point: configure logging, dump the resolved
    config, and run test().
    """
    root_logger = logging.getLogger()
    # Only rank zero sets the log level, mirroring the training launcher.
    if is_rank_zero():
        root_logger.setLevel(cfg.log_level)
    logging.info(OmegaConf.to_yaml(cfg))
    test(cfg)
def __init__(self, cfg: OmegaConf):
    """Build a panoptic model from a frozen pretrained semantic SPVCNN
    (backbone + classifier reused as-is) plus a trainable embedding head.
    """
    super().__init__()
    self.hparams.update(cfg)
    if is_rank_zero():
        self.save_hyperparameters(cfg)

    self.optimizer_factory = hydra.utils.instantiate(self.hparams.optimizer)
    self.scheduler_factory = hydra.utils.instantiate(self.hparams.scheduler)

    # This wrapper only supports the joint semantic+instance task.
    assert self.hparams.task == 'panoptic'

    # Load the pretrained semantic network and freeze it; only the
    # embedding head below receives gradients.
    sem_model = SPVCNN_sem.load_from_checkpoint(cfg.model.sem_path)
    sem_model.freeze()
    self.backbone = sem_model.backbone
    self.classifier = sem_model.classifier

    # Per-stage channel widths scaled by the compression ratio cr.
    channels = [int(self.hparams.model.cr * width)
                for width in self.hparams.model.cs]
    self.embedder = SPVCNN_embedder_head(channels,
                                         self.hparams.model.embed_dim)
    self.embed_criterion = hydra.utils.instantiate(
        self.hparams.embed_criterion)
def __init__(self, cfg: OmegaConf):
    """Build an SPVCNN (sparse-voxel U-Net) supporting semantic,
    instance, or panoptic prediction heads depending on cfg.task.

    The encoder (stem + stage1..4) is always built; decoder up-paths and
    output heads are created only for the configured task:
    - 'semantic'/'panoptic': c_up4/c_point_transform/c_lin (class logits)
    - 'instance'/'panoptic': e_up4/e_point_transform/e_lin (embeddings)
    """
    super().__init__()
    self.hparams.update(cfg)
    if is_rank_zero():
        self.save_hyperparameters(cfg)
    #self.hparams.optimizer._target_ = 'calo_cluster.training.optimizers.adam_factory'
    #self.hparams.scheduler._target_ = 'calo_cluster.training.schedulers.one_cycle_lr_factory'
    self.optimizer_factory = hydra.utils.instantiate(
        self.hparams.optimizer)
    self.scheduler_factory = hydra.utils.instantiate(
        self.hparams.scheduler)
    task = self.hparams.task
    assert task in ('instance', 'semantic', 'panoptic')
    # Criteria are instantiated lazily per task so a pure-semantic config
    # need not define an embedding loss (and vice versa).
    if task == 'instance' or task == 'panoptic':
        self.embed_criterion = hydra.utils.instantiate(
            self.hparams.embed_criterion)
    if task == 'semantic' or task == 'panoptic':
        self.semantic_criterion = hydra.utils.instantiate(
            self.hparams.semantic_criterion)
    # Per-stage channel widths, scaled by the compression ratio cr.
    cs = [int(self.hparams.model.cr * x) for x in self.hparams.model.cs]
    # Encoder stem: two 3x3x3 sparse convs at full resolution.
    self.stem = nn.Sequential(
        spnn.Conv3d(self.hparams.dataset.num_features, cs[0],
                    kernel_size=3, stride=1),
        spnn.BatchNorm(cs[0]), spnn.ReLU(True),
        spnn.Conv3d(cs[0], cs[0], kernel_size=3, stride=1),
        spnn.BatchNorm(cs[0]), spnn.ReLU(True))
    # Four downsampling stages, each halving resolution (stride-2 conv)
    # then refining with two residual blocks.
    self.stage1 = nn.Sequential(
        BasicConvolutionBlock(cs[0], cs[0], ks=2, stride=2, dilation=1),
        ResidualBlock(cs[0], cs[1], ks=3, stride=1, dilation=1),
        ResidualBlock(cs[1], cs[1], ks=3, stride=1, dilation=1),
    )
    self.stage2 = nn.Sequential(
        BasicConvolutionBlock(cs[1], cs[1], ks=2, stride=2, dilation=1),
        ResidualBlock(cs[1], cs[2], ks=3, stride=1, dilation=1),
        ResidualBlock(cs[2], cs[2], ks=3, stride=1, dilation=1),
    )
    self.stage3 = nn.Sequential(
        BasicConvolutionBlock(cs[2], cs[2], ks=2, stride=2, dilation=1),
        ResidualBlock(cs[2], cs[3], ks=3, stride=1, dilation=1),
        ResidualBlock(cs[3], cs[3], ks=3, stride=1, dilation=1),
    )
    self.stage4 = nn.Sequential(
        BasicConvolutionBlock(cs[3], cs[3], ks=2, stride=2, dilation=1),
        ResidualBlock(cs[3], cs[4], ks=3, stride=1, dilation=1),
        ResidualBlock(cs[4], cs[4], ks=3, stride=1, dilation=1),
    )
    # Decoder up-paths: [deconv, refinement] pairs; the refinement input
    # width includes the matching encoder stage's channels (cs[3], cs[2],
    # cs[1]) for the skip connection concatenated in forward().
    self.up1 = nn.ModuleList([
        BasicDeconvolutionBlock(cs[4], cs[5], ks=2, stride=2),
        nn.Sequential(
            ResidualBlock(cs[5] + cs[3], cs[5], ks=3, stride=1,
                          dilation=1),
            ResidualBlock(cs[5], cs[5], ks=3, stride=1, dilation=1),
        )
    ])
    self.up2 = nn.ModuleList([
        BasicDeconvolutionBlock(cs[5], cs[6], ks=2, stride=2),
        nn.Sequential(
            ResidualBlock(cs[6] + cs[2], cs[6], ks=3, stride=1,
                          dilation=1),
            ResidualBlock(cs[6], cs[6], ks=3, stride=1, dilation=1),
        )
    ])
    self.up3 = nn.ModuleList([
        BasicDeconvolutionBlock(cs[6], cs[7], ks=2, stride=2),
        nn.Sequential(
            ResidualBlock(cs[7] + cs[1], cs[7], ks=3, stride=1,
                          dilation=1),
            ResidualBlock(cs[7], cs[7], ks=3, stride=1, dilation=1),
        )
    ])
    # Classification branch: final up-path back to full resolution plus
    # a per-point linear head producing class logits.
    if task == 'semantic' or task == 'panoptic':
        self.c_up4 = nn.ModuleList([
            BasicDeconvolutionBlock(cs[7], cs[8], ks=2, stride=2),
            nn.Sequential(
                ResidualBlock(cs[8] + cs[0], cs[8], ks=3, stride=1,
                              dilation=1),
                ResidualBlock(cs[8], cs[8], ks=3, stride=1, dilation=1),
            )
        ])
        self.c_point_transform = nn.Sequential(
            nn.Linear(cs[6], cs[8]),
            nn.BatchNorm1d(cs[8]),
            nn.ReLU(True),
        )
        self.c_lin = nn.Sequential(
            nn.Linear(cs[8], self.hparams.dataset.num_classes))
    # Embedding branch: mirrors the classification branch but outputs a
    # per-point embedding vector for instance clustering.
    if task == 'instance' or task == 'panoptic':
        self.e_up4 = nn.ModuleList([
            BasicDeconvolutionBlock(cs[7], cs[8], ks=2, stride=2),
            nn.Sequential(
                ResidualBlock(cs[8] + cs[0], cs[8], ks=3, stride=1,
                              dilation=1),
                ResidualBlock(cs[8], cs[8], ks=3, stride=1, dilation=1),
            )
        ])
        self.e_point_transform = nn.Sequential(
            nn.Linear(cs[6], cs[8]),
            nn.BatchNorm1d(cs[8]),
            nn.ReLU(True),
        )
        self.e_lin = nn.Sequential(
            nn.Linear(cs[8], self.hparams.model.embed_dim))
    # Point-branch transforms (SPVCNN point/voxel feature fusion);
    # presumably applied to point features between encoder stages in
    # forward() — confirm against the forward pass.
    self.point_transforms = nn.ModuleList([
        nn.Sequential(
            nn.Linear(cs[0], cs[4]),
            nn.BatchNorm1d(cs[4]),
            nn.ReLU(True),
        ),
        nn.Sequential(
            nn.Linear(cs[4], cs[6]),
            nn.BatchNorm1d(cs[6]),
            nn.ReLU(True),
        )
    ])
    self.weight_initialization()
    # In-place dropout with fixed p=0.3.
    self.dropout = nn.Dropout(0.3, True)
def train(cfg: DictConfig) -> None:
    """Train a model according to the hydra config.

    Sets up callbacks (SWA, checkpointing, LR monitor), wandb logging,
    the datamodule and model, then runs pytorch-lightning training.
    """
    logging.info('Beginning training...')
    fix_task(cfg)

    # Overfit mode: fit a single batch as a sanity check; checkpoints of
    # an overfit model are not useful, so disable saving.
    if cfg.overfit:
        overfit_batches = 1
        cfg.train.batch_size = 1
        cfg.checkpoint.save_top_k = 0
        cfg.checkpoint.save_last = False
    else:
        overfit_batches = 0.0

    callbacks = []

    # Set up SWA (stochastic weight averaging).
    if cfg.swa.active:
        swa_callback = hydra.utils.instantiate(cfg.swa.callback)
        callbacks.append(swa_callback)

    # Set up checkpointing.
    if cfg.resume_ckpt is not None:
        logging.info(f'Resuming checkpoint={cfg.resume_ckpt}')
        resume_from_checkpoint = cfg.resume_ckpt
    else:
        resume_from_checkpoint = None
    checkpoint_callback = hydra.utils.instantiate(cfg.checkpoint)
    callbacks.append(checkpoint_callback)

    # Set up learning rate monitor.
    lr_monitor = pl.callbacks.LearningRateMonitor(logging_interval='step')
    callbacks.append(lr_monitor)

    # Set up wandb logging; copy the hydra config dir into the wandb run
    # dir so each run archives the exact config it was launched with.
    logger = hydra.utils.instantiate(cfg.wandb, save_dir=cfg.outputs_dir,
                                     version=cfg.wandb.version,
                                     group=cfg.wandb.name)
    if is_rank_zero():
        shutil.copytree(Path.cwd() / '.hydra',
                        Path(logger.experiment.dir) / '.hydra')
    cfg.wandb.version = logger.version
    if is_rank_zero():
        # Persist the (possibly newly assigned) wandb version back into
        # the archived config so requeued/resumed runs reuse the run id.
        config_path = Path(logger.experiment.dir) / '.hydra' / 'config.yaml'
        with config_path.open('r+') as f:
            data = yaml.load(f, Loader=yaml.CLoader)
            data['wandb']['version'] = cfg.wandb.version
            f.seek(0)
            yaml.dump(data, f)
            # FIX: without truncate(), stale bytes from the original file
            # remain whenever the rewritten YAML is shorter, leaving a
            # corrupt config on disk.
            f.truncate()

    datamodule = hydra.utils.instantiate(cfg.dataset)
    if cfg.init_ckpt is not None:
        # Warm-start from an existing checkpoint, overriding its saved
        # hyperparameters with the current config.
        model = SPVCNN.load_from_checkpoint(cfg.init_ckpt, **cfg)
    else:
        model = hydra.utils.instantiate(cfg.model.target, cfg)

    # train
    trainer = pl.Trainer(gpus=cfg.train.gpus,
                         logger=logger,
                         max_epochs=cfg.train.num_epochs,
                         resume_from_checkpoint=resume_from_checkpoint,
                         deterministic=True,
                         accelerator=cfg.train.distributed_backend,
                         overfit_batches=overfit_batches,
                         val_check_interval=cfg.val_check_interval,
                         callbacks=callbacks,
                         precision=32,
                         log_every_n_steps=1)
    if is_rank_zero():
        trainer.logger.log_hyperparams(cfg._content)  # pylint: disable=no-member
    trainer.fit(model=model, datamodule=datamodule)
def __init__(self, cfg: OmegaConf):
    """Build a DGCNN with task-dependent heads.

    Like the SPVCNN model in this file, the shared trunk is always
    built; a classifier head is added for 'semantic'/'panoptic' and an
    embedder head for 'instance'/'panoptic'.
    """
    super(DGCNN, self).__init__()
    # NOTE(review): the other models in this file use
    # self.hparams.update(cfg); direct assignment to hparams fails on
    # newer pytorch-lightning where hparams is a read-only property —
    # confirm the pinned PL version before changing.
    self.hparams = cfg
    if is_rank_zero():
        self.save_hyperparameters(cfg)
    self.optimizer_factory = hydra.utils.instantiate(
        self.hparams.optimizer)
    self.scheduler_factory = hydra.utils.instantiate(
        self.hparams.scheduler)
    task = self.hparams.task
    assert task in ('instance', 'semantic', 'panoptic')
    # Losses instantiated per task (config path differs from the SPVCNN
    # model: criterion.embed / criterion.semantic here).
    if task == 'instance' or task == 'panoptic':
        self.embed_criterion = hydra.utils.instantiate(
            self.hparams.criterion.embed)
    if task == 'semantic' or task == 'panoptic':
        self.semantic_criterion = hydra.utils.instantiate(
            self.hparams.criterion.semantic)
    # Batch norms created separately so they can be shared into the
    # Sequential blocks below.
    self.bn1 = nn.BatchNorm2d(64)
    self.bn2 = nn.BatchNorm2d(64)
    self.bn3 = nn.BatchNorm2d(64)
    self.bn4 = nn.BatchNorm2d(64)
    self.bn5 = nn.BatchNorm2d(64)
    self.bn6 = nn.BatchNorm1d(1024)
    self.bn7 = nn.BatchNorm1d(512)
    self.bn8 = nn.BatchNorm1d(256)
    # EdgeConv-style 1x1 convs over (point, neighbor) feature pairs.
    # NOTE(review): conv1's input width of 8 is hard-coded — presumably
    # 2 * num_features; confirm against the dataset config.
    self.conv1 = nn.Sequential(nn.Conv2d(8, 64, kernel_size=1,
                                         bias=False),
                               self.bn1,
                               nn.LeakyReLU(negative_slope=0.2))
    self.conv2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1,
                                         bias=False),
                               self.bn2,
                               nn.LeakyReLU(negative_slope=0.2))
    # conv3/conv5 take 64*2 inputs: paired (point, neighbor) features.
    self.conv3 = nn.Sequential(nn.Conv2d(64*2, 64, kernel_size=1,
                                         bias=False),
                               self.bn3,
                               nn.LeakyReLU(negative_slope=0.2))
    self.conv4 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1,
                                         bias=False),
                               self.bn4,
                               nn.LeakyReLU(negative_slope=0.2))
    self.conv5 = nn.Sequential(nn.Conv2d(64*2, 64, kernel_size=1,
                                         bias=False),
                               self.bn5,
                               nn.LeakyReLU(negative_slope=0.2))
    # Global feature: 192 = 3 concatenated 64-channel stages.
    self.conv6 = nn.Sequential(nn.Conv1d(192, 1024, kernel_size=1,
                                         bias=False),
                               self.bn6,
                               nn.LeakyReLU(negative_slope=0.2))
    # 1216 = 1024 global + 192 per-point features concatenated.
    self.conv7 = nn.Sequential(nn.Conv1d(1216, 512, kernel_size=1,
                                         bias=False),
                               self.bn7,
                               nn.LeakyReLU(negative_slope=0.2))
    self.conv8 = nn.Sequential(nn.Conv1d(512, 256, kernel_size=1,
                                         bias=False),
                               self.bn8,
                               nn.LeakyReLU(negative_slope=0.2))
    self.dp1 = nn.Dropout(p=self.hparams.model.dropout)
    # Task heads: 1x1 conv producing per-point class logits and/or
    # instance embeddings.
    if task == 'semantic' or task == 'panoptic':
        self.classifier = nn.Sequential(nn.Conv1d(
            256, self.hparams.dataset.num_classes, kernel_size=1,
            bias=False))
    if task == 'instance' or task == 'panoptic':
        self.embedder = nn.Sequential(nn.Conv1d(
            256, self.hparams.model.embed_dim, kernel_size=1,
            bias=False))