Example #1
def get_wandb_logger(name: Optional[str] = None):
    offline = "WANDB_API_KEY" not in os.environ
    project = os.environ.get("WANDB_PROJECT",
                             "osic-pulmonary-fibrosis-progression")
    return WandbLogger(name=name,
                       offline=offline,
                       project=project,
                       log_model=True)
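For context, a minimal usage sketch of a helper like the one above; the Trainer arguments and the commented-out model are placeholders, not part of the original example:

import os
from typing import Optional

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger

logger = get_wandb_logger(name="baseline")   # offline unless WANDB_API_KEY is set
trainer = pl.Trainer(max_epochs=10, logger=logger)
# trainer.fit(model)  # `model` would be any LightningModule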
Example #2
def main(hparams):
    model = TransformerMarco(hparams)

    loggers = []
    if hparams.use_wandb:
        wandb_logger = WandbLogger(project='long-marco', entity='usiir',
                                   name=f'Albert-passage-{hparams.slurm_job_id}')
        wandb_logger.log_hyperparams(hparams)
        loggers.append(wandb_logger)
    if hparams.use_tensorboard:
        tb_logger = TensorBoardLogger("tb_logs", name="Longformer-docs",
                                      version=hparams.slurm_job_id)
        loggers.append(tb_logger)

    checkpoint_callback = ModelCheckpoint(
                filepath=os.path.join(os.getcwd(), 'checkpoints'),
                save_top_k=3,
                verbose=True,
                monitor='val_epoch_loss',
                mode='min',
                prefix=''
                )

    # The Trainer handles most of the training boilerplate.
    # Enables distributed training with one line:
    # https://towardsdatascience.com/trivial-multi-node-training-with-pytorch-lightning-ff75dfb809bd
    trainer = pl.Trainer(
            gpus=hparams.gpus,
            num_nodes=hparams.num_nodes,
            distributed_backend=hparams.distributed_backend,
            # control the effective batch size with this param
            accumulate_grad_batches=hparams.trainer_batch_size,
            # Training stops when max_steps or max_epochs is reached, whichever comes first.
            max_epochs=hparams.epochs,
            max_steps=hparams.num_training_steps, 
            logger=loggers,
            checkpoint_callback=checkpoint_callback,
            # progress_bar_callback=False,
            # progress_bar_refresh_rate=0,
            # use_amp=True --> use 16bit precision
            # val_check_interval=0.25, # val 4 times during 1 train epoch
            val_check_interval=hparams.val_check_interval, # val every N steps
            # num_sanity_val_steps=5,
            # fast_dev_run=True
        )
    trainer.fit(model)
Example #3
 def get_logger(cls,
                save_dir: str,
                version=0,
                offline=False,
                anonymous=False,
                project=None,
                log_model=True,
                **__) -> LightningLoggerBase:
     return WandbLogger(save_dir=save_dir,
                        version=str(version),
                        offline=offline,
                        anonymous=anonymous,
                        project=project,
                        log_model=log_model)
Example #4
 def build_trainer(
     self,
     trainer_params: Dict,
     callback_list: List[Callback],
     logger_dict: Dict,
     checkpoint_dict: Dict,
     device: Optional[Union[str, int]] = None,
 ) -> Trainer:
     if isinstance(device, str):
         device: Union[str, List[int]] = json.loads(device)
         if not isinstance(device, (str, list)):
             raise ValueError(f"Device is not valid: {device}")
     trainer_params["gpus"] = device
     logger: WandbLogger = WandbLogger(**logger_dict)
     model_checkpointer: ModelCheckpoint = ModelCheckpoint(
         **checkpoint_dict)
     callback_list.append(model_checkpointer)
     return Trainer(callbacks=callback_list, logger=logger, **trainer_params)
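A hedged sketch of how such a dict-driven builder might be invoked; the `runner` instance, the dictionary contents, and the device string are illustrative assumptions, not taken from the original project:

runner = ...  # whatever object defines build_trainer
trainer = runner.build_trainer(
    trainer_params={"max_epochs": 10},
    callback_list=[],
    logger_dict={"project": "my-project", "offline": True},
    checkpoint_dict={"monitor": "val_loss", "mode": "min", "save_top_k": 1},
    device="[0]",  # JSON string, parsed into a list of GPU ids
)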
Example #5
def get_wandb_logger(name: Optional[str] = None):
    offline = "WANDB_API_KEY" not in os.environ
    project = os.environ.get("WANDB_PROJECT", "siim_isic_melanoma_classification")
    return WandbLogger(name=name, offline=offline, project=project, log_model=True)
Example #6
    random.seed(seed)

    hparams = vars(opt)
    hparams['git_id'] = get_current_git_hash()
    hparams['batch_size'] = opt.batch_size
    hparams['num_workers'] = 0

    hparams['bands'] = len(util.get_wavelengths_for(opt.camera_type))
    print("Hparams: %s" % hparams)

    all_records = get_for_camera_type(
        get_for_fruit(all_fruits, hparams['fruit']), hparams['camera_type'])

    model = LayerClassifierModule(hparams, all_records)
    logger = WandbLogger(hparams['git_id'],
                         offline=not opt.online_logging,
                         save_dir=opt.log_path,
                         project='deephs-layerclassifier')

    # early-stop on validation loss (mode='min': smaller is better)
    early_stop_callback = EarlyStopping(monitor='val_loss',
                                        min_delta=0.00,
                                        patience=3,
                                        verbose=False,
                                        mode='min')

    checkpoint_callback = ModelCheckpoint(filepath='best.ckpt',
                                          save_top_k=1,
                                          verbose=True,
                                          monitor='val_loss',
                                          mode='min')

    # (call completed following the pattern of Example #11; the snippet is truncated in the source)
    trainer = lightning.Trainer(max_epochs=opt.num_epochs,
                                gpus=-1,
                                logger=logger,
                                early_stop_callback=early_stop_callback,
                                checkpoint_callback=checkpoint_callback)

    trainer.fit(model)
Example #7
from model import Network
from text_data import MemoryMapDataset
from labeler import Labeler, SpacySentenceTokenizer, SpacyWordTokenizer


def store_code(run):
    dst = Path(run.dir) / "code" / "train"
    dst.mkdir(parents=True, exist_ok=True)
    src = (Path(__file__) / "..").resolve()

    for f in glob(str(src / "*.py")):
        shutil.copy(f, dst)


if __name__ == "__main__":
    wandb_logger = WandbLogger(project="nnsplit")

    parser = Network.get_parser()
    parser.set_defaults(logger=wandb_logger)
    hparams = parser.parse_args()

    if hparams.logger:
        store_code(wandb_logger.experiment)

    labeler = Labeler([
        SpacySentenceTokenizer("de_core_news_sm",
                               lower_start_prob=0.7,
                               remove_end_punct_prob=0.7),
        SpacyWordTokenizer("de_core_news_sm"),
    ])
Example #8
    def configure_optimizers(self):
        # (the optimizer construction is truncated in the source; an Adam
        #  optimizer over self.parameters() with lr=self.lr is assumed)
        optimizer = optim.Adam(self.parameters(),
                               lr=self.lr,
                               betas=(0.28, 0.93),
                               weight_decay=0.01)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=[25, 35],
                                                   gamma=0.1)
        return [optimizer], [scheduler]

    def train_dataloader(self):
        return tg.loader.DataLoader(list(self.dataset),
                                    num_workers=self.num_workers,
                                    pin_memory=False,
                                    shuffle=True)

    def val_dataloader(self):
        return tg.loader.DataLoader(list(self.dataset),
                                    num_workers=self.num_workers,
                                    pin_memory=False,
                                    shuffle=True)


if __name__ == '__main__':
    os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
    data_dir = os.path.join('GraphCoAttention', 'data')
    wandb.init()
    wandb_logger = WandbLogger(project='flux', log_model='all')
    trainer = pl.Trainer(gpus=[0],
                         max_epochs=2000,
                         check_val_every_n_epoch=500,
                         accumulate_grad_batches=1)
    trainer.fit(Learner(data_dir))
Example #9
        encoder = FixNineYearOldCodersJunk(encoder)
    else:
        encoder = torch.hub.load(repo_or_dir='rwightman/gen-efficientnet-pytorch', model=args.vision_model,
                                 pretrained=args.pretrained)
        encoder.conv_stem = nn.Conv2d(dm.num_channels, 32, kernel_size=3, stride=2, padding=1, bias=False)

        if encoder.classifier.in_features == 2048:
            encoder.classifier = nn.Identity()
        else:
            encoder.classifier = nn.Linear(encoder.classifier.in_features, 2048, bias=False)

    model = BYOL(encoder=encoder, **args.__dict__)

    save_dir = f'./{args.vision_model}-{args.input_size}'
    Path(save_dir).mkdir(parents=True, exist_ok=True)
    wandb_logger = WandbLogger(project=f"byol-{args.dataset}-breakout", save_dir=save_dir, log_model=False)
    # finetune in real-time

    if args.old_eval:
        online_eval = OldSSLOnlineEvaluator(dataset=args.dataset, z_dim=2048, num_classes=dm.num_classes)
    else:
        online_eval = SSLOnlineEvaluator(dataset=args.dataset, z_dim=2048, num_classes=dm.num_classes, name_classes=dm.name_classes)

    image_viewer = CV2ModelImageSampler(nrow=32)
    # DEFAULTS used by the Trainer
    callbacks = [online_eval]

    if args.debug:
        callbacks += [image_viewer]

    trainer = pl.Trainer.from_argparse_args(args, max_steps=300000, callbacks=callbacks)
Example #10
    def train_dataloader(self):
        qm9_dataloader = tg.loader.DataLoader(list(self.qm9_dataset), batch_size=self.batch_size,
                                              num_workers=self.num_workers, pin_memory=False, shuffle=True)

        ddi_dataloader = tg.loader.DataLoader(list(self.ddi_dataset), batch_size=self.batch_size,
                                              num_workers=self.num_workers, pin_memory=False, shuffle=True)

        loaders = {"QM9": qm9_dataloader, 'DDI': ddi_dataloader}
        return loaders

    def val_dataloader(self):
        qm9_dataloader = tg.loader.DataLoader(list(self.qm9_dataset), batch_size=self.batch_size,
                                              num_workers=self.num_workers, pin_memory=False, shuffle=True)

        ddi_dataloader = tg.loader.DataLoader(list(self.ddi_dataset), batch_size=self.batch_size,
                                              num_workers=self.num_workers, pin_memory=False, shuffle=True)
        # loaders = {"QM9": qm9_dataloader, 'DDI': ddi_dataloader}
        loaders = [qm9_dataloader, ddi_dataloader]
        return loaders


if __name__ == '__main__':
    os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
    data_dir = os.path.join('GraphCoAttention', 'data')
    wandb.init()
    wandb_logger = WandbLogger("jogarch", project='flux', log_model='all')
    trainer = pl.Trainer(gpus=[0], max_epochs=2000, check_val_every_n_epoch=500, accumulate_grad_batches=1)
    trainer.fit(Learner(data_dir, bs=15, lr=0.001, n_cycles=40, hidden_dim=225, n_head=5))

Example #11
    hparams['batch_size'] = opt.batch_size
    hparams['num_workers'] = 0

    hparams['bands'] = len(util.get_wavelengths_for(opt.camera_type))

    print("Hparams: %s" % hparams)

    all_records = get_for_camera_type(
        get_for_fruit(all_fruits, hparams['fruit']), hparams['camera_type'])

    # use only a part of the recordings
    selected_records = np.random.choice(all_records, 20)

    autoencoder = FalseColorAutoencoderModule(hparams, selected_records)
    logger = WandbLogger(hparams['git_id'],
                         offline=not opt.online_logging,
                         save_dir=opt.log_path,
                         project='deephs_pretrained_autoencoder')

    checkpoint_callback = ModelCheckpoint(filepath='best.ckpt',
                                          save_top_k=1,
                                          verbose=True,
                                          monitor='val_loss',
                                          mode='min')

    trainer = lightning.Trainer(max_epochs=200,
                                gpus=-1,
                                logger=logger,
                                checkpoint_callback=checkpoint_callback)

    trainer.fit(autoencoder)
    best_autoencoder = FalseColorAutoencoderModule.load_from_checkpoint(
        checkpoint_callback.best_model_path)  # (argument assumed; the line is truncated in the source)
Example #12
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        tensorboard_logs = {'train_loss': loss}
        return {'loss': loss, 'log': tensorboard_logs}

    def validation_step(self, batch, batch_idx):
        # OPTIONAL
        x, y = batch
        y_hat = self(x)
        return {'val_loss': F.cross_entropy(y_hat, y)}

    def validation_epoch_end(self, outputs):
        # OPTIONAL
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        tensorboard_logs = {'val_loss': avg_loss}
        return {'val_loss': avg_loss, 'log': tensorboard_logs}


experiments_names = '_'.join([str(v) for v in config.values()])
print(experiments_names)
wandb_logger = WandbLogger(name=experiments_names, project='tiny-imagenet')

model = TinyImagenetModel(BASE_DIR,
                          df=val_df,
                          config=config,
                          augmentation_func=seq_cutout.augment)

trainer = pl.Trainer(max_epochs=50, gpus=1, logger=wandb_logger)

trainer.fit(model)
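The dict-return logging style in Example #12 comes from older PyTorch Lightning releases; in current releases the same metrics are reported with self.log, roughly as in this minimal sketch (not part of the original example):

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = F.cross_entropy(self(x), y)
        self.log('train_loss', loss)               # forwarded to the attached logger, e.g. WandbLogger
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        loss = F.cross_entropy(self(x), y)
        self.log('val_loss', loss, prog_bar=True)  # averaged over the validation epoch automatically
        return loss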