Example #1
def main_loop(config):
    os.makedirs(config.tmp_dir, exist_ok=True)
    qd_dataset = SimpleListDataset(config.train_source)
    interface = BlkPosInterface(qd_dataset)
    logger_intro = TensorBoardLogger(config.log_dir,
                                     name='introspector',
                                     version=config.version)
    logger_reason = TensorBoardLogger(config.log_dir,
                                      name='reasoner',
                                      version=config.version)
    if config.init_relevance != '':
        if hasattr(config, 'conditional_transforms'):
            ct = config.conditional_transforms
            del config.conditional_transforms
        else:
            ct = []
        init_relevance(qd_dataset,
                       method=config.init_relevance,
                       conditional_transforms=ct)

    introspector = IntrospectorModule(config)
    reasoner = ReasonerModule(config)

    def _create_new_trainer(epoch, logger):
        return Trainer(
            max_epochs=epoch,
            gpus=config.gpus,
            distributed_backend='ddp',
            default_save_path=config.save_dir,
            logger=logger,
            weights_summary=None,
            early_stop_callback=False,
            check_val_every_n_epoch=1,
        )

    min_epoch = min(
        find_lastest_checkpoint(
            os.path.join(config.save_dir, 'introspector',
                         f'version_{config.version}', 'checkpoints'),
            epoch=True),
        find_lastest_checkpoint(
            os.path.join(config.save_dir, 'reasoner',
                         f'version_{config.version}', 'checkpoints'),
            epoch=True)) + 1
    logging.info(f'Continue training at epoch {min_epoch}...')
    for epoch in range(min_epoch, config.num_epochs):
        intro_dataset = interface.build_random_buffer(
            num_samples=config.num_samples)
        introspector.set_dataset(intro_dataset)
        trainer = _create_new_trainer(epoch + 1, logger_intro)
        trainer.fit(introspector)

        interface.collect_estimations_from_dir(config.tmp_dir)
        reason_dataset = interface.build_promising_buffer(
            num_samples=config.num_samples)
        reasoner.set_dataset(reason_dataset)
        trainer = _create_new_trainer(epoch + 1, logger_reason)
        trainer.fit(reasoner)
        if config.latent and epoch > 1:
            interface.apply_changes_from_dir(config.tmp_dir)
Example #2
def test_tensorboard_log_metrics(tmpdir, step_idx):
    logger = TensorBoardLogger(tmpdir)
    metrics = {
        "float": 0.3,
        "int": 1,
        "FloatTensor": torch.tensor(0.1),
        "IntTensor": torch.tensor(1)
    }
    logger.log_metrics(metrics, step_idx)
Example #3
 def configure_logger(self, logger):
     if logger is True:
         # default logger
         self.logger = TensorBoardLogger(save_dir=self.default_save_path,
                                         version=self.slurm_job_id,
                                         name='lightning_logs')
         self.logger.rank = 0
     elif logger is False:
         self.logger = None
     else:
         self.logger = logger
         self.logger.rank = 0
Example #4
def main():
    ## Training validation
    model = EstimatePose(algo='SemanticKDD')

    logger = TensorBoardLogger(save_dir='./log',
                               name='lightning_logs',
                               version=3)

    checkpoint = ModelCheckpoint(filepath=output_folder,
                                 verbose=1,
                                 save_top_k=-1,
                                 monitor='val_loss',
                                 save_weights_only=False,
                                 period=50,
                                 prefix='semanticKDD_unnormalized')

    trainer = pl.Trainer(train_percent_check=1,
                         val_percent_check=1,
                         logger=logger,
                         min_epochs=300,
                         max_epochs=350,
                         gpus=1,
                         show_progress_bar=True,
                         checkpoint_callback=checkpoint,
                         early_stop_callback=False,
                         resume_from_checkpoint=None)

    trainer.fit(model)
Example #5
    def configure_early_stopping(self, early_stop_callback, logger):
        if early_stop_callback is True:
            self.early_stop_callback = EarlyStopping(monitor='val_loss',
                                                     patience=3,
                                                     strict=True,
                                                     verbose=True,
                                                     mode='min')
            self.enable_early_stop = True
        elif early_stop_callback is None:
            self.early_stop_callback = EarlyStopping(monitor='val_loss',
                                                     patience=3,
                                                     strict=False,
                                                     verbose=False,
                                                     mode='min')
            self.enable_early_stop = True
        elif not early_stop_callback:
            self.early_stop_callback = None
            self.enable_early_stop = False
        else:
            self.early_stop_callback = early_stop_callback
            self.enable_early_stop = True

        # configure logger
        if logger is True:
            # default logger
            self.logger = TensorBoardLogger(save_dir=self.default_save_path,
                                            version=self.slurm_job_id,
                                            name='lightning_logs')
            self.logger.rank = 0
        elif logger is False:
            self.logger = None
        else:
            self.logger = logger
            self.logger.rank = 0
Example #6
def ergo_ii_tuning():
    parser = ArgumentParser()
    parser.add_argument('--version', type=int)
    parser.add_argument('--gpu', type=int)
    parser.add_argument('--dataset', type=str)
    parser.add_argument('--use_alpha', type=bool, default=True)
    parser.add_argument('--use_vj', type=bool, default=True)
    parser.add_argument('--use_mhc', type=bool, default=True)
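    # Note: argparse's type=bool converts any non-empty string (including
    # "False") to True, so these three flags are effectively always True
    # unless an empty string is passed.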
    parser.add_argument('--tcr_encoding_model', type=str)
    parser.add_argument('--cat_encoding', type=str, default='embedding')
    parser.add_argument('--aa_embedding_dim', type=int, default=10)
    parser.add_argument('--cat_embedding_dim', type=int, default=50)
    parser.add_argument('--lstm_dim', type=int, default=500)
    parser.add_argument('--encoding_dim', type=int, default=100)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--wd', type=float, default=0)
    parser.add_argument('--dropout', type=float, default=0.1)
    hparams = parser.parse_args()
    model = ERGOLightning(hparams)
    logger = TensorBoardLogger("ERGO-II_tuning_update",
                               name="ergo_tuning_model",
                               version=hparams.version)
    early_stop_callback = EarlyStopping(monitor='val_auc',
                                        patience=3,
                                        mode='max')
    trainer = Trainer(gpus=[hparams.gpu],
                      logger=logger,
                      early_stop_callback=early_stop_callback)
    trainer.fit(model)
Example #7
def diabetes_experiment():
    parser = ArgumentParser()
    parser.add_argument('--version', type=int)
    parser.add_argument('--gpu', type=int)
    parser.add_argument('--dataset', type=str, default='mcpas_human')
    parser.add_argument('--tcr_encoding_model', type=str, default='AE')
    parser.add_argument('--cat_encoding', type=str, default='embedding')
    parser.add_argument('--use_alpha', type=bool, default=True)
    parser.add_argument('--use_vj', type=bool, default=True)
    parser.add_argument('--use_mhc', type=bool, default=True)
    parser.add_argument('--aa_embedding_dim', type=int, default=10)
    parser.add_argument('--cat_embedding_dim', type=int, default=50)
    parser.add_argument('--lstm_dim', type=int, default=500)
    parser.add_argument('--encoding_dim', type=int, default=100)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--wd', type=float, default=0)
    parser.add_argument('--dropout', type=float, default=0.1)
    # for diabetes
    parser.add_argument('--weight_factor', type=int, default=5)
    hparams = parser.parse_args()
    model = ERGODiabetes(hparams)
    # logger = TensorBoardLogger("diabetes_logs", name="d_mcpas_lstm_with_alpha")
    logger = TensorBoardLogger("diabetes_logs",
                               name="ergo_ii_diabetes",
                               version=hparams.version)
    early_stop_callback = EarlyStopping(monitor='val_auc',
                                        patience=3,
                                        mode='max')
    trainer = Trainer(gpus=[hparams.gpu],
                      logger=logger,
                      early_stop_callback=early_stop_callback)
    trainer.fit(model)
Example #8
    def run(self, profile=True):
        network = self.network_args
        optimizer = self.opt_args["opt"]
        dataloader = {
            "train": self.train_args["dataloader"],
            "val": self.val_args["dataloader"],
            "test": self.test_args["dataloader"],
        }

        pl = PL(network=network, dataloader=dataloader, optimizer=optimizer)

        trainer = Trainer(
            logger=TensorBoardLogger(save_dir="./Logs", name=self.log_args["project_name"]),
            gpus=self.hw_args["gpu_idx"],
            check_val_every_n_epoch=self.log_args["val_log_freq_epoch"],
            max_epochs=self.train_args["epoch"],
            min_epochs=self.train_args["epoch"],
            log_save_interval=1,
            row_log_interval=1,
            profiler=profile,
        )

        trainer.fit(pl)

        trainer.test(pl)

        return pl.final_target
Example #9
def create_logger(model_save_dir):
    # str.rpartition(separator) cuts up the string into a 3-tuple of (a,b,c), where
    #   a: everything before the last occurrence of the separator
    #   b: the separator
    #   c: everything after the last occurrence of the separator
    result_dir, _, run_name = model_save_dir.rpartition('/')

    return TensorBoardLogger(save_dir=result_dir, version=run_name, name="")
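For clarity, here is a small illustrative sketch (using a hypothetical model_save_dir, not part of the original project) of what the rpartition split produces and where the TensorBoard files end up:

# Hypothetical input path, for illustration only.
model_save_dir = 'results/experiments/run_42'
result_dir, _, run_name = model_save_dir.rpartition('/')
assert result_dir == 'results/experiments'
assert run_name == 'run_42'
# With name="" and version=run_name, TensorBoardLogger writes its event
# files directly under results/experiments/run_42 instead of the default
# <save_dir>/lightning_logs/version_<n> layout.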
Example #10
def construct_trainer(args):
    logger = TensorBoardLogger(save_dir=args.log_dir, name=STAGE)
    early_stop = EarlyStopping(monitor="val_loss", patience=1, mode="min")
    trainer = Trainer(logger=logger,
                      gradient_clip_val=1.0,
                      checkpoint_callback=False,
                      early_stop_callback=early_stop,
                      max_epochs=args.epochs,
                      gpus=args.device)
    return trainer
Example #11
def test_tensorboard_automatic_versioning(tmpdir):
    """Verify that automatic versioning works"""

    root_dir = tmpdir.mkdir("tb_versioning")
    root_dir.mkdir("version_0")
    root_dir.mkdir("version_1")

    logger = TensorBoardLogger(save_dir=tmpdir, name="tb_versioning")

    assert logger.version == 2
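As a rough sketch (not the library's actual implementation), the automatic version asserted above can be pictured as scanning the existing version_* folders and taking the next free integer:

import os
import re

def _next_version(root_dir):
    # Collect existing version_<n> directories and return the next integer,
    # or 0 if none exist yet.
    existing = []
    for entry in os.listdir(root_dir):
        match = re.fullmatch(r'version_(\d+)', entry)
        if match and os.path.isdir(os.path.join(root_dir, entry)):
            existing.append(int(match.group(1)))
    return max(existing) + 1 if existing else 0

# With version_0 and version_1 already present, as in the test above,
# _next_version(root_dir) returns 2, matching logger.version.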
Example #12
def ergo_ii_experiment():
    parser = ArgumentParser()
    parser.add_argument('iter', type=int)
    parser.add_argument('gpu', type=int)
    parser.add_argument('dataset', type=str, help='mcpas_human or vdjdb')
    parser.add_argument('tcr_encoding_model', type=str, help='LSTM or AE')
    parser.add_argument('--cat_encoding', type=str, default='embedding')
    parser.add_argument('--use_alpha', action='store_true')
    parser.add_argument('--use_vj', action='store_true')
    parser.add_argument('--use_mhc', action='store_true')
    parser.add_argument('--use_t_type', action='store_true')
    parser.add_argument('--aa_embedding_dim', type=int, default=10)
    parser.add_argument('--cat_embedding_dim', type=int, default=50)
    parser.add_argument('--lstm_dim', type=int, default=500)
    parser.add_argument('--encoding_dim', type=int, default=100)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--wd', type=float, default=1e-5)
    parser.add_argument('--dropout', type=float, default=0.1)
    hparams = parser.parse_args()
    model = ERGOLightning(hparams)
    # version flags
    version = ''
    version += str(hparams.iter)
    if hparams.dataset == 'mcpas_human':
        version += 'h'
    elif hparams.dataset == 'vdjdb':
        version += 'v'
    elif hparams.dataset == 'vdjdb_no10x':
        version += 'f'
    elif hparams.dataset == 'mcpas':
        version += 'm'
    else:
        version += 'x'
    if hparams.tcr_encoding_model == 'AE':
        version += 'e'
    elif hparams.tcr_encoding_model == 'LSTM':
        version += 'l'
    if hparams.use_alpha:
        version += 'a'
    if hparams.use_vj:
        version += 'j'
    if hparams.use_mhc:
        version += 'h'
    if hparams.use_t_type:
        version += 't'
    logger = TensorBoardLogger("ERGO-II_paper_logs",
                               name="paper_models",
                               version=version)
    early_stop_callback = EarlyStopping(monitor='val_auc',
                                        patience=3,
                                        mode='max')
    trainer = Trainer(gpus=[hparams.gpu],
                      logger=logger,
                      early_stop_callback=early_stop_callback)
    trainer.fit(model)
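For illustration only, a hypothetical run and the version string the flag encoding above would produce, showing how a configuration maps onto its log directory:

# Hypothetical configuration: iteration 1, dataset 'mcpas_human', the AE
# encoder, and --use_alpha --use_vj passed (use_mhc and use_t_type off).
dataset_flags = {'mcpas_human': 'h', 'vdjdb': 'v', 'vdjdb_no10x': 'f', 'mcpas': 'm'}
encoder_flags = {'AE': 'e', 'LSTM': 'l'}
version = '1' + dataset_flags['mcpas_human'] + encoder_flags['AE'] + 'a' + 'j'
assert version == '1heaj'
# TensorBoardLogger then logs this run under ERGO-II_paper_logs/paper_models/1heaj/.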
Example #13
def train(args):
    m = BengaliModule(args)
    if True:
        print("start surgery")
        FREEZE = False
        if FREEZE:
            checkpoint_path = "lightning_logs/205_seres_dropblock.yml/_ckpt_epoch_198.ckpt"
        else:
            # checkpoint_path = "lightning_logs/206_seres_dropblock_surgery.yml/freeze_train/_ckpt_epoch_6.ckpt"
            # checkpoint_path = "lightning_logs/206.fold3.yml/freeze_train/_ckpt_epoch_5.ckpt"
            checkpoint_path = "lightning_logs/206.fold3.yml/_ckpt_epoch_84.ckpt"
        checkpoint = torch.load(checkpoint_path)['state_dict']

        if FREEZE:
            for k in checkpoint.copy().keys():
                print(k)
                #if k.startswith('classifier.predictor.base_model.conv1') or \
                #        k.startswith('classifier.predictor.base_model.bn1'):
                if k.startswith('classifier.predictor.base_model.layer0'):
                    print("   delete dict key", k)
                    checkpoint.pop(k)

        print("load for train")
        miss, unexp = m.load_state_dict(checkpoint, strict=False)
        print("  missing", miss)
        print("  unexpected", unexp)

        for name, param in m.named_parameters():
            if name.startswith('classifier.predictor.base_model.layer0'):
                param.requires_grad = True
            else:
                param.requires_grad = not FREEZE
            print(name, param.size(), param.requires_grad)
        # exit()

    checkpoint_callback = ModelCheckpointCustom(
        filepath=f'lightning_logs/{os.path.basename(args.config)}',
        save_top_k=3,
        verbose=True,
        monitor='recall/weight_mean',
        mode='max',
        prefix='')
    logger = TensorBoardLogger(
        save_dir='lightning_logs/',
        name=os.path.basename(args.config),
    )
    trainer = pl.Trainer(
        # default_save_path=f'lightning_logs/{os.path.basename(args.config)}',
        logger=logger,
        early_stop_callback=None,
        max_epochs=C.n_epoch,
        checkpoint_callback=checkpoint_callback,
        fast_dev_run=False)
    trainer.fit(m)
Example #14
def pretrain(app_train, app_test, sequences, dims, train_index, val_index,
             args):
    encoders = {}
    for name, diminfo in dims.items():
        cat = sequences[f'{name}_cat']
        cont = sequences[f'{name}_cont']
        train_loader = torch.utils.data.DataLoader(
            SequenceDataset(app_train,
                            cat,
                            cont,
                            index=train_index,
                            app_test=app_test),
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=6,
            worker_init_fn=worker_init_fn)
        val_loader = torch.utils.data.DataLoader(SequenceDataset(
            app_train, cat, cont, index=val_index),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=6,
                                                 worker_init_fn=worker_init_fn)
        model = DIMLSTMModule(diminfo, args.n_hidden, train_loader, val_loader,
                              args)
        logdir = '../logs/81_dimlstm'
        path = pathlib.Path(logdir) / name
        if not path.exists():
            path.mkdir(parents=True)
        logger = TensorBoardLogger(logdir, name=name)
        early_stopping = EarlyStopping(patience=args.patience,
                                       monitor='val_loss_main',
                                       mode='min')
        filepath = pathlib.Path(
            logdir) / name / f'version_{logger.version}' / 'checkpoints'
        model_checkpoint = ModelCheckpoint(str(filepath),
                                           monitor='val_loss_main',
                                           mode='min')
        trainer = pl.Trainer(default_save_path=logdir,
                             gpus=-1,
                             max_epochs=args.n_epochs,
                             early_stop_callback=early_stopping,
                             logger=logger,
                             row_log_interval=100,
                             checkpoint_callback=model_checkpoint)
        trainer.fit(model)

        best_model = load_model(model,
                                name,
                                trainer.logger.version,
                                logdir=logdir)
        encoders[name] = best_model.encoder
    return encoders
Example #15
File: run.py  Project: yyht/openie6
def get_logger(mode, hparams):
    log_dir = hparams.save + '/logs/'
    if os.path.exists(log_dir + f'{mode}'):
        mode_logs = list(glob.glob(log_dir + f'/{mode}_*'))
        new_mode_index = len(mode_logs) + 1
        print('Moving old log to...')
        print(
            shutil.move(hparams.save + f'/logs/{mode}',
                        hparams.save + f'/logs/{mode}_{new_mode_index}'))
    logger = TensorBoardLogger(save_dir=hparams.save,
                               name='logs',
                               version=mode + '.part')
    return logger
Example #16
def test_tensorboard_manual_versioning(tmpdir):
    """Verify that manual versioning works"""

    root_dir = tmpdir.mkdir("tb_versioning")
    root_dir.mkdir("version_0")
    root_dir.mkdir("version_1")
    root_dir.mkdir("version_2")

    logger = TensorBoardLogger(save_dir=tmpdir,
                               name="tb_versioning",
                               version=1)

    assert logger.version == 1
Example #17
def test_tensorboard_pickle(tmpdir):
    """Verify that pickling trainer with Tensorboard logger works."""

    # hparams = tutils.get_hparams()
    # model = LightningTestModel(hparams)

    logger = TensorBoardLogger(save_dir=tmpdir, name="tensorboard_pickle_test")

    trainer_options = dict(max_epochs=1, logger=logger)

    trainer = Trainer(**trainer_options)
    pkl_bytes = pickle.dumps(trainer)
    trainer2 = pickle.loads(pkl_bytes)
    trainer2.logger.log_metrics({"acc": 1.0})
Example #18
def main(args):
    if args.mode == 'train':
        load_version = 0 if args.resume else None
        logger = TensorBoardLogger(save_dir=args.exp_dir,
                                   name=args.exp,
                                   version=load_version)
        trainer = create_trainer(args, logger)
        model = UnetMRIModel(args)
        trainer.fit(model)
    else:  # args.mode == 'test'
        assert args.checkpoint is not None
        model = UnetMRIModel.load_from_checkpoint(str(args.checkpoint))
        model.hparams.sample_rate = 1.
        trainer = create_trainer(args, logger=False)
        trainer.test(model)
Example #19
def main(args: Namespace) -> None:
    if args.seed is not None:
        seed_everything(args.seed)

    model = eval(args.pl_model)(args)

    os.makedirs(pjoin('./lightning_logs', args.name), exist_ok=True)
    logger = TensorBoardLogger(
        save_dir='./lightning_logs/',
        name=args.name,
    )

    db = None if torch.cuda.device_count() == 1 \
        else args.distributed_backend
    default_args = dict(
        gpus=-1,
        distributed_backend=db,
        precision=16 if args.fp16 else 32,
        amp_level='O1',
        profiler=True,
        num_sanity_val_steps=1,
        # num_sanity_val_steps=1 if args.test_run else 0,
        accumulate_grad_batches=args.batch_split,
        logger=logger,
        benchmark=(args.seed is None),
        deterministic=(args.seed is not None),
        callbacks=[CSVRecordingCallback()],
        limit_val_batches=0.1 if args.test_run else 1.,
        limit_train_batches=0.1 if args.test_run else 1.,
        gradient_clip_val=1.0,
    )
    # Let the specific model overwrite the default args
    default_args.update(model.pl_trainer_args())

    trainer = pl.Trainer(**default_args)
    if not model.is_finished_run(pjoin(args.logdir, args.name)):
        # Record hyperparameters
        json.dump(vars(args),
                  open(pjoin(args.logdir, args.name, 'hparams.json'), 'w'))

        trainer.fit(model)

    # Run the test set if defined in lightning model
    gave_test_loader = hasattr(model, 'test_dataloader') and \
                       model.test_dataloader()
    if gave_test_loader:
        with Timer('testing'):
            trainer.test(model)
Example #20
def run_training():
    logger = TensorBoardLogger("lightning_logs", name="model_logs")
    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=5000, patience=7, verbose=False, mode="min")

    model = MixedRegression()

    trainer = pl.Trainer(gpus=1, logger=logger, early_stop_callback=early_stop_callback)
    # trainer = pl.Trainer(logger=logger, early_stop_callback=early_stop_callback)
    lr_finder = trainer.lr_find(model)
    new_lr = lr_finder.suggestion()
    model.hparams.lr = new_lr
    print(f'LR: {new_lr}')

    trainer.fit(model)
    trainer.test(model)
    print('Done training!')
Example #21
def test_tensorboard_logger(tmpdir):
    """Verify that basic functionality of Tensorboard logger works."""

    hparams = tutils.get_hparams()
    model = LightningTestModel(hparams)

    logger = TensorBoardLogger(save_dir=tmpdir, name="tensorboard_logger_test")

    trainer_options = dict(max_epochs=1,
                           train_percent_check=0.01,
                           logger=logger)

    trainer = Trainer(**trainer_options)
    result = trainer.fit(model)

    print("result finished")
    assert result == 1, "Training failed"
Example #22
def mhc_experiment():
    parser = ArgumentParser()
    parser.add_argument('--encoding_model', type=str, default='CNN')
    parser.add_argument('--embedding_dim', type=int, default=10)
    parser.add_argument('--encoding_dim', type=int, default=100)
    parser.add_argument('--dropout', type=float, default=0.1)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--version', type=str, default='0')
    hparams = parser.parse_args()
    model = ERGOPepMHC(hparams)
    if hparams.encoding_model == 'LSTM':
        name = "double_lstm_model"
    elif hparams.encoding_model == 'CNN':
        name = "double_cnn_model"
    logger = TensorBoardLogger("pep_mhc_logs", name=name, version=hparams.version)
    # logger = TensorBoardLogger("pep_mhc_logs", name="double_cnn_model", version=hparams.version)
    early_stop_callback = EarlyStopping(monitor='val_auc', patience=3, mode='max')
    trainer = Trainer(gpus=[6], logger=logger, early_stop_callback=early_stop_callback)
    trainer.fit(model)
Example #23
def main():
    hparams = Namespace(
        pretrained=True,
        encoder="efficientnet-b0",
        # scales=[0, 1, 2, 3],
        scales=[0],
        disparity_smoothness=1e-3,  # control sharpness of disparity
        lr=3e-4,
        step_size=10,
        batch_size=3,
        height=160,
        width=320,  # have to be divisible by 2**5
        min_depth=0.1,
        max_depth=100.0,
        target_id=7,
        sources_ids=[0, 14],
        sequence_length=15,
        device="cuda",
        dt=1 / 20,
    )
    loggin_dir = r"C:\Users\tonys\projects\python\comma\effdepth-models"
    checkpoint_path = join(
        loggin_dir,
        r"efficient-160x320\depth-{epoch:02d}",
    )
    # load_checkpoint_path = join(
    #     loggin_dir, r"manual-velocity-better\depth-epoch=01.ckpt",
    # )
    # model = EfficientTraining.load_from_checkpoint(load_checkpoint_path)
    model = EfficientTraining(hparams)
    checkpoint_callback = ModelCheckpoint(checkpoint_path, save_top_k=-1)
    trainer = Trainer(
        logger=TensorBoardLogger(loggin_dir),
        checkpoint_callback=checkpoint_callback,
        early_stop_callback=False,
        max_epochs=20,
        accumulate_grad_batches=4,
        gpus=1 if hparams.device == "cuda" else 0,
        benchmark=True,
    )
    trainer.fit(model)
Example #24
def main(hparams):
    if hparams.seed != -1:
        random.seed(hparams.seed)
        torch.manual_seed(hparams.seed)

    logger = TensorBoardLogger("tb_logs", name=hparams.exp_name)
    gpus = list(map(int, hparams.gpus.split(',')))
    if len(gpus) == 1 and gpus[0] == -1:
        gpus = -1

    trainer = pl.Trainer(
        early_stop_callback=False,
        logger=logger,
        gpus=gpus,
        max_epochs=hparams.epochs,
        val_percent_check=0.05,
        use_amp=hparams.use_16bit,
    )

    model = WGanGP(hparams)
    trainer.fit(model)
Example #25
 def __init__(self, name, n_epochs, patience):
     logger = TensorBoardLogger('../logs', name=name)
     early_stopping = EarlyStopping(
         patience=patience,
         monitor='auc',
         mode='max'
     )
     filepath = pathlib.Path('../logs') / name / f'version_{logger.version}' / 'checkpoints'
     model_checkpoint = ModelCheckpoint(
         str(filepath),
         monitor='auc',
         mode='max'
     )
     super().__init__(
         default_save_path='../logs',
         gpus=-1,
         max_epochs=n_epochs,
         early_stop_callback=early_stopping,
         logger=logger,
         row_log_interval=100,
         checkpoint_callback=model_checkpoint
     )
Example #26
def main(cfg: DictConfig):
    p = argparse.ArgumentParser()
    # hparams can only be saved from args, so copy the config values over to args.
    args = p.parse_args()
    for key, value in cfg.model.items():
        args.__setattr__(key, value)

    tblogger = TensorBoardLogger(save_dir=cfg.exp.save_dir, name=cfg.exp.name)
    pacfg = cfg.patch
    trcfg = cfg.trainer

    # TODO: clean up the path handling
    train_path_df = DataPathMaker(pacfg.data_dir, patch_dir_name=pacfg.train_patch).\
        create_dataframe(pacfg.train_ids)
    val_path_df = DataPathMaker(pacfg.data_dir, patch_dir_name=pacfg.val_patch).\
        create_dataframe(pacfg.val_ids)

    tr_im_list = train_path_df[train_path_df['type'] == 'image']['path'].astype(str).values
    val_im_list = val_path_df[val_path_df['type'] == 'image']['path'].astype(str).values

    tr_lb_list = train_path_df[train_path_df['type'] == 'label']['path'].astype(str).values
    val_lb_list = val_path_df[val_path_df['type'] == 'label']['path'].astype(str).values

    model = KitsTrainer(args, tr_im_list, tr_lb_list, val_im_list, val_lb_list)

    # Called when the validation loop ends
    # checkpoint_callback = ModelCheckpoint(filepath ='ckpt', save_weights_only=True)

    trainer = pl.Trainer(
        # checkpoint_callback=checkpoint_callback,
        gpus=trcfg.gpus,
        row_log_interval=trcfg.row_log_interval,
        checkpoint_callback=False,
        logger=tblogger,
        max_epochs=trcfg.epoch,
        early_stop_callback=None)
    trainer.fit(model)
Example #27
            pass

    min_depth = 3
    max_depth = stgs.VAE_HPARAMS['max_depth']  # maximum network depth

    print(f'Using maximum sequence length of {stgs.VAE_HPARAMS["max_len"]}.')
    torch.cuda.empty_cache()
    vae = NA_VAE(stgs.VAE_HPARAMS)
    vae = vae.float()
    vae = vae.cuda()
    # vae.load_state_dict(torch.load(f'{checkpoint_path}/weights.pt'))

    torch.cuda.empty_cache()
    version = datetime.strftime(datetime.fromtimestamp(seed),
                                '%Y-%m-%d..%H.%M.%S')
    logger = TensorBoardLogger(checkpoint_path, version=version)
    checkpoint = ModelCheckpoint(filepath=checkpoint_path,
                                 save_top_k=1,
                                 verbose=True,
                                 monitor='loss',
                                 mode='min')
    early_stop = EarlyStopping(
        monitor='loss',
        patience=stgs.VAE_HPARAMS['early_stop_patience'],
        verbose=True,
        mode='min')
    max_steps = stgs.VAE_HPARAMS['max_steps']
    # kld loss annealing also depends on max length (or max epochs?)
    vae.get_data_generator(max(min_depth, max_depth - 1), max_depth, seed=seed)

    trainer = pl.Trainer(gpus=-1,
Example #28
def test_tensorboard_log_hyperparams(tmpdir):
    logger = TensorBoardLogger(tmpdir)
    hparams = {"float": 0.3, "int": 1, "string": "abc", "bool": True}
    logger.log_hyperparams(hparams)
Example #29
def main():
    args = parse_args()
    seed_everything(args.seed)
    app_train = joblib.load(
        '../data/03_powertransform/application_train.joblib')
    app_test = joblib.load('../data/03_powertransform/application_test.joblib')
    sequences = read_sequences('../data/04_sequence/')
    dims = joblib.load('../data/07_dims/dims03.joblib')
    dims.pop('application_train')
    dims.pop('application_test')

    for name, diminfo in dims.items():
        cat = sequences[f'{name}_cat']
        cont = sequences[f'{name}_cont']
        train_loader = torch.utils.data.DataLoader(
            SequenceDataset(app_train, cat, cont),
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=6,
            worker_init_fn=worker_init_fn)
        test_loader = torch.utils.data.DataLoader(
            SequenceDataset(app_test, cat, cont),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=6,
            worker_init_fn=worker_init_fn)
        model = DIMLSTMModule(diminfo, args.n_hidden, train_loader,
                              test_loader, args)
        logdir = '../logs/21_dimlstm'
        path = pathlib.Path(logdir) / name
        if not path.exists():
            path.mkdir(parents=True)
        logger = TensorBoardLogger(logdir, name=name)
        early_stopping = EarlyStopping(patience=args.patience,
                                       monitor='val_loss_main',
                                       mode='min')
        filepath = pathlib.Path(
            logdir) / name / f'version_{logger.version}' / 'checkpoints'
        model_checkpoint = ModelCheckpoint(str(filepath),
                                           monitor='val_loss_main',
                                           mode='min')
        trainer = pl.Trainer(default_save_path=logdir,
                             gpus=-1,
                             max_epochs=args.n_epochs,
                             early_stop_callback=early_stopping,
                             logger=logger,
                             row_log_interval=100,
                             checkpoint_callback=model_checkpoint)
        trainer.fit(model)

        best_model = load_model(model,
                                name,
                                trainer.logger.version,
                                logdir=logdir)
        train_loader_no_shuffle = torch.utils.data.DataLoader(
            SequenceDataset(app_train, cat, cont),
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=6,
            worker_init_fn=worker_init_fn)
        df_train = predict(name, best_model, train_loader_no_shuffle)
        df_test = predict(name, best_model, test_loader)
        df_encoding = pd.concat([df_train, df_test])
        dump(df_encoding, f'../data/21_dimlstm/{name}.joblib')
Example #30
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=(self.lr))

    def train_dataloader(self):
        return DataLoader(self.train_set, batch_size=self.batch_size)

    def val_dataloader(self):
        return DataLoader(self.val_set, batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader(self.test_set, batch_size=self.batch_size)


if __name__ == "__main__":
    logger = TensorBoardLogger("lightning_logs", name="image_only")
    early_stop_callback = EarlyStopping(monitor="val_loss",
                                        min_delta=5000,
                                        patience=7,
                                        verbose=False,
                                        mode="min")

    model = LitClassifier()
    trainer = pl.Trainer(gpus=1,
                         logger=logger,
                         early_stop_callback=early_stop_callback)

    lr_finder = trainer.lr_find(model)
    fig = lr_finder.plot(suggest=True, show=True)
    new_lr = lr_finder.suggestion()
    print(new_lr)