Example #1
    def __init__(self,
                 multilingualIndex,
                 pretrained_embeddings,
                 wce,
                 batch_size=512,
                 nepochs=50,
                 gpus=0,
                 n_jobs=-1,
                 patience=20,
                 stored_path=None):
        """
        Init RecurrentGen.
        :param multilingualIndex: MultilingualIndex, a dictionary of training and test documents
        indexed by language code.
        :param pretrained_embeddings: dict {lang: tensor of embeddings}, the pretrained embeddings to use
        as the embedding layer.
        :param wce: Bool, whether to deploy Word-Class Embeddings (as proposed by A. Moreo). If True, supervised
        (word-class) embeddings are concatenated to the deployed pretrained embeddings. WCE dimensionality is equal to
        the number of target classes.
        :param batch_size: int, number of samples in a batch.
        :param nepochs: int, maximum number of epochs to train the model.
        :param gpus: int, number of GPUs to use per node. If 0, computation takes place on the CPU.
        :param n_jobs: int, number of concurrent workers (i.e., parallelizing data loading).
        :param patience: int, number of epochs with no improvement in val-macroF1 before early stopping.
        :param stored_path: str, path to a pretrained model. If None, the model is trained from scratch.
        """
        super().__init__()
        self.multilingualIndex = multilingualIndex
        self.langs = multilingualIndex.langs
        self.batch_size = batch_size
        self.gpus = gpus
        self.n_jobs = n_jobs
        self.stored_path = stored_path
        self.nepochs = nepochs
        self.patience = patience

        # EMBEDDINGS to be deployed
        self.pretrained = pretrained_embeddings
        self.wce = wce

        self.multilingualIndex.train_val_split(val_prop=0.2,
                                               max_val=2000,
                                               seed=1)
        self.multilingualIndex.embedding_matrices(self.pretrained,
                                                  supervised=self.wce)
        self.model = self._init_model()
        self.logger = TensorBoardLogger(save_dir='tb_logs',
                                        name='rnn',
                                        default_hp_metric=False)
        self.early_stop_callback = EarlyStopping(monitor='val-macroF1',
                                                 min_delta=0.00,
                                                 patience=self.patience,
                                                 verbose=False,
                                                 mode='max')

        # Override EarlyStopping's 'max' comparison (its class-level mode_dict) so that
        # an improvement is evaluated as >= (torch.ge) with respect to the best score
        self.early_stop_callback.mode_dict['max'] = torch.ge

        self.lr_monitor = LearningRateMonitor(logging_interval='epoch')
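
For context, here is a minimal sketch (hypothetical, not taken from the original repository) of how the logger and callbacks prepared in __init__ are typically handed to a PyTorch Lightning Trainer; the fit signature and dataloader arguments below are assumptions:

    def fit(self, train_dataloader, val_dataloader):
        # Hypothetical sketch: the Trainer consumes the logger and callbacks built in __init__.
        trainer = Trainer(gpus=self.gpus,
                          max_epochs=self.nepochs,
                          logger=self.logger,
                          callbacks=[self.early_stop_callback, self.lr_monitor])
        trainer.fit(self.model, train_dataloader, val_dataloader)

Example #2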
def test_v1_7_0_deprecate_lr_sch_names(tmpdir):
    model = BoringModel()
    lr_monitor = LearningRateMonitor()
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, callbacks=[lr_monitor])
    trainer.fit(model)

    with pytest.deprecated_call(match="`LearningRateMonitor.lr_sch_names` has been deprecated in v1.5"):
        assert lr_monitor.lr_sch_names == ["lr-SGD"]
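
The expected value "lr-SGD" comes from LearningRateMonitor naming the logged learning rate after the optimizer class. A minimal sketch (a hypothetical model, not the actual BoringModel source) of a configure_optimizers that produces that name:

    def configure_optimizers(self):
        # A single SGD optimizer plus a scheduler: LearningRateMonitor logs its
        # learning rate under the key "lr-SGD", derived from the optimizer class name.
        optimizer = torch.optim.SGD(self.parameters(), lr=0.1)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)
        return [optimizer], [scheduler]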
Example #3
def main(args, dm_setup='fit'):
    pl.seed_everything(args.seed)

    dm = SaltDM.from_argparse_args(args)
    dm.setup(dm_setup)
    model = LitResUnet(**vars(args))
    model.hparams.update(dm.kwargs)

    # Checkpoints are stored per model / validation fold / version.
    checkpoint_dir = os.path.join(
        '../params/{}/f{:02d}_{}'.format(args.model, args.val_fold_idx,
                                         args.version), 'ckpt')

    checkpoint_file_path = os.path.join(checkpoint_dir, 'last.ckpt')
    if os.path.isfile(checkpoint_file_path):
        args.resume_from_checkpoint = checkpoint_file_path
        print('Detected checkpoint:', args.resume_from_checkpoint)

    # Callbacks
    callbacks = []
    if args.loss_func == 'lovasz_hinge':
        callbacks.append(ToLovaszHingeLossCB())
    if args.logger_type != 'none':
        callbacks.append(LearningRateMonitor())
    if args.swa_epoch_start > 0:
        callbacks.append(
            StochasticWeightAveraging(swa_epoch_start=args.swa_epoch_start))
    if args.snapshot_size > 0:
        callbacks.append(ResetSnapshotCB(checkpoint_dir))

    trainer = pl.Trainer.from_argparse_args(
        args,
        logger=get_logger(args),
        callbacks=callbacks,
        checkpoint_callback=checkpointcb(args, checkpoint_dir))
    return dm, model, trainer
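
A possible entry point for this setup (a sketch; build_arg_parser is a hypothetical helper standing in for the project's actual argparse wiring):

if __name__ == '__main__':
    # Hypothetical driver: parse arguments, build everything via main(), then train.
    args = build_arg_parser().parse_args()
    dm, model, trainer = main(args)
    trainer.fit(model, datamodule=dm)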
Example #4
            'opt_eps': 1e-3,  # note: timm's create_optimizer looks up args.opt_eps, so the key needs an underscore
            'momentum': 0.9,
            'weight_decay': 4e-5
        }
        optim_parameters = SimpleNamespace(**optim_parameters)
        optimizer = create_optimizer(optim_parameters, self.model)
        return optimizer


if __name__ == '__main__':
    neptune_logger = NeptuneLogger(
        project_name='detectwaste/efficientdet-lighning',
        experiment_name='effdet-lighning',
    )
    gpu_monitor = GPUStatsMonitor()
    lr_logger = LearningRateMonitor()
    module = EfficientDetModule()
    trainer = pl.Trainer(
        gpus=[0, 1, 2, 3, 4, 5, 6, 7],
        accelerator='ddp',
        replace_sampler_ddp=False,
        gradient_clip_val=10,
        logger=neptune_logger,
        # limit_train_batches=4 * 4 * 12,
        # limit_val_batches=0,
        log_every_n_steps=10,
        sync_batchnorm=True,
        max_epochs=50,
        callbacks=[gpu_monitor, lr_logger])

    trainer.fit(module)
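
For context, a complete configure_optimizers in the style the truncated snippet above suggests, assuming `from types import SimpleNamespace` and timm's `create_optimizer`; the 'opt' and 'lr' entries are illustrative placeholders (only the eps, momentum and weight-decay entries appear in the truncated original):

    def configure_optimizers(self):
        # Illustrative sketch: timm's create_optimizer reads the namespace attributes
        # args.opt, args.lr, args.momentum, args.weight_decay and args.opt_eps.
        optim_parameters = {
            'opt': 'sgd',   # illustrative optimizer name understood by timm
            'lr': 1e-3,     # illustrative learning rate
            'opt_eps': 1e-3,
            'momentum': 0.9,
            'weight_decay': 4e-5
        }
        optim_parameters = SimpleNamespace(**optim_parameters)
        optimizer = create_optimizer(optim_parameters, self.model)
        return optimizer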