Exemplos de LightningTestModel.on_train_start em Python

Linguagem de programação: Python

Namespace / nome do pacote: pytorch_lightning.testing

Classe / Tipo: LightningTestModel

Método / Função: on_train_start

Exemplos em hotexamples.com: 2

LightningTestModel.on_train_start em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de pytorch_lightning.testing.LightningTestModel.on_train_start em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

LightningTestModel(30)

freeze(6)

unfreeze(6)

load_from_metrics(4)

on_sanity_check_start(4)

test_dataloader(4)

eval(3)

on_epoch_start(2)

on_train_start(2)

load_from_checkpoint(1)

prev_called_batch_idx(1)

prev_called_batch_nb(1)

Métodos Frequentes

LightningTestModel (30)

freeze (6)

unfreeze (6)

load_from_metrics (4)

on_sanity_check_start (4)

test_dataloader (4)

eval (3)

on_epoch_start (2)

on_train_start (2)

load_from_checkpoint (1)

Métodos Frequentes

prev_called_batch_idx (1)

prev_called_batch_nb (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_restore_models.py Projeto: raphaelmemmesheimer/pytorch-lightning

def test_cpu_restore_training(tmpdir):
    """Verify that a training session can be continued on CPU.

    A model is fitted once with a checkpoint callback writing into ``tmpdir``.
    A second trainer and a fresh model are then created against the same
    logger version and checkpoint directory. Before the second ``fit`` call,
    the new model's ``on_train_start`` hook is replaced with a check that the
    trainer's epoch counter equals the one recorded after the first run, and
    that runs predictions on the validation dataloaders.

    Args:
        tmpdir: Directory used for logger output and checkpoints.
    """
    tutils.reset_seed()

    hparams = tutils.get_hparams()
    model = LightningTestModel(hparams)

    # logger file to get meta
    test_logger_version = 10
    logger = tutils.get_test_tube_logger(tmpdir, False, version=test_logger_version)

    trainer_options = dict(
        max_epochs=8,
        val_check_interval=0.50,
        val_percent_check=0.2,
        train_percent_check=0.2,
        logger=logger,
        checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1),
    )

    # fit model
    trainer = Trainer(**trainer_options)
    result = trainer.fit(model)
    real_global_epoch = trainer.current_epoch

    # training complete; this run uses neither AMP nor DDP, so the failure
    # message names the CPU run it actually checks
    assert result == 1, 'cpu model failed to complete'

    # wipe-out trainer and model
    # retrain with not much data... this simulates picking training back up after slurm
    # we want to see if the weights come back correctly
    new_logger = tutils.get_test_tube_logger(tmpdir, False, version=test_logger_version)
    trainer_options = dict(
        max_epochs=2,
        val_check_interval=0.50,
        val_percent_check=0.2,
        train_percent_check=0.2,
        logger=new_logger,
        checkpoint_callback=ModelCheckpoint(tmpdir),
    )
    trainer = Trainer(**trainer_options)
    model = LightningTestModel(hparams)

    # set the train start hook so we can predict before the model does the full training
    def assert_good_acc():
        # the restored trainer must report the epoch reached by the first run
        assert trainer.current_epoch == real_global_epoch
        assert trainer.current_epoch >= 0

        # if model and state loaded correctly, predictions will be good even though we
        # haven't trained with the new loaded model
        trainer.model.eval()
        for dataloader in trainer.get_val_dataloaders():
            tutils.run_prediction(dataloader, trainer.model)

    model.on_train_start = assert_good_acc

    # by calling fit again, we trigger training, loading weights from the cluster
    # and our hook to predict using current model before any more weight updates
    trainer.fit(model)

Exemplo n.º 2

0

Exibir arquivo

def test_dp_resume(tmpdir):
    """Make sure DP continues training correctly.

    Fits a model with ``distributed_backend='dp'`` on two GPUs, saves its
    state through ``hpc_save``, then fits a fresh model with a second trainer
    built against the same logger version. The fresh model's
    ``on_train_start`` hook checks the second trainer's epoch counter and runs
    a prediction pass. Returns immediately when
    ``tutils.can_run_gpu_test()`` is false.

    Args:
        tmpdir: Directory used for logger output, checkpoints and the HPC save.
    """
    if not tutils.can_run_gpu_test():
        return

    tutils.reset_seed()

    hparams = tutils.get_hparams()
    initial_model = LightningTestModel(hparams)

    # logger and checkpoint callback for the first run
    first_logger = tutils.get_test_tube_logger(tmpdir, debug=False)
    checkpoint_cb = tutils.init_checkpoint_callback(first_logger)

    options = {
        'show_progress_bar': True,
        'max_epochs': 2,
        'gpus': 2,
        'distributed_backend': 'dp',
        'logger': first_logger,
        'checkpoint_callback': checkpoint_cb,
    }

    # first training run
    first_trainer = Trainer(**options)
    first_trainer.is_slurm_managing_tasks = True
    fit_status = first_trainer.fit(initial_model)

    # remember the epoch reached before saving
    expected_epoch = first_trainer.current_epoch

    # the first run must report success
    assert fit_status == 1, 'amp + dp model failed to complete'

    # ---------------------------
    # HPC LOAD/SAVE
    # ---------------------------
    first_trainer.hpc_save(tmpdir, first_logger)

    # second trainer: same logger version, smaller data fractions, one epoch
    resumed_logger = tutils.get_test_tube_logger(tmpdir, version=first_logger.version)
    options.update({
        'logger': resumed_logger,
        'checkpoint_callback': ModelCheckpoint(tmpdir),
        'train_percent_check': 0.2,
        'val_percent_check': 0.2,
        'max_epochs': 1,
    })
    resumed_trainer = Trainer(**options)

    # hook installed as on_train_start so predictions run before the new fit trains further
    def assert_good_acc():
        assert resumed_trainer.current_epoch == expected_epoch
        assert resumed_trainer.current_epoch > 0

        # if model and state loaded correctly, predictions will be good even
        # though nothing has been trained with the newly loaded model
        dp_model = resumed_trainer.model
        dp_model.eval()

        # the dataloader is taken from the first trainer, as in the original test
        train_loader = first_trainer.get_train_dataloader()
        tutils.run_prediction(train_loader, dp_model, dp=True)

    # fresh model with the hook attached
    resumed_model = LightningTestModel(hparams)
    resumed_model.on_train_start = assert_good_acc

    # fit new model which should load hpc weights
    resumed_trainer.fit(resumed_model)

    # test freeze on gpu
    resumed_model.freeze()
    resumed_model.unfreeze()