def _do_test_datamodule(self, num_workers, epochs, batch_size, accumulate_grad_batches, accelerator, **kwargs):
    hparams = Namespace(
        batch_size=batch_size,
        val_batch_size=batch_size,
        test_batch_size=batch_size,
        accumulate_grad_batches=accumulate_grad_batches,
        num_workers=num_workers,
        max_epochs=epochs,
        max_steps=None,
        accelerator=accelerator,
        **standard_args,
        **kwargs,
    )
    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")

    # instantiate PL trainer and model
    trainer = pl.Trainer.from_argparse_args(hparams)
    model = DummyTransformerModel(hparams)

    # Datasets
    datamodule = DummyDataModule(hparams, test_number=1, tokenizer=tokenizer)
    model.datamodule = datamodule
    trainer.fit(model, datamodule=datamodule)

def do_test_datamodule(num_workers, batch_size, accumulate_grad_batches, iterable, **kwargs):
    hyperparameters = Namespace(
        batch_size=batch_size,
        val_batch_size=batch_size,
        test_batch_size=batch_size,
        accumulate_grad_batches=accumulate_grad_batches,
        num_workers=num_workers,
        max_epochs=1,
        max_steps=-1,
        iterable=iterable,
        **standard_args,
        **kwargs,
    )

    if hasattr(hyperparameters, "devices"):
        hyperparameters.devices = get_random_gpus_list(hyperparameters.devices)

    tokenizer = BertTokenizer('tests/data/vocab.txt')

    # instantiate PL trainer and model
    trainer = pl.Trainer.from_argparse_args(hyperparameters)
    model = DummyTransformerModelWithOptim(hyperparameters, check_ids=True)

    # Datasets
    datamodule = DummyDataModule(
        hyperparameters, length_train=96, length_valid=96, length_test=96, tokenizer=tokenizer
    )
    model.datamodule = datamodule
    trainer.fit(model, datamodule=datamodule)

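# Hypothetical usage sketch (not part of the original suite): `do_test_datamodule`
# takes its configuration as plain arguments, so it could be driven by a pytest
# parametrization along these lines. The parameter values below are illustrative
# assumptions only; extra kwargs such as `accelerator` or `devices` would normally
# also be parametrized but are omitted here.
import pytest


@pytest.mark.parametrize("num_workers", [0, 2])
@pytest.mark.parametrize("batch_size, accumulate_grad_batches", [(4, 1), (2, 2)])
@pytest.mark.parametrize("iterable", [False, True])
def test_datamodule_sketch(num_workers, batch_size, accumulate_grad_batches, iterable):
    do_test_datamodule(num_workers, batch_size, accumulate_grad_batches, iterable)
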
def _do_test_fix_max_steps(self, max_epochs, accumulate_grad_batches, batch_size, expected_max_steps, **kwargs):
    hparams = Namespace(
        batch_size=batch_size,
        val_batch_size=batch_size,
        test_batch_size=batch_size,
        accumulate_grad_batches=accumulate_grad_batches,
        num_workers=4,
        max_epochs=max_epochs,
        max_steps=None,
        **standard_args,
        **kwargs,
    )

    # instantiate PL trainer
    trainer = Trainer.from_argparse_args(hparams)

    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
    model = TestTransformersModel(hparams)

    # Datasets
    datamodule = DummyDataModule(hparams, tokenizer=tokenizer)
    model.datamodule = datamodule
    trainer.fit(model, datamodule=datamodule)

    # Assert max steps computed correctly
    assert model.computed_max_steps == expected_max_steps

def do_test_fix_max_steps(max_epochs, accumulate_grad_batches, batch_size, **kwargs):
    hyperparameters = Namespace(
        batch_size=batch_size,
        val_batch_size=batch_size,
        test_batch_size=batch_size,
        accumulate_grad_batches=accumulate_grad_batches,
        num_workers=2,
        max_epochs=max_epochs,
        max_steps=-1,
        iterable=False,
        **standard_args,
        **kwargs,
    )

    if hasattr(hyperparameters, "devices"):
        hyperparameters.devices = get_random_gpus_list(hyperparameters.devices)

    # instantiate PL trainer
    trainer = Trainer.from_argparse_args(hyperparameters)

    tokenizer = BertTokenizer('tests/data/vocab.txt')

    # not checking ids because sometimes the sampler will duplicate elements to fill all gpus
    model = DummyTransformerModelWithOptim(hyperparameters)

    # Datasets
    datamodule = DummyDataModule(
        hyperparameters, length_train=40, length_test=40, length_valid=40, tokenizer=tokenizer
    )
    trainer.fit(model, datamodule=datamodule)

    assert trainer.global_step == model.computed_steps, (
        f"global {trainer.global_step} steps but computed {model.computed_steps}"
    )

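# Illustrative cross-check (an assumption, not the project's actual formula): for a
# map-style dataset, the number of optimizer steps that `do_test_fix_max_steps`
# compares against is usually derived from dataset length, batch size, gradient
# accumulation, and epochs as sketched below. The helper name is hypothetical.
import math


def expected_total_steps(dataset_length, batch_size, accumulate_grad_batches, max_epochs):
    # batches produced per epoch by the dataloader (last batch may be smaller)
    batches_per_epoch = math.ceil(dataset_length / batch_size)
    # optimizer steps per epoch after gradient accumulation
    steps_per_epoch = math.ceil(batches_per_epoch / accumulate_grad_batches)
    return steps_per_epoch * max_epochs
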
def test_model_checkpointing_callback(
    epochs, accumulate_grad_batches, batch_size, callback_interval, no_val_callback, expected_results
):
    hyperparameters = Namespace(
        batch_size=batch_size,
        val_batch_size=batch_size,
        test_batch_size=batch_size,
        accumulate_grad_batches=accumulate_grad_batches,
        num_workers=4,
        max_epochs=epochs,
        max_steps=-1,
        accelerator='cpu',
        iterable=False,
        checkpoint_interval=callback_interval,
        no_val_checkpointing=no_val_callback,
        no_epoch_checkpointing=False,
        pre_trained_dir='pre_trained_name',
        name=random_name(),
        val_check_interval=0.25,
        **standard_args,
    )
    tokenizer = BertTokenizer('tests/data/vocab.txt')

    callback = TransformersModelCheckpointCallback(hyperparameters)

    # instantiate PL trainer
    trainer = pl.Trainer.from_argparse_args(
        hyperparameters,
        profiler='simple',
        logger=None,
        callbacks=[callback],
    )

    # instantiate PL model
    model = DummyTransformerModelWithOptim(hyperparameters)

    # Datasets
    datamodule = DummyDataModule(
        hyperparameters, length_train=96, length_valid=96, length_test=96, tokenizer=tokenizer
    )
    model.datamodule = datamodule
    trainer.fit(model, datamodule=datamodule)

    folder = os.path.join(hyperparameters.output_dir, hyperparameters.pre_trained_dir, hyperparameters.name)
    listing = os.listdir(folder)
    shutil.rmtree(folder)

    assert set(listing) == set(expected_results), f"{listing} vs {set(expected_results)}"

def test_schedulers(scheduler_class, parameters, expected_lrs):
    hparams = Namespace(
        batch_size=1,
        val_batch_size=1,
        test_batch_size=1,
        num_workers=0,
        max_epochs=1,
        max_steps=20,
        last_epoch=-1,
        gpus=0,
        log_every_n_steps=1,
        **standard_args,
    )
    parameters['last_epoch'] = -1

    class SchedulerModel(DummyTransformerModel):

        def __init__(self, hparams):
            super().__init__(hparams)
            self.lrs = []

        def _get_actual_lr(self):
            return self.trainer.optimizers[0].__dict__['param_groups'][0]['lr']

        def training_step(self, batch, *args):
            res = super().training_step(batch, *args)
            return {**res, 'lr': self._get_actual_lr()}

        def training_epoch_end(self, outputs, *args, **kwargs):
            self.lrs = [o['lr'] for o in outputs] + [self._get_actual_lr()]

        def configure_optimizers(self):
            # Define adam optimizer
            optimizer = AdamW(self.model.parameters(), lr=1.0)
            scheduler = scheduler_class(optimizer, **parameters)
            return {
                'optimizer': optimizer,
                'lr_scheduler': {'scheduler': scheduler, 'interval': 'step'}
            }

    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")

    # instantiate PL trainer and model
    trainer = pl.Trainer.from_argparse_args(hparams)
    model = SchedulerModel(hparams)

    # Datasets and Fit
    datamodule = DummyDataModule(hparams, tokenizer=tokenizer)
    trainer.fit(model, datamodule=datamodule)

    assert torch.allclose(
        torch.tensor(expected_lrs),
        torch.tensor(model.lrs),
    ), f"{expected_lrs} vs {model.lrs}"

def test_model_checkpointing_callback(
    epochs, accumulate_grad_batches, batch_size, callback_interval, val_callback, expected_results
):
    hparams = Namespace(
        batch_size=batch_size,
        val_batch_size=batch_size,
        test_batch_size=batch_size,
        accumulate_grad_batches=accumulate_grad_batches,
        num_workers=4,
        max_epochs=epochs,
        max_steps=None,
        gpus=0,
        checkpoint_interval=callback_interval,
        no_val_checkpointing=not val_callback,
        no_epoch_checkpointing=False,
        pre_trained_dir='pre_trained_name',
        name="test",
        val_check_interval=0.25,
        **standard_args,
    )
    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")

    callback = TransformersModelCheckpointCallback(hparams)

    # instantiate PL trainer
    trainer = pl.Trainer.from_argparse_args(
        hparams,
        profiler='simple',
        logger=None,
        callbacks=[callback],
    )

    # instantiate PL model
    model = DummyTransformerModel(hparams)

    # Datasets
    datamodule = DummyDataModule(hparams, test_number=2, tokenizer=tokenizer)
    model.datamodule = datamodule
    trainer.fit(model, datamodule=datamodule)

    folder = os.path.join(hparams.output_dir, hparams.pre_trained_dir, hparams.name)
    listing = os.listdir(folder)
    shutil.rmtree(hparams.output_dir)

    assert set(listing) == set(expected_results), f"{listing} vs {set(expected_results)}"

def test_optimizers(optimizer_class, batch_size):
    hyperparameters = Namespace(
        batch_size=batch_size,
        val_batch_size=1,
        test_batch_size=1,
        num_workers=0,
        max_epochs=1,
        max_steps=20,
        accelerator='cpu',
        iterable=False,
        log_every_n_steps=1,
        accumulate_grad_batches=1,
        **standard_args,
    )

    del (
        hyperparameters.learning_rate,
        hyperparameters.weight_decay,
        hyperparameters.adam_epsilon,
        hyperparameters.adam_betas
    )

    parser = ArgumentParser()
    optimizer_class.add_optimizer_specific_args(parser)
    hyperparameters = Namespace(**vars(hyperparameters), **vars(parser.parse_args("")))
    hyperparameters.optimizer_class = optimizer_class

    tokenizer = BertTokenizer('tests/data/vocab.txt')

    # instantiate PL trainer and model
    trainer = pl.Trainer.from_argparse_args(hyperparameters)
    model = OptimModel(hyperparameters)

    # Datasets and Fit
    datamodule = DummyDataModule(hyperparameters, length_train=96, tokenizer=tokenizer)
    trainer.fit(model, datamodule=datamodule)

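# Hypothetical illustration (not part of the original suite) of the protocol
# `test_optimizers` relies on: each `optimizer_class` passed in is expected to expose
# an `add_optimizer_specific_args` classmethod that registers its defaults on an
# ArgumentParser, which the test then parses with an empty argv. The class and the
# argument names below are made-up examples, not the project's real optimizers.
from argparse import ArgumentParser

from torch.optim import SGD


class DummySGDOptimizer(SGD):

    @classmethod
    def add_optimizer_specific_args(cls, parser: ArgumentParser):
        # register optimizer-specific defaults so `parser.parse_args("")` returns them
        parser.add_argument('--learning_rate', type=float, default=1e-3)
        parser.add_argument('--momentum', type=float, default=0.9)
        return parser
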
def test_schedulers(scheduler_class, parameters, expected_lrs):
    hyperparameters = Namespace(
        batch_size=1,
        val_batch_size=1,
        test_batch_size=1,
        num_workers=0,
        max_epochs=1,
        max_steps=20,
        scheduler_last_epoch=-1,
        scheduler_verbose=False,
        accelerator='cpu',
        iterable=False,
        log_every_n_steps=1,
        accumulate_grad_batches=1,
        **standard_args,
        **parameters,
    )
    scheduler_class.add_scheduler_specific_args(ArgumentParser())

    class SchedulerModel(DummyTransformerModel):

        def __init__(self, hyperparameters):
            super().__init__(hyperparameters)
            self.lrs = []

        def _get_actual_lr(self):
            return self.trainer.optimizers[0].__dict__['param_groups'][0]['lr']

        def training_step(self, batch, *args):
            res = super().training_step(batch, *args)
            return {**res, 'lr': self._get_actual_lr()}

        def training_epoch_end(self, outputs, *args, **kwargs):
            self.lrs = [o['lr'] for o in outputs] + [self._get_actual_lr()]

        def configure_optimizers(self):
            # Define adam optimizer
            optimizer = AdamW(self.model.parameters(), lr=1.0)
            scheduler = scheduler_class(hyperparameters, optimizer)
            return {
                'optimizer': optimizer,
                'lr_scheduler': {
                    'scheduler': scheduler,
                    'interval': 'step',
                    'frequency': 1
                }
            }

    tokenizer = BertTokenizer('tests/data/vocab.txt')

    # instantiate PL trainer and model
    trainer = pl.Trainer.from_argparse_args(hyperparameters)
    model = SchedulerModel(hyperparameters)

    # Datasets and Fit
    datamodule = DummyDataModule(hyperparameters, length_train=96, tokenizer=tokenizer)
    trainer.fit(model, datamodule=datamodule)

    assert torch.allclose(
        torch.tensor(expected_lrs),
        torch.tensor(model.lrs),
    ), f"{expected_lrs} vs {model.lrs}"