def test_reduce_on_plateau_error_throw_when_no_metrics_exist(self):
    with self.assertRaises(ConfigurationError) as context:
        LearningRateScheduler.from_params(
            Optimizer.from_params(self.model.named_parameters(), Params({"type": "adam"})),
            Params({"type": "reduce_on_plateau", "mode": "min"}),
        ).step(None, None)
    assert "learning rate scheduler requires a validation metric" in str(context.exception)

def test_reduce_on_plateau_works_when_metrics_exist(self):
    LearningRateScheduler.from_params(
        optimizer=Optimizer.from_params(
            model_parameters=self.model.named_parameters(), params=Params({"type": "adam"})
        ),
        params=Params({"type": "reduce_on_plateau"}),
    ).step(10)

def test_trainer_can_resume_with_lr_scheduler(self): lr_scheduler = LearningRateScheduler.from_params( self.optimizer, Params({ "type": "exponential", "gamma": 0.5 })) callbacks = self.default_callbacks() + [LrsCallback(lr_scheduler)] trainer = CallbackTrainer(model=self.model, optimizer=self.optimizer, callbacks=callbacks, num_epochs=2, serialization_dir=self.TEST_DIR) trainer.train() new_lr_scheduler = LearningRateScheduler.from_params( self.optimizer, Params({ "type": "exponential", "gamma": 0.5 })) callbacks = self.default_callbacks() + [LrsCallback(new_lr_scheduler)] new_trainer = CallbackTrainer(model=self.model, optimizer=self.optimizer, callbacks=callbacks, num_epochs=4, serialization_dir=self.TEST_DIR) new_trainer.handler.fire_event(Events.RESTORE_CHECKPOINT) assert new_trainer.epoch_number == 2 assert new_lr_scheduler.lr_scheduler.last_epoch == 1 new_trainer.train()
def test_trainer_can_resume_with_lr_scheduler(self):
    # pylint: disable=protected-access
    lr_scheduler = LearningRateScheduler.from_params(
        self.optimizer, Params({"type": "exponential", "gamma": 0.5}))
    trainer = Trainer(model=self.model,
                      optimizer=self.optimizer,
                      iterator=self.iterator,
                      learning_rate_scheduler=lr_scheduler,
                      train_dataset=self.instances,
                      validation_dataset=self.instances,
                      num_epochs=2,
                      serialization_dir=self.TEST_DIR)
    trainer.train()

    new_lr_scheduler = LearningRateScheduler.from_params(
        self.optimizer, Params({"type": "exponential", "gamma": 0.5}))
    new_trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          learning_rate_scheduler=new_lr_scheduler,
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          num_epochs=4,
                          serialization_dir=self.TEST_DIR)
    epoch, _ = new_trainer._restore_checkpoint()
    assert epoch == 2
    assert new_trainer._learning_rate_scheduler.lr_scheduler.last_epoch == 1
    new_trainer.train()

def test_trainer_can_resume_with_lr_scheduler(self): lr_scheduler = LearningRateScheduler.from_params( self.optimizer, Params({ "type": "exponential", "gamma": 0.5 })) trainer = Trainer(model=self.model, optimizer=self.optimizer, iterator=self.iterator, learning_rate_scheduler=lr_scheduler, train_dataset=self.instances, validation_dataset=self.instances, num_epochs=2, serialization_dir=self.TEST_DIR) trainer.train() new_lr_scheduler = LearningRateScheduler.from_params( self.optimizer, Params({ "type": "exponential", "gamma": 0.5 })) new_trainer = Trainer(model=self.model, optimizer=self.optimizer, iterator=self.iterator, learning_rate_scheduler=new_lr_scheduler, train_dataset=self.instances, validation_dataset=self.instances, num_epochs=4, serialization_dir=self.TEST_DIR) epoch = new_trainer._restore_checkpoint() assert epoch == 2 assert new_trainer._learning_rate_scheduler.lr_scheduler.last_epoch == 1 new_trainer.train()
def test_from_params(self): optim = self._get_optimizer() sched = LearningRateScheduler.from_params( optim, Params({"type": "slanted_triangular", "num_epochs": 5, "num_steps_per_epoch": 10, "gradual_unfreezing": True, "discriminative_fine_tuning": True, "decay_factor": 0.5})).lr_scheduler assert sched.num_epochs == 5 assert sched.num_steps_per_epoch == 10 assert sched.gradual_unfreezing is True assert sched.freezing_current is True assert len(optim.param_groups) == 3 # The default parameter group in the Optimizer is empty assert not optim.param_groups[-1]["params"] assert optim.param_groups[-2]["lr"] == 1.0 / sched.ratio assert optim.param_groups[-3]["lr"] == 0.5 / sched.ratio with self.assertRaises(TypeError): # num_epochs and num_steps_per_epoch are required LearningRateScheduler.from_params(optim, Params({"type": "slanted_triangular", "num_epochs": 5})) LearningRateScheduler.from_params( optim, Params({"type": "slanted_triangular", "num_steps_epochs": 10}))
def test_trainer_can_resume_with_lr_scheduler(self): lr_scheduler = LearningRateScheduler.from_params( self.optimizer, Params({"type": "exponential", "gamma": 0.5})) callbacks = self.default_callbacks() + [UpdateLearningRate(lr_scheduler)] trainer = CallbackTrainer(model=self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, callbacks=callbacks, num_epochs=2, serialization_dir=self.TEST_DIR) trainer.train() new_lr_scheduler = LearningRateScheduler.from_params( self.optimizer, Params({"type": "exponential", "gamma": 0.5})) callbacks = self.default_callbacks() + [UpdateLearningRate(new_lr_scheduler)] new_trainer = CallbackTrainer(model=self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, callbacks=callbacks, num_epochs=4, serialization_dir=self.TEST_DIR) new_trainer.handler.fire_event(Events.TRAINING_START) assert new_trainer.epoch_number == 2 assert new_lr_scheduler.lr_scheduler.last_epoch == 1 new_trainer.train()
def test_reduce_on_plateau_error_throw_when_no_metrics_exist(self):
    model = torch.nn.Sequential(torch.nn.Linear(10, 10))
    with self.assertRaises(ConfigurationError) as context:
        LearningRateScheduler.from_params(
            Optimizer.from_params(model.named_parameters(), Params({"type": "adam"})),
            Params({"type": "reduce_on_plateau"})).step(None, None)
    self.assertTrue(
        'The reduce_on_plateau learning rate scheduler requires a validation metric'
        in str(context.exception))

def test_reduce_on_plateau_error_throw_when_no_metrics_exist(self):
    with pytest.raises(
        ConfigurationError, match="learning rate scheduler requires a validation metric"
    ):
        LearningRateScheduler.from_params(
            optimizer=Optimizer.from_params(
                model_parameters=self.model.named_parameters(), params=Params({"type": "adam"})
            ),
            params=Params({"type": "reduce_on_plateau"}),
        ).step(None)

def test_from_params(self): """Make sure ``from_params`` initializes an instance properly.""" optim = self._get_optimizer() sched = LearningRateScheduler.from_params(optim, Params({"type": "cosine", "t_initial": 5})).lr_scheduler assert sched.t_initial == 5 assert sched._initialized is True # Learning should be unchanged after initializing scheduler. assert optim.param_groups[0]["lr"] == 1.0 with self.assertRaises(TypeError): # t_initial is required. LearningRateScheduler.from_params(optim, Params({"type": "cosine"}))
def test_trainer_configs(configurations_path):
    configs = _read_configs(configurations_path, "Trainer")
    linear = nn.Linear(2, 2)
    for config_name, config in configs.items():
        assert isinstance(config, TrainerConfiguration)
        # TODO: Maybe these checks could go directly in the `TrainerConfiguration` class
        optimizer_dict = {
            "model_parameters": linear.named_parameters(),
            **config.optimizer,
        }
        optimizer = Optimizer.from_params(Params(optimizer_dict))
        lrs_dict = {"optimizer": optimizer, **config.learning_rate_scheduler}
        LearningRateScheduler.from_params(Params(lrs_dict))

def from_params(cls,
                model: Model,
                serialization_dir: str,
                iterator: DataIterator,
                train_data: Iterable[Instance],
                validation_data: Optional[Iterable[Instance]],
                params: Params,
                validation_iterator: DataIterator = None) -> 'GANTrainer':
    patience = params.pop_int("patience", None)
    validation_metric = params.pop("validation_metric", "-loss")
    shuffle = params.pop_bool("shuffle", True)
    num_epochs = params.pop_int("num_epochs", 20)
    cuda_device = params.pop_int("cuda_device", -1)
    grad_norm = params.pop_float("grad_norm", None)
    grad_clipping = params.pop_float("grad_clipping", None)
    lr_scheduler_params = params.pop("learning_rate_scheduler", None)

    if cuda_device >= 0:
        model = model.cuda(cuda_device)
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))

    if lr_scheduler_params:
        scheduler = LearningRateScheduler.from_params(optimizer, lr_scheduler_params)
    else:
        scheduler = None

    num_serialized_models_to_keep = params.pop_int("num_serialized_models_to_keep", 20)
    keep_serialized_model_every_num_seconds = params.pop_int(
        "keep_serialized_model_every_num_seconds", None)
    model_save_interval = params.pop_float("model_save_interval", None)
    summary_interval = params.pop_int("summary_interval", 100)
    histogram_interval = params.pop_int("histogram_interval", None)

    params.assert_empty(cls.__name__)
    return cls(model, optimizer, iterator,
               train_data, validation_data,
               patience=patience,
               validation_metric=validation_metric,
               validation_iterator=validation_iterator,
               shuffle=shuffle,
               num_epochs=num_epochs,
               serialization_dir=serialization_dir,
               cuda_device=cuda_device,
               grad_norm=grad_norm,
               grad_clipping=grad_clipping,
               learning_rate_scheduler=scheduler,
               num_serialized_models_to_keep=num_serialized_models_to_keep,
               keep_serialized_model_every_num_seconds=keep_serialized_model_every_num_seconds,
               model_save_interval=model_save_interval,
               summary_interval=summary_interval,
               histogram_interval=histogram_interval)

def test_noam_learning_rate_schedule_does_not_crash(self):
    model = torch.nn.Sequential(torch.nn.Linear(10, 10))
    lrs = LearningRateScheduler.from_params(
        Optimizer.from_params(model.named_parameters(), Params({"type": "adam"})),
        Params({"type": "noam", "model_size": 10, "warmup_steps": 2000}))
    lrs.step(None)
    lrs.step_batch(None)

def test_from_params(self): """Make sure `from_params` initializes an instance properly.""" optim = self._get_optimizer() sched = LearningRateScheduler.from_params( optimizer=optim, params=Params({"type": "cosine", "t_initial": 5}) ) assert sched.t_initial == 5 assert sched.last_epoch == -1 # Learning should be unchanged after initializing scheduler. assert optim.param_groups[0]["lr"] == 1.0 with self.assertRaises(ConfigurationError): # t_initial is required. LearningRateScheduler.from_params(optimizer=optim, params=Params({"type": "cosine"}))
def get_scheduler(self) -> LearningRateScheduler:
    return LearningRateScheduler.from_params(
        Params(
            {
                "type": "combined",
                "schedulers": [
                    [2, {"type": "polynomial_decay", "warmup_steps": 10, "end_learning_rate": 0.5}],
                    [5, {"type": "polynomial_decay", "warmup_steps": 0, "end_learning_rate": 0.1}],
                ],
            }
        ),
        optimizer=self.optimizer,
        num_steps_per_epoch=10,
    )

def test_exponential_works_properly(self):
    scheduler = LearningRateScheduler.from_params(
        optimizer=Optimizer.from_params(
            self.model.named_parameters(), Params({"type": "sgd", "lr": 1.0})
        ),
        params=Params({"type": "exponential", "gamma": 0.5}),
    )
    optimizer = scheduler.lr_scheduler.optimizer
    optimizer.step()  # to avoid a pytorch warning

    # Initial learning rate should be unchanged for first epoch.
    assert optimizer.param_groups[0]["lr"] == 1.0
    # But since the way PyTorch LR schedulers work is a little wonky,
    # the LR will also be unchanged for the second epoch (epoch id 0).
    scheduler.step(epoch=0)
    assert optimizer.param_groups[0]["lr"] == 1.0
    # Now the learning rate starts to be updated...
    scheduler.step(epoch=1)
    assert optimizer.param_groups[0]["lr"] == 0.5
    scheduler.step(epoch=2)
    assert optimizer.param_groups[0]["lr"] == 0.5 ** 2

def test_linear_with_warmup_works_properly(self):
    scheduler = LearningRateScheduler.from_params(
        optimizer=Optimizer.from_params(
            model_parameters=self.model.named_parameters(),
            params=Params({"type": "sgd", "lr": 1.0}),
        ),
        params=Params(
            {
                "type": "linear_with_warmup",
                "warmup_steps": 2,
                "num_epochs": 2,
                "num_steps_per_epoch": 3,
            }
        ),
    )
    optimizer = scheduler.optimizer

    # Linear warmup for 2 steps.
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.5  # 1.0 * 1/2
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 1.0  # 1.0 * 2/2

    # Linear decay for 4 steps.
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.75
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.5
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.25
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.0

def _setup_training(self, tasks, train_params, optimizer_params, scheduler_params, iterator):
    # Task bookkeeping
    task_infos = {task.name: {} for task in tasks}
    for task in tasks:
        task_info = task_infos[task.name]
        tr_generator = iterator(task.train_data, num_epochs=None, cuda_device=self._cuda_device)
        task_info['n_tr_batches'] = iterator.get_num_batches(task.train_data)
        task_info['tr_generator'] = tr_generator
        task_info['loss'] = 0.0
        task_info['total_batches_trained'] = 0
        task_info['n_batches_since_val'] = 0
        task_info['optimizer'] = Optimizer.from_params(
            train_params, copy.deepcopy(optimizer_params))
        task_info['scheduler'] = LearningRateScheduler.from_params(
            task_info['optimizer'], copy.deepcopy(scheduler_params))
        task_info['stopped'] = False
        task_info['last_log'] = time.time()

    # Metric bookkeeping
    all_metrics = [task.val_metric for task in tasks] + ['micro_accuracy', 'macro_accuracy']
    metric_infos = {metric: {'hist': [], 'stopped': False, 'best': (-1, {})}
                    for metric in all_metrics}

    self._task_infos = task_infos
    self._metric_infos = metric_infos
    return task_infos, metric_infos

def test_polynomial_decay_works_properly(self):
    scheduler = LearningRateScheduler.from_params(
        optimizer=Optimizer.from_params(
            model_parameters=self.model.named_parameters(),
            params=Params({"type": "sgd", "lr": 1.0}),
        ),
        params=Params(
            {
                "type": "polynomial_decay",
                "warmup_steps": 2,
                "num_epochs": 2,
                "num_steps_per_epoch": 3,
                "end_learning_rate": 0.1,
                "power": 2,
            }
        ),
    )
    optimizer = scheduler.optimizer

    # Linear warmup for 2 steps.
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.5  # 1.0 * 1/2
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 1.0  # 1.0 * 2/2

    # Polynomial decay for 4 steps.
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.60625  # (1.0 - 0.1) * (3/4) ** 2 + 0.1
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.325  # (1.0 - 0.1) * (2/4) ** 2 + 0.1
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.15625  # (1.0 - 0.1) * (1/4) ** 2 + 0.1
    scheduler.step_batch()
    assert optimizer.param_groups[0]["lr"] == 0.1  # (1.0 - 0.1) * (0/4) ** 2 + 0.1

def _run_scheduler_get_lrs(self, params, num_steps_per_epoch): optimizer = self._get_optimizer() params["type"] = "slanted_triangular" scheduler = LearningRateScheduler.from_params(optimizer=optimizer, params=Params( deepcopy(params))) lrs = [] batch_num_total = 0 for epoch in range(params["num_epochs"]): for _ in range(num_steps_per_epoch): batch_num_total += 1 # allennlp trainer calls step_batch after updating parameters # so collect lr at time of parameter update lrs.append([ param_group["lr"] * float(param_group["params"][0].requires_grad) for param_group in optimizer.param_groups[:2] ]) scheduler.step_batch(batch_num_total) if params.get("gradual_unfreezing") and epoch == 0: assert scheduler.freezing_current # step() takes two arguments: validation metric and epoch scheduler.step(None, epoch) return lrs
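# Hedged example call (not in the original source) for the helper above: the parameter
# names mirror the slanted_triangular from_params test elsewhere in this file, and the
# concrete values are illustrative assumptions only.
def _example_slanted_triangular_lrs(self):
    lrs = self._run_scheduler_get_lrs(
        {"num_epochs": 2, "num_steps_per_epoch": 10, "gradual_unfreezing": True},
        num_steps_per_epoch=10,
    )
    # One entry per batch, each holding the lr of the first two parameter groups.
    assert len(lrs) == 2 * 10
    return lrs
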
def from_params( # type: ignore cls, params: Params, optimizer: torch.optim.Optimizer, **extras) -> "UpdateLearningRate": return cls( LearningRateScheduler.from_params( params=params.pop("learning_rate_scheduler"), optimizer=optimizer))
def test_no_metric_wrapper_can_support_none_for_metrics(self):
    lrs = LearningRateScheduler.from_params(
        Optimizer.from_params(self.model.named_parameters(), Params({"type": "adam"})),
        Params({"type": "step", "step_size": 1}))
    lrs.step(None, None)

def test_from_params(self): optim = self._get_optimizer() sched = LearningRateScheduler.from_params( optim, Params({ "type": "cosine", "t_max": 5 })).lr_scheduler assert sched.t_max == 5 assert sched._initialized is True # Learning should be unchanged after initializing scheduler. assert optim.param_groups[0]["lr"] == 1.0 with self.assertRaises(TypeError): # t_max is required. LearningRateScheduler.from_params(optim, Params({"type": "cosine"}))
def test_no_metric_wrapper_can_support_none_for_metrics(self):
    lrs = LearningRateScheduler.from_params(
        optimizer=Optimizer.from_params(
            model_parameters=self.model.named_parameters(), params=Params({"type": "adam"})
        ),
        params=Params({"type": "step", "step_size": 1}),
    )
    lrs.lr_scheduler.optimizer.step()  # to avoid a pytorch warning
    lrs.step(None)

def test_noam_learning_rate_schedule_does_not_crash(self):
    lrs = LearningRateScheduler.from_params(
        optimizer=Optimizer.from_params(
            model_parameters=self.model.named_parameters(), params=Params({"type": "adam"})
        ),
        params=Params({"type": "noam", "model_size": 10, "warmup_steps": 2000}),
    )
    lrs.step(None)
    lrs.step_batch(None)

def construct_learning_scheduler(optimizer):
    # scheduler = ReduceLROnPlateau(optimizer, 'max', factor=0.5, patience=10, verbose=True)
    return LearningRateScheduler.from_params(
        optimizer,
        params=Params({
            "type": "multi_step",
            "milestones": [45, 60, 75],
            "gamma": 0.5,
        }))

def test_no_metric_wrapper_can_support_none_for_metrics(self): model = torch.nn.Sequential(torch.nn.Linear(10, 10)) lrs = LearningRateScheduler.from_params( Optimizer.from_params(model.named_parameters(), Params({"type": "adam"})), Params({ "type": "step", "step_size": 1 })) lrs.step(None, None)
def test_trainer_can_run_with_lr_scheduler(self): lr_params = Params({"type": "reduce_on_plateau"}) lr_scheduler = LearningRateScheduler.from_params(self.optimizer, lr_params) callbacks = self.default_callbacks() + [UpdateLearningRate(lr_scheduler)] trainer = CallbackTrainer(model=self.model, optimizer=self.optimizer, callbacks=callbacks, num_epochs=2) trainer.train()
def init_and_restore_scheduler(optimizer: torch.optim.Optimizer,
                               params: Dict[str, Any],
                               state_dict: Dict[str, Any] = None):
    """
    Initialize a new scheduler and optionally restore its state from a checkpoint.
    """
    params["type"] = "cosine"
    scheduler = LearningRateScheduler.from_params(optimizer, Params(params))
    if state_dict is not None:
        scheduler.lr_scheduler.load_state_dict(state_dict)
    return scheduler

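# A minimal round-trip sketch (not from the original source) for init_and_restore_scheduler
# above. It reuses the Optimizer/Params construction seen in the other snippets in this
# file; the "t_initial" value for the cosine schedule is an illustrative assumption, and
# the round trip relies only on the standard PyTorch state_dict()/load_state_dict() API
# of the wrapped lr_scheduler.
def _example_scheduler_round_trip():
    model = torch.nn.Sequential(torch.nn.Linear(10, 10))
    optimizer = Optimizer.from_params(model.named_parameters(), Params({"type": "adam"}))
    scheduler = init_and_restore_scheduler(optimizer, {"t_initial": 5})
    checkpoint = scheduler.lr_scheduler.state_dict()
    # A second call restores the saved state into a freshly constructed scheduler.
    restored = init_and_restore_scheduler(optimizer, {"t_initial": 5}, state_dict=checkpoint)
    return restored
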
def test_trainer_can_run_with_lr_scheduler(self): lr_params = Params({"type": "reduce_on_plateau"}) lr_scheduler = LearningRateScheduler.from_params(self.optimizer, lr_params) trainer = Trainer(model=self.model, optimizer=self.optimizer, iterator=self.iterator, learning_rate_scheduler=lr_scheduler, validation_metric="-loss", train_dataset=self.instances, validation_dataset=self.instances, num_epochs=2) trainer.train()
def test_schedules(self): """Make sure the math is correct.""" for epochs, params, lr_checks, _ in self.cosine_schedule_cases: optimizer = self._get_optimizer() params["type"] = "cosine" scheduler = LearningRateScheduler.from_params(optimizer, Params(params)).lr_scheduler lrs = [optimizer.param_groups[0]["lr"]] for epoch in range(epochs): scheduler.step(epoch) lrs.append(optimizer.param_groups[0]["lr"]) for it, lr in lr_checks: assert lrs[it] == lr
def from_params(cls,
                model: Model,
                serialization_dir: str,
                iterator: DataIterator,
                train_data: Iterable[Instance],
                validation_data: Optional[Iterable[Instance]],
                params: Params,
                validation_iterator: DataIterator = None) -> 'Trainer':
    patience = params.pop_int("patience", None)
    validation_metric = params.pop("validation_metric", "-loss")
    num_epochs = params.pop_int("num_epochs", 20)
    cuda_device = params.pop_int("cuda_device", -1)
    grad_norm = params.pop_float("grad_norm", None)
    grad_clipping = params.pop_float("grad_clipping", None)
    lr_scheduler_params = params.pop("learning_rate_scheduler", None)

    if cuda_device >= 0:
        model = model.cuda(cuda_device)
    parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
    optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))

    if lr_scheduler_params:
        scheduler = LearningRateScheduler.from_params(optimizer, lr_scheduler_params)
    else:
        scheduler = None

    num_serialized_models_to_keep = params.pop_int("num_serialized_models_to_keep", 20)
    keep_serialized_model_every_num_seconds = params.pop_int(
        "keep_serialized_model_every_num_seconds", None)
    model_save_interval = params.pop_float("model_save_interval", None)
    summary_interval = params.pop_int("summary_interval", 100)
    histogram_interval = params.pop_int("histogram_interval", None)

    params.assert_empty(cls.__name__)
    return Trainer(model, optimizer, iterator,
                   train_data, validation_data,
                   patience=patience,
                   validation_metric=validation_metric,
                   validation_iterator=validation_iterator,
                   num_epochs=num_epochs,
                   serialization_dir=serialization_dir,
                   cuda_device=cuda_device,
                   grad_norm=grad_norm,
                   grad_clipping=grad_clipping,
                   learning_rate_scheduler=scheduler,
                   num_serialized_models_to_keep=num_serialized_models_to_keep,
                   keep_serialized_model_every_num_seconds=keep_serialized_model_every_num_seconds,
                   model_save_interval=model_save_interval,
                   summary_interval=summary_interval,
                   histogram_interval=histogram_interval)

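# A hedged sketch (not from the original source) of the trainer Params block that the
# from_params method above consumes: "optimizer" and "learning_rate_scheduler" are the
# keys it pops to build the Optimizer and LearningRateScheduler; the concrete values are
# illustrative assumptions borrowed from other snippets in this file.
example_trainer_params = Params({
    "num_epochs": 2,
    "optimizer": {"type": "adam"},
    "learning_rate_scheduler": {"type": "exponential", "gamma": 0.5},
})
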
def _run_scheduler_get_lrs(self, params, num_steps_per_epoch): optimizer = self._get_optimizer() params["type"] = "slanted_triangular" scheduler = LearningRateScheduler.from_params(optimizer, Params(params)) lrs = [] batch_num_total = 0 for epoch in range(params["num_epochs"]): for _ in range(num_steps_per_epoch): batch_num_total += 1 # allennlp trainer calls step_batch after updating parameters # so collect lr at time of parameter update lrs.append([param_group["lr"] * float(param_group['params'][0].requires_grad) for param_group in optimizer.param_groups[:2]]) scheduler.step_batch(batch_num_total) if params.get("gradual_unfreezing") and epoch == 0: assert scheduler.lr_scheduler.freezing_current # step() takes two arguments: validation metric and epoch scheduler.step(None, epoch) return lrs
def test_reduce_on_plateau_works_when_metrics_exist(self):
    model = torch.nn.Sequential(torch.nn.Linear(10, 10))
    LearningRateScheduler.from_params(
        Optimizer.from_params(model.named_parameters(), Params({"type": "adam"})),
        Params({"type": "reduce_on_plateau"})).step(10, None)

def test_no_metric_wrapper_can_support_none_for_metrics(self):
    model = torch.nn.Sequential(torch.nn.Linear(10, 10))
    lrs = LearningRateScheduler.from_params(
        Optimizer.from_params(model.named_parameters(), Params({"type": "adam"})),
        Params({"type": "step", "step_size": 1}))
    lrs.step(None, None)