示例#1
0
 class Config(ConfigBase):
     """Hyperparameters controlling the training loop.

     NOTE(review): field semantics below follow the existing ``#:`` comments
     and defaults; confirm against the trainer implementation that consumes
     this config.
     """

     #: Training epochs
     epochs: int = 10
     #: Stop after how many epochs when the eval metric is not improving
     early_stop_after: int = 0
     #: Clip gradient norm if set
     max_clip_norm: Optional[float] = None
     #: Whether metrics on training data should be computed and reported.
     report_train_metrics: bool = True
     #: Target time limit for training, default (None) to no time limit.
     target_time_limit_seconds: Optional[int] = None
     #: Whether to do evaluation and model selection based on it.
     do_eval: bool = True
     #: Number of samples for logging training progress.
     num_samples_to_log_progress: int = 1000
     #: Number of forward & backward per batch before update gradients, the
     #: actual_batch_size = batch_size x num_accumulated_batches
     num_accumulated_batches: int = 1
     #: Define epoch as a fixed number of batches. Subsequent epochs will continue
     #: to iterate through the data, cycling through it when they reach the end.
     #: If not set, use exactly one pass through the dataset as one epoch.
     #: This configuration only affects the train epochs, test and eval
     #: will always test their entire datasets.
     num_batches_per_epoch: Optional[int] = None
     #: config for optimizer, used in parameter update
     optimizer: Optimizer.Config = Adam.Config()
     #: Optional config for the learning rate scheduler (no scheduler when None)
     scheduler: Optional[Scheduler.Config] = None
示例#2
0
 class Config(ConfigBase):
     """Task-level configuration wiring together the data, model, training
     and export components."""

     #: Config for the data pipeline
     data: Data.Config = Data.Config()
     #: Model config; has no default, so it must be supplied explicitly
     model: Model.Config
     #: Config for the trainer driving the training loop
     trainer: NewTaskTrainer.Config = NewTaskTrainer.Config()
     #: Config for the optimizer used in parameter updates
     optimizer: Optimizer.Config = Adam.Config()
     #: Config for the learning rate scheduler
     scheduler: Scheduler.Config = Scheduler.Config()
     #: Optional config for exporting the trained model (no export when None)
     exporter: Optional[ModelExporter.Config] = None
示例#3
0
 class Config(ConfigBase):
     """Task-level configuration (featurizer/data-handler pipeline variant)."""

     #: Config for input feature handling
     features: FeatureConfig = FeatureConfig()
     #: Config for the featurizer that converts raw input into features
     featurizer: Featurizer.Config = SimpleFeaturizer.Config()
     #: Data handler config; has no default, so it must be supplied explicitly
     data_handler: DataHandler.Config
     #: Config for the trainer driving the training loop
     trainer: Trainer.Config = Trainer.Config()
     #: Config for the optimizer used in parameter updates
     optimizer: Optimizer.Config = Adam.Config()
     #: Optional scheduler config; NOTE(review): unlike sibling configs this
     #: Optional field defaults to an instance rather than None — confirm
     #: that is intentional
     scheduler: Optional[Scheduler.Config] = Scheduler.Config()
     #: Optional config for exporting the trained model (no export when None)
     exporter: Optional[ModelExporter.Config] = None
        def test_load_checkpoint(self):
            """Round-trip a checkpoint through save()/load() and verify the
            restored model, config and training state match the originals."""
            with tempfile.NamedTemporaryFile() as checkpoint_file:
                train_data = tests_module.test_file("train_data_tiny.tsv")
                eval_data = tests_module.test_file("test_data_tiny.tsv")
                config = PyTextConfig(
                    task=DocumentClassificationTask.Config(data=Data.Config(
                        source=TSVDataSource.Config(
                            train_filename=train_data,
                            eval_filename=eval_data,
                            field_names=["label", "slots", "text"],
                        ))),
                    version=LATEST_VERSION,
                    save_snapshot_path=checkpoint_file.name,
                )
                task = create_task(config.task)
                model = task.model
                # test checkpoint saving and loading
                optimizer = create_optimizer(Adam.Config(), model)
                scheduler = create_scheduler(Scheduler.Config(), optimizer)
                training_state = TrainingState(
                    model=model,
                    optimizer=optimizer,
                    scheduler=scheduler,
                    start_time=0,
                    epoch=0,
                    rank=0,
                    stage=Stage.TRAIN,
                    epochs_since_last_improvement=0,
                    best_model_state=None,
                    best_model_metric=None,
                    tensorizers=None,
                )

                checkpoint_path = checkpoint_file.name
                # NOTE(review): save() receives the file object here while the
                # sibling dist-training test passes a string identifier —
                # confirm save() accepts both.
                save(
                    config,
                    model,
                    None,
                    task.data.tensorizers,
                    training_state,
                    checkpoint_file,
                )
                task_restored, config_restored, training_state_restored = load(
                    checkpoint_path)
                optimizer_restored = training_state_restored.optimizer
                scheduler_restored = training_state_restored.scheduler
                self.assertOptimizerEqual(optimizer, optimizer_restored)
                # Fix: unittest.TestCase defines assertIsNotNone, not
                # assertNotNone; the original call raised AttributeError.
                self.assertIsNotNone(scheduler_restored)
                self.assertEqual(config, config_restored)
                self.assertModulesEqual(model, task_restored.model)
                model.eval()
                task_restored.model.eval()

                # Restored model must produce identical outputs to the original.
                inputs = torch.LongTensor([[1, 2, 3]]), torch.LongTensor([3])
                self.assertEqual(
                    model(*inputs).tolist(),
                    task_restored.model(*inputs).tolist())
示例#5
0
 def __init__(
     self,
     data: Data,
     model: Model,
     metric_reporter: Optional[MetricReporter] = None,
     trainer: Optional[NewTaskTrainer] = None,
     optimizer: Optional[Optimizer] = None,
     scheduler: Optional[Scheduler] = None,
     exporter: Optional[ModelExporter] = None,
 ):
     """Store the task components, constructing defaults for any component
     the caller left unset (metric reporter, trainer, optimizer)."""
     self.data = data
     self.model = model
     if metric_reporter:
         self.metric_reporter = metric_reporter
     else:
         # No reporter supplied: build the default one from the class config.
         self.metric_reporter = self.create_metric_reporter(
             self.Config.metric_reporter, model)
     if trainer:
         self.trainer = trainer
     else:
         self.trainer = NewTaskTrainer()
     if optimizer:
         self.optimizer = optimizer
     else:
         # Default optimizer: Adam over all model parameters, using the
         # defaults declared on Adam.Config.
         self.optimizer = Adam(model.parameters(),
                               **Adam.Config()._asdict())
     self.scheduler = scheduler
     self.exporter = exporter
示例#6
0
 def test_simple_trainer(self):
     """SimpleTrainer.fit on a dummy XLM-R binary doc classifier should
     return a trained nn.Module."""
     trainer = SimpleTrainer()
     loader = self._generate_data()
     classifier = models.xlmr_dummy_binary_doc_classifier(pretrained=False)
     adam = Adam(
         classifier.parameters(), lr=1e-3, weight_decay=1e-5, eps=1e-8
     )
     fitted = trainer.fit(loader, classifier, adam, epoch=1)
     assert isinstance(fitted, nn.Module)
示例#7
0
    def test_load_checkpoint_in_dist_training(self):
        """Save a checkpoint under an explicit identifier, reload it with a
        new rank/world size, and verify the restored state plus the updated
        distributed data-source fields."""
        with tempfile.NamedTemporaryFile() as checkpoint_file:
            train_data = tests_module.test_file("train_data_tiny.tsv")
            eval_data = tests_module.test_file("test_data_tiny.tsv")
            config = PyTextConfig(
                task=DocumentClassificationTask.Config(data=Data.Config(
                    source=BlockShardedTSVDataSource.Config(
                        train_filename=train_data,
                        eval_filename=eval_data,
                        field_names=["label", "slots", "text"],
                    ))),
                version=LATEST_VERSION,
                save_snapshot_path=checkpoint_file.name,
            )
            task = create_task(config.task)
            model = task.model
            # test checkpoint saving and loading
            optimizer = create_optimizer(Adam.Config(), model)
            scheduler = create_scheduler(Scheduler.Config(), optimizer)
            training_state = TrainingState(
                model=model,
                optimizer=optimizer,
                scheduler=scheduler,
                start_time=0,
                epoch=0,
                rank=0,
                stage=Stage.TRAIN,
                epochs_since_last_improvement=0,
                best_model_state=None,
                best_model_metric=None,
                tensorizers=task.data.tensorizers,
            )

            # Fix: renamed from `id`, which shadowed the builtin `id()`.
            checkpoint_id = "epoch-1"
            saved_path = save(config, model, None, task.data.tensorizers,
                              training_state, checkpoint_id)
            # Reload as a different worker to check rank/world_size rewiring.
            new_rank = 2
            new_world_size = 4
            task_restored, config_restored, training_state_restored = load(
                saved_path, rank=new_rank, world_size=new_world_size)
            self.assertCheckpointEqual(
                model,
                config,
                training_state,
                task_restored.model,
                config_restored,
                training_state_restored,
            )
            self.assertEqual(task_restored.data.data_source.rank, new_rank)
            self.assertEqual(task_restored.data.data_source.world_size,
                             new_world_size)
示例#8
0
 def test_compatible_trainer(self):
     """CompatibleTrainer.train on a dummy XLM-R binary doc classifier
     should yield a trained nn.Module."""
     train_loader = self._generate_data()
     val_loader = self._generate_data()
     classifier = models.xlmr_dummy_binary_doc_classifier(pretrained=False)
     adam = Adam(
         classifier.parameters(), lr=1e-3, weight_decay=1e-5, eps=1e-8
     )
     compat = CompatibleTrainer(classifier, epochs=1)
     trained, _ = compat.train(
         train_loader, val_loader, classifier, adam, label_names=["0", "1"]
     )
     assert isinstance(trained, nn.Module)
示例#9
0
 class Config(ConfigBase):
     """Hyperparameters for the training loop, including optimization,
     mixed precision, sparsification and discriminative-LR settings."""

     #: Training epochs
     epochs: int = 10
     #: Stop after how many epochs when the eval metric is not improving
     early_stop_after: int = 0
     #: Clip gradient norm if set
     max_clip_norm: Optional[float] = None
     #: Whether metrics on training data should be computed and reported.
     report_train_metrics: bool = True
     #: Target time limit for training, default (None) to no time limit.
     target_time_limit_seconds: Optional[int] = None
     #: Whether to do evaluation and model selection based on it.
     do_eval: bool = True
     #: if do_eval, do we load the best model state dict after training or just
     #: use the latest model state
     load_best_model_after_train: bool = True
     #: Number of samples for logging training progress.
     num_samples_to_log_progress: int = 1000
     #: Number of forward & backward per batch before update gradients, the
     #: actual_batch_size = batch_size x num_accumulated_batches
     num_accumulated_batches: int = 1
     #: Define epoch as a fixed number of batches. Subsequent epochs will continue
     #: to iterate through the data, cycling through it when they reach the end.
     #: If not set, use exactly one pass through the dataset as one epoch.
     #: This configuration only affects the train epochs, test and eval
     #: will always test their entire datasets.
     num_batches_per_epoch: Optional[int] = None
     #: config for optimizer, used in parameter update
     optimizer: Optimizer.Config = Adam.Config()
     #: Optional config for the learning rate scheduler (no scheduler when None)
     scheduler: Optional[Scheduler.Config] = None
     #: Optional config for model sparsification (no sparsifier when None)
     sparsifier: Optional[Sparsifier.Config] = None
     #: Define arguments for fp16 training. A fp16_optimizer will be created
     #: and wraps the original optimizer, which will scale loss during
     #: backward and master weight will be maintained on original optimizer.
     #: https://arxiv.org/abs/1710.03740
     fp16_args: FP16Optimizer.Config = FP16OptimizerFairseq.Config()
     #: Whether to emit training metrics to TensorBoard (assumed from the
     #: name — confirm against the trainer implementation).
     use_tensorboard: bool = False
     #: Presumably forwarded to distributed data-parallel setup (cf. torch
     #: DDP's find_unused_parameters) — confirm against the trainer.
     find_unused_parameters: bool = True
     #: Set a discriminative learning rate for some of the parameters in model.
     #: If None, all parameters will have the same lr.
     discriminative_lr: Optional[float] = None
     #: Model parameters match any patterns in the list will have discriminative_lr
     #: Parameters not matching any patterns will have default lr.
     #: E.g. ["decoder.mlp.0", "decoder.mlp.3"]
     discriminative_lr_params_pattern: Optional[List[str]] = None
     #: Model parameters match any patterns in the list will have lr = 0.0
     freeze_params_pattern: Optional[List[str]] = None
示例#10
0
 class Config(ConfigBase):
     """Training-loop hyperparameters (reduced variant of the trainer config)."""

     #: Training epochs
     epochs: int = 10
     #: Stop after how many epochs when the eval metric is not improving
     early_stop_after: int = 0
     #: Clip gradient norm if set
     max_clip_norm: Optional[float] = None
     #: Whether metrics on training data should be computed and reported.
     report_train_metrics: bool = True
     #: Target time limit for training, default (None) to no time limit.
     target_time_limit_seconds: Optional[int] = None
     #: Whether to do evaluation and model selection based on it.
     do_eval: bool = True
     #: Number of samples for logging training progress.
     #: Fix: added the missing `int` annotation so this field is declared like
     #: every sibling trainer config; unannotated class attributes are
     #: typically not registered as fields by ConfigBase-style classes
     #: (NOTE(review): confirm against ConfigBase's field-collection logic).
     num_samples_to_log_progress: int = 1000
     #: config for optimizer, used in parameter update
     optimizer: Optimizer.Config = Adam.Config()
     #: Optional config for the learning rate scheduler (no scheduler when None)
     scheduler: Optional[Scheduler.Config] = None
示例#11
0
 def test_simple_trainer(self):
     """SimpleTrainer.fit on a tiny randomly-initialized RoBERTa classifier
     should return a trained nn.Module."""
     loader = self._generate_data()
     roberta = models.RobertaModel(
         model_path=None,
         dense_dim=0,
         embedding_dim=32,
         out_dim=2,
         vocab_size=105,
         num_attention_heads=1,
         num_encoder_layers=1,
         output_dropout=0.4,
     )
     adam = Adam(
         roberta.parameters(), lr=1e-3, weight_decay=1e-5, eps=1e-8
     )
     fitted = SimpleTrainer().fit(loader, roberta, adam, epoch=1)
     assert isinstance(fitted, nn.Module)
示例#12
0
 def test_compatible_trainer(self):
     """CompatibleTrainer.train on a tiny randomly-initialized RoBERTa
     classifier should yield a trained nn.Module."""
     train_loader = self._generate_data()
     val_loader = self._generate_data()
     roberta = models.RobertaModel(
         model_path=None,
         dense_dim=0,
         embedding_dim=32,
         out_dim=2,
         vocab_size=105,
         num_attention_heads=1,
         num_encoder_layers=1,
         output_dropout=0.4,
     )
     adam = Adam(
         roberta.parameters(), lr=1e-3, weight_decay=1e-5, eps=1e-8
     )
     compat = CompatibleTrainer(roberta, epochs=1)
     trained, _ = compat.train(
         train_loader, val_loader, roberta, adam, label_names=["0", "1"]
     )
     assert isinstance(trained, nn.Module)