Example #1
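Wraps the model in models.DistributedFairseqModel only when the data-parallel world size is greater than 1 and neither BMUF nor TPU training is in use; otherwise the raw model is returned. The wrapper is built lazily and cached in _wrapped_model.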
def model(self):
    if self._wrapped_model is None:
        if (
            self.data_parallel_world_size > 1
            and not self.args.use_bmuf
            and not self.tpu
        ):
            self._wrapped_model = models.DistributedFairseqModel(
                self.args,
                self._model,
                process_group=self.data_parallel_process_group,
            )
        else:
            self._wrapped_model = self._model
    return self._wrapped_model
Example #2
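The same lazy-wrapping pattern applied to the criterion. Wrapping is only useful when the criterion itself has trainable parameters, so utils.has_parameters is checked before the world-size and BMUF conditions.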
def criterion(self):
    if self._wrapped_criterion is None:
        if (
            utils.has_parameters(self._criterion)
            and self.data_parallel_world_size > 1
            and not self.args.use_bmuf
        ):
            self._wrapped_criterion = models.DistributedFairseqModel(
                self.args,
                self._criterion,
                process_group=self.data_parallel_process_group,
            )
        else:
            self._wrapped_criterion = self._criterion
    return self._wrapped_criterion
Example #3
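A variant with knowledge-distillation support: when args.kd is set, each distillation model is moved to the same device as the wrapped model's encoder parameters. Note it also omits the process_group argument and gates only on distributed_world_size.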
def model(self):
    if self._wrapped_model is None:
        if self.args.distributed_world_size > 1:
            self._wrapped_model = models.DistributedFairseqModel(
                self.args,
                self._model,
            )
        else:
            self._wrapped_model = self._model
        if self.args.kd:
            for value in self._wrapped_model.distill_models.values():
                value.to(
                    next(self._wrapped_model.encoder.parameters()).device
                )
    return self._wrapped_model
Example #4
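The same logic as Example #1, but reading from a structured config instead of flat args: the BMUF flag comes from cfg.optimization.use_bmuf and the wrapper receives cfg.distributed_training.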
def model(self):
    if self._wrapped_model is None:
        if (
            self.data_parallel_world_size > 1
            and not self.cfg.optimization.use_bmuf
            and not self.tpu
        ):
            self._wrapped_model = models.DistributedFairseqModel(
                self.cfg.distributed_training,
                self._model,
                process_group=self.data_parallel_process_group,
            )
        else:
            self._wrapped_model = self._model
    return self._wrapped_model
Example #5
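Here the wrapping decision is delegated to a single use_distributed_wrapper property rather than inline checks, and an explicit device is passed to the wrapper alongside the process group.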
def criterion(self):
    if self._wrapped_criterion is None:
        if (
            utils.has_parameters(self._criterion)
            and self.use_distributed_wrapper
        ):
            self._wrapped_criterion = models.DistributedFairseqModel(
                self.cfg.distributed_training,
                self._criterion,
                process_group=self.data_parallel_process_group,
                device=self.device,
            )
        else:
            self._wrapped_criterion = self._criterion
    return self._wrapped_criterion
Example #6
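The config-based counterpart of Example #2: the has_parameters check is combined with the world-size, BMUF, and TPU conditions, and the wrapper receives cfg.distributed_training.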
def criterion(self):
    if self._wrapped_criterion is None:
        if (
            utils.has_parameters(self._criterion)
            and self.data_parallel_world_size > 1
            and not self.cfg.optimization.use_bmuf
            and not self.tpu
        ):
            self._wrapped_criterion = models.DistributedFairseqModel(
                self.cfg.distributed_training,
                self._criterion,
                process_group=self.data_parallel_process_group,
            )
        else:
            self._wrapped_criterion = self._criterion
    return self._wrapped_criterion
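All six variants implement the same lazy wrap-once pattern: the getter checks a cached _wrapped_* attribute, builds the distributed wrapper at most once, and returns the cached object on every later access. Below is a minimal, self-contained sketch of that pattern; the Wrapped class and the world_size argument are stand-ins for models.DistributedFairseqModel and the trainer's data-parallel configuration, not fairseq APIs.

class Wrapped:
    # Stand-in for models.DistributedFairseqModel (assumption: the real
    # wrapper holds the inner module plus distributed settings).
    def __init__(self, module):
        self.module = module

class Trainer:
    def __init__(self, model, world_size):
        self._model = model
        self._wrapped_model = None
        # Hypothetical stand-in for data_parallel_world_size.
        self.world_size = world_size

    @property
    def model(self):
        # Wrap lazily, exactly once; later accesses reuse the cached object.
        if self._wrapped_model is None:
            if self.world_size > 1:
                self._wrapped_model = Wrapped(self._model)
            else:
                self._wrapped_model = self._model
        return self._wrapped_model

trainer = Trainer(model=object(), world_size=2)
assert trainer.model is trainer.model  # the wrapper is created only once

Caching matters because the getter is called on every training step; rebuilding the distributed wrapper each time would re-register hooks and break gradient synchronization state.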