Example #1
    def batch_loss(self, batch_group: List[TensorDict],
                   for_training: bool) -> torch.Tensor:
        """
        Does a forward pass on the given batches and returns the ``loss`` value from the result.
        If ``for_training`` is ``True``, the model's regularization penalty is also added to the loss.
        """
        if self._multiple_gpu:
            output_dict = training_util.data_parallel(batch_group, self.model,
                                                      self._cuda_devices)
        else:
            assert len(batch_group) == 1
            batch = batch_group[0]
            batch = nn_util.move_to_device(batch, self._cuda_devices[0])
            output_dict = self.model(**batch)

        try:
            loss = output_dict["loss"]
            if for_training:
                loss += self.model.get_regularization_penalty()
        except KeyError:
            if for_training:
                raise RuntimeError(
                    "The model you are trying to optimize does not contain a"
                    " 'loss' key in the output of model.forward(inputs).")
            loss = None

        return loss
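For context, a minimal sketch of how a training loop might consume a ``batch_loss`` like the one above; the optimizer attribute and the ``batch_groups`` list are illustrative assumptions, not part of the snippet.

import torch

def train_epoch_sketch(trainer, batch_groups):
    # Assumes `trainer` exposes `model`, `optimizer`, and the batch_loss
    # method above; `batch_groups` is a list of List[TensorDict] groups.
    trainer.model.train()
    total_loss = 0.0
    for batch_group in batch_groups:
        trainer.optimizer.zero_grad()
        loss = trainer.batch_loss(batch_group, for_training=True)
        if torch.isnan(loss):
            raise ValueError("nan loss encountered")
        loss.backward()
        trainer.optimizer.step()
        total_loss += loss.item()
    return total_loss / max(len(batch_groups), 1)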
Example #2
    def _run_model(self, batch_group):
        """
        Runs a forward pass on the given batch group, handling both the
        multi-GPU and single-device cases, and returns the model's output
        dictionary.
        """
        if self.n_gpu_use > 1:
            output_dict = training_util.data_parallel(batch_group, self.model,
                                                      self.device)
        else:
            assert len(batch_group) == 1
            batch = batch_group[0]
            batch = nn_util.move_to_device(batch, self.device[0])
            output_dict = self.model(**batch)
        return output_dict
Example #3
    def get_output_dict(self, batch_group: List[TensorDict], for_training: bool) -> Dict[str, torch.Tensor]:
        """
        Does a forward pass on the given batches and returns the model's full
        output dictionary. Unlike ``batch_loss``, this neither extracts the
        ``loss`` nor adds the regularization penalty; ``for_training`` is kept
        for interface compatibility but is unused here.
        """
        if self._multiple_gpu:
            output_dict = training_util.data_parallel(batch_group, self.model, self._cuda_devices)
        else:
            assert len(batch_group) == 1
            batch = batch_group[0]
            batch = nn_util.move_to_device(batch, self._cuda_devices[0])
            output_dict = self.model(**batch)
        return output_dict
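Since this variant hands back the raw output dictionary, the caller extracts the loss itself and has to tolerate its absence (e.g. at prediction time). A hedged sketch of such a caller; the names are illustrative only.

def evaluation_loss_sketch(trainer, batch_group):
    # Assumes `trainer` exposes the get_output_dict method above.
    output_dict = trainer.get_output_dict(batch_group, for_training=False)
    # A model may legitimately omit the "loss" key during prediction, so
    # mirror the behaviour of batch_loss and return None in that case.
    loss = output_dict.get("loss")
    return None if loss is None else loss.detach()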
Example #4
    def batch_loss(self,
                   batch_group: List[TensorDict],
                   for_training: bool,
                   eval_metric=True):
        """
        Does a forward pass on the given batches and returns the ``loss`` value in the result.
        If ``for_training`` is `True` also applies regularization penalty.
        """
        if self.trainer._multiple_gpu:
            output_dict = training_util.data_parallel(
                batch_group, self.trainer.model, self.trainer._cuda_devices)
        else:
            assert len(batch_group) == 1
            batch = batch_group[0]
            batch = nn_util.move_to_device(batch,
                                           self.trainer._cuda_devices[0])
            output_dict = self.trainer.model(**batch, eval_metric=eval_metric)

        if for_training and eval_metric:
            penalty = self.trainer.model.get_regularization_penalty()
            output_dict['regularization_penalty'] = penalty

        return output_dict
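Because this variant keeps the regularization penalty inside the returned dictionary instead of folding it into the loss, the caller has to combine the two before backpropagating. A minimal sketch under that assumption; ``trainer_helper`` is a placeholder name.

def combined_training_loss_sketch(trainer_helper, batch_group):
    # Assumes `trainer_helper` exposes the batch_loss variant above.
    output_dict = trainer_helper.batch_loss(batch_group, for_training=True,
                                            eval_metric=True)
    loss = output_dict["loss"]
    # The penalty key is only present when both for_training and eval_metric
    # are True, so fall back to zero otherwise.
    return loss + output_dict.get("regularization_penalty", 0.0)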
Example #5
    def batch_loss(self, batch_group: List[TensorDict],
                   for_training: bool) -> torch.Tensor:
        """
        Does a forward pass on the given batches and returns the ``loss`` value from the result.
        If ``for_training`` is ``True``, the model's regularization penalty is also added to the loss.
        """
        if self._multiple_gpu:
            output_dict = training_util.data_parallel(batch_group, self.model,
                                                      self._cuda_devices)
        else:
            # if self._num_gradient_accumulation_steps == 1:
            assert len(batch_group) == 1
            batch = batch_group[0]
            batch = nn_util.move_to_device(batch, self._cuda_devices[0])
            output_dict = self.model(**batch)
            # Experimental alternatives (left disabled): catch CUDA
            # out-of-memory errors around the forward pass, or run it under
            # gradient checkpointing via a ModelWrapper:
            #
            # try:
            #     output_dict = self.model(**batch)
            #     # wrappedmodel = ModelWrapper(self.model)
            #     # processed_inputs = wrappedmodel.process_inputs(batch)
            #     # output_dict = checkpoint(wrappedmodel, processed_inputs)
            # except RuntimeError:
            #     print("Probably CUDA out of memory")
            #     return None

        try:
            loss = output_dict["loss"]
            if for_training:
                loss += self.model.get_regularization_penalty()
        except KeyError:
            if for_training:
                raise RuntimeError(
                    "The model you are trying to optimize does not contain a"
                    " 'loss' key in the output of model.forward(inputs).")
            loss = None

        return loss
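The disabled lines above hint at an experiment with ``torch.utils.checkpoint``. Below is a sketch of what a ``ModelWrapper`` along those lines could look like; it assumes the batch is a flat dict of tensors and that the model returns a dict with a ``'loss'`` entry, and it is not taken from any existing codebase.

import torch
from torch.utils.checkpoint import checkpoint

class ModelWrapper(torch.nn.Module):
    # Hypothetical wrapper sketched from the commented-out experiment above:
    # it flattens the batch dict into positional tensors so that
    # torch.utils.checkpoint can re-run the forward pass during backward
    # instead of storing intermediate activations.
    def __init__(self, model):
        super().__init__()
        self.model = model
        self._keys = None

    def process_inputs(self, batch):
        # Remember the key order so forward() can rebuild the keyword args.
        self._keys = list(batch.keys())
        return tuple(batch[key] for key in self._keys)

    def forward(self, *tensors):
        batch = dict(zip(self._keys, tensors))
        # Return only the loss tensor; checkpoint expects tensor outputs.
        return self.model(**batch)["loss"]

In the disabled branch this would be invoked roughly as loss = checkpoint(wrapped_model, *wrapped_model.process_inputs(batch)), trading extra forward computation for lower activation memory; real AllenNLP batches are nested dicts, so an actual wrapper would need more care than this sketch shows.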