def backward_fn(loss: Tensor) -> None:
    self.trainer._call_strategy_hook("backward", loss, optimizer, opt_idx)

    # check if model weights are nan
    if self.trainer._terminate_on_nan:
        detect_nan_parameters(self.trainer.lightning_module)
def backward_fn(loss: Tensor):
    self.backward(loss, optimizer, opt_idx)

    # check if model weights are nan
    if self.trainer.terminate_on_nan:
        detect_nan_parameters(self.trainer.lightning_module)
    return loss
def test_detect_nan_parameters(value):
    model = nn.Linear(2, 3)
    detect_nan_parameters(model)
    nn.init.constant_(model.bias, value)
    assert not torch.isfinite(model.bias).all()
    with pytest.raises(ValueError, match=r".*Detected nan and/or inf values in `bias`.*"):
        detect_nan_parameters(model)
def _check_finite(self, loss: Tensor) -> None:
    """Checks for finite parameters and loss values.

    Args:
        loss: the loss value to check to be finite
    """
    if not torch.isfinite(loss).all():
        raise ValueError(f'The loss returned in `training_step` is {loss}.')
    model = self.trainer.lightning_module
    detect_nan_parameters(model)
def check_finite_loss(model: "pl.LightningModule", loss: torch.Tensor) -> None:
    """Checks for finite parameters and loss values.

    Args:
        model: a reference to the ``LightningModule``
        loss: the loss value to check to be finite
    """
    if not torch.isfinite(loss).all():
        raise ValueError(f"The loss returned in `training_step` is {loss}.")
    detect_nan_parameters(model)
def detect_nan_tensors(self, loss: Tensor) -> None:
    rank_zero_deprecation(
        "Internal: TrainerTrainingTricksMixin.detect_nan_tensors is deprecated in v1.3"
        " and will be removed in v1.5."
        " Use `pytorch_lightning.utilities.finite_checks.detect_nan_parameters` instead."
    )
    # check if loss is nan
    if not torch.isfinite(loss).all():
        raise ValueError("The loss returned in `training_step` is nan or inf.")
    model = self.lightning_module
    detect_nan_parameters(model)
def _check_finite(self, loss: torch.Tensor) -> None:
    if not torch.isfinite(loss).all():
        raise ValueError(f'The loss returned in `training_step` is {loss}.')
    model = self.trainer.lightning_module
    detect_nan_parameters(model)
def backward_fn(loss: Tensor) -> None:
    self.trainer.accelerator.backward(loss, optimizer, opt_idx)

    # check if model weights are nan
    if self.trainer._terminate_on_nan:
        detect_nan_parameters(self.trainer.lightning_module)
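# A minimal, hedged usage sketch (not taken verbatim from the source): it shows how
# `detect_nan_parameters` from `pytorch_lightning.utilities.finite_checks` (the utility
# the snippets above delegate to) reacts to a corrupted weight. The nn.Linear model and
# the deliberate inf-initialization below are illustrative assumptions.
import torch.nn as nn
from pytorch_lightning.utilities.finite_checks import detect_nan_parameters

model = nn.Linear(2, 3)
detect_nan_parameters(model)  # passes: freshly initialized weights are finite
nn.init.constant_(model.bias, float("inf"))  # corrupt a parameter on purpose
try:
    detect_nan_parameters(model)
except ValueError as err:
    print(err)  # e.g. "Detected nan and/or inf values in `bias` ..."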