def test_ea_jac_t_mat_jac_prod(problem: DerivativesTestProblem, request) -> None:
    """Test KFRA backpropagation.

    H_in → 1/N ∑ₙ Jₙ^T H_out Jₙ

    Notes:
        - `Dropout` cannot be tested, as the `autograd` implementation does a
          forward pass over each sample, while the `backpack` implementation
          requires only one forward pass over the batched data. This leads to
          different outputs, as `Dropout` is not deterministic.

    Args:
        problem: Test case.
        request: PyTest request, used to get the test id.
    """
    skip_adaptive_avg_pool3d_cuda(request)

    problem.set_up()
    out_features = problem.output_shape[1:].numel()
    mat = rand(out_features, out_features).to(problem.device)

    backpack_res = BackpackDerivatives(problem).ea_jac_t_mat_jac_prod(mat)
    autograd_res = AutogradDerivatives(problem).ea_jac_t_mat_jac_prod(mat)

    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()

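# A minimal standalone sketch (not part of the test suite) of the quantity the
# KFRA test above compares, specialized to `Linear` for illustration: every
# per-sample Jacobian of a linear layer w.r.t. its input equals the weight
# matrix W, so 1/N ∑ₙ Jₙ^T H_out Jₙ reduces to W^T H_out W. All names below
# are hypothetical and local to the sketch.
def _sketch_ea_jac_t_mat_jac_prod_linear():
    import torch

    N, D_in, D_out = 5, 3, 2
    layer = torch.nn.Linear(D_in, D_out)
    W = layer.weight.detach()
    h_out = torch.rand(D_out, D_out)

    # For a linear layer, every per-sample Jacobian equals W, so the
    # batch-averaged backpropagated matrix is exactly W^T H_out W.
    expected = W.T @ h_out @ W

    # Autograd reference: build each sample's Jacobian explicitly and average.
    x = torch.rand(N, D_in)
    result = torch.zeros(D_in, D_in)
    for n in range(N):
        jac_n = torch.autograd.functional.jacobian(layer, x[n])  # [D_out, D_in]
        result += jac_n.T @ h_out @ jac_n / N

    assert torch.allclose(result, expected, atol=1e-6)
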
def test_sqrt_hessian_sampled_squared_approximates_hessian(
    problem: DerivativesTestProblem,
    subsampling: Union[List[int], None],
    mc_samples: int = 1000000,
    chunks: int = 10,
) -> None:
    """Test the MC-sampled sqrt decomposition of the input Hessian.

    Compares the Hessian to the reconstruction from the individual MC-sampled
    Hessian sqrt.

    Args:
        problem: Test case.
        subsampling: Indices of active samples.
        mc_samples: Number of MC samples. Default: ``1000000``.
        chunks: Number of chunks in which the MC samples are processed
            sequentially. Default: ``10``.
    """
    problem.set_up()
    skip_subsampling_conflict(problem, subsampling)

    backpack_res = BackpackDerivatives(problem).input_hessian_via_sqrt_hessian(
        mc_samples=mc_samples, chunks=chunks, subsampling=subsampling
    )
    autograd_res = AutogradDerivatives(problem).input_hessian(subsampling=subsampling)

    RTOL, ATOL = 1e-2, 7e-3
    check_sizes_and_values(autograd_res, backpack_res, rtol=RTOL, atol=ATOL)
    problem.tear_down()

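# The MC intuition behind the sampled-sqrt tests, as a standalone sketch (an
# illustration, not the suite's implementation): for cross-entropy
# ℓ(f) = -log softmax(f)[y], the Hessian w.r.t. the logits f equals
# 𝔼_{ỹ∼softmax(f)}[∇ℓ(f,ỹ) ∇ℓ(f,ỹ)^T], so averaging outer products of
# gradients under sampled labels approximates the Hessian. Names are local.
def _sketch_mc_sampled_sqrt_hessian(mc_samples=100_000):
    import torch

    f = torch.rand(4)  # logits for a single sample
    p = torch.softmax(f, dim=0)

    # Exact Hessian of -log softmax(f)[y] w.r.t. f: diag(p) - p p^T
    # (independent of the label y).
    exact = torch.diag(p) - torch.outer(p, p)

    # MC approximation: the gradient is softmax(f) - onehot(ỹ) with ỹ ~ p.
    labels = torch.multinomial(p, mc_samples, replacement=True)
    onehot = torch.nn.functional.one_hot(labels, num_classes=4).float()
    grads = p.unsqueeze(0) - onehot  # [mc_samples, 4]
    mc = grads.T @ grads / mc_samples

    assert torch.allclose(mc, exact, rtol=1e-1, atol=1e-2)
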
def test_jac_t_mat_prod(
    problem: DerivativesTestProblem,
    subsampling: Union[None, List[int]],
    request,
    V: int = 3,
) -> None:
    """Test the transposed Jacobian-matrix product.

    Args:
        problem: Problem for derivative test.
        subsampling: Indices of active samples.
        request: PyTest request, used to get the test id.
        V: Number of vectorized transposed Jacobian-vector products. Default: ``3``.
    """
    skip_adaptive_avg_pool3d_cuda(request)

    problem.set_up()
    skip_batch_norm_train_mode_with_subsampling(problem, subsampling)
    skip_subsampling_conflict(problem, subsampling)
    mat = rand_mat_like_output(V, problem, subsampling=subsampling)

    backpack_res = BackpackDerivatives(problem).jac_t_mat_prod(
        mat, subsampling=subsampling
    )
    autograd_res = AutogradDerivatives(problem).jac_t_mat_prod(
        mat, subsampling=subsampling
    )

    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()

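# A hypothetical standalone sketch of what an autograd reference for
# `jac_t_mat_prod` can look like: each transposed Jacobian-vector product
# J^T v is one backward pass with v as the output gradient, and V of them are
# stacked into the matrix product. Not the suite's actual implementation.
def _sketch_jac_t_mat_prod(module, x, mat):
    """Compute [J^T v for v in mat] by one VJP per slice of ``mat``.

    Args:
        module: Differentiable callable.
        x: Input tensor.
        mat: Tensor of shape ``[V, *module(x).shape]``.
    """
    import torch

    x = x.clone().requires_grad_(True)
    out = module(x)
    return torch.stack(
        [
            torch.autograd.grad(out, x, grad_outputs=v, retain_graph=True)[0]
            for v in mat
        ]
    )
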
def test_hessian_is_zero(problem):
    """Check whether the input-output Hessian is (non-)zero.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
    """
    problem.set_up()

    backpack_res = BackpackDerivatives(problem).hessian_is_zero()
    autograd_res = AutogradDerivatives(problem).hessian_is_zero()

    assert backpack_res == autograd_res
    problem.tear_down()

def test_sum_hessian(problem):
    """Test the summed Hessian.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
    """
    problem.set_up()

    backpack_res = BackpackDerivatives(problem).sum_hessian()
    autograd_res = AutogradDerivatives(problem).sum_hessian()

    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()

def test_make_hessian_mat_prod(problem: DerivativesTestProblem) -> None:
    """Test hessian_mat_prod.

    Args:
        problem: Test case.
    """
    problem.set_up()
    mat = rand(4, *problem.input_shape, device=problem.device)

    autograd_res = AutogradDerivatives(problem).hessian_mat_prod(mat)
    backpack_res = BackpackDerivatives(problem).hessian_mat_prod(mat)

    check_sizes_and_values(backpack_res, autograd_res)
    problem.tear_down()

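# Standalone sketch of the double-backward trick behind Hessian-matrix
# products: a Hessian-vector product Hv is the gradient of ⟨∇f(x), v⟩ w.r.t.
# x, and the matrix product stacks one such product per slice. The helper
# below is hypothetical, for illustration only.
def _sketch_hessian_mat_prod(f, x, mat):
    """Compute [H v for v in mat] for a scalar-valued ``f`` via double backward."""
    import torch

    x = x.clone().requires_grad_(True)
    # First backward pass with create_graph=True so the gradient is itself
    # differentiable.
    (grad,) = torch.autograd.grad(f(x), x, create_graph=True)
    # Second backward pass per slice: ∇_x ⟨grad, v⟩ = H v (H is symmetric).
    return torch.stack(
        [
            torch.autograd.grad(grad, x, grad_outputs=v, retain_graph=True)[0]
            for v in mat
        ]
    )
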
def test_jac_t_mat_prod(problem, V=3):
    """Test the transposed Jacobian-matrix product.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        V (int): Number of vectorized transposed Jacobian-vector products.
    """
    problem.set_up()
    mat = torch.rand(V, *problem.output_shape).to(problem.device)

    backpack_res = BackpackDerivatives(problem).jac_t_mat_prod(mat)
    autograd_res = AutogradDerivatives(problem).jac_t_mat_prod(mat)

    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()

def test_bias_jac_mat_prod(problem: DerivativesTestProblem, V: int = 3) -> None:
    """Test the Jacobian-matrix product w.r.t. the bias.

    Args:
        problem: Test case.
        V: Number of vectorized Jacobian-vector products. Default: ``3``.
    """
    problem.set_up()
    mat = rand(V, *problem.module.bias.shape).to(problem.device)

    backpack_res = BackpackDerivatives(problem).bias_jac_mat_prod(mat)
    autograd_res = AutogradDerivatives(problem).bias_jac_mat_prod(mat)

    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()

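# Sketch of the quantity checked above, specialized to `Linear` for
# illustration (an assumption, not the suite's general implementation): since
# out[n] = W x[n] + b, the Jacobian w.r.t. the bias is the identity for every
# sample, so the product just broadcasts each vector in `mat` across the batch
# axis. All names are local to the sketch.
def _sketch_bias_jac_mat_prod_linear():
    import torch

    N, D_in, D_out, V = 5, 3, 2, 4
    layer = torch.nn.Linear(D_in, D_out)
    x = torch.rand(N, D_in)
    mat = torch.rand(V, D_out)  # one vector per Jacobian-vector product

    def out_of_bias(b):
        return torch.nn.functional.linear(x, layer.weight.detach(), b)

    # Autograd reference: one JVP w.r.t. the bias per slice of `mat`.
    jvp = torch.stack(
        [
            torch.autograd.functional.jvp(out_of_bias, layer.bias.detach(), v)[1]
            for v in mat
        ]
    )

    # The bias Jacobian is the identity per sample, so the product is `mat`
    # broadcast along the batch axis.
    expected = mat.unsqueeze(1).expand(V, N, D_out)
    assert torch.allclose(jvp, expected, atol=1e-6)
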
def test_sqrt_hessian_sampled_squared_approximates_hessian(problem, mc_samples=100000):
    """Test the MC-sampled sqrt decomposition of the input Hessian.

    Compares the Hessian to the reconstruction from the individual MC-sampled
    Hessian sqrt.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        mc_samples (int): Number of MC samples.
    """
    problem.set_up()

    backpack_res = BackpackDerivatives(problem).input_hessian_via_sqrt_hessian(
        mc_samples=mc_samples
    )
    autograd_res = AutogradDerivatives(problem).input_hessian()

    RTOL, ATOL = 1e-2, 2e-2
    check_sizes_and_values(autograd_res, backpack_res, rtol=RTOL, atol=ATOL)
    problem.tear_down()

def test_bias_jac_t_mat_prod(problem, sum_batch, V=3):
    """Test the transposed Jacobian-matrix product w.r.t. the bias.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        sum_batch (bool): Sum results over the batch dimension.
        V (int): Number of vectorized transposed Jacobian-vector products.
    """
    problem.set_up()
    mat = torch.rand(V, *problem.output_shape).to(problem.device)

    backpack_res = BackpackDerivatives(problem).bias_jac_t_mat_prod(mat, sum_batch)
    autograd_res = AutogradDerivatives(problem).bias_jac_t_mat_prod(mat, sum_batch)

    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()

def test_sqrt_hessian_squared_equals_hessian(problem):
    """Test the sqrt decomposition of the input Hessian.

    Compares the Hessian to the reconstruction from the individual Hessian sqrt.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
    """
    problem.set_up()

    backpack_res = BackpackDerivatives(problem).input_hessian_via_sqrt_hessian()
    autograd_res = AutogradDerivatives(problem).input_hessian()

    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()

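# Sketch of the "sqrt Hessian" concept behind the exact test above: a loss
# Hessian H is reconstructed as S S^T from a symmetric factorization S. For
# `MSELoss(reduction="sum")`, ℓ(f) = ∑ᵢ (fᵢ - yᵢ)² has Hessian 2·I, so √2·I
# is one valid square root. The helper name is hypothetical.
def _sketch_sqrt_hessian_mse():
    import math

    import torch

    D = 3
    y = torch.rand(D)

    def loss_fn(f):
        return torch.nn.functional.mse_loss(f, y, reduction="sum")

    # Exact Hessian w.r.t. the input, via autograd.
    hessian = torch.autograd.functional.hessian(loss_fn, torch.rand(D))

    # Reconstruction from the analytic sqrt factor.
    sqrt_h = math.sqrt(2.0) * torch.eye(D)
    assert torch.allclose(sqrt_h @ sqrt_h.T, hessian, atol=1e-6)
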
def test_weight_jac_t_mat_prod(problem, sum_batch, save_memory, V=3):
    """Test the transposed Jacobian-matrix product w.r.t. the weights.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        sum_batch (bool): Sum results over the batch dimension.
        save_memory (bool): Use Owkin's memory-saving implementation.
        V (int): Number of vectorized transposed Jacobian-vector products.
    """
    problem.set_up()
    mat = torch.rand(V, *problem.output_shape).to(problem.device)

    with weight_jac_t_save_memory(save_memory):
        backpack_res = BackpackDerivatives(problem).weight_jac_t_mat_prod(
            mat, sum_batch
        )
    autograd_res = AutogradDerivatives(problem).weight_jac_t_mat_prod(mat, sum_batch)

    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()

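# Sketch of the weight case for `Linear`, chosen for illustration: the
# transposed Jacobian-matrix product w.r.t. W is an outer product of the
# output-space vectors with the layer input, per sample; `sum_batch` only
# decides whether the batch axis is contracted. Names are local to the sketch.
def _sketch_weight_jac_t_mat_prod_linear(sum_batch=True):
    import torch

    N, D_in, D_out, V = 5, 3, 2, 4
    x = torch.rand(N, D_in)
    mat = torch.rand(V, N, D_out)

    # Per-sample result has shape [V, N, D_out, D_in]; summing the batch axis
    # is the only difference between the two `sum_batch` settings.
    per_sample = torch.einsum("vno,ni->vnoi", mat, x)
    return per_sample.sum(1) if sum_batch else per_sample
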
def test_hessian_is_zero(no_loss_problem: DerivativesTestProblem) -> None:
    """Check whether the input-output Hessian is (non-)zero.

    Note:
        `hessian_is_zero` is a global statement that assumes arbitrary inputs.
        It can thus happen that the Hessian diagonal is zero for the current
        input, but not in general.

    Args:
        no_loss_problem: Test case whose module is not a loss.
    """
    backpack_res = BackpackDerivatives(no_loss_problem).hessian_is_zero()
    autograd_res = AutogradDerivatives(no_loss_problem).hessian_is_zero()

    if autograd_res and not backpack_res:
        warn(
            "Autograd Hessian diagonal is zero for this input"
            " while the BackPACK implementation implies that some inputs"
            " have a non-zero Hessian."
        )
    else:
        assert backpack_res == autograd_res

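# The caveat in the warning branch above, as a concrete standalone example:
# `ELU` has a non-zero second derivative in general, but autograd sees a zero
# Hessian whenever all inputs happen to be positive (where ELU is linear).
# The helper below is for illustration only.
def _sketch_hessian_zero_depends_on_input():
    import torch

    def summed_elu(x):
        return torch.nn.functional.elu(x).sum()

    positive = torch.tensor([1.0, 2.0])  # ELU is linear here
    negative = torch.tensor([-1.0, -2.0])  # ELU curves here

    h_pos = torch.autograd.functional.hessian(summed_elu, positive)
    h_neg = torch.autograd.functional.hessian(summed_elu, negative)

    assert torch.allclose(h_pos, torch.zeros_like(h_pos))
    assert not torch.allclose(h_neg, torch.zeros_like(h_neg))
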
def test_sqrt_hessian_squared_equals_hessian(
    problem: DerivativesTestProblem, subsampling: Union[List[int], None]
) -> None:
    """Test the sqrt decomposition of the input Hessian.

    Compares the Hessian to the reconstruction from the individual Hessian sqrt.

    Args:
        problem: Test case.
        subsampling: Indices of active samples.
    """
    problem.set_up()
    skip_subsampling_conflict(problem, subsampling)

    backpack_res = BackpackDerivatives(problem).input_hessian_via_sqrt_hessian(
        subsampling=subsampling
    )
    autograd_res = AutogradDerivatives(problem).input_hessian(subsampling=subsampling)

    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()

def test_ea_jac_t_mat_jac_prod(problem):
    """Test KFRA backpropagation.

    H_in → 1/N ∑ₙ Jₙ^T H_out Jₙ

    Notes:
        - `Dropout` cannot be tested, as the `autograd` implementation does a
          forward pass over each sample, while the `backpack` implementation
          requires only one forward pass over the batched data. This leads to
          different outputs, as `Dropout` is not deterministic.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
    """
    problem.set_up()
    out_features = int(torch.prod(torch.tensor(problem.output_shape[1:])))
    mat = torch.rand(out_features, out_features).to(problem.device)

    backpack_res = BackpackDerivatives(problem).ea_jac_t_mat_jac_prod(mat)
    autograd_res = AutogradDerivatives(problem).ea_jac_t_mat_jac_prod(mat)

    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()

def test_param_mjp(
    problem: DerivativesTestProblem,
    sum_batch: bool,
    subsampling: Union[List[int], None],
    request,
) -> None:
    """Test all parameter derivatives.

    Args:
        problem: Test case.
        sum_batch: Whether to sum results over the batch axis.
        subsampling: Indices of active samples.
        request: PyTest request, used to get the test id.
    """
    skip_subsampling_conflict(problem, subsampling)
    test_save_memory: bool = "Conv" in request.node.callspec.id
    V = 3

    for param_str, _ in problem.module.named_parameters():
        print(f"testing derivative wrt {param_str}")
        for save_memory in [True, False] if test_save_memory else [None]:
            if test_save_memory:
                print(f"testing with save_memory={save_memory}")

            mat = rand_mat_like_output(V, problem, subsampling=subsampling)
            with weight_jac_t_save_memory(
                save_memory=save_memory
            ) if test_save_memory else nullcontext():
                backpack_res = BackpackDerivatives(problem).param_mjp(
                    param_str, mat, sum_batch, subsampling=subsampling
                )
            autograd_res = AutogradDerivatives(problem).param_mjp(
                param_str, mat, sum_batch, subsampling=subsampling
            )

            check_sizes_and_values(autograd_res, backpack_res)