示例#1
0
def test_ea_jac_t_mat_jac_prod(problem: DerivativesTestProblem, request) -> None:
    """Compare BackPACK's KFRA backpropagation against the autograd reference.

    The quantity under test is

    H_in →  1/N ∑ₙ Jₙ^T H_out Jₙ

    Notes:
        - `Dropout` cannot be tested: the `autograd` implementation performs
        one forward pass per sample, whereas the `backpack` implementation
        uses a single forward pass over the whole batch. Since `Dropout` is
        not deterministic, the two outputs differ.

    Args:
        problem: Test case.
        request: PyTest request, used to get test id.
    """
    skip_adaptive_avg_pool3d_cuda(request)

    problem.set_up()
    # Number of output entries per sample (batch axis excluded).
    num_out = problem.output_shape[1:].numel()
    h_out = rand(num_out, num_out).to(problem.device)

    from_backpack = BackpackDerivatives(problem).ea_jac_t_mat_jac_prod(h_out)
    from_autograd = AutogradDerivatives(problem).ea_jac_t_mat_jac_prod(h_out)

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
示例#2
0
def test_sqrt_hessian_sampled_squared_approximates_hessian(
    problem: DerivativesTestProblem,
    subsampling: Union[List[int], None],
    mc_samples: int = 1000000,
    chunks: int = 10,
) -> None:
    """Check that the MC-sampled Hessian sqrt reconstructs the input Hessian.

    The input Hessian obtained from autograd is compared with the one rebuilt
    from the individual MC-sampled Hessian square roots of BackPACK.

    Args:
        problem: Test case.
        subsampling: Indices of active samples.
        mc_samples: number of samples. Defaults to 1000000.
        chunks: Number of passes the MC samples will be processed sequentially.
    """
    problem.set_up()
    skip_subsampling_conflict(problem, subsampling)

    reconstructed = BackpackDerivatives(problem).input_hessian_via_sqrt_hessian(
        mc_samples=mc_samples, chunks=chunks, subsampling=subsampling
    )
    reference = AutogradDerivatives(problem).input_hessian(subsampling=subsampling)

    # Explicit tolerances for this comparison (presumably loosened because the
    # sampled reconstruction is only approximate).
    check_sizes_and_values(reference, reconstructed, rtol=1e-2, atol=7e-3)
    problem.tear_down()
示例#3
0
def test_jac_t_mat_prod(
    problem: DerivativesTestProblem,
    subsampling: Union[None, List[int]],
    request,
    V: int = 3,
) -> None:
    """Compare transposed Jacobian-matrix products of BackPACK and autograd.

    Args:
        problem: Problem for derivative test.
        subsampling: Indices of active samples.
        request: Pytest request, used for getting id.
        V: Number of vectorized transposed Jacobian-vector products. Default: ``3``.
    """
    skip_adaptive_avg_pool3d_cuda(request)

    problem.set_up()
    # Skip checks run after set-up, before any derivative is computed.
    skip_batch_norm_train_mode_with_subsampling(problem, subsampling)
    skip_subsampling_conflict(problem, subsampling)
    rhs = rand_mat_like_output(V, problem, subsampling=subsampling)

    from_backpack = BackpackDerivatives(problem).jac_t_mat_prod(
        rhs, subsampling=subsampling
    )
    from_autograd = AutogradDerivatives(problem).jac_t_mat_prod(
        rhs, subsampling=subsampling
    )

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
def test_hessian_is_zero(problem):
    """Check that BackPACK and autograd agree on whether the Hessian is zero.

    Args:
        problem: Problem for derivative test.
    """
    problem.set_up()

    flag_backpack = BackpackDerivatives(problem).hessian_is_zero()
    flag_autograd = AutogradDerivatives(problem).hessian_is_zero()

    assert flag_backpack == flag_autograd
    problem.tear_down()
def test_sum_hessian(problem):
    """Compare the summed Hessian of BackPACK with the autograd reference.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
    """
    problem.set_up()

    from_backpack = BackpackDerivatives(problem).sum_hessian()
    from_autograd = AutogradDerivatives(problem).sum_hessian()

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
示例#6
0
def test_make_hessian_mat_prod(problem: DerivativesTestProblem) -> None:
    """Test hessian_mat_prod.

    Compares the Hessian-matrix product of BackPACK with the autograd
    reference on a random matrix with leading dimension 4.

    Args:
        problem: test problem
    """
    problem.set_up()
    mat = rand(4, *problem.input_shape, device=problem.device)

    autograd_res = AutogradDerivatives(problem).hessian_mat_prod(mat)
    backpack_res = BackpackDerivatives(problem).hessian_mat_prod(mat)

    check_sizes_and_values(backpack_res, autograd_res)
    # Release the problem again, like every other test that calls ``set_up``.
    problem.tear_down()
def test_jac_t_mat_prod(problem, V=3):
    """Compare transposed Jacobian-matrix products of BackPACK and autograd.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        V (int): Number of vectorized transposed Jacobian-vector products.
    """
    problem.set_up()
    # Random matrix with leading dimension V followed by the output shape.
    rhs = torch.rand(V, *problem.output_shape).to(problem.device)

    from_backpack = BackpackDerivatives(problem).jac_t_mat_prod(rhs)
    from_autograd = AutogradDerivatives(problem).jac_t_mat_prod(rhs)

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
示例#8
0
def test_bias_jac_mat_prod(problem: DerivativesTestProblem, V: int = 3) -> None:
    """Compare Jacobian-matrix products w.r.t. the bias of BackPACK and autograd.

    Args:
        problem: Test case.
        V: Number of vectorized Jacobian-vector products. Default: ``3``.
    """
    problem.set_up()
    # Random matrix with leading dimension V followed by the bias shape.
    rhs = rand(V, *problem.module.bias.shape).to(problem.device)

    from_backpack = BackpackDerivatives(problem).bias_jac_mat_prod(rhs)
    from_autograd = AutogradDerivatives(problem).bias_jac_mat_prod(rhs)

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
def test_sqrt_hessian_sampled_squared_approximates_hessian(
        problem, mc_samples=100000):
    """Check that the MC-sampled Hessian sqrt reconstructs the input Hessian.

    The input Hessian obtained from autograd is compared with the one rebuilt
    from the individual MC-sampled Hessian square roots of BackPACK.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        mc_samples (int): Number of MC samples passed to BackPACK.
    """
    problem.set_up()

    reconstructed = BackpackDerivatives(problem).input_hessian_via_sqrt_hessian(
        mc_samples=mc_samples)
    reference = AutogradDerivatives(problem).input_hessian()

    # Explicit tolerances for this comparison (presumably loosened because the
    # sampled reconstruction is only approximate).
    check_sizes_and_values(reference, reconstructed, rtol=1e-2, atol=2e-2)
    problem.tear_down()
def test_bias_jac_t_mat_prod(problem, sum_batch, V=3):
    """Compare transposed Jacobian-matrix products w.r.t. the bias.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        sum_batch (bool): Sum results over the batch dimension.
        V (int): Number of vectorized transposed Jacobian-vector products.
    """
    problem.set_up()
    # Random matrix with leading dimension V followed by the output shape.
    rhs = torch.rand(V, *problem.output_shape).to(problem.device)

    from_backpack = BackpackDerivatives(problem).bias_jac_t_mat_prod(
        rhs, sum_batch)
    from_autograd = AutogradDerivatives(problem).bias_jac_t_mat_prod(
        rhs, sum_batch)

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
def test_sqrt_hessian_squared_equals_hessian(problem):
    """Test the sqrt decomposition of the input Hessian.

    Compares the Hessian to reconstruction from individual Hessian sqrt.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
    """
    problem.set_up()

    backpack_res = BackpackDerivatives(
        problem).input_hessian_via_sqrt_hessian()
    autograd_res = AutogradDerivatives(problem).input_hessian()

    # Leftover debug output of the result devices was removed here.
    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()
def test_weight_jac_t_mat_prod(problem, sum_batch, save_memory, V=3):
    """Compare transposed Jacobian-matrix products w.r.t. the weights.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        sum_batch (bool): Sum results over the batch dimension.
        save_memory (bool): Use Owkin implementation to save memory. Passed to
            the ``weight_jac_t_save_memory`` context around the BackPACK call.
        V (int): Number of vectorized transposed Jacobian-vector products.
    """
    problem.set_up()
    # Random matrix with leading dimension V followed by the output shape.
    rhs = torch.rand(V, *problem.output_shape).to(problem.device)

    # Only the BackPACK computation runs inside the save-memory context.
    with weight_jac_t_save_memory(save_memory):
        from_backpack = BackpackDerivatives(problem).weight_jac_t_mat_prod(
            rhs, sum_batch)
    from_autograd = AutogradDerivatives(problem).weight_jac_t_mat_prod(
        rhs, sum_batch)

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
示例#13
0
def test_hessian_is_zero(no_loss_problem: DerivativesTestProblem) -> None:
    """Check that BackPACK and autograd agree on whether the Hessian is zero.

    Note:
        `hessian_is_zero` is a global statement that assumes arbitrary inputs.
        The Hessian diagonal may therefore be zero for the current input
        without being zero in general. That one-sided mismatch only triggers
        a warning instead of a failure.

    Args:
        no_loss_problem: Test case whose module is not a loss.
    """
    flag_backpack = BackpackDerivatives(no_loss_problem).hessian_is_zero()
    flag_autograd = AutogradDerivatives(no_loss_problem).hessian_is_zero()

    zero_only_for_this_input = flag_autograd and not flag_backpack
    if zero_only_for_this_input:
        warn(
            "Autograd Hessian diagonal is zero for this input "
            " while BackPACK implementation implies inputs with non-zero Hessian."
        )
        return

    assert flag_backpack == flag_autograd
示例#14
0
def test_sqrt_hessian_squared_equals_hessian(
    problem: DerivativesTestProblem, subsampling: Union[List[int], None]
) -> None:
    """Check that the Hessian sqrt decomposition reconstructs the input Hessian.

    The input Hessian obtained from autograd is compared with the one rebuilt
    from the individual Hessian square roots of BackPACK.

    Args:
        problem: Test case.
        subsampling: Indices of active samples.
    """
    problem.set_up()
    skip_subsampling_conflict(problem, subsampling)

    reconstructed = BackpackDerivatives(problem).input_hessian_via_sqrt_hessian(
        subsampling=subsampling
    )
    reference = AutogradDerivatives(problem).input_hessian(subsampling=subsampling)

    check_sizes_and_values(reference, reconstructed)
    problem.tear_down()
def test_ea_jac_t_mat_jac_prod(problem):
    """Test KFRA backpropagation.

    H_in →  1/N ∑ₙ Jₙ^T H_out Jₙ

    Notes:
        - `Dropout` cannot be tested,as the `autograd` implementation does a forward
        pass over each sample, while the `backpack` implementation requires only
        one forward pass over the batched data. This leads to different outputs,
        as `Dropout` is not deterministic.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
    """
    problem.set_up()
    # Use a plain Python int for the matrix size: ``torch.Size(...).numel()``
    # avoids passing a 0-dim tensor to ``torch.rand`` and yields 1 (instead of
    # a float tensor) when there are no non-batch axes.
    out_features = torch.Size(problem.output_shape[1:]).numel()
    mat = torch.rand(out_features, out_features).to(problem.device)

    backpack_res = BackpackDerivatives(problem).ea_jac_t_mat_jac_prod(mat)
    autograd_res = AutogradDerivatives(problem).ea_jac_t_mat_jac_prod(mat)

    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()
示例#16
0
def test_param_mjp(
    problem: DerivativesTestProblem,
    sum_batch: bool,
    subsampling: Union[List[int], None],
    request,
) -> None:
    """Test all parameter derivatives.

    For every named parameter of ``problem.module`` the matrix-Jacobian
    product of BackPACK is compared with the autograd reference. If the test
    id contains ``"Conv"``, the BackPACK computation is repeated with
    ``save_memory`` set to ``True`` and ``False``.

    Args:
        problem: test problem
        sum_batch: whether to sum along batch axis
        subsampling: subsampling indices
        request: problem request
    """
    skip_subsampling_conflict(problem, subsampling)
    test_save_memory: bool = "Conv" in request.node.callspec.id
    V = 3

    for param_str, _ in problem.module.named_parameters():
        print(f"testing derivative wrt {param_str}")
        for save_memory in [True, False] if test_save_memory else [None]:
            if test_save_memory:
                print(f"testing with save_memory={save_memory}")

            mat = rand_mat_like_output(V, problem, subsampling=subsampling)
            # Only enter the save-memory context when it is being tested;
            # otherwise use a no-op context.
            memory_context = (
                weight_jac_t_save_memory(save_memory=save_memory)
                if test_save_memory
                else nullcontext()
            )
            with memory_context:
                backpack_res = BackpackDerivatives(problem).param_mjp(
                    param_str, mat, sum_batch, subsampling=subsampling
                )
            autograd_res = AutogradDerivatives(problem).param_mjp(
                param_str, mat, sum_batch, subsampling=subsampling
            )

            check_sizes_and_values(autograd_res, backpack_res)