Exemplo n.º 1
0
def test_sqrt_hessian_sampled_squared_approximates_hessian(
    problem: DerivativesTestProblem,
    subsampling: Union[List[int], None],
    mc_samples: int = 1000000,
    chunks: int = 10,
) -> None:
    """Test that the MC-sampled sqrt Hessian approximates the input Hessian.

    The input Hessian that BackPACK reconstructs from its MC-sampled square root
    is compared against the input Hessian computed by autograd.

    Args:
        problem: Test case.
        subsampling: Indices of active samples.
        mc_samples: Number of MC samples. Defaults to 1000000.
        chunks: Number of sequential passes used to process the MC samples.
            Defaults to 10.
    """
    problem.set_up()
    skip_subsampling_conflict(problem, subsampling)

    sampled = BackpackDerivatives(problem).input_hessian_via_sqrt_hessian(
        mc_samples=mc_samples, chunks=chunks, subsampling=subsampling
    )
    reference = AutogradDerivatives(problem).input_hessian(subsampling=subsampling)

    # tolerances used for the comparison with the MC-sampled result
    tolerances = {"rtol": 1e-2, "atol": 7e-3}
    check_sizes_and_values(reference, sampled, **tolerances)
    problem.tear_down()
Exemplo n.º 2
0
def test_ea_jac_t_mat_jac_prod(problem: DerivativesTestProblem, request) -> None:
    """Compare KFRA backpropagation between BackPACK and autograd.

    H_in →  1/N ∑ₙ Jₙ^T H_out Jₙ

    Notes:
        - `Dropout` cannot be tested, as the `autograd` implementation does a
        forward pass over each sample, while the `backpack` implementation
        requires only one forward pass over the batched data. This leads to
        different outputs, as `Dropout` is not deterministic.

    Args:
        problem: Test case.
        request: PyTest request, used to get test id.
    """
    skip_adaptive_avg_pool3d_cuda(request)

    problem.set_up()
    # number of output entries per sample (batch axis excluded)
    num_outputs = problem.output_shape[1:].numel()
    h_out = rand(num_outputs, num_outputs).to(problem.device)

    from_backpack = BackpackDerivatives(problem).ea_jac_t_mat_jac_prod(h_out)
    from_autograd = AutogradDerivatives(problem).ea_jac_t_mat_jac_prod(h_out)

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
Exemplo n.º 3
0
def test_for_loop_replace() -> None:
    """Use ``retain_graph`` to replace an outer for-loop over samples.

    This test is based on issue #220 opened by Romain3Ch216.
    Per-component individual gradients of a tensor-valued output are computed
    twice: once with autograd, looping over samples and components, and once
    with BackPACK's ``BatchGrad``, looping over components only.
    """
    manual_seed(0)
    batch_size, num_components, in_features = 5, 3, 2

    inputs = randn(batch_size, in_features)
    layer = extend(Linear(in_features, num_components))
    outputs = layer(inputs)

    # reference: one autograd call per (sample, component) pair
    per_entry_autograd = zeros(batch_size, num_components, *layer.weight.shape)
    for sample in range(batch_size):
        for component in range(num_components):
            with backpack(retain_graph=True):
                weight_grad = autograd.grad(
                    outputs[sample, component], layer.weight, retain_graph=True
                )[0]
            per_entry_autograd[sample, component] = weight_grad

    # BackPACK: one backward pass per component handles all samples at once
    per_entry_backpack = zeros(batch_size, num_components, *layer.weight.shape)
    for component in range(num_components):
        with backpack(BatchGrad(), retain_graph=True):
            column = outputs[:, component]
            column.backward(ones_like(column), retain_graph=True)
        per_entry_backpack[:, component] = layer.weight.grad_batch

    check_sizes_and_values(per_entry_backpack, per_entry_autograd)
Exemplo n.º 4
0
def test_ggn_mc(
    problem: ExtensionsTestProblem, subsampling: Union[List[int], None]
) -> None:
    """Check BackPACK's MC-approximated GGN against the exact GGN from autograd.

    Args:
        problem: Test case with small network whose GGN can be evaluated.
        subsampling: Indices of active samples. ``None`` uses the full mini-batch.
    """
    skip_large_parameters(problem)
    skip_subsampling_conflict(problem, subsampling)

    exact_ggn = AutogradExtensions(problem).ggn(subsampling=subsampling)
    sampled_ggn = BackpackExtensions(problem).ggn_mc(
        150000, chunks=15, subsampling=subsampling
    )

    # Rescale both matrices by their joint largest magnitude, so entries lie in
    # [-1; 1] (easier to tune atol).
    scale = max(exact_ggn.abs().max(), sampled_ggn.abs().max())
    # NOTE: The GGN can be exactly zero; e.g. if a ReLU after all parameters zeroes
    # its input, its Jacobian is thus zero and will cancel the backpropagated GGN.
    # Skip the rescaling in that case to avoid dividing by zero.
    if not isclose(scale, 0):
        exact_ggn = exact_ggn / scale
        sampled_ggn = sampled_ggn / scale

    check_sizes_and_values(exact_ggn, sampled_ggn, atol=5e-3, rtol=5e-3)
Exemplo n.º 5
0
def test_jac_t_mat_prod(
    problem: DerivativesTestProblem,
    subsampling: Union[None, List[int]],
    request,
    V: int = 3,
) -> None:
    """Compare the transposed Jacobian-matrix product of BackPACK and autograd.

    Args:
        problem: Problem for derivative test.
        subsampling: Indices of active samples.
        request: Pytest request, used for getting id.
        V: Number of vectorized transposed Jacobian-vector products. Default: ``3``.
    """
    skip_adaptive_avg_pool3d_cuda(request)

    problem.set_up()
    skip_batch_norm_train_mode_with_subsampling(problem, subsampling)
    skip_subsampling_conflict(problem, subsampling)
    vectors = rand_mat_like_output(V, problem, subsampling=subsampling)

    from_backpack = BackpackDerivatives(problem).jac_t_mat_prod(
        vectors, subsampling=subsampling
    )
    from_autograd = AutogradDerivatives(problem).jac_t_mat_prod(
        vectors, subsampling=subsampling
    )

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
Exemplo n.º 6
0
    def check_equivalence(self) -> None:
        """Verify that the equivalent module reproduces the stored output.

        An equivalent module is built from the stride and kernel size reported
        by ``get_avg_pool_parameters`` for ``self.module``. It is applied to
        ``self.input`` and its result is compared in size and value with
        ``self.output``.
        """
        derivatives = self._get_derivatives()
        # the third returned value is not needed to build the equivalent module
        stride, kernel_size, _ = derivatives.get_avg_pool_parameters(self.module)

        equivalent: Module = self._make_module_equivalent(stride, kernel_size)
        equivalent_output: Tensor = equivalent(self.input)

        check_sizes_and_values(self.output, equivalent_output)
Exemplo n.º 7
0
def test_make_hessian_mat_prod(problem: DerivativesTestProblem) -> None:
    """Test the Hessian-matrix product.

    Compares BackPACK's ``hessian_mat_prod`` with the autograd implementation
    on a random stack of four matrices, each shaped like the problem input.

    Args:
        problem: Test case.
    """
    problem.set_up()
    mat = rand(4, *problem.input_shape, device=problem.device)

    autograd_res = AutogradDerivatives(problem).hessian_mat_prod(mat)
    backpack_res = BackpackDerivatives(problem).hessian_mat_prod(mat)

    check_sizes_and_values(backpack_res, autograd_res)
    # Release what ``set_up`` created; every test that calls ``set_up`` is
    # expected to pair it with ``tear_down``.
    problem.tear_down()
Exemplo n.º 8
0
def test_batch_l2_grad_hook(problem):
    """Compare hook-computed squared ℓ₂ norms of individual gradients with autograd.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    via_hook = BackpackExtensions(problem).batch_l2_grad_extension_hook()
    reference = AutogradExtensions(problem).batch_l2_grad()

    check_sizes_and_values(reference, via_hook)
    problem.tear_down()
def test_diag_ggn_batch(problem):
    """Compare the individual diagonal of the Generalized Gauss-Newton/Fisher.

    BackPACK's exact per-sample GGN diagonal is checked against autograd.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    from_backpack = BackpackExtensions(problem).diag_ggn_exact_batch()
    from_autograd = AutogradExtensions(problem).diag_ggn_batch()

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
Exemplo n.º 10
0
def test_sum_grad_squared(problem):
    """Compare the sum of squared individual gradients of BackPACK and autograd.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    from_backpack = BackpackExtensions(problem).sgs()
    from_autograd = AutogradExtensions(problem).sgs()

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
def test_batch_grad(problem):
    """Compare individual gradients computed by BackPACK and autograd.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    from_backpack = BackpackExtensions(problem).batch_grad()
    from_autograd = AutogradExtensions(problem).batch_grad()

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
Exemplo n.º 12
0
def test_sum_grad_squared_hook(problem):
    """Compare the hook-computed individual gradient second moment with autograd.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    via_hook = BackpackExtensions(problem).sgs_extension_hook()
    reference = AutogradExtensions(problem).sgs()

    check_sizes_and_values(reference, via_hook)
    problem.tear_down()
Exemplo n.º 13
0
def test_diag_h_batch(problem):
    """Compare the per-sample Hessian diagonal of BackPACK and autograd.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    from_backpack = BackpackExtensions(problem).diag_h_batch()
    from_autograd = AutogradExtensions(problem).diag_h_batch()

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
def test_sum_hessian(problem):
    """Compare the summed Hessian computed by BackPACK and autograd.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
    """
    problem.set_up()

    from_backpack = BackpackDerivatives(problem).sum_hessian()
    from_autograd = AutogradDerivatives(problem).sum_hessian()

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
Exemplo n.º 15
0
def test_variance(problem: ExtensionsTestProblem) -> None:
    """Compare the variance of individual gradients of BackPACK and autograd.

    Args:
        problem: Test case.
    """
    problem.set_up()

    from_backpack = BackpackExtensions(problem).variance()
    from_autograd = AutogradExtensions(problem).variance()

    # this comparison uses an explicit relative tolerance
    check_sizes_and_values(from_autograd, from_backpack, rtol=5e-5)
    problem.tear_down()
Exemplo n.º 16
0
def test_bias_jac_mat_prod(problem: DerivativesTestProblem, V: int = 3) -> None:
    """Compare the Jacobian-matrix product w.r.t. the bias of BackPACK and autograd.

    Args:
        problem: Test case.
        V: Number of vectorized Jacobian-vector products. Default: ``3``.
    """
    problem.set_up()
    # stack of V random matrices, each shaped like the module's bias
    bias_shape = problem.module.bias.shape
    vectors = rand(V, *bias_shape).to(problem.device)

    from_backpack = BackpackDerivatives(problem).bias_jac_mat_prod(vectors)
    from_autograd = AutogradDerivatives(problem).bias_jac_mat_prod(vectors)

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
Exemplo n.º 17
0
def test_diag_ggn(problem, request):
    """Compare the generalized Gauss-Newton diagonal of BackPACK and autograd.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
        request: Pytest request, forwarded to the skip helper.
    """
    skip_adaptive_avg_pool3d_cuda(request)
    problem.set_up()

    from_backpack = BackpackExtensions(problem).diag_ggn()
    from_autograd = AutogradExtensions(problem).diag_ggn()

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
def test_jac_t_mat_prod(problem, V=3):
    """Compare the transposed Jacobian-matrix product of BackPACK and autograd.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        V (int): Number of vectorized transposed Jacobian-vector products.
    """
    problem.set_up()
    # stack of V random matrices, each shaped like the problem output
    vectors = torch.rand(V, *problem.output_shape).to(problem.device)

    from_backpack = BackpackDerivatives(problem).jac_t_mat_prod(vectors)
    from_autograd = AutogradDerivatives(problem).jac_t_mat_prod(vectors)

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
Exemplo n.º 19
0
def test_batch_grad(
    problem: ExtensionsTestProblem, subsampling: Union[List[int], None]
) -> None:
    """Compare individual gradients computed by BackPACK and autograd.

    Args:
        problem: Test case.
        subsampling: Indices of active samples.
    """
    skip_if_subsampling_conflict(problem, subsampling)

    from_backpack = BackpackExtensions(problem).batch_grad(subsampling)
    from_autograd = AutogradExtensions(problem).batch_grad(subsampling)

    check_sizes_and_values(from_autograd, from_backpack)
Exemplo n.º 20
0
def test_ggn_exact(
    problem: ExtensionsTestProblem, subsampling: Union[List[int], None]
) -> None:
    """Check the exact GGN from BackPACK's matrix square root against autograd.

    Args:
        problem: Test case with small network whose GGN can be evaluated.
        subsampling: Indices of active samples. ``None`` uses the full mini-batch.
    """
    skip_large_parameters(problem)
    skip_subsampling_conflict(problem, subsampling)

    reference = AutogradExtensions(problem).ggn(subsampling=subsampling)
    from_backpack = BackpackExtensions(problem).ggn(subsampling=subsampling)

    check_sizes_and_values(reference, from_backpack)
def test_diag_ggn_mc_batch_light(problem):
    """Test the MC-approximated individual GGN/Fisher diagonal (light version).

    BackPACK's exact per-sample diagonal is compared with its MC approximation
    using few MC samples.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    exact = BackpackExtensions(problem).diag_ggn_exact_batch()
    approximation = BackpackExtensions(problem).diag_ggn_mc_batch(5000)

    check_sizes_and_values(exact, approximation, atol=MC_ATOL, rtol=MC_LIGHT_RTOL)
    problem.tear_down()
Exemplo n.º 22
0
def test_kfac_should_approx_ggn_montecarlo(problem: ExtensionsTestProblem):
    """Check that K-FAC matches the GGN blocks for batch_size = 1.

    Should be true for linear layers and in the limit of infinite mc_samples.

    Args:
        problem: Test case.
    """
    problem.set_up()
    ggn_blocks = AutogradExtensions(problem).ggn_blocks()

    kfac_factors = BackpackExtensions(problem).kfac_chunk(300000)
    # convert each K-FAC result into a matrix for comparison with the GGN blocks
    kfac_blocks = list(map(kfacs_to_mat, kfac_factors))

    check_sizes_and_values(ggn_blocks, kfac_blocks, atol=5e-3, rtol=5e-3)

    problem.tear_down()
def test_sqrt_hessian_sampled_squared_approximates_hessian(
        problem, mc_samples=100000):
    """Test that the MC-sampled sqrt Hessian approximates the input Hessian.

    The input Hessian that BackPACK reconstructs from its MC-sampled square root
    is compared against the input Hessian computed by autograd.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        mc_samples (int): Number of MC samples.
    """
    problem.set_up()

    derivatives = BackpackDerivatives(problem)
    sampled = derivatives.input_hessian_via_sqrt_hessian(mc_samples=mc_samples)
    reference = AutogradDerivatives(problem).input_hessian()

    # tolerances used for the comparison with the MC-sampled result
    tolerances = {"rtol": 1e-2, "atol": 2e-2}
    check_sizes_and_values(reference, sampled, **tolerances)
    problem.tear_down()
def test_bias_jac_t_mat_prod(problem, sum_batch, V=3):
    """Compare the transposed Jacobian-matrix product w.r.t. the bias.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        sum_batch (bool): Sum results over the batch dimension.
        V (int): Number of vectorized transposed Jacobian-vector products.
    """
    problem.set_up()
    # stack of V random matrices, each shaped like the problem output
    vectors = torch.rand(V, *problem.output_shape).to(problem.device)

    bp_derivatives = BackpackDerivatives(problem)
    from_backpack = bp_derivatives.bias_jac_t_mat_prod(vectors, sum_batch)
    ag_derivatives = AutogradDerivatives(problem)
    from_autograd = ag_derivatives.bias_jac_t_mat_prod(vectors, sum_batch)

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
def test_sqrt_hessian_squared_equals_hessian(problem):
    """Test the sqrt decomposition of the input Hessian.

    Compares the Hessian to reconstruction from individual Hessian sqrt.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
    """
    problem.set_up()

    backpack_res = BackpackDerivatives(
        problem).input_hessian_via_sqrt_hessian()
    autograd_res = AutogradDerivatives(problem).input_hessian()

    # The leftover debug prints of the result devices were removed; the
    # comparison below is the only check this test performs.
    check_sizes_and_values(autograd_res, backpack_res)
    problem.tear_down()
def test_weight_jac_t_mat_prod(problem, sum_batch, save_memory, V=3):
    """Compare the transposed Jacobian-matrix product w.r.t. the weights.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
        sum_batch (bool): Sum results over the batch dimension.
        save_memory (bool): Use Owkin implementation to save memory.
        V (int): Number of vectorized transposed Jacobian-vector products.
    """
    problem.set_up()
    # stack of V random matrices, each shaped like the problem output
    vectors = torch.rand(V, *problem.output_shape).to(problem.device)

    # only the BackPACK computation runs under the save-memory context
    with weight_jac_t_save_memory(save_memory):
        bp_derivatives = BackpackDerivatives(problem)
        from_backpack = bp_derivatives.weight_jac_t_mat_prod(vectors, sum_batch)
    ag_derivatives = AutogradDerivatives(problem)
    from_autograd = ag_derivatives.weight_jac_t_mat_prod(vectors, sum_batch)

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
def test_diag_ggn_mc_batch(problem):
    """Test the MC-approximated individual Gauss-Newton diagonal (slow version).

    BackPACK's exact per-sample diagonal is compared with its MC approximation,
    which uses more samples and processes them in chunks.

    Args:
        problem (ExtensionsTestProblem): Problem for extension test.
    """
    problem.set_up()

    exact = BackpackExtensions(problem).diag_ggn_exact_batch()
    approximation = BackpackExtensions(problem).diag_ggn_mc_batch_chunk(
        300000, chunks=30
    )

    check_sizes_and_values(exact, approximation, atol=MC_ATOL, rtol=MC_RTOL)
    problem.tear_down()
Exemplo n.º 28
0
def test_sqrt_hessian_squared_equals_hessian(
    problem: DerivativesTestProblem, subsampling: Union[List[int], None]
) -> None:
    """Test that the sqrt decomposition reproduces the input Hessian.

    The input Hessian that BackPACK reconstructs from the individual Hessian
    square roots is compared against the input Hessian computed by autograd.

    Args:
        problem: Test case.
        subsampling: Indices of active samples.
    """
    problem.set_up()
    skip_subsampling_conflict(problem, subsampling)

    bp_derivatives = BackpackDerivatives(problem)
    reconstructed = bp_derivatives.input_hessian_via_sqrt_hessian(
        subsampling=subsampling
    )
    reference = AutogradDerivatives(problem).input_hessian(subsampling=subsampling)

    check_sizes_and_values(reference, reconstructed)
    problem.tear_down()
def test_ea_jac_t_mat_jac_prod(problem):
    """Compare KFRA backpropagation between BackPACK and autograd.

    H_in →  1/N ∑ₙ Jₙ^T H_out Jₙ

    Notes:
        - `Dropout` cannot be tested, as the `autograd` implementation does a
        forward pass over each sample, while the `backpack` implementation
        requires only one forward pass over the batched data. This leads to
        different outputs, as `Dropout` is not deterministic.

    Args:
        problem (DerivativesProblem): Problem for derivative test.
    """
    problem.set_up()
    # number of output entries per sample (batch axis excluded)
    per_sample_shape = torch.tensor(problem.output_shape[1:])
    num_outputs = torch.prod(per_sample_shape)
    h_out = torch.rand(num_outputs, num_outputs).to(problem.device)

    from_backpack = BackpackDerivatives(problem).ea_jac_t_mat_jac_prod(h_out)
    from_autograd = AutogradDerivatives(problem).ea_jac_t_mat_jac_prod(h_out)

    check_sizes_and_values(from_autograd, from_backpack)
    problem.tear_down()
Exemplo n.º 30
0
def test_param_mjp(
    problem: DerivativesTestProblem,
    sum_batch: bool,
    subsampling: Union[List[int], None],
    request,
) -> None:
    """Test all parameter derivatives.

    For every named parameter of the problem's module, the matrix-Jacobian
    product from BackPACK is compared with the one from autograd. If the test id
    contains ``"Conv"``, the BackPACK computation is run under
    ``weight_jac_t_save_memory`` with ``save_memory`` both on and off.

    Args:
        problem: Test case.
        sum_batch: Whether to sum along the batch axis.
        subsampling: Indices of active samples.
        request: Pytest request, used to read the test id.
    """
    skip_subsampling_conflict(problem, subsampling)
    test_save_memory: bool = "Conv" in request.node.callspec.id
    V = 3
    # ``None`` marks the single run that does not touch the save-memory setting
    save_memory_options = [True, False] if test_save_memory else [None]

    for param_str, _ in problem.module.named_parameters():
        print(f"testing derivative wrt {param_str}")
        for save_memory in save_memory_options:
            if test_save_memory:
                print(f"testing with save_memory={save_memory}")

            mat = rand_mat_like_output(V, problem, subsampling=subsampling)
            # only the BackPACK computation runs under the save-memory context
            context = (
                weight_jac_t_save_memory(save_memory=save_memory)
                if test_save_memory
                else nullcontext()
            )
            with context:
                backpack_res = BackpackDerivatives(problem).param_mjp(
                    param_str, mat, sum_batch, subsampling=subsampling
                )
            autograd_res = AutogradDerivatives(problem).param_mjp(
                param_str, mat, sum_batch, subsampling=subsampling
            )

            check_sizes_and_values(autograd_res, backpack_res)