def compute_hessian_eigenthings(model,
                                dataloader,
                                loss,
                                num_eigenthings=10,
                                full_dataset=True,
                                mode='power_iter',
                                use_gpu=True,
                                max_samples=512,
                                hvp_operator_class=HVPOperatorParams,
                                **kwargs):
    """
    Estimates the leading `num_eigenthings` eigenvalues and eigenvectors of
    the hessian of `model`'s loss. A hessian-vector-product operator is built
    from the model, data and loss, and is then handed to the backend selected
    by `mode` (deflated power iteration or lanczos).

    Parameters
    ---------------

    model : Module
        pytorch model for this network
    dataloader : torch.data.DataLoader
        dataloader with x,y pairs for which we compute the loss.
    loss : torch.nn.modules.Loss | torch.nn.functional criterion
        loss function to differentiate through
    num_eigenthings : int
        number of eigenvalues/eigenvecs to compute. computed in order of
        decreasing eigenvalue magnitude.
    full_dataset : boolean
        if true, each power iteration call evaluates the gradient over the
        whole dataset.
    mode : str ['power_iter', 'lanczos']
        which backend to use to compute the top eigenvalues.
    use_gpu:
        if true, attempt to use cuda for all lin alg computations
    max_samples:
        the maximum number of samples that can fit on-memory. used
        to accumulate gradients for large batches.
    hvp_operator_class:
        callable used to build the hessian-vector-product operator. it is
        invoked as `hvp_operator_class(model, dataloader, loss,
        use_gpu=..., full_dataset=..., max_samples=...)`.
    **kwargs:
        contains additional parameters passed onto lanczos or power_iter.

    Returns
    ---------------

    eigenvals, eigenvecs
        the pair of values produced by the selected backend.

    Raises
    ---------------

    ValueError
        if `mode` is neither 'power_iter' nor 'lanczos'. note that the
        operator has already been constructed by the time this is raised.
    """
    operator = hvp_operator_class(
        model,
        dataloader,
        loss,
        use_gpu=use_gpu,
        full_dataset=full_dataset,
        max_samples=max_samples)

    if mode not in ('power_iter', 'lanczos'):
        raise ValueError(
            "Unsupported mode %s (must be power_iter or lanczos)" % mode)

    # only the chosen backend name is looked up
    solver = deflated_power_iteration if mode == 'power_iter' else lanczos
    values, vectors = solver(operator,
                             num_eigenthings,
                             use_gpu=use_gpu,
                             **kwargs)
    return values, vectors
Пример #2
0
def test_matrix(mat):
    """
    Measures how well deflated power iteration recovers the spectrum of `mat`.

    Reference eigenpairs are taken from `np.linalg.eig`; estimates come from
    `deflated_power_iteration` run on a matrix-vector-product operator wrapped
    around `mat`. Both sets are ordered by ascending eigenvalue before being
    handed to `compute_eigenval_err` / `compute_eigenvec_err`.

    The number of eigenpairs, the number of power iteration steps and the
    momentum are read from the module-level `args` object.

    Parameters
    ---------------

    mat : np.ndarray
        square matrix to analyse. it is converted to a float torch tensor
        for the operator, while the reference decomposition uses `mat` as-is.

    Returns
    ---------------

    eigenval_err, eigenvec_err
        the results of `compute_eigenval_err` and `compute_eigenvec_err` on
        the (reference, estimate) pairs.
    """
    mat_tensor = torch.from_numpy(mat).float()

    def matvec(vec):
        return torch.matmul(mat_tensor, vec)

    operator = LambdaOperator(matvec, mat_tensor.size()[:1])

    # reference decomposition: column i of the matrix pairs with eigenvalue i
    ref_vals, ref_vec_matrix = np.linalg.eig(mat)
    ref_vecs = [ref_vec_matrix[:, col] for col in range(len(ref_vals))]

    top_k = args.num_eigenthings
    est_vals, est_vecs = deflated_power_iteration(
        operator,
        num_eigenthings=top_k,
        power_iter_steps=args.power_iter_steps,
        momentum=args.momentum,
        use_gpu=False)
    est_vecs = [vec.numpy() for vec in est_vecs]

    # sort the reference pairs ascending and keep only the last top_k of them
    ref_order = np.argsort(ref_vals)
    ref_vals = np.array(ref_vals)[ref_order][-top_k:]
    ref_vecs = np.array(ref_vecs)[ref_order][-top_k:]

    # bring the estimates into the same ascending order
    est_order = np.argsort(est_vals)
    est_vals = np.array(est_vals)[est_order]
    est_vecs = np.array(est_vecs)[est_order]

    val_err = compute_eigenval_err(ref_vals, est_vals)
    vec_err = compute_eigenvec_err(ref_vecs, est_vecs)
    return val_err, vec_err
Пример #3
0
def compute_hessian_eigenthings(model,
                                dataloader,
                                loss,
                                num_eigenthings=10,
                                full_dataset=True,
                                mode="power_iter",
                                use_gpu=True,
                                fp16=False,
                                max_possible_gpu_samples=2**16,
                                **kwargs):
    """
    Estimates the leading `num_eigenthings` eigenvalues and eigenvectors of
    the hessian of `model`'s loss. An `HVPOperator` is built from the model,
    data and loss, and is then handed to the backend selected by `mode`
    (deflated power iteration or lanczos).

    Parameters
    ---------------

    model : Module
        pytorch model for this network
    dataloader : torch.data.DataLoader
        dataloader with x,y pairs for which we compute the loss.
    loss : torch.nn.modules.Loss | torch.nn.functional criterion
        loss function to differentiate through
    num_eigenthings : int
        number of eigenvalues/eigenvecs to compute. computed in order of
        decreasing eigenvalue magnitude.
    full_dataset : boolean
        if true, each power iteration call evaluates the gradient over the
        whole dataset.
        (if False, you might want to check if the eigenvalue estimate variance
         depends on batch size)
    mode : str ['power_iter', 'lanczos']
        which backend algorithm to use to compute the top eigenvalues.
    use_gpu:
        if true, attempt to use cuda for all lin alg computations
    fp16: bool
        if true, store and do math with eigenvectors, gradients, etc. in fp16.
        (you should test if this is numerically stable for your application)
        this flag is forwarded to the backend solver; it is not passed to
        the `HVPOperator` constructor here.
    max_possible_gpu_samples:
        the maximum number of samples that can fit on-memory. used
        to accumulate gradients for large batches.
        (note: if smaller than dataloader batch size, this can have odd
         interactions with batch norm statistics)
    **kwargs:
        contains additional parameters passed onto lanczos or power_iter.

    Returns
    ---------------

    eigenvals, eigenvecs
        the pair of values produced by the selected backend.

    Raises
    ---------------

    ValueError
        if `mode` is neither 'power_iter' nor 'lanczos'. note that the
        operator has already been constructed by the time this is raised.
    """
    operator = HVPOperator(
        model,
        dataloader,
        loss,
        use_gpu=use_gpu,
        full_dataset=full_dataset,
        max_possible_gpu_samples=max_possible_gpu_samples,
    )

    if mode not in ("power_iter", "lanczos"):
        raise ValueError(
            "Unsupported mode %s (must be power_iter or lanczos)" % mode)

    # only the chosen backend name is looked up
    solver = deflated_power_iteration if mode == "power_iter" else lanczos
    values, vectors = solver(operator,
                             num_eigenthings,
                             use_gpu=use_gpu,
                             fp16=fp16,
                             **kwargs)
    return values, vectors