# Shared imports assumed by the helpers below (autograd's public API; `np`
# refers to autograd.numpy throughout).
import autograd.numpy as np
from autograd.core import make_vjp as _make_vjp
from autograd.extend import vspace
from autograd.builtins import tuple as atuple


def grad_and_value(fun, x):
    """
    Returns both the gradient and value of `fun` evaluated at `x`.
    """
    vjp, val = _make_vjp(fun, x)
    if not vspace(val).size == 1:
        raise TypeError("grad_and_value only applies to real scalar-output "
                        "functions.")
    return vjp(vspace(val).ones()), val
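# Usage sketch (illustrative, not from the original source): assumes the
# autograd imports above; `f` and `w` below are hypothetical test values.
import autograd.numpy as anp

def f(w):
    return anp.sum(w ** 2)  # real scalar output

w = anp.array([1.0, 2.0, 3.0])
grad, val = grad_and_value(f, w)
# grad == array([2., 4., 6.]) and val == 14.0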
def _grad_with_forward(fun, x):
    """This function is a replica of ``autograd.grad``, with the only
    difference being that it returns both the gradient *and* the forward pass
    value."""
    vjp, ans = _make_vjp(fun, x)
    if not vspace(ans).size == 1:
        raise TypeError(
            "Grad only applies to real scalar-output functions. "
            "Try jacobian, elementwise_grad or holomorphic_grad.")
    grad_value = vjp(vspace(ans).ones())
    return grad_value, ans
def jacobian_pkl(fun, x):
    """Jacobian of `fun` at `x`; the second branch reassembles a
    complex-valued Jacobian from paired real/imaginary cotangents."""
    vjp, ans = _make_vjp(fun, x)
    ans_vspace = vspace(ans)
    jacobian_shape = ans_vspace.shape + vspace(x).shape
    grads = map(vjp, ans_vspace.standard_basis())
    grads_out = np.stack(grads)
    if np.prod(jacobian_shape) == np.prod(grads_out.shape):
        return np.reshape(grads_out, jacobian_shape)
    else:
        my_jacobian_shape = ans_vspace.shape + vspace(x).shape + (2, )  # 2 to support real/imag
        re_im_grads = np.squeeze(np.reshape(grads_out, my_jacobian_shape))
        out = re_im_grads[..., 0] + 1j * re_im_grads[..., 1]
        return out
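# Usage sketch for the real-output branch (illustrative only; `f` and `w` are
# hypothetical test values, assuming the autograd imports above).
import autograd.numpy as anp

def f(w):
    return w ** 2  # real, vector-valued output

w = anp.array([1.0, 2.0, 3.0])
jac = jacobian_pkl(f, w)
# jac == diag([2., 4., 6.]), matching autograd.jacobian(f)(w)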
def jacobian_and_value(fun, x):
    """
    Returns both the Jacobian and value of `fun` evaluated at `x`.

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to such that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    val = fun(x)
    v_vspace = vspace(val)
    x_vspace = vspace(x)
    # Replicate the input so every output component gets its own batch row.
    x_rep = np.tile(x, (v_vspace.size, ) + (1, ) * x_vspace.ndim)
    vjp_rep, _ = _make_vjp(fun, x_rep)
    jacobian_shape = v_vspace.shape + x_vspace.shape
    basis_vectors = np.array([b for b in v_vspace.standard_basis()])
    jacobian = vjp_rep(basis_vectors)
    return np.reshape(jacobian, jacobian_shape), val
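# Usage sketch (illustrative only): `f` is a hypothetical vector-valued
# function that broadcasts over a leading batch dimension, as assumed above.
import autograd.numpy as anp

def f(w):
    return anp.cumsum(w, axis=-1)

w = anp.array([1.0, 2.0, 3.0])
jac, val = jacobian_and_value(f, w)
# val == array([1., 3., 6.]) and jac is a lower-triangular matrix of ones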
def hessian_grad_and_value(fun, x):
    """
    Returns the Hessian, gradient and value of `fun` evaluated at `x`.

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to such that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """

    def grad_fun(x):
        vjp, val = _make_vjp(fun, x)
        return vjp(vspace(val).ones()), val

    x_vspace = vspace(x)
    # Replicate the input so each Hessian row is computed on its own batch row.
    x_rep = np.tile(x, (x_vspace.size, ) + (1, ) * x_vspace.ndim)
    vjp_grad, (grad, val) = _make_vjp(lambda x: atuple(grad_fun(x)), x_rep)
    hessian_shape = x_vspace.shape + x_vspace.shape
    basis_vectors = np.array([b for b in x_vspace.standard_basis()])
    hessian = vjp_grad((basis_vectors, vspace(val).zeros()))
    return np.reshape(hessian, hessian_shape), grad[0], val[0]
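# Usage sketch (illustrative only): `f` is a hypothetical scalar-output
# function that broadcasts over a leading batch dimension, as assumed above.
import autograd.numpy as anp

def f(w):
    return anp.sum(w ** 3, axis=-1)

w = anp.array([1.0, 2.0])
hess, grad, val = hessian_grad_and_value(f, w)
# hess == diag([6., 12.]), grad == array([3., 12.]), val == 9.0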
# The ``unary_to_nary`` wrapper is assumed here (it is what the referenced
# autograd implementation uses), so that the documented ``argnum`` selects
# which positional argument to differentiate with respect to.
from autograd.wrap_util import unary_to_nary


@unary_to_nary
def ans_jacobian(function, x):
    """
    Get the value and the jacobian of a function.
    This differential operator follows autograd's jacobian implementation:
    https://github.com/HIPS/autograd/blob/master/autograd/differential_operators.py

    Args:
    function :: any -> any - the function to differentiate
    argnum :: int - the argument number to differentiate with respect to

    Returns:
    ans_jacobian :: any -> tuple(ans :: any, jacobian :: ndarray) - a function
        that returns the value of `function` and the jacobian of `function`
        evaluated at a given argument of `function`
    """
    vjp, ans = _make_vjp(function, x)
    ans_vspace = vspace(ans)
    jacobian_shape = ans_vspace.shape + vspace(x).shape
    grads = list(map(vjp, ans_vspace.standard_basis()))
    jacobian = np.reshape(np.stack(grads), jacobian_shape)
    return ans, jacobian
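# Usage sketch (illustrative only), assuming the decorated form above: the
# helper is applied to the function first and then called on its argument.
import autograd.numpy as anp

def f(w):
    return anp.sin(w)

w = anp.array([0.0, 1.0])
ans, jac = ans_jacobian(f)(w)
# ans == sin(w) and jac == diag(cos(w))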
def mtp_hessian_grad_and_value(fun, x):
    """
    Returns the matrix-Tressian-product (MTP) function, Hessian, gradient and
    value of `fun` evaluated at `x`.

    For a scalar-valued function `fun` the matrix-Tressian-product (MTP) is
    here defined as a function of a matrix `m` corresponding to

        mtp(m) = sum(m[:, :] * t[:, :, :], axis=(-1, -2))

    where `t` is the 'Tressian' of `f = fun(x)` wrt `x` i.e. the rank-3 tensor
    of third-order partial derivatives of the scalar-valued function, such that

        t[i, j, k] = d**3 f / (dx[i] * dx[j] * dx[k])

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to such that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    # Uses the `hessian_grad_and_value(fun, x)` signature defined above.
    mtp, (hessian, grad, val) = _make_vjp(
        lambda x: atuple(hessian_grad_and_value(fun, x)), x)
    return (lambda m: mtp((m, vspace(grad).zeros(), vspace(val).zeros())),
            hessian, grad, val)
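# Usage sketch (illustrative only): for the hypothetical test function
# f(w) = sum(w**3) the Tressian is t[i, j, k] = 6 when i == j == k and zero
# otherwise, so contracting the identity against it gives a vector of sixes.
import autograd.numpy as anp

def f(w):
    return anp.sum(w ** 3, axis=-1)

w = anp.array([1.0, 2.0])
mtp, hess, grad, val = mtp_hessian_grad_and_value(f, w)
# mtp(anp.eye(2)) == array([6., 6.])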
def mhp_jacobian_and_value(fun, x):
    """
    Returns the matrix-Hessian-product (MHP) function, Jacobian and value of
    `fun` evaluated at `x`.

    For a vector-valued function `fun` the matrix-Hessian-product (MHP) is
    here defined as a function of a matrix `m` corresponding to

        mhp(m) = sum(m[:, :] * h[:, :, :], axis=(-1, -2))

    where `h` is the vector-Hessian of `f = fun(x)` wrt `x` i.e. the rank-3
    tensor of second-order partial derivatives of the vector-valued function,
    such that

        h[k, i, j] = (d**2 f[i]) / (dx[j] * dx[k])

    Assumes that the function `fun` broadcasts along the first dimension of
    the input being differentiated with respect to such that a batch of
    outputs can be computed concurrently for a batch of inputs.
    """
    # Uses the `jacobian_and_value(fun, x)` signature defined above.
    mhp, (jacob, val) = _make_vjp(
        lambda x: atuple(jacobian_and_value(fun, x)), x)
    return lambda m: mhp((m, vspace(val).zeros())), jacob, val
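# Usage sketch (illustrative only): for the hypothetical test function
# f(w) = w**2 the vector-Hessian is h[k, i, j] = 2 when i == j == k and zero
# otherwise.
import autograd.numpy as anp

def f(w):
    return w ** 2  # vector-valued, broadcasts over a leading batch dimension

w = anp.array([1.0, 2.0])
mhp, jac, val = mhp_jacobian_and_value(f, w)
# mhp(anp.eye(2)) == array([2., 2.]) and jac == diag([2., 4.])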
# GPU-aware variant: ``GPUArray`` comes from pycuda, while ``stack_gpu`` is
# assumed to be a project-specific helper that stacks a list of GPUArrays.
# As above, the ``unary_to_nary`` wrapper is assumed so that the documented
# ``argnum`` selects which positional argument to differentiate with respect to.
from autograd.wrap_util import unary_to_nary
from pycuda.gpuarray import GPUArray


@unary_to_nary
def ans_jacobian(function, x):
    """
    Get the value and the jacobian of a function.
    This differential operator supports numpy and pycuda arrays.

    Args:
    function :: any -> any - the function to differentiate
    argnum :: int - the argument number to differentiate with respect to

    Returns:
    ans_jacobian :: any -> tuple(ans :: any, jacobian :: ndarray) - a function
        that returns the value of `function` and the jacobian of `function`
        evaluated at a given argument of `function`
    """
    vjp, ans = _make_vjp(function, x)
    ans_vspace = vspace(ans)
    jacobian_shape = ans_vspace.shape + vspace(x).shape
    grads = list(map(vjp, ans_vspace.standard_basis()))
    if isinstance(grads[0], np.ndarray):
        jacobian = np.reshape(np.stack(grads), jacobian_shape)
    elif isinstance(grads[0], GPUArray):
        jacobian = stack_gpu(grads).reshape(jacobian_shape)
    else:
        raise TypeError("Unsupported gradient type: {}".format(type(grads[0])))
    return ans, jacobian
def grad_fun(x):
    # Inner helper (as used in `hessian_grad_and_value` above): closes over
    # `fun` from the enclosing scope and returns its gradient and value at x.
    vjp, val = _make_vjp(fun, x)
    return vjp(vspace(val).ones()), val
def elementwise_grad(fun, x, initial_grad=None):
    """Per-element gradient of an elementwise, real-output function at `x`."""
    vjp, ans = _make_vjp(fun, x)
    if vspace(ans).iscomplex:
        raise TypeError(
            "Elementwise_grad only applies to real-output functions.")
    return vjp(vspace(ans).ones() if initial_grad is None else initial_grad)
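# Usage sketch (illustrative only; `f` and `w` are hypothetical test values).
import autograd.numpy as anp

def f(w):
    return anp.tanh(w)  # applied elementwise, real output

w = anp.array([0.0, 1.0, 2.0])
g = elementwise_grad(f, w)
# g == 1 - tanh(w) ** 2, one derivative per output element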