def test_multivector2vector_independent():
    def f(x, y):
        return x**2 + 2 * y**3 + 1

    def dfdx(x, y):
        return 2 * x

    def dfdy(x, y):
        return 6 * y**2

    x = np.array([2, 3, 4])
    y = np.array([1, 2, 3])
    ref_x = [4, 6, 8]
    ref_y = [6, 24, 54]
    npt.assert_array_almost_equal(fd(f, False)(x, y), ref_x, 5)
    npt.assert_array_almost_equal(fd(f, False, 1)(x, y), ref_y, 4)
    npt.assert_array_almost_equal_nulp(cs(f, False)(x, y), ref_x)
    npt.assert_array_almost_equal_nulp(cs(f, False, 1)(x, y), ref_y)
    npt.assert_array_equal(autograd(f, False)(x, y), ref_x)
    npt.assert_array_equal(autograd(f, False, 1)(x, y), ref_y)

    pf = primitive(f)
    defvjp(pf,
           lambda ans, x, y: lambda g: g * dfdx(x, y),
           lambda ans, x, y: lambda g: g * dfdy(x, y))
    npt.assert_array_equal(autograd(pf, False)(x, y), ref_x)
    npt.assert_array_equal(autograd(pf, False, 1)(x, y), ref_y)
def wrap_namespace(old, new):
    unchanged_types = set([types.FloatType, types.IntType, types.NoneType, types.TypeType])
    function_types = set([np.ufunc, types.FunctionType, types.BuiltinFunctionType])
    for name, obj in old.iteritems():
        if type(obj) in function_types:
            new[name] = primitive(obj)
        elif type(obj) in unchanged_types:
            new[name] = obj
def wrap_namespace(old, new):
    unchanged_types = set([float, int, type(None), type])
    function_types = set([np.ufunc, types.FunctionType, types.BuiltinFunctionType])
    for name, obj in six.iteritems(old):
        if type(obj) in function_types:
            new[name] = primitive(obj)
        elif type(obj) in unchanged_types:
            new[name] = obj
def set_gradient_funcs(self, power_ct_grad_func):

    def get_grad(ans, ws, **kwargs):
        def grad(g):
            return g * power_ct_grad_func(ws, **kwargs)
        return grad

    primitive_power_ct = primitive(self.power_ct)
    defvjp(primitive_power_ct, get_grad)
    self.power_ct = primitive_power_ct
def wrap_namespace(old, new):
    unchanged_types = {float, int, type(None), type}
    int_types = {np.int, np.int8, np.int16, np.int32, np.int64, np.integer}
    function_types = {np.ufunc, types.FunctionType, types.BuiltinFunctionType}
    for name, obj in iteritems(old):
        if type(obj) in function_types:
            new[name] = primitive(obj)
        elif type(obj) is type and obj in int_types:
            new[name] = wrap_intdtype(obj)
        elif type(obj) in unchanged_types:
            new[name] = obj
def checkpoint(fun):
    """Returns a checkpointed version of `fun`, where intermediate values
    computed during the forward pass of `fun` are discarded and then recomputed
    for the backward pass. Useful to save memory, effectively trading off time
    and memory. See e.g. arxiv.org/abs/1604.06174.
    """
    def wrapped_grad(argnum, g, ans, vs, gvs, args, kwargs):
        return make_vjp(fun, argnum)(*args, **kwargs)[0](g)
    wrapped = primitive(fun)
    wrapped.vjp = wrapped_grad
    return wrapped
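# Minimal usage sketch, assuming an autograd version where primitives expose the
# `vjp` hook used above. `expensive_block` is an illustrative stand-in, not a
# function from the original code: wrapping it trades recomputation time for
# peak memory on the backward pass.
import autograd.numpy as anp
from autograd import grad

def expensive_block(x):
    # many intermediates that would otherwise be kept alive for the backward pass
    for _ in range(10):
        x = anp.tanh(anp.dot(x, x.T))
    return anp.sum(x)

checkpointed_block = checkpoint(expensive_block)
g = grad(checkpointed_block)(anp.eye(4))  # intermediates recomputed, not stored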
def test_scalar2scalar():
    def f(x):
        return x**2 + 1

    x = np.array([3])
    npt.assert_equal(cs(f)(x), 6)
    npt.assert_almost_equal(fd(f)(x), 6, 5)
    npt.assert_equal(autograd(f)(x), 6)

    pf = primitive(f)
    defvjp(pf, lambda ans, x: lambda g: g * 2 * x)
    npt.assert_array_equal(autograd(pf, False)(x), 6)
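# Illustrative aside, assuming the `cs` helper above implements complex-step
# differentiation: evaluating f at x + ih avoids subtractive cancellation, so
# f'(x) ~= Im(f(x + ih)) / h is accurate to machine precision. Standalone sketch
# of the idea (not the project's actual helper):
import numpy as onp

def complex_step_derivative(f, x, h=1e-20):
    return onp.imag(f(x + 1j * h)) / h

print(complex_step_derivative(lambda x: x**2 + 1, 3.0))  # ~6.0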
def test_scalar2multi_scalar():
    def fxy(x):
        return x**2 + 1, 2 * x + 1

    def f(x):
        fx, fy = fxy(x)
        return fx + fy

    x = 3.
    ref = 8
    npt.assert_equal(cs(f)(x), ref)
    npt.assert_almost_equal(fd(f)(x), ref, 5)
    npt.assert_equal(autograd(f)(x), ref)

    pf = primitive(f)
    defvjp(pf, lambda ans, x: lambda g: g * (2 * x + 2))
    npt.assert_array_equal(autograd(pf, False)(x), ref)

    pf = primitive(fxy)
    defvjp(pf, lambda ans, x: lambda g: (g[0] * 2 * x, g[1] * 2))
    npt.assert_array_equal(autograd(f, False)(x), ref)
def test_vector2vector_independent():
    def f(x):
        return x**2 + 1

    def df(x):
        return 2 * x

    x = np.array([2, 3, 4])
    ref = [4, 6, 8]
    npt.assert_array_almost_equal(fd(f, False)(x), ref, 5)
    npt.assert_array_equal(cs(f, False)(x), ref)
    npt.assert_array_equal(autograd(f, False)(x), ref)

    pf = primitive(f)
    defvjp(pf, lambda ans, x: lambda g: g * df(x))
    npt.assert_array_equal(autograd(pf, False)(x), ref)
def test_vector2vector_dependent():
    def f(x):
        return x**2 + x[::-1]

    def df(x):
        return np.diag(2 * x) + np.diag(np.ones(3))[::-1]

    x = np.array([2., 3, 4])
    ref = [[4., 0., 1.],
           [0., 7., 0.],
           [1., 0., 8.]]
    npt.assert_array_almost_equal(fd(f, True)(x), ref, 5)
    npt.assert_array_almost_equal_nulp(cs(f, True)(x), ref)
    npt.assert_array_equal(autograd(f, True)(x), ref)

    pf = primitive(f)
    defvjp(pf, lambda ans, x: lambda g: np.dot(g, df(x)))
    npt.assert_array_equal(autograd(pf, True)(x), ref)
def make_mlp(shapes):
    """Make a multilayer perceptron function where we get gradients from
    optimized theano code. This returns a function handle

        mlp = make_mlp(shapes)

    where mlp(input, params) pushes inputs through a multi-layer perceptron
    with params = [(W1, b1), (W2, b2), ..., (WL, bL)] a list of weights and
    biases. Autograd will be able to differentiate functions that use the MLP
    via theano's gradients.
    """
    def pack(params):
        return np.concatenate([np.concatenate([np.ravel(W), b]) for W, b in params])

    def unpack(params):
        offset = 0
        for m, n in shapes:
            yield (params[offset:offset + m * n].reshape((m, n)),
                   params[offset + m * n:offset + (m + 1) * n])
            offset += (m + 1) * n

    def mlp(x, params):
        for W, b in unpack(params):
            x = T.tanh(T.dot(x, W) + b)
        return x

    # define the MLP's vector-Jacobian product (theano's Lop) using theano
    params = T.dvector('params')
    x = T.dmatrix('x')
    g = T.dmatrix('g')
    mlpval = mlp(x, params)
    gradfun = theano.function([x, params, g], T.Lop(mlpval, params, g))

    # create python executable MLP function, define autograd primitive
    theano_mlpfun = theano.function([x, params], mlpval)
    mlpfun = primitive(lambda x, params: theano_mlpfun(x, pack(params)))
    mlpfun.defgrad(lambda ans, x, params:
                   lambda g: list(unpack(gradfun(x, pack(params), g))), 1)
    return mlpfun
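# Hedged usage sketch with made-up shapes and data (requires theano and the
# surrounding module's imports). It builds the theano-backed primitive and runs
# a forward pass; a loss written in terms of `mlp` could then be differentiated
# with autograd's grad, which routes the backward pass through the registered Lop.
import numpy.random as npr

shapes = [(2, 16), (16, 1)]
mlp = make_mlp(shapes)
params = [(npr.randn(m, n), npr.randn(n)) for m, n in shapes]
inputs = npr.randn(10, 2)
outputs = mlp(inputs, params)   # forward pass through compiled theano code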
from __future__ import absolute_import
import scipy.special
from autograd.core import primitive

polygamma = primitive(scipy.special.polygamma)
psi = primitive(scipy.special.psi)          # psi(x) is just polygamma(0, x)
digamma = primitive(scipy.special.digamma)  # digamma is another name for psi.
gamma = primitive(scipy.special.gamma)

polygamma.defgrad_is_zero(argnums=(0,))
polygamma.defgrad(lambda ans, n, x: lambda g: g * polygamma(n + 1, x), argnum=1)
psi.defgrad(lambda ans, x: lambda g: g * polygamma(1, x))
digamma.defgrad(lambda ans, x: lambda g: g * polygamma(1, x))
gamma.defgrad(lambda ans, x: lambda g: g * ans * psi(x))

### Bessel functions ###

j0 = primitive(scipy.special.j0)
y0 = primitive(scipy.special.y0)
j1 = primitive(scipy.special.j1)
y1 = primitive(scipy.special.y1)
jn = primitive(scipy.special.jn)
yn = primitive(scipy.special.yn)

j0.defgrad(lambda ans, x: lambda g: -g * j1(x))
y0.defgrad(lambda ans, x: lambda g: -g * y1(x))
j1.defgrad(lambda ans, x: lambda g: g * (j0(x) - jn(2, x)) / 2.0)
y1.defgrad(lambda ans, x: lambda g: g * (y0(x) - yn(2, x)) / 2.0)
jn.defgrad_is_zero(argnums=(0,))
"""Gradients of the univariate t distribution.""" from __future__ import absolute_import import scipy.stats import autograd.numpy as np from autograd.core import primitive from autograd.numpy.numpy_grads import unbroadcast from autograd.scipy.special import psi pdf = primitive(scipy.stats.t.pdf) cdf = primitive(scipy.stats.t.cdf) logpdf = primitive(scipy.stats.t.logpdf) logcdf = primitive(scipy.stats.t.logcdf) def grad_tlogpdf_diff(diff, df): return -diff * (1.0 + df) / (diff**2 + df) def grad_tlogpdf_x(x, df, loc, scale): return grad_tlogpdf_diff((x - loc) / scale, df) / scale def grad_tlogpdf_loc(x, df, loc, scale): return -grad_tlogpdf_diff((x - loc) / scale, df) / scale def grad_tlogpdf_scale(x, df, loc, scale): diff = x - loc return -(df * (scale**2 - diff**2)) / (scale * (df * scale**2 + diff**2))
from autograd.misc import flatten
from itertools import islice, imap, cycle
import operator
from functools import partial
from toolz import curry
import autograd.numpy as np   # np is used throughout below

# autograd internals
from autograd.builtins import SequenceBox
from autograd.core import getval, primitive

### neural nets

identity = lambda x: x
sigmoid = lambda x: 1. / (1. + np.exp(-x))
relu = lambda x: np.maximum(x, 0.)
log1pexp = primitive(lambda x: np.log1p(np.exp(x)))
log1pexp.defgrad(lambda ans, x: lambda g: g / (1 + np.exp(-x)))
normalize = lambda x: x / np.sum(x, axis=-1, keepdims=True)
softmax = lambda x: normalize(np.exp(x - np.max(x, axis=-1, keepdims=True)))

### misc

def rle(stateseq):
    pos, = np.where(np.diff(stateseq) != 0)
    pos = np.concatenate(([0], pos + 1, [len(stateseq)]))
    return stateseq[pos[:-1]], np.diff(pos)

isarray = lambda x: hasattr(x, 'ndim')
flat = lambda x: flatten(x)[0]
def make_conv_mlp(input_shape, layer_specs):
    """Make a convolutional multilayer perceptron function where we get
    gradients from optimized theano code. This returns a function handle

        mlp = make_mlp(shapes)

    where mlp(input, params) is a function of
      - inputs: tensor of size [num data, number of input feature maps (color-in),
                image height, image width]
      - params: decomposes into a list of parameter tensors; each convolutional
                tensor has size [number of feature maps at output (color-out),
                number of feature maps at input (color-in), filter height,
                filter width]

    Autograd will be able to differentiate functions that use the MLP via
    theano's gradients.
    """
    # compute number of params and output shapes for each layer - compile a
    # list of parameter slices
    offset = 0
    param_slices = []
    cur_shape = input_shape
    for layer in layer_specs:
        N_weights, cur_shape = layer.build_weights_dict(cur_shape)
        param_slices.append(slice(offset, offset + N_weights))
        offset += N_weights
    num_params = offset
    out_size = np.prod(cur_shape)

    def unpack(params):
        for pslice in param_slices:
            yield params[pslice]

    def mlp(inputs, params):
        """Applies each layer to the input, given the parameter vector.
        Shape of inputs: [data, color, y, x]."""
        cur_units = inputs
        for layer, layer_params in zip(layer_specs, unpack(params)):
            cur_units = layer.forward_pass(cur_units, layer_params)
        # make sure we're returning a 2-d array for theano's Lop
        return cur_units.reshape((inputs.shape[0], -1))

    # define symbolic variables that theano will manipulate
    inputs = T.tensor4(name='inputs', dtype='float32')
    params = T.fvector('params')
    g = T.fmatrix('g')

    # define the mlp symbolic function and executable function
    mlpval = mlp(inputs, params)
    theano_mlpfun = theano.function([inputs, params], mlpval,
                                    allow_input_downcast=True)

    # define the vector-Jacobian gradient function needed for autograd
    gradfun = theano.function([inputs, params, g], T.Lop(mlpval, params, g),
                              allow_input_downcast=True)

    # create python executable MLP function, define autograd primitive
    mlpfun = primitive(lambda x, params: theano_mlpfun(x, params))
    mlpfun.defgrad(lambda ans, x, params: lambda g: gradfun(x, params, g), 1)
    return mlpfun, num_params, out_size
def make_natural_sample_grad_arg0(intermediates, ans, messages, pair_params, num_samples):
    return primitive(
        lambda g: cython_natural_sample_backward_grad(g, intermediates))
def test_vector2multi_vector():
    def fxy(x):
        return x**2 + 1, 2 * x + 1

    def f0(x):
        return fxy(x)[0]

    def fsum(x):
        fx, fy = fxy(x)
        return fx + fy

    x = np.array([1., 2, 3])
    ref0 = [2, 4, 6]
    refsum = [4, 6, 8]
    npt.assert_equal(cs(f0)(x), ref0)
    npt.assert_almost_equal(fd(f0)(x), ref0, 5)
    npt.assert_equal(autograd(f0)(x), ref0)

    pf0 = primitive(f0)
    defvjp(pf0, lambda ans, x: lambda g: g * (2 * x))
    npt.assert_array_equal(autograd(pf0, False)(x), ref0)

    npt.assert_equal(cs(fsum)(x), refsum)
    npt.assert_almost_equal(fd(fsum)(x), refsum, 5)
    npt.assert_equal(autograd(fsum)(x), refsum)

    pfsum = primitive(fsum)
    defvjp(pfsum, lambda ans, x: lambda g: g * (2 * x + 2))
    npt.assert_array_equal(autograd(pfsum, False)(x), refsum)

    pfxy = primitive(fxy)

    def dfxy(x):
        return 2 * x, np.full(x.shape, 2)

    def gsum(x):
        fx, fy = pfxy(x)
        return fx + fy

    def g0(x):
        return pfxy(x)[0]

    pgsum = primitive(gsum)
    pg0 = primitive(g0)
    defvjp(pgsum, lambda ans, x: lambda g: g * np.sum(dfxy(x), 0))
    defvjp(pg0, lambda ans, x: lambda g: g * dfxy(x)[0])
    npt.assert_array_equal(autograd(pgsum, False)(x), refsum)
    npt.assert_array_equal(autograd(pg0, False)(x), ref0)

    defvjp(pfxy, lambda ans, x: lambda g: dfxy(x)[0])

    def h0(x):
        return pfxy(x)[0]

    npt.assert_array_equal(autograd(h0, False)(x), ref0)

    defvjp(pfxy, lambda ans, x: lambda g: np.sum(g * np.asarray(dfxy(x)), 0))

    def hsum(x):
        fx, fy = pfxy(x)
        return fx + fy

    npt.assert_array_equal(autograd(hsum, False)(x), refsum)
"""Gradients of the normal distribution.""" from __future__ import absolute_import import scipy.stats import autograd.numpy as anp from autograd.core import primitive from autograd.numpy.numpy_grads import unbroadcast pdf = primitive(scipy.stats.norm.pdf) cdf = primitive(scipy.stats.norm.cdf) logpdf = primitive(scipy.stats.norm.logpdf) logcdf = primitive(scipy.stats.norm.logcdf) pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x, lambda g: -g * ans * (x - loc) / scale**2)) pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc, lambda g: g * ans * (x - loc) / scale**2), argnum=1) pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: g * ans * (((x - loc)/scale)**2 - 1.0)/scale), argnum=2) cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x, lambda g: g * pdf(x, loc, scale))) cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc, lambda g: -g * pdf(x, loc, scale)), argnum=1) cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: -g * pdf(x, loc, scale)*(x-loc)/scale), argnum=2) logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x, lambda g: -g * (x - loc) / scale**2)) logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc, lambda g: g * (x - loc) / scale**2), argnum=1) logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: g * (-1.0/scale + (x - loc)**2/scale**3)), argnum=2) logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x, lambda g: g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale)))) logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc, lambda g: -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))), argnum=1) logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))*(x-loc)/scale), argnum=2)
from __future__ import absolute_import
import scipy.signal
from autograd.core import primitive
from autograd.numpy import flipud

convolve = primitive(scipy.signal.convolve)
convolve2d = primitive(scipy.signal.convolve2d)

def get_same_slice(L_in0, L_in1):
    left_pad = L_in0 - (L_in1 + 1) / 2
    return slice(left_pad, left_pad + L_in1)

def make_grad_convolve_0(ans, in0, in1, mode='full'):
    if mode == 'full':
        return lambda g: convolve(g, flipud(in1), mode='valid')
    elif mode == 'same':
        return lambda g: flipud(convolve(flipud(g), in1, mode='same'))
    elif mode == 'valid':
        return lambda g: convolve(g, flipud(in1), mode='full')
    else:
        raise Exception("Unrecognized mode {0}".format(mode))
convolve.defgrad(make_grad_convolve_0, argnum=0)

def make_grad_convolve_1(ans, in0, in1, mode='full'):
    if mode == 'full':
def make_natural_filter_grad_arg2(intermediates, ans, init_params, pair_params, node_params):
    return primitive(lambda g: cython_natural_filter_grad(g, intermediates))
from __future__ import absolute_import
import scipy.special
import autograd.numpy as np
from autograd.core import primitive

polygamma = primitive(scipy.special.polygamma)
psi = primitive(scipy.special.psi)          # psi(x) is just polygamma(0, x)
digamma = primitive(scipy.special.digamma)  # digamma is another name for psi.
gamma = primitive(scipy.special.gamma)
gammaln = primitive(scipy.special.gammaln)
gammasgn = primitive(scipy.special.gammasgn)
rgamma = primitive(scipy.special.rgamma)
multigammaln = primitive(scipy.special.multigammaln)

gammasgn.defgrad_is_zero()
polygamma.defgrad_is_zero(argnums=(0,))
polygamma.defgrad(lambda ans, n, x: lambda g: g * polygamma(n + 1, x), argnum=1)
psi.defgrad(lambda ans, x: lambda g: g * polygamma(1, x))
digamma.defgrad(lambda ans, x: lambda g: g * polygamma(1, x))
gamma.defgrad(lambda ans, x: lambda g: g * ans * psi(x))
gammaln.defgrad(lambda ans, x: lambda g: g * psi(x))
rgamma.defgrad(lambda ans, x: lambda g: g * psi(x) / -gamma(x))
multigammaln.defgrad(lambda ans, a, d: lambda g: g * np.sum(
    digamma(np.expand_dims(a, -1) - np.arange(d) / 2.), -1))
multigammaln.defgrad_is_zero(argnums=(1,))

### Bessel functions ###

j0 = primitive(scipy.special.j0)
from __future__ import absolute_import
import scipy.misc
from autograd.core import primitive
import autograd.numpy as anp
from autograd.numpy.numpy_grads import repeat_to_match_shape

logsumexp = primitive(scipy.misc.logsumexp)

def make_grad_logsumexp(ans, x, axis=None, b=1.0, keepdims=False):
    repeater, _ = repeat_to_match_shape(x, axis, keepdims)
    return lambda g: repeater(g) * b * anp.exp(x - repeater(ans))

logsumexp.defgrad(make_grad_logsumexp)
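# Illustrative aside, not part of the source: the gradient expression above,
# exp(x - logsumexp(x)), is exactly the softmax of x, so its entries sum to 1.
# Standalone numeric check with plain numpy/scipy (logsumexp lives in
# scipy.special in current scipy versions); the values are arbitrary.
import numpy as onp
from scipy.special import logsumexp as scipy_logsumexp

_x = onp.array([0.5, -1.2, 3.0])
_softmax_x = onp.exp(_x - scipy_logsumexp(_x))
assert onp.isclose(_softmax_x.sum(), 1.0)
assert onp.allclose(_softmax_x, onp.exp(_x) / onp.exp(_x).sum())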
from __future__ import absolute_import
import scipy.stats
import autograd.numpy as np
from autograd.scipy.special import digamma
from autograd.core import primitive

rvs = primitive(scipy.stats.dirichlet.rvs)
pdf = primitive(scipy.stats.dirichlet.pdf)
logpdf = primitive(scipy.stats.dirichlet.logpdf)

logpdf.defvjp(lambda g, ans, vs, gvs, x, alpha:
              g * (alpha - 1) / x, argnum=0)
logpdf.defvjp(lambda g, ans, vs, gvs, x, alpha:
              g * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)), argnum=1)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defvjp(lambda g, ans, vs, gvs, x, alpha:
           g * ans * (alpha - 1) / x, argnum=0)
pdf.defvjp(lambda g, ans, vs, gvs, x, alpha:
           g * ans * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)), argnum=1)
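# Illustrative check, not part of the source: since pdf = exp(logpdf), the chain
# rule gives d pdf / d alpha_i = pdf * d logpdf / d alpha_i, which is why the
# pdf vjps above are the logpdf vjps multiplied by `ans`. Standalone finite-
# difference check against scipy (arbitrary values).
import numpy as onp
from scipy.stats import dirichlet as _dirichlet
from scipy.special import digamma as _digamma

_x = onp.array([0.2, 0.3, 0.5])
_alpha = onp.array([1.5, 2.0, 3.0])
_h = 1e-6

_analytic = _dirichlet.pdf(_x, _alpha) * (
    _digamma(_alpha.sum()) - _digamma(_alpha[0]) + onp.log(_x[0]))
_ap = _alpha.copy(); _ap[0] += _h
_am = _alpha.copy(); _am[0] -= _h
_numeric = (_dirichlet.pdf(_x, _ap) - _dirichlet.pdf(_x, _am)) / (2 * _h)
assert onp.isclose(_analytic, _numeric, rtol=1e-4)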
        v = solve_triangular(a, g, trans=_flip(a, trans), lower=lower)
        return -transpose(tri(anp.matmul(anp.reshape(v, ans.shape), T(ans))))
    return solve_triangular_grad
solve_triangular.defgrad(make_grad_solve_triangular)
solve_triangular.defgrad(lambda ans, a, b, trans=0, lower=False, **kwargs:
                         lambda g: solve_triangular(a, g, trans=_flip(a, trans), lower=lower),
                         argnum=1)

### cholesky

solve_trans = lambda L, X: solve_triangular(L, X, lower=True, trans='T')
solve_conj = lambda L, X: solve_trans(L, T(solve_trans(L, T(X))))
phi = lambda X: anp.tril(X) / (1. + anp.eye(X.shape[-1]))

cholesky = primitive(np.linalg.cholesky)
cholesky.defgrad(lambda L, A: lambda g: symm(solve_conj(L, phi(anp.matmul(T(L), g)))))

### operations on cholesky factors

solve_tri = partial(solve_triangular, lower=True)
solve_posdef_from_cholesky = lambda L, x: solve_tri(L, solve_tri(L, x), trans='T')

@primitive
def inv_posdef_from_cholesky(L, lower=True):
    flat_L = np.reshape(L, (-1,) + L.shape[-2:])
    return np.reshape(cyla.inv_posdef_from_cholesky(C(flat_L), lower), L.shape)

square_grad = lambda X: lambda g: anp.matmul(g, X) + anp.matmul(T(g), X)
sym_inv_grad = lambda Xinv: lambda g: -anp.matmul(Xinv, anp.matmul(g, Xinv))
"""Gradients of the univariate t distribution.""" from __future__ import absolute_import import scipy.stats import autograd.numpy as np from autograd.core import primitive from autograd.numpy.numpy_grads import unbroadcast from autograd.scipy.special import psi pdf = primitive(scipy.stats.t.pdf) cdf = primitive(scipy.stats.t.cdf) logpdf = primitive(scipy.stats.t.logpdf) logcdf = primitive(scipy.stats.t.logcdf) def grad_tlogpdf_diff(diff, df): return -diff * (1.0 + df) / (diff**2 + df) def grad_tlogpdf_x(x, df, loc, scale): return grad_tlogpdf_diff((x - loc) / scale, df) / scale def grad_tlogpdf_loc(x, df, loc, scale): return -grad_tlogpdf_diff((x - loc) / scale, df) / scale def grad_tlogpdf_scale(x, df, loc, scale): diff = x - loc return -(df * (scale**2 - diff**2))/(scale * (df * scale**2 + diff**2)) def grad_tlogpdf_df(x, df, loc, scale): y = (x - loc)/scale return 0.5 * ((y**2 * (df+1))/(df * (y**2 + df)) - np.log(y**2 / df + 1) - 1.0/df -psi(df/2.0) + psi((df + 1)/2.0)) pdf.defgrad(lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(ans, x, lambda g: g * ans * grad_tlogpdf_x( x, df, loc, scale)), argnum=0) pdf.defgrad(lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(ans, df, lambda g: g * ans * grad_tlogpdf_df( x, df, loc, scale)), argnum=1) pdf.defgrad(lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(ans, loc, lambda g: g * ans * grad_tlogpdf_loc( x, df, loc, scale)), argnum=2) pdf.defgrad(lambda ans, x, df, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: g * ans * grad_tlogpdf_scale(x, df, loc, scale)), argnum=3)
def make_natural_smoother_grad_arg0(intermediates, ans, forward_messages, pair_params):
    return primitive(lambda g: cython_natural_smoother_grad(g, intermediates))
def make_grad_hmm_logZ(intermediates, ans, hmm):
    _, pair_params, _ = hmm
    return primitive(lambda g: hmm_logZ_grad(g, intermediates))
import autograd.numpy as np
import scipy.stats as ss
import scipy.special as sp
from autograd.core import primitive

a2d = np.atleast_2d

beta = primitive(sp.beta)

def make_grad_beta(ans, X, sz=(1, 1), a=1, b=1):
    def gradient_product(g):
        # gradient not implemented; this stub returns None
        pass
    return gradient_product
beta.defgrad(make_grad_beta)

# ==================================================================

class choice_erp:
    @staticmethod
    def diffparms():
        return ["p"]

    @staticmethod
from __future__ import absolute_import
import scipy.stats
import autograd.numpy as np

from autograd.core import primitive
from autograd.numpy.numpy_grads import unbroadcast

pdf = primitive(scipy.stats.multivariate_normal.pdf)
logpdf = primitive(scipy.stats.multivariate_normal.logpdf)
entropy = primitive(scipy.stats.multivariate_normal.entropy)

# With thanks to Eric Bresch.
# Some formulas are from
# "An extended collection of matrix derivative results
#  for forward and reverse mode algorithmic differentiation"
# by Mike Giles
# https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf

def lower_half(mat):
    # Takes the lower half of the matrix, and half the diagonal.
    # Necessary since numpy only uses the lower half of the covariance matrix.
    if len(mat.shape) == 2:
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1, 2))
    else:
        raise ArithmeticError

def generalized_outer_product(mat):
    if len(mat.shape) == 1:
        return np.outer(mat, mat)
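# Illustrative aside, not part of the source: for a symmetric gradient
# contribution G, lower_half folds the upper triangle onto the lower triangle
# and halves the diagonal, so the symmetric matrix is recovered as
# lower_half(G) + lower_half(G).T. Tiny numeric check with plain numpy:
import numpy as onp

_G = onp.array([[2., 1., 0.],
                [1., 4., 3.],
                [0., 3., 6.]])
_LH = 0.5 * (onp.tril(_G) + onp.triu(_G, 1).T)
assert onp.allclose(_LH + _LH.T, _G)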