return 0.5 * (np.tril(mat) + np.triu(mat, 1).T) elif len(mat.shape) == 3: return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1,2)) else: raise ArithmeticError def generalized_outer_product(mat): if len(mat.shape) == 1: return np.outer(mat, mat) elif len(mat.shape) == 2: return np.einsum('ij,ik->ijk', mat, mat) else: raise ArithmeticError def covgrad(x, mean, cov): # I think once we have Cholesky we can make this nicer. solved = np.linalg.solve(cov, (x - mean).T).T return lower_half(np.linalg.inv(cov) - generalized_outer_product(solved)) logpdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, x, lambda g: -np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=0) logpdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, mean, lambda g: np.expand_dims(g, 1) * np.linalg.solve(cov, (x - mean).T).T), argnum=1) logpdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, cov, lambda g: -np.reshape(g, np.shape(g) + (1, 1)) * covgrad(x, mean, cov)), argnum=2) # Same as log pdf, but multiplied by the pdf (ans). pdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, x, lambda g: -g * ans * np.linalg.solve(cov, x - mean)), argnum=0) pdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, mean, lambda g: g * ans * np.linalg.solve(cov, x - mean)), argnum=1) pdf.defgrad(lambda ans, x, mean, cov: unbroadcast(ans, cov, lambda g: -g * ans * covgrad(x, mean, cov)), argnum=2) entropy.defgrad_is_zero(argnums=(0,)) entropy.defgrad(lambda ans, mean, cov: unbroadcast(ans, cov, lambda g: 0.5 * g * np.linalg.inv(cov).T), argnum=1)
"""Gradients of the normal distribution.""" from __future__ import absolute_import import scipy.stats import autogradwithbay.numpy as anp from autogradwithbay.core import primitive from autogradwithbay.numpy.numpy_grads import unbroadcast pdf = primitive(scipy.stats.norm.pdf) cdf = primitive(scipy.stats.norm.cdf) logpdf = primitive(scipy.stats.norm.logpdf) logcdf = primitive(scipy.stats.norm.logcdf) pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x, lambda g: -g * ans * (x - loc) / scale**2)) pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc, lambda g: g * ans * (x - loc) / scale**2), argnum=1) pdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: g * ans * (((x - loc)/scale)**2 - 1.0)/scale), argnum=2) cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x, lambda g: g * pdf(x, loc, scale))) cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc, lambda g: -g * pdf(x, loc, scale)), argnum=1) cdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: -g * pdf(x, loc, scale)*(x-loc)/scale), argnum=2) logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x, lambda g: -g * (x - loc) / scale**2)) logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc, lambda g: g * (x - loc) / scale**2), argnum=1) logpdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: g * (-1.0/scale + (x - loc)**2/scale**3)), argnum=2) logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, x, lambda g: g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale)))) logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, loc, lambda g: -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))), argnum=1) logcdf.defgrad(lambda ans, x, loc=0.0, scale=1.0: unbroadcast(ans, scale, lambda g: -g * anp.exp(logpdf(x, loc, scale) - logcdf(x, loc, scale))*(x-loc)/scale), argnum=2)