def test_diag():
    """
    Test that linalg.diag has the same behavior as numpy.diag.
    numpy.diag has two behaviors:

    (1) when given a vector, it returns a matrix with that vector as the
        diagonal.
    (2) when given a matrix, it returns a vector which is the diagonal of
        the matrix.

    (1) and (2) are tested by test_alloc_diag and test_extract_diag
    respectively. This test makes sure that linalg.diag instantiates
    the right op based on the dimension of the input.
    """
    # test that it builds a matrix with given diagonal when using vector inputs
    x = theano.tensor.vector()
    y = diag(x)
    assert y.owner.op.__class__ == AllocDiag

    # test that it extracts the diagonal when using matrix input
    x = theano.tensor.matrix()
    y = extract_diag(x)
    assert y.owner.op.__class__ == ExtractDiag

    # other types should raise error
    x = theano.tensor.tensor3()
    ok = False
    try:
        y = extract_diag(x)
    except TypeError:
        ok = True
    assert ok
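# For reference, a quick standalone illustration of the two numpy.diag
# behaviors described in the docstring above (plain numpy, no Theano needed):
import numpy as np

v = np.array([1, 2, 3])
M = np.diag(v)                   # (1) vector in -> matrix with v on the diagonal
assert (np.diag(M) == v).all()   # (2) matrix in -> vector of the diagonal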
def huang_wang_covariance_prior(name1, name2, d, observed_scatter_matrix=None,
                                observed_sample_size=0, model=None):
    """
    Construct a noninformative or informative prior for a multivariate normal
    covariance matrix, following Huang and Wang, "Simple Marginally
    Noninformative Prior Distributions for Covariance Matrices"
    (http://ba.stat.cmu.edu/journal/2013/vol08/issue02/huang.pdf).

    If no observations are included, the resulting prior has an almost flat
    half-t distribution over the variance of the variables, while effectively
    having a uniform prior (range [-1, 1]) for the correlation coefficients.
    This prior does not introduce a dependency between variance and
    correlation strength, as happens with a simple InverseWishart prior.

    Arguments:
        name1: Name for the Inverse Wishart distribution which will be created.
        name2: Name for the Inverse Gamma distribution which will be created
            as a prior for the diagonal elements of the inv_S param of the
            Inverse Wishart.
        d: Dimensionality, i.e. number of variables to create a joint
            covariance prior for.
        observed_scatter_matrix: d x d dimensional scatter matrix of (possibly
            virtual) observations to combine the noninformative prior with, to
            form an informative prior (or posterior).
        observed_sample_size: Number of (possibly virtual) observations
            summarized by the scatter matrix.
        model: (optional) the model.

    Returns:
        A tuple consisting of the covariance prior (an InverseWishart) and the
        hyperprior (InverseGamma) for the diagonal elements of the
        InverseWishart inv_S parameter.
    """
    A = float_info.max / 4.0  # Large number
    d_ones = np.ones(d, dtype=np.float64)
    a_hyperprior = InverseGamma(name2, d_ones / 2.0, d_ones / A, model=model)
    S = diag(4.0 / a_hyperprior)
    if observed_scatter_matrix is not None:
        S = S + observed_scatter_matrix
    cov_prior = InverseWishart(name1, d + 1 + observed_sample_size, S,
                               model=model)
    return cov_prior, a_hyperprior
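# A minimal usage sketch (hypothetical: assumes a `Model` context object
# matching the model= parameter above, and a centered (n, 3) observation
# array `data`; neither is defined in this module):
#
#     model = Model()
#     # Noninformative prior over a 3x3 covariance matrix:
#     cov_prior, a_hyperprior = huang_wang_covariance_prior(
#         'Sigma', 'a', d=3, model=model)
#
#     # Informative prior / posterior from (possibly virtual) observations:
#     scatter = np.dot(data.T, data)
#     cov_post, a_post = huang_wang_covariance_prior(
#         'Sigma_post', 'a_post', d=3, observed_scatter_matrix=scatter,
#         observed_sample_size=len(data), model=model)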
def __init__(self, kernel, X=None, Y=None):
    self.kernel = kernel
    self.X = X
    self.Y = Y
    self.th_hyp = self.kernel.th_hyp
    self.th_X = self.kernel.th_X
    self.th_N = self.kernel.th_N
    self.th_D = self.kernel.th_D
    self.th_K = self.kernel.th_K
    self.th_Y = T.matrix('Y')

    prec = sT.matrix_inverse(self.th_K)

    # Calculate the lml in a slow but stable way
    self.th_lml_stable = (
        -0.5 * sT.trace(T.dot(self.th_Y.T, T.dot(prec, self.th_Y))) +
        -T.sum(T.log(sT.diag(sT.cholesky(self.th_K)))) +
        -0.5 * self.th_N * T.log(2.0 * const.pi))
    # or in a fast but unstable way
    self.th_lml = (
        -0.5 * sT.trace(T.dot(self.th_Y.T, T.dot(prec, self.th_Y))) +
        -0.5 * T.log(sT.det(self.th_K)) +
        -0.5 * self.th_N * T.log(2.0 * const.pi))
    self.th_dlml_dhyp = theano.grad(self.th_lml, self.th_hyp)

    # Compile them to functions
    self.lml = theano.function([self.th_hyp, self.th_X, self.th_Y],
                               self.th_lml)
    self.lml_stable = theano.function([self.th_hyp, self.th_X, self.th_Y],
                                      self.th_lml_stable)
    self.dlml_dhyp = theano.function([self.th_hyp, self.th_X, self.th_Y],
                                     self.th_dlml_dhyp)
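# The "stable" expression above replaces -0.5 * log(det(K)) with a term built
# from the Cholesky factor, using the identity
# log(det(K)) = 2 * sum(log(diag(cholesky(K)))), which avoids the
# over/underflow that det() suffers for large or badly scaled K. A quick
# numpy check of that identity (illustration only, not part of the class):
import numpy as np

K = np.array([[4.0, 1.0],
              [1.0, 3.0]])  # any symmetric positive-definite matrix works
L = np.linalg.cholesky(K)
assert np.allclose(np.log(np.linalg.det(K)),
                   2.0 * np.sum(np.log(np.diag(L))))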
def grad(self, inp, cost_grad):
    """
    Note: The gradient is currently implemented for matrices only.
    """
    a, val = inp
    grad = cost_grad[0]
    if a.dtype.startswith('complex'):
        return [None, None]
    elif a.ndim > 2:
        raise NotImplementedError('%s: gradient is currently implemented'
                                  ' for matrices only' %
                                  self.__class__.__name__)
    wr_a = fill_diagonal(grad, 0)  # valid for any number of dimensions
    wr_val = diag(grad).sum()  # diag is only valid for matrices
    return [wr_a, wr_val]
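# Intuition for the two terms above: fill_diagonal writes the scalar `val`
# into every diagonal cell, so the chain rule accumulates the output gradient
# over the diagonal for wr_val, while wr_a is the output gradient with the
# (overwritten) diagonal zeroed out. A finite-difference check of the wr_val
# term, using numpy's np.fill_diagonal as a stand-in (illustration only):
import numpy as np

def _fill_diag(a, val):
    out = a.copy()
    np.fill_diagonal(out, val)
    return out

rng = np.random.RandomState(0)
a = rng.randn(3, 3)
g_out = rng.randn(3, 3)  # upstream gradient d(cost)/d(output)
cost = lambda v: np.sum(g_out * _fill_diag(a, v))  # scalar surrogate cost

eps = 1e-6
fd = (cost(0.5 + eps) - cost(0.5 - eps)) / (2 * eps)
assert np.allclose(fd, np.diag(g_out).sum())  # matches diag(grad).sum()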
def test_diag(self):
    # test that it builds a matrix with given diagonal when using
    # vector inputs
    x = theano.tensor.vector()
    y = diag(x)
    assert y.owner.op.__class__ == AllocDiag

    # test that it extracts the diagonal when using matrix input
    x = theano.tensor.matrix()
    y = extract_diag(x)
    assert y.owner.op.__class__ == ExtractDiag

    # other types should raise error
    x = theano.tensor.tensor3()
    ok = False
    try:
        y = extract_diag(x)
    except TypeError:
        ok = True
    assert ok
# [-1.4, 1.5],
# [1.4, -1.5],
# [-45.0, 83.5],
# [-100.3, 68.3],
# [1000.4, 432.4],
# [32441.8, 12341.3]])
N = 100
x = rnd.randn(N, 1)
D = x.shape[1]  # dimensionality of each sample
d = x - mu

# Three equivalent formulations of the multivariate normal log-likelihood:
# p:  product of the densities, then log (numerically the worst)
# p1: sum of log-densities, using det(sigma) directly
# p2: sum of log-densities, using the Cholesky factor for the log-determinant,
#     since log(det(sigma)) = 2 * sum(log(diag(cholesky(sigma))))
p = T.log(T.prod((2 * const.pi)**(-0.5 * D) *
                 sT.det(sigma)**-0.5 *
                 T.exp(sT.diag(-0.5 * T.dot(d, T.dot(prec, d.T))))))
p1 = T.sum(-0.5 * D * T.log(2 * const.pi) +
           -0.5 * T.log(sT.det(sigma)) +
           -0.5 * sT.diag(T.dot(d, T.dot(prec, d.T))))
p2 = T.sum(-0.5 * D * T.log(2 * const.pi) +
           -T.sum(T.log(sT.diag(sT.cholesky(sigma)))) +
           -0.5 * sT.diag(T.dot(d, T.dot(prec, d.T))))

fp = th.function([mu, sigma], p)
fp1 = th.function([mu, sigma], p1)
fp2 = th.function([mu, sigma], p2)
mu = T.vector('mu')
sigma = T.matrix('sigma')
prec = sT.matrix_inverse(sigma)
x = np.array([[1.5, -1.5],
              [-1.5, 1.5],
              [-1.4, 1.5],
              [1.4, -1.5]])
D = x.shape[1]
N = x.shape[0]
d = x - mu

#p = -(const.pi)T.dot(T.dot(d, sigma), d.T).trace()
#p = T.log((2*const.pi)**(-0.5*D) * )
#p = sT.det(sigma) + T.dot(mu, mu)
p = T.log(T.prod((2 * const.pi)**(-0.5 * D) *
                 sT.det(sigma)**-0.5 *
                 T.exp(sT.diag(-0.5 * T.dot(d, T.dot(prec, d.T))))))
dp_dmu = T.grad(p, mu)
dp_dsigma = T.grad(p, sigma)

fp = th.function([mu, sigma], p)
fd = th.function([mu], d)
fdp_dmu = th.function([mu, sigma], dp_dmu)
fdp_dsigma = th.function([mu, sigma], dp_dsigma)

curmu = np.array([7.5, -3.23])
cursig = np.array([[1.0, 0.0], [0.0, 1.0]])
# curmu = np.zeros(2)
# cursig = np.dot(x.T, x) / N

# Compare to multivariate normal
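# Sketch of that comparison, assuming scipy is available: fp computes the
# joint log-likelihood, which should match the sum of per-point logpdfs.
from scipy import stats

expected = stats.multivariate_normal.logpdf(x, mean=curmu, cov=cursig).sum()
print(fp(curmu, cursig), expected)  # should agree (up to fp's numerics)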