def likelihood(f, l, R, mu, eps, sigma2, lambda_1=1e-4):
    # The similarity matrix W is a linear combination of the slices in R
    W = T.tensordot(R, mu, axes=1)

    # The following indices correspond to labeled and unlabeled examples
    labeled = T.eq(l, 1).nonzero()

    # Calculating the graph Laplacian of W
    D = T.diag(W.sum(axis=0))
    L = D - W

    # The Covariance (or Kernel) matrix is the inverse of the (regularized) Laplacian
    epsI = eps * T.eye(L.shape[0])
    rL = L + epsI
    Sigma = nlinalg.matrix_inverse(rL)

    # The marginal density of labeled examples uses Sigma_LL as covariance (sub-)matrix
    Sigma_LL = Sigma[labeled][:, labeled][:, 0, :]

    # We also consider additive Gaussian noise with variance sigma2
    K_L = Sigma_LL + (sigma2 * T.eye(Sigma_LL.shape[0]))

    # Calculating the inverse and the determinant of K_L
    iK_L = nlinalg.matrix_inverse(K_L)
    dK_L = nlinalg.det(K_L)

    f_L = f[labeled]

    # The (L1-regularized) log-likelihood is given by the summation of the following four terms
    term_A = - (1 / 2) * f_L.dot(iK_L.dot(f_L))
    term_B = - (1 / 2) * T.log(dK_L)
    term_C = - (1 / 2) * T.log(2 * np.pi)
    term_D = - lambda_1 * T.sum(abs(mu))

    return term_A + term_B + term_C + term_D
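# A minimal usage sketch for the `likelihood` function above, assuming it is in
# scope together with the usual imports. The variable shapes (R as an [n, n, k]
# stack of similarity slices, mu as a length-k mixing vector) are assumptions
# inferred from T.tensordot(R, mu, axes=1), not stated in the source.
import numpy as np
import theano
import theano.tensor as T
from theano.tensor import nlinalg

f = T.dvector('f')        # targets / soft labels for all n examples
l = T.dvector('l')        # 1.0 for labeled, 0.0 for unlabeled examples
R = T.dtensor3('R')       # assumed [n, n, k] stack of similarity matrices
mu = T.dvector('mu')      # length-k mixing weights
eps, sigma2 = T.dscalar('eps'), T.dscalar('sigma2')

loglik_fn = theano.function([f, l, R, mu, eps, sigma2],
                            likelihood(f, l, R, mu, eps, sigma2))

n, k = 6, 3
rng = np.random.RandomState(0)
R_val = rng.rand(n, n, k)
R_val = 0.5 * (R_val + R_val.transpose(1, 0, 2))   # make each slice symmetric
l_val = np.array([1., 1., 0., 1., 0., 0.])
print(loglik_fn(rng.randn(n), l_val, R_val, np.ones(k), 1e-2, 1e-1))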
def compute_S(idx, Sp1, zAA, zBB):
    Sm = ifelse(T.eq(idx, nT-2),
                T.dot(zBB[iib[-1]], Tla.matrix_inverse(zAA[iia[-1]])),
                T.dot(zBB[iib[idx]],
                      Tla.matrix_inverse(zAA[iia[T.min([idx+1, nT-2])]]
                                         - T.dot(Sp1, T.transpose(zBB[iib[T.min([idx+1, nT-2])]])))))
    return Sm
def quadratic_saturating_loss(mx, Sx, target, Q, *args, **kwargs):
    '''
        Squashing loss penalty function
        c(x) = ( 1 - e^(-0.5*quadratic_loss(x, target)) )
    '''
    if Sx is None:
        if mx.ndim == 1:
            mx = mx[None, :]
        delta = mx - target[None, :]
        deltaQ = delta.dot(Q)
        cost = 1.0 - tt.exp(-0.5 * tt.batched_dot(deltaQ, delta))
        return cost
    else:
        # stochastic case (moment matching)
        delta = mx - target
        SxQ = Sx.dot(Q)
        EyeM = tt.eye(mx.shape[0])
        IpSxQ = EyeM + SxQ
        Ip2SxQ = EyeM + 2 * SxQ
        S1 = tt.dot(Q, matrix_inverse(IpSxQ))
        S2 = tt.dot(Q, matrix_inverse(Ip2SxQ))
        # S1 = solve(IpSxQ.T, Q.T).T
        # S2 = solve(Ip2SxQ.T, Q.T).T
        # mean
        m_cost = -tt.exp(-0.5 * delta.dot(S1).dot(delta)) / tt.sqrt(det(IpSxQ))
        # var
        s_cost = tt.exp(-delta.dot(S2).dot(delta)) / tt.sqrt(det(Ip2SxQ)) - m_cost**2

        return 1.0 + m_cost, s_cost
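# Hedged usage sketch for `quadratic_saturating_loss` above, assuming the
# defining module imports theano.tensor as tt plus matrix_inverse/det from
# nlinalg. It exercises both the deterministic branch and the moment-matching
# branch of the saturating cost.
import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.nlinalg import matrix_inverse, det

mx, target = tt.dvector('mx'), tt.dvector('target')
Sx, Q = tt.dmatrix('Sx'), tt.dmatrix('Q')

# deterministic branch (Sx is None): per-sample cost
det_fn = theano.function([mx, target, Q],
                         quadratic_saturating_loss(mx, None, target, Q))

# stochastic branch: mean and variance of the cost under N(mx, Sx)
m_cost, s_cost = quadratic_saturating_loss(mx, Sx, target, Q)
mm_fn = theano.function([mx, Sx, target, Q], [m_cost, s_cost])

d = 3
Q_val = np.eye(d)
print(det_fn(np.ones(d), np.zeros(d), Q_val))
print(mm_fn(np.ones(d), 0.1 * np.eye(d), np.zeros(d), Q_val))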
def compute_S(idx, Sp1, zAA, zBB):
    Sm = ifelse(
        T.eq(idx, nT - 2),
        T.dot(zBB[iib[-1]], Tla.matrix_inverse(zAA[iia[-1]])),
        T.dot(
            zBB[iib[idx]],
            Tla.matrix_inverse(zAA[iia[T.min([idx + 1, nT - 2])]] - T.dot(
                Sp1, T.transpose(zBB[iib[T.min([idx + 1, nT - 2])]])))))
    return Sm
def __init__(self, GenerativeParams, xDim, yDim, srng=None, nrng=None):
    super(LDS, self).__init__(GenerativeParams, xDim, yDim, srng, nrng)

    # parameters
    if 'A' in GenerativeParams:
        self.A = theano.shared(value=GenerativeParams['A'].astype(theano.config.floatX), name='A', borrow=True)  # dynamics matrix
    else:
        # TBD:MAKE A BETTER WAY OF SAMPLING DEFAULT A
        self.A = theano.shared(value=.5*np.diag(np.ones(xDim).astype(theano.config.floatX)), name='A', borrow=True)  # dynamics matrix

    if 'QChol' in GenerativeParams:
        self.QChol = theano.shared(value=GenerativeParams['QChol'].astype(theano.config.floatX), name='QChol', borrow=True)  # cholesky of innovation cov matrix
    else:
        self.QChol = theano.shared(value=(np.eye(xDim)).astype(theano.config.floatX), name='QChol', borrow=True)  # cholesky of innovation cov matrix

    if 'Q0Chol' in GenerativeParams:
        self.Q0Chol = theano.shared(value=GenerativeParams['Q0Chol'].astype(theano.config.floatX), name='Q0Chol', borrow=True)  # cholesky of starting distribution cov matrix
    else:
        self.Q0Chol = theano.shared(value=(np.eye(xDim)).astype(theano.config.floatX), name='Q0Chol', borrow=True)  # cholesky of starting distribution cov matrix

    if 'RChol' in GenerativeParams:
        self.RChol = theano.shared(value=np.ndarray.flatten(GenerativeParams['RChol'].astype(theano.config.floatX)), name='RChol', borrow=True)  # cholesky of observation noise cov matrix
    else:
        self.RChol = theano.shared(value=np.random.randn(yDim).astype(theano.config.floatX)/10, name='RChol', borrow=True)  # cholesky of observation noise cov matrix

    if 'x0' in GenerativeParams:
        self.x0 = theano.shared(value=GenerativeParams['x0'].astype(theano.config.floatX), name='x0', borrow=True)  # set to zero for stationary distribution
    else:
        self.x0 = theano.shared(value=np.zeros((xDim,)).astype(theano.config.floatX), name='x0', borrow=True)  # set to zero for stationary distribution

    if 'NN_XtoY_Params' in GenerativeParams:
        self.NN_XtoY = GenerativeParams['NN_XtoY_Params']['network']
    else:
        # Define a neural network that maps the latent state into the output
        gen_nn = lasagne.layers.InputLayer((None, xDim))
        self.NN_XtoY = lasagne.layers.DenseLayer(gen_nn, yDim, nonlinearity=lasagne.nonlinearities.linear, W=lasagne.init.Orthogonal())

    # set to our lovely initial values
    if 'C' in GenerativeParams:
        self.NN_XtoY.W.set_value(GenerativeParams['C'].astype(theano.config.floatX))
    if 'd' in GenerativeParams:
        self.NN_XtoY.b.set_value(GenerativeParams['d'].astype(theano.config.floatX))

    # we assume diagonal covariance (RChol is a vector)
    self.Rinv = 1./(self.RChol**2)  # Tla.matrix_inverse(T.dot(self.RChol, T.transpose(self.RChol)))
    self.Lambda = Tla.matrix_inverse(T.dot(self.QChol, self.QChol.T))
    self.Lambda0 = Tla.matrix_inverse(T.dot(self.Q0Chol, self.Q0Chol.T))

    # Call the neural network output a rate, basically to keep things consistent with the PLDS class
    self.rate = lasagne.layers.get_output(self.NN_XtoY, inputs=self.Xsamp)
def logNormalPDFmat(X, Mu, XChol, xDim):
    ''' Use this version when X is a matrix [N x xDim] '''
    Lambda = Tla.matrix_inverse(T.dot(XChol, T.transpose(XChol)))
    XMu = X - Mu
    return (-0.5 * T.dot(XMu, T.dot(Lambda, T.transpose(XMu)))
            + 0.5 * X.shape[0] * T.log(Tla.det(Lambda))
            - 0.5 * np.log(2 * np.pi) * X.shape[0] * xDim)
def get_bivariate_normal_spec():
    X1, X2, mu, sigma = [T.scalar('X1'), T.scalar('X2'), T.vector('mu'), T.matrix('sigma')]
    GaussianDensitySpec = FunctionSpec(
        variables=[X1, X2, mu, sigma],
        output_expression=-0.5 * T.dot(
            T.dot((T.concatenate([X1.dimshuffle('x'), X2.dimshuffle('x')]) - mu).T,
                  nlinalg.matrix_inverse(sigma)),
            (T.concatenate([X1.dimshuffle('x'), X2.dimshuffle('x')]) - mu)))
    return GaussianDensitySpec
def th_MvLDAN(data_inputs, labels):
    n_view = len(data_inputs)
    dtype = 'float32'
    mean = []
    std = []
    data = []
    for v in range(n_view):
        _data = theano.shared(data_inputs[v])
        _mean = T.mean(_data, axis=0).reshape([1, -1])
        _std = T.std(_data, axis=0).reshape([1, -1])
        _std += T.eq(_std, 0).astype(dtype)
        data.append((_data - _mean) / _std)
        mean.append(_mean)
        std.append(_std)
    Sw, Sb, _ = th_MvLDAN_Sw_Sb(data, labels)
    from theano.tensor import nlinalg
    eigvals, eigvecs = nlinalg.eig(T.dot(nlinalg.matrix_inverse(Sw), Sb))
    # evals = slinalg.eigvalsh(Sb, Sw)
    mean = list(theano.function([], mean)())
    std = list(theano.function([], std)())
    eigvals, eigvecs = theano.function([], [eigvals, eigvecs])()
    inx = np.argsort(eigvals)[::-1]
    eigvals = eigvals[inx]
    eigvecs = eigvecs[:, inx]
    W = []
    pre = 0
    for v in range(n_view):
        W.append(eigvecs[pre:pre + mean[v].shape[1], :])
        pre += mean[v].shape[1]
    return [mean, std], W, eigvals
def gaussian_kl_loss(mx, Sx, mt, St):
    '''
        Returns KL ( Normal(mx, Sx) || Normal(mt, St) )
    '''
    if St is None:
        target_samples = mt
        mt, St = empirical_gaussian_params(target_samples)
    if Sx is None:
        # evaluate empirical KL (expectation over the rolled out samples)
        x = mx
        mx, Sx = empirical_gaussian_params(x)

        def logprob(x, m, S):
            delta = x - m
            L = cholesky(S)
            beta = solve_lower_triangular(L, delta.T).T
            lp = -0.5 * tt.square(beta).sum(-1)
            lp -= tt.sum(tt.log(tt.diagonal(L)))
            lp -= (0.5 * m.size * tt.log(2 * np.pi)).astype(
                theano.config.floatX)
            return lp

        return (logprob(x, mx, Sx) - logprob(x, mt, St)).mean(0)
    else:
        delta = mt - mx
        Stinv = matrix_inverse(St)
        kl = tt.log(det(St)) - tt.log(det(Sx))
        kl += trace(Stinv.dot(delta.T.dot(delta) + Sx - St))
        return 0.5 * kl
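# Sketch exercising the closed-form branch of `gaussian_kl_loss` above and
# checking it against the standard Gaussian KL formula in NumPy. It assumes the
# means are passed as [1 x d] row matrices (so delta.T.dot(delta) is an outer
# product), which is how the expression above reduces to the usual KL.
import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.nlinalg import matrix_inverse, det, trace

mx, mt = tt.dmatrix('mx'), tt.dmatrix('mt')
Sx, St = tt.dmatrix('Sx'), tt.dmatrix('St')
kl_fn = theano.function([mx, Sx, mt, St], gaussian_kl_loss(mx, Sx, mt, St))

d = 2
mx_val, mt_val = np.zeros((1, d)), np.ones((1, d))
Sx_val, St_val = np.eye(d), 2.0 * np.eye(d)
print(kl_fn(mx_val, Sx_val, mt_val, St_val))

# reference: 0.5 * (log|St| - log|Sx| - d + tr(St^-1 Sx) + delta^T St^-1 delta)
delta = (mt_val - mx_val).ravel()
Stinv = np.linalg.inv(St_val)
ref = 0.5 * (np.log(np.linalg.det(St_val)) - np.log(np.linalg.det(Sx_val))
             - d + np.trace(Stinv.dot(Sx_val)) + delta.dot(Stinv).dot(delta))
print(ref)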
def test_gpu_matrix_inverse_inplace_opt(self):
    A = theano.tensor.fmatrix("A")
    fn = theano.function([A], matrix_inverse(A), mode=mode_with_gpu)
    assert any([
        node.op.inplace
        for node in fn.maker.fgraph.toposort()
        if isinstance(node.op, GpuMagmaMatrixInverse)
    ])
def propagate(f, l, R, mu, eps):
    # The similarity matrix W is a linear combination of the slices in R
    W = T.tensordot(R, mu, axes=1)

    # The following indices correspond to labeled and unlabeled examples
    labeled = T.eq(l, 1).nonzero()
    unlabeled = T.eq(l, 0).nonzero()

    # Calculating the graph Laplacian of W
    D = T.diag(W.sum(axis=0))
    L = D - W

    # Computing L_UU (the Laplacian over unlabeled examples)
    L_UU = L[unlabeled][:, unlabeled][:, 0, :]

    # Computing the inverse of the (regularized) Laplacian iA = (L_UU + epsI)^-1
    epsI = eps * T.eye(L_UU.shape[0])
    rL_UU = L_UU + epsI
    iA = nlinalg.matrix_inverse(rL_UU)

    # Computing W_UL (the similarity matrix between unlabeled and labeled examples)
    W_UL = W[unlabeled][:, labeled][:, 0, :]
    f_L = f[labeled]

    # f* = (L_UU + epsI)^-1 W_UL f_L
    f_star = iA.dot(W_UL.dot(f_L))

    return f_star
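# Minimal sketch of calling `propagate` above; the shapes mirror the example
# after the `likelihood` snippet (R as an assumed [n, n, k] stack of similarity
# slices) and are not stated explicitly in the source.
import numpy as np
import theano
import theano.tensor as T
from theano.tensor import nlinalg

f = T.dvector('f')
l = T.dvector('l')
R = T.dtensor3('R')
mu = T.dvector('mu')
eps = T.dscalar('eps')

propagate_fn = theano.function([f, l, R, mu, eps], propagate(f, l, R, mu, eps))

n, k = 5, 2
rng = np.random.RandomState(1)
R_val = rng.rand(n, n, k)
R_val = 0.5 * (R_val + R_val.transpose(1, 0, 2))
l_val = np.array([1., 1., 0., 0., 0.])
f_val = np.array([1., -1., 0., 0., 0.])
print(propagate_fn(f_val, l_val, R_val, np.ones(k), 1e-2))  # scores for the 3 unlabeled points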
def compute_D(idx, Dm1, zS, zAA, zBB):
    D = ifelse(T.eq(idx, nT-1),
               T.dot(Tla.matrix_inverse(zAA[iia[-1]]),
                     III + T.dot(T.transpose(zBB[iib[idx-1]]), T.dot(Dm1, S[0]))),
               ifelse(T.eq(idx, 0),
                      Tla.matrix_inverse(zAA[iia[0]] - T.dot(zBB[iib[0]], T.transpose(S[-1]))),
                      T.dot(Tla.matrix_inverse(zAA[iia[idx]]
                                               - T.dot(zBB[iib[T.min([idx, nT-2])]],
                                                       T.transpose(S[T.max([-idx-1, -nT+1])]))),
                            III + T.dot(T.transpose(zBB[iib[T.min([idx-1, nT-2])]]),
                                        T.dot(Dm1, S[-idx])))))
    return D
def _tf_solve_inverse(self, A, b, reg):
    ''' solve via pseudo inverse Ax=b return x= inv(A).b '''
    A2 = T.dot(A.T, A)
    A2reg = A2 + T.eye(A.shape[1]) * reg
    vv = T.dot(b, A)
    v = T.dot(vv, matrix_inverse(A2reg))
    return v
def _get_updates(self):
    n = self.params['batch_size']
    N = self.params['train_size']
    prec_lik = self.params['prec_lik']
    prec_prior = self.params['prec_prior']
    gc_norm = self.params['gc_norm']
    gamma = float(n + N) / n

    # compute log-likelihood
    error = self.model_outputs - self.true_outputs
    logliks = log_normal(error, prec_lik)
    sumloglik = logliks.sum()

    # compute gradient of likelihood wrt each data point
    grads = tensor.jacobian(expression=logliks, wrt=self.weights)
    grads = tensor.concatenate([g.flatten(ndim=2) for g in grads], axis=1)
    avg_grads = grads.mean(axis=0)
    dist_grads = grads - avg_grads

    # compute variance of gradient
    var_grads = (1. / (n - 1)) * tensor.dot(dist_grads.T, dist_grads)

    logprior = log_prior_normal(self.weights, prec_prior)
    grads_prior = tensor.grad(cost=logprior, wrt=self.weights)
    grads_prior = tensor.concatenate([g.flatten() for g in grads_prior])

    # update Fisher information
    I_t_next = (1 - 1 / self.it) * self.I_t + 1 / self.it * var_grads

    # compute noise
    if 'B' in self.params:
        B = self.params['B']
    else:
        B = gamma * I_t_next * N

    # B += np.eye(self.n_weights) * (10 ** -9)
    B_ch = slinalg.cholesky(B)
    noise = tensor.dot(((2. / tensor.sqrt(self.lr)) * B_ch),
                       trng.normal((self.n_weights, 1)))

    # expensive inversion
    inv_cond_mat = gamma * N * I_t_next + (4. / self.lr) * B
    cond_mat = nlinalg.matrix_inverse(inv_cond_mat)

    updates = []
    updates.append((self.I_t, I_t_next))
    updates.append((self.it, self.it + 1))

    # update the parameters
    updated_params = 2 * tensor.dot(
        cond_mat, grads_prior + N * avg_grads + noise.flatten())
    updated_params = updated_params.flatten()
    last_row = 0
    for p in self.weights:
        sub_index = np.prod(p.get_value().shape)
        up = updated_params[last_row:last_row + sub_index]
        up = up.reshape(p.shape)
        updates.append((p, up))
        last_row += sub_index

    return updates, sumloglik
def __call__(self, A, b, inference=False):
    if inference is True:
        solve = slinalg.Solve()
        x = solve(A, b)
    else:
        x = nlinalg.matrix_inverse(A).dot(b)
    return x
def blk_chol_inv(A, B, b, lower=True, transpose=False):
    '''
    Solve the equation Cx = b for x, where C is assumed to be a
    block-bi-diagonal matrix (where only the first (lower or upper)
    off-diagonal block is nonzero).

    Inputs:
    A - [T x n x n] tensor, where each A[i,:,:] is the ith block diagonal matrix
    B - [T-1 x n x n] tensor, where each B[i,:,:] is the ith (upper or lower)
        1st block off-diagonal matrix

    lower (default: True) - boolean specifying whether to treat B as the lower
          or upper 1st block off-diagonal of matrix C
    transpose (default: False) - boolean specifying whether to transpose the
          off-diagonal blocks B[i,:,:] (useful if you want to solve the
          problem C^T x = b with a representation of C)

    Outputs:
    x - solution of Cx = b
    '''
    if transpose:
        A = A.dimshuffle(0, 2, 1)
        B = B.dimshuffle(0, 2, 1)
    if lower:
        x0 = Tla.matrix_inverse(A[0]).dot(b[0])

        def lower_step(Akp1, Bk, bkp1, xk):
            return Tla.matrix_inverse(Akp1).dot(bkp1 - Bk.dot(xk))

        X = theano.scan(fn=lower_step, sequences=[A[1:], B, b[1:]],
                        outputs_info=[x0])[0]
        X = T.concatenate([T.shape_padleft(x0), X])
    else:
        xN = Tla.matrix_inverse(A[-1]).dot(b[-1])

        def upper_step(Akm1, Bkm1, bkm1, xk):
            return Tla.matrix_inverse(Akm1).dot(bkm1 - (Bkm1).dot(xk))

        X = theano.scan(fn=upper_step,
                        sequences=[A[:-1][::-1], B[::-1], b[:-1][::-1]],
                        outputs_info=[xN])[0]
        X = T.concatenate([T.shape_padleft(xN), X])[::-1]
    return X
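# Usage sketch for `blk_chol_inv` above: build a small block-bi-diagonal
# system, solve it symbolically, and check against a dense NumPy solve. The
# block sizes and the [T x n] shape of b are assumptions for illustration.
import numpy as np
import theano
import theano.tensor as T
import theano.tensor.nlinalg as Tla

A_sym = T.dtensor3('A')   # [T x n x n] diagonal blocks
B_sym = T.dtensor3('B')   # [T-1 x n x n] lower off-diagonal blocks
b_sym = T.dmatrix('b')    # [T x n] right-hand side
solve_fn = theano.function([A_sym, B_sym, b_sym],
                           blk_chol_inv(A_sym, B_sym, b_sym, lower=True))

nT, n = 4, 2
rng = np.random.RandomState(0)
A_val = np.array([np.eye(n) + 0.1 * rng.randn(n, n) for _ in range(nT)])
B_val = 0.1 * rng.randn(nT - 1, n, n)
b_val = rng.randn(nT, n)
x = solve_fn(A_val, B_val, b_val)

# dense reference: C has A[i] on the block diagonal and B[i-1] just below it
C = np.zeros((nT * n, nT * n))
for i in range(nT):
    C[i*n:(i+1)*n, i*n:(i+1)*n] = A_val[i]
    if i > 0:
        C[i*n:(i+1)*n, (i-1)*n:i*n] = B_val[i-1]
print(np.allclose(x.ravel(), np.linalg.solve(C, b_val.ravel())))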
def _calc_caylay_delta(step_size, param, gradient):
    A = Tensor.dot(((step_size / 2) * gradient).T, param) - Tensor.dot(param.T, ((step_size / 2) * gradient))
    I = Tensor.identity_like(A)
    temp = I + A
    # Q = Tensor.dot(batched_inv(temp.dimshuffle('x',0,1))[0], (I - A))
    Q = Tensor.dot(matrix_inverse(temp), I - A)
    update = Tensor.dot(param, Q)
    delta = (step_size / 2) * Tensor.dot((param + update), A)
    return update, delta
def test_inverse_singular():
    singular = numpy.array([[1, 0, 0]] + [[0, 1, 0]] * 2,
                           dtype=theano.config.floatX)
    a = tensor.matrix()
    f = function([a], matrix_inverse(a))
    try:
        f(singular)
    except numpy.linalg.LinAlgError:
        return
    assert False
def invLogDet(C):
    # Return inv(A) and log det A where A = C . C^T
    iC = nlinalg.matrix_inverse(C)
    iC.name = 'i' + C.name
    iA = T.dot(iC.T, iC)
    iA.name = 'i' + C.name[1:]
    logDetA = 2.0 * T.sum(T.log(T.abs_(T.diag(C))))
    logDetA.name = 'logDet' + C.name[1:]
    return (iA, logDetA)
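# Small check of `invLogDet` above against NumPy: pass the Cholesky factor of
# A (named 'cA' so the derived variable names line up) and compare inv(A) and
# log|A|. The test matrix is an arbitrary assumption.
import numpy as np
import theano
import theano.tensor as T
from theano.tensor import nlinalg

C = T.dmatrix('cA')            # Cholesky factor of A = C C^T
iA, logDetA = invLogDet(C)
fn = theano.function([C], [iA, logDetA])

A_np = np.array([[4., 1.], [1., 3.]])
C_np = np.linalg.cholesky(A_np)
iA_np, logDet_np = fn(C_np)
print(np.allclose(iA_np, np.linalg.inv(A_np)),
      np.isclose(logDet_np, np.log(np.linalg.det(A_np))))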
def invLogDet(C):
    # Return inv(A) and log det A where A = C . C^T
    iC = nlinalg.matrix_inverse(C)
    iC.name = "i" + C.name
    iA = T.dot(iC.T, iC)
    iA.name = "i" + C.name[1:]
    logDetA = 2.0 * T.sum(T.log(T.abs_(T.diag(C))))
    logDetA.name = "logDet" + C.name[1:]
    return (iA, logDetA)
def test_inverse_singular():
    singular = np.array([[1, 0, 0]] + [[0, 1, 0]] * 2,
                        dtype=theano.config.floatX)
    a = tensor.matrix()
    f = function([a], matrix_inverse(a))
    try:
        f(singular)
    except np.linalg.LinAlgError:
        return
    assert False
def compute_D(idx, Dm1, zS, zAA, zBB):
    D = ifelse(
        T.eq(idx, nT - 1),
        T.dot(
            Tla.matrix_inverse(zAA[iia[-1]]),
            III + T.dot(T.transpose(zBB[iib[idx - 1]]), T.dot(Dm1, S[0]))),
        ifelse(
            T.eq(idx, 0),
            Tla.matrix_inverse(zAA[iia[0]] -
                               T.dot(zBB[iib[0]], T.transpose(S[-1]))),
            T.dot(
                Tla.matrix_inverse(
                    zAA[iia[idx]] -
                    T.dot(zBB[iib[T.min([idx, nT - 2])]],
                          T.transpose(S[T.max([-idx - 1, -nT + 1])]))),
                III + T.dot(T.transpose(zBB[iib[T.min([idx - 1, nT - 2])]]),
                            T.dot(Dm1, S[-idx])))))
    return D
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)

    return bound(
        ((n - p - 1) * log(IVI) - trace(matrix_inverse(V).dot(X)) -
         n * p * log(2) - n * log(IVI) - 2 * multigammaln(p, n / 2)) / 2,
        n > (p - 1))
def gaussInit(muin, varin):
    d = muin.shape[0]
    vardet, varinv = nlinalg.det(varin), nlinalg.matrix_inverse(varin)
    logconst = -d / 2. * np.log(2 * PI) - .5 * T.log(vardet)

    def logP(x):
        submu = x - muin
        out = logconst - .5 * T.sum(submu * (T.dot(submu, varinv.T)), axis=1)
        return out

    return logP
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * tt.log(IXI) - trace(matrix_inverse(V).dot(X)) -
         n * p * tt.log(2) - n * tt.log(IVI) -
         2 * multigammaln(n / 2., p)) / 2,
        matrix_pos_def(X),
        tt.eq(X, X.T),
        n > (p - 1))
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * log(IXI) - trace(matrix_inverse(V).dot(X)) -
         n * p * log(2) - n * log(IVI) - 2 * multigammaln(n / 2., p)) / 2,
        n > (p - 1))
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * log(IXI) - trace(matrix_inverse(V).dot(X)) -
         n * p * log(2) - n * log(IVI) - 2 * multigammaln(n / 2., p)) / 2,
        gt(n, (p - 1)), all(gt(eigh(X)[0], 0)), eq(X, X.T))
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * T.log(IXI) - trace(matrix_inverse(V).dot(X)) -
         n * p * T.log(2) - n * T.log(IVI) -
         2 * multigammaln(n / 2., p)) / 2,
        T.all(eigh(X)[0] > 0), T.eq(X, X.T), n > (p - 1))
def project(x_val, x_coords, x_star_coords, cov_fn):
    """Projects a Gaussian process defined by `x_val` at `x_coords` onto a set
    of coordinates `x_star_coords`.

    :param x_val: values of the GP at `x_coords`
    :param x_coords: a set of coordinates for each `x_val`
    :param x_star_coords: a set of coordinates onto which to project the GP
    :param cov_fn: a covariance function returning a covariance matrix given
        a set of coordinates
    :returns: a vector of projected values at `x_star_coords`
    """
    kxx = cov_fn(x_coords)
    kxxtx = matrix_inverse(stabilize(kxx))
    kxxs = tt.dot(kxxtx, x_val)
    knew = cov_fn(x_star_coords, x_coords)
    return tt.dot(knew, kxxs)
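# Sketch of calling `project` above with a simple squared-exponential
# covariance. Both `rbf_cov` and the jitter-adding `stabilize` stand-in are
# hypothetical helpers for illustration, not the source's own definitions;
# they only need to live in the same module as `project` for the name lookup
# of `stabilize` to resolve.
import numpy as np
import theano
import theano.tensor as tt
from theano.tensor.nlinalg import matrix_inverse

def stabilize(K, jitter=1e-6):
    # add a small diagonal term so the inverse stays well conditioned
    return K + jitter * tt.eye(K.shape[0])

def rbf_cov(xa, xb=None, lengthscale=1.0):
    xb = xa if xb is None else xb
    sq_dist = tt.sum(tt.square(xa[:, None, :] - xb[None, :, :]), axis=-1)
    return tt.exp(-0.5 * sq_dist / lengthscale**2)

x_val = tt.dvector('x_val')
x_coords = tt.dmatrix('x_coords')
x_star_coords = tt.dmatrix('x_star_coords')
proj_fn = theano.function([x_val, x_coords, x_star_coords],
                          project(x_val, x_coords, x_star_coords, rbf_cov))

coords = np.linspace(0, 1, 5)[:, None]
star = np.array([[0.25], [0.75]])
print(proj_fn(np.sin(coords).ravel(), coords, star))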
def invert_weight_matrix_symb(w):
    invw = []
    for i in range(len(w)):
        # layer_weight = w[-(i+1)]
        if i % 2 == 1:
            layer_weight = w[-(i+1)]
            print("inv val", -(i+1+1), "of length", len(w))
            invw.append(matrix_inverse(layer_weight))
        else:
            layer_weight = w[-(i+1)]
            print("bias inv val", -(i+1-1), "of length", len(w))
            invw.append(-layer_weight)
    return invw
def blk_chol_inv(A, B, b, lower = True, transpose = False):
    '''
    Solve the equation Cx = b for x, where C is assumed to be a
    block-bi-diagonal matrix (where only the first (lower or upper)
    off-diagonal block is nonzero).

    Inputs:
    A - [T x n x n] tensor, where each A[i,:,:] is the ith block diagonal matrix
    B - [T-1 x n x n] tensor, where each B[i,:,:] is the ith (upper or lower)
        1st block off-diagonal matrix

    lower (default: True) - boolean specifying whether to treat B as the lower
          or upper 1st block off-diagonal of matrix C
    transpose (default: False) - boolean specifying whether to transpose the
          off-diagonal blocks B[i,:,:] (useful if you want to solve the
          problem C^T x = b with a representation of C)

    Outputs:
    x - solution of Cx = b
    '''
    if transpose:
        A = A.dimshuffle(0, 2, 1)
        B = B.dimshuffle(0, 2, 1)
    if lower:
        x0 = Tla.matrix_inverse(A[0]).dot(b[0])

        def lower_step(Akp1, Bk, bkp1, xk):
            return Tla.matrix_inverse(Akp1).dot(bkp1-Bk.dot(xk))

        X = theano.scan(fn = lower_step, sequences=[A[1:], B, b[1:]],
                        outputs_info=[x0])[0]
        X = T.concatenate([T.shape_padleft(x0), X])
    else:
        xN = Tla.matrix_inverse(A[-1]).dot(b[-1])

        def upper_step(Akm1, Bkm1, bkm1, xk):
            return Tla.matrix_inverse(Akm1).dot(bkm1-(Bkm1).dot(xk))

        X = theano.scan(fn = upper_step,
                        sequences=[A[:-1][::-1], B[::-1], b[:-1][::-1]],
                        outputs_info=[xN])[0]
        X = T.concatenate([T.shape_padleft(xN), X])[::-1]
    return X
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * log(IXI) - trace(matrix_inverse(V).dot(X)) -
         n * p * log(2) - n * log(IVI) - 2 * multigammaln(n / 2., p)) / 2,
        gt(n, (p - 1)),
        all(gt(eigh(X)[0], 0)),
        eq(X, X.T)
    )
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(((n - p - 1) * tt.log(IXI) -
                  trace(matrix_inverse(V).dot(X)) -
                  n * p * tt.log(2) - n * tt.log(IVI) -
                  2 * multigammaln(n / 2., p)) / 2,
                 matrix_pos_def(X),
                 tt.eq(X, X.T),
                 n > (p - 1))
def logp(self, X):
    nu = self.nu
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(((nu - p - 1) * tt.log(IXI) -
                  trace(matrix_inverse(V).dot(X)) -
                  nu * p * tt.log(2) - nu * tt.log(IVI) -
                  2 * multigammaln(nu / 2., p)) / 2,
                 matrix_pos_def(X),
                 tt.eq(X, X.T),
                 nu > (p - 1),
                 broadcast_conditions=False)
def cholInvLogDet(A, dim, jitter, fast=False):
    A_jitter = A + jitter * T.eye(dim)
    cA = myCholesky()(A_jitter)
    cA.name = "c" + A.name

    if fast:
        (iA, logDetA) = invLogDet(cA)
    else:
        iA = nlinalg.matrix_inverse(A_jitter)
        # logDetA = T.log( nlinalg.Det()(A_jitter) )
        logDetA = 2.0 * T.sum(T.log(T.abs_(T.diag(cA))))
        iA.name = "i" + A.name
        logDetA.name = "logDet" + A.name

    return (cA, iA, logDetA)
def cholInvLogDet(A, dim, jitter, fast=False):
    A_jitter = A + jitter * T.eye(dim)
    cA = myCholesky()(A_jitter)
    cA.name = 'c' + A.name

    if fast:
        (iA, logDetA) = invLogDet(cA)
    else:
        iA = nlinalg.matrix_inverse(A_jitter)
        #logDetA = T.log( nlinalg.Det()(A_jitter) )
        logDetA = 2.0 * T.sum(T.log(T.abs_(T.diag(cA))))
        iA.name = 'i' + A.name
        logDetA.name = 'logDet' + A.name

    return (cA, iA, logDetA)
def logp(self, value):
    S = self.Sigma
    nu = self.nu
    mu = self.mu
    d = S.shape[0]

    X = value - mu

    Q = X.dot(matrix_inverse(S)).dot(X.T).sum()
    log_det = tt.log(det(S))
    log_pdf = gammaln((nu + d) / 2.) - 0.5 * \
        (d * tt.log(np.pi * nu) + log_det) - gammaln(nu / 2.)
    log_pdf -= 0.5 * (nu + d) * tt.log(1 + Q / nu)
    return log_pdf
def test_inverse_correctness():
    rng = numpy.random.RandomState(utt.fetch_seed())

    r = rng.randn(4, 4).astype(theano.config.floatX)

    x = tensor.matrix()
    xi = matrix_inverse(x)

    ri = function([x], xi)(r)
    assert ri.shape == r.shape
    assert ri.dtype == r.dtype

    rir = numpy.dot(ri, r)
    rri = numpy.dot(r, ri)

    assert _allclose(numpy.identity(4), rir), rir
    assert _allclose(numpy.identity(4), rri), rri
def logp(self, X):
    nu = self.nu
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(((nu - p - 1) * tt.log(IXI) -
                  trace(matrix_inverse(V).dot(X)) -
                  nu * p * tt.log(2) - nu * tt.log(IVI) -
                  2 * multigammaln(nu / 2., p)) / 2,
                 matrix_pos_def(X),
                 tt.eq(X, X.T),
                 nu > (p - 1),
                 broadcast_conditions=False
                 )
def multiNormInit_sharedParams(mean, varmat, dim):
    '''
    :param mean: theano.tensor.TensorVariable
    :param varmat: theano.tensor.TensorVariable
    :param dim: number
    :return:
    '''
    d = dim
    const = -d / 2. * np.log(2 * PI) - 0.5 * T.log(T.abs_(tlin.det(varmat)))
    varinv = tlin.matrix_inverse(varmat)

    def loglik(x):
        subx = x - mean
        subxcvt = T.dot(subx, varinv)  # Nxd
        subxsqr = subx * subxcvt  # Nxd
        return -T.sum(subxsqr, axis=1) / 2. + const

    return loglik
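# Sketch of using `multiNormInit_sharedParams` above; `PI` and the `tlin`
# alias are assumed to be the module's np.pi constant and its
# theano.tensor.nlinalg import. With a standard normal, the returned log
# densities should match -d/2 log(2*pi) - ||x||^2 / 2.
import numpy as np
import theano
import theano.tensor as T
import theano.tensor.nlinalg as tlin

PI = np.pi

d = 2
mean = T.constant(np.zeros(d))
varmat = T.constant(np.eye(d))
loglik = multiNormInit_sharedParams(mean, varmat, d)

x = T.dmatrix('x')
loglik_fn = theano.function([x], loglik(x))

pts = np.array([[0.0, 0.0], [1.0, 1.0]])
print(loglik_fn(pts))
print(-d / 2. * np.log(2 * np.pi) - 0.5 * (pts ** 2).sum(axis=1))  # reference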
def negative_log_likelihood_symbolic(L, y, mu, R, eta, eps):
    """
    Negative Marginal Log-Likelihood in a Gaussian Process regression model.

    The marginal likelihood for a set of parameters \Theta is defined as follows:

        \log p(y | X, \Theta) = - 1/2 y^T K_y^{-1} y - 1/2 \log |K_y| - n/2 \log 2\pi,

    where K_y = K_f + \sigma^2_n I is the covariance matrix for the noisy targets y,
    and K_f is the covariance matrix for the noise-free latent f.
    """
    N = L.shape[0]

    W = T.tensordot(R, eta, axes=1)

    large_W = T.zeros((N * 2, N * 2))
    large_W = T.set_subtensor(large_W[:N, :N], 2. * mu * W)
    large_W = T.set_subtensor(large_W[N:, :N], T.diag(L))
    large_W = T.set_subtensor(large_W[:N, N:], T.diag(L))

    large_D = T.diag(T.sum(abs(large_W), axis=0))
    large_M = large_D - large_W

    PrecisionMatrix = T.inc_subtensor(large_M[:N, :N], mu * eps * T.eye(N))

    # Let's try to avoid singular matrices
    _EPSILON = 1e-8
    PrecisionMatrix += _EPSILON * T.eye(N * 2)

    # K matrix in a Gaussian Process regression model
    CovarianceMatrix = nlinalg.matrix_inverse(PrecisionMatrix)

    L_idx = L.nonzero()[0]
    y_l = y[L_idx]

    CovarianceMatrix_L = CovarianceMatrix[N + L_idx, :][:, N + L_idx]

    log_likelihood = 0.
    log_likelihood -= .5 * y_l.T.dot(CovarianceMatrix_L.dot(y_l))
    log_likelihood -= .5 * T.log(nlinalg.det(CovarianceMatrix_L))
    log_likelihood -= .5 * T.log(2 * np.pi)

    return -log_likelihood
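# Hedged sketch of compiling `negative_log_likelihood_symbolic` above. The
# interpretation of L as a length-N indicator vector and R as an [N, N, k]
# stack of similarity slices is an assumption read off T.diag(L) and
# T.tensordot(R, eta, axes=1).
import numpy as np
import theano
import theano.tensor as T
from theano.tensor import nlinalg

L = T.dvector('L')       # nonzero entries mark labeled examples
y = T.dvector('y')
mu, eps = T.dscalar('mu'), T.dscalar('eps')
R = T.dtensor3('R')
eta = T.dvector('eta')

nll_fn = theano.function([L, y, mu, R, eta, eps],
                         negative_log_likelihood_symbolic(L, y, mu, R, eta, eps))

N, k = 4, 2
rng = np.random.RandomState(0)
R_val = rng.rand(N, N, k)
R_val = 0.5 * (R_val + R_val.transpose(1, 0, 2))
print(nll_fn(np.array([1., 0., 1., 0.]), rng.randn(N), 1.0,
             R_val, np.ones(k), 1e-2))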
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        (
            (n - p - 1) * T.log(IXI)
            - trace(matrix_inverse(V).dot(X))
            - n * p * T.log(2)
            - n * T.log(IVI)
            - 2 * multigammaln(n / 2.0, p)
        )
        / 2,
        T.all(eigh(X)[0] > 0),
        T.eq(X, X.T),
        n > (p - 1),
    )
def f(self, x, sampling=True, **kwargs):
    x /= np.cast[theano.config.floatX](np.sqrt(self.dim_in))
    indx, indy = self.params[3], self.params[4]
    indx /= np.cast[theano.config.floatX](np.sqrt(self.dim_in))
    if sampling:
        noisex = sample_mult_noise(T.exp(self.params[-2]), indx.shape)
        noisey = sample_mult_noise(T.exp(self.params[-1]), indy.shape)
        indy *= noisey
        indx *= noisex
    Rr, Rc = T.exp(self.params[1]), T.exp(self.params[2])
    U = T.sqr(Rr)
    sigma11 = T.dot(indx * U.dimshuffle('x', 0), indx.T) + eps_ind * T.eye(self.n_inducing)
    sigma22 = T.dot(x * U.dimshuffle('x', 0), x.T)
    sigma12 = T.dot(indx * U.dimshuffle('x', 0), x.T)
    mu_ind = T.dot(indx, self.params[0])
    inv_sigma11 = Tn.matrix_inverse(sigma11)
    mu_x = T.dot(x, self.params[0]) + T.dot(sigma12.T, inv_sigma11).dot(indy - mu_ind)
    if not sampling:
        return mu_x
    sigma_x = Tn.extract_diag(sigma22 - T.dot(sigma12.T, inv_sigma11).dot(sigma12))
    std = T.outer(T.sqrt(sigma_x), Rc)
    out_sample = sample_gauss(mu_x, std)
    return out_sample
def make_inv(y, p1, p2, nlayers, gammas, betas, weights, inv_stds, means):
    inv_network_layers = []
    mlow = T.max(1.0 / y, axis=1)
    e = np.array(np.exp(1.0), dtype=T.config.floatX)
    mhigh = T.min(e / y, axis=1)
    m = p1 * (mhigh - mlow) + mlow
    unsoftmax = T.log(y * m.dimshuffle(0, 'x'))
    lastl = T.concatenate([unsoftmax, p2], axis=1)
    inv_network = lasagne.layers.InputLayer(shape=(None, 28*28), input_var=lastl)
    j = 1
    for i in range(nlayers):
        inv_network = lasagne.layers.NonlinearityLayer(inv_network, nonlinearity=inv_nonlinearity)
        inv_network_layers.append(inv_network)
        inv_network = InvBatchNormLayer(inv_network, gamma=gammas[-j], beta=betas[-j], inv_std=inv_stds[-j], mean=means[-j])
        inv_network_layers.append(inv_network)
        inv_network = lasagne.layers.DenseLayer(inv_network, 28*28, W=matrix_inverse(weights[-j]), b=None, nonlinearity=None)
        inv_network_layers.append(inv_network)
        j = j + 1
    return inv_network, inv_network_layers, p1, p2
def upper_step(Akm1, Bkm1, bkm1, xk):
    return Tla.matrix_inverse(Akm1).dot(bkm1 - (Bkm1).dot(xk))
def grad(self, inputs, g_outputs):
    [gz] = g_outputs
    [x] = inputs
    return [gz * matrix_inverse(x).T]
def step(self, X, states):
    inv = TLA.matrix_inverse(X)
    return inv, []
def lower_step(Akp1, Bk, bkp1, xk):
    return Tla.matrix_inverse(Akp1).dot(bkp1 - Bk.dot(xk))
def upper_step(Akm1, Bkm1, bkm1, xk):
    return Tla.matrix_inverse(Akm1).dot(bkm1-(Bkm1).dot(xk))
def compute_chol(Aip1, Bi, Li, Ci):
    Ci = T.dot(Bi.T, Tla.matrix_inverse(Li).T)
    Dii = Aip1 - T.dot(Ci, Ci.T)
    Lii = Tsla.cholesky(Dii)
    return [Lii, Ci]
def lower_step(Akp1, Bk, bkp1, xk):
    return Tla.matrix_inverse(Akp1).dot(bkp1-Bk.dot(xk))
def compute_chol(Aip1, Bi, Li, Ci):
    Ci = T.dot(Bi.T, Tla.matrix_inverse(Li).T)
    Dii = Aip1 - T.dot(Ci, Ci.T)
    Lii = Tsla.cholesky(Dii)
    return [Lii, Ci]
def ProjecVec(A, vec):
    """
    This function calculates the projection of vec onto the column space spanned by A
    """
    return T.dot(T.dot(T.dot(A, NL.matrix_inverse(T.dot(A.T, A) + 1e-8*T.eye(2))), A.T), vec)
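# Quick check of `ProjecVec` above: a vector already in the column space of A
# comes back (almost) unchanged, and a vector orthogonal to both columns maps
# to ~0. Note the snippet hard-codes T.eye(2), so A is assumed to have exactly
# two columns; the test matrix here is an illustrative assumption.
import numpy as np
import theano
import theano.tensor as T
import theano.tensor.nlinalg as NL

A = T.dmatrix('A')
vec = T.dvector('vec')
proj_fn = theano.function([A, vec], ProjecVec(A, vec))

A_val = np.array([[1., 0.], [0., 1.], [1., 1.]])
in_span = A_val.dot(np.array([2., -1.]))        # lies in the column space
print(proj_fn(A_val, in_span))                  # ~ [2., -1., 1.]
print(proj_fn(A_val, np.array([1., 1., -1.])))  # orthogonal to both columns -> ~ 0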
def get_normal_spec():
    X, mu, sigma = [T.vector('X'), T.vector('Mu'), T.matrix('Sigma')]
    GaussianDensitySpec = FunctionSpec(
        variables=[X, mu, sigma],
        output_expression=-0.5 * T.dot(T.dot((X - mu).T, nlinalg.matrix_inverse(sigma)), (X - mu)))
    return GaussianDensitySpec