def define_process(self):
    # Prior: a Student-t process, with covariance scaled by the degrees of freedom
    self.prior_freedom = self.freedom()
    self.prior_mean = self.location_space
    self.prior_covariance = self.kernel_f_space * self.prior_freedom
    self.prior_variance = tnl.extract_diag(self.prior_covariance)
    self.prior_std = tt.sqrt(self.prior_variance)
    self.prior_noise = tt.sqrt(tnl.extract_diag(self.kernel_space * self.prior_freedom))
    self.prior_median = self.prior_mean
    sigma = 2  # two standard deviations, roughly a 95% central band
    self.prior_quantile_up = self.prior_mean + sigma * self.prior_std
    self.prior_quantile_down = self.prior_mean - sigma * self.prior_std
    self.prior_noise_up = self.prior_mean + sigma * self.prior_noise
    self.prior_noise_down = self.prior_mean - sigma * self.prior_noise
    self.prior_sampler = self.prior_mean + self.random_scalar * cholesky_robust(self.prior_covariance).dot(self.random_th)

    # Posterior: the degrees of freedom grow with the number of observations,
    # and the conditional covariance is rescaled by coeff
    self.posterior_freedom = self.prior_freedom + self.inputs.shape[1]
    beta = (self.mapping_outputs - self.location_inputs).T.dot(
        tsl.solve(self.kernel_inputs, self.mapping_outputs - self.location_inputs))
    coeff = (self.prior_freedom + beta - 2) / (self.posterior_freedom - 2)
    self.posterior_mean = self.location_space + self.kernel_f_space_inputs.dot(
        tsl.solve(self.kernel_inputs, self.mapping_outputs - self.location_inputs))
    self.posterior_covariance = coeff * (self.kernel_f.cov(self.space_th) - self.kernel_f_space_inputs.dot(
        tsl.solve(self.kernel_inputs, self.kernel_f_space_inputs.T)))
    self.posterior_variance = tnl.extract_diag(self.posterior_covariance)
    self.posterior_std = tt.sqrt(self.posterior_variance)
    self.posterior_noise = coeff * tt.sqrt(tnl.extract_diag(self.kernel.cov(self.space_th) - self.kernel_f_space_inputs.dot(
        tsl.solve(self.kernel_inputs, self.kernel_f_space_inputs.T))))
    self.posterior_median = self.posterior_mean
    self.posterior_quantile_up = self.posterior_mean + sigma * self.posterior_std
    self.posterior_quantile_down = self.posterior_mean - sigma * self.posterior_std
    self.posterior_noise_up = self.posterior_mean + sigma * self.posterior_noise
    self.posterior_noise_down = self.posterior_mean - sigma * self.posterior_noise
    self.posterior_sampler = self.posterior_mean + self.random_scalar * cholesky_robust(self.posterior_covariance).dot(self.random_th)
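# Illustrative NumPy sketch (not from the source) of the Student-t posterior
# update used above; all names (mu_s, K_ss, k_sx, K_xx, nu, ...) are assumptions.
# The plain Gaussian-process update in the snippets below is the special case coeff = 1.
import numpy as np

def t_posterior(mu_s, K_ss, k_sx, K_xx, mu_x, y, nu):
    alpha = np.linalg.solve(K_xx, y - mu_x)
    beta = (y - mu_x) @ alpha                    # Mahalanobis term of the data
    nu_post = nu + len(y)                        # posterior degrees of freedom
    coeff = (nu + beta - 2.0) / (nu_post - 2.0)  # covariance rescaling
    mean = mu_s + k_sx @ alpha
    cov = coeff * (K_ss - k_sx @ np.linalg.solve(K_xx, k_sx.T))
    return mean, cov, nu_post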
def th_define_process(self):
    # Basic tensors: map the observed outputs to the latent space and back
    self.mapping_outputs = tt_to_num(self.f_mapping.inv(self.th_outputs))
    self.mapping_latent = tt_to_num(self.f_mapping(self.th_outputs))

    # Prior locations and covariances over the space and the training inputs
    self.prior_location_space = self.f_location(self.th_space)
    self.prior_location_inputs = self.f_location(self.th_inputs)
    self.prior_kernel_space = tt_to_cov(self.f_kernel_noise.cov(self.th_space))
    self.prior_kernel_inputs = tt_to_cov(self.f_kernel_noise.cov(self.th_inputs))
    self.prior_cholesky_space = cholesky_robust(self.prior_kernel_space)
    self.prior_kernel_f_space = self.f_kernel.cov(self.th_space)
    self.prior_kernel_f_inputs = self.f_kernel.cov(self.th_inputs)
    self.prior_cholesky_f_space = cholesky_robust(self.prior_kernel_f_space)
    self.cross_kernel_space_inputs = tt_to_num(self.f_kernel_noise.cov(self.th_space, self.th_inputs))
    self.cross_kernel_f_space_inputs = tt_to_num(self.f_kernel.cov(self.th_space, self.th_inputs))

    # Posterior location and covariance via the standard GP conditional update
    self.posterior_location_space = self.prior_location_space + self.cross_kernel_space_inputs.dot(
        tsl.solve(self.prior_kernel_inputs, self.mapping_outputs - self.prior_location_inputs))
    self.posterior_location_f_space = self.prior_location_space + self.cross_kernel_f_space_inputs.dot(
        tsl.solve(self.prior_kernel_inputs, self.mapping_outputs - self.prior_location_inputs))
    self.posterior_kernel_space = self.prior_kernel_space - self.cross_kernel_space_inputs.dot(
        tsl.solve(self.prior_kernel_inputs, self.cross_kernel_space_inputs.T))
    self.posterior_cholesky_space = cholesky_robust(self.posterior_kernel_space)
    self.posterior_kernel_f_space = self.prior_kernel_f_space - self.cross_kernel_f_space_inputs.dot(
        tsl.solve(self.prior_kernel_inputs, self.cross_kernel_f_space_inputs.T))
    self.posterior_cholesky_f_space = cholesky_robust(self.posterior_kernel_f_space)

    # Diagonals are clipped to be non-negative before taking square roots
    self.prior_kernel_diag_space = tt_to_bounded(tnl.extract_diag(self.prior_kernel_space), zero32)
    self.prior_kernel_diag_f_space = tt_to_bounded(tnl.extract_diag(self.prior_kernel_f_space), zero32)
    self.posterior_kernel_diag_space = tt_to_bounded(tnl.extract_diag(self.posterior_kernel_space), zero32)
    self.posterior_kernel_diag_f_space = tt_to_bounded(tnl.extract_diag(self.posterior_kernel_f_space), zero32)
    self.prior_kernel_sd_space = tt.sqrt(self.prior_kernel_diag_space)
    self.prior_kernel_sd_f_space = tt.sqrt(self.prior_kernel_diag_f_space)
    self.posterior_kernel_sd_space = tt.sqrt(self.posterior_kernel_diag_space)
    self.posterior_kernel_sd_f_space = tt.sqrt(self.posterior_kernel_diag_f_space)
    self.prior_cholesky_diag_space = tnl.alloc_diag(self.prior_kernel_sd_space)
    self.prior_cholesky_diag_f_space = tnl.alloc_diag(self.prior_kernel_sd_f_space)
    self.posterior_cholesky_diag_space = tnl.alloc_diag(self.posterior_kernel_sd_space)
    self.posterior_cholesky_diag_f_space = tnl.alloc_diag(self.posterior_kernel_sd_f_space)
def test_extract_diag_empty(self):
    c = self.shared(np.array([[], []], self.floatX))
    f = theano.function([], extract_diag(c), mode=self.mode)
    assert [isinstance(node.inputs[0].type, self.type)
            for node in f.maker.fgraph.toposort()
            if isinstance(node.op, ExtractDiag)] == [True]
def marginal_tgp(self):
    value = tt.vector('marginal_tgp')
    value.tag.test_value = zeros(1)
    # Gaussian density of the inverse-mapped values, corrected by the
    # Jacobian of the inverse mapping (change of variables)
    delta = self.mapping.inv(value) - self.mean(self.space)
    cov = self.kernel.cov(self.space)
    cho = cholesky_robust(cov)
    L = sL.solve_lower_triangular(cho, delta)
    return value, tt.exp(-np.float32(0.5) * (cov.shape[0].astype(th.config.floatX) * tt.log(np.float32(2.0 * np.pi))
                                             + L.T.dot(L))
                         - tt.sum(tt.log(nL.extract_diag(cho)))
                         + self.mapping.logdet_dinv(value))
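# Note: the expression returned by marginal_tgp above is the change-of-variables
# density of the transformed GP. With latent residual delta = g_inv(y) - m and
# L = solve_lower_triangular(chol(K), delta), so that L'L = delta' K^-1 delta,
# it evaluates
#   p(y) = exp(-0.5 * (n * log(2*pi) + delta' K^-1 delta)
#              - sum(log(diag(chol(K)))) + log|det d g_inv / dy|),
# where -sum(log(diag(chol(K)))) = -0.5 * log|K|: the Gaussian density of
# g_inv(y) times the Jacobian of the inverse mapping.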
def prior_gp(self, cov=False, noise=False):
    mu = self.mean(self.space)
    if noise:
        k_cov = self.kernel.cov(self.space)
    else:
        k_cov = self.kernel_f.cov(self.space)
    var = nL.extract_diag(k_cov)
    if cov:
        return mu, var, k_cov
    else:
        return mu, var
def test_diag(self):
    # test that it builds a matrix with given diagonal when using
    # vector inputs
    x = theano.tensor.vector()
    y = diag(x)
    assert y.owner.op.__class__ == AllocDiag

    # test that it extracts the diagonal when using matrix input
    x = theano.tensor.matrix()
    y = extract_diag(x)
    assert y.owner.op.__class__ == ExtractDiag

    # other types should raise an error
    x = theano.tensor.tensor3()
    ok = False
    try:
        y = extract_diag(x)
    except TypeError:
        ok = True
    assert ok
def test_extract_diag(self):
    rng = np.random.RandomState(utt.fetch_seed())
    m = rng.rand(2, 3).astype(self.floatX)
    x = self.shared(m)
    g = extract_diag(x)
    f = theano.function([], g)
    assert [isinstance(node.inputs[0].type, self.type)
            for node in f.maker.fgraph.toposort()
            if isinstance(node.op, ExtractDiag)] == [True]

    for shp in [(2, 3), (3, 2), (3, 3), (1, 1), (0, 0)]:
        m = rng.rand(*shp).astype(self.floatX)
        x.set_value(m)
        v = np.diag(m)
        r = f()
        # The right diagonal is extracted
        assert (r == v).all()

    # Test that we accept only matrices
    xx = theano.tensor.vector()
    ok = False
    try:
        extract_diag(xx)
    except TypeError:
        ok = True
    except ValueError:
        ok = True
    assert ok

    # Test infer_shape
    f = theano.function([], g.shape)
    topo = f.maker.fgraph.toposort()
    if config.mode != 'FAST_COMPILE':
        assert sum([node.op.__class__ == ExtractDiag for node in topo]) == 0
    for shp in [(2, 3), (3, 2), (3, 3)]:
        m = rng.rand(*shp).astype(self.floatX)
        x.set_value(m)
        assert f() == min(shp)
def local_det_chol(node):
    """
    If we have det(X) and there is already an L = cholesky(X) floating
    around, then we can use prod(diag(L)) ** 2 to get the determinant.
    """
    if node.op == det:
        x, = node.inputs
        for (cl, xpos) in x.clients:
            if isinstance(cl.op, Cholesky):
                L = cl.outputs[0]
                return [tensor.prod(extract_diag(L) ** 2)]
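# Quick NumPy check (illustrative, not from the source) of the identity this
# rewrite relies on: for X = L L', det(X) = prod(diag(L)) ** 2.
import numpy as np

A = np.random.rand(4, 4)
X = A @ A.T + 4 * np.eye(4)   # symmetric positive definite
L = np.linalg.cholesky(X)     # X = L @ L.T
assert np.isclose(np.linalg.det(X), np.prod(np.diag(L)) ** 2)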
def subprocess_gp(self, subkernel, cov=False, noise=False):
    k_ni = subkernel.cov(self.space, self.inputs)
    mu = self.mean(self.space) + k_ni.dot(sL.solve(self.cov_inputs, self.inv_outputs - self.mean_inputs))
    if noise:
        k_cov = self.kernel.cov(self.space) - k_ni.dot(sL.solve(self.cov_inputs, k_ni.T))
    else:
        k_cov = self.kernel_f.cov(self.space) - k_ni.dot(sL.solve(self.cov_inputs, k_ni.T))
    var = nL.extract_diag(debug(k_cov, 'k_cov'))
    if cov:
        return mu, var, k_cov
    else:
        return mu, var
def _get_updates(self):
    n = self.params['batch_size']
    N = self.params['train_size']
    prec_lik = self.params['prec_lik']
    prec_prior = self.params['prec_prior']
    gc_norm = self.params['gc_norm']
    alpha = self.params['alpha']
    mu = self.params['mu']
    use_gamma = self.params['use_gamma']

    # compute log-likelihood
    error = self.model_outputs - self.true_outputs
    logliks = log_normal(error, prec_lik)
    sumloglik = logliks.sum()
    meanloglik = sumloglik / n

    # compute gradients
    grads = tensor.grad(cost=meanloglik, wrt=self.weights)

    # update preconditioning matrix
    V_t_next = [alpha * v + (1 - alpha) * g * g
                for g, v in zip(grads, self.V_t)]
    G_t = [1. / (mu + tensor.sqrt(v)) for v in V_t_next]

    logprior = log_prior_normal(self.weights, prec_prior)
    grads_prior = tensor.grad(cost=logprior, wrt=self.weights)

    updates = []
    for v, v_n in zip(self.V_t, V_t_next):
        updates.append((v, v_n))

    for p, g, gp, gt in zip(self.weights, grads, grads_prior, G_t):
        # inject noise
        noise = tensor.sqrt(self.lr * gt) * trng.normal(p.shape)
        if use_gamma:
            # compute gamma
            gamma = nlinalg.extract_diag(
                tensor.jacobian(gt.flatten(), p).flatten(ndim=2))
            gamma = gamma.reshape(p.shape)
            updates.append((p, p + 0.5 * self.lr * ((gt * (gp + N * g)) + gamma) + noise))
        else:
            updates.append((p, p + 0.5 * self.lr * (gt * (gp + N * g)) + noise))

    return updates, sumloglik
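# Illustrative scalar sketch (not from the source) of the preconditioned SGLD
# step implemented above, ignoring the gamma curvature correction; all names
# here are assumptions.
import numpy as np

def psgld_step(p, grad_loglik, grad_logprior, v, lr, N,
               alpha=0.99, mu=1e-5, rng=np.random):
    v = alpha * v + (1 - alpha) * grad_loglik ** 2   # RMSprop-style second moment
    g = 1.0 / (mu + np.sqrt(v))                      # diagonal preconditioner G_t
    noise = np.sqrt(lr * g) * rng.standard_normal()  # Langevin noise ~ N(0, lr * G_t)
    p = p + 0.5 * lr * g * (grad_logprior + N * grad_loglik) + noise
    return p, v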
def __init__(self, dim, name=None, scale=None):
    super(LinLayer, self).__init__(dim, name)
    # define weight mask and weight
    self.scale = (.0002 / self.dim) ** .5
    if scale:
        self.scale = scale
    mask = np.triu(np.ones((dim, dim)))
    weight = mathZ.weightsInit(dim, dim, scale=self.scale, normalise=True)  # TODO scaling
    self.mask = utilsT.sharedf(mask)
    self.w = utilsT.sharedf(weight * mask)
    self.b = utilsT.sharedf(np.zeros(dim))
    self.u = utilsT.sharedf(mathZ.biasInit(dim, mean=0, scale=self.scale) / 2)
    self.wmked = self.mask * self.w  # masked weight
    self.wdiag = tlin.extract_diag(self.wmked)
    self.params = [self.w, self.b, self.u]
    self.paramshapes = [(dim, dim), (dim,), (dim,)]
def f(self, x, sampling=True, **kwargs):
    x /= np.cast[theano.config.floatX](np.sqrt(self.dim_in))
    indx, indy = self.params[3], self.params[4]
    indx /= np.cast[theano.config.floatX](np.sqrt(self.dim_in))
    if sampling:
        # multiplicative noise on the inducing inputs and outputs
        noisex = sample_mult_noise(T.exp(self.params[-2]), indx.shape)
        noisey = sample_mult_noise(T.exp(self.params[-1]), indy.shape)
        indy *= noisey
        indx *= noisex
    Rr, Rc = T.exp(self.params[1]), T.exp(self.params[2])
    U = T.sqr(Rr)
    # Gaussian conditional on the inducing points
    sigma11 = T.dot(indx * U.dimshuffle('x', 0), indx.T) + eps_ind * T.eye(self.n_inducing)
    sigma22 = T.dot(x * U.dimshuffle('x', 0), x.T)
    sigma12 = T.dot(indx * U.dimshuffle('x', 0), x.T)
    mu_ind = T.dot(indx, self.params[0])
    inv_sigma11 = Tn.matrix_inverse(sigma11)
    mu_x = T.dot(x, self.params[0]) + T.dot(sigma12.T, inv_sigma11).dot(indy - mu_ind)
    if not sampling:
        return mu_x
    sigma_x = Tn.extract_diag(sigma22 - T.dot(sigma12.T, inv_sigma11).dot(sigma12))
    std = T.outer(T.sqrt(sigma_x), Rc)
    out_sample = sample_gauss(mu_x, std)
    return out_sample
def __init__(self, name, dim, lr):
    '''
    out = x + tanh(x*w + b)

    :param name: str
    :param dim: int, dimension of the input nodes
    :param lr: theano symbolic, learning rate
    '''
    super(IafLinear, self).__init__(name)
    self.lr = lr
    self.dimin = self.dimout = dim
    self.mask = weights.autoregMaskL(self.dimin)
    scale = (.0002 / self.dimin) ** 0.5
    self.w = weights.linAutoregInitGauss(self.dimin, scale=scale, name='w')
    self.b = weights.biasInitRandn(self.dimout, mean=0, scale=scale, name='b')
    self.u = weights.biasInitRandn(self.dimout, mean=0, scale=scale, name='u')
    self.params = [self.w, self.b, self.u]
    self.paramshapes = [(dim, dim), (dim,), (dim,)]
    self.wdiag = Tlin.extract_diag(self.w)
    self.meanlogdetjaco = T.fscalar()
    self.cost = T.fscalar()
def logDetJacobian(self):
    # for a triangular Jacobian, log|det J| is the sum of log absolute diagonals
    diags = Tlin.extract_diag(self.w)
    return T.sum(T.log(T.abs_(diags)))
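# Small NumPy check (illustrative, not from the source): for a triangular
# matrix, log|det| equals the sum of the log absolute diagonal entries.
import numpy as np

w = np.triu(np.random.rand(5, 5) + 0.5)  # triangular, as enforced by the mask
sign, logdet = np.linalg.slogdet(w)
assert np.isclose(np.sum(np.log(np.abs(np.diag(w)))), logdet)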