Example #1
File: binmixND.py Project: dvav/clonosGP
def get_model(data, K, alpha, sigma, sigma2, eta, *args, **kargs):
    r = data.pivot(index='MUTID', columns='SAMPLEID', values='r').values
    R = data.pivot(index='MUTID', columns='SAMPLEID', values='R').values
    VAF0 = data.pivot(index='MUTID', columns='SAMPLEID', values='VAF0').values
    r, R, VAF0 = r[:, :, None], R[:, :, None], VAF0[:, :, None]

    nsamples = data.SAMPLEID.nunique()

    idxs = aux.corr_vector_to_matrix_indices(nsamples)
    D = tns.eye(nsamples) * sigma**2
    with pmc.Model() as model:
        # alpha = pmc.Gamma('alpha', 1.0, 1.0)
        u = pmc.Beta('u', 1.0, alpha, shape=K - 1)
        lw = pmc.Deterministic('lw', aux.stick_breaking_log(u))

        C_ = pmc.LKJCorr('C', eta=eta, n=nsamples)
        C = tns.fill_diagonal(C_[idxs], 1.0)
        Sigma = D.dot(C)
        psi = pmc.MvNormal('psi',
                           mu=nmp.zeros(nsamples),
                           cov=Sigma,
                           shape=(K, nsamples))
        phi = pmc.Deterministic('phi', pmc.invlogit(psi.T))

        # psi = pmc.MvNormal('psi', mu=nmp.zeros(nsamples), cov=D, shape=(K, nsamples))
        # phi = pmc.Deterministic('phi', pmc.invlogit(psi.T))

        theta = pmc.Deterministic('theta', VAF0 * phi[None, :, :])

        pmc.DensityDist('r', aux.binmixND_logp_fcn(R, theta, lw), observed=r)
    return model
Example #2
    def Kcost_nesterov(self, learning_rate = 1e-2, epsilon = 1, gamma = 0.9):
        """
        Returns the cost and parameter updates for SGD with Nesterov's accelerated gradient.
        """
        print ('Using Nesterov with gamma = %f, learning rate = %f, epsilon = %f'\
         % (gamma, learning_rate, epsilon))
        if self.gpu:
            vW = theano.shared(np.zeros(self.W.eval().shape).astype(np.float32))
            vb = theano.shared(np.zeros(self.b.eval().shape).astype(np.float32))
        else:
            vW = theano.shared(np.zeros(self.W.eval().shape))
            vb = theano.shared(np.zeros(self.b.eval().shape))


        nextW = self.W - gamma * vW
        nextb = self.b - gamma * vb

        cost = T.mean(T.exp((0.5 - self.x) * (T.dot(self.x,\
                T.fill_diagonal(nextW, 0)) + nextb))) * epsilon

        Wgrad = T.grad(cost, nextW)
        bgrad = T.grad(cost, nextb)

        gparams = [Wgrad, bgrad]
        momentum = [vW, vb]
        momentum_updates = [(v, gamma * v + learning_rate * gparam)\
        for v, gparam in zip(momentum, gparams)]

        updates = [(param, param - v) \
        for param, v in zip(self.params, momentum)]

        updates = updates + momentum_updates

        return cost, updates
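
A minimal NumPy sketch (a standalone illustration, not taken from the project above) of the update pattern that Kcost_nesterov builds symbolically: the gradient is taken at the look-ahead point W - gamma * v, and Theano applies all update pairs simultaneously from the old shared values.

def nesterov_step(theta, v, grad_fn, learning_rate=1e-2, gamma=0.9):
    # Mirror of the Theano updates above: the parameter step uses the previous
    # velocity, while the new velocity is built from the gradient evaluated at
    # the look-ahead point theta - gamma * v.
    lookahead = theta - gamma * v
    v_new = gamma * v + learning_rate * grad_fn(lookahead)
    theta_new = theta - v
    return theta_new, v_new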
Example #3
def get_model(x, r, R, vaf0, K=10):
    nsamples = r.shape[1]
    r, R, vaf0 = r[:, :, None], R[:, :, None], vaf0[:, :, None]
    idxs = aux.corr_vector_to_matrix_indices(K)
    with pmc.Model() as model:
        w = pmc.Dirichlet('w', nmp.ones(K))
        lw = tns.log(w)

        # alpha = pmc.Gamma('alpha', 1.0, 1.0)
        # u = pmc.Beta('u', 1.0, alpha, shape=K-1)
        # lw = aux.stick_breaking_log(u)

        rho = pmc.Gamma('rho', 1.0, 1.0)
        Cc = tns.fill_diagonal(pmc.LKJCorr('C', eta=2.0, n=K)[idxs], 1.0)
        Cr = aux.cov_quad_exp(x, 1.0, rho)
        mu_psi = pmc.MatrixNormal('mu_psi',
                                  mu=nmp.zeros((nsamples, K)),
                                  rowcov=Cr,
                                  colcov=Cc,
                                  shape=(nsamples, K))
        psi = pmc.Normal('psi', mu=mu_psi, sd=0.1, shape=(nsamples, K))
        phi = pmc.Deterministic('phi', pmc.invlogit(psi))

        # psi = pmc.MvNormal('psi', mu=nmp.zeros(K), tau=nmp.eye(K), shape=(nsamples, K))
        # phi = pmc.Deterministic('phi', pmc.invlogit(psi))

        theta = pmc.Deterministic('theta', vaf0 * phi[None, :, :])
        pmc.DensityDist('r', aux.binmixND_logp_fcn(R, theta, lw), observed=r)
    return model
Example #4
def cost_var(X, Y, sigma, Adj, l_kl, l_e, l_c, l_r, r_eps):
    N = X.shape[0]
    num_edges = 0.5 * T.sum(Adj)

    # Used to normalize s.t. the l_*'s sum up to one.
    l_sum = l_kl + l_e + l_c + l_r

    p_ij_conditional = p_ij_conditional_var(X, sigma)
    p_ij = p_ij_sym_var(p_ij_conditional)
    q_ij = q_ij_student_t_var(Y)

    p_ij_safe = T.maximum(p_ij, epsilon)
    q_ij_safe = T.maximum(q_ij, epsilon)

    # Kullback-Leibler term
    kl = T.sum(p_ij * T.log(p_ij_safe / q_ij_safe), axis=1)

    # Edge contraction term
    edge_contraction = (1 / (2 * num_edges)) * T.sum(Adj * sqeuclidean_var(Y),
                                                     axis=1)

    # Compression term
    compression = (1 / (2 * N)) * T.sum(Y**2, axis=1)

    # Repulsion term
    # repulsion = (1 / (2 * N**2)) * T.sum(T.fill_diagonal(1 / (euclidean_var(Y) + r_eps), 0), axis=1)
    repulsion = -(1 / (2 * N**2)) * T.sum(
        T.fill_diagonal(T.log(euclidean_var(Y) + r_eps), 0), axis=1)

    cost = (l_kl / l_sum) * kl + (l_e / l_sum) * edge_contraction + (
        l_c / l_sum) * compression + (l_r / l_sum) * repulsion

    return cost
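
Written out per point i, the weighted cost assembled above is (with Λ = l_kl + l_e + l_c + l_r, num_edges = ½·Σ A, and d_ij the Euclidean distance between embedded points y_i and y_j):

    cost_i = (l_kl/Λ)·Σ_j p_ij·log(p_ij/q_ij)
           + (l_e/Λ)·(1/(2·num_edges))·Σ_j A_ij·d_ij²
           + (l_c/Λ)·(1/(2N))·‖y_i‖²
           − (l_r/Λ)·(1/(2N²))·Σ_{j≠i} log(d_ij + r_eps)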
Example #5
	def _results_inner(self,n,x):
		out,_ = theano.scan(lambda x_in: (n-1.)*\
			tt.log(tt.nlinalg.det(
				tt.fill_diagonal(x_in[self.tri_index]*(self.tri_index!=-1),1)
				)),sequences = [x])

		return out
Example #6
 def softmax_neg(self, X):
     if hasattr(self, 'hack_matrix'):
         X = X * self.hack_matrix
         e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x')) * self.hack_matrix
     else:
         e_x = T.fill_diagonal(T.exp(X - X.max(axis=1).dimshuffle(0, 'x')), 0)
     return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')
Example #7
 def softmax_neg(self, X):
     if hasattr(self, 'hack_matrix'):
         X = X * self.hack_matrix
         e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x')) * self.hack_matrix
     else:
         e_x = T.fill_diagonal(T.exp(X - X.max(axis=1).dimshuffle(0, 'x')), 0)
     return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')
Example #8
def cost_var(X, Y, sigma, l_kl, l_c, l_r, r_eps):
    N = X.shape[0]

    # Used to normalize s.t. the l_*'s sum up to one.
    l_sum = l_kl + l_c + l_r

    p_ij_conditional = p_ij_conditional_var(X, sigma)
    p_ij = p_ij_sym_var(p_ij_conditional)
    q_ij = q_ij_student_t_var(Y)

    p_ij_safe = T.maximum(p_ij, epsilon)
    q_ij_safe = T.maximum(q_ij, epsilon)

    # Kullback-Leibler term
    kl = T.sum(p_ij * T.log(p_ij_safe / q_ij_safe), axis=1)

    # Compression term
    compression = (1 / (2 * N)) * T.sum(Y**2, axis=1)

    # Repulsion term
    repulsion = -(1 / (2 * N**2)) * T.sum(
        T.fill_diagonal(T.log(euclidean_var(Y) + r_eps), 0), axis=1)

    # Sum of all terms.
    cost = (l_kl / l_sum) * kl + (l_c / l_sum) * compression + (
        l_r / l_sum) * repulsion

    return cost
Example #9
	def logp(self, x):
		n = self.n
		p = self.p
		s = self.s


		if s > 1:
			X = self._x_creation(x)

			result = self._normalizing_constant(n, p, s) + self._results_inner(n,x)
			return pm.dist_math.bound(result,
						tt.all(X <= 1), tt.all(X >= -1),
						self._check_pos_def(x),
						n > 0)
		else:
			X = x[self.tri_index]
			X = tt.fill_diagonal(X, 1)

			result = self._normalizing_constant(n, p, s)
			result += (n - 1.) * tt.log(tt.nlinalg.det(X))
			# n-1 probably needs to become structure[0]-1
			# I don't really know the likelihood structure honestly

			return pm.dist_math.bound(result,
						tt.all(X <= 1), tt.all(X >= -1),
						matrix_pos_def(X),
						n > 0)
Example #10
def fill_correlation_matrix(c_vec):
    """
    Create a Theano tensor object representing a correlation matrix
    of a multivariate normal distribution.

    :param c_vec: PyMC3 model variable corresponding to the `LKJCorr` prior
                  on  elements of the correlation matrix
    :return: correlation matrix as a Theano tensor object
    """

    n = c_vec.tag.test_value.shape[0]
    n_layers = n - 1

    m = np.zeros((n, n))
    res = tt.nlinalg.matrix_dot(m, 1)

    ind = 0

    for layer in range(n_layers):
        start_col = layer + 1
        for j in range(start_col, n):
            m[layer, j] = 1
            m[j, layer] = 1
            res += tt.nlinalg.matrix_dot(m, c_vec[ind])

            ind += 1
            m[layer, j] = 0
            m[j, layer] = 0

    res = tt.fill_diagonal(res, 1.)
    return res
Example #11
    def Kcost_momentum(self, learning_rate = 1e-2, epsilon = 1, gamma = 0.9):
        """
        Returns the cost and parameter updates for SGD with momentum.
        """
        print ('Using Momentum with gamma = %f, learning rate = %f, epsilon = %f'\
         % (gamma, learning_rate, epsilon))

        cost = T.mean(T.exp((0.5 - self.x) * \
        (T.dot(self.x, T.fill_diagonal(self.W, 0)) + self.b))) * epsilon

        gparams = T.grad(cost, self.params)

        if self.gpu:
            vW = theano.shared(np.zeros(self.W.eval().shape).astype(np.float32))
            vb = theano.shared(np.zeros(self.b.eval().shape).astype(np.float32))
        else:
            vW = theano.shared(np.zeros(self.W.eval().shape))
            vb = theano.shared(np.zeros(self.b.eval().shape))

        momentum = [vW, vb]
        momentum_updates = [(v, gamma * v + learning_rate * gparam) \
        for v, gparam in zip(momentum, gparams)]

        updates = [(param, param - v) \
        for param, v in zip(self.params, momentum)]

        updates = updates + momentum_updates

        return cost, updates
Example #12
    def Kcost_adagrad(self, learning_rate = 1e-2, epsilon = 1, smoothingterm = 1):
        """
        Returns the cost and parameter updates for SGD with Adagrad.
        """
        print ('Using Adagrad with smoothing term = %.9f, learning rate = %f, epsilon = %f'\
         % (smoothingterm, learning_rate, epsilon))

        param_shapes = [param.get_value().shape for param in self.params ]
        grad_hists = [theano.shared(np.zeros(param_shape,
                        dtype = theano.config.floatX),
                        borrow = True,
                        name = 'grad_hist_' + param.name)
                        for param_shape, param in zip(param_shapes, self.params)]

        cost = T.mean(T.exp((0.5 - self.x) * (T.dot(self.x,\
                T.fill_diagonal(self.W, 0)) + self.b))) * epsilon


        gparams = T.grad(cost, self.params)

        grad_hist_updates = [(g_hist, g_hist + g ** 2) for g_hist, g in zip(grad_hists, gparams)]

        updates = [(param, param - learning_rate * gparam/(T.sqrt(grad_hist + smoothingterm)))\
        for param, grad_hist, gparam in zip(self.params, grad_hists, gparams)]

        updates = updates + grad_hist_updates

        return cost, updates
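
A minimal NumPy sketch (a standalone illustration, not project code) of the Adagrad rule implemented above: squared gradients are accumulated per parameter and shrink the step size over time.

import numpy as np

def adagrad_step(param, grad, grad_hist, learning_rate=1e-2, smoothing=1.0):
    # Theano applies the update pairs simultaneously: the parameter step uses the
    # previous accumulator, and the accumulator then adds the squared gradient,
    # mirroring grad_hist_updates and updates in Kcost_adagrad above.
    param = param - learning_rate * grad / np.sqrt(grad_hist + smoothing)
    grad_hist = grad_hist + grad ** 2
    return param, grad_hist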
Example #13
    def logp(self, x):
        n = self.n
        p = self.p

        X = x[self.tri_index]
        X = T.fill_diagonal(X, 1)

        result = self._normalizing_constant(n, p)
        result += (n - 1.) * T.log(det(X))
        return bound(result, T.all(X <= 1), T.all(X >= -1), n > 0)
Example #14
    def logp(self, x):
        n = self.n
        p = self.p

        X = x[self.tri_index]
        X = T.fill_diagonal(X, 1)

        result = self._normalizing_constant(n, p)
        result += (n - 1.0) * T.log(det(X))
        return bound(result, T.all(X <= 1), T.all(X >= -1), n > 0)
Example #15
def p_ij_conditional_var(X, sigma):
    N = X.shape[0]

    sqdistance = X**2

    esqdistance = T.exp(-sqdistance / ((2 * (sigma**2)).reshape((N, 1))))
    esqdistance_zd = T.fill_diagonal(esqdistance, 0)

    row_sum = T.sum(esqdistance_zd, axis=1).reshape((N, 1))

    return esqdistance_zd / row_sum  # Possibly dangerous
Example #16
    def logp(self, x):
        n = self.n
        p = self.p

        X = x[self.tri_index]
        X = tt.fill_diagonal(X, 1)

        result = self._normalizing_constant(n, p)
        result += (n - 1.) * tt.log(det(X))
        return bound(result, tt.all(X <= 1), tt.all(X >= -1),
                     matrix_pos_def(X), n > 0)
Example #17
	def _results_inner(self,n,p,s,x):
		# use theano.scan to create x
		result1 = self._normalizing_constant(n,p)*s
		result2,_ = theano.scan(lambda x_min: 
			(n-1) * tt.log(
						tt.nlinalg.det(
							tt.fill_diagonal(
								x_min[self.tri_index],1)
							)
						),
			sequences = [x])
		return result1+result2
Example #18
    def logp(self, x):
        n = self.n
        p = self.p

        X = x[self.tri_index]
        X = tt.fill_diagonal(X, 1)

        result = self._normalizing_constant(n, p)
        result += (n - 1.) * tt.log(det(X))
        return bound(result,
                     tt.all(X <= 1), tt.all(X >= -1),
                     matrix_pos_def(X),
                     n > 0)
Example #19
    def loss_forcedsymmetry(self, learning_rate = 1e-2, epsilon = 1):
        """
        Returns the cost and parameter updates for vanilla SGD; the W update is symmetrized with a zero diagonal.
        """

        cost = T.mean(T.exp((0.5 - self.x) * (T.dot(self.x, self.W) + self.b))) * epsilon
        Wgrad = T.grad(cost, self.W)
        bgrad = T.grad(cost, self.b)

        Wupdate = T.fill_diagonal(0.5 * ((self.W - learning_rate * Wgrad) + (self.W - learning_rate * Wgrad).T), 0)
        updates = [(self.W, Wupdate), (self.b, self.b - learning_rate * bgrad )]

        return cost, updates
Example #20
    def logp(self, x):
        n = self.n
        eta = self.eta

        X = x[self.tri_index]
        X = tt.fill_diagonal(X, 1)

        result = _lkj_normalizing_constant(eta, n)
        result += (eta - 1.) * tt.log(det(X))
        return bound(result,
                     tt.all(X <= 1),
                     tt.all(X >= -1),
                     matrix_pos_def(X),
                     eta > 0,
                     broadcast_conditions=False)
Example #21
    def logp(self, x):
        n = self.n
        eta = self.eta

        X = x[self.tri_index]
        X = tt.fill_diagonal(X, 1)

        result = _lkj_normalizing_constant(eta, n)
        result += (eta - 1.) * tt.log(det(X))
        return bound(result,
                     tt.all(X <= 1), tt.all(X >= -1),
                     matrix_pos_def(X),
                     eta > 0,
                     broadcast_conditions=False
        )
Example #22
def p_Xp_given_X_var(X, sigma, metric):
    N = X.shape[0]

    if metric == 'euclidean':
        sqdistance = sqeuclidean_var(X)
    elif metric == 'precomputed':
        sqdistance = X**2
    else:
        raise Exception('Invalid metric')

    esqdistance = T.exp(-sqdistance / ((2 * (sigma**2)).reshape((N, 1))))
    esqdistance_zd = T.fill_diagonal(esqdistance, 0)

    row_sum = T.sum(esqdistance_zd, axis=1).reshape((N, 1))

    return esqdistance_zd/row_sum  # Possibly dangerous
Example #23
File: tsne.py Project: abbasmg/Thesis
def p_Xp_given_X_var(X, sigma, metric):
    N = X.shape[0]

    if metric == 'euclidean':
        sqdistance = sqeuclidean_var(X)
    elif metric == 'precomputed':
        sqdistance = X**2
    else:
        raise Exception('Invalid metric')

    esqdistance = T.exp(-sqdistance / ((2 * (sigma**2)).reshape((N, 1))))
    esqdistance_zd = T.fill_diagonal(esqdistance, 0)

    row_sum = T.sum(esqdistance_zd, axis=1).reshape((N, 1))

    return esqdistance_zd / row_sum  # Possibly dangerous
Example #24
def calc_original_cond_prob(X, sigma, metric):
    N = X.shape[0]

    if metric == 'euclidean':
        data_distances = calc_euclidean_norms(X)
    elif metric == 'precomputed':
        data_distances = X**2
    else:
        raise Exception('Invalid metric')

    esqdistance = T.exp(-data_distances / ((2 * (sigma**2)).reshape((N, 1))))
    esqdistance_zd = T.fill_diagonal(esqdistance, 0)

    row_sum = T.sum(esqdistance_zd, axis=1).reshape((N, 1))

    return esqdistance_zd / row_sum  # Possibly dangerous
Example #25
def pm_make_cov(sigma_priors, corr_coeffs, ndim):
    """Assemble a covariance matrix single variable standard deviations and correlation coefficients"""
    # Citation: AM 207 lecture notes: http://am207.info/wiki/corr.html
    # Diagonal matrix of standard deviation for each varialbes
    sigma_matrix = tt.nlinalg.diag(sigma_priors)
    # A symmetric nxn matrix has n choose 2 = n(n-1)/2 distinct elements
    n_elem = int(ndim * (ndim - 1) / 2)
    # Convert between flat array indexing and (i, j) matrix indexing
    tri_index = np.zeros([ndim, ndim], dtype=int)
    tri_index[np.triu_indices(ndim, k=1)] = np.arange(n_elem)
    tri_index[np.triu_indices(ndim, k=1)[::-1]] = np.arange(n_elem)
    # Assemble the covariance matrix using the equation
    # CovMat = DiagMat * CorrMat * DiagMat
    corr_matrix = corr_coeffs[tri_index]
    corr_matrix = tt.fill_diagonal(corr_matrix, 1)
    return tt.nlinalg.matrix_dot(sigma_matrix, corr_matrix, sigma_matrix)
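
The index-matrix trick used above (and again in Examples #42, #43 and #46) can be checked with a small NumPy sketch; the dimensions and values here are made up purely for illustration.

import numpy as np

ndim = 3
corr_coeffs = np.array([0.2, 0.4, 0.6])      # upper-triangle entries, row by row
sigmas = np.array([1.0, 2.0, 3.0])

n_elem = ndim * (ndim - 1) // 2
tri_index = np.zeros((ndim, ndim), dtype=int)
tri_index[np.triu_indices(ndim, k=1)] = np.arange(n_elem)
tri_index[np.triu_indices(ndim, k=1)[::-1]] = np.arange(n_elem)

corr = corr_coeffs[tri_index]                # expand the flat vector into a full matrix
np.fill_diagonal(corr, 1.0)                  # unit diagonal
cov = np.diag(sigmas) @ corr @ np.diag(sigmas)
# cov[i, j] == sigmas[i] * sigmas[j] * corr[i, j]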
Example #26
    def Kcost(self, learning_rate=0.01):
        """
        Returns the cost and parameter updates
        """

        cost = T.mean(T.exp((0.5 - self.x) * (T.dot(self.x, self.W) + self.b)))
        #         gparams = T.grad(cost, self.params)
        #         updates = [(param, param - learning_rate * gparam) for param, gparam in zip(self.params, gparams)]
        Wgrad = T.grad(cost, self.W)
        #         T.fill_diagonal(Wgrad, 0)
        bgrad = T.grad(cost, self.b)

        Wupdate = T.fill_diagonal(
            0.5 * ((self.W - learning_rate * Wgrad) +
                   (self.W - learning_rate * Wgrad).T), 0)
        updates = [(self.W, Wupdate), (self.b, self.b - learning_rate * bgrad)]
        #         updates = [(self.W, self.W - learning_rate * Wgrad), (self.b, self.b - learning_rate * bgrad )]

        return cost, updates
Example #27
	def logp(self, x):
		# x is assumed to have shape (s, n_elem) when s > 1, otherwise (n_elem,)
		n = self.n
		p = self.p
		s = self.s
		if s !=1:
			X = self._X_inner_creation(x)
			result = self._results_inner(n,p,s,x)
			return pm.dist_math.bound(result,
				tt.all(X <= 1), tt.all(X >= -1),
				n > 0)
		else:
			X = x[self.tri_index]
			X = tt.fill_diagonal(X, 1)
			result = self._normalizing_constant(n, p)
			result += (n - 1.) * tt.log(tt.nlinalg.det(X))
			return pm.dist_math.bound(result,
						 tt.all(X <= 1), tt.all(X >= -1),
						 n > 0)
Example #28
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)
        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
        dz = tt.switch(ok, dz, floatX(1))

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            solve = tt.slinalg.Solve(A_structure="upper_triangular")
            return solve(outer.T, solve(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
        else:
            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
        return [tt.switch(ok, grad, floatX(np.nan))]
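
For the lower-triangular case, the adjoint assembled above can be written compactly as S = L^{-T} Φ(L^T L̄) L^{-1} and Ā = tril(S + S^T) − Diag(diag(S)), where L̄ is the incoming gradient dz and Φ keeps the lower triangle of its argument with the diagonal halved; this is the expression from Murray (2016) cited in the docstring.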
Example #29
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)
        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
        dz = tt.switch(ok, dz, floatX(1))

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            solve = tt.slinalg.Solve(A_structure="upper_triangular")
            return solve(outer.T, solve(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
        else:
            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
        return [tt.switch(ok, grad, floatX(np.nan))]
Example #30
    def cost(self, lr=1e-2, epsilon=1):
        """
        Returns the cost and parameter updates for vanilla SGD.
        The update rule enforces that W stays symmetric with a zero diagonal at each step.
        """

        print(51 * '=')
        print(24 * '#' + 'MPF' + 24 * '#')
        print(51 * '=')
        print('Input size: {0}'.format(self.n))
        print('Learning temperature: {0}'.format(self.temperature))
        print('Learning rate: {0}'.format(lr))

        cost = epsilon * T.mean(
            T.exp((0.5 - self.x) *
                  (T.dot(self.x, self.W) + self.b) / self.temperature))
        Wgrad = T.grad(cost, self.W)
        bgrad = T.grad(cost, self.b)

        Wupdate = T.fill_diagonal(
            0.5 * ((self.W - lr * Wgrad) + (self.W - lr * Wgrad).T), 0)
        updates = [(self.W, Wupdate), (self.b, self.b - lr * bgrad)]

        return cost, updates
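
The symmetry-enforcing update above amounts to the following projection, shown here as a hypothetical NumPy helper (not part of the project):

import numpy as np

def project_weights(W_step):
    # Average the proposed update with its transpose to restore symmetry,
    # then zero the diagonal, as T.fill_diagonal(..., 0) does above.
    W_sym = 0.5 * (W_step + W_step.T)
    np.fill_diagonal(W_sym, 0.0)
    return W_sym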
Example #31
	def _X_inner_creation(self,x):
		# use theano.scan to create x
		result,_ = theano.scan(lambda x_min: tt.fill_diagonal(x_min[self.tri_index]*(self.tri_index!=-1),1), # update for specialized structure
			sequences = [x])
		return result
Example #32
    def logp(self, x):
        # -1/2 (x-mu) @ Sigma^-1 @ (x-mu)^T - 1/2 log(2pi^k|Sigma|)
        # Sigma = diag(std) @ Corr @ diag(std)
        # Sigma^-1 = diag(std^-1) @ Corr^-1 @ diag(std^-1)
        # Corr is a block matrix of special form
        #           +----------+
        # Corr = [[ | 1, b1, b1|,  0,  0,  0,..., 0]
        #         [ |b1,  1, b1|,  0,  0,  0,..., 0]
        #         [ |b1, b1,  1|,  0,  0,  0,..., 0]
        #           +-----------+----------+
        #         [  0,  0,  0, | 1, b2, b2|,..., 0]
        #         [  0,  0,  0, |b2,  1, b2|,..., 0]
        #         [  0,  0,  0, |b2, b2,  1|,..., 0]
        #                       +----------+
        #         [            ...                 ]
        #         [  0,  0,  0,   0,  0,  0 ,..., 1]]
        #
        # Corr = [[B1,  0,  0, ...,  0]
        #         [ 0, B2,  0, ...,  0]
        #         [ 0,  0, B3, ...,  0]
        #         [        ...        ]
        #         [ 0,  0,  0, ..., Bk]]
        #
        # Corr^-1 = [[B1^-1,     0,      0, ...,     0]
        #            [    0, B2^-1,      0, ...,     0]
        #            [    0,     0,  B3^-1, ...,     0]
        #            [              ...               ]
        #            [    0,     0,      0, ..., Bk^-1]]
        #
        # |B| matrix of rank r is easy
        # https://math.stackexchange.com/a/1732839
        # Let D = eye(r) * (1-b)
        # Then B = D + b * ones((r, r))
        # |B| = (1-b) ** r + b * r * (1-b) ** (r-1)
        # |B| = (1.-b) ** (r-1) * (1. + b * (r - 1))
        # log(|B|) = log(1-b)*(r-1) + log1p(b*(r-1))
        #
        # Inverse B^-1 is easy as well
        # https://math.stackexchange.com/a/1766118
        # let
        # c = 1/b + r*1/(1-b)
        # (B^-1)ii = 1/(1-b) - 1/(c*(1-b)**2)
        # (B^-1)ij =         - 1/(c*(1-b)**2)
        #
        # assuming
        # z = (x - mu) / std
        # we have det fix
        # detfix = -sum(log(std))
        #
        # now we need to compute z @ Corr^-1 @ z^T
        # note that B can be unique per timestep
        # so we need z_t @ Corr_t^-1 @ z_t^T in perfect
        # z_t @ Corr_t^-1 @ z_t^T is a sum of block terms
        # quad = z_ct @ B_ct^-1 @ z_ct^T = (B^-1)_iict * sum(z_ct**2) + (B^-1)_ijct*sum_{i!=j}(z_ict * z_jct)
        #
        # finally all terms are computed explicitly
        # logp = detfix - 1/2 * ( quad + log(pi*2) * k + log(|B|) )

        x = tt.as_tensor_variable(x)
        clust_ids, clust_pos, clust_counts = \
            tt.extra_ops.Unique(return_inverse=True,
                                return_counts=True)(self.clust)
        clust_order = tt.argsort(clust_pos)
        mu = self.mu
        corr = self.corr[..., clust_ids]
        std = self.std
        if std.ndim == 0:
            std = tt.repeat(std, x.shape[-1])
        if std.ndim == 1:
            std = std[None, :]
        if corr.ndim == 1:
            corr = corr[None, :]
        z = (x - mu) / std
        z = z[..., clust_order]
        detfix = -tt.log(std).sum(-1)
        # following the notation above
        r = clust_counts
        b = corr
        # detB = (1.-b) ** (r-1) * (1. + b * (r - 1))
        logdetB = tt.log1p(-b) * (r - 1) + tt.log1p(b * (r - 1))
        c = 1 / b + r / (1. - b)
        invBij = -1. / (c * (1. - b)**2)
        invBii = 1. / (1. - b) + invBij
        invBij = tt.repeat(invBij, clust_counts, axis=-1)
        invBii = tt.repeat(invBii, clust_counts, axis=-1)

        # to compute (Corr^-1)_ijt*sum_{i!=j}(z_it * z_jt)
        # we use masked cross products
        mask = tt.arange(x.shape[-1])[None, :]
        mask = tt.repeat(mask, x.shape[-1], axis=0)
        mask = tt.maximum(mask, mask.T)
        block_end_pos = tt.cumsum(r)
        block_end_pos = tt.repeat(block_end_pos, clust_counts)
        mask = tt.lt(mask, block_end_pos)
        mask = tt.and_(mask, mask.T)
        mask = tt.fill_diagonal(mask.astype('float32'), 0.)
        # type: tt.TensorVariable

        invBiizizi_sum = ((z**2) * invBii).sum(-1)
        invBijzizj_sum = (
            (z.dimshuffle(0, 1, 'x') * mask.dimshuffle('x', 0, 1) *
             z.dimshuffle(0, 'x', 1)) * invBij.dimshuffle(0, 1, 'x')).sum(
                 [-1, -2])
        quad = invBiizizi_sum + invBijzizj_sum
        k = pm.floatX(x.shape[-1])
        logp = (detfix - .5 *
                (quad + pm.floatX(np.log(np.pi * 2)) * k + logdetB.sum(-1)))
        if self.nonzero:
            logp = tt.switch(tt.eq(x, 0).any(-1), 0., logp)
        return bound(logp,
                     tt.gt(corr, -1.),
                     tt.lt(corr, 1.),
                     tt.gt(std, 0.),
                     broadcast_conditions=False)
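
The determinant and inverse identities quoted in the comment block above can be verified numerically; below is a short standalone NumPy check with arbitrary r and b (an illustration, not project code).

import numpy as np

r, b = 4, 0.3
B = (1.0 - b) * np.eye(r) + b * np.ones((r, r))   # compound-symmetric block

# log|B| = (r-1)*log(1-b) + log1p(b*(r-1))
logdetB = (r - 1) * np.log1p(-b) + np.log1p(b * (r - 1))
assert np.isclose(np.linalg.slogdet(B)[1], logdetB)

# B^-1 has constant off-diagonal invBij and diagonal invBii
c = 1.0 / b + r / (1.0 - b)
invBij = -1.0 / (c * (1.0 - b) ** 2)
invBii = 1.0 / (1.0 - b) + invBij
invB = invBij * np.ones((r, r)) + (invBii - invBij) * np.eye(r)
assert np.allclose(np.linalg.inv(B), invB)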
Example #33
	def _X_inner_creation(self,x):
		# use theano.scan to create x
		result,_ = theano.scan(lambda x_min: tt.fill_diagonal(x_min[self.tri_index],1),
			sequences = [x])
		return result
Example #34
def q_ij_student_t_var(Y):
    sqdistance = sqeuclidean_var(Y)
    one_over = T.fill_diagonal(1 / (sqdistance + 1), 0)
    return one_over / one_over.sum()
Example #35
def cov_funct_special(lkj,sigma,tri_index):
	# need to approach the structure part
	result,_ =  theano.scan(lambda l,s: tt.diag(s).dot(tt.fill_diagonal(l[tri_index]*1*(tri_index!=-1),1)).dot(tt.diag(s)), 
		sequences = [lkj,sigma])
	return result
Example #36
	def _check_pos_def(self,x):
		out,_ = theano.scan(lambda x_in: tt.all(tt.nlinalg.eigh(tt.fill_diagonal(x_in[self.tri_index]*(self.tri_index!=-1),1))[0]>0),
			sequences = [x])
		return tt.all(out)
Example #37
	def _x_creation(self,x):
		out,_ = theano.scan(lambda x_in: 
			tt.fill_diagonal(x_in[self.tri_index]*(self.tri_index!=-1),1),
			sequences = [x])

		return out
Example #38
def _Tcov(sigma, rho):
    """Build a covariance matrix"""
    C = T.alloc(rho, 2, 2)
    C = T.fill_diagonal(C, 1.)
    S = T.diag(sigma)
    return T.nlinalg.matrix_dot(S, C, S)
Example #39
    def __init__(self,
                 dimension,
                 mu_data,
                 tau_data,
                 prior="Gaussian",
                 parameters={
                     "location": None,
                     "scale": None,
                     "corr": False
                 },
                 hyper_alpha=None,
                 hyper_beta=None,
                 hyper_gamma=None,
                 hyper_delta=None,
                 transformation=None,
                 parametrization="non-central",
                 name='',
                 model=None):

        assert isinstance(dimension, int), "dimension must be integer!"
        assert dimension in [3, 5, 6], "Not a valid dimension!"

        D = dimension

        # 2) call super's init first, passing model and name
        # to it name will be prefix for all variables here if
        # no name specified for model there will be no prefix
        super().__init__(str(D) + "D", model)
        # now you are in the context of instance,
        # `modelcontext` will return self you can define
        # variables in several ways note, that all variables
        # will get model's name prefix

        #------------------- Data ------------------------------------------------------
        N = int(len(mu_data) / D)
        if N == 0:
            sys.exit(
                "Data has length zero!. You must provide at least one data point"
            )
        #-------------------------------------------------------------------------------

        #============= Transformations ====================================

        if transformation == "mas":
            Transformation = Iden

        elif transformation == "pc":
            if D == 3:
                Transformation = cartesianToSpherical
            elif D == 6:
                Transformation = phaseSpaceToAstrometry_and_RV
            elif D == 5:
                Transformation = phaseSpaceToAstrometry
                D = 6

        else:
            sys.exit("Transformation is not accepted")
        #==================================================================

        #================ Hyper-parameters =====================================
        if hyper_delta is None:
            shape = 1
        else:
            shape = len(hyper_delta)

        #--------- Location ----------------------------------
        if parameters["location"] is None:

            location = [
                pm.Normal("loc_{0}".format(i),
                          mu=hyper_alpha[i][0],
                          sigma=hyper_alpha[i][1],
                          shape=shape) for i in range(D)
            ]

            #--------- Join variables --------------
            mu = pm.math.stack(location, axis=1)

        else:
            mu = parameters["location"]
        #------------------------------------------------------

        #------------- Scale --------------------------
        if parameters["scale"] is None:
            scale = [
                pm.Gamma("scl_{0}".format(i),
                         alpha=2.0,
                         beta=2.0 / hyper_beta[i][0],
                         shape=shape) for i in range(D)
            ]

        else:
            scale = parameters["scale"]
        #--------------------------------------------------

        #----------------------- Correlation -----------------------------------------
        if parameters["corr"]:
            pm.LKJCorr('chol_corr', eta=hyper_gamma, n=D)
            C = tt.fill_diagonal(
                self.chol_corr[np.zeros((D, D), dtype=np.int64)], 1.)
            # print_ = tt.printing.Print('C')(C)
        else:
            C = np.eye(D)
        #-----------------------------------------------------------------------------

        #-------------------- Covariance -------------------------
        sigma_diag = pm.math.stack(scale, axis=1)
        cov = theano.shared(np.zeros((shape, D, D)))

        for i in range(shape):
            sigma = tt.nlinalg.diag(sigma_diag[i])
            covi = tt.nlinalg.matrix_dot(sigma, C, sigma)
            cov = tt.set_subtensor(cov[i], covi)
        #---------------------------------------------------------
        #========================================================================

        #===================== True values ============================================
        if prior == "Gaussian":
            pm.MvNormal("source", mu=mu, cov=cov[0], shape=(N, D))

        elif prior == "GMM":
            pm.Dirichlet("weights", a=hyper_delta, shape=shape)

            comps = [
                pm.MvNormal.dist(mu=mu[i], cov=cov[i]) for i in range(shape)
            ]

            pm.Mixture("source",
                       w=self.weights,
                       comp_dists=comps,
                       shape=(N, D))

        else:
            sys.exit("The specified prior is not supported")
        #=================================================================================

        #----------------------- Transformation---------------------------------------
        transformed = Transformation(self.source)
        #-----------------------------------------------------------------------------

        #------------ Flatten --------------------------------------------------------
        true = pm.math.flatten(transformed)
        #----------------------------------------------------------------------------

        #----------------------- Likelihood ----------------------------------------
        pm.MvNormal('obs', mu=true, tau=tau_data, observed=mu_data)
        #------------------------------------------------------------------------------
Example #40
def covariance(sigma, rho):
    C = T.fill_diagonal(T.alloc(rho, 2, 2), 1.)
    S = T.diag(sigma)
    M = S.dot(C).dot(S)
    return M
Example #41
def q_ij_gaussian_var(Y):
    sqdistance = sqeuclidean_var(Y)
    gauss = T.fill_diagonal(T.exp(-sqdistance), 0)
    return gauss / gauss.sum()
Example #42
# In order to convert the upper triangular correlation values to a complete
# correlation matrix, we need to construct an index matrix:
n_elem = int(n_var * (n_var - 1) / 2)
tri_index = np.zeros([n_var, n_var], dtype=int)
tri_index[np.triu_indices(n_var, k=1)] = np.arange(n_elem)
tri_index[np.triu_indices(n_var, k=1)[::-1]] = np.arange(n_elem)

with pm.Model() as model:

    mu = pm.Normal('mu', mu=0, sd=1, shape=n_var)

    # We can specify separate priors for sigma and the correlation matrix:
    sigma = pm.Uniform('sigma', shape=n_var)
    corr_triangle = pm.LKJCorr('corr', n=1, p=n_var)
    corr_matrix = corr_triangle[tri_index]
    corr_matrix = tt.fill_diagonal(corr_matrix, 1)

    cov_matrix = tt.diag(sigma).dot(corr_matrix.dot(tt.diag(sigma)))

    like = pm.MvNormal('likelihood', mu=mu, cov=cov_matrix, observed=dataset)


def run(n=1000):
    if n == "short":
        n = 50
    with model:
        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        trace = pm.sample(n, step=step, start=start)
    return trace
Example #43
# In order to convert the upper triangular correlation values to a complete
# correlation matrix, we need to construct an index matrix:
n_elem = int(n_var * (n_var - 1) / 2)
tri_index = np.zeros([n_var, n_var], dtype=int)
tri_index[np.triu_indices(n_var, k=1)] = np.arange(n_elem)
tri_index[np.triu_indices(n_var, k=1)[::-1]] = np.arange(n_elem)

with Model() as model:

    mu = Normal('mu', mu=0, tau=1**-2, shape=n_var)

    # We can specify separate priors for sigma and the correlation matrix:
    sigma = Uniform('sigma', shape=n_var)
    corr_triangle = LKJCorr('corr', n=1, p=n_var)
    corr_matrix = corr_triangle[tri_index]
    corr_matrix = tt.fill_diagonal(corr_matrix, 1)

    cov_matrix = tt.diag(sigma).dot(corr_matrix.dot(tt.diag(sigma)))

    like = MvNormal('likelihood', mu=mu, tau=inv(cov_matrix), observed=dataset)


def run(n=1000):
    if n == "short":
        n = 50
    with model:
        start = find_MAP()
        step = NUTS(scaling=start)
        tr = sample(n, step=step, start=start)

Example #44
def p_Yp_Y_var(Y):
    sqdistance = sqeuclidean_var(Y)
    one_over = T.fill_diagonal(1/(sqdistance + 1), 0)
    return one_over/one_over.sum()  # Possibly dangerous
Example #45
    x = np.random.uniform(0, 10, size=N)
    y = np.random.normal(np.sin(x), np.sqrt(0.01))

    plt.plot(x, y, 'o')
    plt.xlabel('$x$', fontsize=16)
    plt.ylabel('$f(x)$', fontsize=16, rotation=0)

    with pm.Model() as GP:
        mu = np.zeros(N)
        eta = pm.HalfCauchy('eta', 0.1)
        rho = pm.HalfCauchy('rho', 1)
        sigma = pm.HalfCauchy('sigma', 1)

        D = squared_distance(x, x)  #SED(x,x)

        K = tt.fill_diagonal(eta * pm.math.exp(-rho * D),
                             eta + sigma)  #(K(x, x) + σ I)

        obs = pm.MvNormal('obs', mu, cov=K, observed=y)

        test_points = np.linspace(0, 10, 100)
        D_pred = squared_distance(test_points, test_points)  #SED(x*,x*)
        D_off_diag = squared_distance(x, test_points)  #SED(x,x*) n * N

        K_oo = eta * pm.math.exp(-rho * D_pred)  #K(x*,x*)
        K_o = eta * pm.math.exp(-rho * D_off_diag)  #K(x,x*)

        inv_K = tt.nlinalg.matrix_inverse(K)

        mu_post = pm.Deterministic('mu_post',
                                   pm.math.dot(pm.math.dot(K_o.T, inv_K), y))
        SIGMA_post = pm.Deterministic(
Example #46
def build_mod_bpmf_model(train, alpha=2, dim=10, std=0.01):
    """Build the modified BPMF model using pymc3. The original model uses
    Wishart priors on the covariance matrices. Unfortunately, the Wishart
    distribution in pymc3 is currently not suitable for sampling. This
    version decomposes the covariance matrix into:

        diag(sigma) \dot corr_matrix \dot diag(sigma).

    We use uniform priors on the standard deviations (sigma) and LKJCorr
    priors on the correlation matrices (corr_matrix):

        sigma ~ Uniform
        corr_matrix ~ LKJCorr(n=1, p=dim)

    """
    n, m = train.shape
    beta_0 = 1  # scaling factor for lambdas; unclear on its use

    # Mean value imputation on training data.
    train = train.copy()
    nan_mask = np.isnan(train)
    train[nan_mask] = train[~nan_mask].mean()

    # We will use separate priors for sigma and correlation matrix.
    # In order to convert the upper triangular correlation values to a
    # complete correlation matrix, we need to construct an index matrix:
    n_elem = int(dim * (dim - 1) / 2)
    tri_index = np.zeros([dim, dim], dtype=int)
    tri_index[np.triu_indices(dim, k=1)] = np.arange(n_elem)
    tri_index[np.triu_indices(dim, k=1)[::-1]] = np.arange(n_elem)

    logging.info('building the BPMF model')
    with pm.Model() as bpmf:
        # Specify user feature matrix
        sigma_u = pm.Uniform('sigma_u', shape=dim)
        corr_triangle_u = pm.LKJCorr('corr_u',
                                     n=1,
                                     p=dim,
                                     testval=np.random.randn(n_elem) * std)

        corr_matrix_u = corr_triangle_u[tri_index]
        corr_matrix_u = t.fill_diagonal(corr_matrix_u, 1)
        cov_matrix_u = t.diag(sigma_u).dot(corr_matrix_u.dot(t.diag(sigma_u)))
        lambda_u = t.nlinalg.matrix_inverse(cov_matrix_u)

        mu_u = pm.Normal('mu_u',
                         mu=0,
                         tau=beta_0 * t.diag(lambda_u),
                         shape=dim,
                         testval=np.random.randn(dim) * std)
        U = pm.MvNormal('U',
                        mu=mu_u,
                        tau=lambda_u,
                        shape=(n, dim),
                        testval=np.random.randn(n, dim) * std)

        # Specify item feature matrix
        sigma_v = pm.Uniform('sigma_v', shape=dim)
        corr_triangle_v = pm.LKJCorr('corr_v',
                                     n=1,
                                     p=dim,
                                     testval=np.random.randn(n_elem) * std)

        corr_matrix_v = corr_triangle_v[tri_index]
        corr_matrix_v = t.fill_diagonal(corr_matrix_v, 1)
        cov_matrix_v = t.diag(sigma_v).dot(corr_matrix_v.dot(t.diag(sigma_v)))
        lambda_v = t.nlinalg.matrix_inverse(cov_matrix_v)

        mu_v = pm.Normal('mu_v',
                         mu=0,
                         tau=beta_0 * t.diag(lambda_v),
                         shape=dim,
                         testval=np.random.randn(dim) * std)
        V = pm.MvNormal('V',
                        mu=mu_v,
                        tau=lambda_v,
                        shape=(m, dim),
                        testval=np.random.randn(m, dim) * std)

        # Specify rating likelihood function
        R = pm.Normal('R',
                      mu=t.dot(U, V.T),
                      tau=alpha * np.ones((n, m)),
                      observed=train)

    logging.info('done building the BPMF model')
    return bpmf
Example #47
            w = Dirichlet('w', a=pm.floatX(alpha), shape=(n_components, ))

            # Impose sparse structure onto mean with off-diagonal elements all being the same, because background should be the same throughout.
            mus_signal = MvNormal(
                'mus_signal',
                mu=pm.floatX(signalMean_priorMean),
                tau=pm.floatX(np.eye(n_dimensions) / signalMean_priorSD**2),
                shape=n_dimensions)
            mus_background = MvNormal('mus_background',
                                      mu=pm.floatX(backgroundMean_priorMean),
                                      tau=pm.floatX(
                                          np.eye(n_dimensions) /
                                          backgroundMean_priorSD**2),
                                      shape=n_dimensions)
            mus = tt.fill_diagonal(
                tt.reshape(tt.tile(mus_background, n_components),
                           (n_components, n_dimensions)),
                0) + tt.eye(n_components, n_dimensions) * mus_signal

            # Impose structure for covariance as well, with off-diagonal elements being zero, just because that model is easier to fit.
            sigmas_signal = pm.Gamma('sigmas_signal',
                                     mu=pm.floatX(signalSD_priorMean),
                                     sd=pm.floatX(signalSD_priorSD),
                                     shape=n_dimensions)
            sigmas_background = pm.Gamma('sigmas_background',
                                         mu=pm.floatX(backgroundSD_priorMean),
                                         sd=pm.floatX(backgroundSD_priorSD),
                                         shape=n_dimensions)
            sigmas = tt.fill_diagonal(
                tt.reshape(tt.tile(sigmas_background, n_components),
                           (n_components, n_dimensions)),
                0) + tt.eye(n_components, n_dimensions) * sigmas_signal
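
A small NumPy illustration (standalone, with made-up values) of the structured means and scales assembled above: each mixture component keeps the shared background value in every dimension except its own, where the signal value is used.

import numpy as np

n_components = n_dimensions = 3
mus_signal = np.array([10.0, 11.0, 12.0])
mus_background = np.array([1.0, 2.0, 3.0])

mus = np.tile(mus_background, n_components).reshape(n_components, n_dimensions)
np.fill_diagonal(mus, 0.0)                                    # clear the "signal" slots
mus = mus + np.eye(n_components, n_dimensions) * mus_signal   # put signal means on the diagonal
# mus[k, k] == mus_signal[k]; mus[k, j] == mus_background[j] for j != k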