Example #1
File: binmixND.py Project: dvav/clonosGP
def get_model(data, K, alpha, sigma, sigma2, eta, *args, **kargs):
    r = data.pivot(index='MUTID', columns='SAMPLEID', values='r').values
    R = data.pivot(index='MUTID', columns='SAMPLEID', values='R').values
    VAF0 = data.pivot(index='MUTID', columns='SAMPLEID', values='VAF0').values
    r, R, VAF0 = r[:, :, None], R[:, :, None], VAF0[:, :, None]

    nsamples = data.SAMPLEID.nunique()

    idxs = aux.corr_vector_to_matrix_indices(nsamples)
    D = tns.eye(nsamples) * sigma**2
    with pmc.Model() as model:
        # alpha = pmc.Gamma('alpha', 1.0, 1.0)
        u = pmc.Beta('u', 1.0, alpha, shape=K - 1)
        lw = pmc.Deterministic('lw', aux.stick_breaking_log(u))

        C_ = pmc.LKJCorr('C', eta=eta, n=nsamples)
        C = tns.fill_diagonal(C_[idxs], 1.0)
        Sigma = D.dot(C)
        psi = pmc.MvNormal('psi',
                           mu=nmp.zeros(nsamples),
                           cov=Sigma,
                           shape=(K, nsamples))
        phi = pmc.Deterministic('phi', pmc.invlogit(psi.T))

        # psi = pmc.MvNormal('psi', mu=nmp.zeros(nsamples), cov=D, shape=(K, nsamples))
        # phi = pmc.Deterministic('phi', pmc.invlogit(psi.T))

        theta = pmc.Deterministic('theta', VAF0 * phi[None, :, :])

        pmc.DensityDist('r', aux.binmixND_logp_fcn(R, theta, lw), observed=r)
    return model
Example #2
    def Kcost_nesterov(self, learning_rate = 1e-2, epsilon = 1, gamma = 0.9):
        """
        Returns the cost and parameter updates for SGD with Nesterov's accelerated gradient.
        """
        print ('Using Nesterov with gamma = %f, learning rate = %f, epsilon = %f'\
         % (gamma, learning_rate, epsilon))
        if self.gpu:
            vW = theano.shared(np.zeros(self.W.eval().shape).astype(np.float32))
            vb = theano.shared(np.zeros(self.b.eval().shape).astype(np.float32))
        else:
            vW = theano.shared(np.zeros(self.W.eval().shape))
            vb = theano.shared(np.zeros(self.b.eval().shape))


        nextW = self.W - gamma * vW
        nextb = self.b - gamma * vb

        cost = T.mean(T.exp((0.5 - self.x) * (T.dot(self.x,\
                T.fill_diagonal(nextW, 0)) + nextb))) * epsilon

        Wgrad = T.grad(cost, nextW)
        bgrad = T.grad(cost, nextb)

        gparams = [Wgrad, bgrad]
        momentum = [vW, vb]
        momentum_updates = [(v, gamma * v + learning_rate * gparam)\
        for v, gparam in zip(momentum, gparams)]

        updates = [(param, param - v) \
        for param, v in zip(self.params, momentum)]

        updates = updates + momentum_updates

        return cost, updates
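
A minimal NumPy sketch (a standalone illustration, not taken from the project above) of the update pattern that Kcost_nesterov builds symbolically: the gradient is taken at the look-ahead point W - gamma * v, and Theano applies all update pairs simultaneously from the old shared values.

def nesterov_step(theta, v, grad_fn, learning_rate=1e-2, gamma=0.9):
    # Mirror of the Theano updates above: the parameter step uses the previous
    # velocity, while the new velocity is built from the gradient evaluated at
    # the look-ahead point theta - gamma * v.
    lookahead = theta - gamma * v
    v_new = gamma * v + learning_rate * grad_fn(lookahead)
    theta_new = theta - v
    return theta_new, v_new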
Example #3
def get_model(x, r, R, vaf0, K=10):
    nsamples = r.shape[1]
    r, R, vaf0 = r[:, :, None], R[:, :, None], vaf0[:, :, None]
    idxs = aux.corr_vector_to_matrix_indices(K)
    with pmc.Model() as model:
        w = pmc.Dirichlet('w', nmp.ones(K))
        lw = tns.log(w)

        # alpha = pmc.Gamma('alpha', 1.0, 1.0)
        # u = pmc.Beta('u', 1.0, alpha, shape=K-1)
        # lw = aux.stick_breaking_log(u)

        rho = pmc.Gamma('rho', 1.0, 1.0)
        Cc = tns.fill_diagonal(pmc.LKJCorr('C', eta=2.0, n=K)[idxs], 1.0)
        Cr = aux.cov_quad_exp(x, 1.0, rho)
        mu_psi = pmc.MatrixNormal('mu_psi',
                                  mu=nmp.zeros((nsamples, K)),
                                  rowcov=Cr,
                                  colcov=Cc,
                                  shape=(nsamples, K))
        psi = pmc.Normal('psi', mu=mu_psi, sd=0.1, shape=(nsamples, K))
        phi = pmc.Deterministic('phi', pmc.invlogit(psi))

        # psi = pmc.MvNormal('psi', mu=nmp.zeros(K), tau=nmp.eye(K), shape=(nsamples, K))
        # phi = pmc.Deterministic('phi', pmc.invlogit(psi))

        theta = pmc.Deterministic('theta', vaf0 * phi[None, :, :])
        pmc.DensityDist('r', aux.binmixND_logp_fcn(R, theta, lw), observed=r)
    return model
Example #4
def cost_var(X, Y, sigma, Adj, l_kl, l_e, l_c, l_r, r_eps):
    N = X.shape[0]
    num_edges = 0.5 * T.sum(Adj)

    # Used to normalize s.t. the l_*'s sum up to one.
    l_sum = l_kl + l_e + l_c + l_r

    p_ij_conditional = p_ij_conditional_var(X, sigma)
    p_ij = p_ij_sym_var(p_ij_conditional)
    q_ij = q_ij_student_t_var(Y)

    p_ij_safe = T.maximum(p_ij, epsilon)
    q_ij_safe = T.maximum(q_ij, epsilon)

    # Kullback-Leibler term
    kl = T.sum(p_ij * T.log(p_ij_safe / q_ij_safe), axis=1)

    # Edge contraction term
    edge_contraction = (1 / (2 * num_edges)) * T.sum(Adj * sqeuclidean_var(Y),
                                                     axis=1)

    # Compression term
    compression = (1 / (2 * N)) * T.sum(Y**2, axis=1)

    # Repulsion term
    # repulsion = (1 / (2 * N**2)) * T.sum(T.fill_diagonal(1 / (euclidean_var(Y) + r_eps), 0), axis=1)
    repulsion = -(1 / (2 * N**2)) * T.sum(
        T.fill_diagonal(T.log(euclidean_var(Y) + r_eps), 0), axis=1)

    cost = (l_kl / l_sum) * kl + (l_e / l_sum) * edge_contraction + (
        l_c / l_sum) * compression + (l_r / l_sum) * repulsion

    return cost
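
Written out per point i, the weighted cost assembled above is (with Λ = l_kl + l_e + l_c + l_r, num_edges = ½·Σ A, and d_ij the Euclidean distance between embedded points y_i and y_j):

    cost_i = (l_kl/Λ)·Σ_j p_ij·log(p_ij/q_ij)
           + (l_e/Λ)·(1/(2·num_edges))·Σ_j A_ij·d_ij²
           + (l_c/Λ)·(1/(2N))·‖y_i‖²
           − (l_r/Λ)·(1/(2N²))·Σ_{j≠i} log(d_ij + r_eps)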
Example #5
	def _results_inner(self,n,x):
		out,_ = theano.scan(lambda x_in: (n-1.)*\
			tt.log(tt.nlinalg.det(
				tt.fill_diagonal(x_in[self.tri_index]*(self.tri_index!=-1),1)
				)),sequences = [x])

		return out
Example #6
 def softmax_neg(self, X):
     if hasattr(self, 'hack_matrix'):
         X = X * self.hack_matrix
         e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x')) * self.hack_matrix
     else:
         e_x = T.fill_diagonal(T.exp(X - X.max(axis=1).dimshuffle(0, 'x')), 0)
     return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')
Example #7
 def softmax_neg(self, X):
     if hasattr(self, 'hack_matrix'):
         X = X * self.hack_matrix
         e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x')) * self.hack_matrix
     else:
         e_x = T.fill_diagonal(T.exp(X - X.max(axis=1).dimshuffle(0, 'x')), 0)
     return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')
Example #8
def cost_var(X, Y, sigma, l_kl, l_c, l_r, r_eps):
    N = X.shape[0]

    # Used to normalize s.t. the l_*'s sum up to one.
    l_sum = l_kl + l_c + l_r

    p_ij_conditional = p_ij_conditional_var(X, sigma)
    p_ij = p_ij_sym_var(p_ij_conditional)
    q_ij = q_ij_student_t_var(Y)

    p_ij_safe = T.maximum(p_ij, epsilon)
    q_ij_safe = T.maximum(q_ij, epsilon)

    # Kullback-Leibler term
    kl = T.sum(p_ij * T.log(p_ij_safe / q_ij_safe), axis=1)

    # Compression term
    compression = (1 / (2 * N)) * T.sum(Y**2, axis=1)

    # Repulsion term
    repulsion = -(1 / (2 * N**2)) * T.sum(
        T.fill_diagonal(T.log(euclidean_var(Y) + r_eps), 0), axis=1)

    # Sum of all terms.
    cost = (l_kl / l_sum) * kl + (l_c / l_sum) * compression + (
        l_r / l_sum) * repulsion

    return cost
Example #9
	def logp(self, x):
		n = self.n
		p = self.p
		s = self.s


		if s > 1:
			X = self._x_creation(x)

			result = self._normalizing_constant(n, p, s) + self._results_inner(n,x)
			return pm.dist_math.bound(result,
						tt.all(X <= 1), tt.all(X >= -1),
						self._check_pos_def(x),
						n > 0)
		else:
			X = x[self.tri_index]
			X = tt.fill_diagonal(X, 1)

			result = self._normalizing_constant(n, p, s)
			result += (n - 1.) * tt.log(tt.nlinalg.det(X))
			# n-1 probably needs to become structure[0]-1
			# I don't really know the likelihood structure honestly

			return pm.dist_math.bound(result,
						tt.all(X <= 1), tt.all(X >= -1),
						matrix_pos_def(X),
						n > 0)
Example #10
def fill_correlation_matrix(c_vec):
    """
    Create a Theano tensor object representing a correlation matrix
    of a multivariate normal distribution.

    :param c_vec: PyMC3 model variable corresponding to the `LKJCorr` prior
                  on  elements of the correlation matrix
    :return: correlation matrix as a Theano tensor object
    """

    n = c_vec.tag.test_value.shape[0]
    n_layers = n - 1

    m = np.zeros((n, n))
    res = tt.nlinalg.matrix_dot(m, 1)

    ind = 0

    for layer in range(n_layers):
        start_col = layer + 1
        for j in range(start_col, n):
            m[layer, j] = 1
            m[j, layer] = 1
            res += tt.nlinalg.matrix_dot(m, c_vec[ind])

            ind += 1
            m[layer, j] = 0
            m[j, layer] = 0

    res = tt.fill_diagonal(res, 1.)
    return res
Example #11
    def Kcost_momentum(self, learning_rate = 1e-2, epsilon = 1, gamma = 0.9):
        """
        Returns the cost and parameter updates for SGD with momentum.
        """
        print ('Using Momentum with gamma = %f, learning rate = %f, epsilon = %f'\
         % (gamma, learning_rate, epsilon))

        cost = T.mean(T.exp((0.5 - self.x) * \
        (T.dot(self.x, T.fill_diagonal(self.W, 0)) + self.b))) * epsilon

        gparams = T.grad(cost, self.params)

        if self.gpu:
            vW = theano.shared(np.zeros(self.W.eval().shape).astype(np.float32))
            vb = theano.shared(np.zeros(self.b.eval().shape).astype(np.float32))
        else:
            vW = theano.shared(np.zeros(self.W.eval().shape))
            vb = theano.shared(np.zeros(self.b.eval().shape))

        momentum = [vW, vb]
        momentum_updates = [(v, gamma * v + learning_rate * gparam) \
        for v, gparam in zip(momentum, gparams)]

        updates = [(param, param - v) \
        for param, v in zip(self.params, momentum)]

        updates = updates + momentum_updates

        return cost, updates
Example #12
    def Kcost_adagrad(self, learning_rate = 1e-2, epsilon = 1, smoothingterm = 1):
        """
        Returns the cost and parameter updates for SGD with Adagrad.
        """
        print ('Using Adagrad with smoothing term = %.9f, learning rate = %f, epsilon = %f'\
         % (smoothingterm, learning_rate, epsilon))

        param_shapes = [param.get_value().shape for param in self.params ]
        grad_hists = [theano.shared(np.zeros(param_shape,
                        dtype = theano.config.floatX),
                        borrow = True,
                        name = 'grad_hist_' + param.name)
                        for param_shape, param in zip(param_shapes, self.params)]

        cost = T.mean(T.exp((0.5 - self.x) * (T.dot(self.x,\
                T.fill_diagonal(self.W, 0)) + self.b))) * epsilon


        gparams = T.grad(cost, self.params)

        grad_hist_updates = [(g_hist, g_hist + g ** 2) for g_hist, g in zip(grad_hists, gparams)]

        updates = [(param, param - learning_rate * gparam/(T.sqrt(grad_hist + smoothingterm)))\
        for param, grad_hist, gparam in zip(self.params, grad_hists, gparams)]

        updates = updates + grad_hist_updates

        return cost, updates
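
A minimal NumPy sketch (a standalone illustration, not project code) of the Adagrad rule implemented above: squared gradients are accumulated per parameter and shrink the step size over time.

import numpy as np

def adagrad_step(param, grad, grad_hist, learning_rate=1e-2, smoothing=1.0):
    # Theano applies the update pairs simultaneously: the parameter step uses the
    # previous accumulator, and the accumulator then adds the squared gradient,
    # mirroring grad_hist_updates and updates in Kcost_adagrad above.
    param = param - learning_rate * grad / np.sqrt(grad_hist + smoothing)
    grad_hist = grad_hist + grad ** 2
    return param, grad_hist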
Example #13
    def logp(self, x):
        n = self.n
        p = self.p

        X = x[self.tri_index]
        X = T.fill_diagonal(X, 1)

        result = self._normalizing_constant(n, p)
        result += (n - 1.) * T.log(det(X))
        return bound(result, T.all(X <= 1), T.all(X >= -1), n > 0)
Example #14
    def logp(self, x):
        n = self.n
        p = self.p

        X = x[self.tri_index]
        X = T.fill_diagonal(X, 1)

        result = self._normalizing_constant(n, p)
        result += (n - 1.0) * T.log(det(X))
        return bound(result, T.all(X <= 1), T.all(X >= -1), n > 0)
Example #15
def p_ij_conditional_var(X, sigma):
    N = X.shape[0]

    sqdistance = X**2

    esqdistance = T.exp(-sqdistance / ((2 * (sigma**2)).reshape((N, 1))))
    esqdistance_zd = T.fill_diagonal(esqdistance, 0)

    row_sum = T.sum(esqdistance_zd, axis=1).reshape((N, 1))

    return esqdistance_zd / row_sum  # Possibly dangerous
Example #16
    def logp(self, x):
        n = self.n
        p = self.p

        X = x[self.tri_index]
        X = tt.fill_diagonal(X, 1)

        result = self._normalizing_constant(n, p)
        result += (n - 1.) * tt.log(det(X))
        return bound(result, tt.all(X <= 1), tt.all(X >= -1),
                     matrix_pos_def(X), n > 0)
Example #17
	def _results_inner(self,n,p,s,x):
		# use theano.scan to create x
		result1 = self._normalizing_constant(n,p)*s
		result2,_ = theano.scan(lambda x_min: 
			(n-1) * tt.log(
						tt.nlinalg.det(
							tt.fill_diagonal(
								x_min[self.tri_index],1)
							)
						),
			sequences = [x])
		return result1+result2
Example #18
    def logp(self, x):
        n = self.n
        p = self.p

        X = x[self.tri_index]
        X = tt.fill_diagonal(X, 1)

        result = self._normalizing_constant(n, p)
        result += (n - 1.) * tt.log(det(X))
        return bound(result,
                     tt.all(X <= 1), tt.all(X >= -1),
                     matrix_pos_def(X),
                     n > 0)
Example #19
    def loss_forcedsymmetry(self, learning_rate = 1e-2, epsilon = 1):
        """
        Returns the cost and parameter updates for vanilla SGD; the W update is symmetrized with a zero diagonal.
        """

        cost = T.mean(T.exp((0.5 - self.x) * (T.dot(self.x, self.W) + self.b))) * epsilon
        Wgrad = T.grad(cost, self.W)
        bgrad = T.grad(cost, self.b)

        Wupdate = T.fill_diagonal(0.5 * ((self.W - learning_rate * Wgrad) + (self.W - learning_rate * Wgrad).T), 0)
        updates = [(self.W, Wupdate), (self.b, self.b - learning_rate * bgrad )]

        return cost, updates
Example #20
    def logp(self, x):
        n = self.n
        eta = self.eta

        X = x[self.tri_index]
        X = tt.fill_diagonal(X, 1)

        result = _lkj_normalizing_constant(eta, n)
        result += (eta - 1.) * tt.log(det(X))
        return bound(result,
                     tt.all(X <= 1),
                     tt.all(X >= -1),
                     matrix_pos_def(X),
                     eta > 0,
                     broadcast_conditions=False)
Example #21
    def logp(self, x):
        n = self.n
        eta = self.eta

        X = x[self.tri_index]
        X = tt.fill_diagonal(X, 1)

        result = _lkj_normalizing_constant(eta, n)
        result += (eta - 1.) * tt.log(det(X))
        return bound(result,
                     tt.all(X <= 1), tt.all(X >= -1),
                     matrix_pos_def(X),
                     eta > 0,
                     broadcast_conditions=False
        )
Example #22
def p_Xp_given_X_var(X, sigma, metric):
    N = X.shape[0]

    if metric == 'euclidean':
        sqdistance = sqeuclidean_var(X)
    elif metric == 'precomputed':
        sqdistance = X**2
    else:
        raise Exception('Invalid metric')

    esqdistance = T.exp(-sqdistance / ((2 * (sigma**2)).reshape((N, 1))))
    esqdistance_zd = T.fill_diagonal(esqdistance, 0)

    row_sum = T.sum(esqdistance_zd, axis=1).reshape((N, 1))

    return esqdistance_zd/row_sum  # Possibly dangerous
Example #23
File: tsne.py Project: abbasmg/Thesis
def p_Xp_given_X_var(X, sigma, metric):
    N = X.shape[0]

    if metric == 'euclidean':
        sqdistance = sqeuclidean_var(X)
    elif metric == 'precomputed':
        sqdistance = X**2
    else:
        raise Exception('Invalid metric')

    esqdistance = T.exp(-sqdistance / ((2 * (sigma**2)).reshape((N, 1))))
    esqdistance_zd = T.fill_diagonal(esqdistance, 0)

    row_sum = T.sum(esqdistance_zd, axis=1).reshape((N, 1))

    return esqdistance_zd / row_sum  # Possibly dangerous
Example #24
def calc_original_cond_prob(X, sigma, metric):
    N = X.shape[0]

    if metric == 'euclidean':
        data_distances = calc_euclidean_norms(X)
    elif metric == 'precomputed':
        data_distances = X**2
    else:
        raise Exception('Invalid metric')

    esqdistance = T.exp(-data_distances / ((2 * (sigma**2)).reshape((N, 1))))
    esqdistance_zd = T.fill_diagonal(esqdistance, 0)

    row_sum = T.sum(esqdistance_zd, axis=1).reshape((N, 1))

    return esqdistance_zd / row_sum  # Possibly dangerous
Example #25
def pm_make_cov(sigma_priors, corr_coeffs, ndim):
    """Assemble a covariance matrix single variable standard deviations and correlation coefficients"""
    # Citation: AM 207 lecture notes: http://am207.info/wiki/corr.html
    # Diagonal matrix of standard deviation for each varialbes
    sigma_matrix = tt.nlinalg.diag(sigma_priors)
    # A symmetric nxn matrix has n choose 2 = n(n-1)/2 distinct elements
    n_elem = int(ndim * (ndim - 1) / 2)
    # Convert between flat array indexing and (i, j) matrix indexing
    tri_index = np.zeros([ndim, ndim], dtype=int)
    tri_index[np.triu_indices(ndim, k=1)] = np.arange(n_elem)
    tri_index[np.triu_indices(ndim, k=1)[::-1]] = np.arange(n_elem)
    # Assemble the covariance matrix using the equation
    # CovMat = DiagMat * CorrMat * DiagMat
    corr_matrix = corr_coeffs[tri_index]
    corr_matrix = tt.fill_diagonal(corr_matrix, 1)
    return tt.nlinalg.matrix_dot(sigma_matrix, corr_matrix, sigma_matrix)
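
The index-matrix trick used above (and again in Examples #42, #43 and #46) can be checked with a small NumPy sketch; the dimensions and values here are made up purely for illustration.

import numpy as np

ndim = 3
corr_coeffs = np.array([0.2, 0.4, 0.6])      # upper-triangle entries, row by row
sigmas = np.array([1.0, 2.0, 3.0])

n_elem = ndim * (ndim - 1) // 2
tri_index = np.zeros((ndim, ndim), dtype=int)
tri_index[np.triu_indices(ndim, k=1)] = np.arange(n_elem)
tri_index[np.triu_indices(ndim, k=1)[::-1]] = np.arange(n_elem)

corr = corr_coeffs[tri_index]                # expand the flat vector into a full matrix
np.fill_diagonal(corr, 1.0)                  # unit diagonal
cov = np.diag(sigmas) @ corr @ np.diag(sigmas)
# cov[i, j] == sigmas[i] * sigmas[j] * corr[i, j]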
Example #26
    def Kcost(self, learning_rate=0.01):
        """
        Returns the cost and parameter updates
        """

        cost = T.mean(T.exp((0.5 - self.x) * (T.dot(self.x, self.W) + self.b)))
        #         gparams = T.grad(cost, self.params)
        #         updates = [(param, param - learning_rate * gparam) for param, gparam in zip(self.params, gparams)]
        Wgrad = T.grad(cost, self.W)
        #         T.fill_diagonal(Wgrad, 0)
        bgrad = T.grad(cost, self.b)

        Wupdate = T.fill_diagonal(
            0.5 * ((self.W - learning_rate * Wgrad) +
                   (self.W - learning_rate * Wgrad).T), 0)
        updates = [(self.W, Wupdate), (self.b, self.b - learning_rate * bgrad)]
        #         updates = [(self.W, self.W - learning_rate * Wgrad), (self.b, self.b - learning_rate * bgrad )]

        return cost, updates
Example #27
	def logp(self, x):
		# x is assumed to have shape (s, n_elem) when s > 1, otherwise (n_elem,)
		n = self.n
		p = self.p
		s = self.s
		if s !=1:
			X = self._X_inner_creation(x)
			result = self._results_inner(n,p,s,x)
			return pm.dist_math.bound(result,
				tt.all(X <= 1), tt.all(X >= -1),
				n > 0)
		else:
			X = x[self.tri_index]
			X = tt.fill_diagonal(X, 1)
			result = self._normalizing_constant(n, p)
			result += (n - 1.) * tt.log(tt.nlinalg.det(X))
			return pm.dist_math.bound(result,
						 tt.all(X <= 1), tt.all(X >= -1),
						 n > 0)
Example #28
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)
        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
        dz = tt.switch(ok, dz, floatX(1))

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            solve = tt.slinalg.Solve(A_structure="upper_triangular")
            return solve(outer.T, solve(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
        else:
            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
        return [tt.switch(ok, grad, floatX(np.nan))]
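
For the lower-triangular case, the adjoint assembled above can be written compactly as S = L^{-T} Φ(L^T L̄) L^{-1} and Ā = tril(S + S^T) − Diag(diag(S)), where L̄ is the incoming gradient dz and Φ keeps the lower triangle of its argument with the diagonal halved; this is the expression from Murray (2016) cited in the docstring.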
Example #29
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)
        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
        dz = tt.switch(ok, dz, floatX(1))

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            solve = tt.slinalg.Solve(A_structure="upper_triangular")
            return solve(outer.T, solve(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
        else:
            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
        return [tt.switch(ok, grad, floatX(np.nan))]
Example #30
    def cost(self, lr=1e-2, epsilon=1):
        """
        Returns the cost and parameter updates for vanilla SGD.
        The update rule enforces that W stays symmetric with a zero diagonal at each step.
        """

        print(51 * '=')
        print(24 * '#' + 'MPF' + 24 * '#')
        print(51 * '=')
        print('Input size: {0}'.format(self.n))
        print('Learning temperature: {0}'.format(self.temperature))
        print('Learning rate: {0}'.format(lr))

        cost = epsilon * T.mean(
            T.exp((0.5 - self.x) *
                  (T.dot(self.x, self.W) + self.b) / self.temperature))
        Wgrad = T.grad(cost, self.W)
        bgrad = T.grad(cost, self.b)

        Wupdate = T.fill_diagonal(
            0.5 * ((self.W - lr * Wgrad) + (self.W - lr * Wgrad).T), 0)
        updates = [(self.W, Wupdate), (self.b, self.b - lr * bgrad)]

        return cost, updates
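
The symmetry-enforcing update above amounts to the following projection, shown here as a hypothetical NumPy helper (not part of the project):

import numpy as np

def project_weights(W_step):
    # Average the proposed update with its transpose to restore symmetry,
    # then zero the diagonal, as T.fill_diagonal(..., 0) does above.
    W_sym = 0.5 * (W_step + W_step.T)
    np.fill_diagonal(W_sym, 0.0)
    return W_sym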
Example #31
	def _X_inner_creation(self,x):
		# use theano.scan to create x
		result,_ = theano.scan(lambda x_min: tt.fill_diagonal(x_min[self.tri_index]*(self.tri_index!=-1),1), # update for specialized structure
			sequences = [x])
		return result
Example #32
    def logp(self, x):
        # -1/2 (x-mu) @ Sigma^-1 @ (x-mu)^T - 1/2 log(2pi^k|Sigma|)
        # Sigma = diag(std) @ Corr @ diag(std)
        # Sigma^-1 = diag(std^-1) @ Corr^-1 @ diag(std^-1)
        # Corr is a block matrix of special form
        #           +----------+
        # Corr = [[ | 1, b1, b1|,  0,  0,  0,..., 0]
        #         [ |b1,  1, b1|,  0,  0,  0,..., 0]
        #         [ |b1, b1,  1|,  0,  0,  0,..., 0]
        #           +-----------+----------+
        #         [  0,  0,  0, | 1, b2, b2|,..., 0]
        #         [  0,  0,  0, |b2,  1, b2|,..., 0]
        #         [  0,  0,  0, |b2, b2,  1|,..., 0]
        #                       +----------+
        #         [            ...                 ]
        #         [  0,  0,  0,   0,  0,  0 ,..., 1]]
        #
        # Corr = [[B1,  0,  0, ...,  0]
        #         [ 0, B2,  0, ...,  0]
        #         [ 0,  0, B3, ...,  0]
        #         [        ...        ]
        #         [ 0,  0,  0, ..., Bk]]
        #
        # Corr^-1 = [[B1^-1,     0,      0, ...,     0]
        #            [    0, B2^-1,      0, ...,     0]
        #            [    0,     0,  B3^-1, ...,     0]
        #            [              ...               ]
        #            [    0,     0,      0, ..., Bk^-1]]
        #
        # |B| matrix of rank r is easy
        # https://math.stackexchange.com/a/1732839
        # Let D = eye(r) * (1-b)
        # Then B = D + b * ones((r, r))
        # |B| = (1-b) ** r + b * r * (1-b) ** (r-1)
        # |B| = (1.-b) ** (r-1) * (1. + b * (r - 1))
        # log(|B|) = log(1-b)*(r-1) + log1p(b*(r-1))
        #
        # Inverse B^-1 is easy as well
        # https://math.stackexchange.com/a/1766118
        # let
        # c = 1/b + r*1/(1-b)
        # (B^-1)ii = 1/(1-b) - 1/(c*(1-b)**2)
        # (B^-1)ij =         - 1/(c*(1-b)**2)
        #
        # assuming
        # z = (x - mu) / std
        # we have det fix
        # detfix = -sum(log(std))
        #
        # now we need to compute z @ Corr^-1 @ z^T
        # note that B can be unique per timestep
        # so we need z_t @ Corr_t^-1 @ z_t^T in perfect
        # z_t @ Corr_t^-1 @ z_t^T is a sum of block terms
        # quad = z_ct @ B_ct^-1 @ z_ct^T = (B^-1)_iict * sum(z_ct**2) + (B^-1)_ijct*sum_{i!=j}(z_ict * z_jct)
        #
        # finally all terms are computed explicitly
        # logp = detfix - 1/2 * ( quad + log(pi*2) * k + log(|B|) )

        x = tt.as_tensor_variable(x)
        clust_ids, clust_pos, clust_counts = \
            tt.extra_ops.Unique(return_inverse=True,
                                return_counts=True)(self.clust)
        clust_order = tt.argsort(clust_pos)
        mu = self.mu
        corr = self.corr[..., clust_ids]
        std = self.std
        if std.ndim == 0:
            std = tt.repeat(std, x.shape[-1])
        if std.ndim == 1:
            std = std[None, :]
        if corr.ndim == 1:
            corr = corr[None, :]
        z = (x - mu) / std
        z = z[..., clust_order]
        detfix = -tt.log(std).sum(-1)
        # following the notation above
        r = clust_counts
        b = corr
        # detB = (1.-b) ** (r-1) * (1. + b * (r - 1))
        logdetB = tt.log1p(-b) * (r - 1) + tt.log1p(b * (r - 1))
        c = 1 / b + r / (1. - b)
        invBij = -1. / (c * (1. - b)**2)
        invBii = 1. / (1. - b) + invBij
        invBij = tt.repeat(invBij, clust_counts, axis=-1)
        invBii = tt.repeat(invBii, clust_counts, axis=-1)

        # to compute (Corr^-1)_ijt*sum_{i!=j}(z_it * z_jt)
        # we use masked cross products
        mask = tt.arange(x.shape[-1])[None, :]
        mask = tt.repeat(mask, x.shape[-1], axis=0)
        mask = tt.maximum(mask, mask.T)
        block_end_pos = tt.cumsum(r)
        block_end_pos = tt.repeat(block_end_pos, clust_counts)
        mask = tt.lt(mask, block_end_pos)
        mask = tt.and_(mask, mask.T)
        mask = tt.fill_diagonal(mask.astype('float32'), 0.)
        # type: tt.TensorVariable

        invBiizizi_sum = ((z**2) * invBii).sum(-1)
        invBijzizj_sum = (
            (z.dimshuffle(0, 1, 'x') * mask.dimshuffle('x', 0, 1) *
             z.dimshuffle(0, 'x', 1)) * invBij.dimshuffle(0, 1, 'x')).sum(
                 [-1, -2])
        quad = invBiizizi_sum + invBijzizj_sum
        k = pm.floatX(x.shape[-1])
        logp = (detfix - .5 *
                (quad + pm.floatX(np.log(np.pi * 2)) * k + logdetB.sum(-1)))
        if self.nonzero:
            logp = tt.switch(tt.eq(x, 0).any(-1), 0., logp)
        return bound(logp,
                     tt.gt(corr, -1.),
                     tt.lt(corr, 1.),
                     tt.gt(std, 0.),
                     broadcast_conditions=False)
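
The determinant and inverse identities quoted in the comment block above can be verified numerically; below is a short standalone NumPy check with arbitrary r and b (an illustration, not project code).

import numpy as np

r, b = 4, 0.3
B = (1.0 - b) * np.eye(r) + b * np.ones((r, r))   # compound-symmetric block

# log|B| = (r-1)*log(1-b) + log1p(b*(r-1))
logdetB = (r - 1) * np.log1p(-b) + np.log1p(b * (r - 1))
assert np.isclose(np.linalg.slogdet(B)[1], logdetB)

# B^-1 has constant off-diagonal invBij and diagonal invBii
c = 1.0 / b + r / (1.0 - b)
invBij = -1.0 / (c * (1.0 - b) ** 2)
invBii = 1.0 / (1.0 - b) + invBij
invB = invBij * np.ones((r, r)) + (invBii - invBij) * np.eye(r)
assert np.allclose(np.linalg.inv(B), invB)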
Example #33
	def _X_inner_creation(self,x):
		# use theano.scan to create x
		result,_ = theano.scan(lambda x_min: tt.fill_diagonal(x_min[self.tri_index],1),
			sequences = [x])
		return result
Example #34
def q_ij_student_t_var(Y):
    sqdistance = sqeuclidean_var(Y)
    one_over = T.fill_diagonal(1 / (sqdistance + 1), 0)
    return one_over / one_over.sum()
Example #35
def cov_funct_special(lkj,sigma,tri_index):
	# need to approach the structure part
	result,_ =  theano.scan(lambda l,s: tt.diag(s).dot(tt.fill_diagonal(l[tri_index]*1*(tri_index!=-1),1)).dot(tt.diag(s)), 
		sequences = [lkj,sigma])
	return result
Example #36
	def _check_pos_def(self,x):
		out,_ = theano.scan(lambda x_in: tt.all(tt.nlinalg.eigh(tt.fill_diagonal(x_in[self.tri_index]*(self.tri_index!=-1),1))[0]>0),
			sequences = [x])
		return tt.all(out)
Example #37
	def _x_creation(self,x):
		out,_ = theano.scan(lambda x_in: 
			tt.fill_diagonal(x_in[self.tri_index]*(self.tri_index!=-1),1),
			sequences = [x])

		return out
Example #38
def _Tcov(sigma, rho):
    """Build a covariance matrix"""
    C = T.alloc(rho, 2, 2)
    C = T.fill_diagonal(C, 1.)
    S = T.diag(sigma)
    return T.nlinalg.matrix_dot(S, C, S)
Example #39
    def __init__(self,
                 dimension,
                 mu_data,
                 tau_data,
                 prior="Gaussian",
                 parameters={
                     "location": None,
                     "scale": None,
                     "corr": False
                 },
                 hyper_alpha=None,
                 hyper_beta=None,
                 hyper_gamma=None,
                 hyper_delta=None,
                 transformation=None,
                 parametrization="non-central",
                 name='',
                 model=None):

        assert isinstance(dimension, int), "dimension must be integer!"
        assert dimension in [3, 5, 6], "Not a valid dimension!"

        D = dimension

        # 2) call super's init first, passing model and name
        # to it name will be prefix for all variables here if
        # no name specified for model there will be no prefix
        super().__init__(str(D) + "D", model)
        # now you are in the context of instance,
        # `modelcontext` will return self you can define
        # variables in several ways note, that all variables
        # will get model's name prefix

        #------------------- Data ------------------------------------------------------
        N = int(len(mu_data) / D)
        if N == 0:
            sys.exit(
                "Data has length zero!. You must provide at least one data point"
            )
        #-------------------------------------------------------------------------------

        #============= Transformations ====================================

        if transformation == "mas":
            Transformation = Iden

        elif transformation == "pc":
            if D == 3:
                Transformation = cartesianToSpherical
            elif D == 6:
                Transformation = phaseSpaceToAstrometry_and_RV
            elif D == 5:
                Transformation = phaseSpaceToAstrometry
                D = 6

        else:
            sys.exit("Transformation is not accepted")
        #==================================================================

        #================ Hyper-parameters =====================================
        if hyper_delta is None:
            shape = 1
        else:
            shape = len(hyper_delta)

        #--------- Location ----------------------------------
        if parameters["location"] is None:

            location = [
                pm.Normal("loc_{0}".format(i),
                          mu=hyper_alpha[i][0],
                          sigma=hyper_alpha[i][1],
                          shape=shape) for i in range(D)
            ]

            #--------- Join variables --------------
            mu = pm.math.stack(location, axis=1)

        else:
            mu = parameters["location"]
        #------------------------------------------------------

        #------------- Scale --------------------------
        if parameters["scale"] is None:
            scale = [
                pm.Gamma("scl_{0}".format(i),
                         alpha=2.0,
                         beta=2.0 / hyper_beta[i][0],
                         shape=shape) for i in range(D)
            ]

        else:
            scale = parameters["scale"]
        #--------------------------------------------------

        #----------------------- Correlation -----------------------------------------
        if parameters["corr"]:
            pm.LKJCorr('chol_corr', eta=hyper_gamma, n=D)
            C = tt.fill_diagonal(
                self.chol_corr[np.zeros((D, D), dtype=np.int64)], 1.)
            # print_ = tt.printing.Print('C')(C)
        else:
            C = np.eye(D)
        #-----------------------------------------------------------------------------

        #-------------------- Covariance -------------------------
        sigma_diag = pm.math.stack(scale, axis=1)
        cov = theano.shared(np.zeros((shape, D, D)))

        for i in range(shape):
            sigma = tt.nlinalg.diag(sigma_diag[i])
            covi = tt.nlinalg.matrix_dot(sigma, C, sigma)
            cov = tt.set_subtensor(cov[i], covi)
        #---------------------------------------------------------
        #========================================================================

        #===================== True values ============================================
        if prior == "Gaussian":
            pm.MvNormal("source", mu=mu, cov=cov[0], shape=(N, D))

        elif prior == "GMM":
            pm.Dirichlet("weights", a=hyper_delta, shape=shape)

            comps = [
                pm.MvNormal.dist(mu=mu[i], cov=cov[i]) for i in range(shape)
            ]

            pm.Mixture("source",
                       w=self.weights,
                       comp_dists=comps,
                       shape=(N, D))

        else:
            sys.exit("The specified prior is not supported")
        #=================================================================================

        #----------------------- Transformation---------------------------------------
        transformed = Transformation(self.source)
        #-----------------------------------------------------------------------------

        #------------ Flatten --------------------------------------------------------
        true = pm.math.flatten(transformed)
        #----------------------------------------------------------------------------

        #----------------------- Likelihood ----------------------------------------
        pm.MvNormal('obs', mu=true, tau=tau_data, observed=mu_data)
        #------------------------------------------------------------------------------
Example #40
def covariance(sigma, rho):
    C = T.fill_diagonal(T.alloc(rho, 2, 2), 1.)
    S = T.diag(sigma)
    M = S.dot(C).dot(S)
    return M
Example #41
def q_ij_gaussian_var(Y):
    sqdistance = sqeuclidean_var(Y)
    gauss = T.fill_diagonal(T.exp(-sqdistance), 0)
    return gauss / gauss.sum()
Example #42
# In order to convert the upper triangular correlation values to a complete
# correlation matrix, we need to construct an index matrix:
n_elem = int(n_var * (n_var - 1) / 2)
tri_index = np.zeros([n_var, n_var], dtype=int)
tri_index[np.triu_indices(n_var, k=1)] = np.arange(n_elem)
tri_index[np.triu_indices(n_var, k=1)[::-1]] = np.arange(n_elem)

with pm.Model() as model:

    mu = pm.Normal('mu', mu=0, sd=1, shape=n_var)

    # We can specify separate priors for sigma and the correlation matrix:
    sigma = pm.Uniform('sigma', shape=n_var)
    corr_triangle = pm.LKJCorr('corr', n=1, p=n_var)
    corr_matrix = corr_triangle[tri_index]
    corr_matrix = tt.fill_diagonal(corr_matrix, 1)

    cov_matrix = tt.diag(sigma).dot(corr_matrix.dot(tt.diag(sigma)))

    like = pm.MvNormal('likelihood', mu=mu, cov=cov_matrix, observed=dataset)


def run(n=1000):
    if n == "short":
        n = 50
    with model:
        start = pm.find_MAP()
        step = pm.NUTS(scaling=start)
        trace = pm.sample(n, step=step, start=start)
    return trace
Example #43
# In order to convert the upper triangular correlation values to a complete
# correlation matrix, we need to construct an index matrix:
n_elem = int(n_var * (n_var - 1) / 2)
tri_index = np.zeros([n_var, n_var], dtype=int)
tri_index[np.triu_indices(n_var, k=1)] = np.arange(n_elem)
tri_index[np.triu_indices(n_var, k=1)[::-1]] = np.arange(n_elem)

with Model() as model:

    mu = Normal('mu', mu=0, tau=1**-2, shape=n_var)

    # We can specify separate priors for sigma and the correlation matrix:
    sigma = Uniform('sigma', shape=n_var)
    corr_triangle = LKJCorr('corr', n=1, p=n_var)
    corr_matrix = corr_triangle[tri_index]
    corr_matrix = tt.fill_diagonal(corr_matrix, 1)

    cov_matrix = tt.diag(sigma).dot(corr_matrix.dot(tt.diag(sigma)))

    like = MvNormal('likelihood', mu=mu, tau=inv(cov_matrix), observed=dataset)


def run(n=1000):
    if n == "short":
        n = 50
    with model:
        start = find_MAP()
        step = NUTS(scaling=start)
        tr = sample(n, step=step, start=start)

Example #44
def p_Yp_Y_var(Y):
    sqdistance = sqeuclidean_var(Y)
    one_over = T.fill_diagonal(1/(sqdistance + 1), 0)
    return one_over/one_over.sum()  # Possibly dangerous
Example #45
    x = np.random.uniform(0, 10, size=N)
    y = np.random.normal(np.sin(x), np.sqrt(0.01))

    plt.plot(x, y, 'o')
    plt.xlabel('$x$', fontsize=16)
    plt.ylabel('$f(x)$', fontsize=16, rotation=0)

    with pm.Model() as GP:
        mu = np.zeros(N)
        eta = pm.HalfCauchy('eta', 0.1)
        rho = pm.HalfCauchy('rho', 1)
        sigma = pm.HalfCauchy('sigma', 1)

        D = squared_distance(x, x)  #SED(x,x)

        K = tt.fill_diagonal(eta * pm.math.exp(-rho * D),
                             eta + sigma)  #(K(x, x) + σ I)

        obs = pm.MvNormal('obs', mu, cov=K, observed=y)

        test_points = np.linspace(0, 10, 100)
        D_pred = squared_distance(test_points, test_points)  #SED(x*,x*)
        D_off_diag = squared_distance(x, test_points)  #SED(x,x*) n * N

        K_oo = eta * pm.math.exp(-rho * D_pred)  #K(x*,x*)
        K_o = eta * pm.math.exp(-rho * D_off_diag)  #K(x,x*)

        inv_K = tt.nlinalg.matrix_inverse(K)

        mu_post = pm.Deterministic('mu_post',
                                   pm.math.dot(pm.math.dot(K_o.T, inv_K), y))
        SIGMA_post = pm.Deterministic(
Example #46
def build_mod_bpmf_model(train, alpha=2, dim=10, std=0.01):
    """Build the modified BPMF model using pymc3. The original model uses
    Wishart priors on the covariance matrices. Unfortunately, the Wishart
    distribution in pymc3 is currently not suitable for sampling. This
    version decomposes the covariance matrix into:

        diag(sigma) \dot corr_matrix \dot diag(sigma).

    We use uniform priors on the standard deviations (sigma) and LKJCorr
    priors on the correlation matrices (corr_matrix):

        sigma ~ Uniform
        corr_matrix ~ LKJCorr(n=1, p=dim)

    """
    n, m = train.shape
    beta_0 = 1  # scaling factor for lambdas; unclear on its use

    # Mean value imputation on training data.
    train = train.copy()
    nan_mask = np.isnan(train)
    train[nan_mask] = train[~nan_mask].mean()

    # We will use separate priors for sigma and correlation matrix.
    # In order to convert the upper triangular correlation values to a
    # complete correlation matrix, we need to construct an index matrix:
    n_elem = int(dim * (dim - 1) / 2)
    tri_index = np.zeros([dim, dim], dtype=int)
    tri_index[np.triu_indices(dim, k=1)] = np.arange(n_elem)
    tri_index[np.triu_indices(dim, k=1)[::-1]] = np.arange(n_elem)

    logging.info('building the BPMF model')
    with pm.Model() as bpmf:
        # Specify user feature matrix
        sigma_u = pm.Uniform('sigma_u', shape=dim)
        corr_triangle_u = pm.LKJCorr('corr_u',
                                     n=1,
                                     p=dim,
                                     testval=np.random.randn(n_elem) * std)

        corr_matrix_u = corr_triangle_u[tri_index]
        corr_matrix_u = t.fill_diagonal(corr_matrix_u, 1)
        cov_matrix_u = t.diag(sigma_u).dot(corr_matrix_u.dot(t.diag(sigma_u)))
        lambda_u = t.nlinalg.matrix_inverse(cov_matrix_u)

        mu_u = pm.Normal('mu_u',
                         mu=0,
                         tau=beta_0 * t.diag(lambda_u),
                         shape=dim,
                         testval=np.random.randn(dim) * std)
        U = pm.MvNormal('U',
                        mu=mu_u,
                        tau=lambda_u,
                        shape=(n, dim),
                        testval=np.random.randn(n, dim) * std)

        # Specify item feature matrix
        sigma_v = pm.Uniform('sigma_v', shape=dim)
        corr_triangle_v = pm.LKJCorr('corr_v',
                                     n=1,
                                     p=dim,
                                     testval=np.random.randn(n_elem) * std)

        corr_matrix_v = corr_triangle_v[tri_index]
        corr_matrix_v = t.fill_diagonal(corr_matrix_v, 1)
        cov_matrix_v = t.diag(sigma_v).dot(corr_matrix_v.dot(t.diag(sigma_v)))
        lambda_v = t.nlinalg.matrix_inverse(cov_matrix_v)

        mu_v = pm.Normal('mu_v',
                         mu=0,
                         tau=beta_0 * t.diag(lambda_v),
                         shape=dim,
                         testval=np.random.randn(dim) * std)
        V = pm.MvNormal('V',
                        mu=mu_v,
                        tau=lambda_v,
                        shape=(m, dim),
                        testval=np.random.randn(m, dim) * std)

        # Specify rating likelihood function
        R = pm.Normal('R',
                      mu=t.dot(U, V.T),
                      tau=alpha * np.ones((n, m)),
                      observed=train)

    logging.info('done building the BPMF model')
    return bpmf
Example #47
            w = Dirichlet('w', a=pm.floatX(alpha), shape=(n_components, ))

            # Impose sparse structure onto mean with off-diagonal elements all being the same, because background should be the same throughout.
            mus_signal = MvNormal(
                'mus_signal',
                mu=pm.floatX(signalMean_priorMean),
                tau=pm.floatX(np.eye(n_dimensions) / signalMean_priorSD**2),
                shape=n_dimensions)
            mus_background = MvNormal('mus_background',
                                      mu=pm.floatX(backgroundMean_priorMean),
                                      tau=pm.floatX(
                                          np.eye(n_dimensions) /
                                          backgroundMean_priorSD**2),
                                      shape=n_dimensions)
            mus = tt.fill_diagonal(
                tt.reshape(tt.tile(mus_background, n_components),
                           (n_components, n_dimensions)),
                0) + tt.eye(n_components, n_dimensions) * mus_signal

            # Impose structure for covariance as well, with off-diagonal elements being zero, just because that model is easier to fit.
            sigmas_signal = pm.Gamma('sigmas_signal',
                                     mu=pm.floatX(signalSD_priorMean),
                                     sd=pm.floatX(signalSD_priorSD),
                                     shape=n_dimensions)
            sigmas_background = pm.Gamma('sigmas_background',
                                         mu=pm.floatX(backgroundSD_priorMean),
                                         sd=pm.floatX(backgroundSD_priorSD),
                                         shape=n_dimensions)
            sigmas = tt.fill_diagonal(
                tt.reshape(tt.tile(sigmas_background, n_components),
                           (n_components, n_dimensions)),
                0) + tt.eye(n_components, n_dimensions) * sigmas_signal
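
A small NumPy illustration (standalone, with made-up values) of the structured means and scales assembled above: each mixture component keeps the shared background value in every dimension except its own, where the signal value is used.

import numpy as np

n_components = n_dimensions = 3
mus_signal = np.array([10.0, 11.0, 12.0])
mus_background = np.array([1.0, 2.0, 3.0])

mus = np.tile(mus_background, n_components).reshape(n_components, n_dimensions)
np.fill_diagonal(mus, 0.0)                                    # clear the "signal" slots
mus = mus + np.eye(n_components, n_dimensions) * mus_signal   # put signal means on the diagonal
# mus[k, k] == mus_signal[k]; mus[k, j] == mus_background[j] for j != k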