Example #1
    def __init__(self,
                 incoming,
                 uvec=lasagne.init.Normal(1),
                 b=lasagne.init.Constant(0),
                 **kwargs):
        super(OrthogonalFlow, self).__init__(incoming, **kwargs)
        num_inputs = self.input_shape[1]

        n = num_inputs
        n_triu_entries = (n * (n + 1)) // 2
        r = T.arange(n)
        tmp_mat = r[np.newaxis, :] + (n_triu_entries - n -
                                      (r * (r + 1)) // 2)[::-1, np.newaxis]
        triu_index_matrix = T.tril(tmp_mat.T) - r[np.newaxis, :]
        tmp_mat1 = T.tril(tmp_mat.T) - r[np.newaxis, :]
        skew_index_mat = T.tril(tmp_mat1 - T.diag(T.diag(tmp_mat1)))

        self.uvec = self.add_param(uvec,
                                   ((num_inputs - 1) * num_inputs // 2, ),
                                   name='uvec')
        vec0 = T.concatenate([T.zeros(1), self.uvec])
        skw_matrix = vec0[skew_index_mat] - vec0[skew_index_mat].T

        self.U = expm(skw_matrix)

        self.b = self.add_param(b, (num_inputs, ), name='b')  # scalar
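A quick way to see why this parameterization yields an orthogonal U: the matrix exponential of a skew-symmetric matrix is orthogonal. A minimal NumPy/SciPy sketch of that fact (the size n and the random uvec below are invented for illustration):

import numpy as np
from scipy.linalg import expm

n = 4
rng = np.random.default_rng(0)
uvec = rng.normal(size=(n * (n - 1)) // 2)    # one free parameter per strictly-lower-triangular entry

skew = np.zeros((n, n))
skew[np.tril_indices(n, k=-1)] = uvec         # fill the strict lower triangle
skew = skew - skew.T                          # skew-symmetric: skew.T == -skew

U = expm(skew)
print(np.allclose(U.T @ U, np.eye(n)))        # True: U is orthogonal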
Example #2
    def __init__(self, rng, input1, input2, n_in1, n_in2, n_hidden_layers, d_hidden, W1=None, W2=None):
        self.input1 = input1
        self.input2 = input2
        
        CouplingFunc = WarpNetwork(rng, input1, n_hidden_layers, d_hidden, n_in1, n_in2)  
        
        if W1 is None:
            bin = numpy.sqrt(6. / (n_in1 + n_in1))
            W1_values = numpy.identity(n_in1, dtype=theano.config.floatX)            
            W1 = theano.shared(value=W1_values, name='W1')

        if W2 is None:
            bin = numpy.sqrt(6. / (n_in2 + n_in2))
            W2_values = numpy.identity(n_in2, dtype=theano.config.floatX)
            W2 = theano.shared(value=W2_values, name='W2')

        V1u = T.triu(W1)
        V1l = T.tril(W1)
        V1l = T.extra_ops.fill_diagonal(V1l, 1.)
        V1 = T.dot(V1u, V1l) 
            
        V2u = T.triu(W2)
        V2l = T.tril(W2)
        V2l = T.extra_ops.fill_diagonal(V2l, 1.)
        V2 = T.dot(V2u, V2l) 
            
        self.output1 = T.dot(input1, V1)
        self.output2 = T.dot(input2, V2) + CouplingFunc.output

        self.log_jacobian = T.log(T.abs_(T.nlinalg.ExtractDiag()(V1u))).sum() \
            + T.log(T.abs_(T.nlinalg.ExtractDiag()(V2u))).sum() 

        self.params = CouplingFunc.params
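The log-Jacobian above relies on V = V1u·V1l with V1l unit lower triangular, so |det V| is just the product of |diag(V1u)|. A small NumPy check of that identity (the matrix size and values are arbitrary):

import numpy as np

rng = np.random.default_rng(1)
W = rng.normal(size=(5, 5))

U = np.triu(W)                        # upper-triangular factor (its diagonal carries the determinant)
L = np.tril(W)
np.fill_diagonal(L, 1.0)              # unit lower-triangular factor, as in the layer above

V = U @ L
log_det = np.log(np.abs(np.diag(U))).sum()
print(np.allclose(np.log(np.abs(np.linalg.det(V))), log_det))   # True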
Example #3
    def lower_lower(self):
        '''Evaluates the intractable term in the lower bound which itself
         must be lower bounded'''

        a = self.get_aux_mult()

        reversed_cum_probs = T.extra_ops.cumsum(a[:,::-1],1)
        dot_prod_m   = T.dot(reversed_cum_probs, self.digams_1p2)
        dot_prod_mp1 = T.dot(T.concatenate((reversed_cum_probs[:,1:],T.zeros((self.K,1))),1), self.digams[:,0])
        # final entropy term
        triu_ones = T.triu(T.ones_like(a)) - T.eye(self.K)
        aloga = T.sum(T.tril(a)*T.log(T.tril(a)+triu_ones),1)
        return T.dot(a, self.digams[:,1]) + dot_prod_m + dot_prod_mp1 - aloga
Example #4
    def log_prob(self, X, Y):
        """ Evaluate the log-probability for the given samples.

        Parameters
        ----------
        Y:      T.tensor
            samples from the upper layer
        X:      T.tensor
            samples from the lower layer

        Returns
        -------
        log_p:  T.tensor
            log-probabilities for the samples in X and Y
        """
        n_X, n_Y = self.get_hyper_params(['n_X', 'n_Y'])
        b, W, U = self.get_model_params(['b', 'W', 'U'])

        W = T.tril(W, k=-1)

        prob_X = self.sigmoid(T.dot(X, W) + T.dot(Y, U) + T.shape_padleft(b))
        log_prob = X * T.log(prob_X) + (1 - X) * T.log(1 - prob_X)
        log_prob = T.sum(log_prob, axis=1)

        return log_prob
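The strictly lower-triangular mask T.tril(W, k=-1) is what makes this layer autoregressive: unit i of the pre-activation only receives input from units j > i. A hedged NumPy sketch of that dependency structure (the size and the random W are invented for illustration):

import numpy as np

n = 5
rng = np.random.default_rng(2)
W = np.tril(rng.normal(size=(n, n)), k=-1)    # strictly lower-triangular weights
x = rng.integers(0, 2, size=n).astype(float)

z = x @ W                                     # pre-activation of each unit
print(np.isclose(z[-1], 0.0))                 # last unit has no parents
print(np.isclose(z[0], x[1:] @ W[1:, 0]))     # unit 0 conditions on all later units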
Example #5
    def logp(self, S):
        l = 0.0

        #add prior
        pi = self.pi
        #Get time 0 states
        zeroIndices = np.roll(self.T.cumsum(),1)
        zeroIndices[0] = 0
        zeroIndices = zeroIndices.astype('int32')
        l += TT.sum(TT.log(pi[S[zeroIndices]]))
        #l += TT.sum(TT.log(pi[S[:,0]]))

        #add likelihood
        Q = self.Q
        step_sizes = self.step_sizes

        #import pdb; pdb.set_trace()
        C = self.computeC(S)

        n_step_sizes = len(self.step_sizes)
        for i in range(0, n_step_sizes):
            tau = step_sizes[i]
            P = TT.slinalg.expm(tau*Q)
            
            stabilizer = TT.tril(TT.alloc(0.0, *P.shape)+0.1, k=-1)
            logP = TT.log(P + stabilizer)

            #compute likelihood in terms of P(tau)
            l += TT.sum(C[i,:,:]*logP)
          
        return l
Example #6
    def grad(self, inputs, output_gradients):
        """
        Reverse-mode gradient updates for matrix solve operation c = A \ b.

        Symbolic expression for updates taken from [1]_.

        References
        ----------
        .. [1] M. B. Giles, "An extended collection of matrix derivative results
          for forward and reverse mode automatic differentiation",
          http://eprints.maths.ox.ac.uk/1079/

        """
        A, b = inputs
        c = self(A, b)
        c_bar = output_gradients[0]
        trans_map = {
            'lower_triangular': 'upper_triangular',
            'upper_triangular': 'lower_triangular'
        }
        trans_solve_op = Solve(
            # update A_structure and lower to account for a transpose operation
            A_structure=trans_map.get(self.A_structure, self.A_structure),
            lower=not self.lower
        )
        b_bar = trans_solve_op(A.T, c_bar)
        # force outer product if vector second input
        A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)
        if self.A_structure == 'lower_triangular':
            A_bar = tensor.tril(A_bar)
        elif self.A_structure == 'upper_triangular':
            A_bar = tensor.triu(A_bar)
        return [A_bar, b_bar]
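A quick finite-difference check of the Giles update used above, written in plain NumPy; the scalar loss g @ c, the matrix size, and the probed entry are all invented for illustration. For a lower-triangular A, b_bar = A^{-T} c_bar and A_bar = tril(-b_bar c^T):

import numpy as np

rng = np.random.default_rng(3)
n = 4
A = np.tril(rng.normal(size=(n, n))) + n * np.eye(n)   # well-conditioned lower-triangular system
b = rng.normal(size=n)
g = rng.normal(size=n)                                  # c_bar: upstream gradient of the scalar loss g @ c

c = np.linalg.solve(A, b)
b_bar = np.linalg.solve(A.T, g)                         # Giles: b_bar = A^{-T} c_bar
A_bar = np.tril(-np.outer(b_bar, c))                    # keep only the structured (lower) part

i, j, eps = 2, 1, 1e-6                                  # finite-difference check of one entry
A_pert = A.copy()
A_pert[i, j] += eps
fd = (g @ np.linalg.solve(A_pert, b) - g @ c) / eps
print(np.allclose(A_bar[i, j], fd, atol=1e-5))          # True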
Example #7
    def logp(self, S):
        l = 0.0

        #add prior
        pi = self.pi
        #Get time 0 states
        zeroIndices = np.roll(self.T.cumsum(), 1)
        zeroIndices[0] = 0
        zeroIndices = zeroIndices.astype('int32')
        l += TT.sum(TT.log(pi[S[zeroIndices]]))
        #l += TT.sum(TT.log(pi[S[:,0]]))

        #add likelihood
        Q = self.Q
        step_sizes = self.step_sizes

        #import pdb; pdb.set_trace()
        C = self.computeC(S)

        n_step_sizes = len(self.step_sizes)
        for i in range(0, n_step_sizes):
            tau = step_sizes[i]
            P = TT.slinalg.expm(tau * Q)

            stabilizer = TT.tril(TT.alloc(0.0, *P.shape) + 0.1, k=-1)
            logP = TT.log(P + stabilizer)

            #compute likelihood in terms of P(tau)
            l += TT.sum(C[i, :, :] * logP)

        return l
Example #8
File: linalg.py  Project: Theano/Theano
    def L_op(self, inputs, outputs, gradients):
        # Modified from theano/tensor/slinalg.py
        # No handling for on_error = 'nan'
        dz = gradients[0]
        chol_x = outputs[0]

        # this is for nan mode
        #
        # ok = ~tensor.any(tensor.isnan(chol_x))
        # chol_x = tensor.switch(ok, chol_x, 1)
        # dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return gpu_solve_upper_triangular(
                outer.T, gpu_solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        return [grad]
Example #9
    def L_op(self, inputs, outputs, output_gradients):
        r"""
        Reverse-mode gradient updates for matrix solve operation c = A \\\ b.

        Symbolic expression for updates taken from [#]_.

        References
        ----------
        .. [#] M. B. Giles, "An extended collection of matrix derivative results
          for forward and reverse mode automatic differentiation",
          http://eprints.maths.ox.ac.uk/1079/

        """
        A, b = inputs
        c = outputs[0]
        c_bar = output_gradients[0]
        trans_map = {
            "lower_triangular": "upper_triangular",
            "upper_triangular": "lower_triangular",
        }
        trans_solve_op = Solve(
            # update A_structure and lower to account for a transpose operation
            A_structure=trans_map.get(self.A_structure, self.A_structure),
            lower=not self.lower,
        )
        b_bar = trans_solve_op(A.T, c_bar)
        # force outer product if vector second input
        A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)
        if self.A_structure == "lower_triangular":
            A_bar = tensor.tril(A_bar)
        elif self.A_structure == "upper_triangular":
            A_bar = tensor.triu(A_bar)
        return [A_bar, b_bar]
Example #10
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.
        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_
        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527
        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T,
                solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        return [tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))]
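The same Murray formula can be sanity-checked in plain NumPy against a finite difference of a symmetric perturbation; everything below (the size, the random SPD matrix, the probed entry) is made up for illustration:

import numpy as np

def chol_grad(L, dz):
    """NumPy version of the reverse-mode Cholesky gradient computed above."""
    phi = lambda m: np.tril(m) - np.diag(np.diag(m)) / 2.0          # lower triangle, halved diagonal
    P = phi(L.T @ dz)
    s = np.linalg.solve(L.T, np.linalg.solve(L.T, P.T).T)           # L^{-T} P L^{-1}
    return np.tril(s + s.T) - np.diag(np.diag(s))

rng = np.random.default_rng(4)
n = 4
A0 = rng.normal(size=(n, n))
X = A0 @ A0.T + n * np.eye(n)                                       # SPD input
dz = np.tril(rng.normal(size=(n, n)))                               # upstream gradient w.r.t. chol(X)
L = np.linalg.cholesky(X)
G = chol_grad(L, dz)

i, j, eps = 2, 1, 1e-6                                              # symmetric perturbation of the (i, j) pair
E = np.zeros((n, n)); E[i, j] = E[j, i] = eps
fd = (np.sum(dz * np.linalg.cholesky(X + E)) - np.sum(dz * L)) / eps
print(np.allclose(G[i, j], fd, atol=1e-4))                          # True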
Example #11
    def log_prob(self, X, Y):
        """ Evaluate the log-probability for the given samples.

        Parameters
        ----------
        Y:      T.tensor
            samples from the upper layer
        X:      T.tensor
            samples from the lower layer

        Returns
        -------
        log_p:  T.tensor
            log-probabilities for the samples in X and Y
        """
        n_X, n_Y = self.get_hyper_params(['n_X', 'n_Y'])
        b, W, U  = self.get_model_params(['b', 'W', 'U'])
        
        W = T.tril(W, k=-1)

        prob_X = self.sigmoid(T.dot(X, W) + T.dot(Y, U) + T.shape_padleft(b))
        log_prob = X*T.log(prob_X) + (1-X)*T.log(1-prob_X)
        log_prob = T.sum(log_prob, axis=1)

        return log_prob
Example #12
def rank_loss(scores):
    # Images
    diag   = T.diag(scores)
    diff_img = scores - diag.dimshuffle(0, 'x') + 1
    max_img = T.maximum(0, diff_img)
    triu_img = T.triu(max_img, 1)
    til_img  = T.tril(max_img, -1)
    res_img = T.sum(triu_img) + T.sum(til_img)

    # Sentences
    diff_sent = scores.T - diag.dimshuffle(0, 'x') + 1
    max_sent = T.maximum(0, diff_sent)
    triu_sent = T.triu(max_sent, 1)
    til_sent  = T.tril(max_sent, -1)
    res_sent = T.sum(triu_sent) + T.sum(til_sent)
    
    return T.log(T.sum(scores) + 0.01)
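Note that the value returned above is T.log(T.sum(scores) + 0.01) and does not use res_img or res_sent; what those lines compute is the usual margin ranking loss over an image-sentence score matrix, with the diagonal (matching pairs) excluded via triu/tril. A NumPy sketch of that quantity, assuming a margin of 1:

import numpy as np

def rank_loss_np(scores, margin=1.0):
    diag = np.diag(scores)
    cost_img = np.maximum(0.0, scores - diag[:, None] + margin)      # each row vs. its matching score
    cost_sent = np.maximum(0.0, scores.T - diag[:, None] + margin)
    off_diag = 1.0 - np.eye(scores.shape[0])                         # same role as triu(., 1) + tril(., -1)
    return np.sum(cost_img * off_diag) + np.sum(cost_sent * off_diag)

scores = np.random.default_rng(5).normal(size=(3, 3))
print(rank_loss_np(scores))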
Example #13
    def __init__(self, input, n_in, n_out):

        batchSize, seqLen, _ = input.shape

        import collections
        if isinstance(n_out, collections.Sequence):
            LRembedLayer = EmbeddingLayer(input, n_in, n_out[2])
            MRembedLayer = EmbeddingLayer(input, n_in, n_out[1])
            SRembedLayer = EmbeddingLayer(input, n_in, n_out[0])
            n_out_max = max(n_out)
        else:
            LRembedLayer = EmbeddingLayer(input, n_in, n_out)
            MRembedLayer = EmbeddingLayer(input, n_in, n_out)
            SRembedLayer = EmbeddingLayer(input, n_in, n_out)
            n_out_max = n_out

        self.layers = [LRembedLayer, MRembedLayer, SRembedLayer]

        M1s = T.ones((seqLen, seqLen))
        Sep24Mat = T.triu(M1s, 24) + T.tril(M1s, -24)
        Sep12Mat = T.triu(M1s, 12) + T.tril(M1s, -12)
        Sep6Mat = T.triu(M1s, 6) + T.tril(M1s, -6)
        LRsel = Sep24Mat.dimshuffle('x', 0, 1, 'x')
        MRsel = (Sep12Mat - Sep24Mat).dimshuffle('x', 0, 1, 'x')
        SRsel = (Sep6Mat - Sep12Mat).dimshuffle('x', 0, 1, 'x')

        selections = [LRsel, MRsel, SRsel]

        self.output = T.zeros((batchSize, seqLen, seqLen, n_out_max),
                              dtype=theano.config.floatX)
        for emLayer, sel in zip(self.layers, selections):
            l_n_out = emLayer.n_out
            self.output = T.inc_subtensor(self.output[:, :, :, :l_n_out],
                                          T.mul(emLayer.output, sel))

        self.pcenters = 0
        self.params = []
        self.paramL1 = 0
        self.paramL2 = 0
        for layer in [LRembedLayer, MRembedLayer, SRembedLayer]:
            self.params += layer.params
            self.paramL1 += layer.paramL1
            self.paramL2 += layer.paramL2
            self.pcenters += layer.pcenters

        self.n_out = n_out_max
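The three masks above select residue pairs by sequence separation: T.triu(M, k) + T.tril(M, -k) is 1 exactly where |i - j| >= k. A small NumPy sketch of the mask construction (seq_len is arbitrary):

import numpy as np

seq_len = 30
ones = np.ones((seq_len, seq_len))
sep24 = np.triu(ones, 24) + np.tril(ones, -24)   # 1 where |i - j| >= 24
sep12 = np.triu(ones, 12) + np.tril(ones, -12)   # 1 where |i - j| >= 12
sep6 = np.triu(ones, 6) + np.tril(ones, -6)      # 1 where |i - j| >= 6

lr_mask = sep24                                  # long-range pairs
mr_mask = sep12 - sep24                          # medium-range: 12 <= |i - j| < 24
sr_mask = sep6 - sep12                           # short-range:   6 <= |i - j| < 12

i, j = np.indices((seq_len, seq_len))
print(np.array_equal(mr_mask, ((np.abs(i - j) >= 12) & (np.abs(i - j) < 24)).astype(float)))   # True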
Example #14
    def get_aux_mult(self):
        a_first_row_unnorm = (self.digams_1_cumsum - self.digams_1p2_cumsum + self.digams[:,1]).reshape((1,self.K))

        a_first_row_unnorm_rep = t_repeat(a_first_row_unnorm, self.K, axis=0).reshape((self.K,self.K))

        a = T.exp(a_first_row_unnorm_rep) * T.tril(T.ones((self.K, self.K)))

        return a / T.sum(a, 1).reshape((self.K,1))
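In other words, each row k of a is a distribution supported on the first k+1 entries: the exponentiated row is masked by a lower-triangular matrix of ones and then row-normalized. A NumPy sketch (K and the row values are arbitrary):

import numpy as np

K = 4
row = np.random.default_rng(6).normal(size=(1, K))
a = np.exp(np.repeat(row, K, axis=0)) * np.tril(np.ones((K, K)))   # row k keeps only entries 0..k
a = a / a.sum(axis=1, keepdims=True)                               # row-normalize
print(a.sum(axis=1))                                               # all ones
print(a[0])                                                        # first row puts all mass on entry 0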
Example #15
    def check_l(m, k=0):
        m_symb = T.matrix(dtype=m.dtype)
        k_symb = T.iscalar()

        f = theano.function([m_symb, k_symb],
                            T.tril(m_symb, k_symb),
                            mode=mode_with_gpu)
        result = f(m, k)
        assert np.allclose(result, np.tril(m, k))
        assert result.dtype == np.dtype(dtype)
        assert any([isinstance(node.op, GpuTri)
                    for node in f.maker.fgraph.toposort()])
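For reference, the k argument exercised by this test shifts the boundary of the kept triangle; a plain NumPy illustration of the semantics the GPU op must reproduce:

import numpy as np

m = np.arange(16).reshape(4, 4)
print(np.tril(m))          # keep entries on and below the main diagonal
print(np.tril(m, k=1))     # also keep the first superdiagonal
print(np.tril(m, k=-1))    # keep only entries strictly below the diagonal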
Example #16
    def check_l(m, k=0):
        m_symb = T.matrix(dtype=m.dtype)
        k_symb = T.iscalar()

        f = theano.function([m_symb, k_symb],
                            T.tril(m_symb, k_symb),
                            mode=mode_with_gpu)
        result = f(m, k)
        assert np.allclose(result, np.tril(m, k))
        assert result.dtype == np.dtype(dtype)
        assert any([
            isinstance(node.op, GpuTri) for node in f.maker.fgraph.toposort()
        ])
Example #17
    def get_model(self, X, Y, X_test):
        #initial_params = {'m':m,'S_b':S_b,'mu':mu,'Sigma_b':Sigma_b,'Z':Z,'lhyp':lhyp,'ls':ls}
        (M, D), N, Q = self.Z.shape, X.shape[0], X.shape[1]

        #constrain the variables to be positive
        beta, sf2, l = T.exp(self.ls), T.exp(self.lhyp[0]), T.exp(
            self.lhyp[1:])
        S = T.exp(self.S_b)
        #Sigma=T.exp(self.Sigma_b)

        #x is diagonal, so there is no problem even without taking a square root
        #u is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky-style construction
        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) +
                       T.diag(T.exp(T.diag(self.Sigma_b))))

        #rescale
        mu_scaled, Sigma_scaled = sf2**0.5 * self.mu, sf2**0.5 * Sigma

        #random numbers for the reparameterization trick
        srng = T.shared_randomstreams.RandomStreams(234)
        eps_NQ = srng.normal(self.m.shape)
        eps_M = srng.normal(self.mu.shape)

        #generate the samples; a single MC draw, since we work in mini-batches
        Xtilda = self.m + S * eps_NQ
        U = mu_scaled + Sigma_scaled * eps_M

        Kmm = self.ker.RBF(sf2, l, self.Z)
        KmmInv = sT.matrix_inverse(Kmm)
        #KmmDet=theano.sandbox.linalg.det(Kmm)

        Kmn = self.ker.RBF(sf2, l, self.Z, Xtilda)
        Knn = self.ker.RBF(sf2, l, Xtilda, Xtilda)

        Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

        Kinterval = T.dot(KmmInv, Kmn)

        mean_U = T.dot(Kinterval.T, U)
        Covariance = beta

        LL = self.log_mvn(X, mean_U, Covariance) - 0.5 * beta * T.sum(
            (T.eye(N) * Ktilda))

        #KL_X = -0.5 * (-T.sum(T.log(T.sum(Sigma,0))) + T.dot(m.T,T.dot(KmmInv,m)).squeeze() + T.sum((Sigma*KmmInv)) - M)-0.5*T.log(KmmDet)

        KL_X = self.KLD_X(self.m, S)

        KL_U = self.KLD_U(mu_scaled, Sigma_scaled, Kmm)

        return KL_X, KL_U, LL
Example #18
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        # Replace the cholesky decomposition with 1 if there are nans
        # or solve_upper_triangular will throw a ValueError.
        if self.on_error == 'nan':
            ok = ~tensor.any(tensor.isnan(chol_x))
            chol_x = tensor.switch(ok, chol_x, 1)
            dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T,
                solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        if self.on_error == 'nan':
            return [tensor.switch(ok, grad, np.nan)]
        else:
            return [grad]
Example #19
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        # Replace the cholesky decomposition with 1 if there are nans
        # or solve_upper_triangular will throw a ValueError.
        if self.on_error == 'nan':
            ok = ~tensor.any(tensor.isnan(chol_x))
            chol_x = tensor.switch(ok, chol_x, 1)
            dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T, solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        if self.on_error == 'nan':
            return [tensor.switch(ok, grad, np.nan)]
        else:
            return [grad]
Example #20
File: linalg.py  Project: Theano/Theano
    def L_op(self, inputs, outputs, output_gradients):
        # Modified from theano/tensor/slinalg.py
        A, b = inputs
        c = outputs[0]
        c_bar = output_gradients[0]

        trans_solve_op = GpuCublasTriangularSolve(not self.lower)
        b_bar = trans_solve_op(A.T, c_bar)

        A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)

        if self.lower:
            A_bar = tensor.tril(A_bar)
        else:
            A_bar = tensor.triu(A_bar)
        return [A_bar, b_bar]
Example #21
def triangularize_network(layers, force_diag=False):
    n_layers, rem = divmod(len(layers) + 1, 4)
    assert(rem == 0)
    assert(n_layers > 0)
    assert((n_layers - 1, aL_PARAM) not in layers)

    layers_LU = layers.copy()
    for nn in xrange(n_layers):
        LL, UL = layers[(nn, LL_PARAM)], layers[(nn, UL_PARAM)]
        LL_diag = T.nlinalg.alloc_diag(T.nlinalg.extract_diag(LL))

        layers_LU[(nn, LL_PARAM)] = \
            ifelse(force_diag, LL_diag, T.tril(LL))
        layers_LU[(nn, UL_PARAM)] = \
            ifelse(force_diag, T.eye(UL.shape[0]), T.triu(UL))
    return layers_LU, n_layers
Example #22
    def L_op(self, inputs, outputs, output_gradients):
        # Modified from theano/tensor/slinalg.py
        A, b = inputs
        c = outputs[0]
        c_bar = output_gradients[0]

        trans_solve_op = GpuCublasTriangularSolve(not self.lower)
        b_bar = trans_solve_op(A.T, c_bar)

        A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)

        if self.lower:
            A_bar = tensor.tril(A_bar)
        else:
            A_bar = tensor.triu(A_bar)
        return [A_bar, b_bar]
Example #23
    def grad(self, inputs, g_outputs):
        r"""The gradient function should return
           .. math:: \sum_n\left(W_n\frac{\partial\,w_n}
                           {\partial a_{ij}} +
                     \sum_k V_{nk}\frac{\partial\,v_{nk}}
                           {\partial a_{ij}}\right),
        where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
        :math:`a` to ``inputs``, and  :math:`(w, v)=\mbox{eig}(a)`.
        Analytic formulae for eigensystem gradients are well-known in
        perturbation theory:
           .. math:: \frac{\partial\,w_n}
                          {\partial a_{ij}} = v_{in}\,v_{jn}
           .. math:: \frac{\partial\,v_{kn}}
                          {\partial a_{ij}} =
                \sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
                
        Code derived from theano.nlinalg.Eigh and doi=10.1.1.192.9105
        """
        x, = inputs
        w, v = self(x)
        # Replace gradients wrt disconnected variables with
        # zeros. This is a work-around for issue #1063.
        W, V = _zero_disconnected([w, v], g_outputs)

        N = x.shape[0]

        # W part
        gW = T.tensordot(v, v * W[numpy.newaxis, :], (1, 1))
        # V part
        vv = v[:, :, numpy.newaxis, numpy.newaxis] * v[numpy.newaxis,
                                                       numpy.newaxis, :, :]
        minusww = -w[:, numpy.newaxis] + w[numpy.newaxis, :]
        minuswwinv = 1 / (minusww + T.eye(N))
        minuswwinv = T.triu(minuswwinv, 1) + T.tril(minuswwinv,
                                                    -1)  # remove diagonal
        c = (vv * minuswwinv[numpy.newaxis, :, numpy.newaxis, :]).dimshuffle(
            (1, 3, 0, 2))
        vc = T.tensordot(v, c, (1, 0))
        gV = T.tensordot(V, vc, ((0, 1), (0, 1)))

        g = gW + gV

        res = (g.T + g) / 2
        return [res]
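The eigenvalue part of the formula, dw_n/da_ij = v_in v_jn, can be sanity-checked with a finite difference on a random symmetric matrix (for a symmetric perturbation of the (i, j)/(j, i) pair the two contributions add). All values below are invented for illustration:

import numpy as np

rng = np.random.default_rng(7)
n = 4
A0 = rng.normal(size=(n, n))
A = (A0 + A0.T) / 2.0                        # symmetric input with (generically) distinct eigenvalues
w, v = np.linalg.eigh(A)

k, i, j, eps = 1, 2, 0, 1e-6                 # eigenvalue index and perturbed entry pair
E = np.zeros((n, n)); E[i, j] = E[j, i] = eps
fd = (np.linalg.eigh(A + E)[0][k] - w[k]) / eps
analytic = v[i, k] * v[j, k] + v[j, k] * v[i, k]
print(np.allclose(fd, analytic, atol=1e-4))  # True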
Example #24
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)
        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
        dz = tt.switch(ok, dz, floatX(1))

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            solve = tt.slinalg.Solve(A_structure="upper_triangular")
            return solve(outer.T, solve(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
        else:
            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
        return [tt.switch(ok, grad, floatX(np.nan))]
Example #25
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)
        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
        dz = tt.switch(ok, dz, floatX(1))

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            solve = tt.slinalg.Solve(A_structure="upper_triangular")
            return solve(outer.T, solve(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
        else:
            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
        return [tt.switch(ok, grad, floatX(np.nan))]
Example #26
def test_autoregressor(dim=3, n_samples=5):
    ar = AutoRegressor(dim)
    ar.params['b'] += 0.1
    tparams = ar.set_tparams()

    X = T.matrix('X', dtype=floatX)
    nlp = ar.neg_log_prob(X)
    p = ar.get_prob(X, *ar.get_params())
    W = T.tril(ar.W, k=-1)
    z = T.dot(X, W) + ar.b

    x = np.random.randint(0, 2, size=(n_samples, dim)).astype(floatX)

    f = theano.function([X], [nlp, p, z, W])
    nlp_t, p_t, z_t, W_t = f(x)
    print x.shape, nlp_t.shape
    z_np = np.zeros((n_samples, dim)).astype(floatX) + ar.params['b'][None, :]

    for i in xrange(dim):
        print i
        for j in xrange(i + 1, dim):
            print i, j
            z_np[:, i] += ar.params['W'][j, i] * x[:, j]

    assert np.allclose(z_t, z_np), (z_t, z_np)
    p_np = sigmoid(z_np)
    assert np.allclose(p_t, p_np), (p_t, p_np)

    p_np = np.clip(p_np, 1e-7, 1 - 1e-7)
    nlp_np = (- x * np.log(p_np) - (1 - x) * np.log(1 - p_np)).sum(axis=1)

    assert np.allclose(nlp_t, nlp_np), (nlp_t, nlp_np)

    samples, updates = ar.sample(n_samples=n_samples)

    f = theano.function([], samples, updates=updates)
    print f()
    assert False
Example #27
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T, solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            return [tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))]
        else:
            return [tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))]
Example #28
    def log_prob(self, X):
        """ Evaluate the log-probability for the given samples.

        Parameters
        ----------
        X:      T.tensor 
            samples from X

        Returns
        -------
        log_p:  T.tensor
            log-probabilities for the samples in X
        """
        n_X, = self.get_hyper_params(['n_X'])
        b, W = self.get_model_params(['b', 'W'])

        W = T.tril(W, k=-1)

        prob_X = self.sigmoid(T.dot(X, W) + b)
        log_prob = X * T.log(prob_X) + (1 - X) * T.log(1 - prob_X)
        log_prob = T.sum(log_prob, axis=1)

        return log_prob
Example #29
 def __init__(self, weights_init, biases_init, lower=False,
              weights_prec=0., biases_prec=0., weights_mean=None,
              biases_mean=None):
     assert weights_init.ndim == 2, 'weights_init must be 2D array.'
     assert biases_init.ndim == 1, 'biases_init must be 1D array.'
     assert weights_init.shape[0] == biases_init.shape[0], \
         'Dimensions of weights_init and biases_init must be consistent.'
     self.lower = lower
     self.weights = th.shared(weights_init, name='W')
     self.weights_tri = (tt.tril(self.weights)
                         if lower else tt.triu(self.weights))
     self.biases = th.shared(biases_init, name='b')
     self.weights_prec = weights_prec
     self.biases_prec = biases_prec
     if weights_mean is None:
         weights_mean = np.eye(weights_init.shape[0])
     if biases_mean is None:
         biases_mean = np.zeros_like(biases_init)
     self.weights_mean = (np.tril(weights_mean)
                          if lower else np.triu(weights_mean))
     self.biases_mean = biases_mean
     super(TriangularAffineLayer, self).__init__(
         [self.weights, self.biases])
Example #30
    def log_prob(self, X):
        """ Evaluate the log-probability for the given samples.

        Parameters
        ----------
        X:      T.tensor 
            samples from X

        Returns
        -------
        log_p:  T.tensor
            log-probabilities for the samples in X
        """
        n_X, = self.get_hyper_params(['n_X'])
        b, W = self.get_model_params(['b', 'W'])
        
        W = T.tril(W, k=-1)

        prob_X = self.sigmoid(T.dot(X, W) + b)
        log_prob = X*T.log(prob_X) + (1-X)*T.log(1-prob_X)
        log_prob = T.sum(log_prob, axis=1)

        return log_prob
Example #31
    def L_op(self, inputs, outputs, gradients):
        # Modified from theano/tensor/slinalg.py
        # No handling for on_error = 'nan'
        dz = gradients[0]
        chol_x = outputs[0]

        # this is for nan mode
        #
        # ok = ~tensor.any(tensor.isnan(chol_x))
        # chol_x = tensor.switch(ok, chol_x, 1)
        # dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.0)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return gpu_solve_upper_triangular(
                outer.T, gpu_solve_upper_triangular(outer.T, inner.T).T
            )

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz))
        )

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        return [grad]
Example #32
def test_autoregressor(dim=3, n_samples=5):
    ar = AutoRegressor(dim)
    ar.params['b'] += 0.1
    tparams = ar.set_tparams()

    X = T.matrix('X', dtype=floatX)
    nlp = ar.neg_log_prob(X)
    p = ar.get_prob(X, *ar.get_params())
    W = T.tril(ar.W, k=-1)
    z = T.dot(X, W) + ar.b

    x = np.random.randint(0, 2, size=(n_samples, dim)).astype(floatX)

    f = theano.function([X], [nlp, p, z, W])
    nlp_t, p_t, z_t, W_t = f(x)
    print x.shape, nlp_t.shape
    z_np = np.zeros((n_samples, dim)).astype(floatX) + ar.params['b'][None, :]

    for i in xrange(dim):
        print i
        for j in xrange(i + 1, dim):
            print i, j
            z_np[:, i] += ar.params['W'][j, i] * x[:, j]

    assert np.allclose(z_t, z_np), (z_t, z_np)
    p_np = sigmoid(z_np)
    assert np.allclose(p_t, p_np, atol=1e-4), (p_t - p_np)

    p_np = np.clip(p_np, 1e-7, 1 - 1e-7)
    nlp_np = (- x * np.log(p_np) - (1 - x) * np.log(1 - p_np)).sum(axis=1)

    assert np.allclose(nlp_t, nlp_np, atol=1e-3), (nlp_t - nlp_np)

    samples, updates = ar.sample(n_samples=n_samples)

    f = theano.function([], samples, updates=updates)
    print f()
Example #33
    def __init__(self, rng, input, n_in, n_batch, d_bucket, activation, activation_deriv,
                 w=None, index_permute=None, index_permute_reverse=None):
        srng = RandomStreams(seed=234)
        
        n_bucket = n_in / d_bucket + 1
        self.input = input

        # randomly permute input space
        if index_permute is None:
            index_permute = srng.permutation(n=n_in)#numpy.random.permutation(n_in)
            index_permute_reverse = T.argsort(index_permute)
            self.index_permute = index_permute
            self.index_permute_reverse = index_permute_reverse

        permuted_input = input[:, index_permute]
        self.permuted_input = permuted_input

        # initialize reflection parameters
        if w is None:
            bound = numpy.sqrt(3. / d_bucket)
            w_values = numpy.asarray(rng.uniform(low=-bound,
                                                 high=bound,
                                                 size=(n_bucket, d_bucket, d_bucket)),
                                     dtype=theano.config.floatX)
            w = theano.shared(value=w_values, name='w')
            
        self.w = w
        
        
        # compute outputs and Jacobians
        
        log_jacobian = T.alloc(0, n_batch)
        for b in xrange(n_bucket):
            bucket_size = d_bucket
            if b == n_bucket - 1:
                bucket_size = n_in - b * d_bucket
            
            x_b = self.permuted_input[:, b*d_bucket:b*d_bucket + bucket_size]

            
            w_b = self.w[b, :bucket_size, :bucket_size]

#            W = T.slinalg.Expm()(w_b)
#            log_jacobian = log_jacobian + T.alloc(T.nlinalg.trace(w_b), n_batch)

            Upper = T.triu(w_b)
#            Upper = T.extra_ops.fill_diagonal(Upper, 1.)
            Lower = T.tril(w_b)
            Lower = T.extra_ops.fill_diagonal(Lower, 1.)
            log_det_Upper = T.log(T.abs_(T.nlinalg.ExtractDiag()(Upper))).sum() 
#            log_det_Lower = T.log(T.abs_(T.nlinalg.ExtractDiag()(Lower))).sum() 


            W = T.dot(Upper, Lower)
            log_jacobian = log_jacobian + T.alloc(log_det_Upper, n_batch)

            
#            W = T.dot(T.transpose(w_b), w_b) + 0.001*T.eye(bucket_size)
#            log_jacobian = log_jacobian + T.alloc(T.log(T.abs_(T.nlinalg.Det()(W))), n_batch)

#            diag = T.nlinalg.diag(W)
#            div = T.tile(T.reshape(T.sqrt(diag), [1, bucket_size]), (bucket_size, 1))
            
#            W = W / div / T.transpose(div)
            #import pdb; pdb.set_trace()

            lin_output_b = T.dot(x_b, W)
            if b>0:
                lin_output = T.concatenate([lin_output, lin_output_b], axis=1)
            else:
                lin_output = lin_output_b
            if activation is not None:
                derivs = activation_deriv(lin_output_b)     
                #import pdb; pdb.set_trace()
                log_jacobian = log_jacobian + T.log(T.abs_(derivs)).sum(axis=1)

                
#                for n in xrange(n_batch):                    
#                    mat = T.tile(T.reshape(derivs[n], [1, bucket_size]), (bucket_size, 1))
#                    mat = mat * W                   
#                    T.inc_subtensor(log_jacobian[n], T.log(T.abs_(T.nlinalg.Det()(mat))))
                    
        self.log_jacobian = log_jacobian        

        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )


        self.params = [w]
Example #34
File: losses.py  Project: keirkwame/delfi
 def log_single_component(c, mu, P, al, tr):
     L = T.tril(P[c, :, :], k=-1) + T.diag(T.exp(T.diagonal(
         P[c, :, :])))
     z = T.exp(-0.5 * T.sum(T.dot(T.transpose(L), (tr - mu[c, :]))**2) +
               T.log(al[c]) + T.log(T.nlinalg.det(L)) - D * log2pi / 2.)
     return z
Example #35
    def __init__(self,D, M,Q,Domain_number):
        
        self.Xlabel=T.matrix('Xlabel')

        
        self.X=T.matrix('X')
        N=self.X.shape[0]
        
        self.Weight=T.matrix('Weight')

        ker=kernel(Q)
        mmd=MMD(M,Domain_number)
        
        mu_value = np.random.randn(M,D)
        Sigma_b_value = np.zeros((M,M)) + np.log(0.01)

        Z_value = np.random.randn(M,Q)
        self.test=Z_value
        ls_value=np.zeros(Domain_number)+np.log(0.1)
        
        self.mu = theano.shared(value=mu_value, name='mu', borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value, name='Sigma_b', borrow=True)
        self.Z = theano.shared(value=Z_value, name='Z', borrow=True)
        self.ls = theano.shared(value=ls_value, name='ls', borrow=True)
        
        self.params = [self.mu,self.Sigma_b,self.Z,self.ls]
        
        self.hiddenLayer_x = HiddenLayer(rng=rng,input=self.X,n_in=D,n_out=20,activation=T.nnet.relu,number='_x')
        self.hiddenLayer_m = HiddenLayer(rng=rng,input=self.hiddenLayer_x.output,n_in=20,n_out=Q,activation=T.nnet.relu,number='_m')
        self.hiddenLayer_S = HiddenLayer(rng=rng,input=self.hiddenLayer_x.output,n_in=20,n_out=Q,activation=T.nnet.relu,number='_S')
        
        self.loc_params= []
        self.loc_params.extend(self.hiddenLayer_x.params)
        self.loc_params.extend(self.hiddenLayer_m.params)
        self.loc_params.extend(self.hiddenLayer_S.params)

        self.local_params={}
        for i in self.loc_params:
            self.local_params[str(i)]=i
        
        self.params.extend(ker.params)
        self.params.extend(mmd.params)
        
        self.global_params={}
        for i in self.params:
            self.global_params[str(i)]=i
        
        self.params.extend(self.hiddenLayer_x.params)
        self.params.extend(self.hiddenLayer_m.params)
        self.params.extend(self.hiddenLayer_S.params)
        
        self.wrt={}
        for i in self.params:
            self.wrt[str(i)]=i
        
        m=self.hiddenLayer_m.output
        S_0=self.hiddenLayer_S.output
        S_1=T.exp(S_0)
        S=T.sqrt(S_1)
        
        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        eps_NQ = srng.normal((N,Q))
        eps_M = srng.normal((M,D))  #the mean and the covariance need independent random draws, so they are sampled separately

        beta = T.exp(self.ls)
        #u is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky-style construction

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) + T.diag(T.exp(T.diag(self.Sigma_b))))
        
        #rescale
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma
        
        Xtilda = m + S * eps_NQ
        self.U = mu_scaled+Sigma_scaled.dot(eps_M)
        
        Kmm = ker.RBF(self.Z)
        Kmm=mmd.MMD_kenel_Xonly(mmd.Zlabel_T,Kmm,self.Weight)
        KmmInv = sT.matrix_inverse(Kmm) 
        
        Kmn = ker.RBF(self.Z,Xtilda)
        Kmn=mmd.MMD_kenel_ZX(self.Xlabel,Kmn,self.Weight)
        
        Knn = ker.RBF(Xtilda)
        Knn=mmd.MMD_kenel_Xonly(self.Xlabel,Knn,self.Weight)
        
        Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn))
        
        Kinterval=T.dot(KmmInv,Kmn)
              
        mean_U=T.dot(Kinterval.T,self.U)
        betaI=T.diag(T.dot(self.Xlabel,beta))
        Covariance = betaI       
        
        self.LL = (self.log_mvn(self.X, mean_U, Covariance) - 0.5*T.sum(T.dot(betaI,Ktilda)))            
        self.KL_X = -self.KLD_X(m,S)
        self.KL_U = -self.KLD_U(mu_scaled , Sigma_scaled , Kmm,KmmInv)
Example #36
    def __init__(self, D, M, Q, Domain_number, D_Y, M_Y):

        self.Xlabel = T.matrix('Xlabel')

        self.X = T.matrix('X')
        self.Y = T.matrix('Y')
        N = self.X.shape[0]

        self.Weight = T.matrix('Weight')

        ker = kernel(Q)
        mmd = MMD(M, Domain_number)

        mu_value = np.random.randn(M, D)
        Sigma_b_value = np.zeros((M, M)) + np.log(0.01)

        Z_value = np.random.randn(M, Q)

        ls_value = np.zeros(Domain_number) + np.log(0.1)

        self.mu = theano.shared(value=mu_value, name='mu', borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value,
                                     name='Sigma_b',
                                     borrow=True)
        self.Z = theano.shared(value=Z_value, name='Z', borrow=True)
        self.ls = theano.shared(value=ls_value, name='ls', borrow=True)

        self.hiddenLayer_x = HiddenLayer(rng=rng,
                                         input=self.X,
                                         n_in=D,
                                         n_out=20,
                                         activation=T.nnet.relu,
                                         number='_x')
        self.hiddenLayer_m = HiddenLayer(rng=rng,
                                         input=self.hiddenLayer_x.output,
                                         n_in=20,
                                         n_out=Q,
                                         activation=T.nnet.relu,
                                         number='_m')
        self.hiddenLayer_S = HiddenLayer(rng=rng,
                                         input=self.hiddenLayer_x.output,
                                         n_in=20,
                                         n_out=Q,
                                         activation=T.nnet.relu,
                                         number='_S')

        #################################################################################
        ###model computation for the X side
        m = self.hiddenLayer_m.output
        S_0 = self.hiddenLayer_S.output
        S_1 = T.exp(S_0)
        S = T.sqrt(S_1)

        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        eps_NQ = srng.normal((N, Q))
        eps_M = srng.normal((M, D))  #the mean and the covariance need independent random draws, so they are sampled separately

        beta = T.exp(self.ls)

        #u is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky-style construction

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) +
                       T.diag(T.exp(T.diag(self.Sigma_b))))

        #rescale
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma

        Xtilda = m + S * eps_NQ
        self.U = mu_scaled + Sigma_scaled.dot(eps_M)

        Kmm = ker.RBF(self.Z)
        Kmm = mmd.MMD_kenel_Xonly(mmd.Zlabel_T, Kmm, self.Weight)
        KmmInv = sT.matrix_inverse(Kmm)

        Kmn = ker.RBF(self.Z, Xtilda)
        Kmn = mmd.MMD_kenel_ZX(self.Xlabel, Kmn, self.Weight)

        Knn = ker.RBF(Xtilda)
        Knn = mmd.MMD_kenel_Xonly(self.Xlabel, Knn, self.Weight)

        Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

        Kinterval = T.dot(KmmInv, Kmn)

        mean_U = T.dot(Kinterval.T, self.U)
        betaI = T.diag(T.dot(self.Xlabel, beta))
        Covariance = betaI
        ##############################################################################################
        ###computation for the Y side
        ker_Y = kernel(Q, number='_Y')
        muY_value = np.random.randn(M_Y, D_Y)
        SigmaY_b_value = np.zeros((M_Y, M_Y)) + np.log(0.01)

        ZY_value = np.random.randn(M_Y, Q)

        lsY_value = np.zeros(1) + np.log(0.1)

        self.muY = theano.shared(value=muY_value, name='muY', borrow=True)
        self.SigmaY_b = theano.shared(value=SigmaY_b_value,
                                      name='SigmaY_b',
                                      borrow=True)
        self.ZY = theano.shared(value=ZY_value, name='ZY', borrow=True)
        self.lsY = theano.shared(value=lsY_value, name='lsY', borrow=True)

        epsY_NQ = srng.normal((N, Q))
        epsY_M = srng.normal((M_Y, D_Y))

        betaY0 = T.exp(self.lsY)
        betaY = T.tile(betaY0, N)
        #u is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky-style construction

        SigmaY = T.tril(self.SigmaY_b - T.diag(T.diag(self.SigmaY_b)) +
                        T.diag(T.exp(T.diag(self.SigmaY_b))))

        #rescale
        muY_scaled, SigmaY_scaled = ker_Y.sf2**0.5 * self.muY, ker_Y.sf2**0.5 * SigmaY

        XtildaY = m + S * epsY_NQ
        self.UY = muY_scaled + SigmaY_scaled.dot(epsY_M)

        KmmY = ker_Y.RBF(self.ZY)
        KmmInvY = sT.matrix_inverse(KmmY)

        KmnY = ker_Y.RBF(self.ZY, XtildaY)

        KnnY = ker_Y.RBF(XtildaY)

        KtildaY = KnnY - T.dot(KmnY.T, T.dot(KmmInvY, KmnY))

        KintervalY = T.dot(KmmInvY, KmnY)

        mean_UY = T.dot(KintervalY.T, self.UY)
        betaIY = T.diag(betaY)
        CovarianceY = betaIY

        ##############################################################################################
        ###store the parameters
        self.params = []

        self.params_X = [self.mu, self.Sigma_b, self.Z, self.ls]
        self.params_Y = [self.muY, self.SigmaY_b, self.ZY, self.lsY]

        self.loc_params = []
        self.loc_params.extend(self.hiddenLayer_x.params)
        self.loc_params.extend(self.hiddenLayer_m.params)
        self.loc_params.extend(self.hiddenLayer_S.params)

        self.local_params = {}
        for i in self.loc_params:
            self.local_params[str(i)] = i

        self.params_X.extend(ker.params)
        self.params_X.extend(mmd.params)
        self.params_Y.extend(ker_Y.params)

        self.global_params_X = {}
        for i in self.params_X:
            self.global_params_X[str(i)] = i

        self.global_params_Y = {}
        for i in self.params_Y:
            self.global_params_Y[str(i)] = i

        self.params.extend(self.params_X)
        self.params.extend(self.params_Y)
        self.params.extend(self.loc_params)

        self.wrt = {}
        for i in self.params:
            self.wrt[str(i)] = i

###############################################################################################
###final likelihood
        self.LL = (self.log_mvn(self.X, mean_U, Covariance) -
                   0.5 * T.sum(T.dot(betaI, Ktilda)))
        self.KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)

        self.LLY = (self.log_mvn(self.Y, mean_UY, CovarianceY) -
                    0.5 * T.sum(T.dot(betaIY, KtildaY)))
        self.KL_UY = -self.KLD_U(muY_scaled, SigmaY_scaled, KmmY, KmmInvY)

        self.KL_X = -self.KLD_X(m, S)
Example #37
    def __init__(self, params, correct, samples=20, batch_size=None):
        ker = kernel()
        self.samples = samples
        self.params = params
        self.batch_size = batch_size

        #file used to save the model
        model_file_name = 'model2' + '.save'
        #load a previously built model if one exists
        try:
            print('Trying to load model...')
            with open(model_file_name, 'rb') as file_handle:
                obj = pickle.load(file_handle)
                self.f, self.g = obj
                print('Loaded!')
            return
        except:
            print('Failed. Creating a new model...')

        X,Y,X_test,m,S_b,mu,Sigma_b,Z,eps_NQ,eps_M =\
        T.dmatrices('X','Y','X_test','m','S_b','mu','Sigma_b','Z','eps_NQ','eps_M')

        lhyp = T.dvector('lhyp')
        ls = T.dvector('ls')

        (M, D), N, Q = Z.shape, X.shape[0], X.shape[1]

        #constrain the variables to be positive
        beta = T.exp(ls[0])
        #beta=T.exp(lhyp[0])
        sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1 + Q])

        S = T.exp(S_b)
        #Sigma=T.exp(self.Sigma_b)

        #x is diagonal, so there is no problem even without taking a square root
        #u is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky-style construction
        Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) +
                       T.diag(T.exp(T.diag(Sigma_b))))

        #rescale
        mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma

        Xtilda = m + S * eps_NQ
        U = mu_scaled + Sigma_scaled.dot(eps_M)

        print('Setting up cache...')

        Kmm = ker.RBF(sf2, l, Z)
        KmmInv = sT.matrix_inverse(Kmm)
        #KmmDet=theano.sandbox.linalg.det(Kmm)

        #KmmInv_cache = sT.matrix_inverse(Kmm)
        #self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        #self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        #note: this compiles kmmInv_cache as a function of Z and lhyp, so the inverse matrix becomes a function of Z and the hyperparameters
        #self.update_KmmInv_cache()  #evaluates KmmInv with concrete values
        #builds the derivative functions of the inverse matrix

        #self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
        #               'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        print('Modeling...')

        Kmn = ker.RBF(sf2, l, Z, Xtilda)
        Knn = ker.RBF(sf2, l, Xtilda, Xtilda)

        Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

        Kinterval = T.dot(KmmInv, Kmn)

        mean_U = T.dot(Kinterval.T, U)
        Covariance = beta

        LL = (self.log_mvn(X, mean_U, Covariance) - 0.5 * beta * T.sum(
            (T.eye(N) * Ktilda))) * correct
        KL_X = -self.KLD_X(m, S) * correct
        KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)

        print('Compiling model ...')

        inputs = {
            'X': X,
            'Z': Z,
            'm': m,
            'S_b': S_b,
            'mu': mu,
            'Sigma_b': Sigma_b,
            'lhyp': lhyp,
            'ls': ls,
            'eps_M': eps_M,
            'eps_NQ': eps_NQ
        }

        z = 0.0 * sum([
            T.sum(v) for v in inputs.values()
        ])  # solve a bug with derivative wrt inputs not in the graph

        self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\
                  for n,f in zip(['X', 'U', 'LL', 'KL_U', 'KL_X'], [X, U, LL, KL_U, KL_X])}

        wrt = {
            'Z': Z,
            'm': m,
            'S_b': S_b,
            'mu': mu,
            'Sigma_b': Sigma_b,
            'lhyp': lhyp,
            'ls': ls
        }
        self.g = {
            vn: {
                gn: theano.function(list(inputs.values()),
                                    T.grad(gv + z, vv),
                                    name='d' + gn + '_d' + vn,
                                    on_unused_input='ignore')
                for gn, gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])
            }
            for vn, vv in wrt.items()
        }

        with open(model_file_name, 'wb') as file_handle:
            print('Saving model...')
            sys.setrecursionlimit(2000)
            pickle.dump([self.f, self.g],
                        file_handle,
                        protocol=pickle.HIGHEST_PROTOCOL)
Example #38
File: linalg.py  Project: Theano/Theano
 def tril_and_halve_diagonal(mtx):
     """Extracts lower triangle of square matrix and halves diagonal."""
     return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)
Example #39
    def __init__(self, input, D_in, D_out, num_MC, inducing_number, fixed_z, Domain_number=None, Domain_consideration=True, number="1", kernel_name='X'):

        Xtilda=input        
        
        self.N=Xtilda.shape[1]
        D=D_out
        Q=D_in
        M=inducing_number
        ################################################################################
        #set_initial_value
        ker=kernel(Q,kernel_name)
        self.kern=ker
        
        mu_value = np.random.randn(M,D)* 1e-2
        Sigma_b_value = np.zeros((M,M))
        
        if Domain_consideration:
            ls_value=np.zeros(Domain_number)+np.log(0.1)
        else:
            ls_value=np.zeros(1)+np.log(0.1)
        
        self.mu = theano.shared(value=mu_value, name='mu'+number, borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value, name='Sigma_b'+number, borrow=True)
        self.Z = theano.shared(value=fixed_z, name='Z'+number, borrow=True)
        self.ls = theano.shared(value=ls_value, name='ls'+number, borrow=True)
        
        ##############################################################################
        #param list
        self.params = [self.mu,self.Sigma_b,self.ls]
        self.params.extend(ker.params)
        
        self.hyp_params_list=[self.mu,self.Sigma_b,self.ls,ker.params]
        self.Z_params_list=[self.Z]        
        self.global_params_list=self.params
        #############################################################################
        #set random seed
        from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
        srng = RandomStreams(seed=234)
        
        eps_M = srng.normal((num_MC,M,D))  #the mean and the covariance need independent random draws, so they are sampled separately
        eps_ND = srng.normal((num_MC,self.N,D))
        #################################################################
        #set constraints                          
        self.beta = T.exp(self.ls)
        #u is not diagonal, so a triangular matrix has to be built, e.g. via a Cholesky-style construction

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) + T.diag(T.exp(T.diag(self.Sigma_b))))
        ##################################################################
        #rescale
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma
        
        ##################################################################
        #if the model is a latent variable model, we draw MC samples of the latent X
        
        #Xtilda = eps_NQ * S[None,:,:] + m[None,:,:]
        
        #Xtilda, updates = theano.scan(fn=lambda a: m+S*a,
        #                      sequences=[eps_NQ])
        
        ###############################
        #U is the posterior samples
        self.U, updates = theano.scan(fn=lambda a: mu_scaled+Sigma_scaled.dot(a),
                              sequences=[eps_M])
        ################################
        #inducing point prior
        Kmm = ker.RBF(self.Z)
        KmmInv = sT.matrix_inverse(Kmm) 
        
        ###############################
        #For the MC calculation, we copy the input X
        Knn, updates = theano.scan(fn=lambda a: self.kern.RBF(a),
                              sequences=[Xtilda])
        
        Kmn, updates = theano.scan(fn=lambda a: self.kern.RBF(self.Z,a),
                              sequences=[Xtilda])
        ########################################
        #build the posterior p(F|U) and its variance
        Ktilda, updates = theano.scan(fn=lambda a,b: a-T.dot(b.T,T.dot(KmmInv,b)),
                              sequences=[Knn,Kmn])
        ##################################################
        #get the posterior samples from p(F|U)
        #MC*N*D_out
        #F, updates = theano.scan(fn=lambda a,b,c,d: T.dot(a.T,T.dot(KmmInv,b)) + T.dot(T.maximum(c, 1e-16)**0.5,d),
        #                      sequences=[Kmn,self.U,Ktilda,eps_ND])
        F, updates = theano.scan(fn=lambda a,c,d: T.dot(a.T,T.dot(KmmInv,mu_scaled)) + T.dot(T.maximum(c, 1e-16)**0.5,d),
                              sequences=[Kmn,Ktilda,eps_ND])
        ##################################################        
        #Kinterval=T.dot(KmmInv,Kmn)

        self.mean_U=F
        #mean_U=T.dot(Kinterval.T,self.U)
        
        #A=Kinterval.T      
        #Sigma_tilda=Ktilda+T.dot(A,T.dot(Sigma_scaled,A.T))
        #mean_tilda=T.dot(A,mu_scaled)        
        #self.mean_U=mean_tilda + T.dot(T.maximum(Sigma_tilda, 1e-16)**0.5,eps_ND)

        ###################################################################
        eps_ND_F = srng.normal((num_MC,self.N,D))

        added_noise=T.tile(self.beta,(num_MC,self.N,D))

        self.output=added_noise*eps_ND_F+self.mean_U
        #self.KL_X = -self.KLD_X(m,S)
        self.KL_U = self.KLD_U(mu_scaled , Sigma_scaled , Kmm,KmmInv)
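
# Note: the `Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) + T.diag(T.exp(T.diag(self.Sigma_b))))`
# line above turns an unconstrained square matrix into a lower-triangular factor with a strictly
# positive diagonal, so Sigma.dot(Sigma.T) is always a valid covariance. A minimal NumPy sketch of
# the same reparameterization (illustrative names, not part of the snippet):
import numpy as np

def tril_with_positive_diag(Sigma_b):
    """Unconstrained square matrix -> lower-triangular factor with exp() on the diagonal."""
    L = np.tril(Sigma_b)                               # keep the lower triangle
    np.fill_diagonal(L, np.exp(np.diag(Sigma_b)))      # force a positive diagonal
    return L

Sigma_b = np.random.randn(4, 4)
L = tril_with_positive_diag(Sigma_b)
assert np.all(np.linalg.eigvalsh(L @ L.T) >= -1e-10)   # L @ L.T is positive semi-definite
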
示例#40
0
    def __init__(self, rng, target,input_m,input_S, n_in, n_out,inducing_number,Domain_number,Xlabel,
                 liklihood="Gaussian",Domain_consideration=True,number="1"):

        m=input_m
        S_0=input_S
        
        N=m.shape[0]
        D=n_out
        Q=n_in
        M=inducing_number
        
        #set_initial_value
        ker=kernel(Q)
        mu_value = np.random.randn(M,D)* 1e-2
        Sigma_b_value = np.zeros((M,M))
        Z_value = np.random.randn(M,Q)
        if Domain_consideration:
            ls_value=np.zeros(Domain_number)+np.log(0.1)
        else:
            ls_value=np.zeros(1)+np.log(0.1)
        
        self.mu = theano.shared(value=mu_value, name='mu'+number, borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value, name='Sigma_b'+number, borrow=True)
        self.Z = theano.shared(value=Z_value, name='Z'+number, borrow=True)
        self.ls = theano.shared(value=ls_value, name='ls'+number, borrow=True)
        
        self.params = [self.mu,self.Sigma_b,self.Z,self.ls]
        
        
        self.params.extend(ker.params)
        
        self.hyp_params_list=[self.mu,self.Sigma_b,self.ls]
        self.Z_params_list=[self.Z]        
        self.global_params_list=self.params
        
        S_1=T.exp(S_0)
        S=T.sqrt(S_1)
        
        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        eps_NQ = srng.normal((N,Q))
        eps_M = srng.normal((M,D))#the mean and the variance need different random draws, so they are named separately
        eps_ND = srng.normal((N,D))
                          
        beta = T.exp(self.ls)
        #u is not diagonal, so we need a triangular factor (e.g. a Cholesky-style decomposition)

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) + T.diag(T.exp(T.diag(self.Sigma_b))))
        
        #scale transformation
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma
        
        Xtilda = m + S * eps_NQ
        self.U = mu_scaled+Sigma_scaled.dot(eps_M)
        
        Kmm = ker.RBF(self.Z)
        KmmInv = sT.matrix_inverse(Kmm) 
        
        Kmn = ker.RBF(self.Z,Xtilda)
        
        Knn = ker.RBF(Xtilda)        
        Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn))
        
        
        #F = T.dot(Kmn.T,T.dot(KmmInv,self.U)) + T.dot(T.maximum(Ktilda, 1e-16)**0.5,eps_ND)
        
        Kinterval=T.dot(KmmInv,Kmn)
        A=Kinterval.T      
        Sigma_tilda=Ktilda+T.dot(A,T.dot(Sigma_scaled,A.T))
        mean_tilda=T.dot(A,mu_scaled)
        #mean_U=F
        #mean_U=T.dot(Kinterval.T,self.U)
        mean_U=mean_tilda + T.dot(T.maximum(Sigma_tilda, 1e-16)**0.5,eps_ND)
        betaI=T.diag(T.dot(Xlabel,beta))
        Covariance = betaI       
        
        self.output=mean_U
        
        self.LL = self.log_mvn(target, mean_U, Covariance)/N# - 0.5*T.sum(T.dot(betaI,Ktilda))       
        self.KL_X = -self.KLD_X(m,S)
        self.KL_U = -self.KLD_U(mu_scaled , Sigma_scaled , Kmm,KmmInv)
    def __init__(self, params,correct,Xinfo, samples = 500,batch_size=None):
        ker = kernel()
        mmd = MMD()
        self.samples = samples
        self.params =  params
        self.batch_size=batch_size
        self.Xlabel_value=Xinfo["Xlabel_value"]
        self.Weight_value=Xinfo["Weight_value"]
        
        #file for saving the model
        model_file_name = 'model_MMD_kernel' + '.save'
                                    #load a previously saved model if one exists
        try:
            print ('Trying to load model...')
            with open(model_file_name, 'rb') as file_handle:
                obj = pickle.load(file_handle)
                self.f, self.g= obj
                print ('Loaded!')
            return
        except:
            print ('Failed. Creating a new model...')
        
        X,Y,X_test,m,S_b,mu,Sigma_b,Z,eps_NQ,eps_M =\
        T.dmatrices('X','Y','X_test','m','S_b','mu','Sigma_b','Z','eps_NQ','eps_M')
        
        Xlabel=T.dmatrix('Xlabel')
        Zlabel=T.dmatrix('Zlabel')
        
        Zlabel_T=T.exp(Zlabel)/T.sum(T.exp(Zlabel),1)[:,None]#the labels are probabilities, so they must be positive and normalized
        
        Weight=T.dmatrix('Weight')
        
        lhyp = T.dvector('lhyp')
        ls=T.dvector('ls')
        ga=T.dvector('ga')
        
        (M, D), N, Q = Z.shape, X.shape[0], X.shape[1]

        
        #constrain the variables to be positive
        beta = T.exp(ls)
        gamma=T.exp(ga[0])
        #beta=T.exp(lhyp[0])
        sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1+Q])
        
        S=T.exp(S_b)
        #Sigma=T.exp(self.Sigma_b)
        
        #x is diagonal, so no square root / Cholesky step is needed
        #u is not diagonal, so we need a triangular factor (e.g. a Cholesky-style decomposition)
        Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) + T.diag(T.exp(T.diag(Sigma_b))))
        
        #scale transformation
        mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma
        
        Xtilda = m + S * eps_NQ
        U = mu_scaled+Sigma_scaled.dot(eps_M)

        print ('Setting up cache...')
        
        Kmm = ker.RBF(sf2, l, Z)
        Kmm=mmd.MMD_kenel_Xonly(gamma,Zlabel_T,Kmm,Weight)
        KmmInv = sT.matrix_inverse(Kmm) 
        #KmmDet=theano.sandbox.linalg.det(Kmm)
        
        #KmmInv_cache = sT.matrix_inverse(Kmm)
        #self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        #self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        #note: this compiles kmmInv_cache as a function of Z and lhyp, so the inverse becomes a function of Z and the hyperparameters
        #self.update_KmmInv_cache()#actually evaluates KmmInv with numerical values
        #builds the derivative functions of the inverse matrix
        
        #self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
        #               'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        
        print ('Modeling...')
        
        Kmn = ker.RBF(sf2,l,Z,Xtilda)
        Kmn=mmd.MMD_kenel_ZX(gamma,Zlabel_T,Xlabel,Kmn,Weight)
        
        Knn = ker.RBF(sf2,l,Xtilda,Xtilda)
        Knn=mmd.MMD_kenel_Xonly(gamma,Xlabel,Knn,Weight)
        
        Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn))
        
        Kinterval=T.dot(KmmInv,Kmn)
              
        mean_U=T.dot(Kinterval.T,U)
        betaI=T.diag(T.dot(Xlabel,beta))
        Covariance = betaI       
        
        LL = (self.log_mvn(X, mean_U, Covariance) - 0.5*T.sum(T.dot(betaI,Ktilda)))*correct              
        KL_X = -self.KLD_X(m,S)*correct
        KL_U = -self.KLD_U(mu_scaled , Sigma_scaled , Kmm,KmmInv)
        
        print ('Compiling model ...')        


        inputs = {'X': X, 'Z': Z, 'm': m, 'S_b': S_b, 'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls, 
            'eps_M': eps_M, 'eps_NQ': eps_NQ,'ga':ga,'Zlabel':Zlabel,'Weight':Weight,'Xlabel':Xlabel}
        
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        
        self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\
                  for n,f in zip(['X', 'U', 'LL', 'KL_U', 'KL_X'], [X, U, LL, KL_U, KL_X])}
        
        
        wrt = {'Z': Z, 'm': m, 'S_b': S_b, 'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls,'ga':ga,'Zlabel':Zlabel}
        self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, 
            on_unused_input='ignore') for gn,gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])} for vn, vv in wrt.items()}

        with open(model_file_name, 'wb') as file_handle:
            print ('Saving model...')
            sys.setrecursionlimit(10000)
            pickle.dump([self.f, self.g], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
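
# The `z = 0.0*sum([T.sum(v) for v in inputs.values()])` trick above, together with
# on_unused_input='ignore', lets every compiled gradient function accept the full input list even
# when a particular cost does not depend on some inputs. A minimal, self-contained sketch of the
# same workaround (hypothetical variables, not the model's):
import theano
import theano.tensor as T

x = T.dvector('x')
y = T.dvector('y')                                  # y does not appear in the cost below
cost = T.sum(x ** 2)
z = 0.0 * (T.sum(x) + T.sum(y))                     # ties every input into the graph with zero effect
dcost_dy = theano.function([x, y], T.grad(cost + z, y), on_unused_input='ignore')
print(dcost_dy([1.0, 2.0], [3.0, 4.0]))             # all zeros; without z, T.grad would raise
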
示例#42
0
 def get_prob(self, x, W, b):
     W = T.tril(W, k=-1)
     p = T.nnet.sigmoid(T.dot(x, W) + b) * 0.9999 + 0.000005
     return p
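
# In get_prob the strictly lower-triangular mask T.tril(W, k=-1) zeros the diagonal and upper
# triangle of W, so with p = sigmoid(x.dot(W) + b) the i-th conditional p[i] depends only on
# inputs that come after position i in the chosen ordering (and never on x[i] itself), which is
# what makes the per-unit sigmoids usable as autoregressive conditionals. A NumPy check of that
# dependency structure (toy sizes, square W assumed):
import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

rng = np.random.default_rng(0)
D = 5
W = np.tril(rng.standard_normal((D, D)), k=-1)      # strictly lower triangular, as in get_prob
b = rng.standard_normal(D)
x = rng.integers(0, 2, size=D).astype(float)

p = sigmoid(x @ W + b)
x_flipped = x.copy()
x_flipped[3] = 1.0 - x_flipped[3]                   # change one input bit
p_flipped = sigmoid(x_flipped @ W + b)
assert np.allclose(p[3:], p_flipped[3:])            # p[i] is unaffected by x[j] for j <= i
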
示例#43
0
 def constructL(ltri):
     tmp = T.transpose(T.tril(T.ones((d,d)),-1))
     lower_tril_indices = tmp.nonzero()
     L = T.transpose(T.set_subtensor(tmp[lower_tril_indices], ltri))
     return L
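
# constructL above packs a flat vector of d*(d-1)//2 values into the strictly lower triangle of a
# d x d matrix (d is assumed to be defined in the enclosing scope). A NumPy equivalent that uses
# the same transpose/nonzero ordering, for reference:
import numpy as np

def construct_L_numpy(ltri, d):
    tmp = np.tril(np.ones((d, d)), -1).T            # strictly upper-triangular mask
    L = np.zeros((d, d))
    L.T[tmp.nonzero()] = ltri                       # fill through the transposed view
    return L

d = 4
print(construct_L_numpy(np.arange(1.0, d * (d - 1) // 2 + 1), d))
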
示例#44
0
    def __init__(self,
                 input,
                 D_in,
                 D_out,
                 num_MC,
                 inducing_number,
                 Domain_number=None,
                 Domain_consideration=True,
                 number="1",
                 kernel_name='X'):

        Xtilda = input

        self.N = Xtilda.shape[1]
        D = D_out
        Q = D_in
        M = inducing_number
        ################################################################################
        #set_initial_value
        ker = kernel(Q, kernel_name)
        self.kern = ker

        mu_value = np.random.randn(M, D) * 1e-2
        Sigma_b_value = np.zeros((M, M))

        Z_value = np.random.randn(M, Q)

        if Domain_consideration:
            ls_value = np.zeros(Domain_number) + np.log(0.1)
        else:
            ls_value = np.zeros(1) + np.log(0.1)

        self.mu = theano.shared(value=mu_value,
                                name='mu' + number,
                                borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value,
                                     name='Sigma_b' + number,
                                     borrow=True)
        self.Z = theano.shared(value=Z_value, name='Z' + number, borrow=True)
        self.ls = theano.shared(value=ls_value,
                                name='ls' + number,
                                borrow=True)

        ##############################################################################
        #param list
        self.params = [self.mu, self.Sigma_b, self.ls, self.Z]
        self.params.extend(ker.params)

        self.hyp_params_list = [self.mu, self.Sigma_b, self.ls, ker.params]
        self.Z_params_list = [self.Z]
        self.global_params_list = self.params
        #############################################################################
        #set random seed
        from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
        srng = RandomStreams(seed=234)

        eps_M = srng.normal((num_MC, M, D))  #the mean and the variance need different random draws, so they are named separately
        eps_ND = srng.normal((num_MC, self.N, D))

        #################################################################
        #set constraints
        self.beta = T.exp(self.ls)
        #u is not diagonal, so we need a triangular factor (e.g. a Cholesky-style decomposition)

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) +
                       T.diag(T.exp(T.diag(self.Sigma_b))))
        ##################################################################
        #scale transformation
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma

        ##################################################################
        #if the model is a latent variable model, we make MC samples of the latent X

        #Xtilda = eps_NQ * S[None,:,:] + m[None,:,:]

        #Xtilda, updates = theano.scan(fn=lambda a: m+S*a,
        #                      sequences=[eps_NQ])

        ###############################
        #U holds the posterior samples
        self.U, updates = theano.scan(
            fn=lambda a: mu_scaled + Sigma_scaled.dot(a), sequences=[eps_M])
        ################################
        #inducing point prior
        Kmm = ker.RBF(self.Z)
        KmmInv = sT.matrix_inverse(Kmm)

        ###############################
        #For the MC calculation, we copy the input X
        Knn, updates = theano.scan(fn=lambda a: self.kern.RBF(a),
                                   sequences=[Xtilda])

        Kmn, updates = theano.scan(fn=lambda a: self.kern.RBF(self.Z, a),
                                   sequences=[Xtilda])
        ########################################
        #make the posterior p(F|U) and its variance
        Ktilda, updates = theano.scan(
            fn=lambda a, b: a - T.dot(b.T, T.dot(KmmInv, b)),
            sequences=[Knn, Kmn])
        ##################################################
        #get the posterior samples from p(F|U)
        #MC*N*D_out
        F, updates = theano.scan(fn=lambda a, b, c, d: T.dot(
            a.T, T.dot(KmmInv, b)) + T.dot(T.maximum(c, 1e-16)**0.5, d),
                                 sequences=[Kmn, self.U, Ktilda, eps_ND])
        ##################################################
        #Kinterval=T.dot(KmmInv,Kmn)

        self.mean_U = F
        #mean_U=T.dot(Kinterval.T,self.U)

        #A=Kinterval.T
        #Sigma_tilda=Ktilda+T.dot(A,T.dot(Sigma_scaled,A.T))
        #mean_tilda=T.dot(A,mu_scaled)
        #self.mean_U=mean_tilda + T.dot(T.maximum(Sigma_tilda, 1e-16)**0.5,eps_ND)

        ###################################################################
        self.output = self.mean_U
        #self.KL_X = -self.KLD_X(m,S)
        self.KL_U = self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)
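
# The scan over [Kmn, self.U, Ktilda, eps_ND] above draws one function sample per MC pass from the
# sparse-GP conditional p(F|U) = N(Kmn.T Kmm^-1 U, Knn - Kmn.T Kmm^-1 Kmn). A per-sample NumPy
# sketch of that computation; the elementwise maximum(.)**0.5 mirrors the snippet's cheap stand-in
# for a proper matrix square root (the toy kernel and shapes below are assumptions, not the model's):
import numpy as np

def sample_f_given_u(Kmn, Knn, Kmm, U, eps, jitter=1e-16):
    KmmInv = np.linalg.inv(Kmm)
    mean = Kmn.T @ KmmInv @ U                       # N x D posterior mean
    Ktilda = Knn - Kmn.T @ KmmInv @ Kmn             # N x N posterior covariance
    return mean + np.maximum(Ktilda, jitter) ** 0.5 @ eps

rng = np.random.default_rng(1)
M, N, D = 3, 5, 2
Z, X = rng.standard_normal((M, 1)), rng.standard_normal((N, 1))
k = lambda A, B: np.exp(-0.5 * (A - B.T) ** 2)      # toy 1-D RBF kernel
F = sample_f_given_u(k(Z, X), k(X, X), k(Z, Z) + 1e-6 * np.eye(M),
                     rng.standard_normal((M, D)), rng.standard_normal((N, D)))
print(F.shape)                                      # (N, D): one sample of F given U
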
示例#45
0
    def __init__(self,
                 rng,
                 target,
                 input_m,
                 input_S,
                 n_in,
                 n_out,
                 inducing_number,
                 Domain_number,
                 Xlabel,
                 liklihood="Gaussian",
                 Domain_consideration=True,
                 number="1"):

        m = input_m
        S_0 = input_S

        N = m.shape[0]
        D = n_out
        Q = n_in
        M = inducing_number

        #set_initial_value
        ker = kernel(Q)
        mu_value = np.random.randn(M, D) * 1e-2
        Sigma_b_value = np.zeros((M, M))
        Z_value = np.random.randn(M, Q)
        if Domain_consideration:
            ls_value = np.zeros(Domain_number) + np.log(0.1)
        else:
            ls_value = np.zeros(1) + np.log(0.1)

        self.mu = theano.shared(value=mu_value,
                                name='mu' + number,
                                borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value,
                                     name='Sigma_b' + number,
                                     borrow=True)
        self.Z = theano.shared(value=Z_value, name='Z' + number, borrow=True)
        self.ls = theano.shared(value=ls_value,
                                name='ls' + number,
                                borrow=True)

        self.params = [self.mu, self.Sigma_b, self.Z, self.ls]

        self.params.extend(ker.params)

        self.hyp_params_list = [self.mu, self.Sigma_b, self.ls]
        self.Z_params_list = [self.Z]
        self.global_params_list = self.params

        S_1 = T.exp(S_0)
        S = T.sqrt(S_1)

        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        eps_NQ = srng.normal((N, Q))
        eps_M = srng.normal((M, D))  #the mean and the variance need different random draws, so they are named separately
        eps_ND = srng.normal((N, D))

        beta = T.exp(self.ls)
        #u is not diagonal, so we need a triangular factor (e.g. a Cholesky-style decomposition)

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) +
                       T.diag(T.exp(T.diag(self.Sigma_b))))

        #scale transformation
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma

        Xtilda = m + S * eps_NQ
        self.U = mu_scaled + Sigma_scaled.dot(eps_M)

        Kmm = ker.RBF(self.Z)
        KmmInv = sT.matrix_inverse(Kmm)

        Kmn = ker.RBF(self.Z, Xtilda)

        Knn = ker.RBF(Xtilda)
        Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

        #F = T.dot(Kmn.T,T.dot(KmmInv,self.U)) + T.dot(T.maximum(Ktilda, 1e-16)**0.5,eps_ND)

        Kinterval = T.dot(KmmInv, Kmn)
        A = Kinterval.T
        Sigma_tilda = Ktilda + T.dot(A, T.dot(Sigma_scaled, A.T))
        mean_tilda = T.dot(A, mu_scaled)
        #mean_U=F
        #mean_U=T.dot(Kinterval.T,self.U)
        mean_U = mean_tilda + T.dot(T.maximum(Sigma_tilda, 1e-16)**0.5, eps_ND)
        betaI = T.diag(T.dot(Xlabel, beta))
        Covariance = betaI

        self.output = mean_U

        self.LL = self.log_mvn(
            target, mean_U, Covariance) / N  # - 0.5*T.sum(T.dot(betaI,Ktilda))
        self.KL_X = -self.KLD_X(m, S)
        self.KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)
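
# betaI = T.diag(T.dot(Xlabel, beta)) above builds a per-point noise precision: Xlabel appears to
# be a one-hot domain-membership matrix and beta = exp(ls) holds one precision per domain, so each
# observation gets its own domain's precision on the diagonal. A small NumPy illustration (the toy
# values are assumptions):
import numpy as np

Xlabel = np.array([[1, 0], [0, 1], [1, 0]], dtype=float)   # 3 points, 2 domains, one-hot rows
ls = np.log(np.array([0.1, 0.5]))
beta = np.exp(ls)                                          # per-domain noise precision
betaI = np.diag(Xlabel @ beta)                             # 3 x 3 diagonal precision matrix
print(np.diag(betaI))                                      # [0.1, 0.5, 0.1]
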
示例#46
0
    def __init__(self, params, sx2 = 1, linear_model = False, samples = 20, use_hat = False):
        ker, self.samples, self.params, self.KmmInv  = kernel(), samples, params, {}
        self.use_hat = use_hat

        model_file_name = 'model' + ('_hat' if use_hat else '') + ('_linear' if linear_model else '') + '.save'

        try:
            print 'Trying to load model...'
            with open(model_file_name, 'rb') as file_handle:
                obj = cPickle.load(file_handle)
                self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d = obj
                self.update_KmmInv_cache()
                print 'Loaded!'
            return
        except:
            print 'Failed. Creating a new model...'

        Y, Z, m, ls, mu, lL, eps_MK, eps_NQ, eps_NK, KmmInv = T.dmatrices('Y', 'Z', 'm', 'ls', 'mu', 
            'lL', 'eps_MK', 'eps_NQ', 'eps_NK', 'KmmInv')
        lhyp = T.dvector('lhyp')
        (M, K), N, Q = mu.shape, m.shape[0], Z.shape[1]
        s, sl2, sf2, l = T.exp(ls), T.exp(lhyp[0]), T.exp(lhyp[1]), T.exp(lhyp[2:2+Q])
        L = T.tril(lL - T.diag(T.diag(lL)) + T.diag(T.exp(T.diag(lL))))
        
        print 'Setting up cache...'
        Kmm = ker.RBF(sf2, l, Z) if not linear_model else ker.LIN(sl2, Z)
        KmmInv_cache = sT.matrix_inverse(Kmm)
        self.f_Kmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        self.update_KmmInv_cache()
        self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
                       'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        print 'Setting up model...'
        if not self.use_hat:
            mu_scaled, L_scaled = sf2**0.5 * mu, sf2**0.5 * L
            X = m + s * eps_NQ
            U = mu_scaled + L_scaled.dot(eps_MK)
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X)
            A = KmmInv.dot(Kmn)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)
            F = A.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N,1)), F), axis=1)
            S = T.nnet.softmax(F)
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                KL_U = -0.5 * (T.sum(KmmInv.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2))
                        + K * (T.sum(KmmInv.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled)))
                               + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0
            #KL_U = -0.5 * T.sum(T.sum(mu_scaled * KmmInv.dot(mu_scaled), 0) + T.sum(KmmInv * L_scaled.dot(L_scaled.T)) - M
            #                    - 2.0*T.sum(T.log(T.diag(L_scaled))) + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))) if not linear_model else 0
        else:
            # mu_scaled, L_scaled = mu / sf2**0.5, L / sf2**0.5
            mu_scaled, L_scaled = mu / sf2, L / sf2
            X = m + s * eps_NQ
            U = mu_scaled + L_scaled.dot(eps_MK)
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)
            F = Kmn.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N,1)), F), axis=1)
            S = T.nnet.softmax(F)
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                KL_U = -0.5 * (T.sum(Kmm.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2))
                        + K * (T.sum(Kmm.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled)))
                               - 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0

        KL_X_all = -0.5 * T.sum((m**2.0 + s**2.0)/sx2 - 1.0 - 2.0*ls + T.log(sx2), 1)
        KL_X = T.sum(KL_X_all)

        print 'Compiling...'
        inputs = {'Y': Y, 'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv, 
            'eps_MK': eps_MK, 'eps_NQ': eps_NQ, 'eps_NK': eps_NK}
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        f = zip(['X', 'U', 'S', 'LS', 'KL_U', 'KL_X', 'KL_X_all'], [X, U, S, LS, KL_U, KL_X, KL_X_all])
        self.f = {n: theano.function(inputs.values(), f+z, name=n, on_unused_input='ignore') for n,f in f}
        g = zip(['LS', 'KL_U', 'KL_X'], [LS, KL_U, KL_X])
        wrt = {'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv}
        self.g = {vn: {gn: theano.function(inputs.values(), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, 
            on_unused_input='ignore') for gn,gv in g} for vn, vv in wrt.iteritems()}

        with open(model_file_name, 'wb') as file_handle:
            print 'Saving model...'
            sys.setrecursionlimit(2000)
            cPickle.dump([self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d], file_handle, protocol=cPickle.HIGHEST_PROTOCOL)
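
# The KL_U expression in this example is the KL divergence between the variational posterior
# q(U) = prod_k N(mu_k, L L^T) and the GP prior p(U) = prod_k N(0, Kmm), with the traces written
# out as elementwise sums (the model stores its negative). A plain-NumPy sketch of the same closed
# form, useful for cross-checking:
import numpy as np

def kl_u(mu, L, Kmm):
    """KL( prod_k N(mu[:, k], L @ L.T) || prod_k N(0, Kmm) ) for an M x K mean matrix."""
    M, K = mu.shape
    KmmInv = np.linalg.inv(Kmm)
    logdet_Kmm = 2.0 * np.sum(np.log(np.diag(np.linalg.cholesky(Kmm))))
    logdet_S = 2.0 * np.sum(np.log(np.diag(L)))              # L must have a positive diagonal
    quad = np.trace(mu.T @ KmmInv @ mu)                      # sum_k mu_k^T Kmm^-1 mu_k
    return 0.5 * (quad + K * (np.trace(KmmInv @ L @ L.T) - M + logdet_Kmm - logdet_S))
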
示例#47
0
 def constructL(ltri):
     tmp = T.transpose(T.tril(T.ones((d, d)), -1))
     lower_tril_indices = tmp.nonzero()
     L = T.transpose(T.set_subtensor(tmp[lower_tril_indices], ltri))
     return L
示例#48
0
    def createGradientFunctions(self):
        #Create the Theano variables
        W1, W2, W3, W4, W5, W7, x, eps = T.dmatrices("W1", "W2", "W3", "W4",
                                                     "W5", "W7", "x", "eps")

        #Create biases as cols so they can be broadcasted for minibatches
        b1, b2, b3, b4, b5, b7 = T.dcols("b1", "b2", "b3", "b4", "b5", "b7")

        if self.continuous_data:
            h_encoder = T.nnet.softplus(T.dot(W1, x) + b1)
        else:
            h_encoder = T.tanh(T.dot(W1, x) + b1)

        mu_encoder = T.dot(W2, h_encoder) + b2
        log_sigma_encoder = 0.5 * (T.dot(W3, h_encoder) + b3)

        L_u = T.tril(log_L_u - T.diag(T.diag(log_L_u)) +
                     T.diag(T.exp(T.diag(log_L_u))))
        # To do: Better ways of parameterising the covariance (see: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.31.494&rep=rep1&type=pdf)

        #Compute GP objects
        K_ff = self.ker.RBF(sf2, ell, X)
        K_uu = self.ker.RBF(sf2, ell, X_u)
        K_uu_inv = nlinalg.matrix_inverse(K_uu)
        L_f = slinalg.cholesky(K_ff - T.dot(K_fu, T.dot(K_uu_inv, K_fu.T)))
        # f_i make up the columns of f, similarly for m_u_i
        u = m_u + T.dot(L_u, eps_u)  #n_induce iid pseudo inducing sets
        f = T.dot(K_fu, T.dot(K_uu_inv, u)) + T.dot(L_f, X)

        #Find the hidden variable z
        # log_sigma_lhood = 0.5*(T.dot(W9,f) + b9) # the var GP maps to both mean *and* covariance
        sigma_var_lhood = sigma_z**2 * T.eye(self.dimZ)
        L_z = slinalg.cholesky(sigma_var_lhood)
        z = f + T.dot(L_z, eps_z)
        # z = mu_encoder + T.exp(log_sigma_encoder)*eps

        prior = 0.5 * T.sum(1 + 2 * log_sigma_encoder - mu_encoder**2 -
                            T.exp(2 * log_sigma_encoder))

        #Set up decoding layer
        if self.continuous_data:
            h_decoder = T.nnet.softplus(T.dot(W4, z) + b4)
            mu_decoder = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5)
            log_sigma_decoder = 0.5 * (T.dot(W7, h_decoder) + b7)
            logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) -
                           0.5 *
                           ((x - mu_decoder) / T.exp(log_sigma_decoder))**2)
            gradvariables = [
                W1, W2, W3, W4, W5, W7, b1, b2, b3, b4, b5, b7, sf2, ell, X_u,
                m_u, L_u
            ]
        else:
            h_decoder = T.tanh(T.dot(W4, z) + b4)
            y = T.nnet.sigmoid(T.dot(W5, h_decoder) + b5)
            logpxz = -T.nnet.binary_crossentropy(y, x).sum()
            gradvariables = [
                W1, W2, W3, W4, W5, b1, b2, b3, b4, b5, sf2, ell, X_u, m_u, L_u
            ]

        #Set up auxiliary layer
        if self.continuous_data:
            h_auxiliary = T.nnet.softplus(T.dot(W6, [x, z]) + b6)
            mu_auxiliary = T.nnet.sigmoid(T.dot(W7, h_auxiliary) + b7)
            log_sigma_auxiliary = 0.5 * (T.dot(W8, h_auxiliary) + b8)
        else:
            pass  #to do

        logp = logpxz + prior

        #Compute KL terms
        # KL_qp = -0.5*T.sum(1.0 + 2*log_sigma_lhood - f**2 - T.exp(2*log_sigma_lhood))
        KL_qp = 0.5 * (T.dot(f.T, f) +
                       T.trace(sigma_var_lhood + T.log(T.eye(self.dimZ)) -
                               T.log(sigma_var_lhood)) - self.dimZ)
        KL_qr = 0.5 * (T.dot(
            (mu_auxiliary - mu_encoder).T,
            T.dot(T.diag(1.0 / T.exp(log_sigma_auxiliary)),
                  mu_auxiliary - mu_decoder)) + T.trace(
                      T.dot(T.diag(1.0 / T.exp(log_sigma_auxiliary)),
                            T.dot(L_u, L_u.T)) + log_sigma_auxiliary -
                      log_sigma_encoder) - self.dimXf - self.dimf)

        #Compute bound and all the gradients
        stoch_bound = logpxz - KL_qp - KL_qr
        derivatives = T.grad(stoch_bound, gradvariables)

        #Add the lowerbound so we can keep track of results
        derivatives.append(stoch_bound)

        self.gradientfunction = th.function(gradvariables +
                                            [x, eps_u, eps_z, X],
                                            derivatives,
                                            on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables +
                                              [x, eps_u, eps_z, X],
                                              stoch_bound,
                                              on_unused_input='ignore')
        self.zfunction = th.function(gradvariables + [x, eps_u, eps_z, X],
                                     z,
                                     on_unused_input='ignore')
示例#49
0
 def neg_log_prob(self, x, c):
     W = T.tril(self.War, k=-1)
     p = T.nnet.sigmoid(T.dot(x, W) + self.bar + c)
     return self.f_neg_log_prob(x, p)
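
# f_neg_log_prob is not shown in these snippets; given that p is a vector of per-unit sigmoid
# probabilities, a standard choice consistent with this usage is the Bernoulli negative
# log-likelihood of x under p. A hedged NumPy sketch of such a helper (assumed form, not the
# original implementation):
import numpy as np

def f_neg_log_prob(x, p, eps=1e-7):
    """Bernoulli negative log-likelihood of binary x under probabilities p."""
    p = np.clip(p, eps, 1.0 - eps)
    return -np.sum(x * np.log(p) + (1.0 - x) * np.log(1.0 - p), axis=-1)
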
    def __init__(self, params,correct, samples = 500,batch_size=None):
        ker = kernel()
        self.samples = samples
        self.params =  params
        self.batch_size=batch_size
        
        #file for saving the model
        model_file_name = 'model2' + '.save'
                                    #load a previously saved model if one exists
        try:
            print ('Trying to load model...')
            with open(model_file_name, 'rb') as file_handle:
                obj = pickle.load(file_handle)
                self.f, self.g= obj
                print ('Loaded!')
            return
        except:
            print ('Failed. Creating a new model...')
        
        X,Y,X_test,mu,Sigma_b,Z,eps_NQ,eps_M =\
        T.dmatrices('X','Y','X_test','mu','Sigma_b','Z','eps_NQ','eps_M')
        
        Wx, Ws, Wu=\
        T.dmatrices('Wx', 'Ws', 'Wu')

        bx, bs, bu=\
        T.dvectors('bx', 'bs', 'bu')

        gamma_x,beta_x,gamma_u,beta_u,gamma_s,beta_s=\
        T.dvectors("gamma_x","beta_x","gamma_u","beta_u","gamma_s","beta_s")
    
        lhyp = T.dvector('lhyp')
        ls=T.dvector('ls')
        
        (M, D), N, Q = Z.shape, X.shape[0], X.shape[1]

        
        #constrain the variables to be positive
        beta = T.exp(ls[0])
        #beta=T.exp(lhyp[0])
        sf2, l = T.exp(lhyp[0]), T.exp(lhyp[1:1+Q])
        
        #Sigma=T.exp(self.Sigma_b)
        
        #x is diagonal, so no square root / Cholesky step is needed
        #u is not diagonal, so we need a triangular factor (e.g. a Cholesky-style decomposition)
        Sigma = T.tril(Sigma_b - T.diag(T.diag(Sigma_b)) + T.diag(T.exp(T.diag(Sigma_b))))
        
        #scale transformation
        mu_scaled, Sigma_scaled = sf2**0.5 * mu, sf2**0.5 * Sigma
        
        #build the hidden layers
        out1=self.neural_net_predict(Wx,bx,gamma_x,beta_x,X)
        m=self.neural_net_predict(Wu,bu,gamma_u,beta_u,out1)
        S=self.neural_net_predict(Ws,bs,gamma_s,beta_s,out1)
        #outputs1 = T.dot(X,Wx) + bx
        #m = T.dot(out1,Wu) + bu
        #S=T.dot(out1,Ws) + bs
                 
        S=T.exp(S)
        S=T.sqrt(S)
        
        Xtilda = m+S*eps_NQ
        U = mu_scaled+Sigma_scaled.dot(eps_M)

        print ('Setting up cache...')
        
        Kmm = ker.RBF(sf2, l, Z)
        KmmInv = sT.matrix_inverse(Kmm) 
        #KmmDet=theano.sandbox.linalg.det(Kmm)
        
        #KmmInv_cache = sT.matrix_inverse(Kmm)
        #self.fKmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        #self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        #note: this compiles kmmInv_cache as a function of Z and lhyp, so the inverse becomes a function of Z and the hyperparameters
        #self.update_KmmInv_cache()#actually evaluates KmmInv with numerical values
        #builds the derivative functions of the inverse matrix
        
        #self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
        #               'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        
        print ('Modeling...')
        
        Kmn = ker.RBF(sf2,l,Z,Xtilda)
        Knn = ker.RBF(sf2,l,Xtilda,Xtilda)
        
        Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn))
        
        Kinterval=T.dot(KmmInv,Kmn)
              
        mean_U=T.dot(Kinterval.T,U)
        Covariance = beta       
        
        LL = (self.log_mvn(X, mean_U, Covariance) - 0.5*beta*T.sum((T.eye(N)*Ktilda)))*correct      
        KL_X = -self.KLD_X(m,S)*correct
        KL_U = -self.KLD_U(mu_scaled , Sigma_scaled , Kmm,KmmInv)
        
        print ('Compiling model ...')        

        inputs = {'X': X, 'Z': Z,'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls, 'eps_M': eps_M, 'eps_NQ': eps_NQ,\
                  "Wx":Wx, "bx":bx, "Wu":Wu,"bu":bu, "Ws":Ws, "bs":bs,\
              "gamma_x":gamma_x,"beta_x":beta_x,"gamma_u":gamma_u,"beta_u":beta_u,"gamma_s":gamma_s,"beta_s":beta_s}
        
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        
        self.f = {n: theano.function(list(inputs.values()), f+z, name=n, on_unused_input='ignore')\
                  for n,f in zip(['Xtilda','U', 'LL', 'KL_U', 'KL_X'], [Xtilda,U, LL, KL_U, KL_X])}
        
        
        wrt = {'Z': Z,'mu': mu, 'Sigma_b': Sigma_b, 'lhyp': lhyp, 'ls': ls, "Wx":Wx, "bx":bx, "Wu":Wu,"bu":bu, "Ws":Ws, "bs":bs,\
              "gamma_x":gamma_x,"beta_x":beta_x,"gamma_u":gamma_u,"beta_u":beta_u,"gamma_s":gamma_s,"beta_s":beta_s}
        self.g = {vn: {gn: theano.function(list(inputs.values()), T.grad(gv+z, vv), name='d'+gn+'_d'+vn, 
            on_unused_input='ignore') for gn,gv in zip(['LL', 'KL_U', 'KL_X'], [LL, KL_U, KL_X])} for vn, vv in wrt.items()}

        with open(model_file_name, 'wb') as file_handle:
            print ('Saving model...')
            sys.setrecursionlimit(2000)
            pickle.dump([self.f, self.g], file_handle, protocol=pickle.HIGHEST_PROTOCOL)
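
# The 0.5*beta*T.sum(T.eye(N)*Ktilda) term in the likelihood above keeps only the diagonal of
# Ktilda, i.e. 0.5 * beta * trace(Ktilda), the usual variance-correction term in sparse-GP bounds.
# A quick NumPy check that the two spellings agree:
import numpy as np

Ktilda = np.random.default_rng(2).standard_normal((4, 4))
beta = 2.0
as_written = 0.5 * beta * np.sum(np.eye(4) * Ktilda)   # mirrors the Theano expression
as_trace = 0.5 * beta * np.trace(Ktilda)               # same value, cheaper and clearer
assert np.isclose(as_written, as_trace)
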
示例#51
0
 def step_neg_log_prob(self, x, c, War, bar):
     W = T.tril(War, k=-1)
     p = T.nnet.sigmoid(T.dot(x, W) + bar + c)
     return self.f_neg_log_prob(x, p)
示例#52
0
File: layers.py Project: zixiangfu/senti
 def get_output_for(self, input_, **kwargs):
     W = T.tril(self.W, -1)
     interactions = T.batched_dot(T.dot(input_, W), input_)
     interactions = T.sqrt(T.maximum(interactions, 1e-6))  # elementwise clip before the sqrt; T.max would treat 1e-6 as an axis
     return self.nonlinearity(input_ + interactions)
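
# get_output_for above computes one pairwise-interaction scalar per row, x W x^T with a strictly
# lower-triangular W, so each unordered feature pair is counted once and no squared terms appear.
# A NumPy sketch of the same quantity; the explicit [:, None] broadcast and the toy sizes are
# illustrative assumptions:
import numpy as np

rng = np.random.default_rng(3)
X = rng.standard_normal((8, 5))                        # batch of inputs
W = np.tril(rng.standard_normal((5, 5)), -1)           # strictly lower-triangular pair weights
interactions = np.einsum('bi,ij,bj->b', X, W, X)       # per-row quadratic form, like batched_dot
interactions = np.sqrt(np.maximum(interactions, 1e-6)) # clip before the square root
out = X + interactions[:, None]                        # add the per-row interaction scalar
print(out.shape)
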
示例#53
0
 def tril_and_halve_diagonal(mtx):
     """Extracts lower triangle of square matrix and halves diagonal."""
     return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.0)
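
# Halving the diagonal means a symmetric matrix S can be recovered as f(S) + f(S).T, so every
# entry of S is stored exactly once in the lower triangle. A quick NumPy check of that property:
import numpy as np

def tril_and_halve_diagonal(mtx):
    return np.tril(mtx) - np.diag(np.diagonal(mtx) / 2.0)

A = np.random.default_rng(4).standard_normal((4, 4))
S = A + A.T                                            # symmetric test matrix
H = tril_and_halve_diagonal(S)
assert np.allclose(H + H.T, S)
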
示例#54
0
    def __init__(self, rng, input, n_in, n_batch, d_bucket, activation, activation_deriv,
                 w=None, index_permute=None, index_permute_reverse=None):
        srng = RandomStreams(seed=234)
        
        n_bucket = n_in / d_bucket + 1
        self.input = input

        # randomly permute input space
        if index_permute is None:
            index_permute = srng.permutation(n=n_in)#numpy.random.permutation(n_in)
            index_permute_reverse = T.argsort(index_permute)
            self.index_permute = index_permute
            self.index_permute_reverse = index_permute_reverse

        permuted_input = input[:, index_permute]
        self.permuted_input = permuted_input

        # initialize matrix parameters
        if w is None:
            bound = numpy.sqrt(3. / d_bucket)
            w_values = numpy.asarray(rng.uniform(low=-bound,
                                                 high=bound,
                                                 size=(n_bucket, d_bucket, d_bucket)),
                                     dtype=theano.config.floatX)
            w = theano.shared(value=w_values, name='w')
            
        self.w = w
        
        
        # compute outputs and Jacobians
        
        log_jacobian = T.alloc(0, n_batch)
        for b in xrange(n_bucket):
            bucket_size = d_bucket
            if b == n_bucket - 1:
                bucket_size = n_in - b * d_bucket
            
           
            if b>0:
                prev_input = x_b
                
                """here we warp the previous bucket of inputs and add to the new input"""            

            x_b = self.permuted_input[:, b*d_bucket:b*d_bucket + bucket_size]
            w_b = self.w[b, :bucket_size, :bucket_size]

            if b>0:
                x_b_plus = x_b + m_b
            else:
                x_b_plus = x_b

            Upper = T.triu(w_b)
            Lower = T.tril(w_b)
            Lower = T.extra_ops.fill_diagonal(Lower, 1.)
            log_det_Upper = T.log(T.abs_(T.nlinalg.ExtractDiag()(Upper))).sum() 

            W = T.dot(Upper, Lower)
            log_jacobian = log_jacobian + T.alloc(log_det_Upper, n_batch)

            lin_output_b = T.dot(x_b_plus, W)
            if b>0:
                lin_output = T.concatenate([lin_output, lin_output_b], axis=1)
            else:
                lin_output = lin_output_b
            if activation is not None:
                derivs = activation_deriv(lin_output_b)     
                #import pdb; pdb.set_trace()
                log_jacobian = log_jacobian + T.log(T.abs_(derivs)).sum(axis=1)                 
                    
        self.log_jacobian = log_jacobian        


        self.output = (
            lin_output[:, index_permute_reverse] if activation is None
            else activation(lin_output[:, index_permute_reverse])
        )

        self.params = [w]
    def __init__(self, rng,input_m,input_S, n_in, n_out,inducing_number,Domain_number=None,
                 liklihood="Gaussian",Domain_consideration=True,number="1",kernel_name='X'):

        m=input_m
        self.cal=input_m
        S_0=input_S
        
        self.N=m.shape[0]
        D=n_out
        Q=n_in
        M=inducing_number
        
        #set_initial_value
        ker=kernel(Q,kernel_name)
        self.kern=ker
        mu_value = np.random.randn(M,D)* 1e-2
        Sigma_b_value = np.zeros((M,M))
        Z_value = np.random.randn(M,Q)
        if Domain_consideration:
            ls_value=np.zeros(Domain_number)+np.log(0.1)
        else:
            ls_value=np.zeros(1)+np.log(0.1)
        
        self.mu = theano.shared(value=mu_value, name='mu'+number, borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value, name='Sigma_b'+number, borrow=True)
        self.Z = theano.shared(value=Z_value, name='Z'+number, borrow=True)
        self.ls = theano.shared(value=ls_value, name='ls'+number, borrow=True)
        
        self.params = [self.mu,self.Sigma_b,self.Z,self.ls]
        
        
        self.params.extend(ker.params)
        
        self.hyp_params_list=[self.mu,self.Sigma_b,self.ls]
        self.Z_params_list=[self.Z]        
        self.global_params_list=self.params
        
        S_1=T.exp(S_0)
        S=T.sqrt(S_1)
        
        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        eps_NQ = srng.normal((100,self.N,Q))
        eps_M = srng.normal((100,M,D))#the mean and the variance need different random draws, so they are named separately
        eps_ND = srng.normal((100,self.N,D))
                          
        self.beta = T.exp(self.ls)
        #u is not diagonal, so we need a triangular factor (e.g. a Cholesky-style decomposition)

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) + T.diag(T.exp(T.diag(self.Sigma_b))))
        
        #scale transformation
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma
        
        #Xtilda = m[None,:,:] + S[None,:,:] * eps_NQ
        Xtilda, updates = theano.scan(fn=lambda a: m+S*a,
                              sequences=[eps_NQ])
                   
        #self.U = mu_scaled[None,:,:]+Sigma_scaled[None,:,:].dot(eps_M)
        self.U, updates = theano.scan(fn=lambda a: mu_scaled+Sigma_scaled.dot(a),
                              sequences=[eps_M])
        
        Kmm = ker.RBF(self.Z)
        KmmInv = sT.matrix_inverse(Kmm) 
        
        Knn, updates = theano.scan(fn=lambda a: self.kern.RBF(a),
                              sequences=[Xtilda])
        
        Kmn, updates = theano.scan(fn=lambda a: self.kern.RBF(self.Z,a),
                              sequences=[Xtilda])
        
        #Kmn = ker.RBF(self.Z,Xtilda)
        
        #Knn = ker.RBF(Xtilda)
        Ktilda, updates = theano.scan(fn=lambda a,b: a-T.dot(b.T,T.dot(KmmInv,b)),
                              sequences=[Knn,Kmn])
        #Ktilda=Knn-T.dot(Kmn.T,T.dot(KmmInv,Kmn))
        
        
        F, updates = theano.scan(fn=lambda a,b,c,d: T.dot(a.T,T.dot(KmmInv,b)) + T.dot(T.maximum(c, 1e-16)**0.5,d),
                              sequences=[Kmn,self.U,Ktilda,eps_ND])
        #F = T.dot(Kmn.T,T.dot(KmmInv,self.U)) + T.dot(T.maximum(Ktilda, 1e-16)**0.5,eps_ND)
        
        #Kinterval=T.dot(KmmInv,Kmn)

        self.mean_U=F
        #mean_U=T.dot(Kinterval.T,self.U)
        
        #A=Kinterval.T      
        #Sigma_tilda=Ktilda+T.dot(A,T.dot(Sigma_scaled,A.T))
        #mean_tilda=T.dot(A,mu_scaled)        
        #self.mean_U=mean_tilda + T.dot(T.maximum(Sigma_tilda, 1e-16)**0.5,eps_ND)

        
        self.output=self.mean_U
        self.KL_X = -self.KLD_X(m,S)
        self.KL_U = -self.KLD_U(mu_scaled , Sigma_scaled , Kmm,KmmInv)
示例#56
0
 def get_prob(self, x, W, b):
     W = T.tril(W, k=-1)
     p = T.nnet.sigmoid(T.dot(x, W) + b) * 0.9999 + 0.000005
     return p
    def __init__(self, D, M, Q, Domain_number, m, pre_params, Pre_U,
                 Hiddenlayerdim1, Hiddenlayerdim2):

        self.Xlabel = T.matrix('Xlabel')

        self.X = T.matrix('X')
        N = self.X.shape[0]

        self.Weight = T.matrix('Weight')

        ker = kernel(Q)
        #mmd=MMD(M,Domain_number)

        mu_value = np.random.randn(M, D)
        Sigma_b_value = np.zeros((M, M)) + np.log(0.01)

        Z_value = m[:M]
        self.test = Z_value
        ls_value = np.zeros(Domain_number) + np.log(0.1)

        self.mu = theano.shared(value=mu_value, name='mu', borrow=True)
        self.Sigma_b = theano.shared(value=Sigma_b_value,
                                     name='Sigma_b',
                                     borrow=True)
        self.Z = theano.shared(value=Z_value, name='Z', borrow=True)
        self.ls = theano.shared(value=ls_value, name='ls', borrow=True)

        self.params = [self.mu, self.Sigma_b, self.Z, self.ls]

        self.hiddenLayer_x = HiddenLayer(rng=rng,
                                         input=self.X,
                                         n_in=D,
                                         n_out=Hiddenlayerdim1,
                                         activation=T.nnet.relu,
                                         number='_x')
        self.hiddenLayer_hidden = HiddenLayer(rng=rng,
                                              input=self.hiddenLayer_x.output,
                                              n_in=Hiddenlayerdim1,
                                              n_out=Hiddenlayerdim2,
                                              activation=T.nnet.relu,
                                              number='_h')
        self.hiddenLayer_m = HiddenLayer(rng=rng,
                                         input=self.hiddenLayer_hidden.output,
                                         n_in=Hiddenlayerdim2,
                                         n_out=Q,
                                         activation=T.nnet.relu,
                                         number='_m')
        self.hiddenLayer_S = HiddenLayer(rng=rng,
                                         input=self.hiddenLayer_hidden.output,
                                         n_in=Hiddenlayerdim2,
                                         n_out=Q,
                                         activation=T.nnet.relu,
                                         number='_S')

        self.loc_params = []
        self.loc_params.extend(self.hiddenLayer_x.params)
        self.loc_params.extend(self.hiddenLayer_hidden.params)
        self.loc_params.extend(self.hiddenLayer_m.params)
        self.loc_params.extend(self.hiddenLayer_S.params)

        self.local_params = {}
        for i in self.loc_params:
            self.local_params[str(i)] = i

        self.params.extend(ker.params)
        #self.params.extend(mmd.params)

        self.hyp_params = {}
        for i in [self.mu, self.Sigma_b, self.ls]:
            self.hyp_params[str(i)] = i

        self.Z_params = {}
        for i in [self.Z]:
            self.Z_params[str(i)] = i

        self.global_params = {}
        for i in self.params:
            self.global_params[str(i)] = i

        self.params.extend(self.hiddenLayer_x.params)
        self.params.extend(self.hiddenLayer_hidden.params)
        self.params.extend(self.hiddenLayer_m.params)
        self.params.extend(self.hiddenLayer_S.params)

        self.wrt = {}
        for i in self.params:
            self.wrt[str(i)] = i

        for i, j in pre_params.items():
            self.wrt[i].set_value(j)

        for i, j in Pre_U.items():
            self.wrt[i].set_value(j)

        m = self.hiddenLayer_m.output
        S_0 = self.hiddenLayer_S.output
        S_1 = T.exp(S_0)
        S = T.sqrt(S_1)

        from theano.tensor.shared_randomstreams import RandomStreams
        srng = RandomStreams(seed=234)
        eps_NQ = srng.normal((N, Q))
        eps_M = srng.normal((M, D))  #the mean and the variance need different random draws, so they are named separately

        beta = T.exp(self.ls)
        #u is not diagonal, so we need a triangular factor (e.g. a Cholesky-style decomposition)

        Sigma = T.tril(self.Sigma_b - T.diag(T.diag(self.Sigma_b)) +
                       T.diag(T.exp(T.diag(self.Sigma_b))))

        #scale transformation
        mu_scaled, Sigma_scaled = ker.sf2**0.5 * self.mu, ker.sf2**0.5 * Sigma

        Xtilda = m + S * eps_NQ
        self.U = mu_scaled + Sigma_scaled.dot(eps_M)

        Kmm = ker.RBF(self.Z)
        #Kmm=mmd.MMD_kenel_Xonly(mmd.Zlabel_T,Kmm,self.Weight)
        KmmInv = sT.matrix_inverse(Kmm)

        Kmn = ker.RBF(self.Z, Xtilda)
        #Kmn=mmd.MMD_kenel_ZX(self.Xlabel,Kmn,self.Weight)

        Knn = ker.RBF(Xtilda)
        #Knn=mmd.MMD_kenel_Xonly(self.Xlabel,Knn,self.Weight)

        Ktilda = Knn - T.dot(Kmn.T, T.dot(KmmInv, Kmn))

        Kinterval = T.dot(KmmInv, Kmn)

        mean_U = T.dot(Kinterval.T, self.U)
        betaI = T.diag(T.dot(self.Xlabel, beta))
        Covariance = betaI

        self.LL = (self.log_mvn(self.X, mean_U, Covariance) -
                   0.5 * T.sum(T.dot(betaI, Ktilda)))
        self.KL_X = -self.KLD_X(m, S)
        self.KL_U = -self.KLD_U(mu_scaled, Sigma_scaled, Kmm, KmmInv)
示例#58
0
def contaminate_mixture(data, fit_for='z', fit_data=None): #stickbreaking problems
    steps = []
    # shapes and sizes
    n_epochs = data['epoch_i'].max() + 1  # each epoch indexed by epoch_i
    n_raters = data['rater_i'].max() + 1
    n_obs = data.shape[0]  # each spindle marker indexed by t

    # static priors vars
    trust_purcell = 0.1  # crank up to give more weight to purcell et al, 2017
    purcell = np.array([0.3587, 0.6387, 0.0026, 0., 0., 0.]) + (1 - trust_purcell)
    s_number_prior = purcell / purcell.sum()
    max_s = len(s_number_prior) - 1
    gss_spindle_testvals = [1., 5., 10., 15., 20.]
    with pm.Model() as model:

        # True s
        gss = pm.Uniform('gss', lower=0., upper=25., shape=(n_epochs, max_s),
                         testval=np.tile(np.array(gss_spindle_testvals).T, reps=(n_epochs, 1),))  # Real spindles
        gss_per_obs = gss[data['epoch_i'], :]

        # The number of spindles per epoch:
        if fit_for == 'z':
            gss_prior = pm.Dirichlet('gss_prior', a=s_number_prior)
            if n_epochs > 1:
                z = pm.Categorical('z', p=gss_prior,
                                   shape=n_epochs)
            else:
                z = pm.Categorical('z', p=gss_prior)
        else:
            z = fit_data['z']
        z_rs = z.reshape((n_epochs, 1))

        if fit_for in ['w', 'z']:  # when we are finding z or w
            w_prior_possibilities = tt.tril(tt.ones((max_s + 1, max_s + 1)))
            w = pm.Categorical('w', p=w_prior_possibilities[z_rs[data['epoch_i'], 0], :], shape=n_obs)
        else:  # fit for gss
            w = fit_data['w']

        # --- Raters ability to detect markers --- #
        r_E = pm.Bound(pm.Normal, lower=0.)('r_E', mu=0.5, sd=0.5, shape=n_raters)
        r_E_per_obs = r_E[data['rater_i']]
        #r_E = pm.Bound(pm.Normal, lower=0.)('r_E', mu=0.5, sd=0.5)

        # --- Behaviour --- #
        contaminate_dist_s = pm.Uniform.dist(lower=0., upper=25., shape=n_obs)
        contaminate_dist_s.mean = 12.5
        possible_dists = [contaminate_dist_s]
        for i in range(0, 5):
            dist = pm.Normal.dist(mu=gss_per_obs[:, i], sd=r_E_per_obs)
            dist.mean = gss_spindle_testvals[i]
            possible_dists.append(dist)

        w_array = tt.extra_ops.to_one_hot(w, nb_class=max_s + 1)
        s = pm.Mixture('s', w=w_array,
                       comp_dists=possible_dists,
                       observed=data['s'])

        #STEP methods for vars:
        if fit_for == 'z':
            steps = [pm.CategoricalGibbsMetropolis([z, w]),
                     pm.NUTS([gss_prior, gss, r_E], target_accept=0.9)]
        if fit_for == 'w':
            steps = [pm.CategoricalGibbsMetropolis([w]),
                     pm.NUTS([gss, r_E], target_accept=0.9)]
        #else, everything NUTS

    return model, steps
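
# tt.tril(tt.ones((max_s + 1, max_s + 1))) above builds one categorical prior per possible spindle
# count: row z has ones in columns 0..z, so a marker in an epoch with z true spindles can be
# assigned (uniformly, after normalization) to the contamination component or to any of those z
# spindles, but never to a spindle that does not exist. A NumPy illustration of one row:
import numpy as np

max_s = 5
w_prior_possibilities = np.tril(np.ones((max_s + 1, max_s + 1)))
z = 2                                                  # an epoch with 2 true spindles
row = w_prior_possibilities[z]                         # [1., 1., 1., 0., 0., 0.]
print(row / row.sum())                                 # uniform over components 0..z
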