Example #1
    def __init__(self, rng, input1, input2, n_in1, n_in2, n_hidden_layers, d_hidden, W1=None, W2=None):
        self.input1 = input1
        self.input2 = input2
        
        CouplingFunc = WarpNetwork(rng, input1, n_hidden_layers, d_hidden, n_in1, n_in2)  
        
        if W1 is None:
            bin = numpy.sqrt(6. / (n_in1 + n_in1))
            W1_values = numpy.identity(n_in1, dtype=theano.config.floatX)            
            W1 = theano.shared(value=W1_values, name='W1')

        if W2 is None:
            bin = numpy.sqrt(6. / (n_in2 + n_in2))
            W2_values = numpy.identity(n_in2, dtype=theano.config.floatX)
            W2 = theano.shared(value=W2_values, name='W2')

        V1u = T.triu(W1)
        V1l = T.tril(W1)
        V1l = T.extra_ops.fill_diagonal(V1l, 1.)
        V1 = T.dot(V1u, V1l) 
            
        V2u = T.triu(W2)
        V2l = T.tril(W2)
        V2l = T.extra_ops.fill_diagonal(V2l, 1.)
        V2 = T.dot(V2u, V2l) 
            
        self.output1 = T.dot(input1, V1)
        self.output2 = T.dot(input2, V2) + CouplingFunc.output

        self.log_jacobian = T.log(T.abs_(T.nlinalg.ExtractDiag()(V1u))).sum() \
            + T.log(T.abs_(T.nlinalg.ExtractDiag()(V2u))).sum() 

        self.params = CouplingFunc.params
Example #2
 def cost(X):
     Y = T.dot(X, X.T)
     s = T.triu(Y, 1).max()
     expY = T.exp((Y - s) / epsilon)
     expY = expY - T.diag(T.diag(expY))
     u = T.sum(T.triu(expY, 1))
     return s + epsilon * T.log(u)
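A note on the example above: cost() is a smoothed (log-sum-exp) maximum over the strict upper triangle of Y = X·Xᵀ, and the shift by s guards against overflow in the exponential. A minimal NumPy sketch of the same computation (X and epsilon are made up here):

import numpy as np

X = np.random.randn(5, 3)
epsilon = 1e-3
Y = X @ X.T
s = np.triu(Y, 1).max()
expY = np.exp((Y - s) / epsilon)
u = np.triu(expY, 1).sum()
print(s + epsilon * np.log(u))      # smoothed maximum
print(np.triu(Y, 1).max())          # exact maximum; nearly identical for small epsilon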
Example #3
File: rand.py Project: gburt/iaf
def gaussian_chol(mean, logvar, chol, sample=None):
    if sample is not None:
        raise Exception('Not implemented')
    diag = gaussian_diag(mean, logvar)
    mask = T.shape_padleft(T.triu(T.ones_like(chol[0]), 1))
    sample = diag.sample + T.batched_dot(diag.sample, chol * mask)
    return RandomVariable(sample, diag.logp, diag.entr, mean=mean, logvar=logvar)
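The triu(..., 1) mask above keeps only the strictly upper triangle of chol, so the batched dot adds to each sample a combination of the other coordinates while leaving the diagonal untouched. A quick NumPy illustration of the mask (the 3x3 matrix is hypothetical):

import numpy as np

chol0 = np.arange(1.0, 10.0).reshape(3, 3)      # stands in for chol[0]
mask = np.triu(np.ones_like(chol0), 1)
print(mask)                                     # ones strictly above the diagonal
print(chol0 * mask)                             # diagonal and lower triangle zeroed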
Example #4
    def grad(self, inputs, output_gradients):
        """
        Reverse-mode gradient updates for matrix solve operation c = A \ b.

        Symbolic expression for updates taken from [1]_.

        References
        ----------
        .. [1] M. B. Giles, "An extended collection of matrix derivative results
          for forward and reverse mode automatic differentiation",
          http://eprints.maths.ox.ac.uk/1079/

        """
        A, b = inputs
        c = self(A, b)
        c_bar = output_gradients[0]
        trans_map = {
            'lower_triangular': 'upper_triangular',
            'upper_triangular': 'lower_triangular'
        }
        trans_solve_op = Solve(
            # update A_structure and lower to account for a transpose operation
            A_structure=trans_map.get(self.A_structure, self.A_structure),
            lower=not self.lower
        )
        b_bar = trans_solve_op(A.T, c_bar)
        # force outer product if vector second input
        A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)
        if self.A_structure == 'lower_triangular':
            A_bar = tensor.tril(A_bar)
        elif self.A_structure == 'upper_triangular':
            A_bar = tensor.triu(A_bar)
        return [A_bar, b_bar]
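For reference, these are Giles' reverse-mode identities for c = A \ b, which the code above implements directly:

.. math:: \bar{b} = A^{-T}\,\bar{c}, \qquad \bar{A} = -\,\bar{b}\,c^{T}

The final tril/triu call then projects \bar{A} back onto the triangle that A is known to occupy.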
Example #5
    def L_op(self, inputs, outputs, gradients):
        # Modified from theano/tensor/slinalg.py
        # No handling for on_error = 'nan'
        dz = gradients[0]
        chol_x = outputs[0]

        # this is for nan mode
        #
        # ok = ~tensor.any(tensor.isnan(chol_x))
        # chol_x = tensor.switch(ok, chol_x, 1)
        # dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return gpu_solve_upper_triangular(
                outer.T, gpu_solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        return [grad]
Example #6
    def L_op(self, inputs, outputs, output_gradients):
        r"""
        Reverse-mode gradient updates for matrix solve operation c = A \ b.

        Symbolic expression for updates taken from [#]_.

        References
        ----------
        .. [#] M. B. Giles, "An extended collection of matrix derivative results
          for forward and reverse mode automatic differentiation",
          http://eprints.maths.ox.ac.uk/1079/

        """
        A, b = inputs
        c = outputs[0]
        c_bar = output_gradients[0]
        trans_map = {
            "lower_triangular": "upper_triangular",
            "upper_triangular": "lower_triangular",
        }
        trans_solve_op = Solve(
            # update A_structure and lower to account for a transpose operation
            A_structure=trans_map.get(self.A_structure, self.A_structure),
            lower=not self.lower,
        )
        b_bar = trans_solve_op(A.T, c_bar)
        # force outer product if vector second input
        A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)
        if self.A_structure == "lower_triangular":
            A_bar = tensor.tril(A_bar)
        elif self.A_structure == "upper_triangular":
            A_bar = tensor.triu(A_bar)
        return [A_bar, b_bar]
Example #7
        def TopAccuracy2C(pred=None, truth=None, symmetric=False):

            M1s = T.ones_like(truth, dtype=np.int8)
            LRsel = T.triu(M1s, 24)
            MLRsel = T.triu(M1s, 12)
            SMLRsel = T.triu(M1s, 6)
            MRsel = MLRsel - LRsel
            SRsel = SMLRsel - MLRsel

            dataLen = truth.shape[0]

            pred0 = pred[:, :, 0]

            if symmetric:
                avg_pred = (pred0 + pred0.dimshuffle(1, 0)) / 2.0
            else:
                avg_pred = pred0

            #pred_truth = T.concatenate( (avg_pred, truth.dimshuffle(0, 1, 'x') ), axis=2)
            pred_truth = T.stack([avg_pred, T.cast(truth, 'int32')], axis=2)

            accuracyList = []
            for Rsel in [LRsel, MRsel, MLRsel, SRsel]:
                selected_pred_truth = pred_truth[Rsel.nonzero()]

                ## sort by the predicted value for label 0 from the largest to the smallest
                selected_pred_truth_sorted = selected_pred_truth[(
                    selected_pred_truth[:, 0]).argsort()[::-1]]

                #print 'topRatio =', topRatio
                numTops = T.minimum(T.iround(dataLen * topRatio),
                                    selected_pred_truth_sorted.shape[0])

                selected_sorted_truth = T.cast(
                    selected_pred_truth_sorted[:, -1], 'int32')
                numTruths = T.bincount(selected_sorted_truth, minlength=2)
                numCorrects = T.bincount(selected_sorted_truth[0:numTops],
                                         minlength=2)
                #numTops = T.minimum(numTops, numTruths[0])
                accuracyList.append(
                    T.stack([
                        numCorrects[0] * 1. /
                        (numTops + 0.001), numTops, numTruths[0]
                    ],
                            axis=0))

            return T.stacklists(accuracyList)
Example #8
    def __init__(self, input, n_in, n_out):

        batchSize, seqLen, _ = input.shape

        import collections
        if isinstance(n_out, collections.Sequence):
            LRembedLayer = EmbeddingLayer(input, n_in, n_out[2])
            MRembedLayer = EmbeddingLayer(input, n_in, n_out[1])
            SRembedLayer = EmbeddingLayer(input, n_in, n_out[0])
            n_out_max = max(n_out)
        else:
            LRembedLayer = EmbeddingLayer(input, n_in, n_out)
            MRembedLayer = EmbeddingLayer(input, n_in, n_out)
            SRembedLayer = EmbeddingLayer(input, n_in, n_out)
            n_out_max = n_out

        self.layers = [LRembedLayer, MRembedLayer, SRembedLayer]

        M1s = T.ones((seqLen, seqLen))
        Sep24Mat = T.triu(M1s, 24) + T.tril(M1s, -24)
        Sep12Mat = T.triu(M1s, 12) + T.tril(M1s, -12)
        Sep6Mat = T.triu(M1s, 6) + T.tril(M1s, -6)
        LRsel = Sep24Mat.dimshuffle('x', 0, 1, 'x')
        MRsel = (Sep12Mat - Sep24Mat).dimshuffle('x', 0, 1, 'x')
        SRsel = (Sep6Mat - Sep12Mat).dimshuffle('x', 0, 1, 'x')

        selections = [LRsel, MRsel, SRsel]

        self.output = T.zeros((batchSize, seqLen, seqLen, n_out_max),
                              dtype=theano.config.floatX)
        for emLayer, sel in zip(self.layers, selections):
            l_n_out = emLayer.n_out
            self.output = T.inc_subtensor(self.output[:, :, :, :l_n_out],
                                          T.mul(emLayer.output, sel))

        self.pcenters = 0
        self.params = []
        self.paramL1 = 0
        self.paramL2 = 0
        for layer in [LRembedLayer, MRembedLayer, SRembedLayer]:
            self.params += layer.params
            self.paramL1 += layer.paramL1
            self.paramL2 += layer.paramL2
            self.pcenters += layer.pcenters

        self.n_out = n_out_max
Example #9
def rank_loss(scores):
    # Images
    diag   = T.diag(scores)
    diff_img = scores - diag.dimshuffle(0, 'x') + 1
    max_img = T.maximum(0, diff_img)
    triu_img = T.triu(max_img, 1)
    til_img  = T.tril(max_img, -1)
    res_img = T.sum(triu_img) + T.sum(til_img)

    # Sentences
    diff_sent = scores.T - diag.dimshuffle(0, 'x') + 1
    max_sent = T.maximum(0, diff_sent)
    triu_sent = T.triu(max_sent, 1)
    til_sent  = T.tril(max_sent, -1)
    res_sent = T.sum(triu_sent) + T.sum(til_sent)
    
    return T.log(T.sum(scores) + 0.01)
Example #10
 def check_u(m, k=0):
     m_symb = T.matrix(dtype=m.dtype)
     k_symb = T.iscalar()
     f = theano.function([m_symb, k_symb],
                         T.triu(m_symb, k_symb),
                         mode=mode_with_gpu)
     result = f(m, k)
     assert np.allclose(result, np.triu(m, k))
     assert result.dtype == np.dtype(dtype)
     assert any([isinstance(node.op, GpuTri)
                 for node in f.maker.fgraph.toposort()])
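The helper above targets the GPU backend; a self-contained CPU-only variant (a sketch assuming only theano and numpy are available, and skipping the GpuTri assertion) would look like this:

import numpy as np
import theano
import theano.tensor as T

def check_u_cpu(m, k=0):
    # Compile T.triu on the default target and compare against NumPy's triu.
    m_symb = T.matrix(dtype=m.dtype)
    k_symb = T.iscalar()
    f = theano.function([m_symb, k_symb], T.triu(m_symb, k_symb))
    result = f(m, k)
    assert np.allclose(result, np.triu(m, k))
    assert result.dtype == m.dtype

check_u_cpu(np.random.rand(4, 4).astype('float32'), k=1)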
Example #11
def gaussian_chol(mean, logvar, chol, sample=None):
    if sample is not None:
        raise Exception('Not implemented')
    diag = gaussian_diag(mean, logvar)
    mask = T.shape_padleft(T.triu(T.ones_like(chol[0]), 1))
    sample = diag.sample + T.batched_dot(diag.sample, chol * mask)
    return RandomVariable(sample,
                          diag.logp,
                          diag.entr,
                          mean=mean,
                          logvar=logvar)
Example #12
 def check_u(m, k=0):
     m_symb = T.matrix(dtype=m.dtype)
     k_symb = T.iscalar()
     f = theano.function([m_symb, k_symb],
                         T.triu(m_symb, k_symb),
                         mode=mode_with_gpu)
     result = f(m, k)
     assert np.allclose(result, np.triu(m, k))
     assert result.dtype == np.dtype(dtype)
     assert any([
         isinstance(node.op, GpuTri) for node in f.maker.fgraph.toposort()
     ])
Example #13
    def lower_lower(self):
        '''Evaluates the intractable term in the lower bound which itself
         must be lower bounded'''

        a = self.get_aux_mult()

        reversed_cum_probs = T.extra_ops.cumsum(a[:,::-1],1)
        dot_prod_m   = T.dot(reversed_cum_probs, self.digams_1p2)
        dot_prod_mp1 = T.dot(T.concatenate((reversed_cum_probs[:,1:],T.zeros((self.K,1))),1), self.digams[:,0])
        # final entropy term
        triu_ones = T.triu(T.ones_like(a)) - T.eye(self.K)
        aloga = T.sum(T.tril(a)*T.log(T.tril(a)+triu_ones),1)
        return T.dot(a, self.digams[:,1]) + dot_prod_m + dot_prod_mp1 - aloga
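The triu_ones matrix above is a guard for the logarithm: tril(a) is zero strictly above the diagonal, and adding ones there keeps T.log away from log(0), which would otherwise contribute 0 * -inf = nan to the row sums. A tiny NumPy demonstration with a made-up 2x2 matrix:

import numpy as np

a = np.array([[0.5, 0.3],
              [0.4, 0.6]])
triu_ones = np.triu(np.ones_like(a)) - np.eye(2)
with np.errstate(divide='ignore', invalid='ignore'):
    naive = np.tril(a) * np.log(np.tril(a))            # nan above the diagonal
safe = np.tril(a) * np.log(np.tril(a) + triu_ones)     # exact zero there instead
print(naive.sum(1), safe.sum(1))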
Example #14
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        # Replace the cholesky decomposition with 1 if there are nans
        # or solve_upper_triangular will throw a ValueError.
        if self.on_error == 'nan':
            ok = ~tensor.any(tensor.isnan(chol_x))
            chol_x = tensor.switch(ok, chol_x, 1)
            dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T,
                solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        if self.on_error == 'nan':
            return [tensor.switch(ok, grad, np.nan)]
        else:
            return [grad]
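In the docstring's notation (Murray, 2016), with L the lower-triangular Cholesky factor of x and dz = \bar{L}, the code above evaluates

.. math:: P = \Phi\!\left(L^{T}\bar{L}\right), \qquad
          S = L^{-T}\,P\,L^{-1}, \qquad
          \bar{x} = \mathrm{tril}\!\left(S + S^{T}\right) - \mathrm{diag}\!\left(\mathrm{diag}(S)\right)

where \Phi (tril_and_halve_diagonal) keeps the lower triangle and halves the diagonal, and the two nested solve_upper_triangular calls against L^{T} implement the L^{-T}\,(\cdot)\,L^{-1} sandwich.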
Example #15
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        # Replace the cholesky decomposition with 1 if there are nans
        # or solve_upper_triangular will throw a ValueError.
        if self.on_error == 'nan':
            ok = ~tensor.any(tensor.isnan(chol_x))
            chol_x = tensor.switch(ok, chol_x, 1)
            dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T, solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        if self.on_error == 'nan':
            return [tensor.switch(ok, grad, np.nan)]
        else:
            return [grad]
Example #16
    def L_op(self, inputs, outputs, output_gradients):
        # Modified from theano/tensor/slinalg.py
        A, b = inputs
        c = outputs[0]
        c_bar = output_gradients[0]

        trans_solve_op = GpuCublasTriangularSolve(not self.lower)
        b_bar = trans_solve_op(A.T, c_bar)

        A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)

        if self.lower:
            A_bar = tensor.tril(A_bar)
        else:
            A_bar = tensor.triu(A_bar)
        return [A_bar, b_bar]
Example #17
def triangularize_network(layers, force_diag=False):
    n_layers, rem = divmod(len(layers) + 1, 4)
    assert(rem == 0)
    assert(n_layers > 0)
    assert((n_layers - 1, aL_PARAM) not in layers)

    layers_LU = layers.copy()
    for nn in xrange(n_layers):
        LL, UL = layers[(nn, LL_PARAM)], layers[(nn, UL_PARAM)]
        LL_diag = T.nlinalg.alloc_diag(T.nlinalg.extract_diag(LL))

        layers_LU[(nn, LL_PARAM)] = \
            ifelse(force_diag, LL_diag, T.tril(LL))
        layers_LU[(nn, UL_PARAM)] = \
            ifelse(force_diag, T.eye(UL.shape[0]), T.triu(UL))
    return layers_LU, n_layers
Example #18
    def L_op(self, inputs, outputs, output_gradients):
        # Modified from theano/tensor/slinalg.py
        A, b = inputs
        c = outputs[0]
        c_bar = output_gradients[0]

        trans_solve_op = GpuCublasTriangularSolve(not self.lower)
        b_bar = trans_solve_op(A.T, c_bar)

        A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)

        if self.lower:
            A_bar = tensor.tril(A_bar)
        else:
            A_bar = tensor.triu(A_bar)
        return [A_bar, b_bar]
Example #19
 def calMAP(_k):
     inx = T.argsort(dist, axis=1)
     # A = (te_lab == tr_lab[inx[:, 0: _k].reshape([-1])].reshape([length, _k])).astype('float32')
     A = T.eq(te_lab, tr_lab[inx[:, 0: _k].reshape([-1])].reshape([length, _k])).astype('float32')
     U = T.triu(T.ones([_k, _k]))
     B = T.dot(A, U)
     B *= A
     r = T.sum(A, axis=1)
     p = T.sum(B / (T.arange(1, _k + 1).astype('float32')), axis=1)
     r, p = theano.function([], [r, p])()
     p = p[r.nonzero()]
     r = r[r.nonzero()]
     res = T.sum(p / r)
     res /= (_k * length)
     res = theano.function([], res)()
     return res
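The upper-triangular ones matrix U in calMAP turns the 0/1 relevance matrix A into cumulative hit counts per rank, which is exactly what average precision needs. A small NumPy sketch with a hypothetical top-5 relevance row:

import numpy as np

A = np.array([[1., 0., 1., 1., 0.]])       # hits among the top-5 neighbours
U = np.triu(np.ones((5, 5)))
B = A @ U                                  # cumulative number of hits at each rank
print(B)                                   # [[1. 1. 2. 3. 3.]]
B *= A                                     # keep the counts only at hit positions
p = (B / np.arange(1., 6.)).sum()          # sum of precision@rank over the hits
print(p / A.sum())                         # average precision for this row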
Example #20
    def grad(self, inputs, g_outputs):
        r"""The gradient function should return
           .. math:: \sum_n\left(W_n\frac{\partial\,w_n}
                           {\partial a_{ij}} +
                     \sum_k V_{nk}\frac{\partial\,v_{nk}}
                           {\partial a_{ij}}\right),
        where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
        :math:`a` to ``inputs``, and  :math:`(w, v)=\mbox{eig}(a)`.
        Analytic formulae for eigensystem gradients are well-known in
        perturbation theory:
           .. math:: \frac{\partial\,w_n}
                          {\partial a_{ij}} = v_{in}\,v_{jn}
           .. math:: \frac{\partial\,v_{kn}}
                          {\partial a_{ij}} =
                \sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
                
        Code derived from theano.nlinalg.Eigh and doi=10.1.1.192.9105
        """
        x, = inputs
        w, v = self(x)
        # Replace gradients wrt disconnected variables with
        # zeros. This is a work-around for issue #1063.
        W, V = _zero_disconnected([w, v], g_outputs)

        N = x.shape[0]

        # W part
        gW = T.tensordot(v, v * W[numpy.newaxis, :], (1, 1))
        # V part
        vv = v[:, :, numpy.newaxis, numpy.newaxis] * v[numpy.newaxis,
                                                       numpy.newaxis, :, :]
        minusww = -w[:, numpy.newaxis] + w[numpy.newaxis, :]
        minuswwinv = 1 / (minusww + T.eye(N))
        minuswwinv = T.triu(minuswwinv, 1) + T.tril(minuswwinv, -1)  # remove diagonal
        c = (vv * minuswwinv[numpy.newaxis, :, numpy.newaxis, :]).dimshuffle(
            (1, 3, 0, 2))
        vc = T.tensordot(v, c, (1, 0))
        gV = T.tensordot(V, vc, ((0, 1), (0, 1)))

        g = gW + gV

        res = (g.T + g) / 2
        return [res]
Example #21
    def calc_feats(self, h):
        """
        :param h: 1D: n_words, 2D: batch_size, 3D: hidden_dim
        :return: 1D: batch_size, 2D: n_spans, 3D: 2 * hidden_dim
        """
        h = h.dimshuffle(1, 0, 2)
        n_words = h.shape[1]

        m = T.triu(T.ones(shape=(n_words, n_words)))
        indices = m.nonzero()

        # 1D: batch_size, 2D: n_spans, 3D: hidden_dim
        h_i = h[:, indices[0]]
        h_j = h[:, indices[1]]

        h_diff = h_i - h_j
        h_add = h_i + h_j

        return T.concatenate([h_add, h_diff], axis=2)
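The nonzero() of the upper-triangular mask above enumerates every (i, j) pair with i <= j, i.e. all candidate spans over the sentence. For example, in plain NumPy with n_words = 4:

import numpy as np

n_words = 4
m = np.triu(np.ones((n_words, n_words)))
i_idx, j_idx = m.nonzero()
print(list(zip(i_idx.tolist(), j_idx.tolist())))
# [(0, 0), (0, 1), (0, 2), (0, 3), (1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)]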
Example #22
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)
        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
        dz = tt.switch(ok, dz, floatX(1))

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            solve = tt.slinalg.Solve(A_structure="upper_triangular")
            return solve(outer.T, solve(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
        else:
            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
        return [tt.switch(ok, grad, floatX(np.nan))]
Example #23
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)
        ok = tt.all(tt.nlinalg.diag(chol_x) > 0)
        chol_x = tt.switch(ok, chol_x, tt.fill_diagonal(chol_x, 1))
        dz = tt.switch(ok, dz, floatX(1))

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tt.tril(mtx) - tt.diag(tt.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            solve = tt.slinalg.Solve(A_structure="upper_triangular")
            return solve(outer.T, solve(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            grad = tt.tril(s + s.T) - tt.diag(tt.diagonal(s))
        else:
            grad = tt.triu(s + s.T) - tt.diag(tt.diagonal(s))
        return [tt.switch(ok, grad, floatX(np.nan))]
Example #24
    def span_feats(self, h):
        """
        :param h: 1D: n_words, 2D: batch_size, 3D: hidden_dim
        :return: 1D: batch_size, 2D: n_words(i), 3D: n_words(j), 4D: 2 * hidden_dim
        """
        h = h.dimshuffle(1, 0, 2)
        n_words = h.shape[1]
        pad = T.zeros(shape=(h.shape[0], 1, h.shape[2]))
        h_pad = T.concatenate([h, pad], axis=1)

        m = T.triu(T.ones(shape=(n_words, n_words)))
        indices = m.nonzero()

        # 1D: batch_size, 2D: n_spans, 3D: hidden_dim
        h_i = h[:, indices[0]]
        h_j = h_pad[:, indices[1] + 1]

        h_diff = h_i - h_j
        h_add = h_i + h_j

        return T.concatenate([h_add, h_diff], axis=2)
Example #25
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T,
                solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            return [tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))]
        else:
            return [tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))]
Example #26
    def grad(self, inputs, gradients):
        """
        Cholesky decomposition reverse-mode gradient update.

        Symbolic expression for reverse-mode Cholesky gradient taken from [0]_

        References
        ----------
        .. [0] I. Murray, "Differentiation of the Cholesky decomposition",
           http://arxiv.org/abs/1602.07527

        """

        x = inputs[0]
        dz = gradients[0]
        chol_x = self(x)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return solve_upper_triangular(
                outer.T, solve_upper_triangular(outer.T, inner.T).T)

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))

        if self.lower:
            return [tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))]
        else:
            return [tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))]
Example #27
 def __init__(self, weights_init, biases_init, lower=False,
              weights_prec=0., biases_prec=0., weights_mean=None,
              biases_mean=None):
     assert weights_init.ndim == 2, 'weights_init must be 2D array.'
     assert biases_init.ndim == 1, 'biases_init must be 1D array.'
     assert weights_init.shape[0] == biases_init.shape[0], \
         'Dimensions of weights_init and biases_init must be consistent.'
     self.lower = lower
     self.weights = th.shared(weights_init, name='W')
     self.weights_tri = (tt.tril(self.weights)
                         if lower else tt.triu(self.weights))
     self.biases = th.shared(biases_init, name='b')
     self.weights_prec = weights_prec
     self.biases_prec = biases_prec
     if weights_mean is None:
         weights_mean = np.eye(weights_init.shape[0])
     if biases_mean is None:
         biases_mean = np.zeros_like(biases_init)
     self.weights_mean = (np.tril(weights_mean)
                          if lower else np.triu(weights_mean))
     self.biases_mean = biases_mean
     super(TriangularAffineLayer, self).__init__(
         [self.weights, self.biases])
Example #28
    def L_op(self, inputs, outputs, gradients):
        # Modified from theano/tensor/slinalg.py
        # No handling for on_error = 'nan'
        dz = gradients[0]
        chol_x = outputs[0]

        # this is for nan mode
        #
        # ok = ~tensor.any(tensor.isnan(chol_x))
        # chol_x = tensor.switch(ok, chol_x, 1)
        # dz = tensor.switch(ok, dz, 1)

        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
            dz = dz.T

        def tril_and_halve_diagonal(mtx):
            """Extracts lower triangle of square matrix and halves diagonal."""
            return tensor.tril(mtx) - tensor.diag(tensor.diagonal(mtx) / 2.0)

        def conjugate_solve_triangular(outer, inner):
            """Computes L^{-T} P L^{-1} for lower-triangular L."""
            return gpu_solve_upper_triangular(
                outer.T, gpu_solve_upper_triangular(outer.T, inner.T).T
            )

        s = conjugate_solve_triangular(
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz))
        )

        if self.lower:
            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
        else:
            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))

        return [grad]
Example #29
def skew_frac(A):
    return tensor.tril(A, -1) - tensor.tril(A, -1).T,\
           tensor.triu(A, 0).T + tensor.triu(A, 1)
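skew_frac splits A into a skew-symmetric matrix built from its strict lower triangle and a symmetric matrix whose upper triangle is A's own. A quick NumPy check of those two properties (the 3x3 input is arbitrary):

import numpy as np

A = np.arange(9.0).reshape(3, 3)
skew = np.tril(A, -1) - np.tril(A, -1).T
symm = np.triu(A, 0).T + np.triu(A, 1)
print(np.allclose(skew, -skew.T))    # True: first part is skew-symmetric
print(np.allclose(symm, symm.T))     # True: second part is symmetric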
Example #30
def run_model(index, slide_index, Y, mFunc, Struct, Dist, n, kernel, lambdaw,
              Kf, sample_size, tune_size):
    """
    index: index of the object data
    slide_index: index of the sliding window
    Y: time-series data
    mFunc: functional connectivity
    Struct: structural connectivity
    Dist: distance matrices for the n ROIs
    n: number of ROIs
    kernel: "exponential", "gaussian", "matern52" or "matern32"
    lambdaw: weighting parameter
    Kf: weighting parameter
    sample_size: number of NUTS samples
    tune_size: number of burn-in (tuning) samples
    """
    m = Dist[0].shape[0]
    k = Y.shape[1]
    n_vec = n * (n + 1) // 2

    Y_mean = []
    for i in range(n):
        Y_mean.append(np.mean(Y[i * m:(i + 1) * m, 0]))
    Y_mean = np.array(Y_mean)

    with pm.Model() as model_generator:

        # covariance matrix
        log_Sig = pm.Uniform("log_Sig", -8, 8, shape=(n, ))
        SQ = tt.diag(tt.sqrt(tt.exp(log_Sig)))
        Func_Covm = tt.dot(tt.dot(SQ, mFunc), SQ)
        Struct_Convm = tt.dot(tt.dot(SQ, Struct), SQ)

        # double fusion of structural and FC
        L_fc_vec = tt.reshape(
            tt.slinalg.cholesky(tt.squeeze(Func_Covm)).T[np.triu_indices(n)],
            (n_vec, ))
        L_st_vec = tt.reshape(
            tt.slinalg.cholesky(
                tt.squeeze(Struct_Convm)).T[np.triu_indices(n)], (n_vec, ))
        Struct_vec = tt.reshape(Struct[np.triu_indices(n)], (n_vec, ))
        rhonn = Kf*( (1-lambdaw)*L_fc_vec + lambdaw*L_st_vec ) + \
            (1-Kf)*( (1-Struct_vec*lambdaw)*L_fc_vec + Struct_vec*lambdaw*L_st_vec )

        # correlation
        Cov_temp = tt.triu(tt.ones((n, n)))
        Cov_temp = tt.set_subtensor(Cov_temp[np.triu_indices(n)], rhonn)
        Cov_mat_v = tt.dot(Cov_temp.T, Cov_temp)
        d = tt.sqrt(tt.diagonal(Cov_mat_v))
        rho = (Cov_mat_v.T / d).T / d
        rhoNew = pm.Deterministic("rhoNew", rho[np.triu_indices(n, 1)])

        # temporal correlation AR(1)
        phi_T = pm.Uniform("phi_T", 0, 1, shape=(n, ))
        sigW_T = pm.Uniform("sigW_T", 0, 100, shape=(n, ))
        B = pm.Normal("B", 0, 100, shape=(n, ))
        muW1 = Y_mean - B  # get the shifted mean
        mean_overall = muW1 / (1.0 - phi_T)  # AR(1) mean
        tau_overall = (1.0 - tt.sqr(phi_T)) / tt.sqr(sigW_T)  # AR(1) precision
        W_T = pm.MvNormal("W_T",
                          mu=mean_overall,
                          tau=tt.diag(tau_overall),
                          shape=(k, n))

        # add all parts together
        one_m_vec = tt.ones((m, 1))
        one_k_vec = tt.ones((1, k))
        D = pm.MvNormal("D", mu=tt.zeros(n), cov=Cov_mat_v, shape=(n, ))
        phi_s = pm.Uniform("phi_s", 0, 20, shape=(n, ))
        spat_prec = pm.Uniform("spat_prec", 0, 100, shape=(n, ))
        H_base = pm.Normal("H_base", 0, 1, shape=(m, n))

        Mu_all = tt.zeros((m * n, k))
        if kernel == "exponential":
            for i in range(n):
                r = Dist[i] * phi_s[i]
                H_temp = tt.sqr(spat_prec[i]) * tt.exp(-r)
                L_H_temp = tt.slinalg.cholesky(H_temp)
                Mu_all_update = tt.set_subtensor(Mu_all[m*i:m*(i+1), :], B[i] + D[i] + one_m_vec*W_T[:,i] + \
                    tt.dot(L_H_temp, tt.reshape(H_base[:,i], (m, 1)))*one_k_vec)
                Mu_all = Mu_all_update
        elif kernel == "gaussian":
            for i in range(n):
                r = Dist[i] * phi_s[i]
                H_temp = tt.sqr(spat_prec[i]) * tt.exp(-tt.sqr(r) * 0.5)
                L_H_temp = tt.slinalg.cholesky(H_temp)
                Mu_all_update = tt.set_subtensor(Mu_all[m*i:m*(i+1), :], B[i] + D[i] + one_m_vec*W_T[:,i] + \
                    tt.dot(L_H_temp, tt.reshape(H_base[:,i], (m, 1)))*one_k_vec)
                Mu_all = Mu_all_update
        elif kernel == "matern52":
            for i in range(n):
                r = Dist[i] * phi_s[i]
                H_temp = tt.sqr(spat_prec[i]) * (
                    (1.0 + tt.sqrt(5.0) * r + 5.0 / 3.0 * tt.sqr(r)) *
                    tt.exp(-1.0 * tt.sqrt(5.0) * r))
                L_H_temp = tt.slinalg.cholesky(H_temp)
                Mu_all_update = tt.set_subtensor(Mu_all[m*i:m*(i+1), :], B[i] + D[i] + one_m_vec*W_T[:,i] + \
                    tt.dot(L_H_temp, tt.reshape(H_base[:,i], (m, 1)))*one_k_vec)
                Mu_all = Mu_all_update
        elif kernel == "matern32":
            for i in range(n):
                r = Dist[i] * phi_s[i]
                H_temp = tt.sqr(spat_prec[i]) * (
                    1.0 + tt.sqrt(3.0) * r) * tt.exp(-tt.sqrt(3.0) * r)
                L_H_temp = tt.slinalg.cholesky(H_temp)
                Mu_all_update = tt.set_subtensor(Mu_all[m*i:m*(i+1), :], B[i] + D[i] + one_m_vec*W_T[:,i] + \
                    tt.dot(L_H_temp, tt.reshape(H_base[:,i], (m, 1)))*one_k_vec)
                Mu_all = Mu_all_update

        sigma_error_prec = pm.Uniform("sigma_error_prec", 0, 100)
        Y1 = pm.Normal("Y1", mu=Mu_all, sd=sigma_error_prec, observed=Y)

    with model_generator:
        step = pm.NUTS()
        trace = pm.sample(sample_size, step=step, tune=tune_size, chains=1)

    # save as pandas format and output the csv file
    save_trace = pm.trace_to_dataframe(trace)
    save_trace.to_csv(out_dir + date.today().strftime("%m_%d_%y") + \
        "_sample_size_" + str(sample_size) + "_index_" + str(index) + "_slide_index_" + str(slide_index) +".csv")
Example #31
    def __init__(self, x_h_0, v_h_0, t_h_0, x_t_0, v_t_0, a_t_0, t_t_0,
                 time_steps, exist, is_leader, x_goal, turn_vec_h, turn_vec_t,
                 n_steps, lr, game_params, arch_params, solver_params, params):

        self._init_layers(params, arch_params, game_params)

        self._connect(game_params, solver_params)

        def _dist_from_rail(pos, rail_center, rail_radius):
            d = tt.sqrt(((pos - rail_center)**2).sum())
            return tt.sum((d - rail_radius)**2)

        def _step_state(x_h_, v_h_, angle_, speed_, t_h_, turn_vec_h, x_t_,
                        v_t_, t_t_, turn_vec_t, ctrl, exist, time_step):

            a_t_e, v_t_e, x_t_e, t_t, t_h = step(x_h_, v_h_, t_h_, turn_vec_h,
                                                 x_t_, v_t_, t_t_, turn_vec_h,
                                                 exist, time_step)

            t_h = common.disconnected_grad(t_h)
            t_t = common.disconnected_grad(t_t)

            # approximated dynamic of the un-observed parts in the state
            a_t_a = tt.zeros(shape=(3, 2), dtype=np.float32)

            v_t_a = v_t_

            x_t_a = x_t_ + self.dt * v_t_a

            # difference in predictions
            n_v_t = v_t_e - v_t_a

            n_a_t = a_t_e - a_t_a

            n_x_t = x_t_e - x_t_a

            # disconnect the gradient of the noise signals
            n_v_t = common.disconnected_grad(n_v_t)

            n_a_t = common.disconnected_grad(n_a_t)

            n_x_t = common.disconnected_grad(n_x_t)

            # add the noise to the approximation
            a_t = a_t_a + n_a_t

            v_t = v_t_a + n_v_t

            x_t = x_t_a + n_x_t

            # update the observed part of the state
            delta_steer = ctrl[0]
            accel = ctrl[1]

            delta_steer = tt.clip(delta_steer, -np.pi / 4, np.pi / 4)

            angle = angle_ + delta_steer

            speed = speed_ + accel * self.dt

            speed = tt.clip(speed, 0, self.v_max)

            v_h_x = speed * tt.sin(angle)
            v_h_y = speed * tt.cos(angle)

            v_h = tt.stack([v_h_x, v_h_y])

            x_h = x_h_ + self.dt * v_h
            x_h = tt.clip(x_h, -self.bw, self.bw)

            return x_h, v_h, angle, speed, t_h, x_t, v_t, a_t, t_t

        def _recurrence(time_step, x_h_, v_h_, angle_, speed_, t_h_, x_t_,
                        v_t_, a_t_, t_t_, exist, is_leader, x_goal, turn_vec_h,
                        turn_vec_t):
            # state
            '''
            1. host
                1.1 position (2) - (x,y) coordinates in cross coordinate system
                1.2 speed (2) - (v_x,v_y)
                # 1.3 acceleration (2) - (a_x,a_y)
                # 1.4 waiting time (1) - start counting on full stop. stop counting when clearing the junction
                1.5 x_goal (2) - destination position (indicates different turns)
                total = 5
            2. right lane car
                2.1 position (2) - null value = (-1,-1)
                2.2 speed (2) - null value = (0,0)
                2.3 acceleration (2) - null value = (0,0)
                2.4 waiting time (1) - null value = 0
                total = 7
            3. front lane car
                3.1 position (2)
                3.2 speed (2)
                3.3 acceleration (2)
                3.4 waiting time (1)
                total = 7
            4. target 3
                4.1 position (2)
                4.2 speed (2)
                4.3 acceleration (2)
                4.4 waiting time (1)
                total = 7
            total = 26
            '''

            # host_state_vec = tt.concatenate([x_h_, v_h_, t_h_])
            ang_spd = tt.stack([angle_, speed_])
            host_state_vec = tt.concatenate([x_h_, ang_spd, x_goal])

            # target_state_vec = tt.concatenate([tt.flatten(x_t_), tt.flatten(v_t_), tt.flatten(a_t_), tt.flatten(t_t_)])
            target_state_vec = tt.concatenate([
                tt.flatten(x_t_),
                tt.flatten(v_t_),
                tt.flatten(a_t_), is_leader
            ])

            state = tt.concatenate([host_state_vec, target_state_vec])

            h0 = tt.dot(state, self.W_0) + self.b_0

            relu0 = tt.nnet.relu(h0)

            h1 = tt.dot(relu0, self.W_1) + self.b_1

            relu1 = tt.nnet.relu(h1)

            h2 = tt.dot(relu1, self.W_2) + self.b_2

            relu2 = tt.nnet.relu(h2)

            a_h = tt.dot(relu2, self.W_c)

            x_h, v_h, angle, speed, t_h, x_t, v_t, a_t, t_t = _step_state(
                x_h_, v_h_, angle_, speed_, t_h_, turn_vec_h, x_t_, v_t_, t_t_,
                turn_vec_t, a_h, exist, time_step)

            # cost:

            discount_factor = 0.99**time_step

            # 0. smooth driving policy
            cost_steer = discount_factor * a_h[0]**2
            cost_accel = discount_factor * a_h[1]**2

            # 1. forcing the host to move forward
            dist_from_goal = tt.mean((x_goal - x_h)**2)

            cost_progress = discount_factor * dist_from_goal

            # 2. keeping distance from in front vehicles
            d_t_h = x_t - x_h

            h_t_dists = (d_t_h**2).sum(axis=1)

            # v_h_norm = tt.sqrt((v_h**2).sum())
            # d_t_h_norm = tt.sqrt((d_t_h**2).sum(axis=1))
            #
            # denominator = v_h_norm * d_t_h_norm
            #
            # host_targets_orientation = tt.dot(d_t_h, v_h) / (denominator + 1e-3)
            #
            # in_fornt_targets = tt.nnet.sigmoid(5 * host_targets_orientation)
            #
            # close_targets = tt.sum(tt.abs_(d_t_h))
            #
            # cost_accident = tt.mean(in_fornt_targets * close_targets)

            cost_accident = tt.sum(
                tt.nnet.relu(self.require_distance - h_t_dists))

            # 3. rail divergence
            cost_right_rail = _dist_from_rail(
                x_h, self.right_rail_center,
                self.right_rail_radius) * turn_vec_h[0]
            cost_front_rail = (x_h[0] - self.lw / 2)**2 * turn_vec_h[1]
            cost_left_rail = _dist_from_rail(
                x_h, self.left_rail_center,
                self.left_rail_radius) * turn_vec_h[2]

            cost_rail = cost_right_rail + cost_left_rail + cost_front_rail

            return (x_h, v_h, angle, speed, t_h, x_t, v_t, a_t, t_t,
                    cost_steer, cost_accel, cost_progress, cost_accident,
                    cost_rail,
                    a_h), t.scan_module.until(dist_from_goal < 0.001)

        [
            x_h, v_h, angle, speed, t_h, x_t, v_t, a_t, t_t, costs_steer,
            costs_accel, costs_progress, costs_accident, costs_rail, a_hs
        ], scan_updates = t.scan(
            fn=_recurrence,
            sequences=time_steps,
            outputs_info=[
                x_h_0, v_h_0, 0., 0., t_h_0, x_t_0, v_t_0, a_t_0, t_t_0, None,
                None, None, None, None, None
            ],
            non_sequences=[exist, is_leader, x_goal, turn_vec_h, turn_vec_t],
            n_steps=n_steps,
            name='scan_func')

        # 3. right of way cost term

        T = x_h.shape[0]

        x_h_rpt_1 = tt.repeat(x_h, T, axis=1)  # (Tx2T)

        x_h_rpt_1_3d = x_h_rpt_1.dimshuffle(0, 1, 'x')  # (Tx2Tx1)

        x_h_3D = tt.repeat(x_h_rpt_1_3d, 3, axis=2)  # (Tx2Tx3)

        x_t_rshp_1 = tt.zeros(shape=(2 * T, 3), dtype=np.float32)  # (2Tx3)

        x_t_rshp_1_x = tt.set_subtensor(x_t_rshp_1[:T, :], x_t[:, :, 0])

        x_t_rshp_1_xy = tt.set_subtensor(x_t_rshp_1_x[T:, :], x_t[:, :, 1])

        x_t_rshp_1_3d = x_t_rshp_1_xy.dimshuffle(0, 1, 'x')  # (2Tx3x1)

        x_t_rpt_2_3d = tt.repeat(x_t_rshp_1_3d, T, axis=2)  # (2Tx3xT)

        x_t_3D = x_t_rpt_2_3d.dimshuffle(2, 0, 1)  # (Tx2Tx3)

        # abs_diff_mat = tt.abs_(x_h_3D - x_t_3D) # (Tx2Tx3)
        abs_diff_mat = (x_h_3D - x_t_3D)**2  # (Tx2Tx3)

        dists_mat = abs_diff_mat[:, :T, :] + abs_diff_mat[:, T:, :]  # d_x+d_y: (TxTx3)

        # punish only when cutting a leader
        host_effective_dists = (tt.triu(dists_mat[:, :, 0]) * is_leader[0] +
                                tt.triu(dists_mat[:, :, 1]) * is_leader[1] +
                                tt.triu(dists_mat[:, :, 2]) * is_leader[2])

        costs_row = tt.mean(
            tt.nnet.sigmoid(self.eps_row - host_effective_dists))

        self.cost_steer = tt.mean(costs_steer)
        self.cost_accel = tt.mean(costs_accel)
        self.cost_progress = tt.mean(costs_progress)
        self.cost_accident = tt.mean(costs_accident)
        self.cost_row = tt.mean(costs_row)
        self.cost_rail = tt.mean(costs_rail)

        self.weighted_cost = (
            self.w_delta_steer * self.cost_steer +
            self.w_accel * self.cost_accel +
            self.w_progress * self.cost_progress +
            self.w_accident * self.cost_accident +
            # self.w_row * self.cost_row
            self.w_rail * self.cost_rail)

        self.cost = (
            self.cost_steer + self.cost_accel + self.cost_progress +
            self.cost_accident +
            # self.cost_row
            self.cost_rail)

        objective = self.weighted_cost

        objective = common.weight_decay(objective=objective,
                                        params=self.params,
                                        l1_weight=self.l1_weight)

        objective = t.gradient.grad_clip(objective, -self.grad_clip_val,
                                         self.grad_clip_val)

        gradients = tt.grad(objective, self.params)

        self.updates = optimizers.optimizer(lr=lr,
                                            param_struct=self,
                                            gradients=gradients,
                                            solver_params=solver_params)

        self.x_h = x_h
        self.v_h = v_h
        self.x_t = x_t
        self.v_t = v_t

        self.max_a = tt.max(abs(a_hs))

        self.max_grad_val = 0
        self.grad_mean = 0
        for g in gradients:
            self.grad_mean += tt.mean(tt.abs_(g))
            self.max_grad_val = (tt.max(g) > self.max_grad_val) * tt.max(g) + (
                tt.max(g) <= self.max_grad_val) * self.max_grad_val

        self.params_abs_norm = self._calc_params_norm()
Example #32
    def grad(self, inputs, g_outputs):
        r"""The gradient function should return
           .. math:: \sum_n\left(W_n\frac{\partial\,w_n}
                           {\partial a_{ij}} +
                     \sum_k V_{nk}\frac{\partial\,v_{nk}}
                           {\partial a_{ij}}\right),
        where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
        :math:`a` to ``inputs``, and  :math:`(w, v)=\mbox{eig}(a)`.
        Analytic formulae for eigensystem gradients are well-known in
        perturbation theory:
           .. math:: \frac{\partial\,w_n}
                          {\partial a_{ij}} = v_{in}\,v_{jn}
           .. math:: \frac{\partial\,v_{kn}}
                          {\partial a_{ij}} =
                \sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
                
        Code derived from theano.nlinalg.Eigh and doi=10.1.1.192.9105
        """
        x, = inputs
        w, vr, vj = self(x)
        # Replace gradients wrt disconnected variables with
        # zeros. This is a work-around for issue #1063.
        W, Vr, Vj = _zero_disconnected([w, vr, vj], g_outputs)

        #         # complex version
        #         v = vr+1j*vj
        #         V = Vr+1j*Vj
        #         N = x.shape[0]

        #         gW = T.tensordot(T.conj(v),v*W[numpy.newaxis,:],(1,1)) # W part
        #         vv = T.conj(v[:,:,numpy.newaxis,numpy.newaxis])*v[numpy.newaxis,numpy.newaxis,:,:]
        #         minusww = -w[:,numpy.newaxis]+w[numpy.newaxis,:]
        #         minuswwinv = 1/(minusww+T.eye(N))
        #         minuswwinv = T.triu(minuswwinv,1)+T.tril(minuswwinv,-1)# remove diagonal
        #         c = (vv*minuswwinv[numpy.newaxis,:,numpy.newaxis,:]).dimshuffle((1,3,0,2))
        #         vc = T.tensordot(v,c,(1,0))
        #         gV = T.tensordot(T.conj(V),vc,((0,1),(0,1)))
        #         g = gW+gV

        #         g = T.imag(g)

        # real version
        v = vr + 1j * vj
        V = Vr + 1j * Vj
        N = x.shape[0]

        # W part
        gWr = (T.tensordot(vr, vr * W[numpy.newaxis, :],
                           (1, 1)) + T.tensordot(vj, vj * W[numpy.newaxis, :],
                                                 (1, 1)))
        gWj = (T.tensordot(vr, vj * W[numpy.newaxis, :],
                           (1, 1)) - T.tensordot(vj, vr * W[numpy.newaxis, :],
                                                 (1, 1)))
        # V part
        vvr = (vr[:, :, numpy.newaxis, numpy.newaxis] *
               vr[numpy.newaxis, numpy.newaxis, :, :] +
               vj[:, :, numpy.newaxis, numpy.newaxis] *
               vj[numpy.newaxis, numpy.newaxis, :, :])
        vvj = (vr[:, :, numpy.newaxis, numpy.newaxis] *
               vj[numpy.newaxis, numpy.newaxis, :, :] -
               vj[:, :, numpy.newaxis, numpy.newaxis] *
               vr[numpy.newaxis, numpy.newaxis, :, :])
        minusww = -w[:, numpy.newaxis] + w[numpy.newaxis, :]
        minuswwinv = 1 / (minusww + T.eye(N))
        minuswwinv = T.triu(minuswwinv, 1) + T.tril(minuswwinv, -1)  # remove diagonal
        cr = (vvr * minuswwinv[numpy.newaxis, :, numpy.newaxis, :]).dimshuffle(
            (1, 3, 0, 2))
        cj = (vvj * minuswwinv[numpy.newaxis, :, numpy.newaxis, :]).dimshuffle(
            (1, 3, 0, 2))
        vcr = (T.tensordot(vr, cr, (1, 0)) - T.tensordot(vj, cj, (1, 0)))
        vcj = (T.tensordot(vr, cj, (1, 0)) + T.tensordot(vj, cr, (1, 0)))
        gVr = (T.tensordot(Vr, vcr,
                           ((0, 1),
                            (0, 1))) + T.tensordot(Vj, vcj, ((0, 1), (0, 1))))
        gVj = (T.tensordot(Vr, vcj,
                           ((0, 1),
                            (0, 1))) - T.tensordot(Vj, vcr, ((0, 1), (0, 1))))

        g = gWj + gVj

        res = (g.T - g) / 2
        return [res]
Example #33
    def __init__(self, rng, input, n_in, n_batch, d_bucket, activation, activation_deriv,
                 w=None, index_permute=None, index_permute_reverse=None):
        srng = RandomStreams(seed=234)
        
        n_bucket = n_in / d_bucket + 1
        self.input = input

        # randomly permute input space
        if index_permute is None:
            index_permute = srng.permutation(n=n_in)#numpy.random.permutation(n_in)
            index_permute_reverse = T.argsort(index_permute)
            self.index_permute = index_permute
            self.index_permute_reverse = index_permute_reverse

        permuted_input = input[:, index_permute]
        self.permuted_input = permuted_input

        # initialize reflection parameters
        if w is None:
            bound = numpy.sqrt(3. / d_bucket)
            w_values = numpy.asarray(rng.uniform(low=-bound,
                                                 high=bound,
                                                 size=(n_bucket, d_bucket, d_bucket)),
                                     dtype=theano.config.floatX)
            w = theano.shared(value=w_values, name='w')
            
        self.w = w
        
        
        # compute outputs and Jacobians
        
        log_jacobian = T.alloc(0, n_batch)
        for b in xrange(n_bucket):
            bucket_size = d_bucket
            if b == n_bucket - 1:
                bucket_size = n_in - b * d_bucket
            
            x_b = self.permuted_input[:, b*d_bucket:b*d_bucket + bucket_size]

            
            w_b = self.w[b, :bucket_size, :bucket_size]

#            W = T.slinalg.Expm()(w_b)
#            log_jacobian = log_jacobian + T.alloc(T.nlinalg.trace(w_b), n_batch)

            Upper = T.triu(w_b)
#            Upper = T.extra_ops.fill_diagonal(Upper, 1.)
            Lower = T.tril(w_b)
            Lower = T.extra_ops.fill_diagonal(Lower, 1.)
            log_det_Upper = T.log(T.abs_(T.nlinalg.ExtractDiag()(Upper))).sum() 
#            log_det_Lower = T.log(T.abs_(T.nlinalg.ExtractDiag()(Lower))).sum() 


            W = T.dot(Upper, Lower)
            log_jacobian = log_jacobian + T.alloc(log_det_Upper, n_batch)

            
#            W = T.dot(T.transpose(w_b), w_b) + 0.001*T.eye(bucket_size)
#            log_jacobian = log_jacobian + T.alloc(T.log(T.abs_(T.nlinalg.Det()(W))), n_batch)

#            diag = T.nlinalg.diag(W)
#            div = T.tile(T.reshape(T.sqrt(diag), [1, bucket_size]), (bucket_size, 1))
            
#            W = W / div / T.transpose(div)
            #import pdb; pdb.set_trace()

            lin_output_b = T.dot(x_b, W)
            if b>0:
                lin_output = T.concatenate([lin_output, lin_output_b], axis=1)
            else:
                lin_output = lin_output_b
            if activation is not None:
                derivs = activation_deriv(lin_output_b)     
                #import pdb; pdb.set_trace()
                log_jacobian = log_jacobian + T.log(T.abs_(derivs)).sum(axis=1)

                
#                for n in xrange(n_batch):                    
#                    mat = T.tile(T.reshape(derivs[n], [1, bucket_size]), (bucket_size, 1))
#                    mat = mat * W                   
#                    T.inc_subtensor(log_jacobian[n], T.log(T.abs_(T.nlinalg.Det()(mat))))
                    
        self.log_jacobian = log_jacobian        

        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )


        self.params = [w]
Example #34
    def predict_symbolic(self, mx, Sx, unroll_scan=False):
        idims = self.D
        odims = self.E

        Ms = self.sr.shape[1]
        sf2M = (self.hyp[:, idims]**2)/tt.cast(Ms, floatX)
        sn2 = self.hyp[:, idims+1]**2

        # TODO this should just fallback to the method from the SSGP class
        if Sx is None:
            # first check if we received a vector [D] or a matrix [nxD]
            if mx.ndim == 1:
                mx = mx[None, :]

            srdotx = self.sr.dot(self.X.T).transpose(0,2,1)
            phi_x = tt.concatenate([tt.sin(srdotx), tt.cos(srdotx)], 2)
            M = (phi_x*self.beta_ss[:, None, :]).sum(-1)
            phi_x_L = tt.stack([
                solve_lower_triangular(self.Lmm[i], phi_x[i].T)
                for i in range(odims)])
            S = sn2[:, None]*(1 + (sf2M[:, None])*(phi_x_L**2).sum(-2)) + 1e-6

            return M, S

        # precompute some variables
        srdotx = self.sr.dot(mx)
        srdotSx = self.sr.dot(Sx)
        srdotSxdotsr = tt.sum(srdotSx*self.sr, 2)
        e = tt.exp(-0.5*srdotSxdotsr)
        cos_srdotx = tt.cos(srdotx)
        sin_srdotx = tt.sin(srdotx)
        cos_srdotx_e = cos_srdotx*e
        sin_srdotx_e = sin_srdotx*e

        # compute the mean vector
        mphi = tt.horizontal_stack(sin_srdotx_e, cos_srdotx_e)  # E x 2*Ms
        M = tt.sum(mphi*self.beta_ss, 1)

        # input output covariance
        mx_c = mx.dimshuffle(0, 'x')
        sin_srdotx_e_r = sin_srdotx_e.dimshuffle(0, 'x', 1)
        cos_srdotx_e_r = cos_srdotx_e.dimshuffle(0, 'x', 1)
        srdotSx_tr = srdotSx.transpose(0, 2, 1)
        c = tt.concatenate([mx_c*sin_srdotx_e_r + srdotSx_tr*cos_srdotx_e_r,
                            mx_c*cos_srdotx_e_r - srdotSx_tr*sin_srdotx_e_r],
                           axis=2)  # E x D x 2*Ms
        beta_ss_r = self.beta_ss.dimshuffle(0, 'x', 1)

        # input output covariance (notice this is not premultiplied by the
        # input covariance inverse)
        V = tt.sum(c*beta_ss_r, 2).T - tt.outer(mx, M)

        srdotSxdotsr_c = srdotSxdotsr.dimshuffle(0, 1, 'x')
        srdotSxdotsr_r = srdotSxdotsr.dimshuffle(0, 'x', 1)
        M2 = tt.zeros((odims, odims))

        # initialize indices
        triu_indices = np.triu_indices(odims)
        indices = [tt.as_index_variable(idx) for idx in triu_indices]

        def second_moments(i, j, M2, beta, iA, sn2, sf2M, sr, srdotSx,
                           srdotSxdotsr_c, srdotSxdotsr_r,
                           sin_srdotx, cos_srdotx, *args):
            # compute the second moments of the spectrum feature vectors
            siSxsj = srdotSx[i].dot(sr[j].T)  # Ms x Ms
            sijSxsij = -0.5*(srdotSxdotsr_c[i] + srdotSxdotsr_r[j])
            em = tt.exp(sijSxsij+siSxsj)      # MsxMs
            ep = tt.exp(sijSxsij-siSxsj)     # MsxMs
            si = sin_srdotx[i]       # Msx1
            ci = cos_srdotx[i]       # Msx1
            sj = sin_srdotx[j]       # Msx1
            cj = cos_srdotx[j]       # Msx1
            sicj = tt.outer(si, cj)  # MsxMs
            cisj = tt.outer(ci, sj)  # MsxMs
            sisj = tt.outer(si, sj)  # MsxMs
            cicj = tt.outer(ci, cj)  # MsxMs
            sm = (sicj-cisj)*em
            sp = (sicj+cisj)*ep
            cm = (sisj+cicj)*em
            cp = (cicj-sisj)*ep

            # Populate the second moment matrix of the feature vector
            Q_up = tt.concatenate([cm-cp, sm+sp], axis=1)
            Q_lo = tt.concatenate([sp-sm, cm+cp], axis=1)
            Q = tt.concatenate([Q_up, Q_lo], axis=0)

            # Compute the second moment of the output
            m2 = 0.5*matrix_dot(beta[i], Q, beta[j].T)

            m2 = theano.ifelse.ifelse(
                tt.eq(i, j),
                m2 + sn2[i]*(1.0 + sf2M[i]*tt.sum(self.iA[i]*Q)) + 1e-6,
                m2)
            M2 = tt.set_subtensor(M2[i, j], m2)
            return M2

        nseq = [self.beta_ss, self.iA, sn2, sf2M, self.sr, srdotSx,
                srdotSxdotsr_c, srdotSxdotsr_r, sin_srdotx, cos_srdotx,
                self.Lmm]

        if unroll_scan:
            from lasagne.utils import unroll_scan
            [M2_] = unroll_scan(second_moments, indices,
                                [M2], nseq, len(triu_indices[0]))
            updts = {}
        else:
            M2_, updts = theano.scan(fn=second_moments,
                                     sequences=indices,
                                     outputs_info=[M2],
                                     non_sequences=nseq,
                                     allow_gc=False,
                                     name="%s>M2_scan" % (self.name))

        M2 = M2_[-1]
        M2 = M2 + tt.triu(M2, k=1).T
        S = M2 - tt.outer(M, M)

        return M, S, V
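
A brief numpy sketch of the symmetrization step used above: the scan only fills the upper-triangular entries of M2, and the strictly upper triangle is then mirrored back, exactly like M2 + tt.triu(M2, k=1).T (plain numpy here, outside the theano graph; the values are placeholders):

import numpy as np

# fill only the upper triangle of a symmetric 3x3 second-moment matrix
M2 = np.zeros((3, 3))
vals = iter([1.0, 0.2, 0.3, 2.0, 0.4, 3.0])
for i, j in zip(*np.triu_indices(3)):
    M2[i, j] = next(vals)

# mirror the strictly upper triangle; the diagonal is not duplicated
M2_full = M2 + np.triu(M2, k=1).T
assert np.allclose(M2_full, M2_full.T)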
Example #35
def run_model(index, in_dir, out_dir, data_filename, func_filename,
              struct_filename, dist_filename, n, sample_size, tune_size):
    """
    index: data index, used to select the subject/working directory
    in_dir: input working directory prefix
    out_dir: directory where the trace is saved as a csv file
    data_filename: filename of the time series data
    func_filename: filename of the functional connectivity matrix
    struct_filename: filename of the structural connectivity matrix
    dist_filename: filename of the distance matrices for the n ROIs
    n: number of ROIs
    sample_size: number of NUTS samples to draw
    tune_size: number of tuning (burn-in) samples
    """

    os.chdir(in_dir + str(index))
    Y = get_data(data_filename)
    mFunc = get_func(func_filename, n)
    Struct = get_struct(struct_filename, n)
    Dist = get_dist(dist_filename, n)
    m = Dist[0].shape[0]
    k = Y.shape[1]
    n_vec = n * (n + 1) // 2
    Y_mean = []
    for i in range(n):
        Y_mean.append(np.mean(Y[i * m:(i + 1) * m, 0]))
    Y_mean = np.array(Y_mean)

    with pm.Model() as model_generator:
        # covariance matrix
        log_Sig = pm.Uniform("log_Sig", -8, 8, shape=(n, ))
        SQ = tt.diag(tt.sqrt(tt.exp(log_Sig)))
        Func_Covm = tt.dot(tt.dot(SQ, mFunc), SQ)
        Struct_Convm = tt.dot(tt.dot(SQ, Struct), SQ)

        # double fusion of structural and functional connectivity
        L_fc_vec = tt.reshape(
            tt.slinalg.cholesky(tt.squeeze(Func_Covm)).T[np.triu_indices(n)],
            (n_vec, ))
        L_st_vec = tt.reshape(
            tt.slinalg.cholesky(
                tt.squeeze(Struct_Convm)).T[np.triu_indices(n)], (n_vec, ))
        Struct_vec = tt.reshape(Struct[np.triu_indices(n)], (n_vec, ))
        lambdaw = pm.Beta("lambdaw", alpha=1, beta=1, shape=(n_vec, ))
        Kf = pm.Beta("Kf", alpha=1, beta=1, shape=(n_vec, ))
        rhonn = Kf*( (1-lambdaw)*L_fc_vec + lambdaw*L_st_vec ) + \
            (1-Kf)*( (1-Struct_vec*lambdaw)*L_fc_vec + Struct_vec*lambdaw*L_st_vec )

        # correlation
        Cov_temp = tt.triu(tt.ones((n, n)))
        Cov_temp = tt.set_subtensor(Cov_temp[np.triu_indices(n)], rhonn)
        Cov_mat_v = tt.dot(Cov_temp.T, Cov_temp)
        d = tt.sqrt(tt.diagonal(Cov_mat_v))
        rho = (Cov_mat_v.T / d).T / d
        rhoNew = pm.Deterministic("rhoNew", rho[np.triu_indices(n, 1)])

        # temporal correlation AR(1)
        phi_T = pm.Uniform("phi_T", 0, 1, shape=(n, ))
        sigW_T = pm.Uniform("sigW_T", 0, 100, shape=(n, ))
        B = pm.Normal("B", 0, 0.01, shape=(n, ))
        muW1 = Y_mean - B  # get the shifted mean
        mean_overall = muW1 / (1.0 - phi_T)  # AR(1) mean
        tau_overall = (1.0 - tt.sqr(phi_T)) / tt.sqr(sigW_T)  # AR(1) stationary precision
        W_T = pm.MvNormal("W_T",
                          mu=mean_overall,
                          tau=tt.diag(tau_overall),
                          shape=(k, n))

        # add all parts together
        one_m_vec = tt.ones((m, 1))
        one_k_vec = tt.ones((1, k))

        D = pm.MvNormal("D", mu=tt.zeros(n), cov=Cov_mat_v, shape=(n, ))
        phi_s = pm.Uniform("phi_s", 0, 20, shape=(n, ))
        spat_prec = pm.Uniform("spat_prec", 0, 100, shape=(n, ))
        H_base = pm.Normal("H_base", 0, 1, shape=(m, n))

        Mu_all_temp = []
        for i in range(n):
            # exponential covariance function
            H_temp = tt.sqr(spat_prec[i]) * tt.exp(-phi_s[i] * Dist[i])
            L_H_temp = tt.slinalg.cholesky(H_temp)
            Mu_all_temp.append(
                B[i] + D[i] + one_m_vec * W_T[:, i] +
                tt.dot(L_H_temp, tt.reshape(H_base[:, i], (m, 1))) * one_k_vec)
        MU_all = tt.concatenate(Mu_all_temp, axis=0)

        sigma_error_prec = pm.Uniform("sigma_error_prec", 0, 100)
        Y1 = pm.Normal("Y1", mu=MU_all, sd=sigma_error_prec, observed=Y)

    with model_generator:
        step = pm.NUTS()
        trace = pm.sample(sample_size, step=step, tune=tune_size, chains=1)

    # save as pandas format and output the csv file
    save_trace = pm.trace_to_dataframe(trace)
    save_trace.to_csv(out_dir + date.today().strftime("%m_%d_%y") +
                      "_sample_size_" + str(sample_size) + "_index_" +
                      str(index) + ".csv")
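
A hypothetical invocation of run_model; every path, filename and size below is a placeholder that only illustrates the expected argument types:

# hypothetical paths and sizes, for illustration only
run_model(index=1,
          in_dir="/data/subjects/", out_dir="/results/",
          data_filename="ts.txt", func_filename="func_conn.txt",
          struct_filename="struct_conn.txt", dist_filename="dist.txt",
          n=10, sample_size=1000, tune_size=500)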
Example #36
    def compile_theano():
        """
        This function generates theano-compiled kernels for energy and force learning.

        The positions of the atoms relative to the central one, and their chemical species,
        are defined by a matrix of dimension Mx5.

        Returns:
            km_ee (func): energy-energy kernel
            km_ef (func): energy-force kernel
            km_ff (func): force-force kernel
        """

        if not (os.path.exists(Mffpath / 'k3_ee_m.pickle')
                and os.path.exists(Mffpath / 'k3_ef_m.pickle')
                and os.path.exists(Mffpath / 'k3_ff_m.pickle')):
            print("Building Kernels")

            import theano.tensor as T
            from theano import function, scan

            logger.info("Started compilation of theano three body kernels")

            # --------------------------------------------------
            # INITIAL DEFINITIONS
            # --------------------------------------------------

            # positions of central atoms
            r1, r2 = T.dvectors('r1d', 'r2d')
            # positions of neighbours
            rho1, rho2 = T.dmatrices('rho1', 'rho2')
            # hyperparameter
            sig = T.dscalar('sig')
            # cutoff hyperparameters
            theta = T.dscalar('theta')
            rc = T.dscalar('rc')

            # positions of neighbours without chemical species

            rho1s = rho1[:, 0:3]
            rho2s = rho2[:, 0:3]

            alpha_1 = rho1[:, 3].flatten()
            alpha_2 = rho2[:, 3].flatten()

            alpha_j = rho1[:, 4].flatten()
            alpha_m = rho2[:, 4].flatten()

            alpha_k = rho1[:, 4].flatten()
            alpha_n = rho2[:, 4].flatten()

            # --------------------------------------------------
            # RELATIVE DISTANCES TO CENTRAL VECTOR AND BETWEEN NEIGHBOURS
            # --------------------------------------------------

            # first and second configuration
            r1j = T.sqrt(T.sum((rho1s[:, :] - r1[None, :])**2, axis=1))
            r2m = T.sqrt(T.sum((rho2s[:, :] - r2[None, :])**2, axis=1))
            rjk = T.sqrt(
                T.sum((rho1s[None, :, :] - rho1s[:, None, :])**2, axis=2))
            rmn = T.sqrt(
                T.sum((rho2s[None, :, :] - rho2s[:, None, :])**2, axis=2))

            # --------------------------------------------------
            # CHEMICAL SPECIES MASK
            # --------------------------------------------------

            # numerical kronecker
            def delta_alpha2(a1j, a2m):
                d = np.exp(-(a1j - a2m)**2 / (2 * 0.00001**2))
                return d

            # permutation 1

            delta_alphas12 = delta_alpha2(alpha_1[0], alpha_2[0])
            delta_alphasjm = delta_alpha2(alpha_j[:, None], alpha_m[None, :])
            delta_alphas_jmkn = delta_alphasjm[:, None, :,
                                               None] * delta_alphasjm[None, :,
                                                                      None, :]

            delta_perm1 = delta_alphas12 * delta_alphas_jmkn

            # permutation 3
            delta_alphas1m = delta_alpha2(alpha_1[0, None],
                                          alpha_m[None, :]).flatten()
            delta_alphasjn = delta_alpha2(alpha_j[:, None], alpha_n[None, :])
            delta_alphask2 = delta_alpha2(alpha_k[:, None],
                                          alpha_2[None, 0]).flatten()

            delta_perm3 = delta_alphas1m[None, None, :, None] * delta_alphasjn[:, None, None, :] * \
                delta_alphask2[None, :, None, None]

            # permutation 5
            delta_alphas1n = delta_alpha2(alpha_1[0, None],
                                          alpha_n[None, :]).flatten()
            delta_alphasj2 = delta_alpha2(alpha_j[:, None],
                                          alpha_2[None, 0]).flatten()
            delta_alphaskm = delta_alpha2(alpha_k[:, None], alpha_m[None, :])

            delta_perm5 = delta_alphas1n[None, None, None, :] * delta_alphaskm[None, :, :, None] * \
                delta_alphasj2[:, None, None, None]

            # --------------------------------------------------
            # BUILD THE KERNEL
            # --------------------------------------------------

            # Squared exp of differences
            se_1j2m = T.exp(-(r1j[:, None] - r2m[None, :])**2 / (2 * sig**2))
            se_jkmn = T.exp(
                -(rjk[:, :, None, None] - rmn[None, None, :, :])**2 /
                (2 * sig**2))
            se_jk2m = T.exp(-(rjk[:, :, None] - r2m[None, None, :])**2 /
                            (2 * sig**2))
            se_1jmn = T.exp(-(r1j[:, None, None] - rmn[None, :, :])**2 /
                            (2 * sig**2))

            # Kernel not summed (cyclic permutations)
            k1n = (se_1j2m[:, None, :, None] * se_1j2m[None, :, None, :] *
                   se_jkmn)
            k2n = (se_1jmn[:, None, :, :] * se_jk2m[:, :, None, :] *
                   se_1j2m[None, :, :, None])
            k3n = (se_1j2m[:, None, None, :] * se_jk2m[:, :, :, None] *
                   se_1jmn[None, :, :, :])

            # final shape is M1 M1 M2 M2

            ker_loc = k1n * delta_perm1 + k2n * delta_perm3 + k3n * delta_perm5

            # Faster version of the cutoff (fewer calculations)
            cut_j = 0.5 * (1 + T.cos(np.pi * r1j / rc))
            cut_m = 0.5 * (1 + T.cos(np.pi * r2m / rc))

            cut_jk = cut_j[:, None] * cut_j[None, :] * 0.5 * (
                1 + T.cos(np.pi * rjk / rc))
            cut_mn = cut_m[:, None] * cut_m[None, :] * 0.5 * (
                1 + T.cos(np.pi * rmn / rc))

            # --------------------------------------------------
            # REMOVE DIAGONAL ELEMENTS
            # --------------------------------------------------

            # remove diagonal elements AND lower triangular ones from first configuration
            mask_jk = T.triu(T.ones_like(rjk)) - T.identity_like(rjk)

            # remove diagonal elements from second configuration
            mask_mn = T.ones_like(rmn) - T.identity_like(rmn)

            # Combine masks
            mask_jkmn = mask_jk[:, :, None, None] * mask_mn[None, None, :, :]

            # Apply mask and then apply cutoff functions
            ker_loc = ker_loc * mask_jkmn
            ker_loc = T.sum(ker_loc * cut_jk[:, :, None, None] *
                            cut_mn[None, None, :, :])

            ker_loc = T.exp(ker_loc / 20)

            # --------------------------------------------------
            # FINAL FUNCTIONS
            # --------------------------------------------------

            # energy energy kernel
            k_ee_fun = function([r1, r2, rho1, rho2, sig, theta, rc],
                                ker_loc,
                                on_unused_input='ignore')

            # energy force kernel
            k_ef_cut = T.grad(ker_loc, r2)
            k_ef_fun = function([r1, r2, rho1, rho2, sig, theta, rc],
                                k_ef_cut,
                                on_unused_input='ignore')

            # force force kernel
            k_ff_cut = T.grad(ker_loc, r1)
            k_ff_cut_der, updates = scan(
                lambda j, k_ff_cut, r2: T.grad(k_ff_cut[j], r2),
                sequences=T.arange(k_ff_cut.shape[0]),
                non_sequences=[k_ff_cut, r2])
            k_ff_fun = function([r1, r2, rho1, rho2, sig, theta, rc],
                                k_ff_cut_der,
                                on_unused_input='ignore')

            # Save the function that we want to use for multiprocessing
            # This is necessary because theano is a crybaby and does not want to access the
            # automatically stored compiled object from different processes
            with open(Mffpath / 'k3_ee_m.pickle', 'wb') as f:
                pickle.dump(k_ee_fun, f)
            with open(Mffpath / 'k3_ef_m.pickle', 'wb') as f:
                pickle.dump(k_ef_fun, f)
            with open(Mffpath / 'k3_ff_m.pickle', 'wb') as f:
                pickle.dump(k_ff_fun, f)

        else:
            print("Loading Kernels")
            with open(Mffpath / "k3_ee_m.pickle", 'rb') as f:
                k_ee_fun = pickle.load(f)
            with open(Mffpath / "k3_ef_m.pickle", 'rb') as f:
                k_ef_fun = pickle.load(f)
            with open(Mffpath / "k3_ff_m.pickle", 'rb') as f:
                k_ff_fun = pickle.load(f)

        # WRAPPERS (we don't want to plug the position of the central element every time)

        def km_ee(conf1, conf2, sig, theta, rc):
            """
            Many body kernel for energy-energy correlation

            Args:
                conf1 (array): first configuration.
                conf2 (array): second configuration.
                sig (float): lengthscale hyperparameter theta[0]
                theta (float): cutoff decay rate hyperparameter theta[1]
                rc (float): cutoff distance hyperparameter theta[2]

            Returns:
                kernel (float): scalar valued energy-energy many-body kernel

            """
            return k_ee_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta,
                            rc)

        def km_ef(conf1, conf2, sig, theta, rc):
            """
            Many body kernel for energy-force correlation

            Args:
                conf1 (array): first configuration.
                conf2 (array): second configuration.
                sig (float): lengthscale hyperparameter theta[0]
                theta (float): cutoff decay rate hyperparameter theta[1]
                rc (float): cutoff distance hyperparameter theta[2]

            Returns:
                kernel (array): 3x1 energy-force many-body kernel

            """
            return -k_ef_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig,
                             theta, rc)

        def km_ff(conf1, conf2, sig, theta, rc):
            """
            Many body kernel for force-force correlation

            Args:
                conf1 (array): first configuration.
                conf2 (array): second configuration.
                sig (float): lengthscale hyperparameter theta[0]
                theta (float): cutoff decay rate hyperparameter theta[1]
                rc (float): cutoff distance hyperparameter theta[2]

            Returns:
                kernel (matrix): 3x3 force-force many-body kernel

            """
            return k_ff_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta,
                            rc)

        logger.info("Ended compilation of theano many body kernels")

        return km_ee, km_ef, km_ff
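
A hedged usage sketch of the returned kernels, assuming compile_theano is callable as shown and that configurations are Mx5 arrays (three Cartesian coordinates followed by two species columns, as the docstring states); the random inputs and hyperparameter values are placeholders:

import numpy as np

km_ee, km_ef, km_ff = compile_theano()

# two toy configurations with 4 neighbours each:
# columns = (x, y, z, species of central atom, species of neighbour)
conf1 = np.hstack([np.random.randn(4, 3), np.ones((4, 2))])
conf2 = np.hstack([np.random.randn(4, 3), np.ones((4, 2))])

e_e = km_ee(conf1, conf2, sig=1.0, theta=1.0, rc=5.0)   # scalar
e_f = km_ef(conf1, conf2, sig=1.0, theta=1.0, rc=5.0)   # 3-vector
f_f = km_ff(conf1, conf2, sig=1.0, theta=1.0, rc=5.0)   # 3x3 matrix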
Example #37
    def predict_symbolic(self, mx, Sx, unroll_scan=False):
        idims = self.D
        odims = self.E

        # centralize inputs
        zeta = self.X - mx

        # initialize some variables
        sf2 = self.hyp[:, idims]**2
        eyeE = tt.tile(tt.eye(idims), (odims, 1, 1))
        lscales = self.hyp[:, :idims]
        iL = eyeE / lscales.dimshuffle(0, 1, 'x')

        # predictive mean
        inp = iL.dot(zeta.T).transpose(0, 2, 1)
        iLdotSx = iL.dot(Sx)
        # TODO vectorize this
        B = (iLdotSx[:, :, None, :] *
             iL[:, None, :, :]).sum(-1) + tt.eye(idims)
        t = tt.stack([solve(B[i].T, inp[i].T).T for i in range(odims)])
        c = sf2 / tt.sqrt(tt.stack([det(B[i]) for i in range(odims)]))
        l = tt.exp(-0.5 * tt.sum(inp * t, 2))
        lb = l * self.beta  # E x N (elementwise)
        M = tt.sum(lb, 1) * c

        # input output covariance
        tiL = (t[:, :, None, :] * iL[:, None, :, :]).sum(-1)
        # tiL = tt.stack([t[i].dot(iL[i]) for i in range(odims)])
        V = tt.stack([tiL[i].T.dot(lb[i]) for i in range(odims)]).T * c

        # predictive covariance
        logk = (tt.log(sf2))[:, None] - 0.5 * tt.sum(inp * inp, 2)
        logk_r = logk.dimshuffle(0, 'x', 1)
        logk_c = logk.dimshuffle(0, 1, 'x')
        Lambda = tt.square(iL)
        LL = (Lambda.dimshuffle(0, 'x', 1, 2) + Lambda).transpose(0, 1, 3, 2)
        R = tt.dot(LL, Sx).transpose(0, 1, 3, 2) + tt.eye(idims)
        z_ = Lambda.dot(zeta.T).transpose(0, 2, 1)

        M2 = tt.zeros((odims, odims))

        # initialize indices
        triu_indices = np.triu_indices(odims)
        indices = [tt.as_index_variable(idx) for idx in triu_indices]

        def second_moments(i, j, M2, beta, iK, sf2, R, logk_c, logk_r, z_, Sx,
                           *args):
            # This comes from Deisenroth's thesis ( Eqs 2.51- 2.54 )
            Rij = R[i, j]
            n2 = logk_c[i] + logk_r[j]
            n2 += utils.maha(z_[i], -z_[j], 0.5 * solve(Rij, Sx))

            Q = tt.exp(n2) / tt.sqrt(det(Rij))

            # Eq 2.55
            m2 = matrix_dot(beta[i], Q, beta[j])

            m2 = theano.ifelse.ifelse(tt.eq(i, j),
                                      m2 - tt.sum(iK[i] * Q) + sf2[i], m2)
            M2 = tt.set_subtensor(M2[i, j], m2)
            return M2

        nseq = [self.beta, self.iK, sf2, R, logk_c, logk_r, z_, Sx, self.L]
        if unroll_scan:
            from lasagne.utils import unroll_scan
            [M2_] = unroll_scan(second_moments, indices, [M2], nseq,
                                len(triu_indices[0]))
            updts = {}
        else:
            M2_, updts = theano.scan(fn=second_moments,
                                     sequences=indices,
                                     outputs_info=[M2],
                                     non_sequences=nseq,
                                     allow_gc=False,
                                     strict=True,
                                     name="%s>M2_scan" % (self.name))
        M2 = M2_[-1]
        M2 = M2 + tt.triu(M2, k=1).T
        S = M2 - tt.outer(M, M)

        return M, S, V
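
A minimal sketch of compiling the symbolic prediction above into a callable, assuming a gp object that exposes this predict_symbolic; the variable names are illustrative only:

import theano
import theano.tensor as tt

mx = tt.vector('mx')   # input mean, shape (idims,)
Sx = tt.matrix('Sx')   # input covariance, shape (idims, idims)

M, S, V = gp.predict_symbolic(mx, Sx)
predict_fn = theano.function([mx, Sx], [M, S, V], allow_input_downcast=True)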
Example #38
    def predict_symbolic(self, mx, Sx=None, unroll_scan=False):
        idims = self.D
        odims = self.E

        # initialize some variables
        sf2 = self.hyp[:, idims]**2
        eyeE = tt.tile(tt.eye(idims), (odims, 1, 1))
        lscales = self.hyp[:, :idims]
        iL = eyeE / lscales.dimshuffle(0, 1, 'x')

        if Sx is None:
            # first check if we received a vector [D] or a matrix [nxD]
            if mx.ndim == 1:
                mx = mx[None, :]
            # centralize inputs
            zeta = self.X[:, None, :] - mx[None, :, :]

            # predictive mean ( we don't need to do the rest )
            inp = (iL[:, None, :, None, :] * zeta[:, None, :, :]).sum(2)
            l = tt.exp(-0.5 * tt.sum(inp**2, -1))
            lb = l * self.beta[:, :, None]  # E x N
            M = tt.sum(lb, 1).T * sf2

            # apply saturating function to the output if available
            if self.sat_func is not None:
                # saturate the output
                M = self.sat_func(M)

            return M

        # centralize inputs
        zeta = self.X - mx

        # predictive mean
        inp = iL.dot(zeta.T).transpose(0, 2, 1)
        iLdotSx = iL.dot(Sx)
        B = (iLdotSx[:, :, None, :] *
             iL[:, None, :, :]).sum(-1) + tt.eye(idims)
        t = tt.stack([solve(B[i].T, inp[i].T).T for i in range(odims)])
        c = sf2 / tt.sqrt(tt.stack([det(B[i]) for i in range(odims)]))
        l = tt.exp(-0.5 * tt.sum(inp * t, 2))
        lb = l * self.beta
        M = tt.sum(lb, 1) * c

        # input output covariance
        tiL = tt.stack([t[i].dot(iL[i]) for i in range(odims)])
        V = tt.stack([tiL[i].T.dot(lb[i]) for i in range(odims)]).T * c

        # predictive covariance
        logk = (tt.log(sf2))[:, None] - 0.5 * tt.sum(inp * inp, 2)
        logk_r = logk.dimshuffle(0, 'x', 1)
        logk_c = logk.dimshuffle(0, 1, 'x')
        Lambda = tt.square(iL)
        LL = (Lambda.dimshuffle(0, 'x', 1, 2) + Lambda).transpose(0, 1, 3, 2)
        R = tt.dot(LL, Sx).transpose(0, 1, 3, 2) + tt.eye(idims)
        z_ = Lambda.dot(zeta.T).transpose(0, 2, 1)

        M2 = tt.zeros((odims, odims))

        # initialize indices
        triu_indices = np.triu_indices(odims)
        indices = [tt.as_index_variable(idx) for idx in triu_indices]

        def second_moments(i, j, M2, beta, R, logk_c, logk_r, z_, Sx, *args):
            # This comes from Deisenroth's thesis ( Eqs 2.51- 2.54 )
            Rij = R[i, j]
            n2 = logk_c[i] + logk_r[j]
            n2 += utils.maha(z_[i], -z_[j], 0.5 * solve(Rij, Sx))
            Q = tt.exp(n2) / tt.sqrt(det(Rij))

            # Eq 2.55
            m2 = matrix_dot(beta[i], Q, beta[j])

            m2 = theano.ifelse.ifelse(tt.eq(i, j), m2 + 1e-6, m2)
            M2 = tt.set_subtensor(M2[i, j], m2)
            return M2

        nseq = [self.beta, R, logk_c, logk_r, z_, Sx, self.iK, self.L]

        if unroll_scan:
            from lasagne.utils import unroll_scan
            [M2_] = unroll_scan(second_moments, indices, [M2], nseq,
                                len(triu_indices[0]))
            updts = {}
        else:
            M2_, updts = theano.scan(fn=second_moments,
                                     sequences=indices,
                                     outputs_info=[M2],
                                     non_sequences=nseq,
                                     allow_gc=False,
                                     strict=True,
                                     name="%s>M2_scan" % (self.name))
        M2 = M2_[-1]
        M2 = M2 + tt.triu(M2, k=1).T
        S = M2 - tt.outer(M, M)

        # apply saturating function to the output if available
        if self.sat_func is not None:
            # saturate the output
            M, S, U = self.sat_func(M, S)
            # compute the joint input output covariance
            V = V.dot(U)

        return M, S, V
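
This variant also accepts deterministic inputs: when Sx is None it short-circuits to a batched, mean-only prediction. A hedged sketch of the two calling modes, again assuming a gp object exposing this method:

import theano
import theano.tensor as tt

# mean-only prediction for a batch of deterministic inputs
X = tt.matrix('X')                 # shape (n, idims)
M_det = gp.predict_symbolic(X)
mean_fn = theano.function([X], M_det)

# full moment-matching prediction for an uncertain input
mx, Sx = tt.vector('mx'), tt.matrix('Sx')
M, S, V = gp.predict_symbolic(mx, Sx)
moment_fn = theano.function([mx, Sx], [M, S, V])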
Example #39
    def __init__(self,
                 x_h_0,
                 v_h_0,
                 t_h_0,
                 x_t_0,
                 v_t_0,
                 a_t_0,
                 t_t_0,
                 time_steps,
                 exist,
                 is_leader,
                 x_goal,
                 turn_vec_h,
                 turn_vec_t,
                 n_steps,
                 lr,
                 game_params,
                 arch_params,
                 solver_params,
                 params):

        self._init_layers(params, arch_params, game_params)

        self._connect(game_params, solver_params)

        def _dist_from_rail(pos, rail_center, rail_radius):
            d = tt.sqrt(((pos - rail_center)**2).sum())
            return tt.sum((d - rail_radius)**2)

        def _step_state(x_h_, v_h_, angle_, speed_, t_h_, turn_vec_h, x_t_, v_t_, t_t_, turn_vec_t, ctrl, exist, time_step):

            a_t_e, v_t_e, x_t_e, t_t, t_h = step(x_h_, v_h_, t_h_, turn_vec_h, x_t_, v_t_, t_t_, turn_vec_h, exist, time_step)

            t_h = common.disconnected_grad(t_h)
            t_t = common.disconnected_grad(t_t)

            # approximated dynamic of the un-observed parts in the state
            a_t_a = tt.zeros(shape=(3,2), dtype=np.float32)

            v_t_a = v_t_

            x_t_a = x_t_ + self.dt * v_t_a

            # difference in predictions
            n_v_t = v_t_e - v_t_a

            n_a_t = a_t_e - a_t_a

            n_x_t = x_t_e - x_t_a

            # disconnect the gradient of the noise signals
            n_v_t = common.disconnected_grad(n_v_t)

            n_a_t = common.disconnected_grad(n_a_t)

            n_x_t = common.disconnected_grad(n_x_t)

            # add the noise to the approximation
            a_t = a_t_a + n_a_t

            v_t = v_t_a + n_v_t

            x_t = x_t_a + n_x_t

            # update the observed part of the state
            delta_steer = ctrl[0]
            accel = ctrl[1]

            delta_steer = tt.clip(delta_steer, -np.pi/4, np.pi/4)

            angle = angle_ + delta_steer

            speed = speed_ + accel * self.dt

            speed = tt.clip(speed, 0, self.v_max)

            v_h_x = speed * tt.sin(angle)
            v_h_y = speed * tt.cos(angle)

            v_h = tt.stack([v_h_x,v_h_y])

            x_h = x_h_ + self.dt * v_h
            x_h = tt.clip(x_h, -self.bw, self.bw)

            return x_h, v_h, angle, speed, t_h, x_t, v_t, a_t, t_t

        def _recurrence(time_step, x_h_, v_h_, angle_, speed_, t_h_, x_t_, v_t_, a_t_, t_t_, exist, is_leader, x_goal, turn_vec_h, turn_vec_t):
            # state
            '''
            1. host
                1.1 position (2) - (x,y) coordinates in cross coordinate system
                1.2 speed (2) - (v_x,v_y)
                # 1.3 acceleration (2) - (a_x,a_y)
                # 1.4 waiting time (1) - start counting on full stop. stop counting when clearing the junction
                1.5 x_goal (2) - destination position (indicates different turns)
                total = 5
            2. right lane car
                2.1 position (2) - null value = (-1,-1)
                2.2 speed (2) - null value = (0,0)
                2.3 acceleration (2) - null value = (0,0)
                2.4 waiting time (1) - null value = 0
                total = 7
            3. front lane car
                3.1 position (2)
                3.2 speed (2)
                3.3 acceleration (2)
                3.4 waiting time (1)
                total = 7
            4. target 3
                4.1 position (2)
                4.2 speed (2)
                4.3 acceleration (2)
                4.4 waiting time (1)
                total = 7
            total = 26
            '''

            # host_state_vec = tt.concatenate([x_h_, v_h_, t_h_])
            ang_spd = tt.stack([angle_, speed_])
            host_state_vec = tt.concatenate([x_h_, ang_spd,  x_goal])

            # target_state_vec = tt.concatenate([tt.flatten(x_t_), tt.flatten(v_t_), tt.flatten(a_t_), tt.flatten(t_t_)])
            target_state_vec = tt.concatenate([tt.flatten(x_t_), tt.flatten(v_t_), tt.flatten(a_t_), is_leader])

            state = tt.concatenate([host_state_vec, target_state_vec])

            h0 = tt.dot(state, self.W_0) + self.b_0

            relu0 = tt.nnet.relu(h0)

            h1 = tt.dot(relu0, self.W_1) + self.b_1

            relu1 = tt.nnet.relu(h1)

            h2 = tt.dot(relu1, self.W_2) + self.b_2

            relu2 = tt.nnet.relu(h2)

            a_h = tt.dot(relu2, self.W_c)

            x_h, v_h, angle, speed, t_h, x_t, v_t, a_t, t_t = _step_state(x_h_, v_h_, angle_, speed_, t_h_, turn_vec_h, x_t_, v_t_, t_t_, turn_vec_t, a_h, exist, time_step)

            # cost:

            discount_factor = 0.99**time_step

            # 0. smooth driving policy
            cost_steer = discount_factor * a_h[0]**2
            cost_accel = discount_factor * a_h[1]**2

            # 1. forcing the host to move forward
            dist_from_goal = tt.mean((x_goal - x_h)**2)

            cost_progress = discount_factor * dist_from_goal

            # 2. keeping distance from in front vehicles
            d_t_h = x_t - x_h

            h_t_dists = (d_t_h**2).sum(axis=1)

            # v_h_norm = tt.sqrt((v_h**2).sum())
            # d_t_h_norm = tt.sqrt((d_t_h**2).sum(axis=1))
            #
            # denominator = v_h_norm * d_t_h_norm
            #
            # host_targets_orientation = tt.dot(d_t_h, v_h) / (denominator + 1e-3)
            #
            # in_fornt_targets = tt.nnet.sigmoid(5 * host_targets_orientation)
            #
            # close_targets = tt.sum(tt.abs_(d_t_h))
            #
            # cost_accident = tt.mean(in_fornt_targets * close_targets)

            cost_accident = tt.sum(tt.nnet.relu(self.require_distance - h_t_dists))

            # 3. rail divergence
            cost_right_rail = _dist_from_rail(x_h, self.right_rail_center, self.right_rail_radius) * turn_vec_h[0]
            cost_front_rail = (x_h[0] - self.lw/2)**2 * turn_vec_h[1]
            cost_left_rail = _dist_from_rail(x_h, self.left_rail_center, self.left_rail_radius) * turn_vec_h[2]

            cost_rail = cost_right_rail + cost_left_rail + cost_front_rail

            return (x_h, v_h, angle, speed, t_h, x_t, v_t, a_t, t_t,
                    cost_steer, cost_accel, cost_progress, cost_accident, cost_rail, a_h), t.scan_module.until(dist_from_goal < 0.001)

        [x_h, v_h, angle, speed, t_h, x_t, v_t, a_t, t_t,
                costs_steer, costs_accel, costs_progress, costs_accident, costs_rail, a_hs], scan_updates = t.scan(fn=_recurrence,
                                                sequences=time_steps,
                                                outputs_info=[x_h_0, v_h_0, 0., 0., t_h_0, x_t_0, v_t_0, a_t_0, t_t_0,
                                                              None, None, None, None, None, None],
                                                non_sequences=[exist, is_leader, x_goal, turn_vec_h, turn_vec_t],
                                                n_steps=n_steps,
                                                name='scan_func')

        # 3. right of way cost term

        T = x_h.shape[0]

        x_h_rpt_1 = tt.repeat(x_h,T,axis=1) # (Tx2T)

        x_h_rpt_1_3d = x_h_rpt_1.dimshuffle(0,1,'x') # (Tx2Tx1)

        x_h_3D = tt.repeat(x_h_rpt_1_3d, 3, axis=2) # (Tx2Tx3)

        x_t_rshp_1 = tt.zeros(shape=(2*T,3),dtype=np.float32) # (2Tx3)

        x_t_rshp_1_x = tt.set_subtensor(x_t_rshp_1[:T,:],x_t[:,:,0])

        x_t_rshp_1_xy = tt.set_subtensor(x_t_rshp_1_x[T:,:],x_t[:,:,1])

        x_t_rshp_1_3d = x_t_rshp_1_xy.dimshuffle(0,1,'x') # (2Tx3x1)

        x_t_rpt_2_3d = tt.repeat(x_t_rshp_1_3d,T,axis=2) # (2Tx3xT)

        x_t_3D = x_t_rpt_2_3d.dimshuffle(2,0,1) # (Tx2Tx3)

        # abs_diff_mat = tt.abs_(x_h_3D - x_t_3D) # (Tx2Tx3)
        abs_diff_mat = (x_h_3D - x_t_3D)**2 # (Tx2Tx3)

        dists_mat = abs_diff_mat[:,:T,:] + abs_diff_mat[:,T:,:] # d_x+d_y: (TxTx3)

        # punish only when cutting a leader
        host_effective_dists = (tt.triu(dists_mat[:,:,0]) * is_leader[0] +
                                tt.triu(dists_mat[:,:,1]) * is_leader[1] +
                                tt.triu(dists_mat[:,:,2]) * is_leader[2])

        costs_row = tt.mean(tt.nnet.sigmoid(self.eps_row - host_effective_dists))

        self.cost_steer = tt.mean(costs_steer)
        self.cost_accel = tt.mean(costs_accel)
        self.cost_progress = tt.mean(costs_progress)
        self.cost_accident = tt.mean(costs_accident)
        self.cost_row = tt.mean(costs_row)
        self.cost_rail = tt.mean(costs_rail)

        self.weighted_cost = (
                    self.w_delta_steer * self.cost_steer +
                    self.w_accel * self.cost_accel +
                    self.w_progress * self.cost_progress +
                    self.w_accident * self.cost_accident +
                    # self.w_row * self.cost_row
                    self.w_rail * self.cost_rail
        )

        self.cost = (
            self.cost_steer +
            self.cost_accel +
            self.cost_progress +
            self.cost_accident +
            # self.cost_row
            self.cost_rail
        )

        objective = self.weighted_cost

        objective = common.weight_decay(objective=objective, params=self.params, l1_weight=self.l1_weight)

        objective = t.gradient.grad_clip(objective, -self.grad_clip_val, self.grad_clip_val)

        gradients = tt.grad(objective, self.params)

        self.updates = optimizers.optimizer(lr=lr, param_struct=self, gradients=gradients, solver_params=solver_params)

        self.x_h = x_h
        self.v_h = v_h
        self.x_t = x_t
        self.v_t = v_t

        self.max_a = tt.max(abs(a_hs))

        self.max_grad_val = 0
        self.grad_mean = 0
        for g in gradients:
            self.grad_mean += tt.mean(tt.abs_(g))
            self.max_grad_val = (tt.max(g) > self.max_grad_val) * tt.max(g) + (tt.max(g) <= self.max_grad_val) * self.max_grad_val

        self.params_abs_norm = self._calc_params_norm()
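
A small numpy sketch of the upper-triangular masking used in the right-of-way term above: as indexed here, row i is the host position at time i and column j the target position at time j, so tt.triu keeps only the i <= j entries, penalizing the host for being close to where a leader will be at the same or a later time (numpy instead of theano for brevity; eps_row is a placeholder value):

import numpy as np

T_steps = 4
# hypothetical squared host-target distances, dists[i, j] = |x_h(i) - x_t(j)|^2
dists = np.random.rand(T_steps, T_steps)

# keep only the i <= j entries, as tt.triu does above
effective = np.triu(dists)
eps_row = 0.5
cost_row = np.mean(1.0 / (1.0 + np.exp(-(eps_row - effective))))  # sigmoid penalty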
Example #40
    def compile_theano():
        """
        This function generates theano-compiled kernels for energy and force learning.

        The positions of the atoms relative to the central one, and their chemical species,
        are defined by a matrix of dimension Mx5.

        Returns:
            k3_ee (func): energy-energy kernel
            k3_ef (func): energy-force kernel
            k3_ff (func): force-force kernel
        """
        if not (os.path.exists(Mffpath / 'k3_ee_s.pickle')
                and os.path.exists(Mffpath / 'k3_ef_s.pickle')
                and os.path.exists(Mffpath / 'k3_ff_s.pickle')):
            print("Building Kernels")

            import theano.tensor as T
            from theano import function, scan
            logger.info("Started compilation of theano three body kernels")

            # --------------------------------------------------
            # INITIAL DEFINITIONS
            # --------------------------------------------------

            # positions of central atoms
            r1, r2 = T.dvectors('r1d', 'r2d')
            # positions of neighbours
            rho1, rho2 = T.dmatrices('rho1', 'rho2')
            # hyperparameter
            sig = T.dscalar('sig')
            # cutoff hyperparameters
            theta = T.dscalar('theta')
            rc = T.dscalar('rc')

            # positions of neighbours without chemical species

            rho1s = rho1[:, 0:3]
            rho2s = rho2[:, 0:3]

            # --------------------------------------------------
            # RELATIVE DISTANCES TO CENTRAL VECTOR AND BETWEEN NEIGHBOURS
            # --------------------------------------------------

            # first and second configuration
            r1j = T.sqrt(T.sum((rho1s[:, :] - r1[None, :])**2, axis=1))
            r2m = T.sqrt(T.sum((rho2s[:, :] - r2[None, :])**2, axis=1))
            rjk = T.sqrt(
                T.sum((rho1s[None, :, :] - rho1s[:, None, :])**2, axis=2))
            rmn = T.sqrt(
                T.sum((rho2s[None, :, :] - rho2s[:, None, :])**2, axis=2))

            # --------------------------------------------------
            # BUILD THE KERNEL
            # --------------------------------------------------

            # Squared exp of differences
            se_1j2m = T.exp(-(r1j[:, None] - r2m[None, :])**2 / (2 * sig**2))
            se_jkmn = T.exp(
                -(rjk[:, :, None, None] - rmn[None, None, :, :])**2 /
                (2 * sig**2))
            se_jk2m = T.exp(-(rjk[:, :, None] - r2m[None, None, :])**2 /
                            (2 * sig**2))
            se_1jmn = T.exp(-(r1j[:, None, None] - rmn[None, :, :])**2 /
                            (2 * sig**2))

            # Kernel not summed (cyclic permutations)
            k1n = (se_1j2m[:, None, :, None] * se_1j2m[None, :, None, :] *
                   se_jkmn)
            k2n = (se_1jmn[:, None, :, :] * se_jk2m[:, :, None, :] *
                   se_1j2m[None, :, :, None])
            k3n = (se_1j2m[:, None, None, :] * se_jk2m[:, :, :, None] *
                   se_1jmn[None, :, :, :])

            # final shape is M1 M1 M2 M2
            ker = k1n + k2n + k3n

            cut_j = 0.5 * (1 + T.cos(np.pi * r1j / rc)) * (
                (T.sgn(rc - r1j) + 1) / 2)
            cut_m = 0.5 * (1 + T.cos(np.pi * r2m / rc)) * (
                (T.sgn(rc - r2m) + 1) / 2)

            cut_jk = cut_j[:, None] * cut_j[None, :] * 0.5 * (
                1 + T.cos(np.pi * rjk / rc)) * ((T.sgn(rc - rjk) + 1) / 2)
            cut_mn = cut_m[:, None] * cut_m[None, :] * 0.5 * (
                1 + T.cos(np.pi * rmn / rc)) * ((T.sgn(rc - rmn) + 1) / 2)

            # --------------------------------------------------
            # REMOVE DIAGONAL ELEMENTS AND ADD CUTOFF
            # --------------------------------------------------

            # remove diagonal elements AND lower triangular ones from first configuration
            mask_jk = T.triu(T.ones_like(rjk)) - T.identity_like(rjk)

            # remove diagonal elements from second configuration
            mask_mn = T.ones_like(rmn) - T.identity_like(rmn)

            # Combine masks
            mask_jkmn = mask_jk[:, :, None, None] * mask_mn[None, None, :, :]

            # Apply mask and then apply cutoff functions
            ker = ker * mask_jkmn
            ker = T.sum(ker * cut_jk[:, :, None, None] *
                        cut_mn[None, None, :, :])

            # --------------------------------------------------
            # FINAL FUNCTIONS
            # --------------------------------------------------

            # global energy energy kernel
            k_ee_fun = function([r1, r2, rho1, rho2, sig, theta, rc],
                                ker,
                                on_unused_input='ignore')

            # global energy force kernel
            k_ef = T.grad(ker, r2)
            k_ef_fun = function([r1, r2, rho1, rho2, sig, theta, rc],
                                k_ef,
                                on_unused_input='ignore')

            # local force force kernel
            k_ff = T.grad(ker, r1)
            k_ff_der, updates = scan(lambda j, k_ff, r2: T.grad(k_ff[j], r2),
                                     sequences=T.arange(k_ff.shape[0]),
                                     non_sequences=[k_ff, r2])
            k_ff_fun = function([r1, r2, rho1, rho2, sig, theta, rc],
                                k_ff_der,
                                on_unused_input='ignore')

            # Save the function that we want to use for multiprocessing
            # This is necessary because theano is a crybaby and does not want to access the
            # automatically stored compiled object from different processes
            with open(Mffpath / 'k3_ee_s.pickle', 'wb') as f:
                pickle.dump(k_ee_fun, f)
            with open(Mffpath / 'k3_ef_s.pickle', 'wb') as f:
                pickle.dump(k_ef_fun, f)
            with open(Mffpath / 'k3_ff_s.pickle', 'wb') as f:
                pickle.dump(k_ff_fun, f)

        else:
            print("Loading Kernels")
            with open(Mffpath / "k3_ee_s.pickle", 'rb') as f:
                k_ee_fun = pickle.load(f)
            with open(Mffpath / "k3_ef_s.pickle", 'rb') as f:
                k_ef_fun = pickle.load(f)
            with open(Mffpath / "k3_ff_s.pickle", 'rb') as f:
                k_ff_fun = pickle.load(f)

        # WRAPPERS (we don't want to plug the position of the central element every time)
        def k3_ee(conf1, conf2, sig, theta, rc):
            """
            Three body kernel for global energy-energy correlation

            Args:
                conf1 (array): first configuration.
                conf2 (array): second configuration.
                sig (float): lengthscale hyperparameter theta[0]
                theta (float): cutoff decay rate hyperparameter theta[1]
                rc (float): cutoff distance hyperparameter theta[2]

            Returns:
                kernel (float): scalar valued energy-energy 3-body kernel

            """
            return k_ee_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta,
                            rc)

        def k3_ef(conf1, conf2, sig, theta, rc):
            """
            Three body kernel for global energy-force correlation

            Args:
                conf1 (array): first configuration.
                conf2 (array): second configuration.
                sig (float): lengthscale hyperparameter theta[0]
                theta (float): cutoff decay rate hyperparameter theta[1]
                rc (float): cutoff distance hyperparameter theta[2]

            Returns:
                kernel (array): 3x1 energy-force 3-body kernel

            """
            return -k_ef_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig,
                             theta, rc)

        def k3_ff(conf1, conf2, sig, theta, rc):
            """
            Three body kernel for local force-force correlation

            Args:
                conf1 (array): first configuration.
                conf2 (array): second configuration.
                sig (float): lengthscale hyperparameter theta[0]
                theta (float): cutoff decay rate hyperparameter theta[1]
                rc (float): cutoff distance hyperparameter theta[2]

            Returns:
                kernel (matrix): 3x3 force-force 3-body kernel

            """
            return k_ff_fun(np.zeros(3), np.zeros(3), conf1, conf2, sig, theta,
                            rc)

        logger.info("Ended compilation of theano three body kernels")

        return k3_ee, k3_ef, k3_ff
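
A hedged sketch of assembling a small energy-energy Gram matrix from the returned k3_ee, assuming compile_theano is callable as shown; the configurations and hyperparameters are placeholders (this single-species kernel only reads the first three columns of each Mx5 configuration):

import numpy as np

k3_ee, k3_ef, k3_ff = compile_theano()

# toy set of three configurations, each with 4 neighbours (Mx5 arrays)
confs = [np.hstack([np.random.randn(4, 3), np.ones((4, 2))]) for _ in range(3)]

sig, theta, rc = 1.0, 1.0, 4.0
gram = np.array([[k3_ee(ci, cj, sig, theta, rc) for cj in confs] for ci in confs])
print(gram.shape)  # (3, 3)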
Example #41
    def __init__(self, rng, input, n_in, n_batch, d_bucket, activation, activation_deriv,
                 w=None, index_permute=None, index_permute_reverse=None):
        srng = RandomStreams(seed=234)
        
        n_bucket = n_in // d_bucket + 1
        self.input = input

        # randomly permute input space
        if index_permute is None:
            index_permute = srng.permutation(n=n_in)#numpy.random.permutation(n_in)
            index_permute_reverse = T.argsort(index_permute)
            self.index_permute = index_permute
            self.index_permute_reverse = index_permute_reverse

        permuted_input = input[:, index_permute]
        self.permuted_input = permuted_input

        # initialize matrix parameters
        if w is None:
            bound = numpy.sqrt(3. / d_bucket)
            w_values = numpy.asarray(rng.uniform(low=-bound,
                                                 high=bound,
                                                 size=(n_bucket, d_bucket, d_bucket)),
                                     dtype=theano.config.floatX)
            w = theano.shared(value=w_values, name='w')
            
        self.w = w
        
        
        # compute outputs and Jacobians
        
        log_jacobian = T.alloc(0, n_batch)
        for b in xrange(n_bucket):
            bucket_size = d_bucket
            if b == n_bucket - 1:
                bucket_size = n_in - b * d_bucket
            
           
            if b > 0:
                # here we warp the previous bucket of inputs and add the result
                # to the new input; m_b is expected to come from a warping /
                # coupling network applied to prev_input (not defined in this
                # snippet)
                prev_input = x_b

            x_b = self.permuted_input[:, b*d_bucket:b*d_bucket + bucket_size]
            w_b = self.w[b, :bucket_size, :bucket_size]

            if b > 0:
                x_b_plus = x_b + m_b
            else:
                x_b_plus = x_b

            Upper = T.triu(w_b)
            Lower = T.tril(w_b)
            Lower = T.extra_ops.fill_diagonal(Lower, 1.)
            log_det_Upper = T.log(T.abs_(T.nlinalg.ExtractDiag()(Upper))).sum() 

            W = T.dot(Upper, Lower)
            log_jacobian = log_jacobian + T.alloc(log_det_Upper, n_batch)

            lin_output_b = T.dot(x_b_plus, W)
            if b>0:
                lin_output = T.concatenate([lin_output, lin_output_b], axis=1)
            else:
                lin_output = lin_output_b
            if activation is not None:
                derivs = activation_deriv(lin_output_b)     
                #import pdb; pdb.set_trace()
                log_jacobian = log_jacobian + T.log(T.abs_(derivs)).sum(axis=1)                 
                    
        self.log_jacobian = log_jacobian        


        self.output = (
            lin_output[:, index_permute_reverse] if activation is None
            else activation(lin_output[:, index_permute_reverse])
        )

        self.params = [w]
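
A short numpy check of the parametrization used in this layer: each bucket weight w_b is split into an upper-triangular factor U and a unit-diagonal lower-triangular factor L, so log|det(W)| for W = U.dot(L) reduces to the sum of log|diag(U)|, which is what gets accumulated into log_jacobian (sketch only, outside the theano graph):

import numpy as np

rng = np.random.RandomState(0)
w_b = rng.uniform(-0.5, 0.5, size=(4, 4))

U = np.triu(w_b)
L = np.tril(w_b)
np.fill_diagonal(L, 1.0)          # unit diagonal, so det(L) = 1
W = U.dot(L)

log_det = np.log(np.abs(np.linalg.det(W)))
log_det_from_diag = np.sum(np.log(np.abs(np.diag(U))))
assert np.isclose(log_det, log_det_from_diag)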