Example #1
    def output_probabilistic_sep(self, mx_previous, vx_previous):
        # create place holders
        mout = []
        vout = []

        # compute the psi0 term
        psi0 = self.kern.compute_psi0_theano(self.ls, self.sf, mx_previous,
                                             vx_previous)
        for d in range(self.Dout):
            # compute the psi1 and psi2 term
            psi1 = self.kern.compute_psi1_theano(self.ls, self.sf, mx_previous,
                                                 vx_previous, self.zu[d])
            psi1psi1T = T.outer(psi1, psi1.T)
            psi2 = self.kern.compute_psi2_theano(self.ls, self.sf, mx_previous,
                                                 vx_previous, self.zu[d])

            # precompute some terms
            psi1Kinv = T.dot(psi1, self.Kuuinv[d])
            Kinvpsi2 = T.dot(self.Kuuinv[d], psi2)
            Kinvpsi2Kinv = T.dot(Kinvpsi2, self.Kuuinv[d])
            vconst = T.exp(2 * self.sn) + (psi0 - Talg.trace(Kinvpsi2))

            mud = self.muhat[d]
            Sud = self.Suhat[d]
            moutd = T.sum(T.dot(psi1Kinv, mud))
            mout.append(moutd)

            Splusmm = Sud + T.outer(mud, mud)
            voutd = vconst + Talg.trace(T.dot(Splusmm,
                                              Kinvpsi2Kinv)) - moutd**2
            vout.append(T.sum(voutd))

        return mout, vout
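For reference (a hedged reading of the code above, not text from the source project): the loop appears to assemble the standard uncertain-input predictive moments of a sparse GP layer, written with the Psi-statistics, the inducing-posterior mean m_d and covariance S_d, and K_d^{-1} = self.Kuuinv[d]:

    m_d^{out} = \Psi_1 K_d^{-1} m_d
    v_d^{out} = e^{2 s_n} + \psi_0 - \operatorname{tr}\big(K_d^{-1}\Psi_2\big)
                + \operatorname{tr}\big((S_d + m_d m_d^{\top})\, K_d^{-1}\Psi_2 K_d^{-1}\big) - \big(m_d^{out}\big)^2
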
Example #2
    def build(self, dim):
        M = theano.shared(value=np.eye(dim, dtype='float32'),
                          name='M',
                          borrow=True)

        pull_error = 0.
        ivectors = self._x[self._neighborpairs[:, 0]]
        jvectors = self._x[self._neighborpairs[:, 1]]
        diffv = ivectors - jvectors
        pull_error = linalg.trace(diffv.dot(M).dot(diffv.T))

        push_error = 0.0
        ivectors = self._x[self._set[:, 0]]
        jvectors = self._x[self._set[:, 1]]
        lvectors = self._x[self._set[:, 2]]
        diffij = ivectors - jvectors
        diffil = ivectors - lvectors
        lossij = diffij.dot(M).dot(diffij.T)
        lossil = diffil.dot(M).dot(diffil.T)
        mask = T.neq(self._y[self._set[:, 0]], self._y[self._set[:, 2]])
        push_error = linalg.trace(mask * T.maximum(lossij - lossil + 1, 0))

        error = (1 - self.mu) * pull_error + self.mu * push_error
        updates = [(M, M - self._lr * T.grad(error, M))]

        self.M = M
        self.updates = updates
        self.pull_error = pull_error
        self.push_error = push_error
        self.built = True
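In words (a reading of the graph above, not text from the source project): the two trace terms implement an LMNN-style objective, where d_M(a, b) = (a - b)^\top M (a - b) and the mask keeps only triplets whose third point carries a different label:

    \mathrm{pull} = \sum_{(i,j) \in \mathcal{N}} d_M(x_i, x_j)
    \mathrm{push} = \sum_{(i,j,l)} \mathbf{1}[y_i \neq y_l]\,\big[1 + d_M(x_i, x_j) - d_M(x_i, x_l)\big]_+
    \mathrm{error} = (1 - \mu)\,\mathrm{pull} + \mu\,\mathrm{push}
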
Example #3
    def build(self, dim):
        M = theano.shared(value=np.eye(dim, dtype='float32'), name='M', borrow=True)

        pull_error = 0.
        ivectors = self._x[self._neighborpairs[:, 0]]
        jvectors = self._x[self._neighborpairs[:, 1]]
        diffv = ivectors - jvectors
        pull_error = linalg.trace(diffv.dot(M).dot(diffv.T))

        push_error = 0.0
        ivectors = self._x[self._set[:, 0]]
        jvectors = self._x[self._set[:, 1]]
        lvectors = self._x[self._set[:, 2]]
        diffij = ivectors - jvectors
        diffil = ivectors - lvectors
        lossij = diffij.dot(M).dot(diffij.T)
        lossil = diffil.dot(M).dot(diffil.T)
        mask = T.neq(self._y[self._set[:, 0]], self._y[self._set[:, 2]])
        push_error = linalg.trace(mask*T.maximum(lossij - lossil + 1, 0))

        error = (1-self.mu) * pull_error + self.mu * push_error
        updates = [(M, M - self._lr * T.grad(error, M))]

        self.M = M
        self.updates = updates
        self.pull_error = pull_error
        self.push_error = push_error 
        self.built = True
Example #4
def gaussian_kl_loss(mx, Sx, mt, St):
    '''
        Returns KL ( Normal(mx, Sx) || Normal(mt, St) )
    '''
    if St is None:
        target_samples = mt
        mt, St = empirical_gaussian_params(target_samples)

    if Sx is None:
        # evaluate empirical KL (expectation over the rolled out samples)
        x = mx
        mx, Sx = empirical_gaussian_params(x)

        def logprob(x, m, S):
            delta = x - m
            L = cholesky(S)
            beta = solve_lower_triangular(L, delta.T).T
            lp = -0.5 * tt.square(beta).sum(-1)
            lp -= tt.sum(tt.log(tt.diagonal(L)))
            lp -= (0.5 * m.size * tt.log(2 * np.pi)).astype(
                theano.config.floatX)
            return lp

        return (logprob(x, mx, Sx) - logprob(x, mt, St)).mean(0)
    else:
        delta = mt - mx
        Stinv = matrix_inverse(St)
        kl = tt.log(det(St)) - tt.log(det(Sx))
        kl += trace(Stinv.dot(delta.T.dot(delta) + Sx - St))
        return 0.5 * kl
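The closed-form branch above matches the usual Gaussian KL identity. A minimal NumPy restatement (an illustrative sketch, assuming 1-D mean vectors and full covariance matrices; it is not part of the source project):

import numpy as np

def gaussian_kl_np(mx, Sx, mt, St):
    # KL( N(mx, Sx) || N(mt, St) ), written exactly as in the branch above
    delta = mt - mx
    St_inv = np.linalg.inv(St)
    kl = np.log(np.linalg.det(St)) - np.log(np.linalg.det(Sx))
    kl += np.trace(St_inv.dot(np.outer(delta, delta) + Sx - St))
    return 0.5 * kl

# sanity check: KL between identical Gaussians is zero
m, S = np.zeros(3), np.eye(3)
print(gaussian_kl_np(m, S, m, S))  # ~0.0
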
Example #5
File: ops.py Project: Ambier/Theano
def spectral_radius_bound(X, log2_exponent):
    """
    Returns an upper bound on the largest eigenvalue of a square symmetric matrix X.

    log2_exponent must be a positive-valued integer. The larger it is, the
    slower and tighter the bound.  Values up to 5 should usually suffice.  The
    algorithm works by multiplying X by itself this many times.

    From V.Pan, 1990. "Estimating the Extremal Eigenvalues of a Symmetric
    Matrix", Computers Math Applic. Vol 20 n. 2 pp 17-22.
    Note: an efficient algorithm, not used here, is defined in this paper.
    """
    if X.type.ndim != 2:
        raise TypeError('spectral_radius_bound requires a matrix argument', X)
    if not isinstance(log2_exponent, int):
        raise TypeError('spectral_radius_bound requires an integer exponent',
                        log2_exponent)
    if log2_exponent <= 0:
        raise ValueError('spectral_radius_bound requires a strictly positive '
                         'exponent', log2_exponent)

    XX = X
    for i in xrange(log2_exponent):
        XX = tensor.dot(XX, XX)
    return tensor.pow(
            trace(XX),
            2 ** (-log2_exponent))
Example #6
def spectral_radius_bound(X, log2_exponent):
    """
    Returns an upper bound on the largest eigenvalue of a square symmetric matrix X.

    log2_exponent must be a positive-valued integer. The larger it is, the
    slower and tighter the bound. Values up to 5 should usually suffice. The
    algorithm works by multiplying X by itself this many times.

    From V.Pan, 1990. "Estimating the Extremal Eigenvalues of a Symmetric
    Matrix", Computers Math Applic. Vol 20 n. 2 pp 17-22.
    Note: an efficient algorithm, not used here, is defined in this paper.

    """
    if X.type.ndim != 2:
        raise TypeError('spectral_radius_bound requires a matrix argument', X)
    if not isinstance(log2_exponent, int):
        raise TypeError('spectral_radius_bound requires an integer exponent',
                        log2_exponent)
    if log2_exponent <= 0:
        raise ValueError(
            'spectral_radius_bound requires a strictly positive '
            'exponent', log2_exponent)

    XX = X
    for i in xrange(log2_exponent):
        XX = tensor.dot(XX, XX)
    return tensor.pow(trace(XX), 2**(-log2_exponent))
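A quick NumPy check of the returned quantity (an illustrative sketch, not part of the file above): trace(X^(2^k)) ** (2 ** -k) upper-bounds the spectral radius of a symmetric matrix and tightens as k grows.

import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(5, 5)
X = A + A.T                                   # symmetric test matrix
lam_max = np.max(np.abs(np.linalg.eigvalsh(X)))

for k in (1, 2, 3, 4, 5):
    XX = X
    for _ in range(k):
        XX = XX.dot(XX)                       # XX = X ** (2 ** k)
    bound = np.trace(XX) ** (2.0 ** -k)
    print(k, bound, ">=", lam_max)
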
Example #7
    def get_tensor_traces_scan(self, tensor_in):

        # scan over the leading axis, taking the trace of each 2-D slice
        result, updates = th.scan(fn=lambda mat: nlinalg.trace(mat),
                                  outputs_info=None,
                                  sequences=[tensor_in],
                                  non_sequences=[])

        return result
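A hypothetical usage sketch (it assumes Theano is installed; the variable names are made up for illustration): scan computes the trace of each 2-D slice along the leading axis of a 3-D tensor.

import numpy as np
import theano
import theano.tensor as T
from theano.tensor import nlinalg

tensor_in = T.tensor3('tensor_in')            # shape: (n_matrices, d, d)
traces, _ = theano.scan(fn=lambda m: nlinalg.trace(m),
                        sequences=[tensor_in])
trace_fn = theano.function([tensor_in], traces)

batch = np.stack([np.eye(3), 2 * np.eye(3)]).astype(theano.config.floatX)
print(trace_fn(batch))                        # -> [ 3.  6.]
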
Example #8
def batch_cca_loss(y_true, y_pred):
    """
    Return Sum of the diagonal - Sum of upper and lower triangles
    """
    trace = TN.trace(y_true[0, :, :])
    triu_sum = K.sum(K.abs(TB.triu(y_true[0, :, :], k=1)))
    tril_sum = K.sum(K.abs(TB.tril(y_true[0, :, :], k=-1)))
    return trace - tril_sum - triu_sum
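The same quantity in plain NumPy (an illustrative sketch, not taken from the source): the diagonal sum minus the absolute sums of the strict upper and lower triangles of a single matrix.

import numpy as np

def batch_cca_loss_np(C):
    diag_sum = np.trace(C)
    triu_sum = np.abs(np.triu(C, k=1)).sum()
    tril_sum = np.abs(np.tril(C, k=-1)).sum()
    return diag_sum - triu_sum - tril_sum

C = np.array([[1.0, 0.2], [-0.3, 1.0]])
print(batch_cca_loss_np(C))  # 2.0 - 0.2 - 0.3 = 1.5
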
Example #9
def test_trace():
    rng = np.random.RandomState(utt.fetch_seed())
    x = theano.tensor.matrix()
    g = trace(x)
    f = theano.function([x], g)

    for shp in [(2, 3), (3, 2), (3, 3)]:
        m = rng.rand(*shp).astype(config.floatX)
        v = np.trace(m)
        assert v == f(m)

    xx = theano.tensor.vector()
    ok = False
    try:
        trace(xx)
    except TypeError:
        ok = True
    assert ok
Example #10
def test_trace():
    rng = np.random.RandomState(utt.fetch_seed())
    x = theano.tensor.matrix()
    g = trace(x)
    f = theano.function([x], g)

    for shp in [(2, 3), (3, 2), (3, 3)]:
        m = rng.rand(*shp).astype(config.floatX)
        v = np.trace(m)
        assert v == f(m)

    xx = theano.tensor.vector()
    ok = False
    try:
        trace(xx)
    except TypeError:
        ok = True
    assert ok
Example #11
    def compute(self, symmetric_double_encoder, params):

        regularization = 0

        layer_number = len(symmetric_double_encoder)

        for ndx, layer in enumerate(symmetric_double_encoder):

            hidden_x = layer.output_forward_y
            hidden_y = layer.output_forward_x

            cov_x = Tensor.dot(hidden_x.T, hidden_x)
            cov_y = Tensor.dot(hidden_y.T, hidden_y)

            gama = (ndx / layer_number)

            regularization += gama * 0.5 * nlinalg.trace(cov_x - Tensor.identity_like(cov_x))
            regularization += (1 - gama) * 0.5 * nlinalg.trace(cov_y - Tensor.identity_like(cov_y))

        return regularization
Example #12
    def logp(self, X):
        n = self.n
        p = self.p
        V = self.V

        IVI = det(V)
        IXI = det(X)

        return bound(
            ((n - p - 1) * log(IXI) - trace(matrix_inverse(V).dot(X)) -
             n * p * log(2) - n * log(IVI) - 2 * multigammaln(n / 2., p)) / 2,
            n > (p - 1))
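A standalone numerical check of the Wishart log-density used in these logp methods (an illustrative sketch relying on scipy.stats.wishart, which is assumed to use the same parameterization; it is not part of the PyMC code above):

import numpy as np
from scipy.stats import wishart
from scipy.special import multigammaln

def wishart_logp_np(X, V, n):
    p = V.shape[0]
    return ((n - p - 1) * np.log(np.linalg.det(X))
            - np.trace(np.linalg.inv(V).dot(X))
            - n * p * np.log(2)
            - n * np.log(np.linalg.det(V))
            - 2 * multigammaln(n / 2., p)) / 2

V = np.eye(2)
X = np.array([[2.0, 0.3], [0.3, 1.5]])
n = 4
print(wishart_logp_np(X, V, n))
print(wishart.logpdf(X, df=n, scale=V))  # should agree with the line above
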
Example #13
    def _init_error(self):
        pull_error = 0.0
        ivectors = self._x[self._neighborpairs[:, 0]]
        jvectors = self._x[self._neighborpairs[:, 1]]
        diffv = ivectors - jvectors
        pull_error = linalg.trace(diffv.dot(self.M).dot(diffv.T))

        push_error = 0.0
        ivectors = self._x[self._set[:, 0]]
        jvectors = self._x[self._set[:, 1]]
        lvectors = self._x[self._set[:, 2]]
        diffij = ivectors - jvectors
        diffil = ivectors - lvectors
        lossij = diffij.dot(self.M).dot(diffij.T)
        lossil = diffil.dot(self.M).dot(diffil.T)
        push_error = linalg.trace(T.maximum(lossij - lossil + 1, 0))

        self.pull_error = pull_error
        self.push_error = push_error
        self.error = (1 - self.mu) * pull_error + self.mu * push_error
Example #14
    def logp(self, X):
        n = self.n
        p = self.p
        V = self.V

        IVI = det(V)
        IXI = det(X)

        return bound(
            ((n - p - 1) * log(IXI) - trace(matrix_inverse(V).dot(X)) -
             n * p * log(2) - n * log(IVI) - 2 * multigammaln(n / 2., p)) / 2,
            gt(n, (p - 1)), all(gt(eigh(X)[0], 0)), eq(X, X.T))
Example #15
    def logp(self, X):
        n = self.n
        p = self.p
        V = self.V

        IVI = det(V)
        IXI = det(X)

        return bound(
            ((n - p - 1) * tt.log(IXI) - trace(matrix_inverse(V).dot(X)) -
             n * p * tt.log(2) - n * tt.log(IVI) - 2 * multigammaln(n / 2., p))
            / 2, matrix_pos_def(X), tt.eq(X, X.T), n > (p - 1))
Example #16
    def logp(self, X):
        n = self.n
        p = self.p
        V = self.V

        IVI = det(V)
        IXI = det(X)

        return bound(
            ((n - p - 1) * T.log(IXI) - trace(matrix_inverse(V).dot(X)) -
             n * p * T.log(2) - n * T.log(IVI) - 2 * multigammaln(n / 2., p)) /
            2, T.all(eigh(X)[0] > 0), T.eq(X, X.T), n > (p - 1))
Example #17
    def logp(self, X):
        n = self.n
        p = self.p
        V = self.V

        IVI = det(V)
        IXI = det(X)

        return bound(
            ((n - p - 1) * log(IXI) - trace(matrix_inverse(V).dot(X)) -
                n * p * log(2) - n * log(IVI) - 2 * multigammaln(n / 2., p)) / 2,
             n > (p - 1))
Example #18
    def resfunc(i, xvec, y, h1, h2, h3vec, U1, U2, U3ten, conjU1, conjU2,
                conjU3ten):
        deph1 = TT.exp(-g * (t2 - y))
        deph2 = TT.exp(-g * (t3 - t2))
        deph3 = TT.exp(-g * (xvec[i] - t3))
        inhom14 = TT.exp(-s * ((xvec[i] - t3 + t2 - y)**2))
        inhom23 = TT.exp(-s * (((xvec[i] - t3) - (t2 - y))**2))
        r14a = (TT.dot(U1, TT.dot(m, TT.dot(p0, conjU1)))) * deph1
        r23a = (TT.dot(U1, TT.dot(p0, TT.dot(m, conjU1)))) * deph1
        r1 = TTnlinalg.trace(
            TT.dot(m, ((TT.dot(
                U3ten[:, :, i],
                TT.dot(
                    m,
                    TT.dot((
                        (TT.dot(U2, TT.dot(m, TT.dot(r14a, conjU2)))) *
                        deph2), conjU3ten[:, :, i])))) * deph3))) * inhom14
        r2 = (TTnlinalg.trace(
            TT.dot(m, ((TT.dot(
                U3ten[:, :, i],
                TT.dot(((TT.dot(U2, TT.dot(m, TT.dot(r23a, conjU2)))) *
                        deph2), TT.dot(m, conjU3ten[:, :, i])))) *
                       deph3)))) * inhom23
        r3 = (TTnlinalg.trace(
            TT.dot(m, ((TT.dot(
                U3ten[:, :, i],
                TT.dot(
                    m,
                    TT.dot(((TT.dot(U2, TT.dot(r23a, TT.dot(m, conjU2)))) *
                            deph2), conjU3ten[:, :, i])))) *
                       deph3)))) * inhom23
        r4 = (TTnlinalg.trace(
            TT.dot(m, ((TT.dot(
                U3ten[:, :, i],
                TT.dot(((TT.dot(U2, TT.dot(r14a, TT.dot(m, conjU2)))) *
                        deph2), TT.dot(m, conjU3ten[:, :, i])))) *
                       deph3)))) * inhom14
        return (1j * 1j * 1j) * h1 * h2 * h3vec[i] * (
            r1 + r2 + r3 + r4 - TT.conj(r1) - TT.conj(r2) - TT.conj(r3) -
            TT.conj(r4))
Example #19
    def output_probabilistic_sep(self, mx_previous, vx_previous):
        # create place holders
        mout = []
        vout = []

        # compute the psi0 term
        psi0 = self.kern.compute_psi0_theano(
            self.ls, self.sf,
            mx_previous, vx_previous
        )
        for d in range(self.Dout):
            # compute the psi1 and psi2 term
            psi1 = self.kern.compute_psi1_theano(
                self.ls, self.sf,
                mx_previous, vx_previous, self.zu[d]
            )
            psi1psi1T =  T.outer(psi1, psi1.T)
            psi2 = self.kern.compute_psi2_theano(
                self.ls, self.sf,
                mx_previous, vx_previous, self.zu[d]
            )

            # precompute some terms
            psi1Kinv = T.dot(psi1, self.Kuuinv[d])
            Kinvpsi2 = T.dot(self.Kuuinv[d], psi2)
            Kinvpsi2Kinv = T.dot(Kinvpsi2, self.Kuuinv[d])
            vconst = T.exp(2 * self.sn) + (psi0 - Talg.trace(Kinvpsi2))

            mud = self.muhat[d]
            Sud = self.Suhat[d]
            moutd = T.sum(T.dot(psi1Kinv, mud))
            mout.append(moutd)

            Splusmm = Sud + T.outer(mud, mud)
            voutd = vconst + Talg.trace(T.dot(Splusmm, Kinvpsi2Kinv)) - moutd ** 2
            vout.append(T.sum(voutd))

        return mout, vout
Example #20
    def logp(self, X):
        n = self.n
        p = self.p
        V = self.V

        IVI = det(V)
        IXI = det(X)

        return bound(
            ((n - p - 1) * log(IXI) - trace(matrix_inverse(V).dot(X)) -
                n * p * log(2) - n * log(IVI) - 2 * multigammaln(n / 2., p)) / 2,
            gt(n, (p - 1)),
            all(gt(eigh(X)[0], 0)),
            eq(X, X.T)
        )
Example #21
    def logp(self, X):
        n = self.n
        p = self.p
        V = self.V

        IVI = det(V)
        IXI = det(X)

        return bound(((n - p - 1) * tt.log(IXI)
                      - trace(matrix_inverse(V).dot(X))
                      - n * p * tt.log(2) - n * tt.log(IVI)
                      - 2 * multigammaln(n / 2., p)) / 2,
                     matrix_pos_def(X),
                     tt.eq(X, X.T),
                     n > (p - 1))
Example #22
    def logp(self, X):
        nu = self.nu
        p = self.p
        V = self.V

        IVI = det(V)
        IXI = det(X)

        return bound(((nu - p - 1) * tt.log(IXI) -
                      trace(matrix_inverse(V).dot(X)) - nu * p * tt.log(2) -
                      nu * tt.log(IVI) - 2 * multigammaln(nu / 2., p)) / 2,
                     matrix_pos_def(X),
                     tt.eq(X, X.T),
                     nu > (p - 1),
                     broadcast_conditions=False)
Example #23
    def logp(self, X):
        nu = self.nu
        p = self.p
        V = self.V

        IVI = det(V)
        IXI = det(X)

        return bound(((nu - p - 1) * tt.log(IXI)
                      - trace(matrix_inverse(V).dot(X))
                      - nu * p * tt.log(2) - nu * tt.log(IVI)
                      - 2 * multigammaln(nu / 2., p)) / 2,
                     matrix_pos_def(X),
                     tt.eq(X, X.T),
                     nu > (p - 1),
                     broadcast_conditions=False
        )
Example #24
    def logp(self, X):
        n = self.n
        p = self.p
        V = self.V

        IVI = det(V)
        IXI = det(X)

        return bound(
            (
                (n - p - 1) * T.log(IXI)
                - trace(matrix_inverse(V).dot(X))
                - n * p * T.log(2)
                - n * T.log(IVI)
                - 2 * multigammaln(n / 2.0, p)
            )
            / 2,
            T.all(eigh(X)[0] > 0),
            T.eq(X, X.T),
            n > (p - 1),
        )
Example #25
    def __theano_longtermError(self, targetM, i, lastM):
        mask = T.neq(self._y[self._set[:, 1]], self._y[self._set[:, 2]])
        f = T.tanh #T.nnet.sigmoid
        if i == 0:
            # pull_error for global 0
            pull_error = 0.
            ivectors = self._stackx[:, i, :][self._neighborpairs[:, 0]]
            jvectors = self._stackx[:, i, :][self._neighborpairs[:, 1]]
            diffv = ivectors - jvectors
            pull_error = linalg.trace(diffv.dot(targetM).dot(diffv.T))

            # push_error for global 0
            push_error = 0.0
            ivectors = self._stackx[:, i, :][self._set[:, 0]]
            jvectors = self._stackx[:, i, :][self._set[:, 1]]
            lvectors = self._stackx[:, i, :][self._set[:, 2]]
            diffij = ivectors - jvectors
            diffil = ivectors - lvectors
            lossij = diffij.dot(targetM).dot(diffij.T)
            lossil = diffil.dot(targetM).dot(diffil.T)
            #cur_prediction = T.diag(lossij - lossil)
            cur_prediction = f(T.diag(lossil - lossij))

            ivectors = self._stackx[:, i-1, :][self._set[:, 0]]
            jvectors = self._stackx[:, i-1, :][self._set[:, 1]]
            lvectors = self._stackx[:, i-1, :][self._set[:, 2]]
            diffij = ivectors - jvectors
            diffil = ivectors - lvectors
            lossij = diffij.dot(diffij.T)
            lossil = diffil.dot(diffil.T)
            #lst_prediction = T.diag(lossij - lossil)
            lst_prediction = f(T.diag(lossil - lossij))
            push_error = T.sum(mask*(lst_prediction - cur_prediction))

        else:
            ivectors = self._stackx[:, i, :][self._neighborpairs[:, 0]]
            jvectors = self._stackx[:, i, :][self._neighborpairs[:, 1]]
            diffv1 = ivectors - jvectors
            distMcur = diffv1.dot(targetM).dot(diffv1.T)
            ivectors = self._stackx[:, i-1, :][self._neighborpairs[:, 0]]
            jvectors = self._stackx[:, i-1, :][self._neighborpairs[:, 1]]
            diffv2 = ivectors - jvectors
            distMlast = diffv2.dot(lastM).dot(diffv2.T)
            pull_error = linalg.trace(T.maximum(distMcur - distMlast + 1, 0))


            # self.debug.append( self._y[self._set[:, 0] )

            push_error = 0.0
            ivectors = self._stackx[:, i, :][self._set[:, 0]]
            jvectors = self._stackx[:, i, :][self._set[:, 1]]
            lvectors = self._stackx[:, i, :][self._set[:, 2]]
            diffij = ivectors - jvectors
            diffil = ivectors - lvectors
            lossij = diffij.dot(targetM).dot(diffij.T)
            lossil = diffil.dot(targetM).dot(diffil.T)
            #cur_prediction = T.diag(lossij - lossil)
            cur_prediction = f(T.diag(lossil - lossij))

            ivectors = self._stackx[:, i-1, :][self._set[:, 0]]
            jvectors = self._stackx[:, i-1, :][self._set[:, 1]]
            lvectors = self._stackx[:, i-1, :][self._set[:, 2]]
            diffij = ivectors - jvectors
            diffil = ivectors - lvectors
            lossij = diffij.dot(lastM).dot(diffij.T)
            lossil = diffil.dot(lastM).dot(diffil.T)
            #lst_prediction = T.diag(lossij - lossil)
            lst_prediction = f(T.diag(lossil - lossij))
            push_error = T.sum(mask*(lst_prediction - cur_prediction))

        return pull_error, push_error 
Example #26
def cmmd(dataset='mnist.pkl.gz',batch_size=500, layer_num = 2, hidden_dim = 20,seed = 0,layer_size=[500,200,100]):

	validation_frequency = 1
	test_frequency = 1
	pre_train = 0
	pre_train_epoch = 30

	print "Loading data ......."
	datasets = datapy.load_data_gpu_60000(dataset, have_matrix = True)
	train_set_x, train_set_y, train_y_matrix = datasets[0]
	valid_set_x, valid_set_y, valid_y_matrix = datasets[1]
	test_set_x, test_set_y, test_y_matrix = datasets[2]

	n_train_batches = train_set_x.get_value().shape[0] / batch_size
	n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
	n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

	rng = np.random.RandomState(seed)                                                          
	rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)

	################################
	##        build model         ##
	################################
	print "Building model ......."

	index = T.lscalar()
	x = T.matrix('x')  ##### batch_size * 28^2
	y = T.vector('y') 
	y_matrix = T.matrix('y_matrix') 
	random_z = T.matrix('random_z') ### batch_size * hidden_dim
	Inv_K_d = T.matrix('Inv_K_d')

	layers = []
	layer_output= []

	activation = nonlinearity.relu
	#activation = Tnn.sigmoid
	#### first layer
	layers.append(FullyConnected.FullyConnected(
			rng = rng,
			n_in = 28*28 + hidden_dim, 
			#n_in = 28*28, 
			n_out = layer_size[0],
			activation = activation
	))
	layer_output.append(layers[-1].output_mix(input=[x,random_z]))
	#layer_output.append(layers[-1].output(input=x))

	#### middle layer
	for i in range(layer_num):
		layers.append(FullyConnected.FullyConnected(
			rng = rng,
			n_in = layer_size[i], 
			n_out = layer_size[i+1],
			activation = activation
		))
		layer_output.append(layers[-1].output(input= layer_output[-1]))

	#### last layer
	activation = Tnn.sigmoid
	layers.append(FullyConnected.FullyConnected(
		rng = rng,
		n_in = layer_size[-1],
		n_out = 10,
		activation = activation
	))
	y_gen = layers[-1].output(input = layer_output[-1])
	
	lambda1_ = 1e-3
	lambda_= theano.shared(np.asarray(lambda1_, dtype=np.float32))


	K_d = kernel_gram_for_x(x,x,batch_size,28*28)
	K_s = K_d 
	K_sd = K_d
	#Inv_K_d = NL.matrix_inverse(K_d +lambda_ * T.identity_like(K_d))
	Inv_K_s = Inv_K_d

	L_d = kernel_gram(y_matrix,y_matrix,batch_size,10)
	L_s = kernel_gram(y_gen,y_gen,batch_size,10)
	L_ds = kernel_gram(y_matrix,y_gen,batch_size,10)
	
	cost = -(NL.trace(K_d * Inv_K_d * L_d * Inv_K_d) +\
			NL.trace(K_s * Inv_K_s * L_s * Inv_K_s)- \
			NL.trace(K_sd * Inv_K_d * L_ds * Inv_K_s))
	cost_pre = -T.sum(T.sqr(y_matrix - y_gen))

	cc = T.argmax(y_gen,axis=1)
	correct = T.sum(T.eq(T.cast(T.argmax(y_gen,axis=1),'int32'),T.cast(y,'int32')))

	################################
	##        updates             ##
	################################
	params = []
	for aLayer in layers:
		params += aLayer.params
	gparams = [T.grad(cost,param) for param in params]
	gparams_pre = [T.grad(cost_pre,param) for param in params]

	learning_rate = 3e-4
	weight_decay=1.0/n_train_batches
	epsilon=1e-8

	l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
	get_optimizer = optimizer.get_adam_optimizer_max(learning_rate=l_r,
		decay1=0.1, decay2=0.001, weight_decay=weight_decay, epsilon=epsilon)
	updates = get_optimizer(params,gparams)

	updates_pre = get_optimizer(params,gparams_pre)


	################################
	##         pretrain model     ##
	################################
	parameters = theano.function(
			inputs = [],
			outputs = params,
			)

	'''
	pre_train_model = theano.function(
		inputs = [index,random_z],
		outputs = [cost_pre, correct],
		updates=updates_pre,
		givens={
			x:train_set_x[index * batch_size:(index + 1) * batch_size],
			y:train_set_y[index * batch_size:(index + 1) * batch_size],
			y_matrix:train_y_matrix[index * batch_size:(index + 1) * batch_size],
		},
		on_unused_input='warn'
		)
	cur_epoch = 0
	if pre_train == 1:
		for cur_epoch in range(pre_train_epoch):
			print 'cur_epoch: ', cur_epoch,
			cor = 0 
			for minibatch_index in range(n_train_batches):
				cost_pre_mini,correct_pre_mini = pre_train_model(minibatch_index,gen_random_z(batch_size,hidden_dim))
				cor = cor + correct_pre_mini
			print 'correct number: ' , cor
		#np.savez(,model = model)
		'''

	if pre_train == 1:
		print "pre-training model....."
		pre_train = np.load('model.npz')['model']
		for (para, pre) in zip(params, pre_train):
			para.set_value(pre)

	################################
	##         prepare data       ##
	################################

	#### compute matrix inverse
	print "Preparing data ...."
	Invv = NL.matrix_inverse(K_d +lambda_ * T.identity_like(K_d))
	prepare_data = theano.function(
			inputs = [index],
			outputs = [Invv,K_d],
			givens = {
				x:train_set_x[index * batch_size:(index + 1) * batch_size],
				}
			)

	Inv_K_d_l, K_d_l =  prepare_data(0)

	for minibatch_index in range(1, n_train_batches):
		if minibatch_index % 10 == 0:
			print 'minibatch_index:', minibatch_index
		Inv_pre_mini, K_d_pre_mini = prepare_data(minibatch_index)
		Inv_K_d_l = np.vstack((Inv_K_d_l,Inv_pre_mini))
		K_d_l = np.vstack((K_d_l,K_d_pre_mini))

	Inv_K_d_g = theano.shared(Inv_K_d_l,borrow=True)
	K_d_g = theano.shared(K_d_l, borrow=True)


	################################
	##         train model        ##
	################################

	train_model = theano.function(
		inputs = [index,random_z],
		outputs = [correct,cost,y,cc,y_gen],
		updates=updates,
		givens={
			x:train_set_x[index * batch_size:(index + 1) * batch_size],
			y:train_set_y[index * batch_size:(index + 1) * batch_size],
			y_matrix:train_y_matrix[index * batch_size:(index + 1) * batch_size],
			#K_d:K_d_g[index * batch_size:(index + 1) * batch_size],
			Inv_K_d:Inv_K_d_g[index * batch_size:(index + 1) * batch_size],
		},
		on_unused_input='warn'
	)

	valid_model = theano.function(
		inputs = [index,random_z],
		outputs = correct,
		#updates=updates,
		givens={
			x:valid_set_x[index * batch_size:(index + 1) * batch_size],
			y:valid_set_y[index * batch_size:(index + 1) * batch_size],
			y_matrix:valid_y_matrix[index * batch_size:(index + 1) * batch_size],
		},
		on_unused_input='warn'
	)

	test_model = theano.function(
		inputs = [index,random_z],
		outputs = [correct,y_gen],
		#updates=updates,
		givens={
			x:test_set_x[index * batch_size:(index + 1) * batch_size],
			y:test_set_y[index * batch_size:(index + 1) * batch_size],
			y_matrix:test_y_matrix[index * batch_size:(index + 1) * batch_size],
		},
		on_unused_input='warn'
	)

	n_epochs = 500
	cur_epoch = 0



	print "Training model ......"

	while (cur_epoch < n_epochs) :
		cur_epoch = cur_epoch + 1
		cor = 0
		for minibatch_index in xrange(n_train_batches):
			print minibatch_index,
			print " : ",
			correct,cost,a,b,y_gen = train_model(minibatch_index,gen_random_z(batch_size,hidden_dim))
			cor = cor + correct
			print correct
			print b
			print y_gen
		with open('log.txt','a') as f:
			print >>f , "epoch: " , cur_epoch, "training_correct: " , cor

		if cur_epoch % validation_frequency == 0:
			cor2 = 0
			for minibatch_index in xrange(n_valid_batches):
				correct = valid_model(minibatch_index,gen_random_z(batch_size,hidden_dim))
				cor2 = cor2 + correct
			with open('log.txt','a') as f:
				print >>f , "	validation_correct: " , cor2

		if cur_epoch % test_frequency == 0:
			cor2 = 0
			for minibatch_index in xrange(n_test_batches):
				correct,y_gen = test_model(minibatch_index,gen_random_z(batch_size,hidden_dim))
				with open('log.txt','a') as f:
					for index in range(batch_size):
						if not np.argmax(y_gen[index]) == test_set_y[minibatch_index * batch_size + index]:
							print >>f , "index: " , minibatch_index * batch_size + index, 'true Y: ', test_set_y[minibatch_index * batch_size + index]
							print >>f , 'gen_y: ' , y_gen[index]

				cor2 = cor2 + correct
			with open('log.txt','a') as f:
				print >>f , "	test_correct: " , cor2
		
		if cur_epoch % 1 == 0:
			model = parameters()
			for i in range(len(model)):
				model[i] = np.asarray(model[i]).astype(np.float32)
			np.savez('model-'+str(cur_epoch),model=model)
Example #27
def cmmd(dataset='mnist.pkl.gz',
         batch_size=100,
         layer_num=3,
         hidden_dim=5,
         seed=0,
         layer_size=[64, 256, 256, 512]):

    validation_frequency = 1
    test_frequency = 1
    pre_train = 1

    dim_input = (28, 28)
    colorImg = False

    print "Loading data ......."
    #datasets = datapy.load_data_gpu_60000_with_noise(dataset, have_matrix = True)
    datasets = datapy.load_data_gpu_60000(dataset, have_matrix=True)
    train_set_x, train_set_y, train_y_matrix = datasets[0]
    valid_set_x, valid_set_y, valid_y_matrix = datasets[1]
    test_set_x, test_set_y, test_y_matrix = datasets[2]

    rng = np.random.RandomState(seed)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)

    n_train_batches = train_set_x.get_value().shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    aImage = paramgraphics.mat_to_img(train_set_x.get_value()[0:169].T,
                                      dim_input,
                                      colorImg=colorImg)
    aImage.save('mnist_sample', 'PNG')

    ################################
    ##        build model         ##
    ################################
    print "Building model ......."

    index = T.lscalar()
    x = T.matrix('x')  ##### batch_size * 28^2
    y = T.vector('y')
    y_matrix = T.matrix('y_matrix')
    random_z = T.matrix('random_z')  ### batch_size * hidden_dim
    Inv_K_d = T.matrix('Inv_K_d')

    layers = []
    layer_output = []

    activation = nonlinearity.relu
    #activation = Tnn.sigmoid
    #### first layer
    layers.append(
        FullyConnected.FullyConnected(
            rng=rng,
            n_in=10 + hidden_dim,
            #n_in = 10,
            n_out=layer_size[0],
            activation=activation))
    layer_output.append(layers[-1].output_mix(input=[y_matrix, random_z]))
    #layer_output.append(layers[-1].output_mix2(input=[y_matrix,random_z]))
    #layer_output.append(layers[-1].output(input=x))
    #layer_output.append(layers[-1].output(input=random_z))

    #### middle layer
    for i in range(layer_num):
        layers.append(
            FullyConnected.FullyConnected(rng=rng,
                                          n_in=layer_size[i],
                                          n_out=layer_size[i + 1],
                                          activation=activation))
        layer_output.append(layers[-1].output(input=layer_output[-1]))

    #### last layer
    activation = Tnn.sigmoid
    #activation = nonlinearity.relu
    layers.append(
        FullyConnected.FullyConnected(rng=rng,
                                      n_in=layer_size[-1],
                                      n_out=28 * 28,
                                      activation=activation))
    x_gen = layers[-1].output(input=layer_output[-1])

    lambda1_ = 100
    lambda_ = theano.shared(np.asarray(lambda1_, dtype=np.float32))

    K_d = kernel_gram_for_y(y_matrix, y_matrix, batch_size, 10)
    K_s = K_d
    K_sd = K_d

    Invv_1 = T.sum(y_matrix, axis=0) / batch_size
    Invv = NL.alloc_diag(1 / Invv_1)
    Inv_K_d = Invv
    #Inv_K_d = NL.matrix_inverse(K_d +lambda_ * T.identity_like(K_d))
    Inv_K_s = Inv_K_d

    L_d = kernel_gram_for_x(x, x, batch_size, 28 * 28)
    L_s = kernel_gram_for_x(x_gen, x_gen, batch_size, 28 * 28)
    L_ds = kernel_gram_for_x(x, x_gen, batch_size, 28 * 28)
    '''
	cost = -(NL.trace(T.dot(T.dot(T.dot(K_d, Inv_K_d), L_d), Inv_K_d)) +\
			NL.trace(T.dot(T.dot(T.dot(K_s, Inv_K_s), L_s),Inv_K_s))- \
			2 * NL.trace(T.dot(T.dot(T.dot(K_sd, Inv_K_d) ,L_ds ), Inv_K_s)))
	'''
    '''
	cost = -(NL.trace(T.dot(L_d, T.ones_like(L_d) )) +\
			NL.trace(T.dot(L_s,T.ones_like(L_s)))- \
			2 * NL.trace(T.dot(L_ds,T.ones_like(L_ds) )))


	cost2 =  2 * T.sum(L_ds) - T.sum(L_s)  + NL.trace(T.dot(L_s, T.ones_like(L_s)))\
			- 2 * NL.trace( T.dot(L_ds , T.ones_like(L_ds)))
	cost2 = T.dot(T.dot(Inv_K_d, K_d),Inv_K_d)
	'''
    cost2 = K_d
    #cost2 = T.dot(T.dot(Inv_K_d,K_d),Inv_K_d)
    #cost =  - T.sum(L_d) +2 * T.sum(L_ds) - T.sum(L_s)
    cost2 = K_d
    cost2 = T.dot(T.dot(T.dot(y_matrix, Inv_K_d), Inv_K_d), y_matrix.T)

    cost = -(NL.trace(T.dot(T.dot(T.dot(T.dot(L_d, y_matrix),Inv_K_d), Inv_K_d),y_matrix.T)) +\
      NL.trace(T.dot(T.dot(T.dot(T.dot(L_s, y_matrix),Inv_K_s), Inv_K_s),y_matrix.T))- \
      2 * NL.trace(T.dot(T.dot(T.dot(T.dot(L_ds, y_matrix),Inv_K_d), Inv_K_s),y_matrix.T)))
    '''
	cost =  - T.sum(L_d) +2 * T.sum(L_ds) - T.sum(L_s)
	cost =  - NL.trace(K_s * Inv_K_s * L_s * Inv_K_s)+ \
			2 * NL.trace(K_sd * Inv_K_d * L_ds * Inv_K_s)
	'''

    ################################
    ##        updates             ##
    ################################
    params = []
    for aLayer in layers:
        params += aLayer.params
    gparams = [T.grad(cost, param) for param in params]

    learning_rate = 3e-4
    weight_decay = 1.0 / n_train_batches
    epsilon = 1e-8

    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    get_optimizer = optimizer.get_adam_optimizer_max(learning_rate=l_r,
                                                     decay1=0.1,
                                                     decay2=0.001,
                                                     weight_decay=weight_decay,
                                                     epsilon=epsilon)
    updates = get_optimizer(params, gparams)

    ################################
    ##         pretrain model     ##
    ################################
    parameters = theano.function(
        inputs=[],
        outputs=params,
    )

    gen_fig = theano.function(
        inputs=[y_matrix, random_z],
        outputs=x_gen,
        on_unused_input='warn',
    )

    if pre_train == 1:
        print "pre-training model....."
        pre_train = np.load('./result/MMD-100-5-64-256-256-512.npz')['model']
        for (para, pre) in zip(params, pre_train):
            para.set_value(pre)

        s = 8
        for jj in range(10):
            a = np.zeros((s, 10), dtype=np.float32)
            for ii in range(s):
                kk = random.randint(0, 9)
                a[ii, kk] = 1

            x_gen = gen_fig(a, gen_random_z(s, hidden_dim))

            ttt = train_set_x.get_value()
            for ll in range(s):
                minn = 1000000
                ss = 0
                for kk in range(ttt.shape[0]):
                    tt = np.linalg.norm(x_gen[ll] - ttt[kk])
                    if tt < minn:
                        minn = tt
                        ss = kk
                #np.concatenate(x_gen,ttt[ss])
                x_gen = np.vstack((x_gen, ttt[ss]))

            aImage = paramgraphics.mat_to_img(x_gen.T,
                                              dim_input,
                                              colorImg=colorImg)
            aImage.save('samples_' + str(jj) + '_similar', 'PNG')

    ################################
    ##         prepare data       ##
    ################################

    #### compute matrix inverse
    #print "Preparing data ...."
    #Invv = NL.matrix_inverse(K_d +lambda_ * T.identity_like(K_d))
    '''
	Invv_1 = T.sum(y_matrix,axis=0)/batch_size
	Invv = NL.alloc_diag(1/Invv_1)
	Inv_K_d = Invv

	prepare_data = theano.function(
			inputs = [index],
			outputs = [Invv,K_d],
			givens = {
				#x:train_set_x[index * batch_size:(index + 1) * batch_size],
				y_matrix:train_y_matrix[index * batch_size:(index + 1) * batch_size],
				}
			)

	Inv_K_d_l, K_d_l =  prepare_data(0)
	print Inv_K_d_l

	for minibatch_index in range(1, n_train_batches):
		if minibatch_index % 10 == 0:
			print 'minibatch_index:', minibatch_index
		Inv_pre_mini, K_d_pre_mini = prepare_data(minibatch_index)
		Inv_K_d_l = np.vstack((Inv_K_d_l,Inv_pre_mini))
		K_d_l = np.vstack((K_d_l,K_d_pre_mini))

	Inv_K_d_g = theano.shared(Inv_K_d_l,borrow=True)
	K_d_g = theano.shared(K_d_l, borrow=True)
	'''

    ################################
    ##         train model        ##
    ################################

    train_model = theano.function(
        inputs=[index, random_z],
        outputs=[cost, x_gen, cost2],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            y_matrix:
            train_y_matrix[index * batch_size:(index + 1) * batch_size],
            #K_d:K_d_g[index * batch_size:(index + 1) * batch_size],
            #Inv_K_d:Inv_K_d_g[index * batch_size:(index + 1) * batch_size],
        },
        on_unused_input='warn')

    n_epochs = 500
    cur_epoch = 0

    print "Training model ......"

    while (cur_epoch < n_epochs):
        cur_epoch = cur_epoch + 1
        cor = 0
        for minibatch_index in xrange(n_train_batches):
            print minibatch_index,
            print " : ",
            cost, x_gen, cost2 = train_model(
                minibatch_index, gen_random_z(batch_size, hidden_dim))
            print 'cost: ', cost
            print 'cost2: ', cost2
            if minibatch_index % 30 == 0:
                aImage = paramgraphics.mat_to_img(x_gen[0:1].T,
                                                  dim_input,
                                                  colorImg=colorImg)
                aImage.save(
                    'samples_epoch_' + str(cur_epoch) + '_mini_' +
                    str(minibatch_index), 'PNG')

        if cur_epoch % 1 == 0:
            model = parameters()
            for i in range(len(model)):
                model[i] = np.asarray(model[i]).astype(np.float32)
            np.savez('model-' + str(cur_epoch), model=model)