Example #1
    def get_cost_updates(self, x, W, W_prime, b, b_prime, corruption_level, learning_rate, l2reg=0., l1reg=0.):
        """ This function computes the cost and the updates for one trainng
        step of the dA """
        self.x = x
        self.W = W
        self.W_prime = W_prime
        self.b = b
        self.b_prime = b_prime
        self.params = [self.W, self.W_prime, self.b, self.b_prime]
        if corruption_level is None:
            tilde_x = self.x
        else:
            tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y       = self.get_hidden_values( tilde_x)
        z       = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using minibatches,
        #        L will  be a vector, with one entry per example in minibatch
        
        XE = self.x * T.log(z) + (1 - self.x) *  T.log(1-z)
        cost = -T.mean(T.sum(XE, axis=1),axis=0)
        
        if l2reg != 0.:
            cost += l2reg * (T.mean(T.sum(self.W*self.W,1),0) + T.mean(T.sum(self.W_prime*self.W_prime,1),0))
        if l1reg != 0.:
            # L1 sparsity penalty on the hidden code y
            cost += l1reg * T.mean(T.sum(T.abs_(y), 1), 0)
        # compute the gradients of the cost of the `dA` with respect
        # to its parameters 
        gparams = T.grad(cost, self.params)
#        # generate the list of updates
#        updates = {}
#        for param, gparam in zip(self.params, gparams):
#            updates[param] = param -  learning_rate*gparam
        # note : these are raw parameter deltas (-lr * grad); the caller is
        #        expected to apply them to the shared parameters
        updates = [-learning_rate*gparam for gparam in gparams]

        return (cost, updates)
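The cost above is the per-example binary cross-entropy summed over the visible units, then averaged over the minibatch. A minimal numpy sketch of that reduction, with made-up arrays standing in for the symbolic x and z:

import numpy as np

# hypothetical minibatch: 4 examples, 3 visible units each
x = np.array([[1., 0., 1.],
              [0., 0., 1.],
              [1., 1., 0.],
              [0., 1., 1.]])
z = np.clip(np.random.rand(4, 3), 1e-7, 1 - 1e-7)      # reconstructions in (0, 1)

# sum over the visible units of each example (axis=1), then mean over the batch,
# matching cost = -T.mean(T.sum(XE, axis=1), axis=0)
xe = x * np.log(z) + (1 - x) * np.log(1 - z)
cost = -xe.sum(axis=1).mean()
print(cost)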
Example #2
    def sample(self, Y):
        """ Given samples from the upper layer Y, sample values from X
            and return then together with their log probability.

        Parameters
        ----------
        Y:      T.tensor
            samples from the upper layer

        Returns
        -------
        X:      T.tensor
            samples from the lower layer
        log_p:  T.tensor
            log-posterior for the samples returned in X
        """
        n_X, = self.get_hyper_params(['n_X'])
        W, b = self.get_model_params(['W', 'b'])

        n_samples = Y.shape[0]

        # sample X given Y
        prob_X = self.sigmoid(T.dot(Y, W) + b)
        U = theano_rng.uniform((n_samples, n_X), nstreams=512)
        X = T.cast(U <= prob_X, dtype=floatX)

        log_prob = X*T.log(prob_X) + (1-X)*T.log(1-prob_X)
        log_prob = log_prob.sum(axis=1)

        return X, log_prob
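The sampling here is the usual uniform-threshold trick: draw U ~ Uniform(0, 1) elementwise, set X = 1 where U <= prob_X, and then score X under the same Bernoulli probabilities. A small numpy sketch of the idea (shapes and values are illustrative, not the layer's actual parameters):

import numpy as np

rng = np.random.default_rng(0)
prob_X = rng.uniform(0.05, 0.95, size=(2, 5))   # stands in for sigmoid(Y.W + b)
U = rng.uniform(size=prob_X.shape)
X = (U <= prob_X).astype(np.float64)            # Bernoulli samples

# log p(X | prob_X), summed over the units of each sample (axis=1)
log_prob = (X * np.log(prob_X) + (1 - X) * np.log(1 - prob_X)).sum(axis=1)
print(X)
print(log_prob)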
Example #3
    def get_cost_updates(self, corruption_level, learning_rate):
        """ This function computes the cost and the updates for one trainng
        step of the dA """

        tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(L)

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        return (cost, updates)
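The (param, param - learning_rate * gparam) pairs are exactly what theano.function expects as its updates argument: each call then overwrites the shared variables in place. A self-contained toy sketch of that pattern, with a quadratic cost standing in for the dA cost (purely illustrative):

import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='w')
x = T.vector('x')

cost = T.sum((x - w) ** 2)
gw = T.grad(cost, w)
updates = [(w, w - 0.1 * gw)]        # same (param, param - lr * grad) structure

train = theano.function([x], cost, updates=updates)
print(train(np.asarray([1., 2., 3.], dtype=theano.config.floatX)))
print(w.get_value())                 # w has moved towards the data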
Example #4
 def cross_entropy(self, y):

     #return (-(y * T.log(self.y) + (1.0 - y) * T.log(1.0 - self.y))).mean()
     #return T.nnet.binary_crossentropy(self.y, y).mean()
     # clip predictions away from exact 0/1 so T.log never produces -inf
     y_used = T.clip(self.y, 0.0000001, 0.999999999)
     return T.mean(-y * T.log(y_used) - (1 - y) * T.log(1 - y_used))
Example #5
    def get_reconstruction_cost(self, updates, pre_sigmoid_nv):
        """ Approximation to the recontruction error

        Note that this function requires the pre-sigmoid actiavtion as input. To
        undertstand why this is so you need to understand a bit about how Theano works.
        Whenever you compile a Theano function, the computational graph that you pass as input
        gets optimized for speed and stability. This is done by changing several parts of
        the subgraphs with others. One such optimization expresses terms of softplus. We need this
        optimizations for the cross-entropy since sigmoid of numbers larger than 30. (or even less
        then that) return to 1. and numbers of smaller than -30, turn to 0 which ini terms will force
        theano to compute log(0) and thereforce we will get either -inf or NaN as cost. If the value is
        expressed in terms of softplus we do not get this undersirable behaviour. This optimiation usually
        works fine, but here we have a special case. The sigmoid is applied inside the scan op, while
        the log is outisde. Therefore Theano will only see log(scan(...)) instead of log(sigmoid(..))
        and will not apply the wanted optimization. We can not go and replace the sigmoid in scan
        with something else alse, because this only needs to be done on the last step. Therefore the
        easiest adn more efficient way is to get also teh pre-sigmoid activation as an output of
        scan, and apply both the log and sigmoid outside scan sunch that Theano can catch and optimize
        the expression.
        """

        cross_entropy = T.mean(
            T.sum(
                self.input * T.log(T.nnet.sigmoid(pre_sigmoid_nv)) +
                ( 1 - self.input) * T.log(1 - T.nnet.sigmoid(pre_sigmoid_nv)),
                axis=1
            )
        )

        return  cross_entropy
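The stability issue the docstring describes comes down to two identities: log(sigmoid(x)) = -softplus(-x) and log(1 - sigmoid(x)) = -softplus(x), which stay finite where the naive forms underflow to log(0). A small numpy sketch of the difference (float64 here; with float32 the saturation happens at much smaller |x|):

import numpy as np

def softplus(x):
    # numerically stable log(1 + exp(x))
    return np.maximum(x, 0) + np.log1p(np.exp(-np.abs(x)))

x = np.array([-800., -30., 0., 30., 800.])

naive = np.log(1.0 / (1.0 + np.exp(-x)))   # log(sigmoid(x)); -inf at x = -800
stable = -softplus(-x)                      # same quantity, finite everywhere

print(naive)
print(stable)

The test in Example #22 below checks that Theano actually performs this rewrite when it can see log(1 - sigmoid(x)) as a single expression.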
Example #6
 def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
     sigma2 = tt.square(sigma)
     Kuu = self.cov_func(Xu)
     Kuf = self.cov_func(Xu, X)
     Luu = cholesky(stabilize(Kuu))
     A = solve_lower(Luu, Kuf)
     Qffd = tt.sum(A * A, 0)
     if self.approx == "FITC":
         Kffd = self.cov_func(X, diag=True)
         Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
         trace = 0.0
     elif self.approx == "VFE":
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = ((1.0 / (2.0 * sigma2)) *
                  (tt.sum(self.cov_func(X, diag=True)) -
                   tt.sum(tt.sum(A * A, 0))))
     else:  # DTC
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = 0.0
     A_l = A / Lamd
     L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
     r = y - self.mean_func(X)
     r_l = r / Lamd
     c = solve_lower(L_B, tt.dot(A, r_l))
     constant = 0.5 * X.shape[0] * tt.log(2.0 * np.pi)
     logdet = 0.5 * tt.sum(tt.log(Lamd)) + tt.sum(tt.log(tt.diag(L_B)))
     quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c))
     return -1.0 * (constant + logdet + quadratic + trace)
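The cheap logdet term relies on the matrix determinant lemma: with A = Luu^{-1} Kuf and Qff = A^T A, det(Qff + diag(Lamd)) = det(diag(Lamd)) * det(I + A diag(Lamd)^{-1} A^T), which is why 0.5*sum(log Lamd) plus the log-diagonal of the Cholesky factor L_B gives half the full log-determinant. A small numpy check of that identity on random matrices (arbitrary shapes, purely a sanity check):

import numpy as np

rng = np.random.default_rng(1)
m, n = 3, 6                                  # m inducing points, n data points
A = rng.normal(size=(m, n))
Lam = rng.uniform(0.5, 2.0, size=n)          # diagonal noise/correction terms

full = A.T @ A + np.diag(Lam)                # Qff + diag(Lam)

B = np.eye(m) + (A / Lam) @ A.T              # I + A diag(Lam)^-1 A^T
L_B = np.linalg.cholesky(B)

lhs = 0.5 * np.linalg.slogdet(full)[1]
rhs = 0.5 * np.log(Lam).sum() + np.log(np.diag(L_B)).sum()
print(lhs, rhs)                              # agree to numerical precision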
Example #7
File: AE.py Project: felidadae/dnn
    def compileFunctions(self, x_image_global, examples, ib, B, K, corrupt):
        if x_image_global is None:
            x_image_global = self.x

        if corrupt == 0.0:
            self.x_c = self.x
        else:
            self.x_c = self.theano_rng.binomial(
                size=self.x.shape, n=1, p=1-corrupt,
                dtype=theano.config.floatX) * self.x

        self.h = self.g(T.dot(self.x_c, self.W_hl) + self.b_hl)
        self.x_r = self.o(T.dot(self.h, self.W_ol) + self.b_ol)
        self.params = [self.W_hl, self.b_hl, self.b_ol]
        self.cost = \
            (- T.sum(
                self.x * T.log(self.x_r) + (1 - self.x) * T.log(1 - self.x_r),
                axis=(0,1)))

        gparams = T.grad(self.cost, self.params)
        updates = [
            (param, param - K * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        fun_train = theano.function(
            inputs=[ib],
            outputs=(self.cost, self.x_r, self.x_c),
            updates=updates,
            givens={
                x_image_global: examples[ib*B: (ib+1)*B]
            }
        )

        return fun_train
Example #8
def binomial_lpdf(node, x, kw):
    random_state, size, n, p = node.inputs

    # for the n > 1 the "choose" operation is required
    # TODO assert n == 1

    return tensor.switch(tensor.eq(x, 1.0), tensor.log(p), tensor.log(1.0 - p))
Example #9
def _elbo_t(logp, uw, inarray, n_mcsamples, random_seed):
    """Create Theano tensor of approximate ELBO by Monte Carlo sampling.
    """
    l = (uw.size / 2)
    l_int = l.astype('int64')
    u = uw[:l_int]
    w = uw[l_int:]

    # Callable tensor
    def logp_(input):
        return theano.clone(logp, {inarray: input}, strict=False)

    # Naive Monte-Carlo
    if random_seed is None:
        r = MRG_RandomStreams(gen_random_state())
    else:
        r = MRG_RandomStreams(seed=random_seed)

    if n_mcsamples == 1:
        n = r.normal(size=inarray.tag.test_value.shape)
        q = n * tt.exp(w) + u
        elbo = logp_(q) + tt.sum(w) + 0.5 * l * (1 + tt.log(2.0 * np.pi))
    else:
        n = r.normal(size=(n_mcsamples, u.tag.test_value.shape[0]))
        qs = n * tt.exp(w) + u
        logps, _ = theano.scan(fn=lambda q: logp_(q),
                               outputs_info=None,
                               sequences=[qs])
        elbo = tt.mean(logps) + tt.sum(w) + 0.5 * l * (1 + tt.log(2.0 * np.pi))

    return elbo
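The constant added to logp_(q), namely tt.sum(w) + 0.5 * l * (1 + log(2*pi)), is the entropy of the fully factorised Gaussian q with means u and log-standard-deviations w, so elbo is the usual E_q[log p] + H[q] estimated from one or n_mcsamples draws. A quick numpy check of that entropy term against scipy's closed form:

import numpy as np
from scipy.stats import norm

w = np.array([-0.3, 0.0, 0.7])                # log standard deviations
D = w.size

entropy_term = 0.5 * D * (1 + np.log(2 * np.pi)) + w.sum()
scipy_entropy = sum(norm(scale=s).entropy() for s in np.exp(w))
print(entropy_term, scipy_entropy)            # identical up to rounding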
Example #10
 def logp(self, value):
     mu = self.mu
     tau = self.tau
     return bound(-0.5 * tau * (T.log(value) - mu)**2
                  + 0.5 * T.log(tau/(2. * np.pi))
                  - T.log(value),
                  tau > 0)
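This is the log-density of a lognormal distribution with location mu and precision tau = 1/sigma**2. A small numpy sketch checking it against scipy's parameterisation (s = sigma, scale = exp(mu)):

import numpy as np
from scipy.stats import lognorm

mu, tau = 0.5, 4.0
sigma = 1.0 / np.sqrt(tau)
value = np.array([0.2, 1.0, 3.5])

logp = (-0.5 * tau * (np.log(value) - mu) ** 2
        + 0.5 * np.log(tau / (2.0 * np.pi))
        - np.log(value))

print(logp)
print(lognorm(s=sigma, scale=np.exp(mu)).logpdf(value))   # same values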
Example #11
 def logp(self, value):
     alpha = self.alpha
     beta = self.beta
     return bound(T.log(alpha) - T.log(beta)
                  + (alpha - 1) * T.log(value/beta)
                  - (value/beta)**alpha,
                  value >= 0, alpha > 0, beta > 0)
Example #12
    def grad_init(self):
        mask_ = self.mask.flatten()
        rewards_ = self.rewards.flatten()
        actions_ = self.actions.reshape([self.actions.shape[0]*self.actions.shape[1],-1])

        #self.mov_std = theano.shared(numpy.float32(1.), 'std')

        pp = self.params.values()
        mean_rewards = (mask_ * rewards_).sum(-1, keepdims=True) / tensor.maximum(1., mask_.sum(-1, keepdims=True))
        centered_rewards = rewards_ - self.vapprox.v[:,0] - mean_rewards
        mean2_rewards = (mask_ * (rewards_ ** 2)).sum(-1, keepdims=True) / tensor.maximum(1., mask_.sum(-1, keepdims=True))
        var_rewards = mean2_rewards - (mean_rewards ** 2)
        scaled_rewards = centered_rewards  / tensor.maximum(1., tensor.sqrt(tensor.maximum(0., var_rewards)))
        #scaled_rewards = centered_rewards

        logprob = 0.
        reg = 0.
        for oi in xrange(self.n_out):
            labs = actions_[:,oi].flatten()
            labs_idx = tensor.arange(labs.shape[0]) * self.out_dim + labs
            logprob = logprob + (mask_ * tensor.log(self.pi[oi].flatten()+1e-6)[labs_idx])
            reg = reg - (self.pi[oi] * tensor.log(self.pi[oi]+1e-6)).sum(-1).sum(0)

        self.cost = -tensor.mean(scaled_rewards * logprob + self.reg_c * reg)
        self.grads = tensor.grad(self.cost, wrt=pp)
Example #13
    def get_cost(self, p=0, sigma=1):
        # the last layer    
        z = self.sigmoid_layers[-1].output
        L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        p_idx = len(self.sigmoid_layers)/2 - 1 #penalty layer, the middle layer        
        if p == 0:
            cost = T.mean(L)
#             cost = T.mean(T.sqrt(T.mean(self.errors, axis=1))) #Log Spectral Distance(LSD)
        elif (p != 0) and (sigma == 0):# for square penalty
            square_cost = self.get_square_cost(self.sigmoid_layers[p_idx].output, p)
            cost = T.mean(L) + T.mean(square_cost)
        elif(p != 0) and (sigma != 0):# for Gaussian penalty
            gaussian_cost = self.get_gaussian_cost(self.sigmoid_layers[p_idx].output, p, sigma)
            cost = T.mean(L) + T.mean(gaussian_cost)
#         elif(p == -1) and (sigma == 0):#binary
#             code_val = self.sigmoid_layers[p_idx].output
#             binary_val = code_val>=0.5
#             self.sigmoid_layers[p_idx+1].input = binary_val
#             z = self.sigmoid_layers[-1].output
#             L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
#             cost = T.mean(L)
#         elif(p == -1) and (sigma != 0):#add gaussian noise
#             gaussian_data = self.theano_rng.normal(size=self.sigmoid_layers[p_idx-1].output.shape, std=sigma,
#                                                    dtype=theano.config.floatX)
#             self.sigmoid_layers[p_idx].input = self.sigmoid_layers[p_idx-1].output + gaussian_data
#             z = self.sigmoid_layers[-1].output
#             L = -T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
#             cost = T.mean(L)
        else:
            cost = T.mean(L)
        return cost
Example #14
def unet_crossentropy_loss_sampled(y_true, y_pred):
    print 'unet_crossentropy_loss_sampled'
    epsilon = 1.0e-4
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
    y_true = T.flatten(y_true)
    # this seems to work
    # it is super ugly though and I am sure there is a better way to do it
    # but I am struggling with theano to cooperate
    # filter the right indices
    indPos = T.nonzero(y_true)[0] # no idea why this is a tuple
    indNeg = T.nonzero(1-y_true)[0]
    # shuffle
    n = indPos.shape[0]
    indPos = indPos[srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[srng.permutation(n=n)]
    # take equal number of samples depending on which class has less
    n_samples = T.cast(T.min([T.sum(y_true), T.sum(1-y_true)]), dtype='int64')

    indPos = indPos[:n_samples]
    indNeg = indNeg[:n_samples]
    loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(1-y_pred_clipped[indNeg]))
    average_loss = T.mean(loss_vector)
    print 'average_loss:', average_loss
    return average_loss
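The comments describe the sampling scheme: split the flattened labels into positive and negative indices, shuffle each, and keep an equal number from both classes before averaging the log terms. A small numpy sketch of that balancing step (random data, purely illustrative):

import numpy as np

rng = np.random.default_rng(0)
y_true = (rng.uniform(size=20) < 0.2).astype(np.float64)     # imbalanced labels
y_pred = np.clip(rng.uniform(size=20), 1e-4, 1 - 1e-4)

ind_pos = np.flatnonzero(y_true)
ind_neg = np.flatnonzero(1 - y_true)
rng.shuffle(ind_pos)
rng.shuffle(ind_neg)

n = min(ind_pos.size, ind_neg.size)                          # equal counts per class
loss = (-np.log(y_pred[ind_pos[:n]]).mean()
        - np.log(1 - y_pred[ind_neg[:n]]).mean())
print(loss)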
Example #15
def get_model(Ws, bs, dropout=False):
    v = T.matrix('input')
    m = T.matrix('missing')
    q = T.matrix('target')
    k = T.vector('normalization factor')

    # Set all missing/target values to 0.5
    keep_mask = (1-m) * (1-q)
    h = keep_mask * (v * 2 - 1) # Convert to +1, -1
    
    # Normalize layer 0
    h *= k.dimshuffle(0, 'x')

    for l in xrange(len(Ws)):
        h = T.dot(h, Ws[l]) + bs[l]

        if l < len(Ws) - 1:
            h = h * (h > 0) # relu
            if dropout:
                mask = srng.binomial(n=1, p=0.5, size=h.shape)
                h = h * mask * 2

    output = sigmoid(h)
    LL = v * T.log(output) + (1 - v) * T.log(1 - output)
    # loss = -(q * LL).sum() / q.sum()
    loss = -((1 - m) * LL).sum() / (1 - m).sum()

    return v, m, q, k, output, loss
Example #16
 def expr(self, model, data):
     
     v = data
     mid = model.get_enc(v)
     rou_mid = mid.mean(axis=0)
     cs294_sparse = (self.rou * T.log(self.rou / rou_mid) + (1 - self.rou) * T.log((1 - self.rou) / (1 - rou_mid))).sum()
     return cs294_sparse
Example #17
def GMM(y, mu, sig, coeff):
    """
    Gaussian mixture model negative log-likelihood
    Parameters
    ----------
    y     : TensorVariable
    mu    : FullyConnected (Linear)
    sig   : FullyConnected (Softplus)
    coeff : FullyConnected (Softmax)
    """

    n_dim = y.ndim
    shape_y = y.shape
    y = y.reshape((-1, shape_y[-1]))
    y = y.dimshuffle(0, 1, "x")

    mu = mu.reshape((-1, mu.shape[-1] / coeff.shape[-1], coeff.shape[-1]))
    sig = sig.reshape((-1, sig.shape[-1] / coeff.shape[-1], coeff.shape[-1]))
    coeff = coeff.reshape((-1, coeff.shape[-1]))

    inner = -0.5 * T.sum(T.sqr(y - mu) / sig ** 2 + 2 * T.log(sig) + T.log(2 * np.pi), axis=-2)

    nll = -logsumexp(T.log(coeff) + inner, axis=-1)

    # Adjust dimension
    new_dim = T.set_subtensor(shape_y[-1], 1)

    nll = nll.reshape(new_dim, ndim=n_dim)
    nll = nll.flatten(n_dim - 1)

    return nll
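The negative log-likelihood of a diagonal-covariance Gaussian mixture is computed per component (inner) and then combined with the mixture weights through logsumexp, which avoids underflow when all component likelihoods are tiny. A numpy sketch of the same computation with made-up shapes, using scipy's logsumexp in place of the module's helper:

import numpy as np
from scipy.special import logsumexp

rng = np.random.default_rng(0)
n, d, k = 5, 2, 3                          # 5 points, 2 dims, 3 mixture components
y = rng.normal(size=(n, d, 1))             # broadcast against the components
mu = rng.normal(size=(n, d, k))
sig = rng.uniform(0.5, 1.5, size=(n, d, k))
coeff = rng.dirichlet(np.ones(k), size=n)  # mixture weights, rows sum to 1

# log N(y | mu_k, diag(sig_k**2)), summed over the d dimensions (axis=-2)
inner = -0.5 * np.sum((y - mu) ** 2 / sig ** 2 + 2 * np.log(sig) + np.log(2 * np.pi),
                      axis=-2)

nll = -logsumexp(np.log(coeff) + inner, axis=-1)
print(nll)                                  # one value per data point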
Example #18
    def lp_norm(self, n, k, r, c, z):
        '''
        Lp = ( 1/n * sum(|x_i|^p, 1..n))^(1/p) where p = 1 + ln(1+e^P)
        :param n:
        :param k:
        :param r:
        :param c:
        :param z:
        :return:
        '''
        ds0, ds1 = self.pool_size
        st0, st1 = self.stride
        pad_h = self.pad[0]
        pad_w = self.pad[1]

        row_st = r * st0
        row_end = T.minimum(row_st + ds0, self.img_rows)
        row_st = T.maximum(row_st, self.pad[0])
        row_end = T.minimum(row_end, self.x_m2d + pad_h)

        col_st = c * st1
        col_end = T.minimum(col_st + ds1, self.img_cols)
        col_st = T.maximum(col_st, self.pad[1])
        col_end = T.minimum(col_end, self.x_m1d + pad_w)

        Lp = T.pow(
                T.mean(T.pow(
                        T.abs_(T.flatten(self.y[n, k, row_st:row_end, col_st:col_end], 1)),
                        1 + T.log(1 + T.exp(self.P))
                )),
                1 / (1 + T.log(1 + T.exp(self.P)))
        )

        return T.set_subtensor(z[n, k, r, c], Lp)
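The docstring gives the pooling formula directly: Lp = (1/n * sum(|x_i|^p))^(1/p) with p = 1 + ln(1 + e^P), so p is always >= 1 and grows with the learned parameter P. A tiny numpy sketch of that formula applied to one pooling window:

import numpy as np

P = 2.0
p = 1.0 + np.log1p(np.exp(P))                 # p = 1 + ln(1 + e^P)

window = np.array([0.1, -0.4, 0.3, 0.9])      # values inside one pooling window
lp = np.mean(np.abs(window) ** p) ** (1.0 / p)
print(p, lp)                                  # large p pushes lp towards max(|x|)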
Example #19
def sequence_log_likelihood(y, y_hat, y_mask, y_hat_mask, blank_symbol, log_scale=True):
    """
    Based on code from Shawn Tan.
    Credits to Kyle Kastner as well.

    This function computes the CTC log likelihood for a sequence that has
    been augmented with blank labels.


    """
    y_hat_mask_len = tensor.sum(y_hat_mask, axis=0, dtype="int32")
    y_mask_len = tensor.sum(y_mask, axis=0, dtype="int32")

    if log_scale:
        log_probabs = _log_path_probabs(y, T.log(y_hat), y_mask, y_hat_mask, blank_symbol)
        batch_size = log_probabs.shape[1]

        # Add the probabilities of the final time steps to get the total
        # sequence likelihood.
        log_labels_probab = _log_add(
            log_probabs[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 1],
            log_probabs[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 2],
        )
    else:
        probabilities = _path_probabs(y, y_hat, y_mask, y_hat_mask, blank_symbol)
        batch_size = probabilities.shape[1]
        labels_probab = (
            probabilities[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 1]
            + probabilities[y_hat_mask_len - 1, tensor.arange(batch_size), y_mask_len - 2]
        )
        log_labels_probab = tensor.log(labels_probab)
    return log_labels_probab
Example #20
 def __init__(self, n_in, n_out, n_h, learning_rate=0.12):
    self.x = T.matrix(dtype=theano.config.floatX)  # @UndefinedVariable
    self.target = T.matrix(dtype=theano.config.floatX)  # @UndefinedVariable
    bound_x = numpy.sqrt(6. / (n_in + n_h))
    bound_h = numpy.sqrt(6. / (n_h + n_h))
    self.params = []
    self.w_x = theano.shared(np.array(np.random.uniform(low=-bound_x, high=bound_x, size=(n_in, n_h)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.w_x)
    self.w_h = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_h, n_h)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.w_h)
    self.b_h = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_h,)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.b_h)
    self.w = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_h, n_out)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.w)
    self.b = theano.shared(np.array(np.random.uniform(low=-bound_h, high=bound_h, size=(n_out,)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.b)
    self.h0 = theano.shared(np.array(np.random.uniform(low=-bound_x, high=bound_x, size=(n_h,)), dtype=theano.config.floatX))  # @UndefinedVariable
    self.params.append(self.h0)
    
    def one_step(x, h1):
        h = T.nnet.sigmoid(T.dot(x, self.w_x) + T.dot(h1, self.w_h) + self.b_h)
        y = T.nnet.sigmoid(T.dot(h, self.w) + self.b)
        return h, y
    
    [hs, ys], _ = theano.scan(fn=one_step, sequences=self.x, outputs_info=[self.h0, None])
    cost = -T.mean(self.target * T.log(ys) + (1 - self.target) * T.log(1 - ys))
    grads = T.grad(cost, self.params)
    
    updates = [(param, param - learning_rate * grad) for param, grad in zip(self.params, grads)]
    
    self.train = theano.function([self.x, self.target], cost, updates=updates)
    
    self.predict = theano.function([self.x], ys)
Example #21
    def get_cost_updates(self, contraction_level, learning_rate):
        """ This function computes the cost and the updates for one trainng
        step of the cA """

        y = self.get_hidden_values(self.x)
        z = self.get_reconstructed_input(y)
        J = self.get_jacobian(y, self.W)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        self.L_rec = - T.sum(self.x * T.log(z) +
                             (1 - self.x) * T.log(1 - z),
                             axis=1)

        # Compute the jacobian and average over the number of samples/minibatch
        self.L_jacob = T.sum(J ** 2) / self.n_batchsize

        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(self.L_rec) + contraction_level * T.mean(self.L_jacob)

        # compute the gradients of the cost of the `cA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - learning_rate * gparam))

        return (cost, updates)
Example #22
    def test_log1msigm_to_softplus(self):
        x = T.matrix()

        out = T.log(1 - sigmoid(x))
        f = theano.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 2
        assert isinstance(topo[0].op.scalar_op,
                          theano.tensor.nnet.sigm.ScalarSoftplus)
        assert isinstance(topo[1].op.scalar_op, theano.scalar.Neg)
        f(numpy.random.rand(54, 11).astype(config.floatX))

        # Same test with a flatten
        out = T.log(1 - T.flatten(sigmoid(x)))
        f = theano.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        assert len(topo) == 3
        assert isinstance(topo[0].op, T.Flatten)
        assert isinstance(topo[1].op.scalar_op,
                          theano.tensor.nnet.sigm.ScalarSoftplus)
        assert isinstance(topo[2].op.scalar_op, theano.scalar.Neg)
        f(numpy.random.rand(54, 11).astype(config.floatX))

        # Same test with a reshape
        out = T.log(1 - sigmoid(x).reshape([x.size]))
        f = theano.function([x], out, mode=self.m)
        topo = f.maker.fgraph.toposort()
        #assert len(topo) == 3
        assert any(isinstance(node.op, T.Reshape) for node in topo)
        assert any(isinstance(getattr(node.op, 'scalar_op', None),
                              theano.tensor.nnet.sigm.ScalarSoftplus)
                   for node in topo)
        f(numpy.random.rand(54, 11).astype(config.floatX))
Example #23
    def negative_log_likelihood(self, y):
        """ Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::
            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|} \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
                    \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """

        # y.shape[0] is (symbolically) the number of rows in y, i.e.,
        # number of examples (call it n) in the minibatch
        # T.arange(y.shape[0]) is a symbolic vector which will contain
        # [0,1,2,... n-1] T.log(self.p_y_given_x) is a matrix of
        # Log-Probabilities (call it LP) with one row per example and
        # one column per class LP[T.arange(y.shape[0]),y] is a vector
        # v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
        # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
        # the mean (across minibatch examples) of the elements in v,
        # i.e., the mean log-likelihood across the minibatch.
        if self.is_binary:
            return -T.mean(T.log(self.p_y_given_x))
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
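The long comment describes the advanced-indexing pattern LP[T.arange(n), y], which picks, for each row, the log-probability of that example's correct class. The same pattern in plain numpy, with a made-up probability matrix:

import numpy as np

# toy log-probability matrix: 4 examples, 3 classes
LP = np.log(np.array([[0.70, 0.20, 0.10],
                      [0.10, 0.80, 0.10],
                      [0.30, 0.30, 0.40],
                      [0.25, 0.50, 0.25]]))
y = np.array([0, 1, 2, 1])                     # correct class of each example

picked = LP[np.arange(y.shape[0]), y]          # [LP[0,0], LP[1,1], LP[2,2], LP[3,1]]
nll = -picked.mean()
print(picked)
print(nll)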
Example #24
def simple_RNN(nh):
  Wx = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (1, nh)).astype(theano.config.floatX))
  Wh = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nh)).astype(theano.config.floatX))
  Wy = theano.shared(0.2 * numpy.random.uniform(-1.0, 1.0, (nh, 1)).astype(theano.config.floatX))
  bh = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))
  by = theano.shared(numpy.zeros(1, dtype=theano.config.floatX))
  h0 = theano.shared(numpy.zeros(nh, dtype=theano.config.floatX))
  p = [Wx, Wh, Wy, bh, by, h0]

  x = T.matrix()

  def recurrence(x_t, h_tm1):
    h_t = T.tanh(T.dot(x_t, Wx) + T.dot(h_tm1, Wh) + bh)
    s_t = T.dot(h_t, Wy) + by
    return [h_t, s_t]

  ([h, activations], updates) = theano.scan(fn=recurrence, sequences=x, outputs_info=[h0, dict()])

  t = x[0, 0]
  s = activations[-1, 0]
  y = T.nnet.sigmoid(s)
  loss = -t*T.log(y + 1e-14) - (1-t)*T.log((1-y) + 1e-14)
  acc = T.neq(T.round(y), t)
  
  return p, [x], s, [loss, acc], h
Example #25
    def get_cost_updates(self, contraction_level, learning_rate, cost_measure="cross_entropy"):
        """ This function computes the cost and the updates for one trainng
        step of the cA """

        y = self.get_hidden_values(self.x)
        z = self.get_reconstructed_input(y)
        J = self.get_jacobian(y, self.W)

        if cost_measure=="cross_entropy":
            #self.L_rec = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
            self.L_rec = T.mean(- T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z),axis=1))
        elif cost_measure=="euclidean":
            self.L_rec = T.mean(T.sum((self.x-z)**2,axis=1)) 
            
        # Compute the jacobian and average over the number of samples/minibatch
        self.L_jacob = T.mean(T.sum(J ** 2) / self.n_batchsize)
        
        cost = self.L_rec + contraction_level * self.L_jacob

        # compute the gradients of the cost of the `cA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - learning_rate * gparam))

        return (cost, updates)
Example #26
 def forward_jacobian_log_det(self, x):
     dy_dx, _ = th.scan(lambda x_i: th.grad(self.forward_func(x_i), x_i),
                        sequences=[x.flatten()])
     if self.fudge != 0.:
         return tt.log(dy_dx + self.fudge).sum()
     else:
         return tt.log(dy_dx).sum()
Example #27
 def forward_jacobian_log_det(self, x):
     y_sum = self.forward_map(x).sum()
     dy_dx = th.grad(y_sum, x)
     if self.fudge != 0.:
         return tt.log(dy_dx + self.fudge).sum()
     else:
         return tt.log(dy_dx).sum()
Example #28
    def get_sparsity_cost(self):

        # update mean activation using exponential moving average
        hack_h = self.h_given_v(self.sp_pos_v)

        # define loss based on value of sp_type
        if self.sp_type == 'kl':
            eps = npy_floatX(1./self.batch_size)
            loss = lambda targ, val: - npy_floatX(targ) * T.log(eps + val) \
                                     - npy_floatX(1-targ) * T.log(1 - val + eps)
        else:
            raise NotImplementedError('Sparsity type %s is not implemented' % self.sp_type)

        cost = T.zeros((), dtype=floatX)

        params = []
        if self.sp_weight['h']: 
            cost += self.sp_weight['h']  * T.sum(loss(self.sp_targ['h'], hack_h.mean(axis=0)))
            params += [self.hbias]

        if self.sp_type in ['kl'] and self.sp_weight['h']:
            params += [self.Wv, self.alpha, self.mu]
            if self.flags['split_norm']:
                params += [self.scalar_norms]

        return costmod.Cost(cost, params)
Example #29
 def forward_jacobian_log_det(self, x):
     if x.ndim == 1:
         return tt.log(tt.abs_(self.diag_weights)).sum()
     elif x.ndim == 2:
         return x.shape[0] * tt.log(tt.abs_(self.diag_weights)).sum()
     else:
         raise ValueError('x must be one or two dimensional.')
Example #30
    def get_reconstruction_cost(self, updates, pre_sigmoid_nv):
        cross_entropy = T.mean(
                T.sum(self.input * T.log(T.nnet.sigmoid(pre_sigmoid_nv)) +
                (1 - self.input) * T.log(1 - T.nnet.sigmoid(pre_sigmoid_nv)),
                      axis=1))

        return cross_entropy
Example #31
 def hinge_loss(self, y):
     return -T.mean(T.log(self.p_y_given_x)[:,y]) # TODO
Example #32
 def forward(self, x):
     return tt.switch(x < 1, tt.log(x), x - 1.0)
Example #33
def log(x):
    return T.log(x)
Example #34
 def logp(self, value):
     w = self.w
     
     return bound(logsumexp(tt.log(w) + self._comp_logp(value), axis=-1).sum(),
                  w >= 0, w <= 1, tt.allclose(w.sum(axis=-1), 1))
Example #35
    def __init__(self, dim, n_entities, batch_size=None, validation_samples=2):

        self.__dict__.update(locals())
        del self.self

        theano_rng = RandomStreams(numpy.random.randint(2**30))

        #Start by defining the graph

        ##Parameter setup
        self.emb = theano.shared((numpy.random.uniform(
            -1.0, 1.0,
            (self.n_entities, self.dim))).astype(theano.config.floatX))
        self.emb.tag.test_value = (numpy.random.uniform(
            -1.0, 1.0,
            (self.n_entities, self.dim))).astype(theano.config.floatX)

        self.a = theano.shared(numpy.asarray(1.0).astype(theano.config.floatX))
        self.b = theano.shared(numpy.asarray(0.0).astype(theano.config.floatX))

        self.params = [self.emb, self.a, self.b]

        ### Input setup!
        self.x1_idxs = T.ivector()
        self.x2_idxs = T.ivector()
        self.x1_idxs.tag.test_value = numpy.asarray([0, 1], dtype=numpy.int32)
        self.x2_idxs.tag.test_value = numpy.asarray([1, 2], dtype=numpy.int32)

        #generate negative samples
        choice = theano_rng.binomial(size=self.x1_idxs.shape)
        alternative = theano_rng.random_integers(size=self.x1_idxs.shape,
                                                 low=0,
                                                 high=n_entities - 1)
        self.x1_idxs_negative = T.switch(choice, self.x1_idxs, alternative)
        self.x2_idxs_negative = T.switch(choice, alternative, self.x2_idxs)

        ### Define graph from input to predictive loss
        def get_embed(index_tensor):
            return sigmoid(self.emb[index_tensor].reshape(
                (index_tensor.shape[0], self.dim)))

        x1_emb = get_embed(self.x1_idxs)
        x2_emb = get_embed(self.x2_idxs)
        x1neg_emb = get_embed(self.x1_idxs_negative)
        x2neg_emb = get_embed(self.x2_idxs_negative)

        def get_prob1(embed_tensor1, embed_tensor2):
            return sigmoid(
                self.a * T.mean(embed_tensor1 * embed_tensor2 +
                                (1 - embed_tensor1) * (1 - embed_tensor2),
                                axis=1) +
                self.b)  #probability of a link, 0 to 1.'

        self.loss = T.mean(-T.log(get_prob1(x1_emb, x2_emb)) -
                           T.log(1 - get_prob1(x1neg_emb, x2neg_emb)))

        ###Define graph from input to sampled/validated loss
        randomizationA = theano_rng.uniform(size=(self.validation_samples,
                                                  self.dim))
        randomizationB = theano_rng.uniform(size=(self.validation_samples,
                                                  self.dim))
Example #36
def softplus_f(v):
    return T.log(1 + T.exp(v))
Example #37
 def hinge_loss_sum(self, y):
     return -T.sum(T.log(self.p_y_given_x)[:,y])
Example #38
def tlogit(x):
    return T.log(x / (np.float32(1) - x))
Example #39
b2_init = np.zeros(output_size)

thX = T.matrix("X")
thT = T.matrix("T")
W1 = theano.shared(W1_init, "W1")
W2 = theano.shared(W2_init, "W2")
b1 = theano.shared(b1_init, "b1")
b2 = theano.shared(b2_init, "b2")

thZ = T.nnet.relu(thX.dot(W1) + b1)

thY = T.nnet.softmax(thZ.dot(W2) + b2)

prediction = T.argmax(thY, axis=1)

cost = -(thT * T.log(thY)).sum() + reg * ((W1 * W1).sum() + (b1 * b1).sum() +
                                          (W2 * W2).sum() + (b2 * b2).sum())

update_W1 = W1 - lr * T.grad(cost, W1)
update_b1 = b1 - lr * T.grad(cost, b1)
update_W2 = W2 - lr * T.grad(cost, W2)
update_b2 = b2 - lr * T.grad(cost, b2)

train = theano.function([thX, thT],
                        updates=[(W1, update_W1), (W2, update_W2),
                                 (b1, update_b1), (b2, update_b2)])

get_prediction = theano.function(inputs=[thX, thT], outputs=[cost, prediction])

costs = []
Example #40
 def negative_log_likelihood_sum(self, y):
     return -T.sum(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
Example #41
 def kld(self, mu, var):
     return 0.5 * T.sum(1 + T.log(var) - mu**2 - var, axis=1)
Example #42
def LME(x, axis=None, dtype=None, keepdims=False, acc_dtype=None):
    return T.log(T.mean(T.exp(x), axis, dtype, keepdims, acc_dtype))
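Written literally as log(mean(exp(x))), this overflows as soon as exp(x) does. A common rewrite subtracts the maximum first; a small numpy sketch of that stabilised version (an equivalent rewrite, not what the snippet above does):

import numpy as np

def log_mean_exp(x, axis=None):
    # log(mean(exp(x))) without letting exp overflow
    m = np.max(x, axis=axis, keepdims=True)
    return np.squeeze(m, axis=axis) + np.log(np.mean(np.exp(x - m), axis=axis))

x = np.array([1000.0, 1001.0, 1002.0])
print(log_mean_exp(x))          # about 1001.31; the naive form returns inf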
Example #43
 def f_softplus(x):
     return T.log(T.exp(x) + 1)  # - np.log(2)
Example #44
 def cost_nll(self, pred, label):
     cost = -T.log(pred) * label
     cost = T.mean(T.sum(cost, axis=1))
     return cost
Example #45
    def __init__(self, We_initial, params):

        if params.maxval:
            self.nout = params.maxval - params.minval + 1

        if params.traintype == "reg" or params.traintype == "rep":
            p = cPickle.load(file(params.regfile, 'rb'))
            print p  # contains We

        if params.traintype == "reg":
            print "regularizing to parameters"

        if params.traintype == "rep":
            print "not updating embeddings"

        #params
        initial_We = theano.shared(np.asarray(We_initial, dtype=config.floatX))
        We = theano.shared(np.asarray(We_initial, dtype=config.floatX))

        if params.traintype == "reg":
            initial_We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))
            We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))

        if params.traintype == "rep":
            We = theano.shared(
                np.asarray(p[0].get_value(), dtype=config.floatX))

        g1batchindices = T.imatrix()
        g2batchindices = T.imatrix()
        g1mask = T.matrix()
        g2mask = T.matrix()
        scores = T.matrix()

        l_in = lasagne.layers.InputLayer((None, None, 1))
        l_mask = lasagne.layers.InputLayer(shape=(None, None))
        l_emb = lasagne.layers.EmbeddingLayer(
            l_in,
            input_size=We.get_value().shape[0],
            output_size=We.get_value().shape[1],
            W=We)
        l_out = lasagne_average_layer([l_emb, l_mask])

        embg1 = lasagne.layers.get_output(l_out, {
            l_in: g1batchindices,
            l_mask: g1mask
        })
        embg2 = lasagne.layers.get_output(l_out, {
            l_in: g2batchindices,
            l_mask: g2mask
        })

        g1_dot_g2 = embg1 * embg2
        g1_abs_g2 = abs(embg1 - embg2)

        lin_dot = lasagne.layers.InputLayer((None, We.get_value().shape[1]))
        lin_abs = lasagne.layers.InputLayer((None, We.get_value().shape[1]))
        l_sum = lasagne.layers.ConcatLayer([lin_dot, lin_abs])
        l_sigmoid = lasagne.layers.DenseLayer(
            l_sum, params.memsize, nonlinearity=lasagne.nonlinearities.sigmoid)
        if params.task == "sim":
            l_softmax = lasagne.layers.DenseLayer(l_sigmoid,
                                                  self.nout,
                                                  nonlinearity=T.nnet.softmax)
            X = lasagne.layers.get_output(l_softmax, {
                lin_dot: g1_dot_g2,
                lin_abs: g1_abs_g2
            })
            Y = T.log(X)

            cost = scores * (T.log(scores) - Y)
            cost = cost.sum(axis=1) / (float(self.nout))

            prediction = 0.
            i = params.minval
            while i <= params.maxval:
                prediction = prediction + i * X[:, i - 1]
                i += 1
        elif params.task == "ent":
            l_softmax = lasagne.layers.DenseLayer(l_sigmoid,
                                                  3,
                                                  nonlinearity=T.nnet.softmax)
            X = lasagne.layers.get_output(l_softmax, {
                lin_dot: g1_dot_g2,
                lin_abs: g1_abs_g2
            })

            cost = theano.tensor.nnet.categorical_crossentropy(X, scores)

            prediction = T.argmax(X, axis=1)
        else:
            raise ValueError('Params.task not set correctly.')

        # if params.l_out == '':
        #     lasagne.layers.set_all_param_values(l_out, s)
        #
        # if params.l_so
        self.network_params = lasagne.layers.get_all_params(
            l_out, trainable=True) + lasagne.layers.get_all_params(
                l_softmax, trainable=True)
        self.network_params.pop(0)
        self.all_params = lasagne.layers.get_all_params(
            l_out, trainable=True) + lasagne.layers.get_all_params(
                l_softmax, trainable=True)

        reg = self.getRegTerm(params, We, initial_We)
        self.trainable = self.getTrainableParams(params)
        cost = T.mean(cost) + reg

        self.feedforward_function = theano.function([g1batchindices, g1mask],
                                                    embg1)

        self.scoring_function = theano.function(
            [g1batchindices, g2batchindices, g1mask, g2mask], prediction)
        self.cost_function = theano.function(
            [scores, g1batchindices, g2batchindices, g1mask, g2mask], cost)

        grads = theano.gradient.grad(cost, self.trainable)
        if params.clip:
            grads = [
                lasagne.updates.norm_constraint(grad, params.clip,
                                                range(grad.ndim))
                for grad in grads
            ]
        updates = params.learner(grads, self.trainable, params.eta)
        self.train_function = theano.function(
            [scores, g1batchindices, g2batchindices, g1mask, g2mask],
            cost,
            updates=updates)
Example #46
 def multivariate_bernoulli(self, y_pred, y_true):
     return T.sum(y_true * T.log(y_pred) + (1 - y_true) * T.log(1 - y_pred),
                  axis=1)
Example #47
    def _compute_losses(self, model_output):
        # model_output.shape : (batch_size, seq_len, K, M, target_size)
        # self.dataset.symb_targets.shape = (batch_size, seq_len+K-1, target_dims)

        # mask.shape : (batch_size, seq_len) or None
        mask = self.dataset.symb_mask

        # mu.shape = (batch_size, seq_len, K, M, target_dims)
        mu = model_output[:, :, :, :, 0:3]

        # sigma.shape = (batch_size, seq_len, K, M, target_dims)
        sigma = model_output[:, :, :, :, 3:6]

        # Stack K targets for each input (sliding window style)
        # targets.shape = (batch_size, seq_len, K, target_dims)
        targets = T.stack([
            self.dataset.symb_targets[:, i:(-self.model.k + i + 1) or None]
            for i in range(self.model.k)
        ],
                          axis=2)

        # Add new axis for sum over M
        # targets.shape = (batch_size, seq_len, K, 1, target_dims)
        targets = targets[:, :, :, None, :]

        # For monitoring the L2 error of using $mu$ as the predicted direction (should be comparable to MICCAI's work).
        normalized_mu = mu[:, :, 0, 0] / l2distance(
            mu[:, :, 0, 0], keepdims=True, eps=1e-8)
        normalized_targets = targets[:, :, 0, 0] / l2distance(
            targets[:, :, 0, 0], keepdims=True, eps=1e-8)
        self.L2_error_per_item = T.sqrt(
            T.sum(((normalized_mu - normalized_targets)**2), axis=2))
        if mask is not None:
            self.mean_sqr_error = T.sum(self.L2_error_per_item * mask,
                                        axis=1) / T.sum(mask, axis=1)
        else:
            self.mean_sqr_error = T.mean(self.L2_error_per_item, axis=1)

        # Likelihood of multivariate gaussian (n dimensions) is :
        # ((2 \pi)^D |\Sigma|)^{-1/2} exp(-1/2 (x - \mu)^T \Sigma^-1 (x - \mu))
        # We suppose a diagonal covariance matrix, so we have :
        #   => |\Sigma| = \prod_n \sigma_n^2
        #   => (x - \mu)^T \Sigma^-1 (x - \mu) = \sum_n ((x_n - \mu_n) / \sigma_n)^2
        m_log_likelihoods = -np.float32(
            (self.target_dims / 2.) * np.log(2 * np.pi)) + T.sum(
                -T.log(sigma) - 0.5 * T.sqr((targets - mu) / sigma), axis=4)

        # k_losses_per_timestep.shape : (batch_size, seq_len, K)
        self.k_losses_per_timestep = T.log(self.m) - logsumexp(
            m_log_likelihoods, axis=3, keepdims=False)

        # loss_per_timestep.shape : (batch_size, seq_len)
        self.loss_per_time_step = T.mean(self.k_losses_per_timestep, axis=2)

        # Average over sequence steps.
        # k_nlls_per_seq.shape :(batch_size, K)
        if mask is not None:
            self.k_losses_per_seq = T.sum(
                self.k_losses_per_timestep * mask[:, :, None], axis=1) / T.sum(
                    mask, axis=1, keepdims=True)
        else:
            self.k_losses_per_seq = T.mean(self.k_losses_per_timestep, axis=1)

        # Average over K
        # loss_per_seq.shape :(batch_size,)
        self.loss_per_seq = T.mean(self.k_losses_per_seq, axis=1)
        return self.loss_per_seq
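The comment block above spells out the diagonal-covariance Gaussian log-likelihood that m_log_likelihoods implements: -D/2 * log(2*pi) - sum(log sigma) - 0.5 * sum(((x - mu)/sigma)^2). A small numpy check of that expression against scipy (single sample, made-up values):

import numpy as np
from scipy.stats import multivariate_normal

x = np.array([0.2, -1.0, 0.5])
mu = np.array([0.0, -0.5, 1.0])
sigma = np.array([0.8, 1.2, 0.5])             # per-dimension standard deviations
D = x.size

log_lik = (-(D / 2.0) * np.log(2 * np.pi)
           + np.sum(-np.log(sigma) - 0.5 * ((x - mu) / sigma) ** 2))

print(log_lik)
print(multivariate_normal(mean=mu, cov=np.diag(sigma ** 2)).logpdf(x))   # same value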
Example #48
    def factors(self, x, z, A):

        v = self.v
        w = self.w
        '''
        z is unused
        x['x'] is the data
        
        The names of dict z[...] may be confusing here: the latent variable z is not included in the dict z[...],
        but implicitly computed from epsilon and parameters in w.

        z is computed with g(.) from eps and variational parameters
        let logpx be the generative model density: log p(x|z) where z=g(.)
        let logpz be the prior of Z plus the entropy of q(z|x): logp(z) + H_q(z|x)
        So the lower bound L(x) = logpx + logpz
        
        let logpv and logpw be the (prior) density of the parameters
        '''
        def f_softplus(x):
            return T.log(T.exp(x) + 1)  # - np.log(2)

        def f_rectlin(x):
            return x * (x > 0)

        def f_rectlin2(x):
            return x * (x > 0) + 0.01 * x

        nonlinear = {
            'tanh': T.tanh,
            'sigmoid': T.nnet.sigmoid,
            'softplus': f_softplus,
            'rectlin': f_rectlin,
            'rectlin2': f_rectlin2
        }
        nonlinear_q = nonlinear[self.nonlinear_q]
        nonlinear_p = nonlinear[self.nonlinear_p]

        #rng = rng_curand.CURAND_RandomStreams(0)
        import theano.tensor.shared_randomstreams
        rng = theano.tensor.shared_randomstreams.RandomStreams(0)

        # Compute q(z|x,y)
        #
        # it seems that z = f(v['w0x'] * x + v['w0y'] * y + b)
        #
        hidden_q = [
            nonlinear_q(
                T.dot(v['w0x'], x['x']) + T.dot(v['w0y'], x['y']) +
                T.dot(v['b0'], A))
        ]
        for i in range(1, len(self.n_hidden_q)):
            hidden_q.append(
                nonlinear_q(
                    T.dot(v['w' + str(i)], hidden_q[-1]) +
                    T.dot(v['b' + str(i)], A)))

        q_mean = T.dot(v['mean_w'], hidden_q[-1]) + T.dot(v['mean_b'], A)
        if self.type_qz == 'gaussian' or self.type_qz == 'gaussianmarg':
            q_logvar = T.dot(v['logvar_w'], hidden_q[-1]) + T.dot(
                v['logvar_b'], A)
        else:
            raise Exception()

        # function for distribution q(z|x)
        theanofunc = lazytheanofunc('warn', mode='FAST_RUN')
        self.dist_qz['z'] = theanofunc([x['x'], x['mean_prior'], x['y']] + [A],
                                       [q_mean, q_logvar])

        # Compute virtual sample
        eps = rng.normal(size=q_mean.shape, dtype='float32')
        _z = q_mean + T.exp(0.5 * q_logvar) * eps

        # Compute log p(x|z)
        #
        # log p(x | z, y)
        # It seems that x = f((w0y * y + w0z * z) + b0)
        #
        hidden_p = [
            nonlinear_p(
                T.dot(w['w0y'], x['y']) + T.dot(w['w0z'], _z) +
                T.dot(w['b0'], A))
        ]
        for i in range(1, len(self.n_hidden_p)):
            hidden_p.append(
                nonlinear_p(
                    T.dot(w['w' + str(i)], hidden_p[-1]) +
                    T.dot(w['b' + str(i)], A)))
            if self.dropout:
                hidden_p[-1] *= 2. * (rng.uniform(size=hidden_p[-1].shape,
                                                  dtype='float32') > .5)

        if self.type_px == 'bernoulli':
            p = T.nnet.sigmoid(
                T.dot(w['out_w'], hidden_p[-1]) + T.dot(w['out_b'], A))
            _logpx = -T.nnet.binary_crossentropy(p, x['x'])
            self.dist_px['x'] = theanofunc([x['y'], _z] + [A], p)
        elif self.type_px == 'gaussian':
            x_mean = T.dot(w['out_w'], hidden_p[-1]) + T.dot(w['out_b'], A)
            x_logvar = T.dot(w['out_logvar_w'], hidden_p[-1]) + T.dot(
                w['out_logvar_b'], A)
            _logpx = ap.logpdfs.normal2(x['x'], x_mean, x_logvar)
            self.dist_px['x'] = theanofunc([x['y'], _z] + [A],
                                           [x_mean, x_logvar])
        elif self.type_px == 'laplace':
            x_mean = T.dot(w['out_w'], hidden_p[-1]) + T.dot(w['out_b'], A)
            x_logvar = T.dot(w['out_logvar_w'], hidden_p[-1]) + T.dot(
                w['out_logvar_b'], A)
            _logpx = ap.logpdfs.laplace(x['x'], x_mean, x_logvar)
            self.dist_px['x'] = theanofunc([x['y'], _z] + [A],
                                           [x_mean, x_logvar])

        else:
            raise Exception("")

        # Note: logpx is a row vector (one element per sample)
        logpx = T.dot(shared32(np.ones((1, self.n_x))),
                      _logpx)  # logpx = log p(x|z,w)

        # log p(y) (prior of y)
        #_logpy = w['logpy']
        #if self.uniform_y: _logpy *= 0
        #py_model = T.nnet.softmax(T.dot(_logpy, A).T).T
        #logpy = (- T.nnet.categorical_crossentropy(py_model.T, x['y'].T).T).reshape((1,-1))
        #logpx += logpy
        #self.dist_px['y'] = theanofunc([A], py_model)

        # log p(z) (prior of z)
        #
        # E_q[log(p(z))]
        #
        if self.type_pz == 'gaussianmarg':
            logpz = -0.5 * (np.log(2 * np.pi) + (
                (q_mean - x['mean_prior'])**2 + T.exp(q_logvar))).sum(
                    axis=0, keepdims=True)
        elif self.type_pz == 'gaussian':
            logpz = ap.logpdfs.standard_normal(_z).sum(axis=0, keepdims=True)
        elif self.type_pz == 'mog':
            pz = 0
            for i in range(self.n_mixture):
                pz += T.exp(
                    ap.logpdfs.normal2(_z, T.dot(w['mog_mean' + str(i)], A),
                                       T.dot(w['mog_logvar' + str(i)], A)))
            logpz = T.log(pz).sum(axis=0, keepdims=True) - self.n_z * np.log(
                float(self.n_mixture))
        elif self.type_pz == 'laplace':
            logpz = ap.logpdfs.standard_laplace(_z).sum(axis=0, keepdims=True)
        elif self.type_pz == 'studentt':
            logpz = ap.logpdfs.studentt(_z, T.dot(T.exp(w['logv']),
                                                  A)).sum(axis=0,
                                                          keepdims=True)
        else:
            raise Exception("Unknown type_pz")

        # loq q(z|x) (entropy of z)
        #
        # E_q[-log(q)]
        #
        if self.type_qz == 'gaussianmarg':
            logqz = -0.5 * (np.log(2 * np.pi) + 1 + q_logvar).sum(
                axis=0, keepdims=True)
        elif self.type_qz == 'gaussian':
            logqz = ap.logpdfs.normal2(_z, q_mean, q_logvar).sum(axis=0,
                                                                 keepdims=True)
        else:
            raise Exception()

        # Note: logpv and logpw are a scalars
        def f_prior(_w, prior_sd=self.prior_sd):
            return ap.logpdfs.normal(_w, 0, prior_sd).sum()

        logpv = 0
        logpv += f_prior(v['w0x'])
        logpv += f_prior(v['w0y'])
        for i in range(1, len(self.n_hidden_q)):
            logpv += f_prior(v['w' + str(i)])
        logpv += f_prior(v['mean_w'])
        if self.type_qz in ['gaussian', 'gaussianmarg']:
            logpv += f_prior(v['logvar_w'])

        logpw = 0
        logpw += f_prior(w['w0y'])
        logpw += f_prior(w['w0z'])
        for i in range(1, len(self.n_hidden_p)):
            logpw += f_prior(w['w' + str(i)])
        logpw += f_prior(w['out_w'])
        if self.type_px in ['sigmoidgaussian', 'gaussian', 'laplace']:
            logpw += f_prior(w['out_logvar_w'])
        if self.type_pz == 'studentt':
            logpw += f_prior(w['logv'])

        #return logpv, logpw, logpx, logpz, logqz
        return logpx, logpz, logqz
Example #49
 def KL(self, y):
     return T.mean(y * T.log(y / self.y_pred) +
                   (1 - y) * T.log((1 - y) / (1 - self.y_pred)))
Example #50
x = T.dmatrix("x")
y = T.dvector("y")
learning_rate = T.dscalar("lr")

# declare the weight w and b
w = theano.shared(value=numpy.random.rand(feat), name="w")
b = theano.shared(value=0., name="b")

print("initialized weights \n")
print(w.get_value())
print(b.get_value())

# build the graph
output = 1/(1+T.exp(-T.dot(x, w)-b))
prediction = output > 0.5
cross_entropy = -y * T.log(output) - (1-y)*T.log(1-output)
loss = cross_entropy.mean() + 0.01*(w**2).sum()
gradW, gradb = T.grad(loss, [w, b])

# train function
train = theano.function(inputs=[x,y,learning_rate], outputs=[prediction, cross_entropy,loss, learning_rate], \
                        updates=((w,w-learning_rate*gradW), (b,b-learning_rate*gradb)))
# predict function
predict = theano.function(inputs=[x], outputs=prediction)

for i in range(training_step):
    if (i < 1000):
        learning_rate = 0.1
    else:
        learning_rate =0.01
    pred, cro, l,lr = train(D[0], D[1], learning_rate)
Example #51
File: util.py Project: tinyloop/dtp
def NLL(probs, labels) : # labels are not one-hot code 
    return - T.mean( T.log(probs)[T.arange(labels.shape[0]), T.cast(labels,'int32')] )
Example #52
 def loss(self,delta):
     #return T.log(1+T.exp(euclid(self.output1,self.output2)))
     #return T.log(1+T.exp(T.sqrt(T.sum(T.sqr(self.output1-self.output2)))))
     #return T.log(1+T.exp(T.sqrt(T.sum(T.sqr(self.output1)))))
     return T.log(1+T.exp(delta*(T.sum(T.sqr(self.output1-self.output2)))))
Example #53
    def trainMB(self, V_egMin, noOfEpoch, noOfMiniBatchEx):
        """
        trains the current RBM object, returns nothing with parameter updates being internal
        
        args:
        V_egMin (theano.shared 2D array): call eval() to supply as argument. rows of this are input examples. V_egMin[N:M] extracts M-N examples, each of size noOfVisible units
        noOfEpoch (int): total number of Epoch to simulate, each Epoch goes through V_egMin
        noOfMiniBatchEx (int): number of examples to be grouped into minibatches
        
        """
        self.miniBatchSize = noOfMiniBatchEx
        print("size of input example is: " + str(V_egMin.shape))
        V_egM = T.matrix(name="T_egM", dtype=theano.config.floatX)
        [V_CDmAcc, H_CDmAcc, H_CDmean, V_CDmean] , scan_updates = theano.scan(self.vtovMBall, outputs_info=[V_egM, None, None, None] , n_steps=self.CD_n)
        V_CDm = V_CDmAcc[-1] #these are matrixes
        H_CDm = H_CDmAcc[-1] #these are matrixes
        
       
        H_egM = self.vtohMB(V_egM)
        energyVector_eg = self.energyFnMB(V_egM, H_egM)
        energyVector_cd = self.energyFnMB(V_CDm, H_CDm)
        costFn = T.mean(energyVector_eg, dtype=theano.config.floatX, acc_dtype=theano.config.floatX) - T.mean(energyVector_cd, dtype=theano.config.floatX, acc_dtype=theano.config.floatX) 
        
        Ta_grad, Tb_grad, Tz_grad, Tomg_grad = T.grad(cost=costFn,
                                                        wrt=[self.T_a, self.T_b, self.T_z, self.T_omega],
                                                        consider_constant=[V_egM, H_egM, V_CDm, H_CDm])
        
        #regular gradient
        gradFromMB = theano.function(inputs=[V_egM], outputs=[Ta_grad, Tb_grad, Tz_grad, Tomg_grad], 
                                     allow_input_downcast=True, 
                                     updates = scan_updates + [(self.T_a, self.T_a + self.aRate*Ta_grad),
                                                               (self.T_b, self.T_b + self.bRate*Tb_grad),
                                                               (self.T_z, self.T_z + self.sigmaRate*Tz_grad),
                                                               (self.T_omega, self.T_omega + self.omegaRate*Tomg_grad)],
                                     mode='FAST_RUN')#NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True))
        
        #rprop: Code not used
        Ta_rpropMag = T.mul(T.abs_(self.Ta_grad_prev), T.mul(self.T_posUpdate, T.abs_(T.sgn(self.Ta_grad_prev)+T.sgn(Ta_grad))) + 
                            T.mul(self.T_negUpdate, T.abs_(T.abs_(T.sgn(self.Ta_grad_prev)+T.sgn(Ta_grad))-np.float32(2.0))))      
        Ta_rprop = T.mul(T.sgn(Ta_grad),Ta_rpropMag.clip(np.float32(self.epsilon),50))
        Tb_rpropMag = T.mul(T.abs_(self.Tb_grad_prev), T.mul(self.T_posUpdate, T.abs_(T.sgn(self.Tb_grad_prev)+T.sgn(Tb_grad))) + 
                            T.mul(self.T_negUpdate, T.abs_(T.abs_(T.sgn(self.Tb_grad_prev)+T.sgn(Tb_grad))-np.float32(2.0))))      
        Tb_rprop = T.mul(T.sgn(Tb_grad),Tb_rpropMag.clip(np.float32(self.epsilon),50))
        Tz_rpropMag = T.mul(T.abs_(self.Tz_grad_prev), T.mul(self.T_posUpdate, T.abs_(T.sgn(self.Tz_grad_prev)+T.sgn(Tz_grad))) + 
                            T.mul(self.T_negUpdate, T.abs_(T.abs_(T.sgn(self.Tz_grad_prev)+T.sgn(Tz_grad))-np.float32(2.0))) )     
        Tz_rprop = T.mul(T.sgn(Tz_grad),Tz_rpropMag.clip(np.float32(self.epsilon),50))
        Tomg_rpropMag = T.mul(T.abs_(self.Tomg_grad_prev), T.mul(self.T_posUpdate, T.abs_(T.sgn(self.Tomg_grad_prev)+T.sgn(Tomg_grad))) + 
                            T.mul(self.T_negUpdate, T.abs_(T.abs_(T.sgn(self.Tomg_grad_prev)+T.sgn(Tomg_grad))-np.float32(2.0))))      
        Tomg_rprop = T.mul(T.sgn(Tomg_grad),Tomg_rpropMag.clip(np.float32(self.epsilon),50)) 
        gradFromMBrprop = theano.function(inputs=[V_egM], outputs=[Ta_rprop, Tb_rprop, Tz_rprop, Tomg_rprop], 
                                     allow_input_downcast=True, 
                                     updates = scan_updates + [(self.T_a, self.T_a + Ta_rprop),
                                                               (self.T_b, self.T_b + Tb_rprop),
                                                               (self.T_z, self.T_z + Tz_rprop),
                                                               (self.T_omega, self.T_omega + Tomg_rprop),
                                                               (self.Ta_grad_prev, Ta_rprop),
                                                               (self.Tb_grad_prev, Tb_rprop),
                                                               (self.Tz_grad_prev, Tz_rprop),
                                                               (self.Tomg_grad_prev, Tomg_rprop)],
                                     mode='FAST_RUN')#NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True))
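        # Illustrative sketch of the (unused) Rprop rule above: the step size is
        # grown while the gradient keeps its sign and shrunk when the sign flips,
        # then the step is taken in the direction of the current gradient with its
        # magnitude clipped to [epsilon, 50]. eta_plus/eta_minus stand in for
        # 2*T_posUpdate / 2*T_negUpdate and are illustrative values only.
        def _rprop_step(grad, prev_step, eta_plus=1.2, eta_minus=0.5,
                        eps=1e-6, step_max=50.0):
            same_sign = np.sign(grad) * np.sign(prev_step) > 0
            magnitude = np.abs(prev_step) * np.where(same_sign, eta_plus, eta_minus)
            step = np.sign(grad) * np.clip(magnitude, eps, step_max)
            return step  # the step itself is stored as prev_step for the next call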
        
        #RMSprop only: 
        [a_grad, b_grad, z_grad, omg_grad] = gradFromMB(V_egMin[0:noOfMiniBatchEx]) #initial RMS correction
        if (not(self.parameterLoaded) and not(self.parameterSaved)):
            self.Ta_rms.set_value(np.float32(np.abs(a_grad)))  # persistent RMS accumulator for T_a, seeded from the first gradient
        # the remaining RMS accumulators are local shared variables, re-created on every call
        Tb_rms = theano.shared(value=np.float32(np.abs(b_grad)), name='Tb_rms', borrow=True, allow_downcast=True)
        Tz_rms = theano.shared(value=np.float32(np.abs(z_grad)), name='Tz_rms', borrow=True, allow_downcast=True)
        Tomg_rms = theano.shared(value=np.float32(np.abs(omg_grad)), name='Tomg_rms', borrow=True, allow_downcast=True)
        gradFromMBRMSprop = theano.function(inputs=[V_egM], outputs=[Ta_grad, Tb_grad, Tz_grad, Tomg_grad], 
                                     allow_input_downcast=True, 
                                     updates = scan_updates + [(self.Ta_rms, T.sqrt(T.mul(np.float32(0.9),T.mul(self.Ta_rms,self.Ta_rms))+T.mul(np.float32(0.1),T.mul(Ta_grad,Ta_grad)))),
                                                               (Tb_rms, T.sqrt(T.mul(np.float32(0.9),T.mul(Tb_rms,Tb_rms))+T.mul(np.float32(0.1),T.mul(Tb_grad,Tb_grad)))),
                                                               (Tz_rms, T.sqrt(T.mul(np.float32(0.9),T.mul(Tz_rms,Tz_rms))+T.mul(np.float32(0.1),T.mul(Tz_grad,Tz_grad)))),
                                                               (Tomg_rms, T.sqrt(T.mul(np.float32(0.9),T.mul(Tomg_rms,Tomg_rms))+T.mul(np.float32(0.1),T.mul(Tomg_grad,Tomg_grad)))),
                                                               (self.T_a, self.T_a + self.aRate*T.mul(Ta_grad,T.maximum(np.float32(self.epsilon),self.Ta_rms)**-1)),
                                                               (self.T_b, self.T_b + self.bRate*T.mul(Tb_grad,T.maximum(np.float32(self.epsilon),Tb_rms)**-1)),
                                                               (self.T_z, self.T_z + self.sigmaRate*T.mul(Tz_grad,T.maximum(np.float32(self.epsilon),Tz_rms)**-1)),
                                                               (self.T_omega, self.T_omega + self.omegaRate*T.mul(Tomg_grad,T.maximum(np.float32(self.epsilon),Tomg_rms)**-1))],
                                                                 mode='FAST_RUN')#NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True))  
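        # Illustrative sketch of the RMSprop updates compiled above: a running
        # root-mean-square of each gradient is maintained and the step is divided
        # by it, with a floor of epsilon on the denominator. Because Theano applies
        # the updates dictionary simultaneously, the parameter step uses the
        # previous RMS value, not the freshly updated one.
        def _rmsprop_step(param, grad, rms, rate, eps=1e-6, decay=0.9):
            param_new = param + rate * grad / np.maximum(eps, rms)
            rms_new = np.sqrt(decay * rms ** 2 + (1.0 - decay) * grad ** 2)
            return param_new, rms_new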
        
        #sparse hidden units optimization + RMSprop:   
        #first calculate probability of hidden units firing given visible examples:
        aVomg = T.dot(T.mul(T.fill(V_egM, T.exp(-self.T_z)), V_egM), self.T_omega)
        aT_Hp = T.nnet.sigmoid(T.fill(aVomg, self.T_b) + aVomg)#T.nnet.ultra_fast_sigmoid() did not work for us 
        aT_HpMean = T.mean(aT_Hp) # mean activation over minibatch and all Hk
        #cross entropy between mean hidden unit activation and target mean activation probability "self.sparseTargetp" 
        sparseHcost = T.mul(np.float32(-self.sparseTargetp), T.log(aT_HpMean)) - T.mul((np.float32(1.0)-self.sparseTargetp), T.log(np.float32(1.0)-aT_HpMean))
        
        Tb_gradH, Tz_gradH, Tomg_gradH = T.grad(cost=sparseHcost,
                                                        wrt=[self.T_b, self.T_z, self.T_omega],
                                                        consider_constant=[V_egM])
        sparseGradFn = theano.function(inputs = [V_egM], outputs =[Tb_gradH, Tz_gradH, Tomg_gradH], allow_input_downcast=True, mode = 'FAST_RUN')
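        # Illustrative sketch of the sparsity cost above: the mean hidden
        # activation over the mini-batch is pushed towards sparseTargetp via a
        # cross-entropy penalty (names below are illustrative, not class API):
        def _sparse_hidden_cost(V, omega, b, z, target_p):
            h_prob = 1.0 / (1.0 + np.exp(-(b + np.dot(V * np.exp(-z), omega))))
            q = h_prob.mean()  # mean activation over the batch and all hidden units
            return -target_p * np.log(q) - (1.0 - target_p) * np.log(1.0 - q)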
        
        [b_gradH, z_gradH, omg_gradH] = sparseGradFn(V_egMin[0:noOfMiniBatchEx]) #initial RMS correction
        
        if (not(self.parameterLoaded) and not(self.parameterSaved)):
            self.Tb_rmsH.set_value(np.float32(np.abs(b_grad - b_gradH))) 
            self.Tz_rmsH.set_value(np.float32(np.abs(z_grad - z_gradH))) 
            self.Tomg_rmsH.set_value(np.float32(np.abs(omg_grad - omg_gradH))) 
        gradSparseH = theano.function(inputs=[V_egM], outputs=[Ta_grad, Tb_grad, Tz_grad, Tomg_grad, Tb_gradH, Tz_gradH, Tomg_gradH], 
                                     allow_input_downcast=True, 
                                     updates = scan_updates + [(self.Ta_rms, T.sqrt(T.mul(np.float32(0.9),T.mul(self.Ta_rms,self.Ta_rms))+T.mul(np.float32(0.1),T.mul(Ta_grad,Ta_grad)))),
                                                               (self.Tb_rmsH, T.sqrt(T.mul(np.float32(0.9),T.mul(self.Tb_rmsH,self.Tb_rmsH))+T.mul(np.float32(0.1),T.mul(Tb_grad-Tb_gradH,Tb_grad-Tb_gradH)))),
                                                               (self.Tz_rmsH, T.sqrt(T.mul(np.float32(0.9),T.mul(self.Tz_rmsH,self.Tz_rmsH))+T.mul(np.float32(0.1),T.mul(Tz_grad-Tz_gradH,Tz_grad-Tz_gradH)))),
                                                               (self.Tomg_rmsH, T.sqrt(T.mul(np.float32(0.9),T.mul(self.Tomg_rmsH,self.Tomg_rmsH))+T.mul(np.float32(0.1),T.mul(Tomg_grad-Tomg_gradH,Tomg_grad-Tomg_gradH)))),
                                                               (self.T_a, self.T_a + self.aRate*T.mul(Ta_grad,T.maximum(np.float32(self.epsilon),self.Ta_rms)**-1)),
                                                               (self.T_b, self.T_b + self.bRate*T.mul(Tb_grad-Tb_gradH,T.maximum(np.float32(self.epsilon),self.Tb_rmsH)**-1)),
                                                               (self.T_z, self.T_z + self.sigmaRate*T.mul(Tz_grad-Tz_gradH,T.maximum(np.float32(self.epsilon),self.Tz_rmsH)**-1)),
                                                               (self.T_omega, self.T_omega + self.omegaRate*T.mul(Tomg_grad-Tomg_gradH,T.maximum(np.float32(self.epsilon),self.Tomg_rmsH)**-1))],
                                     mode='FAST_RUN')#NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)) 
        
        #reconstruction errors:
        [V_egM_recon, H_egM_reconStub, H_meanStubC, V_meanStubC] = self.vtovMBall(V_egM)
        V_error = V_egM - V_egM_recon
        V_errorSqr = T.mul(V_error, V_error)
        reconError = theano.function(inputs = [V_egM], outputs = [T.mean(T.sum(V_errorSqr,axis=1, acc_dtype=theano.config.floatX), acc_dtype=theano.config.floatX)], 
                                     allow_input_downcast=True,
                                     mode='FAST_RUN')
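        # Illustrative sketch of the reconstruction error above: mean over the
        # mini-batch of the summed squared difference between each visible vector
        # and its reconstruction from a full visible-hidden-visible pass.
        def _reconstruction_error(V, V_recon):
            return np.mean(np.sum((V - V_recon) ** 2, axis=1))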

        print("***************************************************************************************************")
        print("training network with " + str(self.nv) + " real visible units and " + str(self.nh) + " binary hidden units")
        print("reconstruction error before training = " + str(np.array(reconError(V_egMin))[0]))
        noOfMiniBatches = np.int(len(V_egMin)/noOfMiniBatchEx)
        print("number of mini-batches = " + str(noOfMiniBatches) + ", with " + str(noOfMiniBatchEx) + " examples per mini-batch")
        print("number of Epochs = " + str(noOfEpoch))
        print("***************************************************************************************************")        

        #input images are already randomised, with consecutive images belonging to different classes, so they are used directly as mini-batches
        for j in xrange(noOfEpoch):
            pretime=time.time()
            for i in xrange(noOfMiniBatches):
                [a_upDate, b_upDate, z_upDate, omg_upDate, b_upDateH, z_upDateH, omg_upDateH] = gradSparseH(V_egMin[i*noOfMiniBatchEx:(i+1)*noOfMiniBatchEx])
                
            myErr = reconError(V_egMin)
            self.likelihood4plot = self.likelihood4plot + [np.float32(myErr[0])]
            print("epoch " + str(j) + ": reconstruction error = " + str(myErr[0]) + ", time taken = " + str(time.time() - pretime))

        print("\n***************************************************************************************************") 
        print("reconstruction error after training for " + str(noOfEpoch) + " epochs = " + str(np.array(reconError(V_egMin))[0]))
        self.checkNaN()
        print("***************************************************************************************************")         
        
        plt.figure()
        plt.plot(np.arange(0.0, len(self.likelihood4plot), 1), self.likelihood4plot)
        plt.show()
Example No. 54
    def CrossEntropy(self, y):
        return -T.mean(y * T.log(self.y_pred))
Example No. 55
training_epochs = 25
learning_rate = 0.1
batch_size = 128

W1 = init_weights(28 * 28, 900)
b1 = init_bias(900)
b1_prime = init_bias(28 * 28)
W1_prime = W1.transpose()
W2 = init_weights(900, 10)
b2 = init_bias(10)

tilde_x = theano_rng.binomial(
    size=x.shape, n=1, p=1 - corruption_level, dtype=theano.config.floatX) * x
y1 = T.nnet.sigmoid(T.dot(tilde_x, W1) + b1)
z1 = T.nnet.sigmoid(T.dot(y1, W1_prime) + b1_prime)
cost1 = -T.mean(T.sum(x * T.log(z1) + (1 - x) * T.log(1 - z1), axis=1))

params1 = [W1, b1, b1_prime]
grads1 = T.grad(cost1, params1)
updates1 = [(param1, param1 - learning_rate * grad1)
            for param1, grad1 in zip(params1, grads1)]
train_da1 = theano.function(inputs=[x],
                            outputs=cost1,
                            updates=updates1,
                            allow_input_downcast=True)

p_y2 = T.nnet.softmax(T.dot(y1, W2) + b2)
y2 = T.argmax(p_y2, axis=1)
cost2 = T.mean(T.nnet.categorical_crossentropy(p_y2, d))

params2 = [W1, b1, W2, b2]
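
# Hedged sketch only: the example stops after defining params2 and cost2. A
# plausible continuation, mirroring train_da1 above, would take gradients of
# the classification cost and compile a second training function; the names
# grads2/updates2/train_softmax are assumptions, not the source's code.
grads2 = T.grad(cost2, params2)
updates2 = [(param2, param2 - learning_rate * grad2)
            for param2, grad2 in zip(params2, grads2)]
train_softmax = theano.function(inputs=[x, d],
                                outputs=cost2,
                                updates=updates2,
                                allow_input_downcast=True)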
Example No. 56
    def negative_log_likelihood(self, y):
        return -T.mean(T.log(self.y_t)[:, y])
Example No. 57
    def get_tester(self, y):
        return self.inp, T.log(self.p_y_given_x)[T.arange(y.shape[0]), y]
Example No. 58
def build_model(alpha, beta, tparams, options):
    trng = RandomStreams(SEED)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    x_zheng = tensor.matrix('x_zheng', dtype='int32')
    x_zheng_mask = tensor.matrix('x_zheng_mask', dtype=config.floatX)
    x_ni = tensor.matrix('x_ni', dtype='int32')
    x_ni_mask = tensor.matrix('x_ni_mask', dtype=config.floatX)
    y = tensor.vector('y', dtype='int32')

    n_timesteps = x_zheng.shape[0]
    n_samples = x_zheng.shape[1]

    emb_zheng = tparams['Wemb'][x_zheng.flatten()].reshape(
        [n_timesteps, n_samples, options['dim_proj']])

    proj1 = get_layer(options['encoder'])[1](tparams,
                                             emb_zheng,
                                             options,
                                             prefix='lstm_zheng',
                                             mask=x_zheng_mask)
    if options['encoder'] == 'lstm':
        proj_zheng = (proj1 * x_zheng_mask[:, :, None]).sum(axis=0)
        proj_zheng = proj_zheng / x_zheng_mask.sum(axis=0)[:, None]

    emb_ni = tparams['Wemb'][x_ni.flatten()].reshape(
        [n_timesteps, n_samples, options['dim_proj']])

    proj2 = get_layer(options['encoder'])[1](tparams,
                                             emb_ni,
                                             options,
                                             prefix='lstm_ni',
                                             mask=x_ni_mask)

    if options['encoder'] == 'lstm':
        proj_ni = (proj2 * x_ni_mask[:, :, None]).sum(axis=0)
        proj_ni = proj_ni / x_ni_mask.sum(axis=0)[:, None]

    proj = tensor.concatenate((proj_zheng, proj_ni), axis=1)

    if options['use_dropout']:
        proj = dropout_layer(proj, use_noise, trng)

    pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U']) + tparams['b'])

    # per-branch softmax predictions; the bias is added after the dot product,
    # matching the joint `pred` computation above
    pred_zheng = tensor.nnet.softmax(
        tensor.dot(proj_zheng, tparams['U_zheng']) + tparams['b'])

    pred_ni = tensor.nnet.softmax(
        tensor.dot(proj_ni, tparams['U_ni']) + tparams['b'])

    f_pred_prob = theano.function([x_zheng, x_zheng_mask, x_ni, x_ni_mask],
                                  pred,
                                  name='f_pred_prob')

    f_pred = theano.function([x_zheng, x_zheng_mask, x_ni, x_ni_mask],
                             pred.argmax(axis=1),
                             name='f_pred')

    f_proj = theano.function([x_zheng, x_zheng_mask, x_ni, x_ni_mask],
                             proj,
                             name='f_proj')

    off = 1e-8
    if pred.dtype == 'float16':
        off = 1e-6

    cost1 = -tensor.log(pred[tensor.arange(n_samples), y] + off).mean()
    cost2 = -tensor.log(pred_zheng[tensor.arange(n_samples), y] + off).mean()
    cost3 = -tensor.log(pred_ni[tensor.arange(n_samples), y] + off).mean()
    cost4 = tensor.sum(tensor.square(proj_zheng - proj_ni), axis=1).mean()
    cost = alpha * (cost1 + cost2 + cost3) + beta * cost4

    return use_noise, x_zheng, x_zheng_mask, x_ni, x_ni_mask, y, f_pred_prob, f_pred, cost1, cost2, cost3, cost4, cost, f_proj
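
# Hedged usage sketch (the surrounding training script is not shown): wire the
# returned graph pieces into a compiled cost and a gradient list. tparams is
# assumed to be an OrderedDict of shared parameters, options to come from the
# caller, and the alpha/beta values below are illustrative only.
(use_noise, x_zheng, x_zheng_mask, x_ni, x_ni_mask, y,
 f_pred_prob, f_pred, cost1, cost2, cost3, cost4, cost, f_proj) = \
    build_model(alpha=1.0, beta=0.1, tparams=tparams, options=options)

f_cost = theano.function([x_zheng, x_zheng_mask, x_ni, x_ni_mask, y],
                         cost, name='f_cost')
grads = tensor.grad(cost, wrt=list(tparams.values()))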
Example No. 59
    def logp(self, value):
        w = self.w

        return bound(logsumexp(tt.log(w) + self._comp_logp(value), axis=-1),
                     w >= 0, w <= 1, tt.allclose(w.sum(axis=-1), 1),
                     broadcast_conditions=False)
    def cost(self, net):
        "Return the log-likelihood cost."
        return -T.mean(
            T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])