def mlpSingleOutput1Layer_costfunc(x, *args):
    inputSize, l1Size, lambda_hidden, inputs, targets = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    inputs = gpu.garray(inputs)
    targets = gpu.garray(targets)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    theta_output = gpu.garray(reshape(x[num_weights_L1:shape(x)[0]], (1, l1Size+1)))
    inputs = gpu.concatenate((gpu.ones((1,numCases)), inputs), axis = 0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis = 0)
    #hidden_activation_L1 = hidden_activation_L1 * dropout_prob
    hidden_sum_output = gpu.dot(theta_output, hidden_activation_L1)
    outputs = hidden_sum_output.logistic()
    output_target_diff = (outputs - targets)**2
    regularized_penalty_output = theta_output[:,1:shape(theta_output)[1]]
    regularized_penalty_output = regularized_penalty_output * regularized_penalty_output
    regularized_penalty_L1 = theta_L1[:,1:shape(theta_L1)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    cost = gpu.sum(output_target_diff)/(2*numCases) + 0.5 * lambda_hidden*(gpu.sum(regularized_penalty_L1)+gpu.sum(regularized_penalty_output))
    print 'Multilayer Perceptron Cost:', cost
    del inputs
    del theta_L1
    del hidden_sum_L1
    del hidden_activation_L1
    del regularized_penalty_output
    del regularized_penalty_L1
    gpu.free_reuse_cache()
    return cost
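# A minimal NumPy sketch of the cost computed above: sigmoid hidden layer,
# sigmoid output, squared error averaged over cases, plus an L2 penalty on the
# non-bias weights. gnumpy is swapped for NumPy, so this is an illustrative CPU
# re-implementation for sanity checks on small inputs, not the original code path.
import numpy as np

def mlp_single_output_cost_numpy(x, inputSize, l1Size, lambda_hidden, inputs, targets):
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    numCases = inputs.shape[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    theta_L1 = x[:num_weights_L1].reshape(l1Size, inputSize + 1)
    theta_output = x[num_weights_L1:].reshape(1, l1Size + 1)
    a0 = np.vstack([np.ones((1, numCases)), inputs])    # prepend bias row
    a1 = sigmoid(theta_L1.dot(a0))
    a1 = np.vstack([np.ones((1, numCases)), a1])        # prepend bias row
    outputs = sigmoid(theta_output.dot(a1))
    sq_err = np.sum((outputs - targets) ** 2) / (2.0 * numCases)
    penalty = np.sum(theta_L1[:, 1:] ** 2) + np.sum(theta_output[:, 1:] ** 2)
    return sq_err + 0.5 * lambda_hidden * penalty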
Example #2
    def next(self):
        """
        Get the next minibatch of data.

        Return a tuple of (minibatch_x, minibatch_t) if t is not None,
        otherwise return only minibatch_x.
        """
        minibatch_t = None
        if self.i_ptr + self.minibatch_size <= self.n_cases:
            minibatch_x = self.x[self.idx[self.i_ptr:self.i_ptr +
                                          self.minibatch_size]]
            if self.t is not None:
                minibatch_t = self.t[self.idx[self.i_ptr:self.i_ptr +
                                              self.minibatch_size]]
            self.i_ptr += self.minibatch_size
        else:
            if self.i_ptr >= self.n_cases:  # empty part, needed for garray handling
                # minibatch_x_part = self.x[:0].copy()
                minibatch_x_part = None
                if self.t is not None:
                    # minibatch_t_part = self.t[:0].copy()
                    minibatch_t_part = None
            else:
                minibatch_x_part = self.x[self.idx[self.i_ptr:]].copy()
                if self.t is not None:
                    minibatch_t_part = self.t[self.idx[self.i_ptr:]].copy()

            other_part_size = self.minibatch_size - (self.n_cases - self.i_ptr)
            self.shuffle_data()
            if minibatch_x_part is not None:
                if isinstance(self.x, gnp.garray):
                    minibatch_x = gnp.concatenate(
                        [minibatch_x_part, self.x[self.idx[:other_part_size]]],
                        axis=0)
                else:
                    minibatch_x = np.r_[minibatch_x_part,
                                        self.x[self.idx[:other_part_size]]]
            else:
                minibatch_x = self.x[self.idx[:other_part_size]]

            if self.t is not None:
                if minibatch_t_part is not None:
                    if isinstance(self.t, gnp.garray):
                        minibatch_t = gnp.concatenate([
                            minibatch_t_part,
                            self.t[self.idx[:other_part_size]]
                        ],
                                                      axis=0)
                    else:
                        minibatch_t = np.r_[minibatch_t_part,
                                            self.t[self.idx[:other_part_size]]]
                else:
                    minibatch_t = self.t[self.idx[:other_part_size]]

            self.i_ptr = other_part_size

        if self.t is not None:
            return minibatch_x, minibatch_t
        else:
            return minibatch_x
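# Illustrative sketch of the wrap-around behaviour implemented by next() above:
# when a minibatch would run past the end of the shuffled index array, the tail
# of the current pass is concatenated with the head of a freshly shuffled one.
# Plain NumPy with made-up names; the real class also handles targets and garrays.
import numpy as np

def minibatches(x, minibatch_size, rng=np.random):
    n_cases = x.shape[0]
    idx = rng.permutation(n_cases)
    i_ptr = 0
    while True:
        if i_ptr + minibatch_size <= n_cases:
            batch = x[idx[i_ptr:i_ptr + minibatch_size]]
            i_ptr += minibatch_size
        else:
            part = x[idx[i_ptr:]]              # tail of the current pass
            idx = rng.permutation(n_cases)     # reshuffle for the next pass
            other = minibatch_size - part.shape[0]
            batch = np.concatenate([part, x[idx[:other]]], axis=0)
            i_ptr = other
        yield batch

# e.g. next(minibatches(np.arange(10).reshape(5, 2), 3)).shape == (3, 2)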
Example #3
def costfunc_gpu_ReLU(x, *args):
    num_input,num_hidden,num_output,inputs,lambda_val,sparsityParam,beta = args
    num_weights1 = (num_input+1)*num_hidden
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden,num_input+1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output,num_hidden+1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1,nData)), inputs), axis = 0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = gpu.log(1+hidden_sum.exp())  # softplus activation (a smooth approximation to ReLU)
    p_avg = gpu.sum(hidden_activation,axis=1)/nData
    hidden_activation = gpu.concatenate((gpu.ones((1,nData)), hidden_activation), axis = 0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:,1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:,1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - inputs)*(output - inputs)
    KL = gpu.sum(sparsityParam*gpu.log(sparsityParam/p_avg) + (1-sparsityParam)*gpu.log((1-sparsityParam)/(1-p_avg)))
    cost = gpu.sum(output_target_diff)/(2*nData) + 0.5 * lambda_val * (gpu.sum(regularized_penalty1) + gpu.sum(regularized_penalty2)) + beta*KL
    print 'ReLU Linear Decoder Cost: ', cost
    return cost
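# The sparsity term above is a sum of KL divergences between the target
# activation rate `sparsityParam` (rho) and each hidden unit's mean activation
# `p_avg` (rho_hat): KL = rho*log(rho/rho_hat) + (1-rho)*log((1-rho)/(1-rho_hat)).
# A small NumPy check of that formula (assumed helper, not part of the original):
import numpy as np

def kl_sparsity(rho, rho_hat):
    return np.sum(rho * np.log(rho / rho_hat)
                  + (1.0 - rho) * np.log((1.0 - rho) / (1.0 - rho_hat)))

# kl_sparsity(0.05, np.array([0.05, 0.05])) is 0 (the penalty vanishes when the
# average activation matches the target) and grows as rho_hat drifts away.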
Example #4
    def forward(self, X, test=False):
        """
        Feed-forward pass through the model
        X: ('batchsize' x 'context') matrix of word indices
        """
        batchsize = X.shape[0]
        R = self.R
        C = self.C
        bw = self.bw

        # Obtain word features
        tmp = R.as_numpy_array()[:,X.flatten()].flatten(order='F')  # flatten() defaults to row-major order; order='F' selects Fortran (column-major) order
        tmp = tmp.reshape((batchsize, self.K * self.context))   # reshape(), in row-major order
        words = np.zeros((batchsize, self.K, self.context))
        for i in range(batchsize):
            words[i,:,:] = tmp[i,:].reshape((self.K, self.context), order='F')
        words = gpu.garray(words)

        # Compute the hidden layer (predicted next word representation)
        acts = gpu.zeros((batchsize, self.K))
        for i in range(self.context):
            acts = acts + gpu.dot(words[:,:,i], C[i,:,:])  # dot() on 2-D arrays is equivalent to matrix multiplication
        acts = gpu.concatenate((acts, gpu.ones((batchsize, 1))), 1)

        # Compute softmax
        preds = gpu.dot(acts, gpu.concatenate((R, bw)))
        preds = gpu.exp(preds - preds.max(1).reshape(batchsize, 1))
        denom = preds.sum(1).reshape(batchsize, 1)
        preds = gpu.concatenate((preds / denom, gpu.ones((batchsize, 1))), 1)

        return (words, acts, preds.as_numpy_array())
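# The prediction step above is a numerically stable row-wise softmax: the
# per-row maximum is subtracted before exponentiation so exp() cannot overflow.
# A stand-alone NumPy version of the same trick (illustrative only):
import numpy as np

def stable_softmax(scores):
    shifted = scores - scores.max(axis=1, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=1, keepdims=True)

# Rows sum to 1 even for large scores, e.g. stable_softmax(np.array([[1000., 1001.]]))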
Example #5
    def forward(self, X, test=False):
        """
        Feed-forward pass through the model
        X: ('batchsize' x 'context') matrix of word indices
        """
        batchsize = X.shape[0]
        R = self.R
        C = self.C
        bw = self.bw

        # Obtain word features
        tmp = R.as_numpy_array()[:, X.flatten()].flatten(order='F')
        tmp = tmp.reshape((batchsize, self.K * self.context))
        words = np.zeros((batchsize, self.K, self.context))
        for i in range(batchsize):
            words[i, :, :] = tmp[i, :].reshape((self.K, self.context),
                                               order='F')
        words = gpu.garray(words)

        # Compute the hidden layer (predicted next word representation)
        acts = gpu.zeros((batchsize, self.K))
        for i in range(self.context):
            acts = acts + gpu.dot(words[:, :, i], C[i, :, :])
        acts = gpu.concatenate((acts, gpu.ones((batchsize, 1))), 1)

        # Compute softmax
        preds = gpu.dot(acts, gpu.concatenate((R, bw)))
        preds = gpu.exp(preds - preds.max(1).reshape(batchsize, 1))
        denom = preds.sum(1).reshape(batchsize, 1)
        preds = gpu.concatenate((preds / denom, gpu.ones((batchsize, 1))), 1)

        return (words, acts, preds.as_numpy_array())
def grad_costfunc_gpu_ReLU(x, *args):
    num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    num_weights2 = (num_hidden + 1) * num_output
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    weights2 = x[num_weights1:shape(x)[0]].reshape(
        (num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    #hidden_activation = gpu.log(1+hidden_sum.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum)) * (hidden_sum > 0)
    hidden_activation = hidden_sum * relu_mask_hidden1
    #hidden_derivative = hidden_sum.logistic()
    hidden_derivative = relu_mask_hidden1
    hidden_activation = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_activation),
                                        axis=0)
    hidden_derivative = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_derivative),
                                        axis=0)
    outputs = gpu.dot(weights2, hidden_activation)
    weights1_grad = gpu.zeros(shape(weights1))
    weights2_grad = gpu.zeros(shape(weights2))
    p = outputs - inputs
    weights2_grad += gpu.dot(
        p, gpu.garray(transpose(hidden_activation.as_numpy_array())))
    q_temp = gpu.dot(gpu.garray(transpose(weights2.as_numpy_array())), p)
    #q = multiply(multiply(q_temp,hidden_activation),(1-hidden_activation))
    q = q_temp * hidden_derivative
    delta2 = gpu.dot(q, gpu.garray(transpose(data.as_numpy_array())))
    weights1_grad += delta2[1:shape(delta2)[0], :]
    weights1_grad = weights1_grad / nData
    weights2_grad = weights2_grad / nData
    weights1_grad[:, 1:shape(weights1_grad)[1]] = weights1_grad[:, 1:shape(
        weights1_grad)[1]] + weights1[:, 1:shape(weights1)[1]] * lambda_val
    weights2_grad[:, 1:shape(weights2_grad)[1]] = weights2_grad[:, 1:shape(
        weights2_grad)[1]] + weights2[:, 1:shape(weights2)[1]] * lambda_val
    #weights1_grad = reshape(weights1_grad, num_weights1)
    weights1_grad = weights1_grad.reshape(num_weights1)
    #weights2_grad = reshape(weights2_grad, num_weights2)
    weights2_grad = weights2_grad.reshape(num_weights2)
    del x
    del inputs
    del data
    del p
    del q_temp
    del q
    del delta2
    del hidden_sum
    del hidden_activation
    del weights1
    del weights2
    gpu.free_reuse_cache()
    return hstack(
        (weights1_grad.as_numpy_array(), weights2_grad.as_numpy_array()))
Example #7
def forwardProp(X, theta1, theta2):

    a1 = gpu.concatenate((X, gpu.ones((np.size(X[:, 0]), 1))), axis=1)

    a2 = sigmoid(theta1.dot(a1.T))
    a2 = gpu.concatenate((a2, gpu.ones((1, np.size(a2[0, :])))), axis=0)

    a3 = sigmoid(theta2.dot(a2))

    return a1, a2, a3
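# forwardProp above relies on a sigmoid() helper defined elsewhere. A CPU
# sketch of the same two-layer pass in plain NumPy (bias column appended to the
# input, bias row appended to the hidden activations; names are assumptions):
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def forward_prop_numpy(X, theta1, theta2):
    a1 = np.concatenate([X, np.ones((X.shape[0], 1))], axis=1)    # N x (D+1)
    a2 = sigmoid(theta1.dot(a1.T))                                # H x N
    a2 = np.concatenate([a2, np.ones((1, a2.shape[1]))], axis=0)  # (H+1) x N
    a3 = sigmoid(theta2.dot(a2))                                  # K x N
    return a1, a2, a3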
Example #8
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(
        reshape(x[num_weights_L1:num_weights_L2 + num_weights_L1],
                (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_softmax = gpu.garray(
        reshape(x[num_weights_L2 + num_weights_L1:shape(x)[0]],
                (numClasses, l2Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_activation_L1),
                                           axis=0)
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    hidden_activation_L2 = hidden_sum_L2.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L2)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    temp = groundTruth * gpu.log(predictions)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:, 1:shape(theta_L2)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    cost = -1 * gpu.sum(temp) / numCases + 0.5 * lambda_hidden * (
        gpu.sum(regularized_penalty_L1) + gpu.sum(regularized_penalty_L2)
    ) + 0.5 * lambda_softmax * gpu.sum(theta_softmax * theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_L2
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost
Example #9
 def getMMReps(self, data):
     tops = []
     for i in range(self.modalsCnt):
         x = self.saes[i].forward2Top(data[i])
         tops.append(x[-1])
     if self.has_joint:
         jinp = gp.concatenate((tuple(tops)), axis=1)
         ja = self.jsae.forward2Top(jinp)
         return ja[-1].as_numpy_array()
     else:
         return gp.concatenate((tuple(tops)), axis=1).as_numpy_array()
Example #10
def grad_costfunc_gpu(x, *args):
    num_input,num_hidden,num_output,inputs,lambda_val,sparsityParam,beta = args
    num_weights1 = (num_input+1)*num_hidden
    num_weights2 = (num_hidden+1)*num_output
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights1].reshape((num_hidden,num_input+1))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output,num_hidden+1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1,nData)), inputs), axis = 0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = hidden_sum.logistic()
    p_avg = gpu.sum(hidden_activation,axis=1)/nData
    grad_sparse = -1*sparsityParam/p_avg.as_numpy_array() + (1-sparsityParam)/(1-p_avg.as_numpy_array())
    grad_sparse = append(0,grad_sparse)
    grad_sparse = tile(grad_sparse, (nData, 1))
    grad_sparse = gpu.garray(transpose(grad_sparse))
    hidden_activation = gpu.concatenate((gpu.ones((1,nData)), hidden_activation), axis = 0)
    outputs = gpu.dot(weights2, hidden_activation)
    weights1_grad = gpu.zeros(shape(weights1))
    weights2_grad = gpu.zeros(shape(weights2))
    p = outputs-inputs
    weights2_grad += gpu.dot(p, gpu.garray(transpose(hidden_activation.as_numpy_array())))
    q_temp = gpu.dot(gpu.garray(transpose(weights2.as_numpy_array())),p) + beta*grad_sparse
    #q = multiply(multiply(q_temp,hidden_activation),(1-hidden_activation))
    q = (q_temp*hidden_activation)*(1-hidden_activation)
    delta2 = gpu.dot(q, gpu.garray(transpose(data.as_numpy_array())))
    weights1_grad += delta2[1:shape(delta2)[0], :]
    weights1_grad = weights1_grad/nData
    weights2_grad = weights2_grad/nData
    weights1_grad[:,1:shape(weights1_grad)[1]] = weights1_grad[:,1:shape(weights1_grad)[1]] + weights1[:,1:shape(weights1)[1]] * lambda_val
    weights2_grad[:,1:shape(weights2_grad)[1]] = weights2_grad[:,1:shape(weights2_grad)[1]] + weights2[:,1:shape(weights2)[1]] * lambda_val
    #weights1_grad = reshape(weights1_grad, num_weights1)
    weights1_grad = weights1_grad.reshape(num_weights1)
    #weights2_grad = reshape(weights2_grad, num_weights2)
    weights2_grad = weights2_grad.reshape(num_weights2)
    del x
    del inputs
    del data
    del grad_sparse
    del p
    del q_temp
    del q
    del delta2
    del hidden_sum
    del hidden_activation
    del weights1
    del weights2
    gpu.free_reuse_cache()
    return hstack((weights1_grad.as_numpy_array(),weights2_grad.as_numpy_array()))
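# Gradient routines like the one above are normally validated against a
# central finite-difference approximation of the matching cost function.
# A generic checker in plain NumPy (hypothetical helper, not from the original):
import numpy as np

def check_gradient(cost_fn, grad_fn, x, *args):
    eps = 1e-4
    analytic = np.asarray(grad_fn(x, *args))
    numeric = np.zeros_like(x, dtype=float)
    for i in range(x.size):
        e = np.zeros_like(x, dtype=float)
        e[i] = eps
        numeric[i] = (cost_fn(x + e, *args) - cost_fn(x - e, *args)) / (2 * eps)
    # relative error; values around 1e-7 or smaller usually indicate agreement
    return np.linalg.norm(analytic - numeric) / np.linalg.norm(analytic + numeric)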
def costfunc_gpu(x, *args):
    num_input, num_hidden, num_output, inputs, noNoiseData, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    x = gpu.garray(x)
    #    randomNoise = random.random_sample(shape(inputs))
    #    criteriaTable = randomNoise > 0.32
    #    inputs = inputs * criteriaTable
    inputs = gpu.garray(inputs)
    noNoiseData = gpu.garray(noNoiseData)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape(
        (num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = hidden_sum.logistic()
    p_avg = gpu.sum(hidden_activation, axis=1) / nData
    hidden_activation = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_activation),
                                        axis=0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:, 1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:, 1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - noNoiseData) * (output - noNoiseData)
    KL = gpu.sum(sparsityParam * gpu.log(sparsityParam / p_avg) +
                 (1 - sparsityParam) * gpu.log((1 - sparsityParam) /
                                               (1 - p_avg)))
    cost = gpu.sum(output_target_diff) / (2 * nData) + 0.5 * lambda_val * (
        gpu.sum(regularized_penalty1) +
        gpu.sum(regularized_penalty2)) + beta * KL
    print 'GPU Linear Denoising Decoder Cost: ', cost
    del x
    del inputs
    del noNoiseData
    del data
    del hidden_sum
    del hidden_activation
    del p_avg
    del output
    del regularized_penalty1
    del regularized_penalty2
    del weights1
    del weights2
    del output_target_diff
    gpu.free_reuse_cache()
    return cost
Example #12
    def next(self):
        """
        Get the next minibatch of data.

        Return a tuple of (minibatch_x, minibatch_t) if t is not None,
        otherwise return only minibatch_x.
        """
        minibatch_t = None
        if self.i_ptr + self.minibatch_size <= self.n_cases:
            minibatch_x = self.x[self.idx[self.i_ptr:self.i_ptr + self.minibatch_size]]
            if self.t is not None:
                minibatch_t = self.t[self.idx[self.i_ptr:self.i_ptr + self.minibatch_size]]
            self.i_ptr += self.minibatch_size
        else:
            if self.i_ptr >= self.n_cases:  # empty part, needed for garray handling
                # minibatch_x_part = self.x[:0].copy()
                minibatch_x_part = None
                if self.t is not None:
                    # minibatch_t_part = self.t[:0].copy()
                    minibatch_t_part = None
            else:
                minibatch_x_part = self.x[self.idx[self.i_ptr:]].copy()
                if self.t is not None:
                    minibatch_t_part = self.t[self.idx[self.i_ptr:]].copy()

            other_part_size = self.minibatch_size - (self.n_cases - self.i_ptr)
            self.shuffle_data()
            if minibatch_x_part is not None:
                if isinstance(self.x, gnp.garray):
                    minibatch_x = gnp.concatenate([minibatch_x_part, self.x[self.idx[:other_part_size]]], axis=0)
                else:
                    minibatch_x = np.r_[minibatch_x_part, self.x[self.idx[:other_part_size]]]
            else:
                minibatch_x = self.x[self.idx[:other_part_size]]

            if self.t is not None:
                if minibatch_t_part is not None:
                    if isinstance(self.t, gnp.garray):
                        minibatch_t = gnp.concatenate([minibatch_t_part, self.t[self.idx[:other_part_size]]], axis=0)
                    else:
                        minibatch_t = np.r_[minibatch_t_part, self.t[self.idx[:other_part_size]]]
                else:
                    minibatch_t = self.t[self.idx[:other_part_size]]

            self.i_ptr = other_part_size

        if self.t is not None:
            return minibatch_x, minibatch_t
        else:
            return minibatch_x
def feedforward(theta, data):
    nData = shape(data)[1]
    x = gpu.concatenate((gpu.ones((1,nData)), data), axis = 0)
    hidden_sum = gpu.dot(theta, x)
    relu_mask_hidden = gpu.ones(shape(hidden_sum)) * (hidden_sum>0)
    hidden_activation = hidden_sum*relu_mask_hidden
    return hidden_activation
Example #14
    def pack(self):
        return g.concatenate([self.h_init.ravel(),

                              self.W_hh.ravel(),
                              self.W_vh.ravel(),

                              self.W_ho.ravel()])
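# pack() flattens every parameter matrix into one long vector, which is what
# generic optimizers such as L-BFGS expect. The inverse operation slices the
# vector back into the original shapes; an illustrative NumPy pair (names assumed):
import numpy as np

def pack_params(params):
    return np.concatenate([p.ravel() for p in params])

def unpack_params(theta, shapes):
    out, offset = [], 0
    for shape in shapes:
        size = int(np.prod(shape))
        out.append(theta[offset:offset + size].reshape(shape))
        offset += size
    return out

# unpack_params(pack_params([A, B]), [A.shape, B.shape]) recovers A and B.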
Example #15
def setup_training_data(params, midi_dir, verbose=False):
    '''
    Load and set up training data.

    params: training parameters; 'Tv' and 'Th' determine the max look-behind T
        used to frame subsequences, 'vis_scale' rescales the data, and 'Nl' is
        the number of label units
    midi_dir: directory containing the MIDI training data
    '''

    # load training data
    sequential_data, sequential_labels, num_labels = load_data(midi_dir)

    T = max(params['Tv'], params['Th'])  # max look-behind
    # convert sequences into subsequences of length T+1
    subseq_data, subseq_labels = frame_subseqs(T + 1, sequential_data,
                                               sequential_labels)
    subseq_data *= params['vis_scale']  # put training data at correct scale
    training_data = subseq_to_frames(subseq_data)

    Nl = params['Nl']
    training_labels = compute_binary_labels(subseq_to_frames(subseq_labels),
                                            Nl)
    input_training_data = gp.concatenate(
        (gp.garray(training_data), gp.garray(training_labels)), axis=1)

    return input_training_data
Example #16
    def apply_update(self, pos_moments, neg_moments, rbm, weight_decay, lrate):
        assert np.allclose(lrate.vbias, lrate.hbias)

        if self.count < self.params.start_after:
            rbm.sgd_update(pos_moments, neg_moments, lrate)
            return

        # base rates
        ds = gnp.concatenate([
            pos_moments.expect_vis - neg_moments.expect_vis,
            pos_moments.expect_hid - neg_moments.expect_hid
        ])
        dbias = lrate.vbias * gnp.dot(self.Lambda, ds.as_numpy_array())
        da, db = dbias[:rbm.nvis], dbias[rbm.nvis:]

        residuals = pos_moments.expect_prod - neg_moments.expect_prod + \
                    -weight_decay * rbm.weights + \
                    -self.beta[:, :, 0] * (pos_moments.expect_vis - neg_moments.expect_vis)[:, nax] + \
                    -self.beta[:, :, 1] * (pos_moments.expect_hid - neg_moments.expect_hid)[nax, :]
        lam = 1. / self.sigma_sq

        dw = lrate.weights * lam * residuals
        da -= lrate.weights * (lam * residuals * self.beta[:, :, 0]).sum(1)
        db -= lrate.weights * (lam * residuals * self.beta[:, :, 1]).sum(0)

        update = binary_rbms.Update(da, db, dw)
        rbm += update
Example #17
    def apply_update(self, pos_moments, neg_moments, rbm, weight_decay, lrate):
        assert np.allclose(lrate.vbias, lrate.hbias)

        if self.count < self.params.start_after:
            rbm.sgd_update(pos_moments, neg_moments, lrate)
            return

        # base rates
        ds = gnp.concatenate([pos_moments.expect_vis - neg_moments.expect_vis,
                              pos_moments.expect_hid - neg_moments.expect_hid])
        dbias = lrate.vbias * gnp.dot(self.Lambda, ds.as_numpy_array())
        da, db = dbias[:rbm.nvis], dbias[rbm.nvis:]

        residuals = pos_moments.expect_prod - neg_moments.expect_prod + \
                    -weight_decay * rbm.weights + \
                    -self.beta[:, :, 0] * (pos_moments.expect_vis - neg_moments.expect_vis)[:, nax] + \
                    -self.beta[:, :, 1] * (pos_moments.expect_hid - neg_moments.expect_hid)[nax, :]
        lam = 1. / self.sigma_sq

        dw = lrate.weights * lam * residuals
        da -= lrate.weights * (lam * residuals * self.beta[:, :, 0]).sum(1)
        db -= lrate.weights * (lam * residuals * self.beta[:, :, 1]).sum(0)

        update = binary_rbms.Update(da, db, dw)
        rbm += update
Example #18
def dbn_forward_pass(ws_vh, ws_v, ws_h, x, y=None):
    """
    Deep belief net forward pass.
    
    x: input data (N x D matrix)
    y: Class label (1-of-K coded, N x K matrix). If not None, it is concatenated
        to the input for top layer RBM when calculating the output of the DBN.
    ws_vh: list of layer weights (L x D x H)
    ws_v: list of layer input biases (L x D x 1)
    ws_h: list of layer output biases (L x H x 1)
    Returns activations (continuous) and outputs (0-1, sigmoid(activations)) of
    top layer
    """
    L = len(ws_vh)
    h = x.T

    # forward (bottom-up) pass
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h) + ws_h[l]
        h = gnp.logistic(ah)

    # if supervised, concatenate class labels to input to top layer RBM
    if y is not None:
        h = gnp.concatenate((y.T, h))

    ah = gnp.dot(ws_vh[-1].T, h) + ws_h[-1]
    h = gnp.logistic(ah)

    return ah.T, h.T
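# A compact NumPy restatement of the unsupervised path through the DBN forward
# pass above: each layer computes sigmoid(W_l^T h + b_l), and the top layer's
# activations and sigmoid outputs are returned. Illustrative only; the original
# operates on gnumpy arrays and optionally concatenates class labels before the
# top layer.
import numpy as np

def dbn_forward_numpy(ws_vh, ws_h, x):
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    h = x.T
    for W, b in zip(ws_vh[:-1], ws_h[:-1]):
        h = sigmoid(np.dot(W.T, h) + b)
    ah = np.dot(ws_vh[-1].T, h) + ws_h[-1]
    return ah.T, sigmoid(ah).T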
Example #19
def dbn_supervised_predict_sample(ws_vh, ws_v, ws_h, x, k=20):
    """
    Predict the class label of input x from supervised DBN
    WARNING: THIS IS PRETTY SLOW AND LESS RELIABLE THAN THE EXACT METHOD
    Uses the sampling method mentioned in section 6.2 of Hinton, Osindero, Teh 2006
    
    x: Input data. (NxD matrix)
    k: Number of Gibbs steps
    """
    L = len(ws_vh)
    N = x.shape[0]

    # make a forward pass to get from input layer to visible layer of top level
    # RBM
    h_prev = x.T

    # forward (bottom-up) pass; deterministic, i.e. we propagate the activations
    # rather than stochastically sampled states
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h_prev) + ws_h[l]
        h_prev = gnp.logistic(ah)

    H = ws_vh[-1].shape[0]  # number of visible units top level RBM
    Hx = h_prev.shape[0]  # number of hidden units in the penultimate layer
    K = H - Hx
    # (H - Hx) is the number of supervised (label) inputs to the top level RBM;
    # the supervised portion of the input is initialised uniformly to 1/K
    v = gnp.concatenate((gnp.ones((K, N)) / K, h_prev))
    # we keep the visible units clamped while sampling
    h, v = rbm_sample(ws_vh[-1], ws_v[-1], ws_h[-1], v, k, clamped=(K, H))

    # return the label portion of the sampled visible units of the top level RBM
    return v[0:K, :].T
    def forward(self, X, Im, test=False):
        """
        Feed-forward pass through the model
        X: ('batchsize' x 'context') matrix of word indices
        """
        batchsize = X.shape[0]
        Im = gpu.garray(Im)
        C = self.C
        M = self.M
        bw = self.bw
        J = self.J
        bj = self.bj
        Wfx = self.Wfx
        Whf = self.Whf
        Wfv = self.Wfv

        # Forwardprop images
        Im = gpu.concatenate((Im, gpu.ones((batchsize, 1))), 1)
        IF = gpu.dot(Im, gpu.concatenate((J, bj)))
        IF = IF * (IF > 0)

        # Obtain word features
        R = gpu.dot(Wfx, Whf)
        tmp = R.as_numpy_array()[:,X.flatten()].flatten(order='F')
        tmp = tmp.reshape((batchsize, self.K * self.context))
        words = np.zeros((batchsize, self.K, self.context))
        for i in range(batchsize):
            words[i,:,:] = tmp[i,:].reshape((self.K, self.context), order='F')
        words = gpu.garray(words)

        # Compute the hidden layer (predicted next word representation)
        acts = gpu.zeros((batchsize, self.K))
        for i in range(self.context):
            acts = acts + gpu.dot(words[:,:,i], C[i,:,:])
        acts = acts + gpu.dot(IF, M)

        # Multiplicative interaction
        F = gpu.dot(acts, Wfx) * gpu.dot(IF, Wfv)
        F = gpu.concatenate((F, gpu.ones((batchsize, 1))), 1)

        # Compute softmax
        preds = gpu.dot(F, gpu.concatenate((Whf, bw)))
        preds = gpu.exp(preds - preds.max(1).reshape(batchsize, 1))
        denom = preds.sum(1).reshape(batchsize, 1)
        preds = gpu.concatenate((preds / denom, gpu.ones((batchsize, 1))), 1)

        return (words, acts, IF, F, preds.as_numpy_array())
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(reshape(x[num_weights_L1:num_weights_L2+num_weights_L1], (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_softmax = gpu.garray(reshape(x[num_weights_L2+num_weights_L1:shape(x)[0]], (numClasses, l2Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    inputs = gpu.concatenate((gpu.ones((1,numCases)), inputs), axis = 0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis=0)
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    hidden_activation_L2 = hidden_sum_L2.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L2)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis = 0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions,axis = 0)
    temp = groundTruth*gpu.log(predictions)
    regularized_penalty_L1 = theta_L1[:,1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:,1:shape(theta_L2)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    cost = -1*gpu.sum(temp)/numCases + 0.5 * lambda_hidden*(gpu.sum(regularized_penalty_L1) + gpu.sum(regularized_penalty_L2)) + 0.5 * lambda_softmax * gpu.sum(theta_softmax*theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_L2
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost
def mlpSingleOutput1Layer_grad(x, *args):
    inputSize, l1Size, lambda_hidden, inputs, targets = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_output = 1 * (l1Size + 1)
    inputs = gpu.garray(inputs)
    targets = gpu.garray(targets)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    theta_output = gpu.garray(
        reshape(x[num_weights_L1:shape(x)[0]], (1, l1Size + 1)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_activation_L1),
                                           axis=0)
    #hidden_activation_L1 = hidden_activation_L1 * dropout_prob
    hidden_sum_output = gpu.dot(theta_output, hidden_activation_L1)
    outputs = hidden_sum_output.logistic()
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    theta_output_grad = gpu.zeros(shape(theta_output))
    a = (outputs - targets) * outputs * (1 - outputs)
    theta_output_grad += gpu.dot(
        a, gpu.garray(transpose(hidden_activation_L1.as_numpy_array())))
    b_temp = gpu.dot(gpu.garray(transpose(theta_output.as_numpy_array())), a)
    b = (b_temp * hidden_activation_L1) * (1 - hidden_activation_L1)
    delta2 = gpu.dot(b, gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta2[1:shape(delta2)[0], :]
    theta_L1_grad = theta_L1_grad / numCases
    theta_output_grad = theta_output_grad / numCases
    theta_output_grad[:, 1:shape(theta_output_grad)[1]] = theta_output_grad[:, 1:shape(theta_output_grad)[1]] + theta_output[:, 1:shape(theta_output)[1]] * lambda_hidden
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] + theta_L1[:, 1:shape(theta_L1)[1]] * lambda_hidden
    theta_output_grad = reshape(theta_output_grad.as_numpy_array(),
                                num_weights_output)
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    del inputs
    del theta_L1
    del hidden_sum_L1
    del hidden_activation_L1
    gpu.free_reuse_cache()
    return hstack((theta_L1_grad, theta_output_grad))
Example #23
def dbn_train(train_x, H, batch_size, epoch_count, epsilon, momentum, train_y=None, return_hidden=True, verbose=True):
    """
    NOTE: SUPERVISED TRAINING IS NOT REALLY TESTED WELL. TEST IT SOMEDAY!!!
    Unsupervised layerwise training of a sigmoidal Deep Belief Net.
    
    train_x: Training data. NxD matrix.
    train_y: Training labels NxK matrix (1-of-K coded). If provided, labels
        are included in the inputs to top layer RBM (See Hinton, Osindero, Teh 2006)
    H: Number of hidden units in each layer. e.g. [100, 2000, 300]
    batch_size: Batch size. Either a scalar or a list (a batch size for each layer).
    epoch_count: Number of training epochs. Either a scalar or a list (an epoch count for each layer).
    epsilon: Learning rate. Either a scalar or a list (a learning rate for each layer and epoch).
    momentum: Momentum. Either a scalar or a list (a momentum for each layer and epoch).
    return_hidden: If True, returns hidden unit activations for training data.
    verbose: If True, prints progress information
    Returns ws_vh (list of weight matrices for each layer), ws_v (list of input 
    unit biases for each layer), ws_h (list of output unit biases for each layer),
    and, if return_hidden is True, h (output layer hidden unit activations for training data)
    """
    layer_count = len(H)
    # if any of the training parameters are given as scalars, convert them to lists
    if not isinstance(epoch_count, list):
        epoch_count = [epoch_count] * layer_count
    if not isinstance(batch_size, list):
        batch_size = [batch_size] * layer_count
    if not isinstance(epsilon, list):
        epsilon = [[epsilon] * e_c for e_c in epoch_count]
    if not isinstance(momentum, list):
        momentum = [[momentum] * e_c for e_c in epoch_count]

    ws_vh = []
    ws_v = []
    ws_h = []
    error = []
    # train layer by layer
    h = train_x
    for i, h_count in enumerate(H):
        # we need to return the hidden unit activations only for output layer, if
        # return_hidden is True
        if not return_hidden and i == layer_count - 1:
            rh = False
        else:
            rh = True

        # if we have train_y and we are training the last layer, concatenate
        # class labels to inputs
        if train_y is not None and i == layer_count - 1:
            h = gnp.concatenate((train_y, h), axis=1)

        w_vh, w_v, w_h, h, l_error = rbm_train(
            h, h_count, batch_size[i], epoch_count[i], epsilon[i], momentum[i], return_hidden=rh, verbose=verbose
        )

        ws_vh.append(w_vh)
        ws_v.append(w_v)
        ws_h.append(w_h)
        error.append(l_error)

    return ws_vh, ws_v, ws_h, h, error
Example #24
def _init_plus(X, K, dist='euclidean'):
    f_dist = choose_distance_metric(dist)
    C = X[np.random.randint(X.shape[0])].reshape(1,-1)

    for k in xrange(1, K):
        idx = f_dist(X, C).min(axis=1).argmax()
        C = gnp.concatenate([C, X[idx].reshape(1,-1)], axis=0)

    return C
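# _init_plus above performs farthest-first seeding: after a random first centre,
# each new centre is the point whose distance to its nearest chosen centre is
# largest. A self-contained NumPy version with Euclidean distance (the original's
# choose_distance_metric is assumed to supply something equivalent):
import numpy as np

def init_plus_numpy(X, K, rng=np.random):
    C = X[rng.randint(X.shape[0])].reshape(1, -1)
    for _ in range(1, K):
        # squared Euclidean distance from every point to every chosen centre
        d2 = ((X[:, None, :] - C[None, :, :]) ** 2).sum(axis=2)
        idx = d2.min(axis=1).argmax()
        C = np.concatenate([C, X[idx].reshape(1, -1)], axis=0)
    return C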
def mlpSoftmax1Layer_grad(x, *args):
    numClasses, inputSize, l1Size, lambda_softmax, lambda_hidden, inputs, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_softmax = numClasses * l1Size
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    theta_softmax = gpu.garray(
        reshape(x[num_weights_L1:shape(x)[0]], (numClasses, l1Size)))
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    #hidden_derivative_L1 = hidden_sum_L1.logistic()
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1 > 0)
    hidden_activation_L1 = hidden_sum_L1 * relu_mask_hidden1
    #hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_derivative_L1 = relu_mask_hidden1
    hidden_sum_softmax_imd = gpu.dot(theta_softmax, hidden_activation_L1)
    hidden_sum_softmax = hidden_sum_softmax_imd - hidden_sum_softmax_imd.max(
        axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    softmax_imd = groundTruth - predictions
    theta_softmax_grad = -1 * gpu.dot(
        softmax_imd,
        gpu.garray(transpose(hidden_activation_L1.as_numpy_array()))
    ) / numCases + lambda_softmax * theta_softmax
    deltaOut = -softmax_imd
    delta_L1_imd = gpu.dot(
        gpu.garray(transpose(theta_softmax.as_numpy_array())), deltaOut)
    delta_L1_imd2 = delta_L1_imd * hidden_derivative_L1
    #delta_L1_imd2 = (delta_L1_imd*hidden_activation_L1)*(1-hidden_activation_L1)
    delta_L1 = gpu.dot(delta_L1_imd2,
                       gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta_L1
    theta_L1_grad = theta_L1_grad / numCases
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(
        theta_L1_grad)[1]] + theta_L1[:, 1:shape(theta_L1)[1]] * lambda_hidden
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    theta_softmax_grad = reshape(theta_softmax_grad.as_numpy_array(),
                                 num_weights_softmax)
    del inputs
    del theta_L1
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_softmax
    del predictions
    del softmax_imd
    del deltaOut
    del delta_L1_imd
    del delta_L1_imd2
    del delta_L1
    gpu.free_reuse_cache()
    return hstack((theta_L1_grad, theta_softmax_grad))
Example #26
 def pack(self):
     return g.concatenate([self.h_init.ravel(),
                           self.W_hf.ravel(),
                           self.W_fh.ravel(),
                           #self.W_hh.ravel(),
                           self.f_bias.ravel(),
                           self.W_vh.ravel(),
                           self.W_vf.ravel(),
                           self.W_ho.ravel()])
def costfunc_gpu_ReLU(x, *args):
    num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape(
        (num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    #hidden_activation = gpu.log(1+hidden_sum.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum)) * (hidden_sum > 0)
    hidden_activation = hidden_sum * relu_mask_hidden1
    hidden_activation = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_activation),
                                        axis=0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:, 1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:, 1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - inputs) * (output - inputs)
    cost = gpu.sum(output_target_diff) / (2 * nData) + 0.5 * lambda_val * (
        gpu.sum(regularized_penalty1) + gpu.sum(regularized_penalty2))
    print 'GPU ReLU Linear Decoder Cost: ', cost
    del x
    del inputs
    del data
    del hidden_sum
    del hidden_activation
    del output
    del regularized_penalty1
    del regularized_penalty2
    del weights1
    del weights2
    del output_target_diff
    gpu.free_reuse_cache()
    return cost
Example #28
    def trainClassifierOneBatch(self,
                                trainbatch,
                                labelbatch,
                                epoch,
                                diff_cost=1.0,
                                recf=1.0):
        """
        trains one pair in which each element has two modalities
        im1: first element's image data
        tx1: first element's text data
        im2: second element's image data
        tx2: second element's text data
        sim: if the pair is in similar set
        recf: reconstruction factor
        """
        a = []

        for m in xrange(self.modalsCnt):
            a.append(self.saes[m].forward2Top(trainbatch[m]))

        jinp = gp.concatenate(tuple(e[self.depth - 1] for e in a), axis=1)
        ja = self.jsae.forward(jinp)

        for m in xrange(self.modalsCnt):
            a[m].append(ja[-1][:, self.dims[m]:self.dims[m + 1]])
            self.saes[m].backward2Bottom(a[m])

        #get path grad for z
        #backpropagate x and y wrt z
        g, jg, rl = self.getClassificationGrad(a,
                                               ja,
                                               labelbatch,
                                               diff_factor=diff_cost,
                                               recf=recf)

        # these lines are just for debugging:
        perfaf = gp.concatenate(tuple(e[0] for e in a), axis=1)
        perfal = gp.concatenate(tuple(e[-1] for e in a), axis=1)
        perf = self.getDiffLoss(perfaf, perfal)

        return perf, g, jg
Example #29
def dbn_supervised_predict_exact(ws_vh, ws_v, ws_h, x):
    """
    Predict the class label of input x from supervised DBN
    Uses the exact method mentioned in section 6.2 of Hinton, Osindero, Teh 2006
    The free energy formula is taken from http://deeplearning.net/tutorial/rbm.html
    
    x: Input data. (NxD matrix)
    """
    L = len(ws_vh)
    N = x.shape[0]

    # make a forward pass to get from input layer to visible layer of top level
    # RBM
    h_prev = x.T

    # forward (bottom-up) pass; deterministic, i.e. we propagate the activations
    # rather than stochastically sampled states
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h_prev) + ws_h[l]
        h_prev = gnp.logistic(ah)

    H = ws_vh[-1].shape[0]  # number of visible units top level RBM
    Hx = h_prev.shape[0]  # number of hidden units in the penultimate layer
    K = H - Hx
    # (H - Hx) is the number of supervised inputs to top level RBM

    # for every class, assume it is the correct label and calculate its free energy
    y = gnp.zeros((K, N))
    free_energy = gnp.zeros((N, K))  # we actually calculate -free_energy
    for k in range(K):
        # set the current assumed class label
        y[k, :] = 1.0

        # visible unit vector
        v = gnp.concatenate((y, h_prev))
        e_v = gnp.dot(ws_v[-1].T, v)  # bias energy term

        ah = gnp.dot(ws_vh[-1].T, v) + ws_h[-1]
        e_h = gnp.sum(gnp.log(gnp.exp(ah) + 1.0), axis=0)

        free_energy[:, k] = e_v + e_h

        # zero the class labels for next iteration
        y[:, :] = 0.0

    # exponentiating these values directly can overflow or underflow, so use the
    # max-subtraction (sum-exp) trick when converting them to probabilities
    pred_y = (
        gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis])
        / gnp.sum(gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis]), axis=1)[:, gnp.newaxis]
    )

    return pred_y
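# The two energy terms above implement the RBM free energy from the cited
# tutorial: -F(v) = b_v . v + sum_j log(1 + exp(W[:, j] . v + b_h[j])).
# A stand-alone NumPy version for a single visible vector (illustrative only):
import numpy as np

def neg_free_energy(W, b_v, b_h, v):
    # W: D x H weights, b_v: D visible biases, b_h: H hidden biases, v: D visible vector
    return np.dot(b_v, v) + np.sum(np.log1p(np.exp(np.dot(W.T, v) + b_h)))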
Example #30
def basic_gradient_descent():
    digits = datasets.load_digits()
    # iris = datasets.load_iris()
    X = digits.images.reshape((digits.images.shape[0], -1))

    scaler = pre.Scaler()
    X = scaler.fit_transform(X)

    y = ut.all_to_sparse(digits.target, max(digits.target) + 1)
    X, y, X_val, y_val, X_test, y_test = neur.cross_validation_sets(
        gpu.as_garray(X), gpu.as_garray(y), "digits")
    X_val = gpu.concatenate([X_val, X_test])
    y_val = gpu.concatenate([y_val, y_test])
    thetas, costs, val_costs = neur.gradient_decent(
        gpu.as_garray(X),
        gpu.as_garray(y),
        #hidden_layer_sz = 11,
        hidden_layer_sz=45,
        iter=500,
        wd_coef=0.0,
        learning_rate=0.25,
        momentum_multiplier=0.9,
        rand_init_epsilon=0.012,
        do_early_stopping=True,
        #do_dropout = True,
        dropout_percentage=0.7,
        #do_learning_adapt = True,
        X_val=gpu.as_garray(X_val),
        y_val=gpu.as_garray(y_val))
    h_x, a = neur.forward_prop(X_test, thetas)
    h_x = map(lambda x: x.as_numpy_array(), h_x)
    print "percentage correct predictions: ", ut.percent_equal(
        ut.map_to_max_binary_result(h_x), y_test.as_numpy_array())
    print "training error:", costs[-1:][0]
    print "validation error:", val_costs[-1:][0]
    print "lowest validation error:", min(val_costs)
    plt.plot(costs, label='cost')
    plt.plot(val_costs, label='val cost')
    plt.legend()
    plt.ylabel('error rate')
    def backward(self, Y, preds, F, IF, acts, words, X, Im):
        """
        Backward pass through the network
        """
        batchsize = preds.shape[0]
        Im = gpu.garray(Im)

        # Compute part of df/dR
        Ix = gpu.garray(preds[:,:-1] - Y) / batchsize
        delta = gpu.dot(F.T, Ix)
        dWhf = delta[:-1,:] + self.gamma_r * self.Whf
        db = delta[-1,:]

        # Compute df/Wfv and part of df/Wfx
        Ix = gpu.dot(Ix, self.Whf.T)
        dWfv = gpu.dot(IF.T, Ix * gpu.dot(acts, self.Wfx)) + self.gamma_r * self.Wfv
        dWfx = gpu.dot(acts.T, Ix * gpu.dot(IF, self.Wfv)) + self.gamma_r * self.Wfx
        
        # Compute df/dC and word inputs for df/dR
        Ix_word = gpu.dot(Ix * gpu.dot(IF, self.Wfv), self.Wfx.T)
        dC = gpu.zeros(np.shape(self.C))
        dR = np.zeros((self.K, self.V))
        for i in range(self.context):
            delta = gpu.dot(words[:,:,i].T, Ix_word)
            dC[i,:,:] = delta + self.gamma_c * self.C[i,:,:]
            delta = gpu.dot(Ix_word, self.C[i,:,:].T)
            delta = delta.as_numpy_array()
            for j in range(X.shape[0]):
                dR[:,X[j,i]] = dR[:,X[j,i]] + delta.T[:,j]

        dR = gpu.garray(dR)
        dWfx = dWfx + gpu.dot(dR, self.Whf.T)
        dWhf = dWhf + gpu.dot(self.Wfx.T, dR)

        # Compute df/dM
        dM = gpu.dot(IF.T, Ix_word) + self.gamma_c * self.M

        # Compute df/dJ
        Ix = gpu.dot(Ix * gpu.dot(acts, self.Wfx), self.Wfv.T) * (IF > 0) + gpu.dot(Ix_word, self.M.T) * (IF > 0)
        Im = gpu.concatenate((Im, gpu.ones((batchsize, 1))), 1)
        delta = gpu.dot(Im.T, Ix)
        dJ = delta[:-1,:] + self.gamma_c * self.J
        dBj = delta[-1,:]

        self.db = db
        self.dC = dC
        self.dM = dM
        self.dJ = dJ
        self.dBj = dBj
        self.dWhf = dWhf
        self.dWfv = dWfv
        self.dWfx = dWfx
Example #32
    def gradDebug(self, inputBatch, targetBatch):
        inputBatch = inputBatch if isinstance(inputBatch, gnp.garray) else gnp.garray(inputBatch)
        targetBatch = targetBatch if isinstance(targetBatch, gnp.garray) else gnp.garray(targetBatch)
        

        mbsz = inputBatch.shape[0]
        outputActs = self.fprop(inputBatch)
        outputErrSignal = -self.outputActFunct.dErrordNetInput(targetBatch, self.state[-1], outputActs)
        errSignals = self.bprop(outputErrSignal)
        for i, (WGrad, biasGrad) in enumerate(self.gradients(self.state, errSignals)):
            self.WGrads[i] = WGrad
            self.biasGrads[i] = biasGrad
        allWeightGrads = itertools.chain(self.WGrads, self.biasGrads)
        return gnp.as_numpy_array(gnp.concatenate([dw.ravel() for dw in allWeightGrads])) 
Example #33
def basic_gradient_descent():
    digits = datasets.load_digits()
    # iris = datasets.load_iris()
    X = digits.images.reshape((digits.images.shape[0], -1))

    scaler = pre.Scaler()
    X = scaler.fit_transform(X)
    
    y = ut.all_to_sparse( digits.target, max(digits.target) + 1 )
    X, y, X_val, y_val, X_test, y_test = neur.cross_validation_sets(gpu.as_garray(X), gpu.as_garray(y), "digits")
    X_val = gpu.concatenate([X_val, X_test]) 
    y_val = gpu.concatenate([y_val, y_test]) 
    thetas, costs, val_costs = neur.gradient_decent(gpu.as_garray(X), 
                                                    gpu.as_garray(y),
                                                    #hidden_layer_sz = 11,
                                                    hidden_layer_sz = 45,
                                                    iter = 500,
                                                    wd_coef = 0.0,
                                                    learning_rate = 0.25,
                                                    momentum_multiplier = 0.9,
                                                    rand_init_epsilon = 0.012,
                                                    do_early_stopping = True,
                                                    #do_dropout = True,
                                                    dropout_percentage = 0.7,
                                                    #do_learning_adapt = True,
                                                    X_val = gpu.as_garray(X_val),
                                                    y_val = gpu.as_garray(y_val))
    h_x, a = neur.forward_prop(X_test, thetas)
    h_x = map(lambda x: x.as_numpy_array(), h_x)
    print "percentage correct predictions: ", ut.percent_equal(ut.map_to_max_binary_result(h_x), y_test.as_numpy_array())
    print "training error:",   costs[-1:][0]
    print "validation error:", val_costs[-1:][0]
    print "lowest validation error:", min(val_costs)
    plt.plot(costs, label='cost')
    plt.plot(val_costs, label='val cost')
    plt.legend()
    plt.ylabel('error rate')
def mlpSingleOutput1Layer_costfunc(x, *args):
    inputSize, l1Size, lambda_hidden, inputs, targets = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    inputs = gpu.garray(inputs)
    targets = gpu.garray(targets)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    theta_output = gpu.garray(
        reshape(x[num_weights_L1:shape(x)[0]], (1, l1Size + 1)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_activation_L1),
                                           axis=0)
    #hidden_activation_L1 = hidden_activation_L1 * dropout_prob
    hidden_sum_output = gpu.dot(theta_output, hidden_activation_L1)
    outputs = hidden_sum_output.logistic()
    output_target_diff = (outputs - targets)**2
    regularized_penalty_output = theta_output[:, 1:shape(theta_output)[1]]
    regularized_penalty_output = regularized_penalty_output * regularized_penalty_output
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    cost = gpu.sum(output_target_diff) / (2 * numCases) + 0.5 * lambda_hidden * (gpu.sum(regularized_penalty_L1) + gpu.sum(regularized_penalty_output))
    print 'Multilayer Perceptron Cost:', cost
    del inputs
    del theta_L1
    del hidden_sum_L1
    del hidden_activation_L1
    del regularized_penalty_output
    del regularized_penalty_L1
    gpu.free_reuse_cache()
    return cost
Example #35
File: dbn.py Project: caomw/gdbn
    def gradDebug(self, inputBatch, targetBatch):
        inputBatch = inputBatch if isinstance(inputBatch, gnp.garray) else gnp.garray(inputBatch)
        targetBatch = targetBatch if isinstance(targetBatch, gnp.garray) else gnp.garray(targetBatch)

        mbsz = inputBatch.shape[0]
        outputActs = self.fprop(inputBatch)
        outputErrSignal = -self.outputActFunct.dErrordNetInput(targetBatch, self.state[-1], outputActs)
        # error = self.outputActFunct.error(targetBatch, self.state[-1], outputActs)
        errSignals = self.bprop(outputErrSignal)
        for i, (WGrad, biasGrad) in enumerate(self.gradients(self.state, errSignals)):
            # update the weight increments
            self.WGrads[i] = WGrad
            self.biasGrads[i] = biasGrad
        allWeightGrads = itertools.chain(self.WGrads, self.biasGrads)
        return gnp.as_numpy_array(gnp.concatenate([dw.ravel() for dw in allWeightGrads]))
def fine_tuning_cost_gpu(x, *args):
    inputSize, l1Size, l2Size, l3Size, lambda_val, inputs = args
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    num_weights_L3 = l3Size * (l2Size + 1)
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    weights2 = x[num_weights_L1:num_weights_L1+num_weights_L2].reshape((l2Size, l1Size + 1))
    weights3 = x[num_weights_L1+num_weights_L2:num_weights_L1+num_weights_L2+num_weights_L3].reshape((l3Size, l2Size + 1))
    weights4 = x[num_weights_L1+num_weights_L2+num_weights_L3:shape(x)[0]].reshape((inputSize, l3Size + 1))
    nData = shape(inputs)[1]
    x = gpu.concatenate((gpu.ones((1,nData)), inputs), axis = 0)
    hidden1_sum = gpu.dot(weights1, x)
    #hidden1_activation = gpu.log(1+hidden1_sum.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden1_sum)) * (hidden1_sum>0)
    hidden1_activation = hidden1_sum*relu_mask_hidden1
    hidden1_activation = gpu.concatenate((gpu.ones((1,nData)), hidden1_activation), axis = 0)
    hidden2_sum = gpu.dot(weights2, hidden1_activation)
    #hidden2_activation = gpu.log(1+hidden2_sum.exp())
    relu_mask_hidden2 = gpu.ones(shape(hidden2_sum)) * (hidden2_sum>0)
    hidden2_activation = hidden2_sum*relu_mask_hidden2
    hidden2_activation = gpu.concatenate((gpu.ones((1,nData)), hidden2_activation), axis = 0)
    hidden3_sum = gpu.dot(weights3, hidden2_activation)
    hidden3_activation = hidden3_sum
    hidden3_activation = gpu.concatenate((gpu.ones((1,nData)), hidden3_activation), axis = 0)
    output_sum = gpu.dot(weights4, hidden3_activation)
    outputs = output_sum
    regularized_penalty3 = weights3[:,1:shape(weights3)[1]]
    regularized_penalty4 = weights4[:,1:shape(weights4)[1]]
    regularized_penalty3 = regularized_penalty3 ** 2
    regularized_penalty4 = regularized_penalty4 ** 2
    output_target_diff = (outputs - inputs)**2
    cost = gpu.sum(output_target_diff)/(2*nData) + 0.5 * lambda_val * (gpu.sum(regularized_penalty3) + gpu.sum(regularized_penalty4))
    print 'Fine Tuning Cost: ', cost
    return cost
def mlpSingleOutput1Layer_grad(x, *args):
    inputSize, l1Size, lambda_hidden, inputs, targets = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_output = 1 * (l1Size+1)
    inputs = gpu.garray(inputs)
    targets = gpu.garray(targets)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    theta_output = gpu.garray(reshape(x[num_weights_L1:shape(x)[0]], (1, l1Size+1)))
    inputs = gpu.concatenate((gpu.ones((1,numCases)), inputs), axis = 0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis = 0)
    #hidden_activation_L1 = hidden_activation_L1 * dropout_prob
    hidden_sum_output = gpu.dot(theta_output, hidden_activation_L1)
    outputs = hidden_sum_output.logistic()
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    theta_output_grad = gpu.zeros(shape(theta_output))
    a = (outputs - targets) * outputs * (1-outputs)
    theta_output_grad += gpu.dot(a, gpu.garray(transpose(hidden_activation_L1.as_numpy_array())))
    b_temp = gpu.dot(gpu.garray(transpose(theta_output.as_numpy_array())),a)
    b = (b_temp*hidden_activation_L1)*(1-hidden_activation_L1)
    delta2 = gpu.dot(b, gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta2[1:shape(delta2)[0], :]
    theta_L1_grad = theta_L1_grad/numCases
    theta_output_grad = theta_output_grad/numCases
    theta_output_grad[:,1:shape(theta_output_grad)[1]] = theta_output_grad[:,1:shape(theta_output_grad)[1]] + theta_output[:,1:shape(theta_output)[1]] * lambda_hidden
    theta_L1_grad[:,1:shape(theta_L1_grad)[1]] = theta_L1_grad[:,1:shape(theta_L1_grad)[1]] + theta_L1[:,1:shape(theta_L1)[1]] * lambda_hidden
    theta_output_grad = reshape(theta_output_grad.as_numpy_array(), num_weights_output)
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    del inputs
    del theta_L1
    del hidden_sum_L1
    del hidden_activation_L1
    gpu.free_reuse_cache()
    return hstack((theta_L1_grad,theta_output_grad))
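Gradients like the one above are easiest to trust after a finite-difference spot check. This is a generic sketch, not from the original code; cost_fn is a placeholder for the matching single-hidden-layer cost function with the same (x, *args) signature, assumed to return a plain float.

import numpy as np

def check_grad(cost_fn, grad_fn, x, args, eps=1e-5, n_checks=5):
    """Spot-check grad_fn against central differences of cost_fn.

    cost_fn(x, *args) is assumed to return a plain float and grad_fn(x, *args)
    a flat numpy array, matching the conventions used in these snippets.
    """
    g = grad_fn(x, *args)
    for i in np.random.choice(len(x), n_checks, replace=False):
        xp, xm = x.copy(), x.copy()
        xp[i] += eps
        xm[i] -= eps
        numeric = (cost_fn(xp, *args) - cost_fn(xm, *args)) / (2 * eps)
        print('param %d: analytic %.6g numeric %.6g' % (i, g[i], numeric))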
Example #38
    def forward_prop_setup_bn_mean_std_on_big_set(self, X, minibatch_size=1000):
        i_start = X.shape[0] % minibatch_size
        a = [X[:i_start].dot(self.params.W)]
        while i_start < X.shape[0]:
            a.append(X[i_start:i_start + minibatch_size].dot(self.params.W))
            i_start += minibatch_size

        a = gnp.concatenate(a, axis=0)
        if not self.use_batch_normalization:
            a += self.params.b
        else:
            self.bn_layer.setup_mean_std_stats(a)
            a = self.bn_layer.forward_prop(a, is_test=True)

        Y = self.nonlin.forward_prop(a)
        return Y
Example #39
def plot_samples(init_samples, samples, save_to_file=False, epoch=None):
    all_samples = gp.concatenate((init_samples.reshape((1, init_samples.shape[0], init_samples.shape[1])), samples))
    n_samples = all_samples.shape[0]
    n_chains = all_samples.shape[1]
    img = np.zeros((29 * n_samples + 1, 29 * n_chains - 1), dtype="uint8")

    for step in range(n_samples):
        v = all_samples[step, :, :]
        A = dlutil.tile_raster_images(
            gp.as_numpy_array(v), img_shape=(28, 28), tile_shape=(1, n_chains), tile_spacing=(1, 1)
        )
        img[29 * step : 29 * step + 28, :] = A

    if save_to_file:
        assert epoch is not None
        pilimage = pil.fromarray(img)
        pilimage.save("samples-%02i.png" % epoch)
    return img
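An illustrative call, not from the original script: init_samples holds the starting visibles with shape (n_chains, D) and samples stacks later Gibbs steps with shape (n_steps, n_chains, D), where D = 784 for flattened 28x28 digits.

img = plot_samples(init_samples, samples, save_to_file=True, epoch=0)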
Example #41
    def extractTrainReps(self, datahandler, numBatch):
        """
        extract representations for (big) training data through DataHandler
        """
        for tl in datahandler:
            tl.reset()

        for i in range(numBatch):
            batches = [dh.getOneBatch() for dh in datahandler]
            # stop once any handler runs out of data, before trying to concatenate
            if any(b is None for b in batches):
                break
            batch = gp.concatenate(tuple(batches), axis=1)
            reps = self.getReps(batch)
            datahandler[0].write(reps)
        datahandler[0].flush()
Example #42
def dbn_sample(ws_vh, ws_v, ws_h, x, y=None, k=1):
    """
    Sample from DBN
    
    ws_vh, ws_v, ws_h: Lists of layer weights for DBN
    x: Initial sample. This is the input to DBN. (1xD vector)
    y: Class label for the sample. This corresponds to sampling from class
        conditionals. (1-of-K coded, row vector) 
    k: Number of Gibbs steps
    Returns a sample from DBN (1xD vector)
    """
    L = len(ws_vh)

    # make a forward pass to get from input layer to visible layer of top level
    # RBM
    h_prev = x.T

    # forward (bottom-up) pass
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h_prev) + ws_h[l]
        h_prev = gnp.logistic(ah)
        h_prev = h_prev > gnp.rand(h_prev.shape[0], h_prev.shape[1])

    # if not supervised, sample from top layer RBM without clamping any of its
    # inputs
    if y is None:
        # sample from top layer RBM
        h, v = rbm_sample(ws_vh[-1], ws_v[-1], ws_h[-1], h_prev, k)
    else:
        K = y.shape[1]  # number of classes
        H = ws_vh[-1].shape[0]
        # generate a random input to top layer RBM with class label units clamped to y
        v = gnp.concatenate((y.T, h_prev))
        # sample from top layer RBM
        h, v = rbm_sample(ws_vh[-1], ws_v[-1], ws_h[-1], v, k, clamped=(0, K))
        v = v[K:H, :]

    # backward (top-down) pass
    # propagate sample from RBM back to input
    for l in range(L - 2, -1, -1):
        av = gnp.dot(ws_vh[l], v) + ws_v[l]
        v = gnp.logistic(av)

    return v.T
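A hypothetical class-conditional use of dbn_sample, assuming trained weight lists ws_vh, ws_v, ws_h already exist, with 784 visible units and 10 classes; the 1-of-K label row vector is built in numpy and moved to the GPU.

import numpy as np
import gnumpy as gnp

x0 = gnp.rand(1, 784)            # random initial visible vector (1xD)
y = np.zeros((1, 10))
y[0, 3] = 1.0                    # clamp the label units to the class at index 3
v = dbn_sample(ws_vh, ws_v, ws_h, x0, y=gnp.garray(y), k=25)   # 1xD sample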
def fine_tuning_cost_gpu(x, *args):
    inputSize, l1Size, l2Size, l3Size, l4Size, l5Size, lambda_val, inputs = args
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    num_weights_L3 = l3Size * (l2Size + 1)
    num_weights_L4 = l4Size * (l3Size + 1)
    num_weights_L5 = l5Size * (l4Size + 1)
    #num_weights_L6 = inputSize * (l5Size + 1)
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    #weights1 = reshape(x[0:num_weights_L1], (l1Size, inputSize + 1))
    weights1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #weights2 = reshape(x[num_weights_L1:num_weights_L1+num_weights_L2], (l2Size, l1Size + 1))
    weights2 = x[num_weights_L1:num_weights_L1+num_weights_L2].reshape((l2Size, l1Size + 1))
    #weights3 = reshape(x[num_weights_L1+num_weights_L2:num_weights_L1+num_weights_L2+num_weights_L3], (l3Size, l2Size + 1))
    weights3 = x[num_weights_L1+num_weights_L2:num_weights_L1+num_weights_L2+num_weights_L3].reshape((l3Size, l2Size + 1))
    #weights4 = reshape(x[num_weights_L1+num_weights_L2+num_weights_L3:num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4], (l4Size, l3Size + 1))
    weights4 = x[num_weights_L1+num_weights_L2+num_weights_L3:num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4].reshape((l4Size, l3Size + 1))
    #weights5 = reshape(x[num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4:num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4+num_weights_L5], (l5Size, l4Size + 1))
    weights5 = x[num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4:num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4+num_weights_L5].reshape((l5Size, l4Size + 1))
    #weights6 = reshape(x[num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4+num_weights_L5:shape(x)[0]], (inputSize, l5Size+1))
    weights6 = x[num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4+num_weights_L5:shape(x)[0]].reshape((inputSize, l5Size+1))
    nData = shape(inputs)[1]
    x = gpu.concatenate((gpu.ones((1,nData)), inputs), axis = 0)
    hidden1_sum = gpu.dot(weights1, x)
    hidden1_activation = hidden1_sum.logistic()
    hidden1_activation = gpu.concatenate((gpu.ones((1,nData)), hidden1_activation), axis = 0)
    hidden2_sum = gpu.dot(weights2, hidden1_activation)
    hidden2_activation = hidden2_sum.logistic()
    hidden2_activation = gpu.concatenate((gpu.ones((1,nData)), hidden2_activation), axis = 0)
    hidden3_sum = gpu.dot(weights3, hidden2_activation)
    hidden3_activation = hidden3_sum.logistic()
    hidden3_activation = gpu.concatenate((gpu.ones((1,nData)), hidden3_activation), axis = 0)
    hidden4_sum = gpu.dot(weights4, hidden3_activation)
    hidden4_activation = hidden4_sum.logistic()
    hidden4_activation = gpu.concatenate((gpu.ones((1,nData)), hidden4_activation), axis = 0)
    hidden5_sum = gpu.dot(weights5, hidden4_activation)
    hidden5_activation = hidden5_sum.logistic()
    hidden5_activation = gpu.concatenate((gpu.ones((1,nData)), hidden5_activation), axis = 0)
    output_sum = gpu.dot(weights6, hidden5_activation)
    outputs = output_sum.logistic()
    regularized_penalty4 = weights4[:,1:shape(weights4)[1]]
    regularized_penalty5 = weights5[:,1:shape(weights5)[1]]
    regularized_penalty6 = weights6[:,1:shape(weights6)[1]]
    regularized_penalty4 = regularized_penalty4 ** 2
    regularized_penalty5 = regularized_penalty5 ** 2
    regularized_penalty6 = regularized_penalty6 ** 2
    output_target_diff = (outputs - inputs)**2
    cost = gpu.sum(output_target_diff)/(2*nData) + 0.5 * lambda_val * (gpu.sum(regularized_penalty4) + gpu.sum(regularized_penalty5) + gpu.sum(regularized_penalty6))
    print 'Fine Tuning Cost: ', cost
    return cost
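A small helper sketch, not part of the original code, for computing how long the flat parameter vector x has to be for costs like the two fine_tuning_cost_gpu variants above: one block per weight matrix, each of size rows * (cols + 1) because of the bias column.

def total_num_weights(layer_sizes):
    """layer_sizes is e.g. [inputSize, l1Size, l2Size, l3Size, l4Size, l5Size, inputSize]."""
    return sum(out_size * (in_size + 1)
               for in_size, out_size in zip(layer_sizes[:-1], layer_sizes[1:]))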
    def backward(self, Y, preds, IF, acts, words, X, Im):
        """
        Backward pass through the network
        """
        batchsize = preds.shape[0]
        Im = gpu.garray(Im)

        # Compute part of df/dR
        Ix = gpu.garray(preds[:,:-1] - Y) / batchsize
        delta = gpu.dot(acts.T, Ix)
        dR = delta[:-1,:] + self.gamma_r * self.R
        db = delta[-1,:]
        dR = dR.as_numpy_array()

        # Compute df/dC and word inputs for df/dR
        Ix = gpu.dot(Ix, self.R.T)
        dC = gpu.zeros(np.shape(self.C))
        for i in range(self.context):
            delta = gpu.dot(words[:,:,i].T, Ix)
            dC[i,:,:] = delta + self.gamma_c * self.C[i,:,:]
            delta = gpu.dot(Ix, self.C[i,:,:].T)
            delta = delta.as_numpy_array()
            for j in range(X.shape[0]):
                dR[:,X[j,i]] = dR[:,X[j,i]] + delta.T[:,j]

        # Compute df/dM
        dM = gpu.dot(IF.T, Ix) + self.gamma_c * self.M

        # Compute df/dJ
        Ix = gpu.dot(Ix, self.M.T) * (IF > 0)
        Im = gpu.concatenate((Im, gpu.ones((batchsize, 1))), 1)
        delta = gpu.dot(Im.T, Ix)
        dJ = delta[:-1,:] + self.gamma_c * self.J
        dBj = delta[-1,:]

        self.dR = gpu.garray(dR)
        self.dM = dM
        self.db = db
        self.dC = dC
        self.dJ = dJ
        self.dBj = dBj
def mlpSoftmax1Layer_costfunc(x, *args):
    numClasses, inputSize, l1Size, lambda_softmax, lambda_hidden, inputs, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    theta_softmax = gpu.garray(
        reshape(x[num_weights_L1:shape(x)[0]], (numClasses, l1Size)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1 > 0)
    hidden_activation_L1 = hidden_sum_L1 * relu_mask_hidden1
    #hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L1)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    temp = groundTruth * gpu.log(predictions)
    temp = temp.as_numpy_array()
    temp[temp == -inf] = -200.0
    temp = nan_to_num(temp)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    cost = -1 * sum(temp) / numCases + 0.5 * lambda_hidden * (
        gpu.sum(regularized_penalty_L1)) + 0.5 * lambda_softmax * gpu.sum(
            theta_softmax * theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    gpu.free_reuse_cache()
    return cost
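The groundTruth argument above is expected to be a (numClasses, numCases) one-hot matrix that lines up with the column-per-case layout of inputs. A small helper sketch (not from the original code) that builds it from integer labels, 1-indexed to match the pred = argmax + 1 convention used in mlpSoftmax_costfunc below.

import numpy as np

def make_ground_truth(labels, numClasses):
    labels = np.asarray(labels, dtype=int)        # labels in 1..numClasses
    gt = np.zeros((numClasses, labels.size))
    gt[labels - 1, np.arange(labels.size)] = 1.0
    return gt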
Example #46
    def from_activations(cls, v, h):
        nvis, nhid = v.shape[1], h.shape[1]
        v_mean = v.mean(0)
        h_mean = h.mean(0)
        vh = gnp.concatenate([v, h], axis=1)
        m_unary = vh.mean(0)

        S_unary = gnp.dot(vh.T, vh) / vh.shape[0]
        S_unary[:nvis, :nvis] += gnp.diagflat((v * (1. - v)).mean(0))
        S_unary[nvis:, nvis:] += gnp.diagflat((h * (1. - h)).mean(0))

        m_pair = gnp.zeros((nvis, nhid, 3))
        m_pair[:, :, 0] = v_mean[:, nax]
        m_pair[:, :, 1] = h_mean[nax, :]
        m_pair[:, :, 2] = gnp.dot(v.T, h) / h.shape[0]

        S_pair = gnp.zeros((nvis, nhid, 3, 3))
        S_pair[:] = S_unary[:nvis, nvis:, nax, nax]
        S_pair[:, :, 0, 0] = v_mean[:, nax]
        S_pair[:, :, 1, 1] = h_mean[nax, :]

        return cls(m_unary, S_unary, m_pair, S_pair)
Example #48
def setup_training_data(params,midi_dir,verbose=False):
    '''
    load and set up training data

    input:
    params - dict with 'Tv', 'Th' (max look-behind), 'vis_scale' and 'Nl'
    midi_dir - directory containing the training MIDI data
    '''

    # load training data
    sequential_data, sequential_labels, num_labels = load_data(midi_dir)

    T = max(params['Tv'],params['Th']) # max look-behind
    # convert sequences into subsequences of length T+1
    subseq_data, subseq_labels = frame_subseqs(T+1,sequential_data,sequential_labels)
    subseq_data *= params['vis_scale'] # put training data at correct scale
    training_data = subseq_to_frames(subseq_data)

    Nl = params['Nl']
    training_labels = compute_binary_labels(subseq_to_frames(subseq_labels),Nl)
    input_training_data = gp.concatenate((gp.garray(training_data),
                                            gp.garray(training_labels)),axis=1)

    return input_training_data
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, l3Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth, dropout_probability = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    num_weights_L3 = l3Size * (l2Size + 1)
    num_weights_softmax = numClasses * l3Size
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(
        reshape(x[num_weights_L1:num_weights_L2 + num_weights_L1],
                (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_L3 = gpu.garray(
        reshape(
            x[num_weights_L2 + num_weights_L1:num_weights_L2 + num_weights_L1 +
              num_weights_L3], (l3Size, l2Size + 1)))
    theta_softmax = gpu.garray(
        reshape(
            x[num_weights_L2 + num_weights_L1 + num_weights_L3:shape(x)[0]],
            (numClasses, l3Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    theta_L2_grad = gpu.zeros(shape(theta_L2))
    theta_L3_grad = gpu.zeros(shape(theta_L3))
    dropout_l1 = gpu.garray(
        bernoulli.rvs(dropout_probability, size=(l1Size + 1, numCases)))
    dropout_l2 = gpu.garray(
        bernoulli.rvs(dropout_probability, size=(l2Size + 1, numCases)))
    dropout_l3 = gpu.garray(
        bernoulli.rvs(dropout_probability, size=(l3Size, numCases)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1 > 0)
    hidden_activation_L1 = hidden_sum_L1 * relu_mask_hidden1
    hidden_derivative_L1 = relu_mask_hidden1
    #hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis=0)
    hidden_derivative_L1 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_derivative_L1),
                                           axis=0)
    hidden_activation_L1 = gpu.concatenate(
        (gpu.ones((1, numCases)), hidden_activation_L1), axis=0) * dropout_l1
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    #hidden_activation_L2 = gpu.log(1+hidden_sum_L2.exp())
    relu_mask_hidden2 = gpu.ones(shape(hidden_sum_L2)) * (hidden_sum_L2 > 0)
    hidden_activation_L2 = hidden_sum_L2 * relu_mask_hidden2
    hidden_derivative_L2 = relu_mask_hidden2
    #hidden_activation_L2 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L2), axis=0)
    hidden_derivative_L2 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_derivative_L2),
                                           axis=0)
    hidden_activation_L2 = gpu.concatenate(
        (gpu.ones((1, numCases)), hidden_activation_L2), axis=0) * dropout_l2
    hidden_sum_L3 = gpu.dot(theta_L3, hidden_activation_L2)
    #hidden_activation_L3 = gpu.log(1+hidden_sum_L3.exp())
    relu_mask_hidden3 = gpu.ones(shape(hidden_sum_L3)) * (hidden_sum_L3 > 0)
    #hidden_activation_L3 = hidden_sum_L3*relu_mask_hidden3
    hidden_derivative_L3 = relu_mask_hidden3
    hidden_activation_L3 = hidden_sum_L3 * relu_mask_hidden3 * dropout_l3
    #hidden_activation_L3 = hidden_sum_L3.logistic() * dropout_l3
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L3)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    pred = predictions.argmax(axis=0) + 1
    accuracy = mean(pred == labels) * 100
    temp = groundTruth * gpu.log(predictions)
    temp = temp.as_numpy_array()
    temp[temp == -inf] = -200.0
    temp = nan_to_num(temp)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:, 1:shape(theta_L2)[1]]
    regularized_penalty_L3 = theta_L3[:, 1:shape(theta_L3)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    regularized_penalty_L3 = regularized_penalty_L3 * regularized_penalty_L3
    pred_cost = -1 * sum(temp) / numCases
    l2norm_cost = 0.5 * lambda_hidden * (
        gpu.sum(regularized_penalty_L3) + gpu.sum(regularized_penalty_L2) +
        gpu.sum(regularized_penalty_L1)) + 0.5 * lambda_softmax * gpu.sum(
            theta_softmax * theta_softmax)
    #l2norm_cost = 0
    cost = pred_cost + l2norm_cost
    print 'Prediction Accuracy:                       ', accuracy, '%'
    print 'Multilayer Softmax Prediction Cost:        ', pred_cost
    print 'Multilayer Softmax L2 Normalisation Cost:  ', l2norm_cost
    print 'Multilayer Softmax Cost:                   ', cost
    print '--------------------------------------------------------------------'
    softmax_imd = groundTruth - predictions
    #theta_softmax_grad = -1*gpu.dot(softmax_imd, gpu.garray(transpose(hidden_activation_L3.as_numpy_array())))/numCases
    theta_softmax_grad = -1 * gpu.dot(
        softmax_imd,
        gpu.garray(transpose(hidden_activation_L3.as_numpy_array()))
    ) / numCases + lambda_softmax * theta_softmax
    deltaOut = -softmax_imd
    delta_L3_imd = gpu.dot(
        gpu.garray(transpose(theta_softmax.as_numpy_array())), deltaOut)
    delta_L3_imd2 = delta_L3_imd * hidden_derivative_L3
    #delta_L3_imd2 = (delta_L3_imd * hidden_activation_L3) * (1-hidden_activation_L3)
    delta_L3 = gpu.dot(
        delta_L3_imd2,
        gpu.garray(transpose(hidden_activation_L2.as_numpy_array())))
    theta_L3_grad += delta_L3
    delta_L2_imd = gpu.dot(gpu.garray(transpose(theta_L3.as_numpy_array())),
                           delta_L3_imd2)
    delta_L2_imd2 = delta_L2_imd * hidden_derivative_L2
    delta_L2_imd2 = delta_L2_imd2[1:shape(delta_L2_imd2)[0] + 1, :]
    delta_L2 = gpu.dot(
        delta_L2_imd2,
        gpu.garray(transpose(hidden_activation_L1.as_numpy_array())))
    theta_L2_grad += delta_L2
    delta_L1_imd = gpu.dot(gpu.garray(transpose(theta_L2.as_numpy_array())),
                           delta_L2_imd2)
    delta_L1_imd2 = delta_L1_imd * hidden_derivative_L1
    delta_L1_imd2 = delta_L1_imd2[1:shape(delta_L1_imd2)[0] + 1, :]
    delta_L1 = gpu.dot(delta_L1_imd2,
                       gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta_L1
    theta_L1_grad = theta_L1_grad / numCases
    theta_L2_grad = theta_L2_grad / numCases
    theta_L3_grad = theta_L3_grad / numCases
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(
        theta_L1_grad)[1]] + theta_L1[:, 1:shape(theta_L1)[1]] * lambda_hidden
    theta_L2_grad[:, 1:shape(theta_L2_grad)[1]] = theta_L2_grad[:, 1:shape(
        theta_L2_grad)[1]] + theta_L2[:, 1:shape(theta_L2)[1]] * lambda_hidden
    theta_L3_grad[:, 1:shape(theta_L3_grad)[1]] = theta_L3_grad[:, 1:shape(
        theta_L3_grad)[1]] + theta_L3[:, 1:shape(theta_L3)[1]] * lambda_hidden
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    theta_L2_grad = reshape(theta_L2_grad.as_numpy_array(), num_weights_L2)
    theta_L3_grad = reshape(theta_L3_grad.as_numpy_array(), num_weights_L3)
    theta_softmax_grad = reshape(theta_softmax_grad.as_numpy_array(),
                                 num_weights_softmax)
    del inputs
    del theta_L1
    del theta_L2
    del theta_L3
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_activation_L3
    del hidden_sum_L3
    del hidden_sum_softmax
    del predictions
    del temp
    del softmax_imd
    del deltaOut
    del delta_L3_imd
    del delta_L3_imd2
    del delta_L3
    del delta_L2_imd
    del delta_L2_imd2
    del delta_L2
    del delta_L1_imd
    del delta_L1_imd2
    del delta_L1
    #del regularized_penalty_L1
    #del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost, hstack(
        (theta_L1_grad, theta_L2_grad, theta_L3_grad, theta_softmax_grad))
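Because mlpSoftmax_costfunc returns the cost together with the flattened gradient, it can be handed to an L-BFGS solver directly. A sketch, not the original training script; x0 and args are placeholders for a flat initial parameter vector of the right length and the argument tuple unpacked at the top of the function.

from scipy.optimize import minimize

res = minimize(mlpSoftmax_costfunc, x0, args=args, method='L-BFGS-B',
               jac=True, options={'maxiter': 400})
x_opt = res.x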
Example #50
def bias(X, bias_val=1.0):
    """Append a bias column of magnitude bias_val to X."""
    Xb = gp.concatenate((X, bias_val * gp.ones((X.shape[0], 1))), axis=1)
    return Xb
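A quick illustration (assuming gp is gnumpy, as elsewhere in these snippets): appending the bias column turns a (5, 3) matrix into a (5, 4) one whose last column equals bias_val.

import numpy as np
import gnumpy as gp

Xb = bias(gp.garray(np.random.rand(5, 3)))
print(Xb.shape)   # (5, 4)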
Example #51
    def vector_weights(self, Ws=[]):
        """Get vectorized form of weights in Ws (or current net weights)."""
        if (len(Ws) == 0):
            Ws = self.layer_weights()
        Wv = [W.reshape((W.size, 1)) for W in Ws]
        return gp.concatenate(Wv, axis=0)
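A hypothetical inverse, not present in the original class, that splits the flat column vector back into matrices given their shapes; useful when the vectorized weights come back from an optimizer.

def unvector_weights(Wv, shapes):
    """shapes is a list of (rows, cols) tuples, in the same order as layer_weights()."""
    Ws, offset = [], 0
    for (r, c) in shapes:
        Ws.append(Wv[offset:offset + r * c].reshape((r, c)))
        offset += r * c
    return Ws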
Example #52
    def computeGrads2(self, a, ja, diffgrad, recf):
        #         assert(recf==0)
        aes = []
        grad = []
        d = []
        for m in xrange(self.modalsCnt):
            aes.append(self.saes[m].ae)
            grad.append([])
            d.append([0] * self.depth)
        jaes = self.jsae.ae
        jgrad = []
        jd = [0] * self.jdepth
        topidx = self.depth - 1
        topjidx = self.jdepth - 1

        if recf > 0:
            for m in xrange(self.modalsCnt):
                #compute derivatives of reconstruction layers from L_r for saes
                d[m][0] = aes[m][1].computeDlast(a[m][0], a[m][-1], recf)
                for i in range(1, self.depth):
                    grad[m].append(aes[m][i].getbGradient(d[m][i - 1]))
                    grad[m].append(aes[m][i].getWGradient(
                        d[m][i - 1], a[m][-1 - i], aes[m][i].W2))
                    if i + 1 < self.depth:
                        d[m][i] = aes[m][i + 1].computeD(
                            a[m][-1 - i], d[m][i - 1], aes[m][i].W2)
                d[m][topidx] = aes[m][topidx].computeD(a[m][-1 - topidx],
                                                       d[m][topidx - 1],
                                                       aes[m][topidx].W2)
#                 d[m][topidx]=gp.dot(d[m][topidx-1],aes[m][topidx].W2.T)

#compute derivatives of reconstruction layers from L_r for jsae
            if self.has_joint:
                jd[0] = gp.concatenate(tuple(e[self.depth - 1] for e in d),
                                       axis=1)

                #             jd[0] = jaes[1].computeDlast(ja[0],ja[-1],recf)

                for i in range(1, self.jdepth):
                    jgrad.append(jaes[i].getbGradient(jd[i - 1]))
                    jgrad.append(jaes[i].getWGradient(jd[i - 1], ja[-1 - i],
                                                      jaes[i].W2))
                    if i + 1 < self.jdepth:
                        jd[i] = jaes[i + 1].computeD(ja[-1 - i], jd[i - 1],
                                                     jaes[i].W2)
                jd[topjidx] = gp.dot(jd[topjidx - 1], jaes[topjidx].W2.T)

        #add diffgrad to generative loss
        if self.has_joint:
            if diffgrad is not None:
                jd[topjidx] += (diffgrad + self.sparsityFactor *
                                (2 * ja[topjidx] /
                                 (1 + ja[topjidx] * ja[topjidx]))
                                ) * jaes[topjidx].getActivationGradient(
                                    ja[topjidx])
            #backprop in jsae
            for i in range(self.jdepth - 1, 0, -1):
                #compute derivatives of latent layers
                jgrad.append(jaes[i].getbGradient(jd[i]))
                jgrad.append(jaes[i].getWGradient(jd[i], ja[i - 1],
                                                  jaes[i].W1))
                if i > 1:
                    jd[i - 1] = jaes[i - 1].computeD(
                        ja[i - 1], jd[i], jaes[i].W1) + self.sparsityFactor * (
                            2 * ja[i - 1] / (1 + ja[i - 1] * ja[i - 1])
                        ) * jaes[topjidx].getActivationGradient(ja[i - 1])

        #propagate to isae and tsae
        if diffgrad is not None:
            if self.has_joint:
                transD = aes[0][topidx].computeD((ja[0]), jd[1], (jaes[1].W1))
            else:
                transD = (diffgrad) * aes[0][topidx].getActivationGradient(
                    gp.concatenate(tuple(e[topidx]
                                         for e in a), axis=1))  # no sparsity
        for m in xrange(self.modalsCnt):
            if diffgrad is not None:
                #combine derivatives from L_d
                d[m][topidx] += transD[:, self.dims[m]:self.dims[m + 1]]
            d[m][topidx] *= aes[m][topidx].getActivationGradient(a[m][topidx])

            for i in range(self.depth - 1, 0, -1):
                #compute derivatives of latent layers
                grad[m].append(aes[m][i].getbGradient(d[m][i]))
                grad[m].append(aes[m][i].getWGradient(d[m][i], a[m][i - 1],
                                                      aes[m][i].W1))
                if i > 1:
                    d[m][i - 1] = aes[m][i - 1].computeD(
                        a[m][i - 1], d[m][i], aes[m][i].W1)

        for m in xrange(self.modalsCnt):
            grad[m].reverse()
        jgrad.reverse()
        return grad, jgrad
Example #53
    def getSinglePathGrad2(self, a, ja, sim, other, recf, sim_diff_factor,
                           dis_diff_factor):
        """
        ia:image ae data
        ta:text ae data
        ja:joint ae data
        sim: should this be similar to other
        other:output of jae given other element of the pair
        """
        recloss = []
        for m in xrange(self.modalsCnt):
            recloss.append(0)
        if recf > 0:
            for m in xrange(self.modalsCnt):
                #                 a[m]=self.saes[m].backward2Bottom(a[m])
                recloss[m] = self.saes[m].ae[1].getErrorLoss(
                    a[m][0], a[m][-1], recf)

#             ja=self.jsae.backward2Bottom(ja)
#             jrecloss=self.jsae.ae[1].getErrorLoss(ja[0],ja[-1],recf)
        if sim_diff_factor == 0 and dis_diff_factor == 0:
            diffgrad = None
        else:
            if (sim):

                if self.has_joint:
                    npj = ja[self.jdepth - 1]  #.as_numpy_array()
                else:
                    npj = gp.concatenate(tuple(e[self.depth - 1] for e in a),
                                         axis=1)  #.as_numpy_array()
                npo = other  #.as_numpy_array()
                jsum = ((npj**2).sum(
                    axis=1))**0.5  #(np.linalg.norm(npj,axis=1))
                nj = (npj / jsum[:, gp.newaxis])
                osum = ((npo**2).sum(
                    axis=1))**0.5  #(np.linalg.norm(npo,axis=1))
                no = (npo / osum[:, gp.newaxis])
                #                 jsum = gp.as_garray(jsum)
                #                 osum = gp.as_garray(osum)
                #                 nj = gp.as_garray(nj)
                #                 no = gp.as_garray(no)

                tmp = gp.sum(nj * no, axis=1)
                tmp = tmp.reshape(tmp.shape + (1, ))
                tmp = gp.garray(tmp)
                tmp = (tmp * nj - no)
                tmp = tmp / jsum[:, gp.newaxis]

                dist = (1 - gp.sum(nj * no, axis=1))
                dist = dist > 0.034
                diffgrad = gp.zeros(nj.shape)
                for i in xrange(self.batchsize):
                    if dist[i]:
                        diffgrad[i, :] = (tmp[i, :])

                diffgrad = sim_diff_factor * diffgrad / self.batchsize

            else:

                if self.has_joint:
                    npj = ja[self.jdepth - 1]  #.as_numpy_array()
                else:
                    npj = gp.concatenate(tuple(e[self.depth - 1] for e in a),
                                         axis=1)  #.as_numpy_array()
                npo = other  #.as_numpy_array()
                jsum = ((npj**2).sum(
                    axis=1))**0.5  #(np.linalg.norm(npj,axis=1))
                nj = (npj / jsum[:, gp.newaxis])
                osum = ((npo**2).sum(
                    axis=1))**0.5  #(np.linalg.norm(npo,axis=1))
                no = (npo / osum[:, gp.newaxis])
                #                 jsum = gp.as_garray(jsum)
                #                 osum = gp.as_garray(osum)
                #                 nj = gp.as_garray(nj)
                #                 no = gp.as_garray(no)

                tmp = gp.sum(nj * no, axis=1)
                tmp = tmp.reshape(tmp.shape + (1, ))
                tmp = (tmp * nj - no)
                tmp = -1 * tmp / jsum[:, gp.newaxis]

                dist = (1 - gp.sum(nj * no, axis=1))
                dist = dist < 0.1
                diffgrad = gp.zeros(nj.shape)
                for i in xrange(self.batchsize):
                    if dist[i]:
                        diffgrad[i, :] = (tmp[i, :])

                diffgrad = dis_diff_factor * diffgrad / self.batchsize

        g, jg = self.computeGrads2(a, ja, diffgrad, recf)
        return g, jg, recloss
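The diffgrad expression above is the gradient of the cosine distance 1 - cos(z, o) with respect to z, namely (cos(z, o) * z/|z| - o/|o|) / |z|, negated in the dissimilar branch. A plain-numpy sketch, separate from the original code, that checks this closed form against central differences.

import numpy as np

rng = np.random.RandomState(0)
z, o = rng.randn(5), rng.randn(5)

def cos_dist(z):
    return 1.0 - z.dot(o) / (np.linalg.norm(z) * np.linalg.norm(o))

nz, no = z / np.linalg.norm(z), o / np.linalg.norm(o)
analytic = (nz.dot(no) * nz - no) / np.linalg.norm(z)
eps = 1e-6
numeric = np.array([(cos_dist(z + eps * e) - cos_dist(z - eps * e)) / (2 * eps)
                    for e in np.eye(5)])
print(np.allclose(analytic, numeric, atol=1e-6))   # expect True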
Example #54
    def trainOnePair(self, bat1, bat2, sim, epoch, recf, sim_diffcost,
                     dis_diffcost):
        """
        trains one pair in which each element has two modalities
        im1: first element's image data
        tx1: first element's text data
        im2: second element's image data
        tx2: second element's text data
        sim: if the pair is in similar set
        recf: reconstruction factor
        """
        #consider diffcost?!
        a1 = []
        a2 = []
        for m in xrange(self.modalsCnt):
            a1.append(self.saes[m].forward2Top(bat1[m]))
            a2.append(self.saes[m].forward2Top(bat2[m]))

        j1a = None
        j2a = None
        if self.has_joint:
            j1inp = gp.concatenate(tuple(e[self.depth - 1] for e in a1),
                                   axis=1)
            j2inp = gp.concatenate(tuple(e[self.depth - 1] for e in a2),
                                   axis=1)
            j1a = self.jsae.forward(j1inp)
            j2a = self.jsae.forward(j2inp)

        for m in xrange(self.modalsCnt):
            if self.has_joint:
                a1[m].append(j1a[-1][:, self.dims[m]:self.dims[m + 1]])
            self.saes[m].backward2Bottom(a1[m])
            if self.has_joint:
                a2[m].append(j2a[-1][:, self.dims[m]:self.dims[m + 1]])
            self.saes[m].backward2Bottom(a2[m])

#         j1a = j1a[1:-1]
#         j2a = j2a[1:-1]

#get path grad for z
#backpropagate x and y wrt z
        if self.has_joint:
            other1 = j2a[self.jdepth - 1]
            other2 = j1a[self.jdepth - 1]
        else:
            other1 = gp.concatenate(tuple(e[self.depth - 1] for e in a2),
                                    axis=1)
            other2 = gp.concatenate(tuple(e[self.depth - 1] for e in a1),
                                    axis=1)

        g1, jg1, rl1 = self.getSinglePathGrad2(a1, j1a, sim, other1, recf,
                                               sim_diffcost, dis_diffcost)
        g2, jg2, rl2 = self.getSinglePathGrad2(a2, j2a, sim, other2, recf,
                                               sim_diffcost, dis_diffcost)

        g = [[] for x in g1]
        for m in xrange(self.modalsCnt):
            g[m] = [[] for x in g1[m]]
            for i in xrange(len(g1[m])):
                g[m][i] = g1[m][i] + g2[m][i]

        jg = None
        if self.has_joint:
            jg = [[] for x in jg1]
            for i in xrange(len(jg1)):
                jg[i] = jg1[i] + jg2[i]

        #these lines are just for debug:
        if self.has_joint:
            perf = [
                sim,
                self.getDiffLoss(j1a[self.jdepth - 1], j2a[self.jdepth - 1])
            ]
        else:
            perf = [
                sim,
                self.getDiffLoss(
                    gp.concatenate(tuple(e[self.depth - 1] for e in a1),
                                   axis=1),
                    gp.concatenate(tuple(e[self.depth - 1] for e in a2),
                                   axis=1))
            ]
#         for i in range(1,self.depth):
#             perf.append(self.getDiffLoss(ia[i],ta[i]))
#         a=ia[1:self.depth]+ta[1:self.depth]
#         ae=self.isae.ae[1:]+self.tsae.ae[1:]
#         for i in range(len(a)):
#             perf.append(ae[i].computeSparsity(a[i]))
        return np.array(perf), g, jg