Example #1
 def forward(self, xi_t, xf_t, xc_t, xo_t, h_tm1, c_tm1):
     i_t = sigmoid(xi_t + T.dot(h_tm1, self.W_hi) + c_tm1 * self.W_ci)
     f_t = sigmoid(xf_t + T.dot(h_tm1, self.W_hf) + c_tm1 * self.W_cf)
     c_t = f_t * c_tm1 + i_t * self.activation(xc_t + T.dot(h_tm1, self.W_hc))
     o_t = sigmoid(xo_t + T.dot(h_tm1, self.W_ho) + c_t * self.W_co)
     h_t = o_t * self.activation(c_t)
     return h_t, c_t
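Examples #1, #3, #7 and #8 are the same peephole-LSTM step written with Theano ops; the xi_t, xf_t, xc_t and xo_t arguments are the precomputed input-to-gate projections. For reference, a minimal NumPy sketch of the same update, assuming tanh stands in for self.activation and that the peephole weights W_c* are vectors of length n_h:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(xi_t, xf_t, xc_t, xo_t, h_tm1, c_tm1,
              W_hi, W_hf, W_hc, W_ho, W_ci, W_cf, W_co):
    i_t = sigmoid(xi_t + h_tm1 @ W_hi + c_tm1 * W_ci)       # input gate (peephole on c_tm1)
    f_t = sigmoid(xf_t + h_tm1 @ W_hf + c_tm1 * W_cf)       # forget gate (peephole on c_tm1)
    c_t = f_t * c_tm1 + i_t * np.tanh(xc_t + h_tm1 @ W_hc)  # updated cell state
    o_t = sigmoid(xo_t + h_tm1 @ W_ho + c_t * W_co)         # output gate (peephole on c_t)
    h_t = o_t * np.tanh(c_t)                                 # new hidden state
    return h_t, c_t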
Example #2
 def recurrence(self, x_t, h_tm1):
     r_t = sigmoid(T.dot(x_t, self.W_xr) + T.dot(h_tm1, self.W_hr))
     z_t = sigmoid(T.dot(x_t, self.W_xz) + T.dot(h_tm1, self.W_hz))
     h_hat_t = self.activation(
         T.dot(x_t, self.W_xh) + T.dot((r_t * h_tm1), self.W_hh))
     h_t = (1. - z_t) * h_tm1 + z_t * h_hat_t
     return h_t
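Example #2 is a standard GRU step (Examples #20 and #21 below are the same update with the input projections precomputed). A minimal NumPy sketch, reusing the sigmoid helper from the sketch above and with tanh standing in for self.activation:

def gru_step(x_t, h_tm1, W_xr, W_hr, W_xz, W_hz, W_xh, W_hh):
    r_t = sigmoid(x_t @ W_xr + h_tm1 @ W_hr)               # reset gate
    z_t = sigmoid(x_t @ W_xz + h_tm1 @ W_hz)               # update gate
    h_hat_t = np.tanh(x_t @ W_xh + (r_t * h_tm1) @ W_hh)   # candidate state
    return (1.0 - z_t) * h_tm1 + z_t * h_hat_t             # interpolated new state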
Example #3
 def forward(self, xi_t, xf_t, xc_t, xo_t, h_tm1, c_tm1):
     i_t = sigmoid(xi_t + T.dot(h_tm1, self.W_hi) + c_tm1 * self.W_ci)
     f_t = sigmoid(xf_t + T.dot(h_tm1, self.W_hf) + c_tm1 * self.W_cf)
     c_t = f_t * c_tm1 + i_t * self.activation(xc_t + T.dot(h_tm1, self.W_hc))
     o_t = sigmoid(xo_t + T.dot(h_tm1, self.W_ho) + c_t * self.W_co)
     h_t = o_t * self.activation(c_t)
     return h_t, c_t
Example #4
    def __init__(self, dataset, epochs, w=None, print_step=None):
        self.train_x, self.test_x, self.train_y, self.test_y = dataset
        self.l1_error = 0
        self.neurons = self.train_x.shape[1]
        self.Xavier = np.sqrt(1.0 / (2 * self.neurons))  # Xavier-style scale (not applied to w0 below)

        if w is None:
            self.w0 = 2 * np.random.random((self.neurons, 1)) - 1
        else:
            self.w0 = w[0]
        for j in range(1, epochs + 1):
            l1 = sigmoid(np.dot(self.train_x, self.w0))
            self.l1_error = self.train_y - l1

            if (print_step is not None) and ((j % print_step == 0) or j == epochs):
                accuracy = self.calc_accuracy()
                print("{},{},{}".format(j, np.mean(np.abs(self.l1_error)), accuracy))

            adjustment = self.l1_error * sigmoid(l1, deriv=True)
            self.w0 += self.train_x.T.dot(adjustment) * learning_rate
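Examples #4, #5 and #18 call sigmoid with a deriv flag on values that are already sigmoid outputs, and scale their updates by a module-level learning_rate; neither is shown in the snippets. A plausible definition of those helpers (an assumption, not taken from the source repository):

import numpy as np

learning_rate = 0.1  # assumed module-level constant used by the training loops

def sigmoid(x, deriv=False):
    if deriv:
        # x is expected to already be a sigmoid activation, so the derivative
        # is expressed directly in terms of the output: s'(z) = s(z) * (1 - s(z))
        return x * (1.0 - x)
    return 1.0 / (1.0 + np.exp(-x))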
Example #5
    def __init__(
            self,
            train_x,
            train_y,
            test_x,
            test_y,
            epochs,
            w=None,
            print_step=None):
        self.l1_error = 0
        if w is None:
            self.w0 = 2 * np.random.random((train_x.shape[1], 1)) - 1  # one weight per input feature
        else:
            self.w0 = w
        for j in range(1, epochs + 1):
            l1 = sigmoid(np.dot(train_x, self.w0))
            self.l1_error = train_y - l1

            if (print_step is not None) and ((j % print_step == 0) or j == epochs):
                accuracy = self.calc_accuracy(test_x, test_y)
                print("{},{},{}".format(j, np.mean(np.abs(self.l1_error)), accuracy))

            adjustment = self.l1_error * sigmoid(l1, deriv=True)
            self.w0 += train_x.T.dot(adjustment) * learning_rate
Example #6
 def __forward(self, input_data):
     input_data = add_bias(input_data)
     z1 = input_data.dot(self.first_layer_weights)
     hidden_layer = sigmoid(z1)
     hidden_layer = add_bias(hidden_layer)
     z2 = hidden_layer.dot(self.second_layer_weights)
     output_layer = sigmoid(z2)
     return hidden_layer, output_layer
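Example #6 depends on an add_bias helper that is not shown; presumably it appends a constant bias column so the bias term is absorbed into each weight matrix. A minimal sketch under that assumption:

import numpy as np

def add_bias(data):
    # (n_samples, n_features) -> (n_samples, n_features + 1), last column all ones
    return np.hstack([data, np.ones((data.shape[0], 1))])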
Example #7
 def forward(self, xi_t, xf_t, xc_t, xo_t, h_tm1, c_tm1):
     """
     :param xi_t, xf_t, xc_t, xo_t: 1D: Batch, 2D: n_h
     :param h_tm1: 1D: Batch, 2D: n_h
     :param c_tm1: 1D: Batch, 2D: n_h
     :return: h_t: 1D: Batch, 2D: n_h
     :return: c_t: 1D: Batch, 2D: n_h
     """
     i_t = sigmoid(xi_t + T.dot(h_tm1, self.W_hi) + c_tm1 * self.W_ci)
     f_t = sigmoid(xf_t + T.dot(h_tm1, self.W_hf) + c_tm1 * self.W_cf)
     c_t = f_t * c_tm1 + i_t * self.activation(xc_t + T.dot(h_tm1, self.W_hc))
     o_t = sigmoid(xo_t + T.dot(h_tm1, self.W_ho) + c_t * self.W_co)
     h_t = o_t * self.activation(c_t)
     return h_t, c_t
Example #8
 def forward(self, xi_t, xf_t, xc_t, xo_t, h_tm1, c_tm1):
     """
     :param xi_t, xf_t, xc_t, xo_t: 1D: Batch, 2D: n_h
     :param h_tm1: 1D: Batch, 2D: n_h
     :param c_tm1: 1D: Batch, 2D: n_h
     :return: h_t: 1D: Batch, 2D: n_h
     :return: c_t: 1D: Batch, 2D: n_h
     """
     i_t = sigmoid(xi_t + T.dot(h_tm1, self.W_hi) + c_tm1 * self.W_ci)
     f_t = sigmoid(xf_t + T.dot(h_tm1, self.W_hf) + c_tm1 * self.W_cf)
     c_t = f_t * c_tm1 + i_t * self.activation(xc_t + T.dot(h_tm1, self.W_hc))
     o_t = sigmoid(xo_t + T.dot(h_tm1, self.W_ho) + c_t * self.W_co)
     h_t = o_t * self.activation(c_t)
     return h_t, c_t
Example #9
    def predict(self, w, b, X, show_image):
        """
            Predicting a dataset using a model
            X.shape = (features, m)
        """

        # --- The predictions: compute the activation of all X
        Z = np.dot(w.T, X) + b
        A = nn_utils.sigmoid(Z)

        #--- Store each prediction in the vector
        m = X.shape[1]
        Y_predictions = np.zeros((1, m))
        zero_count = 0
        one_count = 0
        for i in range(m):
            prediction = 1 if A[0, i] > 0.5 else 0
            Y_predictions[0, i] = prediction
            if (prediction == 1):
                one_count += 1
            elif (prediction == 0):
                zero_count += 1

            if (prediction == 1
                    and show_image):  #Showing in the screen certain pictures
                plt.imshow(X[:, i].reshape((64, 64, 3)))
                #plt.show()

        print("1s: " + str(one_count))
        print("0s: " + str(zero_count))

        return Y_predictions
Example #10
def linear_activation_forward(A_prev, W, b, activation):
    '''
    Implements the forward propagation for the Linear->Activation layer.

    Arguments:
    A_prev -- activation from previous layer(or input data)
    W -- weight matrix
    b -- bias matrix
    activation -- activation used in this layer, 'relu' or 'sigmoid'

    Returns:
    A -- the output of activation function
    cache -- tuple containing 'linear_cache' and 'activation_cache',
             stored for computing backward pass efficiently
    '''

    Z, linear_cache = linear_forward(A_prev, W, b)
    ## calling linear_forward function
    ## to get the value of Z and linear cache

    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
        ## calling sigmoid function defined in nn_utils

    elif activation == 'relu':
        A, activation_cache = relu(Z)
        ## calling relu function defined in nn_utils

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    ## assertion for checking the shape of A

    cache = (linear_cache, activation_cache)
    return A, cache
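Examples #10 to #12 follow the convention, familiar from deep-learning course exercises, in which linear_forward and the activation helpers each return a cache for the backward pass. A minimal sketch of those assumed helpers:

import numpy as np

def linear_forward(A_prev, W, b):
    Z = np.dot(W, A_prev) + b    # Z.shape = (size of current layer, number of examples)
    return Z, (A_prev, W, b)     # linear cache

def sigmoid(Z):
    A = 1.0 / (1.0 + np.exp(-Z))
    return A, Z                  # activation cache

def relu(Z):
    return np.maximum(0, Z), Z   # activation cache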
Example #11
def linear_activation_forward(A_prev, W, b, activation):
    '''
    Implements forward propagation.

    Arguments:
    A_prev -- activation from previous layers
    W -- weight matrix
    b -- bias matrix
    activation -- the activation used in this layer, 'sigmoid' or 'relu'

    Returns:
    A -- the output activation 
    cache -- a python dictionary containing 'linear_cache' and 'activation_cache'
    '''
    
    Z, linear_cache = linear_forward(A_prev, W, b)

    if activation == 'relu':
        A, activation_cache = nn_utils.relu(Z)
    
    elif activation == 'sigmoid':
        A, activation_cache = nn_utils.sigmoid(Z)

    cache = (linear_cache, activation_cache)

    return A, cache
Example #12
def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python dictionary containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """

    if activation == "sigmoid":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = nn_utils.sigmoid(Z)

    elif activation == "relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = nn_utils.relu(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache
Example #13
    def skip_recurrence_inter(self, x_t, i_t, A_t):
        """
        :param  x_t: 1D: batch_size, 2D: dim_hidden
        :param  i_t: 1D: batch_size; elem=agent index (selects each example's row of A_t)
        :return A_t: 1D: batch_size, 2D: n_agents, 3D: dim_agent
        """
        x_r = T.dot(x_t, self.W_xr)
        x_z = T.dot(x_t, self.W_xz)
        x_h = T.dot(x_t, self.W_xh)

        A_sub = A_t[T.arange(A_t.shape[0]), i_t]
        r_t = sigmoid(x_r + T.dot(A_sub, self.W_hr))
        z_t = sigmoid(x_z + T.dot(A_sub, self.W_hz))
        h_hat_t = self.activation(x_h + T.dot((r_t * A_sub), self.W_hh))
        h_t = (1. - z_t) * A_sub + z_t * h_hat_t

        return T.set_subtensor(A_sub, h_t)
Example #14
    def recurrence_inter(self, x_t, i_t, A_t):
        """
        :param  x_t: 1D: batch_size, 2D: dim_hidden
        :param  i_t: 1D: batch_size, 2D: n_agents; elem=one hot vector
        :return A_t: 1D: batch_size, 2D: n_agents, 3D: dim_agent
        """
        x_r = T.dot(x_t, self.W_xr).dimshuffle(0, 'x', 1) * i_t.dimshuffle(
            0, 1, 'x')
        x_z = T.dot(x_t, self.W_xz).dimshuffle(0, 'x', 1) * i_t.dimshuffle(
            0, 1, 'x')
        x_h = T.dot(x_t, self.W_xh).dimshuffle(0, 'x', 1) * i_t.dimshuffle(
            0, 1, 'x')

        r_t = sigmoid(x_r + T.dot(A_t, self.W_hr))
        z_t = sigmoid(x_z + T.dot(A_t, self.W_hz))
        h_hat_t = self.activation(x_h + T.dot((r_t * A_t), self.W_hh))
        h_t = (1. - z_t) * A_t + z_t * h_hat_t

        return h_t
Example #15
    def forward_one_layer(A_previous, W, b, activation_function):
        """
            Forwarding only 1 layer ahead
            W.shape = (l, l-1)
            Z, A.shape = (l, m); A_prev.shape = (l-1, m)
            Returns A and [cache = A_prev, W, b, Z]
        """
        Z = np.dot(W, A_previous) + b  # W.shape: (l, l-1) / A_previous.shape: (l-1, m)

        A = None
        if activation_function == "sigmoid":
            A = nn_utils.sigmoid(Z)
        elif activation_function == "relu":
            A = nn_utils.relu(Z)

        cache = A_previous, W, b, Z

        return A, cache
Example #16
    def propagate(self, w, b, X, Y):
        """
            Forward the dataset
            X.shape: (numFeatures, m)
            Y.shape: (1, num examples)
        """
        m = X.shape[1]

        #--- Forward Propagation
        Z = np.dot(w.T, X) + b
        A = nn_utils.sigmoid(Z)  #computing the Activation

        #--- Cost calculation
        cost = nn_utils.cost(A, Y)

        #--- Backward Propagation
        dZ = A - Y
        dw = np.dot(X, (dZ).T) / m
        db = np.sum(dZ) / m

        grads = {"dw": dw, "db": db}

        return grads, cost
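Example #16 is plain logistic regression: with a sigmoid output, dZ = A - Y is exactly the gradient of the binary cross-entropy cost with respect to Z. The nn_utils.cost helper it calls is not shown; a sketch of the cost it presumably computes (an assumption):

import numpy as np

def cost(A, Y):
    m = Y.shape[1]
    # binary cross-entropy averaged over the m examples
    return -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / m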
Example #17
    def forward_one_layer(self, A_previous, W, b, activation_function):
        """
            Forwarding only 1 layer ahead
            W.shape = (nl, nl-1)
            Z, A.shape = (nl, m); A_prev.shape = (nl-1, m)
            Returns A and [cache = A_prev, W, b, Z]
        """

        print("--------------------")
        print(A_previous.shape)
        print(W.shape)
        print("--------------------")

        Z = np.dot(W, A_previous) + b

        if activation_function == "sigmoid":
            A = nn_utils.sigmoid(Z)
        elif activation_function == "relu":
            A = nn_utils.relu(Z)

        cache = A_previous, W, b, Z

        return A, cache
Example #18
    def __init__(self, dataset, epochs, w=None, print_step=None):
        self.train_x, self.test_x, self.train_y, self.test_y = dataset
        self.l3_error = 0
        self.neurons = self.train_x.shape[1]
        self.Xavier = 1  # np.sqrt(1.0 / 2 * self.neurons)
        if w is None:
            self.w0 = (2 * np.random.random(
                (self.neurons, self.neurons)) - 1) * self.Xavier
            self.w1 = (2 * np.random.random(
                (self.neurons, self.neurons)) - 1) * self.Xavier
            self.w2 = (2 * np.random.random(
                (self.neurons, 1)) - 1) * self.Xavier
        else:
            self.w0, self.w1, self.w2 = w[0], w[1], w[2]
        for j in range(1, epochs + 1):
            l1 = sigmoid(np.dot(self.train_x, self.w0))
            l2 = sigmoid(np.dot(l1, self.w1))
            l3 = sigmoid(np.dot(l2, self.w2))
            self.l3_error = self.train_y - l3

            if (print_step is not None) and ((j % print_step == 0)
                                             or j == epochs):
                accuracy, acc_std = self.calc_accuracy()
                print("{},{},{},{}".format(j, np.mean(np.abs(self.l3_error)),
                                           accuracy, acc_std))

            l3_adjustment = self.l3_error * sigmoid(l3, deriv=True)
            l2_error = l3_adjustment.dot(self.w2.T)

            l2_adjustment = l2_error * sigmoid(l2, deriv=True)
            l1_error = l2_adjustment.dot(self.w1.T)

            l1_adjustment = l1_error * sigmoid(l1, deriv=True)

            # dropout of 10%
            # self._drop_out(self.W2, DROPOUT_RATE)

            # update weights for all the synapses
            self.w2 += l2.T.dot(l3_adjustment) * learning_rate
            self.w1 += l1.T.dot(l2_adjustment) * learning_rate
            self.w0 += self.train_x.T.dot(l1_adjustment) * learning_rate
Example #19
 def calc_accuracy(self, test_x, test_y):
     prime_y = sigmoid(np.dot(test_x, self.w0))
     y_error = test_y - prime_y
     return 1 - np.mean(np.abs(y_error))
Example #20
 def forward(self, xr_t, xz_t, xh_t, h_tm1):
     r_t = sigmoid(xr_t + T.dot(h_tm1, self.W_hr))
     z_t = sigmoid(xz_t + T.dot(h_tm1, self.W_hz))
     h_hat_t = self.activation(xh_t + T.dot((r_t * h_tm1), self.W_hh))
     h_t = (1. - z_t) * h_tm1 + z_t * h_hat_t
     return h_t
Example #21
 def forward(self, xr_t, xz_t, xh_t, h_tm1):
     r_t = sigmoid(xr_t + T.dot(h_tm1, self.W_hr))
     z_t = sigmoid(xz_t + T.dot(h_tm1, self.W_hz))
     h_hat_t = self.activation(xh_t + T.dot((r_t * h_tm1), self.W_hh))
     h_t = (1. - z_t) * h_tm1 + z_t * h_hat_t
     return h_t
Example #22
def launch_learning(x):
    """
    Takes a matrix of examples X with dimensions NxD and returns a vector y with dimensions Nx1,
    where each element is in the range {0, ..., 35} and denotes the character recognized in that example.
    :param x: matrix with dimensions NxD
    :return: vector with dimensions Nx1
    """
    x_train, y_train = load_training_data()

    x_train = prepare_x(x_train)
    y_train = prepare_y(y_train)
    x = prepare_x(x)

    hog_for_shape = hog.hog(x_train[0],
                            cell_size=(HOG_CELL_SIZE, HOG_CELL_SIZE),
                            cells_per_block=(HOG_CELL_BLOCK, HOG_CELL_BLOCK),
                            signed_orientation=False,
                            nbins=HOG_NBINS,
                            visualise=False,
                            normalise=True,
                            flatten=True,
                            same_size=True)

    with open(TRAIN_HOG_FILE_PATH, 'rb') as f:
        features_train = pkl.load(f)

    print('features_train after load:{}'.format(features_train))
    print('features_train after load shape:{}'.format(features_train.shape))

    if features_train.shape != (x_train.shape[0], hog_for_shape.shape[0]):
        features_train = np.empty(shape=(x_train.shape[0],
                                         hog_for_shape.shape[0]))
        print('Need to recompute features for training set')
        for i in range(x_train.shape[0]):
            features_train[i] = hog.hog(x_train[i],
                                        cell_size=(HOG_CELL_SIZE,
                                                   HOG_CELL_SIZE),
                                        cells_per_block=(HOG_CELL_BLOCK,
                                                         HOG_CELL_BLOCK),
                                        signed_orientation=False,
                                        nbins=HOG_NBINS,
                                        visualise=False,
                                        normalise=True,
                                        flatten=True,
                                        same_size=True)

        with open(TRAIN_HOG_FILE_PATH, 'wb') as pickle_file:
            pkl.dump(features_train, pickle_file)

    # these lines are necessary in the upload version; the code above will be removed
    # features_x = np.empty(shape=(x.shape[0], hog_for_shape.shape[0]))
    #         for i in range(x.shape[0]):
    #                 features_x[i] = hog.hog(x[i], cell_size=(HOG_CELL_SIZE, HOG_CELL_SIZE),
    #                             cells_per_block=(HOG_CELL_BLOCK, HOG_CELL_BLOCK),
    #                             signed_orientation=False, nbins=HOG_NBINS, visualise=False,
    #                             normalise=True, flatten=True, same_size=True)

    input_layer_neurons = features_train.shape[1]
    hidden_layer_neurons = NN_HIDDEN_NEURONS
    output_neurons = NUMBER_OF_LABELS
    needs_init = False
    try:
        with open(WEIGHTS_HIDDEN_PATH, 'rb') as f:
            weights_hidden = pkl.load(f)
        with open(BIASES_HIDDEN_PATH, 'rb') as f:
            biases_hidden = pkl.load(f)
        with open(WEIGHTS_OUTPUT_PATH, 'rb') as f:
            weights_output = pkl.load(f)
        with open(BIASES_OUTPUT_PATH, 'rb') as f:
            biases_output = pkl.load(f)
    except (IOError, EOFError):  # missing or unreadable weight files trigger re-initialization
        needs_init = True

    if needs_init or weights_hidden.shape != (input_layer_neurons,
                                              hidden_layer_neurons):
        print('starting learning')

        # all connections from every feature to every node in hidden layer
        weights_hidden = np.random.uniform(size=(input_layer_neurons,
                                                 hidden_layer_neurons))
        biases_hidden = np.random.uniform(size=(1, hidden_layer_neurons))

        # all connections from every hidden_neuron to output neuron
        weights_output = np.random.uniform(size=(hidden_layer_neurons,
                                                 output_neurons))
        biases_output = np.random.uniform(size=(1, output_neurons))

    for i in range(epochs):
        print('weights hidden:{} {} {}'.format(weights_hidden[0][1],
                                               weights_hidden[0][2],
                                               weights_hidden[0][3]))
        # if using batches it will go here
        hidden_ins_w = np.dot(features_train, weights_hidden)
        hidden_layer_input = hidden_ins_w + biases_hidden
        hidden_activations = nn.sigmoid(hidden_layer_input)

        output_hidden_ins_w = np.dot(hidden_activations, weights_output)
        output_layer_input = output_hidden_ins_w + biases_output
        output = nn.sigmoid(output_layer_input)

        # back propagation
        print('starting back propagation:{}'.format(i))
        error = calc_error(output, y_train)
        slope_output_layer = nn.sigmoid_derivative(output)
        slope_hidden_layer = nn.sigmoid_derivative(hidden_activations)

        delta_output = slope_output_layer * error

        error_hidden = delta_output.dot(weights_output.T)
        delta_hidden_layer = error_hidden * slope_hidden_layer

        weights_output += hidden_activations.T.dot(
            delta_output) * LEARNING_RATE
        biases_output += np.sum(delta_output, axis=0,
                                keepdims=True) * LEARNING_RATE

        weights_hidden += features_train.T.dot(
            delta_hidden_layer) * LEARNING_RATE
        biases_hidden += np.sum(delta_hidden_layer, axis=0,
                                keepdims=True) * LEARNING_RATE

        with open(WEIGHTS_HIDDEN_PATH, 'wb') as f:
            pkl.dump(weights_hidden, f)
        with open(BIASES_HIDDEN_PATH, 'wb') as f:
            pkl.dump(biases_hidden, f)
        with open(WEIGHTS_OUTPUT_PATH, 'wb') as f:
            pkl.dump(weights_output, f)
        with open(BIASES_OUTPUT_PATH, 'wb') as f:
            pkl.dump(biases_output, f)

    return 1
Example #23
 def calc_accuracy(self):
     l1 = sigmoid(np.dot(self.test_x, self.w0))
     l2 = sigmoid(np.dot(l1, self.w1))
     l3 = sigmoid(np.dot(l2, self.w2))
     y_error = self.test_y - l3
     return 1 - np.mean(np.abs(y_error)), np.std(y_error)
Example #24
    def recurrence_interleave(self, x_t, a_t, b_t, A_t):
        """
        :param  x_t: 1D: batch_size, 2D: dim_hidden
        :param  a_t: 1D: batch, 2D: n_agents; elem=one hot vector for speaker
        :param  b_t: 1D: batch, 2D: n_agents; elem=one hot vector for addressee
        :return A_t: 1D: batch_size, 2D: n_agents, 3D: dim_agent
        """
        h_a = A_t * a_t.dimshuffle(0, 1,
                                   'x')  # batch_size x n_agents x dim_agent
        h_b = A_t * b_t.dimshuffle(0, 1,
                                   'x')  # batch_size x n_agents x dim_agent
        h_other = A_t - h_a - h_b  # batch_size x n_agents x dim_agent

        h_a = T.sum(h_a, 1)  # batch_size x dim_agent
        h_b = T.sum(h_b, 1)  # batch_size x dim_agent

        xt_ha = T.concatenate([h_a, x_t],
                              1)  # batch_size x (dim_agent + dim_hidden)

        # update for speaker
        r_t = sigmoid(
            T.dot(xt_ha, self.WA_xr) + T.dot(h_a, self.WA_hr) +
            T.dot(h_b, self.VA_hr))
        p_t = sigmoid(
            T.dot(xt_ha, self.WA_xp) + T.dot(h_a, self.WA_hp) +
            T.dot(h_b, self.VA_hp))
        z_t = sigmoid(
            T.dot(xt_ha, self.WA_xz) + T.dot(h_a, self.WA_hz) +
            T.dot(h_b, self.VA_hz))
        h_hat_t = self.activation(
            T.dot(xt_ha, self.WA_xh) + T.dot((r_t * h_a), self.WA_hh) +
            T.dot((p_t * h_b), self.VA_hh))
        ha_t = (1. - z_t) * h_a + z_t * h_hat_t
        A_t_a = ha_t.dimshuffle(0, 'x', 1) * a_t.dimshuffle(0, 1, 'x')

        # update for addressee
        r_t = sigmoid(
            T.dot(xt_ha, self.WB_xr) + T.dot(h_b, self.WB_hr) +
            T.dot(h_a, self.VB_hr))
        p_t = sigmoid(
            T.dot(xt_ha, self.WB_xp) + T.dot(h_b, self.WB_hp) +
            T.dot(h_a, self.VB_hp))
        z_t = sigmoid(
            T.dot(xt_ha, self.WB_xz) + T.dot(h_b, self.WB_hz) +
            T.dot(h_a, self.VB_hz))
        h_hat_t = self.activation(
            T.dot(xt_ha, self.WB_xh) + T.dot((r_t * h_b), self.WB_hh) +
            T.dot((p_t * h_a), self.VB_hh))
        hb_t = (1. - z_t) * h_b + z_t * h_hat_t
        A_t_b = hb_t.dimshuffle(0, 'x', 1) * b_t.dimshuffle(0, 1, 'x')

        # update for others
        x_r = T.dot(xt_ha, self.Wother_xr).dimshuffle(0, 'x', 1) * (
            1 - (a_t.dimshuffle(0, 1, 'x') + b_t.dimshuffle(0, 1, 'x'))
        )  # batch_size x n_agents x dim_hidden
        x_z = T.dot(xt_ha, self.Wother_xz).dimshuffle(0, 'x', 1) * (
            1 - (a_t.dimshuffle(0, 1, 'x') + b_t.dimshuffle(0, 1, 'x'))
        )  # batch_size x n_agents x dim_hidden
        x_h = T.dot(xt_ha, self.Wother_xh).dimshuffle(0, 'x', 1) * (
            1 - (a_t.dimshuffle(0, 1, 'x') + b_t.dimshuffle(0, 1, 'x'))
        )  # batch_size x n_agents x dim_hidden

        r_t = sigmoid(x_r + T.dot(h_other, self.Wother_hr))
        z_t = sigmoid(x_z + T.dot(h_other, self.Wother_hz))
        h_hat_t = self.activation(x_h + T.dot((r_t * h_other), self.Wother_hh))
        h_t = (1. - z_t) * h_other + z_t * h_hat_t
        A_t_other = h_t

        return A_t_a + A_t_b + A_t_other
Example #25
    def __init__(self, x_span, x_word, x_ctx, x_dist, y, init_emb, n_vocab, dim_w, dim_d, dim_h, L2_reg):
        """
        :param x_span: 1D: batch, 2D: limit * 2 (10); elem=word id
        :param x_word: 1D: batch, 2D: 4 (m_first, m_last, a_first, a_last); elem=word id
        :param x_ctx : 1D: batch, 2D: window * 2 * 2 (20); elem=word id
        :param x_dist: 1D: batch; elem=distance between sentences of ant and ment
        :param y     : 1D: batch
        """

        self.input  = [x_span, x_word, x_ctx, x_dist, y]
        self.x_span = x_span
        self.x_word = x_word
        self.x_ctx  = x_ctx
        self.x_dist = x_dist
        self.y      = y

        dim_x = dim_w * (2 + 4 + 20) + 1
        batch = y.shape[0]

        """ Params """
        if init_emb is None:
            self.emb = theano.shared(sample_weights(n_vocab, dim_w))
        else:
            self.emb = theano.shared(init_emb)

        self.W_d = theano.shared(sample_weights(dim_d))
        self.W_i = theano.shared(sample_weights(dim_x, dim_h*3))
        self.W_h = theano.shared(sample_weights(dim_h*3, dim_h))
        self.W_o = theano.shared(sample_weights(dim_h))
        self.params = [self.W_d, self.W_i, self.W_h, self.W_o]

        """ Input Layer """
        x_s = self.emb[x_span]     # 1D: batch, 2D: limit * 2,      3D: dim_w
        x_w = self.emb[x_word]     # 1D: batch, 2D: 4,              3D: dim_w
        x_c = self.emb[x_ctx]      # 1D: batch, 2D: window * 2 * 2, 3D: dim_w
        x_d = self.W_d[x_dist]     # 1D: batch
        x_s_avg = T.concatenate([T.mean(x_s[:, :x_s.shape[1]/2], 1), T.mean(x_s[:, x_s.shape[1]/2:], 1)], 1)
        x = T.concatenate([x_s_avg, x_w.reshape((batch, -1)), x_c.reshape((batch, -1)), x_d.reshape((batch, 1))], 1)

        """ Intermediate Layers """
        h1 = relu(T.dot(x, self.W_i))   # h1: 1D: batch, 2D: dim_h
        h2 = relu(T.dot(h1, self.W_h))  # h2: 1D: batch, 2D: dim_h

        """ Output Layer """
        p_y = sigmoid(T.dot(h2, self.W_o))  # p_y: 1D: batch

        """ Predicts """
        self.thresholds = theano.shared(np.asarray([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], dtype=theano.config.floatX))
        self.y_hat = self.binary_predict(p_y)  # 1D: batch, 2D: 9 (thresholds)
        self.y_hat_index = T.argmax(p_y)
        self.p_y_hat = p_y[self.y_hat_index]

        """ Cost Function """
        self.nll = - T.sum(y * T.log(p_y) + (1. - y) * T.log((1. - p_y)))  # TODO: ranking criterion
        self.cost = self.nll + L2_reg * L2_sqr(params=self.params) / 2

        """ Update """
        self.grad = T.grad(self.cost, self.params)
        self.updates = adam(self.params, self.grad)

        """ Check Results """
        self.result = T.eq(self.y_hat, y.reshape((y.shape[0], 1)))  # 1D: batch, 2D: 9 (thresholds)
        self.total_p = T.sum(self.y_hat, 0)
        self.total_r = T.sum(y, keepdims=True)
        self.correct = T.sum(self.result, 0)
        self.correct_t, self.correct_f = correct_tf(self.result, y.reshape((y.shape[0], 1)))
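Examples #25, #27 and #28 call self.binary_predict(p_y) against nine decision thresholds; that method is not shown in the snippets. A plausible definition consistent with the shape comments (an assumption, not the repository's actual code):

def binary_predict(self, p_y):
    # compare each probability with every threshold;
    # result: 1D: batch, 2D: 9 (thresholds), elem in {0., 1.}
    return T.cast(T.ge(p_y.dimshuffle(0, 'x'),
                       self.thresholds.dimshuffle('x', 0)),
                  theano.config.floatX)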
Example #26
 def calc_accuracy(self):
     prime_y = sigmoid(np.dot(self.test_x, self.w0))
     y_error = self.test_y - prime_y
     return 1 - np.mean(np.abs(y_error)), np.std(y_error)
Example #27
    def __init__(self, x_span, x_word, x_ctx, x_dist, x_slen, y, init_emb, n_vocab, dim_w, dim_d, dim_h, L2_reg):
        """
        :param x_span: 1D: batch, 2D: limit * 2 (10); elem=word id
        :param x_word: 1D: batch, 2D: 4 (m_first, m_last, a_first, a_last); elem=word id
        :param x_ctx : 1D: batch, 2D: window * 2 * 2 (20); elem=word id
        :param x_dist: 1D: batch; 2D: 2; elem=[sent dist, ment dist]
        :param x_slen: 1D: batch; 2D: 3; elem=[m_span_len, a_span_len, head_match]
        :param y     : 1D: batch
        """

        self.input  = [x_span, x_word, x_ctx, x_dist, y]
        self.x_span = x_span
        self.x_word = x_word
        self.x_ctx  = x_ctx
        self.x_dist = x_dist
        self.x_slen = x_slen
        self.y      = y

        dim_x = dim_w * (10 + 4 + 4 + 2 + 3)
        batch = y.shape[0]

        """ Params """
        if init_emb is None:
            self.emb = theano.shared(sample_weights(n_vocab, dim_w))
        else:
            self.emb = theano.shared(init_emb)

        self.W_d = theano.shared(sample_weights(dim_d, dim_w))
        self.W_l = theano.shared(sample_weights(7, dim_w))
        self.W_i = theano.shared(sample_weights(dim_x, dim_h))
        self.W_h = theano.shared(sample_weights(dim_h, dim_h))
        self.W_o = theano.shared(sample_weights(dim_h))
        self.params = [self.W_d, self.W_l, self.W_i, self.W_h, self.W_o]

        """ Input Layer """
        x_vec = T.concatenate([x_span, x_word, x_ctx], 1).flatten()  # 1D: batch * (limit * 2 + 4 + 20)
        x_in = self.emb[x_vec]     # 1D: batch, 2D: limit * 2, 3D: dim_w
        x_d = self.W_d[x_dist]     # 1D: batch, 2D: 2, 3D: dim_w
        x_l = self.W_l[x_slen]     # 1D: batch, 2D: 2, 3D: dim_w
        x = T.concatenate([x_in.reshape((batch, -1)), x_d.reshape((batch, -1)), x_l.reshape((batch, -1))], 1)

        """ Intermediate Layers """
        h1 = relu(T.dot(x, self.W_i))   # h1: 1D: batch, 2D: dim_h
        h2 = relu(T.dot(h1, self.W_h))  # h2: 1D: batch, 2D: dim_h

        """ Output Layer """
        p_y = sigmoid(T.dot(h2, self.W_o))  # p_y: 1D: batch

        """ Cost Function """
        self.nll = - T.sum(y * T.log(p_y) + (1. - y) * T.log((1. - p_y)))  # TODO: ranking criterion
        self.cost = self.nll + L2_reg * L2_sqr(params=self.params) / 2

        """ Update """
        self.updates = sgd(self.cost, self.params, self.emb, x_in)

        """ Predicts """
        self.thresholds = theano.shared(np.asarray([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], dtype=theano.config.floatX))
        self.y_hat = self.binary_predict(p_y)  # 1D: batch, 2D: 9 (thresholds)
        self.y_hat_index = T.argmax(p_y)
        self.p_y_hat = p_y[self.y_hat_index]

        """ Check Results """
        self.result = T.eq(self.y_hat, y.reshape((y.shape[0], 1)))  # 1D: batch, 2D: 9 (thresholds)
        self.total_p = T.sum(self.y_hat, 0)
        self.total_r = T.sum(y, keepdims=True)
        self.correct = T.sum(self.result, 0)
        self.correct_t, self.correct_f = correct_tf(self.result, y.reshape((y.shape[0], 1)))
Example #28
    def __init__(self, x_span, x_word, x_ctx, x_dist, y, init_emb, n_vocab,
                 dim_w, dim_d, dim_h, L2_reg):
        """
        :param x_span: 1D: batch, 2D: limit * 2 (10); elem=word id
        :param x_word: 1D: batch, 2D: 4 (m_first, m_last, a_first, a_last); elem=word id
        :param x_ctx : 1D: batch, 2D: window * 2 * 2 (20); elem=word id
        :param x_dist: 1D: batch; elem=distance between sentences of ant and ment
        :param y     : 1D: batch
        """

        self.input = [x_span, x_word, x_ctx, x_dist, y]
        self.x_span = x_span
        self.x_word = x_word
        self.x_ctx = x_ctx
        self.x_dist = x_dist
        self.y = y

        dim_x = dim_w * (2 + 4 + 20) + 1
        batch = y.shape[0]
        """ Params """
        if init_emb is None:
            self.emb = theano.shared(sample_weights(n_vocab, dim_w))
        else:
            self.emb = theano.shared(init_emb)

        self.W_d = theano.shared(sample_weights(dim_d))
        self.W_i = theano.shared(sample_weights(dim_x, dim_h * 3))
        self.W_h = theano.shared(sample_weights(dim_h * 3, dim_h))
        self.W_o = theano.shared(sample_weights(dim_h))
        self.params = [self.W_d, self.W_i, self.W_h, self.W_o]
        """ Input Layer """
        x_s = self.emb[x_span]  # 1D: batch, 2D: limit * 2,      3D: dim_w
        x_w = self.emb[x_word]  # 1D: batch, 2D: 4,              3D: dim_w
        x_c = self.emb[x_ctx]  # 1D: batch, 2D: window * 2 * 2, 3D: dim_w
        x_d = self.W_d[x_dist]  # 1D: batch
        x_s_avg = T.concatenate([
            T.mean(x_s[:, :x_s.shape[1] / 2], 1),
            T.mean(x_s[:, x_s.shape[1] / 2:], 1)
        ], 1)
        x = T.concatenate([
            x_s_avg,
            x_w.reshape((batch, -1)),
            x_c.reshape((batch, -1)),
            x_d.reshape((batch, 1))
        ], 1)
        """ Intermediate Layers """
        h1 = relu(T.dot(x, self.W_i))  # h1: 1D: batch, 2D: dim_h
        h2 = relu(T.dot(h1, self.W_h))  # h2: 1D: batch, 2D: dim_h
        """ Output Layer """
        p_y = sigmoid(T.dot(h2, self.W_o))  # p_y: 1D: batch
        """ Predicts """
        self.thresholds = theano.shared(
            np.asarray([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
                       dtype=theano.config.floatX))
        self.y_hat = self.binary_predict(p_y)  # 1D: batch, 2D: 9 (thresholds)
        self.y_hat_index = T.argmax(p_y)
        self.p_y_hat = p_y[self.y_hat_index]
        """ Cost Function """
        self.nll = -T.sum(y * T.log(p_y) + (1. - y) * T.log(
            (1. - p_y)))  # TODO: ranking criterion
        self.cost = self.nll + L2_reg * L2_sqr(params=self.params) / 2
        """ Update """
        self.grad = T.grad(self.cost, self.params)
        self.updates = adam(self.params, self.grad)
        """ Check Results """
        self.result = T.eq(self.y_hat, y.reshape(
            (y.shape[0], 1)))  # 1D: batch, 2D: 9 (thresholds)
        self.total_p = T.sum(self.y_hat, 0)
        self.total_r = T.sum(y, keepdims=True)
        self.correct = T.sum(self.result, 0)
        self.correct_t, self.correct_f = correct_tf(self.result,
                                                    y.reshape((y.shape[0], 1)))