def forward(self, xi_t, xf_t, xc_t, xo_t, h_tm1, c_tm1):
    i_t = sigmoid(xi_t + T.dot(h_tm1, self.W_hi) + c_tm1 * self.W_ci)
    f_t = sigmoid(xf_t + T.dot(h_tm1, self.W_hf) + c_tm1 * self.W_cf)
    c_t = f_t * c_tm1 + i_t * self.activation(xc_t + T.dot(h_tm1, self.W_hc))
    o_t = sigmoid(xo_t + T.dot(h_tm1, self.W_ho) + c_t * self.W_co)
    h_t = o_t * self.activation(c_t)
    return h_t, c_t
def recurrence(self, x_t, h_tm1):
    r_t = sigmoid(T.dot(x_t, self.W_xr) + T.dot(h_tm1, self.W_hr))
    z_t = sigmoid(T.dot(x_t, self.W_xz) + T.dot(h_tm1, self.W_hz))
    h_hat_t = self.activation(T.dot(x_t, self.W_xh) + T.dot((r_t * h_tm1), self.W_hh))
    h_t = (1. - z_t) * h_tm1 + z_t * h_hat_t
    return h_t
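# The GRU step above processes one timestep; a step function like this is normally
# unrolled over a sequence with theano.scan. A minimal sketch, assuming a `gru`
# object that exposes recurrence() as above and already holds its weight matrices
# (the variable names here are illustrative, not taken from the original code):
import theano
import theano.tensor as T

x_seq = T.tensor3('x_seq')   # 1D: n_steps, 2D: batch, 3D: n_in
h0 = T.matrix('h0')          # 1D: batch, 2D: n_h

# scan feeds one slice of `sequences` plus the previous output to the step function
h_seq, _ = theano.scan(fn=gru.recurrence,
                       sequences=x_seq,      # x_t at each timestep
                       outputs_info=h0)      # h_tm1, initialised with h0
h_last = h_seq[-1]                           # hidden state after the final step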
def __init__(self, dataset, epochs, w=None, print_step=None):
    self.train_x, self.test_x, self.train_y, self.test_y = dataset
    self.l1_error = 0
    self.neurons = self.train_x.shape[1]
    self.Xavier = np.sqrt(1.0 / 2 * self.neurons)
    if w is None:
        # single weight matrix: inputs -> one output unit
        self.w0 = 2 * np.random.random((self.neurons, 1)) - 1
    else:
        self.w0 = w[0]
    for j in xrange(1, epochs + 1):
        l1 = sigmoid(np.dot(self.train_x, self.w0))
        self.l1_error = self.train_y - l1
        if (print_step is not None) and ((j % print_step == 0) or j == epochs):
            accuracy = self.calc_accuracy()
            print("{},{},{}".format(j, np.mean(np.abs(self.l1_error)), accuracy))
        # delta-rule update on the single layer
        adjustment = self.l1_error * sigmoid(l1, deriv=True)
        self.w0 += self.train_x.T.dot(adjustment) * learning_rate
def __init__(self, train_x, train_y, test_x, test_y, epochs, w=None, print_step=None):
    self.l1_error = 0
    if w is None:
        # number of input features = total elements / number of rows
        self.w0 = 2 * np.random.random((train_x.size // len(train_x), 1)) - 1
    else:
        self.w0 = w
    for j in xrange(1, epochs + 1):
        l1 = sigmoid(np.dot(train_x, self.w0))
        self.l1_error = train_y - l1
        if (print_step is not None) and ((j % print_step == 0) or j == epochs):
            accuracy = self.calc_accuracy(test_x, test_y)
            print("{},{},{}".format(j, np.mean(np.abs(self.l1_error)), accuracy))
        adjustment = self.l1_error * sigmoid(l1, deriv=True)
        self.w0 += train_x.T.dot(adjustment) * learning_rate
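# The two trainers above rely on a module-level learning_rate constant and on a
# sigmoid(x, deriv=...) helper that is not shown. A minimal sketch matching the way
# it is called (deriv=True is applied to values that are already sigmoid outputs);
# the value of learning_rate is an illustrative assumption:
import numpy as np

learning_rate = 0.01

def sigmoid(x, deriv=False):
    if deriv:
        # x is expected to already be sigmoid(z), so the derivative is x * (1 - x)
        return x * (1.0 - x)
    return 1.0 / (1.0 + np.exp(-x))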
def __forward(self, input_data):
    input_data = add_bias(input_data)
    z1 = input_data.dot(self.first_layer_weights)
    hidden_layer = sigmoid(z1)
    hidden_layer = add_bias(hidden_layer)
    z2 = hidden_layer.dot(self.second_layer_weights)
    output_layer = sigmoid(z2)
    return hidden_layer, output_layer
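# __forward() above assumes an add_bias() helper that is not shown. A minimal sketch,
# assuming examples are stored in rows and the bias is folded into the weight matrices
# by appending a column of ones:
import numpy as np

def add_bias(data):
    ones = np.ones((data.shape[0], 1))
    return np.hstack((data, ones))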
def forward(self, xi_t, xf_t, xc_t, xo_t, h_tm1, c_tm1):
    """
    :param xi_t, xf_t, xc_t, xo_t: 1D: Batch, 2D: n_h; pre-computed input term for each gate
    :param h_tm1: 1D: Batch, 2D: n_h
    :param c_tm1: 1D: Batch, 2D: n_h
    :return: h_t: 1D: Batch, 2D: n_h
    :return: c_t: 1D: Batch, 2D: n_h
    """
    i_t = sigmoid(xi_t + T.dot(h_tm1, self.W_hi) + c_tm1 * self.W_ci)
    f_t = sigmoid(xf_t + T.dot(h_tm1, self.W_hf) + c_tm1 * self.W_cf)
    c_t = f_t * c_tm1 + i_t * self.activation(xc_t + T.dot(h_tm1, self.W_hc))
    o_t = sigmoid(xo_t + T.dot(h_tm1, self.W_ho) + c_t * self.W_co)
    h_t = o_t * self.activation(c_t)
    return h_t, c_t
def predict(self, w, b, X, show_image):
    """
    Predicting a dataset using a model
    X.shape = (features, m)
    """
    # The predictions - computing the activation of all X
    Z = np.dot(w.T, X) + b
    A = nn_utils.sigmoid(Z)

    # Store each prediction in the vector
    m = X.shape[1]
    Y_predictions = np.zeros((1, m))
    zero_count = 0
    one_count = 0
    for i in range(m):
        prediction = 1 if A[0, i] > 0.5 else 0
        Y_predictions[0, i] = prediction
        if prediction == 1:
            one_count += 1
        elif prediction == 0:
            zero_count += 1
        if prediction == 1 and show_image:
            # Showing certain pictures on the screen
            plt.imshow(X[:, i].reshape((64, 64, 3)))
            # plt.show()
    print("1s: " + str(one_count))
    print("0s: " + str(zero_count))
    return Y_predictions
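# The per-example loop in predict() can be expressed as a single vectorised
# comparison; a sketch of an equivalent thresholding step (the helper name is
# illustrative, not part of the original code):
import numpy as np

def threshold_predictions(A):
    # A has shape (1, m); values above 0.5 are mapped to 1, the rest to 0
    Y_predictions = (A > 0.5).astype(float)
    one_count = int(Y_predictions.sum())
    zero_count = Y_predictions.shape[1] - one_count
    return Y_predictions, one_count, zero_count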
def linear_activation_forward(A_prev, W, b, activation):
    '''
    Implements the forward propagation for the Linear->Activation layer.

    Arguments:
    A_prev -- activation from previous layer (or input data)
    W -- weight matrix
    b -- bias vector
    activation -- activation used in this layer, 'relu' or 'sigmoid'

    Returns:
    A -- the output of activation function
    cache -- tuple containing 'linear_cache' and 'activation_cache',
             stored for computing backward pass efficiently
    '''
    # calling linear_forward to get the value of Z and the linear cache
    Z, linear_cache = linear_forward(A_prev, W, b)

    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)  # sigmoid defined in nn_utils
    elif activation == 'relu':
        A, activation_cache = relu(Z)     # relu defined in nn_utils

    # assertion for checking the shape of A
    assert A.shape == (W.shape[0], A_prev.shape[1])

    cache = (linear_cache, activation_cache)
    return A, cache
def linear_activation_forward(A_prev, W, b, activation):
    '''
    Implements forward propagation.

    Arguments:
    A_prev -- activation from the previous layer
    W -- weight matrix
    b -- bias vector
    activation -- the activation used in this layer, 'sigmoid' or 'relu'

    Returns:
    A -- the output activation
    cache -- a python tuple containing 'linear_cache' and 'activation_cache'
    '''
    Z, linear_cache = linear_forward(A_prev, W, b)
    if activation == 'relu':
        A, activation_cache = nn_utils.relu(Z)
    elif activation == 'sigmoid':
        A, activation_cache = nn_utils.sigmoid(Z)
    cache = (linear_cache, activation_cache)
    return A, cache
def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """
    if activation == "sigmoid":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = nn_utils.sigmoid(Z)
    elif activation == "relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = nn_utils.relu(Z)

    assert A.shape == (W.shape[0], A_prev.shape[1])

    cache = (linear_cache, activation_cache)
    return A, cache
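# The three linear_activation_forward() variants above assume a linear_forward()
# helper and sigmoid/relu functions that return (activation, cache). A minimal
# sketch of those helpers under that convention (the cache contents are an
# assumption based on how they are used here):
import numpy as np

def linear_forward(A_prev, W, b):
    # Z = W . A_prev + b; keep the inputs so the backward pass can reuse them
    Z = np.dot(W, A_prev) + b
    cache = (A_prev, W, b)
    return Z, cache

def sigmoid(Z):
    A = 1.0 / (1.0 + np.exp(-Z))
    return A, Z   # Z is kept as the activation cache

def relu(Z):
    A = np.maximum(0, Z)
    return A, Z   # Z is kept as the activation cache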
def skip_recurrence_inter(self, x_t, i_t, A_t):
    """
    :param x_t: 1D: batch_size, 2D: dim_hidden
    :param i_t: 1D: batch_size; elem=index of the agent to update
    :return A_t: 1D: batch_size, 2D: n_agents, 3D: dim_agent
    """
    x_r = T.dot(x_t, self.W_xr)
    x_z = T.dot(x_t, self.W_xz)
    x_h = T.dot(x_t, self.W_xh)

    # pick out the row of the addressed agent for every example in the batch
    A_sub = A_t[T.arange(A_t.shape[0]), i_t]

    r_t = sigmoid(x_r + T.dot(A_sub, self.W_hr))
    z_t = sigmoid(x_z + T.dot(A_sub, self.W_hz))
    h_hat_t = self.activation(x_h + T.dot((r_t * A_sub), self.W_hh))
    h_t = (1. - z_t) * A_sub + z_t * h_hat_t

    return T.set_subtensor(A_sub, h_t)
def recurrence_inter(self, x_t, i_t, A_t):
    """
    :param x_t: 1D: batch_size, 2D: dim_hidden
    :param i_t: 1D: batch_size, 2D: n_agents; elem=one hot vector
    :return A_t: 1D: batch_size, 2D: n_agents, 3D: dim_agent
    """
    x_r = T.dot(x_t, self.W_xr).dimshuffle(0, 'x', 1) * i_t.dimshuffle(0, 1, 'x')
    x_z = T.dot(x_t, self.W_xz).dimshuffle(0, 'x', 1) * i_t.dimshuffle(0, 1, 'x')
    x_h = T.dot(x_t, self.W_xh).dimshuffle(0, 'x', 1) * i_t.dimshuffle(0, 1, 'x')

    r_t = sigmoid(x_r + T.dot(A_t, self.W_hr))
    z_t = sigmoid(x_z + T.dot(A_t, self.W_hz))
    h_hat_t = self.activation(x_h + T.dot((r_t * A_t), self.W_hh))
    h_t = (1. - z_t) * A_t + z_t * h_hat_t
    return h_t
def forward_one_layer(A_previous, W, b, activation_function):
    """
    Forwarding only 1 layer ahead
    W.shape = (l, l-1)
    Z, A.shape = (l, m); A_prev.shape = (l-1, m)
    Returns A and [cache = A_prev, W, b, Z]
    """
    Z = np.dot(W, A_previous) + b  # W.shape: (l, l-1) / A_previous.shape: (l-1, m)
    A = None
    if activation_function == "sigmoid":
        A = nn_utils.sigmoid(Z)
    elif activation_function == "relu":
        A = nn_utils.relu(Z)
    cache = A_previous, W, b, Z
    return A, cache
def propagate(self, w, b, X, Y):
    """
    Forward and backward pass over the dataset
    X.shape: (num_features, m)
    Y.shape: (1, m)
    """
    m = X.shape[1]

    # Forward propagation
    Z = np.dot(w.T, X) + b
    A = nn_utils.sigmoid(Z)  # computing the activation

    # Cost calculation
    cost = nn_utils.cost(A, Y)

    # Backward propagation
    dZ = A - Y
    dw = np.dot(X, dZ.T) / m
    db = np.sum(dZ) / m

    grads = {"dw": dw, "db": db}
    return grads, cost
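# propagate() above calls nn_utils.cost(A, Y), which is not shown. A sketch of a
# binary cross-entropy cost consistent with the dZ = A - Y gradient used there
# (the exact definition in nn_utils is an assumption); note that here, unlike the
# (A, cache) helpers above, nn_utils.sigmoid returns just the activation:
import numpy as np

def cost(A, Y):
    m = Y.shape[1]
    return -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / m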
def forward_one_layer(self, A_previous, W, b, activation_function):
    """
    Forwarding only 1 layer ahead
    W.shape = (nl, nl-1)
    Z, A.shape = (nl, m); A_prev.shape = (nl-1, m)
    Returns A and [cache = A_prev, W, b, Z]
    """
    # debug output of the shapes being multiplied
    print("--------------------")
    print(A_previous.shape)
    print(W.shape)
    print("--------------------")

    Z = np.dot(W, A_previous) + b
    if activation_function == "sigmoid":
        A = nn_utils.sigmoid(Z)
    elif activation_function == "relu":
        A = nn_utils.relu(Z)
    cache = A_previous, W, b, Z
    return A, cache
def __init__(self, dataset, epochs, w=None, print_step=None):
    self.train_x, self.test_x, self.train_y, self.test_y = dataset
    self.l3_error = 0
    self.neurons = self.train_x.shape[1]
    self.Xavier = 1  # np.sqrt(1.0 / 2 * self.neurons)
    if w is None:
        self.w0 = (2 * np.random.random((self.neurons, self.neurons)) - 1) * self.Xavier
        self.w1 = (2 * np.random.random((self.neurons, self.neurons)) - 1) * self.Xavier
        self.w2 = (2 * np.random.random((self.neurons, 1)) - 1) * self.Xavier
    else:
        self.w0, self.w1, self.w2 = w[0], w[1], w[2]

    for j in xrange(1, epochs + 1):
        # forward pass through the three layers
        l1 = sigmoid(np.dot(self.train_x, self.w0))
        l2 = sigmoid(np.dot(l1, self.w1))
        l3 = sigmoid(np.dot(l2, self.w2))
        self.l3_error = self.train_y - l3

        if (print_step is not None) and ((j % print_step == 0) or j == epochs):
            accuracy, acc_std = self.calc_accuracy()
            print("{},{},{},{}".format(j, np.mean(np.abs(self.l3_error)), accuracy, acc_std))

        # backpropagate the error through the layers
        l3_adjustment = self.l3_error * sigmoid(l3, deriv=True)
        l2_error = l3_adjustment.dot(self.w2.T)
        l2_adjustment = l2_error * sigmoid(l2, deriv=True)
        l1_error = l2_adjustment.dot(self.w1.T)
        l1_adjustment = l1_error * sigmoid(l1, deriv=True)

        # dropout of 10%
        # self._drop_out(self.W2, DROPOUT_RATE)

        # update weights for all the synapses
        self.w2 += l2.T.dot(l3_adjustment) * learning_rate
        self.w1 += l1.T.dot(l2_adjustment) * learning_rate
        self.w0 += self.train_x.T.dot(l1_adjustment) * learning_rate
def calc_accuracy(self, test_x, test_y):
    prime_y = sigmoid(np.dot(test_x, self.w0))
    y_error = test_y - prime_y
    return 1 - np.mean(np.abs(y_error))
def forward(self, xr_t, xz_t, xh_t, h_tm1):
    r_t = sigmoid(xr_t + T.dot(h_tm1, self.W_hr))
    z_t = sigmoid(xz_t + T.dot(h_tm1, self.W_hz))
    h_hat_t = self.activation(xh_t + T.dot((r_t * h_tm1), self.W_hh))
    h_t = (1. - z_t) * h_tm1 + z_t * h_hat_t
    return h_t
def launch_learning(x):
    """
    Takes a matrix of examples X with dimensions NxD and returns a vector y with
    dimensions Nx1, where each element is in {0, ..., 35} and denotes the character
    recognized in the corresponding example.
    :param x: matrix with dimensions NxD
    :return: vector with dimensions Nx1
    """
    x_train, y_train = load_training_data()
    x_train = prepare_x(x_train)
    y_train = prepare_y(y_train)
    x = prepare_x(x)

    hog_for_shape = hog.hog(x_train[0], cell_size=(HOG_CELL_SIZE, HOG_CELL_SIZE),
                            cells_per_block=(HOG_CELL_BLOCK, HOG_CELL_BLOCK),
                            signed_orientation=False, nbins=HOG_NBINS, visualise=False,
                            normalise=True, flatten=True, same_size=True)

    with open(TRAIN_HOG_FILE_PATH, 'rb') as f:
        features_train = pkl.load(f)
    print('features_train after load:{}'.format(features_train))
    print('features_train after load shape:{}'.format(features_train.shape))

    if features_train.shape != (x_train.shape[0], hog_for_shape.shape[0]):
        features_train = np.empty(shape=(x_train.shape[0], hog_for_shape.shape[0]))
        print('Need to recompute features for training set')
        for i in range(x_train.shape[0]):
            features_train[i] = hog.hog(x_train[i], cell_size=(HOG_CELL_SIZE, HOG_CELL_SIZE),
                                        cells_per_block=(HOG_CELL_BLOCK, HOG_CELL_BLOCK),
                                        signed_orientation=False, nbins=HOG_NBINS, visualise=False,
                                        normalise=True, flatten=True, same_size=True)
        with open(TRAIN_HOG_FILE_PATH, 'wb') as pickle_file:
            pkl.dump(features_train, pickle_file)

    # these lines are necessary in the upload version; the code above will disappear
    # features_x = np.empty(shape=(x.shape[0], hog_for_shape.shape[0]))
    # for i in range(x.shape[0]):
    #     features_x[i] = hog.hog(x[i], cell_size=(HOG_CELL_SIZE, HOG_CELL_SIZE),
    #                             cells_per_block=(HOG_CELL_BLOCK, HOG_CELL_BLOCK),
    #                             signed_orientation=False, nbins=HOG_NBINS, visualise=False,
    #                             normalise=True, flatten=True, same_size=True)

    input_layer_neurons = features_train.shape[1]
    hidden_layer_neurons = NN_HIDDEN_NEURONS
    output_neurons = NUMBER_OF_LABELS

    needs_init = False
    try:
        with open(WEIGHTS_HIDDEN_PATH, 'rb') as f:
            weights_hidden = pkl.load(f)
        with open(BIASES_HIDDEN_PATH, 'rb') as f:
            biases_hidden = pkl.load(f)
        with open(WEIGHTS_OUTPUT_PATH, 'rb') as f:
            weights_output = pkl.load(f)
        with open(BIASES_OUTPUT_PATH, 'rb') as f:
            biases_output = pkl.load(f)
    except EOFError:
        needs_init = True

    if needs_init or weights_hidden.shape != (input_layer_neurons, hidden_layer_neurons):
        print('starting learning')
        # all connections from every feature to every node in the hidden layer
        weights_hidden = np.random.uniform(size=(input_layer_neurons, hidden_layer_neurons))
        biases_hidden = np.random.uniform(size=(1, hidden_layer_neurons))
        # all connections from every hidden neuron to the output neurons
        weights_output = np.random.uniform(size=(hidden_layer_neurons, output_neurons))
        biases_output = np.random.uniform(size=(1, output_neurons))

    # epochs and LEARNING_RATE are module-level constants
    for i in range(epochs):
        print('weights hidden:{} {} {}'.format(weights_hidden[0][1], weights_hidden[0][2],
                                               weights_hidden[0][3]))
        # if using batches it will go here

        # forward pass
        hidden_ins_w = np.dot(features_train, weights_hidden)
        hidden_layer_input = hidden_ins_w + biases_hidden
        hidden_activations = nn.sigmoid(hidden_layer_input)
        output_hidden_ins_w = np.dot(hidden_activations, weights_output)
        output_layer_input = output_hidden_ins_w + biases_output
        output = nn.sigmoid(output_layer_input)

        # back propagation
        print('starting back propagation:{}'.format(i))
        error = calc_error(output, y_train)
        slope_output_layer = nn.sigmoid_derivative(output)
        slope_hidden_layer = nn.sigmoid_derivative(hidden_activations)
        delta_output = slope_output_layer * error
        error_hidden = delta_output.dot(weights_output.T)
        delta_hidden_layer = error_hidden * slope_hidden_layer

        weights_output += hidden_activations.T.dot(delta_output) * LEARNING_RATE
        biases_output += np.sum(delta_output, axis=0, keepdims=True) * LEARNING_RATE
        weights_hidden += features_train.T.dot(delta_hidden_layer) * LEARNING_RATE
        biases_hidden += np.sum(delta_hidden_layer, axis=0, keepdims=True) * LEARNING_RATE

    with open(WEIGHTS_HIDDEN_PATH, 'wb') as f:
        pkl.dump(weights_hidden, f)
    with open(BIASES_HIDDEN_PATH, 'wb') as f:
        pkl.dump(biases_hidden, f)
    with open(WEIGHTS_OUTPUT_PATH, 'wb') as f:
        pkl.dump(weights_output, f)
    with open(BIASES_OUTPUT_PATH, 'wb') as f:
        pkl.dump(biases_output, f)

    return 1
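# launch_learning() above depends on nn.sigmoid, nn.sigmoid_derivative and
# calc_error(), none of which are shown. Hypothetical versions consistent with how
# they are called (in particular, the error must be target - output for the
# additive weight updates above to move in the right direction):
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_derivative(activated):
    # `activated` is already a sigmoid output in the training loop above
    return activated * (1.0 - activated)

def calc_error(output, y_train):
    # assumes y_train holds one-hot targets with the same shape as `output`
    return y_train - output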
def calc_accuracy(self):
    l1 = sigmoid(np.dot(self.test_x, self.w0))
    l2 = sigmoid(np.dot(l1, self.w1))
    l3 = sigmoid(np.dot(l2, self.w2))
    y_error = self.test_y - l3
    return 1 - np.mean(np.abs(y_error)), np.std(y_error)
def recurrence_interleave(self, x_t, a_t, b_t, A_t):
    """
    :param x_t: 1D: batch_size, 2D: dim_hidden
    :param a_t: 1D: batch, 2D: n_agents; elem=one hot vector for speaker
    :param b_t: 1D: batch, 2D: n_agents; elem=one hot vector for addressee
    :return A_t: 1D: batch_size, 2D: n_agents, 3D: dim_agent
    """
    h_a = A_t * a_t.dimshuffle(0, 1, 'x')  # batch_size x n_agents x dim_agent
    h_b = A_t * b_t.dimshuffle(0, 1, 'x')  # batch_size x n_agents x dim_agent
    h_other = A_t - h_a - h_b              # batch_size x n_agents x dim_agent
    h_a = T.sum(h_a, 1)                    # batch_size x dim_agent
    h_b = T.sum(h_b, 1)                    # batch_size x dim_agent
    xt_ha = T.concatenate([h_a, x_t], 1)   # batch_size x (dim_agent + dim_hidden)

    # update for speaker
    r_t = sigmoid(T.dot(xt_ha, self.WA_xr) + T.dot(h_a, self.WA_hr) + T.dot(h_b, self.VA_hr))
    p_t = sigmoid(T.dot(xt_ha, self.WA_xp) + T.dot(h_a, self.WA_hp) + T.dot(h_b, self.VA_hp))
    z_t = sigmoid(T.dot(xt_ha, self.WA_xz) + T.dot(h_a, self.WA_hz) + T.dot(h_b, self.VA_hz))
    h_hat_t = self.activation(T.dot(xt_ha, self.WA_xh) + T.dot((r_t * h_a), self.WA_hh)
                              + T.dot((p_t * h_b), self.VA_hh))
    ha_t = (1. - z_t) * h_a + z_t * h_hat_t
    A_t_a = ha_t.dimshuffle(0, 'x', 1) * a_t.dimshuffle(0, 1, 'x')

    # update for addressee
    r_t = sigmoid(T.dot(xt_ha, self.WB_xr) + T.dot(h_b, self.WB_hr) + T.dot(h_a, self.VB_hr))
    p_t = sigmoid(T.dot(xt_ha, self.WB_xp) + T.dot(h_b, self.WB_hp) + T.dot(h_a, self.VB_hp))
    z_t = sigmoid(T.dot(xt_ha, self.WB_xz) + T.dot(h_b, self.WB_hz) + T.dot(h_a, self.VB_hz))
    h_hat_t = self.activation(T.dot(xt_ha, self.WB_xh) + T.dot((r_t * h_b), self.WB_hh)
                              + T.dot((p_t * h_a), self.VB_hh))
    hb_t = (1. - z_t) * h_b + z_t * h_hat_t
    A_t_b = hb_t.dimshuffle(0, 'x', 1) * b_t.dimshuffle(0, 1, 'x')

    # update for others
    x_r = T.dot(xt_ha, self.Wother_xr).dimshuffle(0, 'x', 1) * (
        1 - (a_t.dimshuffle(0, 1, 'x') + b_t.dimshuffle(0, 1, 'x')))  # batch_size x n_agents x dim_hidden
    x_z = T.dot(xt_ha, self.Wother_xz).dimshuffle(0, 'x', 1) * (
        1 - (a_t.dimshuffle(0, 1, 'x') + b_t.dimshuffle(0, 1, 'x')))  # batch_size x n_agents x dim_hidden
    x_h = T.dot(xt_ha, self.Wother_xh).dimshuffle(0, 'x', 1) * (
        1 - (a_t.dimshuffle(0, 1, 'x') + b_t.dimshuffle(0, 1, 'x')))  # batch_size x n_agents x dim_hidden
    r_t = sigmoid(x_r + T.dot(h_other, self.Wother_hr))
    z_t = sigmoid(x_z + T.dot(h_other, self.Wother_hz))
    h_hat_t = self.activation(x_h + T.dot((r_t * h_other), self.Wother_hh))
    h_t = (1. - z_t) * h_other + z_t * h_hat_t
    A_t_other = h_t

    return A_t_a + A_t_b + A_t_other
def __init__(self, x_span, x_word, x_ctx, x_dist, y,
             init_emb, n_vocab, dim_w, dim_d, dim_h, L2_reg):
    """
    :param x_span: 1D: batch, 2D: limit * 2 (10); elem=word id
    :param x_word: 1D: batch, 2D: 4 (m_first, m_last, a_first, a_last); elem=word id
    :param x_ctx : 1D: batch, 2D: window * 2 * 2 (20); elem=word id
    :param x_dist: 1D: batch; elem=distance between sentences of ant and ment
    :param y     : 1D: batch
    """
    self.input = [x_span, x_word, x_ctx, x_dist, y]
    self.x_span = x_span
    self.x_word = x_word
    self.x_ctx = x_ctx
    self.x_dist = x_dist
    self.y = y

    dim_x = dim_w * (2 + 4 + 20) + 1
    batch = y.shape[0]

    """ Params """
    if init_emb is None:
        self.emb = theano.shared(sample_weights(n_vocab, dim_w))
    else:
        self.emb = theano.shared(init_emb)

    self.W_d = theano.shared(sample_weights(dim_d))
    self.W_i = theano.shared(sample_weights(dim_x, dim_h * 3))
    self.W_h = theano.shared(sample_weights(dim_h * 3, dim_h))
    self.W_o = theano.shared(sample_weights(dim_h))
    self.params = [self.W_d, self.W_i, self.W_h, self.W_o]

    """ Input Layer """
    x_s = self.emb[x_span]  # 1D: batch, 2D: limit * 2, 3D: dim_w
    x_w = self.emb[x_word]  # 1D: batch, 2D: 4, 3D: dim_w
    x_c = self.emb[x_ctx]   # 1D: batch, 2D: window * 2 * 2, 3D: dim_w
    x_d = self.W_d[x_dist]  # 1D: batch
    x_s_avg = T.concatenate([T.mean(x_s[:, :x_s.shape[1] / 2], 1),
                             T.mean(x_s[:, x_s.shape[1] / 2:], 1)], 1)
    x = T.concatenate([x_s_avg, x_w.reshape((batch, -1)), x_c.reshape((batch, -1)),
                       x_d.reshape((batch, 1))], 1)

    """ Intermediate Layers """
    h1 = relu(T.dot(x, self.W_i))   # h1: 1D: batch, 2D: dim_h
    h2 = relu(T.dot(h1, self.W_h))  # h2: 1D: batch, 2D: dim_h

    """ Output Layer """
    p_y = sigmoid(T.dot(h2, self.W_o))  # p_y: 1D: batch

    """ Predicts """
    self.thresholds = theano.shared(np.asarray([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
                                               dtype=theano.config.floatX))
    self.y_hat = self.binary_predict(p_y)  # 1D: batch, 2D: 9 (thresholds)
    self.y_hat_index = T.argmax(p_y)
    self.p_y_hat = p_y[self.y_hat_index]

    """ Cost Function """
    self.nll = - T.sum(y * T.log(p_y) + (1. - y) * T.log((1. - p_y)))  # TODO: ranking criterion
    self.cost = self.nll + L2_reg * L2_sqr(params=self.params) / 2

    """ Update """
    self.grad = T.grad(self.cost, self.params)
    self.updates = adam(self.params, self.grad)

    """ Check Results """
    self.result = T.eq(self.y_hat, y.reshape((y.shape[0], 1)))  # 1D: batch, 2D: 9 (thresholds)
    self.total_p = T.sum(self.y_hat, 0)
    self.total_r = T.sum(y, keepdims=True)
    self.correct = T.sum(self.result, 0)
    self.correct_t, self.correct_f = correct_tf(self.result, y.reshape((y.shape[0], 1)))
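# The Theano models above initialise their parameters with sample_weights(), which
# is not shown and is called with either one or two size arguments. A hypothetical
# initialiser matching those call patterns (the uniform range is an illustrative
# choice, not taken from the original code):
import numpy as np
import theano

def sample_weights(size_x, size_y=None):
    shape = (size_x,) if size_y is None else (size_x, size_y)
    W = np.random.uniform(low=-0.08, high=0.08, size=shape)
    return np.asarray(W, dtype=theano.config.floatX)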
def calc_accuracy(self):
    prime_y = sigmoid(np.dot(self.test_x, self.w0))
    y_error = self.test_y - prime_y
    return 1 - np.mean(np.abs(y_error)), np.std(y_error)
def __init__(self, x_span, x_word, x_ctx, x_dist, x_slen, y,
             init_emb, n_vocab, dim_w, dim_d, dim_h, L2_reg):
    """
    :param x_span: 1D: batch, 2D: limit * 2 (10); elem=word id
    :param x_word: 1D: batch, 2D: 4 (m_first, m_last, a_first, a_last); elem=word id
    :param x_ctx : 1D: batch, 2D: window * 2 * 2 (20); elem=word id
    :param x_dist: 1D: batch, 2D: 2; elem=[sent dist, ment dist]
    :param x_slen: 1D: batch, 2D: 3; elem=[m_span_len, a_span_len, head_match]
    :param y     : 1D: batch
    """
    self.input = [x_span, x_word, x_ctx, x_dist, y]
    self.x_span = x_span
    self.x_word = x_word
    self.x_ctx = x_ctx
    self.x_dist = x_dist
    self.x_slen = x_slen
    self.y = y

    dim_x = dim_w * (10 + 4 + 4 + 2 + 3)
    batch = y.shape[0]

    """ Params """
    if init_emb is None:
        self.emb = theano.shared(sample_weights(n_vocab, dim_w))
    else:
        self.emb = theano.shared(init_emb)

    self.W_d = theano.shared(sample_weights(dim_d, dim_w))
    self.W_l = theano.shared(sample_weights(7, dim_w))
    self.W_i = theano.shared(sample_weights(dim_x, dim_h))
    self.W_h = theano.shared(sample_weights(dim_h, dim_h))
    self.W_o = theano.shared(sample_weights(dim_h))
    self.params = [self.W_d, self.W_l, self.W_i, self.W_h, self.W_o]

    """ Input Layer """
    x_vec = T.concatenate([x_span, x_word, x_ctx], 1).flatten()  # 1D: batch * (limit * 2 + 4 + 20)
    x_in = self.emb[x_vec]   # 1D: batch, 2D: limit * 2, 3D: dim_w
    x_d = self.W_d[x_dist]   # 1D: batch, 2D: 2, 3D: dim_w
    x_l = self.W_l[x_slen]   # 1D: batch, 2D: 3, 3D: dim_w
    x = T.concatenate([x_in.reshape((batch, -1)), x_d.reshape((batch, -1)),
                       x_l.reshape((batch, -1))], 1)

    """ Intermediate Layers """
    h1 = relu(T.dot(x, self.W_i))   # h1: 1D: batch, 2D: dim_h
    h2 = relu(T.dot(h1, self.W_h))  # h2: 1D: batch, 2D: dim_h

    """ Output Layer """
    p_y = sigmoid(T.dot(h2, self.W_o))  # p_y: 1D: batch

    """ Cost Function """
    self.nll = - T.sum(y * T.log(p_y) + (1. - y) * T.log((1. - p_y)))  # TODO: ranking criterion
    self.cost = self.nll + L2_reg * L2_sqr(params=self.params) / 2

    """ Update """
    self.updates = sgd(self.cost, self.params, self.emb, x_in)

    """ Predicts """
    self.thresholds = theano.shared(np.asarray([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
                                               dtype=theano.config.floatX))
    self.y_hat = self.binary_predict(p_y)  # 1D: batch, 2D: 9 (thresholds)
    self.y_hat_index = T.argmax(p_y)
    self.p_y_hat = p_y[self.y_hat_index]

    """ Check Results """
    self.result = T.eq(self.y_hat, y.reshape((y.shape[0], 1)))  # 1D: batch, 2D: 9 (thresholds)
    self.total_p = T.sum(self.y_hat, 0)
    self.total_r = T.sum(y, keepdims=True)
    self.correct = T.sum(self.result, 0)
    self.correct_t, self.correct_f = correct_tf(self.result, y.reshape((y.shape[0], 1)))
def __init__(self, x_span, x_word, x_ctx, x_dist, y,
             init_emb, n_vocab, dim_w, dim_d, dim_h, L2_reg):
    """
    :param x_span: 1D: batch, 2D: limit * 2 (10); elem=word id
    :param x_word: 1D: batch, 2D: 4 (m_first, m_last, a_first, a_last); elem=word id
    :param x_ctx : 1D: batch, 2D: window * 2 * 2 (20); elem=word id
    :param x_dist: 1D: batch; elem=distance between sentences of ant and ment
    :param y     : 1D: batch
    """
    self.input = [x_span, x_word, x_ctx, x_dist, y]
    self.x_span = x_span
    self.x_word = x_word
    self.x_ctx = x_ctx
    self.x_dist = x_dist
    self.y = y

    dim_x = dim_w * (2 + 4 + 20) + 1
    batch = y.shape[0]

    """ Params """
    if init_emb is None:
        self.emb = theano.shared(sample_weights(n_vocab, dim_w))
    else:
        self.emb = theano.shared(init_emb)

    self.W_d = theano.shared(sample_weights(dim_d))
    self.W_i = theano.shared(sample_weights(dim_x, dim_h * 3))
    self.W_h = theano.shared(sample_weights(dim_h * 3, dim_h))
    self.W_o = theano.shared(sample_weights(dim_h))
    self.params = [self.W_d, self.W_i, self.W_h, self.W_o]

    """ Input Layer """
    x_s = self.emb[x_span]  # 1D: batch, 2D: limit * 2, 3D: dim_w
    x_w = self.emb[x_word]  # 1D: batch, 2D: 4, 3D: dim_w
    x_c = self.emb[x_ctx]   # 1D: batch, 2D: window * 2 * 2, 3D: dim_w
    x_d = self.W_d[x_dist]  # 1D: batch
    x_s_avg = T.concatenate([T.mean(x_s[:, :x_s.shape[1] / 2], 1),
                             T.mean(x_s[:, x_s.shape[1] / 2:], 1)], 1)
    x = T.concatenate([x_s_avg, x_w.reshape((batch, -1)), x_c.reshape((batch, -1)),
                       x_d.reshape((batch, 1))], 1)

    """ Intermediate Layers """
    h1 = relu(T.dot(x, self.W_i))   # h1: 1D: batch, 2D: dim_h
    h2 = relu(T.dot(h1, self.W_h))  # h2: 1D: batch, 2D: dim_h

    """ Output Layer """
    p_y = sigmoid(T.dot(h2, self.W_o))  # p_y: 1D: batch

    """ Predicts """
    self.thresholds = theano.shared(np.asarray([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
                                               dtype=theano.config.floatX))
    self.y_hat = self.binary_predict(p_y)  # 1D: batch, 2D: 9 (thresholds)
    self.y_hat_index = T.argmax(p_y)
    self.p_y_hat = p_y[self.y_hat_index]

    """ Cost Function """
    self.nll = -T.sum(y * T.log(p_y) + (1. - y) * T.log((1. - p_y)))  # TODO: ranking criterion
    self.cost = self.nll + L2_reg * L2_sqr(params=self.params) / 2

    """ Update """
    self.grad = T.grad(self.cost, self.params)
    self.updates = adam(self.params, self.grad)

    """ Check Results """
    self.result = T.eq(self.y_hat, y.reshape((y.shape[0], 1)))  # 1D: batch, 2D: 9 (thresholds)
    self.total_p = T.sum(self.y_hat, 0)
    self.total_r = T.sum(y, keepdims=True)
    self.correct = T.sum(self.result, 0)
    self.correct_t, self.correct_f = correct_tf(self.result, y.reshape((y.shape[0], 1)))
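# binary_predict() is used by the models above to turn p_y into a (batch, 9) matrix
# of 0/1 decisions, one column per stored threshold, but its body is not shown.
# A hypothetical sketch consistent with how self.y_hat and self.result are used:
import theano
import theano.tensor as T

def binary_predict(self, p_y):
    # compare each probability against the nine thresholds; broadcasting gives (batch, 9)
    return T.cast(T.ge(p_y.dimshuffle(0, 'x'), self.thresholds.dimshuffle('x', 0)),
                  theano.config.floatX)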