def __init__(self, input_dim, hidden_dim, activation=T.nnet.sigmoid,
             with_batch=True, name='RNN'):
    """
    Initialize neural network.
    """
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.activation = activation
    self.with_batch = with_batch
    self.name = name

    # Randomly generate weights
    self.w_x = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_x')
    self.w_h = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_h')

    # Initialize the bias vector and h_0 to zero vectors
    self.b_h = create_shared(np.zeros((hidden_dim,)), name + '__b_h')
    self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')

    # Define parameters
    self.params = [self.w_x, self.w_h, self.b_h, self.h_0]

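# A minimal sketch of the recurrent step these parameters support. The
# step function itself is not part of this snippet, so this is an
# assumption about how w_x, w_h, and b_h are combined; it matches the
# standard Elman recurrence h_t = activation(x_t.w_x + h_{t-1}.w_h + b_h).
def rnn_step(self, x_t, h_tm1):
    return self.activation(T.dot(x_t, self.w_x) + T.dot(h_tm1, self.w_h) + self.b_h)
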
def __init__(self, input_dim, rnn_hidden_dim, rnn_output_dim, values_dim,
             output_dim, name='stack'):
    """
    Initialize neural network.
    """
    self.input_dim = input_dim
    self.rnn_hidden_dim = rnn_hidden_dim
    self.rnn_output_dim = rnn_output_dim
    self.values_dim = values_dim
    self.output_dim = output_dim
    self.name = name

    # Generate weights and biases to compute the push scalar (d_t), the
    # pop scalar (u_t), the value vector (v_t), and the network output (o_t)

    # Weights
    self.w_op_d = create_shared(random_weights((rnn_output_dim, 1)), name + '__w_op_d')
    self.w_op_u = create_shared(random_weights((rnn_output_dim, 1)), name + '__w_op_u')
    self.w_op_v = create_shared(random_weights((rnn_output_dim, values_dim)), name + '__w_op_v')
    self.w_op_o = create_shared(random_weights((rnn_output_dim, output_dim)), name + '__w_op_o')

    # Biases
    self.b_op_d = create_shared(np.zeros((1,)), name + '__b_op_d')
    self.b_op_u = create_shared(np.zeros((1,)), name + '__b_op_u')
    self.b_op_v = create_shared(np.zeros((values_dim,)), name + '__b_op_v')
    self.b_op_o = create_shared(np.zeros((output_dim,)), name + '__b_op_o')

    # RNN controller weights
    self.w_xrh_hop = create_shared(
        random_weights((input_dim + values_dim + rnn_hidden_dim,
                        rnn_hidden_dim + rnn_output_dim)),
        name + '__w_xrh_hop')
    self.b_xrh_hop = create_shared(
        np.zeros((rnn_hidden_dim + rnn_output_dim,)), name + '__b_xrh_hop')

    # Initial hidden states H_0 - H_t = (h_t, r_t, (v_t, s_t))
    self.h_0 = create_shared(np.zeros((rnn_hidden_dim,)), name + '__h_0')
    self.r_0 = create_shared(np.zeros((values_dim,)), name + '__r_0')
    # self.v_0 = create_shared(np.zeros((values_dim,)), name + '__v_0')
    # self.s_0 = create_shared(np.zeros((1,)), name + '__s_0')

    # Define parameters
    # (r_0 describes the read from the initially empty stack, and v_0/s_0
    # the empty stack itself, which plausibly explains why they are not
    # trained; see the TODO below)
    self.params = [self.w_op_d, self.w_op_u, self.w_op_v, self.w_op_o,
                   self.b_op_d, self.b_op_u, self.b_op_v, self.b_op_o,
                   self.w_xrh_hop, self.b_xrh_hop, self.h_0]
    # _TODO_ check this (why not put r_0, s_0, v_0)

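# A minimal sketch of how the controller output could be mapped to the
# four quantities named above, following the neural stack formulation of
# Grefenstette et al. ("Learning to Transduce with Unbounded Memory").
# The actual step function is not in this snippet, so the squashing
# choices (sigmoid for the scalars, tanh for the vectors) are assumptions:
def stack_ops(self, rnn_out):
    d_t = T.nnet.sigmoid(T.dot(rnn_out, self.w_op_d) + self.b_op_d)  # push strength in (0, 1)
    u_t = T.nnet.sigmoid(T.dot(rnn_out, self.w_op_u) + self.b_op_u)  # pop strength in (0, 1)
    v_t = T.tanh(T.dot(rnn_out, self.w_op_v) + self.b_op_v)          # value pushed onto the stack
    o_t = T.tanh(T.dot(rnn_out, self.w_op_o) + self.b_op_o)          # network output
    return d_t, u_t, v_t, o_t
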
def __init__(self, nb_filters, stack_size, filter_height, filter_width,
             wide, name):
    """
    Construct a convolutional layer.
    `wide`:
        False: only apply the filter to complete patches of the image.
               Output shape: image_shape - filter_shape + 1
        True:  zero-pad the image to a multiple of the filter shape.
               Output shape: image_shape + filter_shape - 1
    """
    self.nb_filters = nb_filters
    self.stack_size = stack_size
    self.filter_height = filter_height
    self.filter_width = filter_width
    self.wide = wide
    self.name = name
    self.filter_shape = (nb_filters, stack_size, filter_height, filter_width)

    # Glorot-style initialization range:
    # fan_in: number of inputs to each hidden unit
    fan_in = stack_size * filter_height * filter_width
    # fan_out: number of connections each lower-layer unit receives a
    # gradient from
    fan_out = nb_filters * filter_height * filter_width
    drange = np.sqrt(6. / (fan_in + fan_out))

    # initialize filters with random values scaled to that range
    self.filters = create_shared(drange * random_weights(self.filter_shape),
                                 name + '__filters')
    self.bias = create_shared(np.zeros((nb_filters,)), name + '__bias')

    # parameters in the layer
    self.params = [self.filters, self.bias]

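# A quick shape check for `wide`, using hypothetical sizes: a 6x6 image
# and a 3x3 filter give 6 - 3 + 1 = 4 per axis in narrow mode and
# 6 + 3 - 1 = 8 per axis in wide mode.
image_shape = (6, 6)
filter_shape = (3, 3)
narrow_out = tuple(i - f + 1 for i, f in zip(image_shape, filter_shape))  # (4, 4)
wide_out = tuple(i + f - 1 for i, f in zip(image_shape, filter_shape))    # (8, 8)
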
def __init__(self, input_dim, hidden_dim, name='LSTM'):
    """
    Initialize neural network.
    """
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.name = name

    # Gate and cell weights are stored as single concatenated matrices
    # (input gate, forget gate, output gate, cell), hence the factor of 4
    self.W = create_shared(random_weights((input_dim, hidden_dim * 4)), name + 'W')
    self.U = create_shared(random_weights((hidden_dim, hidden_dim * 4)), name + 'U')
    self.b = create_shared(random_weights((hidden_dim * 4,)), name + 'b')

    # Initial cell state and hidden state, kept fixed at zero (not trained)
    self.c_0 = create_shared(np.zeros((hidden_dim,)), name + '__c_0')
    self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')

    self.params = [self.W, self.U, self.b]

def __init__(self, nb_filters, stack_size, filter_height, filter_width,
             border_mode, stride, name):
    """
    Construct a convolutional layer.
    """
    self.nb_filters = nb_filters
    self.stack_size = stack_size
    self.filter_height = filter_height
    self.filter_width = filter_width
    self.border_mode = border_mode
    self.filter_shape = (nb_filters, stack_size, filter_height, filter_width)
    self.stride = stride
    self.name = name

    # Glorot-style initialization range (see the `wide` variant above):
    # fan_in: number of inputs to each hidden unit
    fan_in = stack_size * filter_height * filter_width
    # fan_out: number of connections each lower-layer unit receives a
    # gradient from
    fan_out = nb_filters * filter_height * filter_width
    drange = np.sqrt(6. / (fan_in + fan_out))

    # initialize filters with random values; the small positive bias is
    # a common choice for ReLU units
    self.filters = create_shared(drange * random_weights(self.filter_shape),
                                 name + '__filters')
    self.bias = create_shared(np.ones((nb_filters,)) * 0.1, name + '__bias')

    # parameters in the layer
    self.params = [self.filters, self.bias]

def __init__(self, input_dim, output_dim, bias=True, activation='sigmoid',
             name='hidden_layer'):
    self.input_dim = input_dim
    self.output_dim = output_dim
    # `use_bias` is the flag; `self.bias` below holds the shared variable
    self.use_bias = bias
    self.name = name
    if activation is None:
        self.activation = None
    elif activation == 'tanh':
        self.activation = T.tanh
    elif activation == 'sigmoid':
        self.activation = T.nnet.sigmoid
    elif activation == 'softmax':
        self.activation = T.nnet.softmax
    elif activation == 'relu':
        self.activation = T.nnet.relu
    else:
        raise Exception("Unknown activation function: %s" % activation)

    # Initialize weights and bias
    self.weights = create_shared(random_weights((input_dim, output_dim)),
                                 name + '__weights')
    if activation == 'relu':
        # a small positive bias is a common choice for ReLU units
        self.bias = create_shared(np.ones((output_dim,)) * 0.1, name + '__bias')
    else:
        self.bias = create_shared(np.zeros((output_dim,)), name + '__bias')

    # Define parameters
    if self.use_bias:
        self.params = [self.weights, self.bias]
    else:
        self.params = [self.weights]

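# A minimal usage sketch. `HiddenLayer` is an assumed name for the
# enclosing class (the snippet above only shows its __init__): with
# bias=False, only the weight matrix is registered as trainable.
# layer = HiddenLayer(input_dim=128, output_dim=64, bias=False,
#                     activation='relu', name='h1')
# assert layer.params == [layer.weights]
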
def generate_context_vector(input_vector, hidden_state_vector):
    # Rows index timesteps, columns index features
    in_features = input_vector.shape[1]
    timesteps = input_vector.shape[0]
    input_wt = utils.random_weights(in_features, timesteps)

    hidden_features = hidden_state_vector.shape[1]
    hidden_wt = utils.random_weights(hidden_features, timesteps)

    # Attention scores: beta and alpha have shape (timesteps, timesteps),
    # each row of alpha being a softmax distribution over timesteps
    beta = np.tanh(np.dot(input_vector, input_wt) +
                   np.dot(hidden_state_vector, hidden_wt))
    alpha = utils.softmax(beta)

    # Weighted sum over timesteps of the concatenated features;
    # shapes: (T, T) . (T, F1 + F2) -> (T, F1 + F2)
    context_vector = np.dot(
        alpha, np.concatenate((input_vector, hidden_state_vector), axis=1))
    return context_vector

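# A quick shape check with hypothetical sizes: 4 timesteps, 3 input
# features, 5 hidden features. alpha is (4, 4), and the context vector
# combines the concatenated (4, 8) matrix into another (4, 8) matrix of
# timestep-weighted rows.
# x = np.random.rand(4, 3)
# h = np.random.rand(4, 5)
# ctx = generate_context_vector(x, h)
# assert ctx.shape == (4, 8)
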
def __init__(self, input_dim, hidden_dim, with_batch=True, name='LSTM'):
    """
    Initialize neural network.
    (Update/reset gates with a single hidden state: this is a GRU-style
    cell, despite the default name.)
    """
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.with_batch = with_batch
    self.name = name

    # Update gate weights and bias
    self.w_z = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_z')
    self.u_z = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__u_z')
    self.b_z = create_shared(np.zeros((hidden_dim,)), name + '__b_z')

    # Reset gate weights and bias
    self.w_r = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_r')
    self.u_r = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__u_r')
    self.b_r = create_shared(np.zeros((hidden_dim,)), name + '__b_r')

    # New memory content weights and bias
    self.w_c = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_c')
    self.u_c = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__u_c')
    self.b_c = create_shared(np.zeros((hidden_dim,)), name + '__b_c')

    # Initialize the initial hidden state, h_0, to the zero vector
    self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')

    # Define parameters
    self.params = [self.w_z, self.u_z, self.b_z,
                   self.w_r, self.u_r, self.b_r,
                   self.w_c, self.u_c, self.b_c,
                   self.h_0]

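# A minimal sketch of the recurrent step these parameters support. The
# step function is not shown in this snippet, so this is an assumption;
# it follows the standard GRU equations:
#   z_t = sigmoid(x_t.w_z + h_{t-1}.u_z + b_z)          update gate
#   r_t = sigmoid(x_t.w_r + h_{t-1}.u_r + b_r)          reset gate
#   c_t = tanh(x_t.w_c + (r_t * h_{t-1}).u_c + b_c)     candidate state
#   h_t = (1 - z_t) * h_{t-1} + z_t * c_t
def gru_step(self, x_t, h_tm1):
    z_t = T.nnet.sigmoid(T.dot(x_t, self.w_z) + T.dot(h_tm1, self.u_z) + self.b_z)
    r_t = T.nnet.sigmoid(T.dot(x_t, self.w_r) + T.dot(h_tm1, self.u_r) + self.b_r)
    c_t = T.tanh(T.dot(x_t, self.w_c) + T.dot(r_t * h_tm1, self.u_c) + self.b_c)
    return (1. - z_t) * h_tm1 + z_t * c_t
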
def __init__(self, input_dim, output_dim, name='embedding_layer'):
    """
    Typically, input_dim is the vocabulary size, and output_dim the
    embedding dimension.
    """
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.name = name

    # Randomly generate weights
    self.embeddings = create_shared(random_weights((input_dim, output_dim)),
                                    self.name + '__embeddings')

    # Define parameters
    self.params = [self.embeddings]

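# A minimal usage sketch. The lookup itself is not part of this snippet;
# indexing the embedding matrix by a vector of symbol ids is the assumed
# usage, which Theano supports directly:
# ids = T.ivector('ids')             # a batch of word indices
# vectors = self.embeddings[ids]     # shape: (len(ids), output_dim)
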
def LogisticLinearLearner(dataset, learning_rate=0.01, epochs=100):
    """
    [Section 18.6.4]
    Linear classifier with logistic regression.
    """
    idx_i = dataset.inputs
    idx_t = dataset.target
    examples = dataset.examples
    num_examples = len(examples)

    # X transpose
    X_col = [dataset.values[i] for i in idx_i]  # vertical columns of X

    # add dummy
    ones = [1 for _ in range(len(examples))]
    X_col = [ones] + X_col

    # initialize random weights
    num_weights = len(idx_i) + 1
    w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights)

    for epoch in range(epochs):
        err = []
        h = []
        # pass over all examples
        for example in examples:
            x = [1] + example
            y = sigmoid(dot_product(w, x))
            h.append(sigmoid_derivative(y))
            t = example[idx_t]
            err.append(t - y)

        # update weights (the error is scaled by the sigmoid derivative)
        scaled_err = [e * d for e, d in zip(err, h)]
        for i in range(len(w)):
            w[i] = w[i] + learning_rate * (dot_product(scaled_err, X_col[i]) / num_examples)

    def predict(example):
        x = [1] + example
        return sigmoid(dot_product(w, x))

    return predict

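# A minimal usage sketch, assuming the aima-python helpers this function
# is written against (DataSet and classes_to_numbers come from that repo;
# exact names may differ across versions):
# iris = DataSet(name='iris')
# iris.classes_to_numbers()
# predict = LogisticLinearLearner(iris, learning_rate=0.05, epochs=500)
# predict(iris.examples[0])  # returns a value in (0, 1)
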
def LinearLearner(dataset, learning_rate=0.01, epochs=100):
    """
    [Section 18.6.3]
    Linear classifier with hard threshold.
    """
    idx_i = dataset.inputs
    idx_t = dataset.target
    examples = dataset.examples
    num_examples = len(examples)

    # X transpose
    X_col = [dataset.values[i] for i in idx_i]  # vertical columns of X

    # add dummy
    ones = [1 for _ in range(len(examples))]
    X_col = [ones] + X_col

    # initialize random weights
    num_weights = len(idx_i) + 1
    w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights)

    for epoch in range(epochs):
        err = []
        # pass over all examples
        for example in examples:
            x = [1] + example
            y = dot_product(w, x)
            t = example[idx_t]
            err.append(t - y)

        # update weights
        for i in range(len(w)):
            w[i] = w[i] + learning_rate * (dot_product(err, X_col[i]) / num_examples)

    def predict(example):
        x = [1] + example
        return dot_product(w, x)

    return predict

def __init__(self, input_dim, hidden_dim, with_batch=True, name='LSTM'):
    """
    Initialize neural network.
    """
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.with_batch = with_batch
    self.name = name

    # Input gate weights
    self.w_xi = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xi')
    self.w_hi = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hi')
    self.w_ci = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_ci')

    # Forget gate weights
    self.w_xf = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xf')
    self.w_hf = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hf')
    self.w_cf = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_cf')

    # Output gate weights
    self.w_xo = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xo')
    self.w_ho = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_ho')
    self.w_co = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_co')

    # Cell weights
    self.w_xc = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xc')
    self.w_hc = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hc')

    # Initialize the bias vectors, c_0, and h_0 to zero vectors
    self.b_i = create_shared(np.zeros((hidden_dim,)), name + '__b_i')
    self.b_f = create_shared(np.zeros((hidden_dim,)), name + '__b_f')
    self.b_c = create_shared(np.zeros((hidden_dim,)), name + '__b_c')
    self.b_o = create_shared(np.zeros((hidden_dim,)), name + '__b_o')
    self.c_0 = create_shared(np.zeros((hidden_dim,)), name + '__c_0')
    self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')

    # Define parameters (the peephole weights w_ci, w_cf, w_co are
    # currently excluded from training)
    self.params = [self.w_xi, self.w_hi,  # self.w_ci,
                   self.w_xf, self.w_hf,  # self.w_cf,
                   self.w_xo, self.w_ho,  # self.w_co,
                   self.w_xc, self.w_hc,
                   self.b_i, self.b_c, self.b_o, self.b_f,
                   self.c_0, self.h_0]

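# A minimal sketch of the per-timestep update these parameters support
# (standard LSTM equations without the unused peepholes; the actual step
# function is not part of this snippet, so this is an assumption):
def lstm_step(self, x_t, c_tm1, h_tm1):
    i_t = T.nnet.sigmoid(T.dot(x_t, self.w_xi) + T.dot(h_tm1, self.w_hi) + self.b_i)
    f_t = T.nnet.sigmoid(T.dot(x_t, self.w_xf) + T.dot(h_tm1, self.w_hf) + self.b_f)
    o_t = T.nnet.sigmoid(T.dot(x_t, self.w_xo) + T.dot(h_tm1, self.w_ho) + self.b_o)
    c_t = f_t * c_tm1 + i_t * T.tanh(T.dot(x_t, self.w_xc) + T.dot(h_tm1, self.w_hc) + self.b_c)
    h_t = o_t * T.tanh(c_t)
    return c_t, h_t
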
def __init__(self, input_dim, hidden_dim, output_emb_dim, output_dim,
             with_batch=True, name='LSTM'):
    """
    Initialize neural network.
        - input_dim: dimension of input vectors
        - hidden_dim: dimension of hidden vectors
        - output_emb_dim: dimension of output embeddings
        - output_dim: number of possible outputs
    """
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.output_emb_dim = output_emb_dim
    self.output_dim = output_dim
    self.with_batch = with_batch
    self.name = name

    # Input gate weights
    self.w_xi = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xi')
    self.w_hi = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hi')
    self.w_yi = create_shared(random_weights((output_emb_dim, hidden_dim)), name + '__w_yi')
    self.w_ci = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_ci')

    # Forget gate weights
    self.w_xf = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xf')
    self.w_hf = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hf')
    self.w_yf = create_shared(random_weights((output_emb_dim, hidden_dim)), name + '__w_yf')
    self.w_cf = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_cf')

    # Output gate weights
    self.w_xo = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xo')
    self.w_ho = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_ho')
    self.w_yo = create_shared(random_weights((output_emb_dim, hidden_dim)), name + '__w_yo')
    self.w_co = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_co')

    # Cell weights
    self.w_xc = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xc')
    self.w_hc = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hc')
    self.w_yc = create_shared(random_weights((output_emb_dim, hidden_dim)), name + '__w_yc')

    # Initialize the bias vectors, c_0, and h_0 to zero vectors
    self.b_i = create_shared(np.zeros((hidden_dim,)), name + '__b_i')
    self.b_f = create_shared(np.zeros((hidden_dim,)), name + '__b_f')
    self.b_c = create_shared(np.zeros((hidden_dim,)), name + '__b_c')
    self.b_o = create_shared(np.zeros((hidden_dim,)), name + '__b_o')
    self.c_0 = create_shared(np.zeros((hidden_dim,)), name + '__c_0')
    self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')
    # self.y_0 = create_shared(np.zeros((output_emb_dim,)), name + '__y_0')

    # Weights for the projection to the final output, and output embeddings
    self.embeddings = create_shared(random_weights((output_dim + 1, output_emb_dim)),
                                    name + '__embeddings')
    self.weights = create_shared(random_weights((hidden_dim, output_dim)),
                                 name + '__weights')
    self.bias = create_shared(random_weights((output_dim,)), name + '__bias')

    # Define parameters
    self.params = [self.w_xi, self.w_hi, self.w_yi, self.w_ci,
                   self.w_xf, self.w_hf, self.w_yf, self.w_cf,
                   self.w_xo, self.w_ho, self.w_yo, self.w_co,
                   self.w_xc, self.w_hc, self.w_yc,
                   self.b_i, self.b_c, self.b_o, self.b_f,
                   self.c_0, self.h_0,  # self.y_0,
                   self.embeddings, self.weights, self.bias]

def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid):
    """
    [Figure 18.23]
    The back-propagation algorithm for multilayer networks.
    """
    # initialise weights
    for layer in net:
        for node in layer:
            node.weights = random_weights(min_value=-0.5, max_value=0.5,
                                          num_weights=len(node.weights))

    examples = dataset.examples
    # As of now dataset.target gives an int instead of a list;
    # changing the dataset class will affect all the learners.
    # Will be taken care of later.
    o_nodes = net[-1]
    i_nodes = net[0]
    o_units = len(o_nodes)
    idx_t = dataset.target
    idx_i = dataset.inputs
    n_layers = len(net)

    inputs, targets = init_examples(examples, idx_i, idx_t, o_units)

    for epoch in range(epochs):
        # iterate over each example
        for e in range(len(examples)):
            i_val = inputs[e]
            t_val = targets[e]

            # activate input layer
            for v, n in zip(i_val, i_nodes):
                n.value = v

            # forward pass
            for layer in net[1:]:
                for node in layer:
                    inc = [n.value for n in node.inputs]
                    in_val = dotproduct(inc, node.weights)
                    node.value = node.activation(in_val)

            # initialize delta
            delta = [[] for _ in range(n_layers)]

            # compute output layer delta
            # error for the MSE cost function
            err = [t_val[i] - o_nodes[i].value for i in range(o_units)]

            # calculate delta at output
            if activation == sigmoid:
                delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i]
                             for i in range(o_units)]
            elif activation == relu:
                delta[-1] = [relu_derivative(o_nodes[i].value) * err[i]
                             for i in range(o_units)]
            elif activation == tanh:
                delta[-1] = [tanh_derivative(o_nodes[i].value) * err[i]
                             for i in range(o_units)]
            elif activation == elu:
                delta[-1] = [elu_derivative(o_nodes[i].value) * err[i]
                             for i in range(o_units)]
            else:
                delta[-1] = [leaky_relu_derivative(o_nodes[i].value) * err[i]
                             for i in range(o_units)]

            # backward pass
            h_layers = n_layers - 2
            for i in range(h_layers, 0, -1):
                layer = net[i]
                h_units = len(layer)
                nx_layer = net[i + 1]

                # weights from each ith layer node to each i + 1th layer node
                w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]

                if activation == sigmoid:
                    delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == relu:
                    delta[i] = [relu_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == tanh:
                    delta[i] = [tanh_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]
                elif activation == elu:
                    delta[i] = [elu_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]
                else:
                    delta[i] = [leaky_relu_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                                for j in range(h_units)]

            # update weights
            for i in range(1, n_layers):
                layer = net[i]
                inc = [node.value for node in net[i - 1]]
                units = len(layer)
                for j in range(units):
                    layer[j].weights = vector_add(layer[j].weights,
                                                  scalar_vector_product(learning_rate * delta[i][j], inc))

    return net

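# A minimal usage sketch, assuming aima-python's `network` helper that
# builds the layered node structure this function expects (names are
# from that repo and may differ across versions):
# raw_net = network(input_units=len(iris.inputs), hidden_layer_sizes=[3],
#                   output_units=3, activation=sigmoid)
# learned_net = BackPropagationLearner(iris, raw_net, learning_rate=0.1,
#                                      epochs=100, activation=sigmoid)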