def lenet5(self):
    with tf.name_scope('LeNet5'):
        self.conv1 = tools.conv('conv1', self.input, 32, kernel_size=[5, 5],
                                stride=[1, 1, 1, 1], is_trainable=self.is_trainable)
        self.pool1 = tools.pool('pool1', self.conv1, kernel=[1, 2, 2, 1],
                                stride=[1, 2, 2, 1], is_max_pool=True)
        self.conv2 = tools.conv('conv2', self.pool1, 64, kernel_size=[5, 5],
                                stride=[1, 1, 1, 1], is_trainable=self.is_trainable)
        self.pool2 = tools.pool('pool2', self.conv2, kernel=[1, 2, 2, 1],
                                stride=[1, 2, 2, 1], is_max_pool=True)
        self.fc1 = tools.fc_layer('fc1', self.pool2, out_nodes=512)
        self.dropout1 = tools.dropout('dropout1', self.fc1, self.keep_prob)
        self.logits = tools.fc_layer('fc2', self.dropout1, use_relu=False,
                                     out_nodes=self.n_classes)
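The layer helpers in `tools` are not shown in this section. Purely as an illustration, here is a minimal TensorFlow 1.x sketch of what `tools.conv` and `tools.pool` might look like, assuming a plain convolution + bias + ReLU layer whose variables can be frozen via `is_trainable`, and a standard max/average pooling wrapper; the initializers and the `'SAME'` padding are assumptions, not taken from the original module.

import tensorflow as tf

def conv(layer_name, x, out_channels, kernel_size=[3, 3], stride=[1, 1, 1, 1],
         is_trainable=True):
    """Convolution + bias + ReLU; is_trainable=False freezes the layer's variables."""
    in_channels = x.get_shape()[-1]
    with tf.variable_scope(layer_name):
        w = tf.get_variable('weights',
                            shape=[kernel_size[0], kernel_size[1], in_channels, out_channels],
                            trainable=is_trainable,
                            initializer=tf.truncated_normal_initializer(stddev=0.1))
        b = tf.get_variable('biases', shape=[out_channels],
                            trainable=is_trainable,
                            initializer=tf.constant_initializer(0.0))
        x = tf.nn.conv2d(x, w, stride, padding='SAME')
        x = tf.nn.bias_add(x, b)
        return tf.nn.relu(x)

def pool(layer_name, x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True):
    """Max pooling by default, average pooling otherwise."""
    if is_max_pool:
        return tf.nn.max_pool(x, kernel, strides=stride, padding='SAME', name=layer_name)
    return tf.nn.avg_pool(x, kernel, strides=stride, padding='SAME', name=layer_name)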
def gradient(self, weight_vector, training_data, training_targets):
    layer_indexes = range(len(self.layers))[::-1]  # reversed
    self.weights = self.unpack(np.array(weight_vector))

    input_signals, derivatives = self.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = self.cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = self.cost_function(out, training_targets)

    layers = []
    for i in layer_indexes:
        # Loop over the weight layers in reversed order to calculate the deltas

        # calculate the weight change
        dropped = dropout(
            input_signals[i],
            # dropout probability
            self.hidden_layer_dropout if i > 0 else self.input_layer_dropout)

        layers.append(np.dot(delta, add_bias(dropped)).T.flat)

        if i != 0:
            """Do not calculate the delta unnecessarily."""
            # Skip the bias weight
            weight_delta = np.dot(self.weights[i][1:, :], delta)

            # Calculate the delta for the subsequent layer
            delta = weight_delta * derivatives[i - 1]
    # end weight adjustment loop

    return np.hstack(reversed(layers))
def gradient(self, weight_vector, training_data, training_targets ):
    layer_indexes = range( len(self.layers) )[::-1]  # reversed
    self.weights = self.unpack( np.array(weight_vector) )

    input_signals, derivatives = self.update( training_data, trace=True )
    out = input_signals[-1]
    cost_derivative = self.cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = self.cost_function(out, training_targets )

    layers = []
    for i in layer_indexes:
        # Loop over the weight layers in reversed order to calculate the deltas

        # calculate the weight change
        dropped = dropout(
            input_signals[i],
            # dropout probability
            self.hidden_layer_dropout if i > 0 else self.input_layer_dropout
        )

        layers.append(np.dot( delta, add_bias(dropped) ).T.flat)

        if i != 0:
            """Do not calculate the delta unnecessarily."""
            # Skip the bias weight
            weight_delta = np.dot( self.weights[ i ][1:,:], delta )

            # Calculate the delta for the subsequent layer
            delta = weight_delta * derivatives[i-1]
    # end weight adjustment loop

    return np.hstack( reversed(layers) )
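Both gradient variants, and the trainers below, rely on two free helpers, `dropout` and `add_bias`, which are not defined in this section. A minimal NumPy sketch of how they are presumably meant to behave follows; the Bernoulli mask without inverted 1/(1-p) rescaling and the bias column being prepended (so that `weights[i][1:, :]` skips the bias row) are assumptions based on how the helpers are used above.

import numpy as np

def dropout(signal, p=0.0):
    """Zero out a random fraction p of the activations (no inverted rescaling)."""
    if p > 0.0:
        signal = signal * np.random.binomial(1, 1.0 - p, size=signal.shape)
    return signal

def add_bias(signal):
    """Prepend a column of ones so the first row of each weight matrix acts as the bias."""
    return np.hstack((np.ones((signal.shape[0], 1)), signal))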
def parallel_backpropagation_one_process(network, trainingset, block_number,
                                         learning_rate = 0.03, momentum_factor = 0.9,
                                         max_iterations = ()):
    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )

    layer_indexes = range( len(network.layers) )[::-1]  # reversed
    momentum = collections.defaultdict( int )
    epoch = 0

    input_signals, derivatives = network.update( training_data, trace=True )
    out = input_signals[-1]
    error = network.cost_function(out, training_targets )
    cost_derivative = network.cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]

    while epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                network.hidden_layer_dropout if i > 0 else network.input_layer_dropout
            )

            # calculate the weight change
            dW = -learning_rate * np.dot( delta, add_bias(dropped) ).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[ i ] += dW
        # end weight adjustment loop

        input_signals, derivatives = network.update( training_data, trace=True )
        out = input_signals[-1]
        error = network.cost_function(out, training_targets )
        cost_derivative = network.cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]

    result = []
    result.append(block_number)
    result.append(out)
    result.append(error)
    result.append(cost_derivative)
    result.append(delta)
    return result
def VGG16N(x, n_classes, keep_prob, is_pretrain=True):
    # is_pretrain controls whether the convolutional layers' weights are trainable or fixed (frozen)

    with tf.name_scope('VGG16'):

        x = tools.conv('conv1_1', x, 64, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv1_2', x, 64, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool1'):
            x = tools.pool('pool1', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tools.conv('conv2_1', x, 128, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv2_2', x, 128, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool2'):
            x = tools.pool('pool2', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tools.conv('conv3_1', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv3_2', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv3_3', x, 256, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool3'):
            x = tools.pool('pool3', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tools.conv('conv4_1', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv4_2', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv4_3', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool4'):
            x = tools.pool('pool4', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tools.conv('conv5_1', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv5_2', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        x = tools.conv('conv5_3', x, 512, kernel_size=[3, 3], stride=[1, 1, 1, 1], is_pretrain=is_pretrain)
        with tf.name_scope('pool5'):
            x = tools.pool('pool5', x, kernel=[1, 2, 2, 1], stride=[1, 2, 2, 1], is_max_pool=True)

        x = tools.FC_layer('fc6', x, out_nodes=4096)
        # with tf.name_scope('batch_norm1'):
        #     x = tools.batch_norm(x)
        x = tools.dropout(x, keep_prob)
        x = tools.FC_layer('fc7', x, out_nodes=4096)
        # with tf.name_scope('batch_norm2'):
        #     x = tools.batch_norm(x)
        x = tools.dropout(x, keep_prob)
        x = tools.FC_layer('fc8', x, out_nodes=n_classes)

        return x


# %%
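For reference, one hypothetical way to wire `VGG16N` into a TF 1.x graph; the 224x224x3 input shape and the class count of 10 are placeholder assumptions, not values from the original code.

import tensorflow as tf

images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])  # assumed input size
keep_prob = tf.placeholder(tf.float32)                          # dropout keep probability

logits = VGG16N(images, n_classes=10, keep_prob=keep_prob, is_pretrain=False)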
def lenet_300_100(self):
    with tf.name_scope('LeNet_300_100'):
        self.fc1 = tools.fc_layer('fc1', self.input, out_nodes=300)
        self.fc2 = tools.fc_layer('fc2', self.fc1, out_nodes=100)
        # Dropout is applied to the 100-unit layer so that fc2 feeds the classifier
        self.dropout1 = tools.dropout('dropout1', self.fc2, self.keep_prob)
        self.logits = tools.fc_layer('fc3', self.dropout1, use_relu=False,
                                     out_nodes=self.n_classes)
def backpropagation(self, trainingset, ERROR_LIMIT = 1e-3, learning_rate = 0.03,
                    momentum_factor = 0.9, max_iterations = () ):

    assert trainingset[0].features.shape[0] == self.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == self.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )

    layer_indexes = range( len(self.layers) )[::-1]  # reversed
    momentum = collections.defaultdict( int )

    MSE = ( )  # inf
    epoch = 0

    input_signals, derivatives = self.update( training_data, trace=True )
    out = input_signals[-1]
    error = (out - training_targets).T
    delta = error * derivatives[-1]
    MSE = np.mean( np.power(error,2) )

    while MSE > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                self.hidden_layer_dropout if i else self.input_layer_dropout
            )

            # calculate the weight change
            dW = -learning_rate * np.dot( delta, add_bias(dropped) ).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( self.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            self.weights[ i ] += dW
        # end weight adjustment loop

        input_signals, derivatives = self.update( training_data, trace=True )
        out = input_signals[-1]
        error = (out - training_targets).T
        delta = error * derivatives[-1]
        MSE = np.mean( np.power(error,2) )

        if epoch % 1000 == 0:
            # Show the current training status
            print "* current network error (MSE):", MSE

    print "* Converged to error bound (%.4g) with MSE = %.4g." % ( ERROR_LIMIT, MSE )
    print "* Trained for %d epochs." % epoch

    if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        self.save_to_file()
def backpropagation(network, trainingset, ERROR_LIMIT = 1e-3, learning_rate = 0.03,
                    momentum_factor = 0.9, max_iterations = () ):

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )

    layer_indexes = range( len(network.layers) )[::-1]  # reversed
    momentum = collections.defaultdict( int )
    epoch = 0

    input_signals, derivatives = network.update( training_data, trace=True )
    out = input_signals[-1]
    error = network.cost_function(out, training_targets )
    cost_derivative = network.cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                network.hidden_layer_dropout if i > 0 else network.input_layer_dropout
            )

            # calculate the weight change
            dW = -learning_rate * np.dot( delta, add_bias(dropped) ).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[ i ] += dW
        # end weight adjustment loop

        input_signals, derivatives = network.update( training_data, trace=True )
        out = input_signals[-1]
        error = network.cost_function(out, training_targets )
        cost_derivative = network.cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training] Trained for %d epochs." % epoch

    if network.save_trained_network:
        network.save_to_file()
def backpropagation(self, trainingset, ERROR_LIMIT=1e-3, learning_rate=0.3,
                    momentum_factor=0.9):
    assert trainingset[0].features.shape[0] == self.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == self.n_outputs, \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    MSE = ()  # inf
    neterror = None
    momentum = collections.defaultdict(int)
    batch_size = self.batch_size if self.batch_size != 0 else training_data.shape[0]

    epoch = 0
    while MSE > ERROR_LIMIT:
        epoch += 1

        for start in xrange(0, len(training_data), batch_size):
            batch = training_data[start:start + batch_size]

            input_layers = self.update(training_data, trace=True)
            out = input_layers[-1]

            error = out - training_targets
            delta = error
            MSE = np.mean(np.power(error, 2))

            loop = itertools.izip(
                xrange(len(self.weights) - 1, -1, -1),
                reversed(self.weights),
                reversed(input_layers[:-1]),
            )

            for i, weight_layer, input_signals in loop:
                # Loop over the weight layers in reversed order to calculate the deltas
                if i == 0:
                    dropped = dropout(add_bias(input_signals).T, self.input_layer_dropout)
                else:
                    dropped = dropout(add_bias(input_signals).T, self.hidden_layer_dropout)

                # Calculate weight change
                dW = learning_rate * np.dot(dropped, delta) + momentum_factor * momentum[i]

                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skipping the bias weight during calculation.
                    weight_delta = np.dot(delta, weight_layer[1:, :].T)

                    # Calculate the delta for the subsequent layer
                    delta = np.multiply(
                        weight_delta,
                        self.activation_functions[i - 1](input_signals, derivative=True))

                # Store the momentum
                momentum[i] = dW

                # Update the weights
                self.weights[i] -= dW

        if epoch % 1000 == 0:
            # Show the current training status
            print "* current network error (MSE):", MSE

    print "* Converged to error bound (%.4g) with MSE = %.4g." % (ERROR_LIMIT, MSE)
    print "* Trained for %d epochs." % epoch
def backpropagation(network, trainingset, ERROR_LIMIT=1e-3, learning_rate=0.03,
                    momentum_factor=0.9, max_iterations=()):

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    momentum = collections.defaultdict(int)
    epoch = 0

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    error = network.cost_function(out, training_targets)
    cost_derivative = network.cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                network.hidden_layer_dropout if i > 0 else network.input_layer_dropout)

            # calculate the weight change
            dW = -learning_rate * np.dot(delta, add_bias(dropped)).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        # end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        error = network.cost_function(out, training_targets)
        cost_derivative = network.cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % (ERROR_LIMIT, error)
    print "[training] Trained for %d epochs." % epoch

    if network.save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_to_file()
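A detail shared by several of these trainers is the `max_iterations = ()` default. CPython 2 permits ordering comparisons between mismatched types, and an integer always compares less than a tuple, so `epoch < max_iterations` never becomes false and training stops only when the error bound is reached; passing an integer restores a hard cap. A quick Python 2 check:

# In CPython 2 an int always compares less than a tuple, so the empty-tuple
# default behaves like "no iteration limit".
print 10 ** 9 < ()     # True  -> loop is bounded only by ERROR_LIMIT
print 10 ** 9 < 500    # False -> an integer cap works as usual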
def backpropagation(network, trainingset, testset, cost_function, ERROR_LIMIT = 1e-3,
                    learning_rate = 0.03, momentum_factor = 0.9, max_iterations = (),
                    input_layer_dropout = 0.0, hidden_layer_dropout = 0.0,
                    save_trained_network = False ):

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )
    test_data = np.array( [instance.features for instance in testset ] )
    test_targets = np.array( [instance.targets for instance in testset ] )

    momentum = collections.defaultdict( int )

    input_signals, derivatives = network.update( training_data, trace=True )
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = cost_function(network.update( test_data ), test_targets )

    layer_indexes = range( len(network.layers) )[::-1]  # reversed
    epoch = 0
    n_samples = float(training_data.shape[0])

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                hidden_layer_dropout if i > 0 else input_layer_dropout
            )

            # calculate the weight change
            dW = -learning_rate * (np.dot( delta, add_bias(dropped) )/n_samples).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[ i ] += dW
        # end weight adjustment loop

        input_signals, derivatives = network.update( training_data, trace=True )
        out = input_signals[-1]
        cost_derivative = cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = cost_function(network.update( test_data ), test_targets )

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training] Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )
    print "[training] Trained for %d epochs." % epoch

    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_network_to_file()
def backpropagation(network, trainingset, testset, cost_function, evaluation_function = None,
                    ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9,
                    max_iterations = (), batch_size = 0, input_layer_dropout = 0.0,
                    hidden_layer_dropout = 0.0, print_rate = 1000, save_trained_network = False ):

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    # Whether to use another function for printing the dataset error than the cost function.
    # This is useful if you train the network with the MSE cost function, but are going to
    # classify rather than regress on your data.
    calculate_print_error = evaluation_function if evaluation_function != None else cost_function

    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )
    test_data = np.array( [instance.features for instance in testset ] )
    test_targets = np.array( [instance.targets for instance in testset ] )

    batch_size = batch_size if batch_size != 0 else training_data.shape[0]
    batch_training_data = np.array_split(training_data, math.ceil(1.0 * training_data.shape[0] / batch_size))
    batch_training_targets = np.array_split(training_targets, math.ceil(1.0 * training_targets.shape[0] / batch_size))
    batch_indices = range(len(batch_training_data))  # fast reference to batches

    error = calculate_print_error(network.update( test_data ), test_targets )
    reversed_layer_indexes = range( len(network.layers) )[::-1]
    momentum = collections.defaultdict( int )

    epoch = 0
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        random.shuffle(batch_indices)  # Shuffle the order in which the batches are processed between the iterations

        for batch_index in batch_indices:
            batch_data = batch_training_data[ batch_index ]
            batch_targets = batch_training_targets[ batch_index ]
            batch_size = float( batch_data.shape[0] )

            input_signals, derivatives = network.update( batch_data, trace=True )
            out = input_signals[-1]
            cost_derivative = cost_function( out, batch_targets, derivative=True ).T
            delta = cost_derivative * derivatives[-1]

            for i in reversed_layer_indexes:
                # Loop over the weight layers in reversed order to calculate the deltas

                # perform dropout
                dropped = dropout(
                    input_signals[i],
                    # dropout probability
                    hidden_layer_dropout if i > 0 else input_layer_dropout
                )

                # calculate the weight change
                dW = -learning_rate * (np.dot( delta, add_bias(dropped) )/batch_size).T + momentum_factor * momentum[i]

                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skip the bias weight
                    weight_delta = np.dot( network.weights[ i ][1:,:], delta )

                    # Calculate the delta for the subsequent layer
                    delta = weight_delta * derivatives[i-1]

                # Store the momentum
                momentum[i] = dW

                # Update the weights
                network.weights[ i ] += dW
            # end weight adjustment loop

        error = calculate_print_error(network.update( test_data ), test_targets )

        if epoch % print_rate == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training] Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )
    print "[training] Trained for %d epochs." % epoch

    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_network_to_file()
def forward(self, emb_in, length, context, state_init, batch_size=1, mask=None, cmask=None):
    '''
    Build the computational graph which computes the hidden states.

    :type emb_in: theano variable
    :param emb_in: the input word embeddings

    :type length: theano variable
    :param length: the length of the input

    :type context: theano variable
    :param context: the context vectors

    :type state_init: theano variable
    :param state_init: the initial states

    :type batch_size: int
    :param batch_size: the batch size

    :type mask: theano variable
    :param mask: indicate the length of each sequence in one batch

    :type cmask: theano variable
    :param cmask: indicate the length of each context sequence in one batch
    '''
    # calculate the input vector for inputter, updater and reseter
    att_c = tools.dot3d(context, self.att_context)  # size: (length, batch_size, dim)
    state_in = (tensor.dot(emb_in, self.input_emb) + self.input_emb_offset).reshape(
        (length, batch_size, self.dim))
    gate_in = tensor.dot(emb_in, self.gate_emb).reshape(
        (length, batch_size, self.dim))
    reset_in = tensor.dot(emb_in, self.reset_emb).reshape(
        (length, batch_size, self.dim))

    if mask:
        scan_inp = [state_in, gate_in, reset_in, mask]
        scan_func = lambda x, g, r, m, h, c, attc, cm: self.forward_step(
            h, x, g, r, c, attc, m, cm)
    else:
        scan_inp = [state_in, gate_in, reset_in]
        scan_func = lambda x, g, r, h, c, attc: self.forward_step(
            h, x, g, r, c, attc)

    if self.verbose:
        outputs_info = [
            state_init, None, None, None, None, None, None, None, None, None,
            None, None, None
        ]
    else:
        outputs_info = [state_init, None, None]

    # calculate hidden states
    hiddens, updates = theano.scan(scan_func,
                                   sequences=scan_inp,
                                   outputs_info=outputs_info,
                                   non_sequences=[context, att_c, cmask],
                                   n_steps=length)
    c = hiddens[1]
    attentions = hiddens[2]

    # Add the initial state and discard the last hidden state
    state_before = tensor.concatenate((state_init.reshape(
        (1, state_init.shape[0], state_init.shape[1])), hiddens[0][:-1]))
    state_in_prev = tensor.dot(emb_in, self.readout_emb).reshape(
        (length, batch_size, self.dim))

    # calculate the energy for each word
    readout_c = tensor.dot(c, self.readout_context)
    readout_h = tensor.dot(state_before, self.readout_hidden)
    readout_h += self.readout_offset
    state_in_prev = tools.shift_one(state_in_prev)
    readout = readout_c + readout_h + state_in_prev
    readout = readout.reshape(
        (readout.shape[0] * readout.shape[1], readout.shape[2]))
    maxout = tools.maxout(readout, self.maxout)
    if self.dropout > 0.:
        logging.info('dropout ratio: ' + str(self.dropout))
        maxout = tools.dropout(maxout, self.dropout)
    outenergy = tensor.dot(maxout, self.probs_emb)
    outenergy_1 = outenergy
    outenergy = tensor.dot(outenergy, self.probs)
    outenergy_2 = outenergy
    outenergy += self.probs_offset

    if self.verbose:
        return hiddens, outenergy, state_in, gate_in, reset_in, state_in_prev, readout, maxout, outenergy_1, outenergy_2
    else:
        return hiddens, outenergy, attentions
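`tools.maxout` is used above to fold the readout before the output projection, but its exact convention is not shown in this section. As an illustration only, here is a NumPy version assuming the usual grouping of `fold` consecutive units; the grouping convention and the fold size are assumptions, not taken from the original `tools` module.

import numpy as np

def maxout_numpy(readout, fold=2):
    """Max over groups of `fold` consecutive units: (n, dim) -> (n, dim // fold)."""
    n, dim = readout.shape
    return readout.reshape(n, dim // fold, fold).max(axis=2)

x = np.array([[1., 5., 2., 0.],
              [3., 3., 4., 7.]])
print(maxout_numpy(x))  # [[ 5.  2.]
                        #  [ 3.  7.]]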
def backpropagation(network, trainingset, testset, cost_function, ERROR_LIMIT=1e-3,
                    learning_rate=0.03, momentum_factor=0.9, max_iterations=(),
                    input_layer_dropout=0.0, hidden_layer_dropout=0.0,
                    save_trained_network=False):

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    momentum = collections.defaultdict(int)

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = cost_function(network.update(test_data), test_targets)

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    epoch = 0
    n_samples = float(training_data.shape[0])

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                hidden_layer_dropout if i > 0 else input_layer_dropout)

            # calculate the weight change
            dW = -learning_rate * (np.dot(delta, add_bias(dropped)) / n_samples).T \
                + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        # end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        cost_derivative = cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = cost_function(network.update(test_data), test_targets)

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % (ERROR_LIMIT, error)
    print "[training] Measured quality: %.4g" % network.measure_quality(
        training_data, training_targets, cost_function)
    print "[training] Trained for %d epochs." % epoch

    if save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def backpropagation(network, trainingset, testset, cost_function, evaluation_function=None,
                    ERROR_LIMIT=1e-3, learning_rate=0.03, momentum_factor=0.9,
                    max_iterations=(), batch_size=0, input_layer_dropout=0.0,
                    hidden_layer_dropout=0.0, print_rate=1000, save_trained_network=False):

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    # Whether to use another function for printing the dataset error than the cost function.
    # This is useful if you train the network with the MSE cost function, but are going to
    # classify rather than regress on your data.
    calculate_print_error = evaluation_function if evaluation_function != None else cost_function

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    batch_size = batch_size if batch_size != 0 else training_data.shape[0]
    batch_training_data = np.array_split(
        training_data, math.ceil(1.0 * training_data.shape[0] / batch_size))
    batch_training_targets = np.array_split(
        training_targets, math.ceil(1.0 * training_targets.shape[0] / batch_size))
    batch_indices = range(len(batch_training_data))  # fast reference to batches

    error = calculate_print_error(network.update(test_data), test_targets)
    reversed_layer_indexes = range(len(network.layers))[::-1]
    momentum = collections.defaultdict(int)

    epoch = 0
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        random.shuffle(batch_indices)  # Shuffle the order in which the batches are processed between the iterations

        for batch_index in batch_indices:
            batch_data = batch_training_data[batch_index]
            batch_targets = batch_training_targets[batch_index]
            batch_size = float(batch_data.shape[0])

            input_signals, derivatives = network.update(batch_data, trace=True)
            out = input_signals[-1]
            cost_derivative = cost_function(out, batch_targets, derivative=True).T
            delta = cost_derivative * derivatives[-1]

            for i in reversed_layer_indexes:
                # Loop over the weight layers in reversed order to calculate the deltas

                # perform dropout
                dropped = dropout(
                    input_signals[i],
                    # dropout probability
                    hidden_layer_dropout if i > 0 else input_layer_dropout)

                # calculate the weight change
                dW = -learning_rate * (np.dot(delta, add_bias(dropped)) / batch_size).T \
                    + momentum_factor * momentum[i]

                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skip the bias weight
                    weight_delta = np.dot(network.weights[i][1:, :], delta)

                    # Calculate the delta for the subsequent layer
                    delta = weight_delta * derivatives[i - 1]

                # Store the momentum
                momentum[i] = dW

                # Update the weights
                network.weights[i] += dW
            # end weight adjustment loop

        error = calculate_print_error(network.update(test_data), test_targets)

        if epoch % print_rate == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % (ERROR_LIMIT, error)
    print "[training] Measured quality: %.4g" % network.measure_quality(
        training_data, training_targets, cost_function)
    print "[training] Trained for %d epochs." % epoch

    if save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def backpropagation(self, trainingset, ERROR_LIMIT = 1e-3, learning_rate = 0.3,
                    momentum_factor = 0.9 ):

    assert trainingset[0].features.shape[0] == self.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == self.n_outputs, \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )

    MSE = ( )  # inf
    neterror = None
    momentum = collections.defaultdict( int )
    batch_size = self.batch_size if self.batch_size != 0 else training_data.shape[0]

    epoch = 0
    while MSE > ERROR_LIMIT:
        epoch += 1

        for start in xrange( 0, len(training_data), batch_size ):
            batch = training_data[start : start+batch_size]

            input_layers = self.update( training_data, trace=True )
            out = input_layers[-1]

            error = out - training_targets
            delta = error
            MSE = np.mean( np.power(error,2) )

            loop = itertools.izip(
                xrange(len(self.weights)-1, -1, -1),
                reversed(self.weights),
                reversed(input_layers[:-1]),
            )

            for i, weight_layer, input_signals in loop:
                # Loop over the weight layers in reversed order to calculate the deltas
                if i == 0:
                    dropped = dropout( add_bias(input_signals).T, self.input_layer_dropout )
                else:
                    dropped = dropout( add_bias(input_signals).T, self.hidden_layer_dropout )

                # Calculate weight change
                dW = learning_rate * np.dot( dropped, delta ) + momentum_factor * momentum[i]

                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skipping the bias weight during calculation.
                    weight_delta = np.dot( delta, weight_layer[1:,:].T )

                    # Calculate the delta for the subsequent layer
                    delta = np.multiply( weight_delta, self.activation_functions[i-1]( input_signals, derivative=True) )

                # Store the momentum
                momentum[i] = dW

                # Update the weights
                self.weights[ i ] -= dW

        if epoch % 1000 == 0:
            # Show the current training status
            print "* current network error (MSE):", MSE

    print "* Converged to error bound (%.4g) with MSE = %.4g." % ( ERROR_LIMIT, MSE )
    print "* Trained for %d epochs." % epoch
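Two batching idioms appear in these trainers: slicing with a stride, as in the function above, and pre-splitting once with `np.array_split` plus an index shuffle each epoch, as in the earlier mini-batch variant. A self-contained toy run of the array_split idiom follows; the array contents and batch size are made up for illustration.

import math
import random
import numpy as np

training_data = np.arange(10).reshape(10, 1)        # 10 toy samples, 1 feature each
batch_size = 3
n_batches = int(math.ceil(1.0 * training_data.shape[0] / batch_size))
batches = np.array_split(training_data, n_batches)   # 4 chunks of sizes 3, 3, 2, 2
batch_indices = list(range(len(batches)))

for epoch in range(2):
    random.shuffle(batch_indices)   # new visiting order every epoch; data stays in place
    for batch_index in batch_indices:
        batch = batches[batch_index]
        # the forward pass, deltas and the weight update for this batch would go here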