def backprop(self, X, y):
    yHat = self.forward(X)
    error = yHat - y
    delta = error
    MSE = np.mean(np.power(error, 2))

    X_bias = add_bias(X)

    if self.n_hidden_layers == 0:
        delta2 = np.multiply(delta, self.activation_functions[0](self.Z2, True))

        # Compute final gradient
        dJdW1 = np.dot(X_bias.T, delta2)
        return [dJdW1, MSE]
    else:
        delta3 = np.multiply(delta, self.activation_functions[1](self.Z3, True))
        dJdW2 = np.dot(add_bias(self.A2).T, delta3)

        # Pass backward, skipping the bias row of the second weight layer
        delta2 = np.multiply(
            np.dot(delta3, self.weight_layers[1][0:self.n_hiddens].T),
            self.activation_functions[0](self.Z2, True))
        dJdW1 = np.dot(X_bias.T, delta2)

        return [(dJdW1, dJdW2), MSE]
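# The functions in this section rely on an `add_bias` helper that is not defined
# here. A minimal sketch, assuming the bias column is appended after the inputs,
# which is what the `weight_layers[1][0:self.n_hiddens]` slice above implies.
# (The later `gradient` variants slice `weights[i][1:, :]` instead, i.e. they
# assume the ones column is prepended; swap the hstack order for that layout.)
import numpy as np

def add_bias(A):
    # Append a column of ones so the last row of the weight matrix acts as the bias
    return np.hstack((A, np.ones((A.shape[0], 1))))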
def gradient(self, weight_vector, training_data, training_targets, cost_function):
    assert softmax_function != self.layers[-1][1] or cost_function == softmax_neg_loss, \
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == self.layers[-1][1], \
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    # Assign the weight_vector as the network topology
    self.set_weights(np.array(weight_vector))

    input_signals, derivatives = self.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]

    layer_indexes = range(len(self.layers))[::-1]  # reversed
    n_samples = float(training_data.shape[0])
    deltas_by_layer = []

    for i in layer_indexes:
        # Loop over the weight layers in reversed order to calculate the deltas
        deltas_by_layer.append(
            list((np.dot(delta, add_bias(input_signals[i])) / n_samples).T.flat))

        if i != 0:
            # i != 0 because we do not want to calculate the delta unnecessarily.
            weight_delta = np.dot(self.weights[i][1:, :], delta)  # Skip the bias weight

            # Calculate the delta for the subsequent layer
            delta = weight_delta * derivatives[i - 1]
    # end weight adjustment loop

    return np.hstack(reversed(deltas_by_layer))
def gradient(self, weight_vector, training_data, training_targets, cost_function):
    # Assign the weight_vector as the network topology
    self.set_weights(np.array(weight_vector))

    input_signals, derivatives = self.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]

    layer_indexes = range(len(self.layers))[::-1]  # reversed
    n_samples = float(training_data.shape[0])
    deltas_by_layer = []

    for i in layer_indexes:
        # Loop over the weight layers in reversed order to calculate the deltas
        deltas_by_layer.append(
            list((np.dot(delta, add_bias(input_signals[i])) / n_samples).T.flat))

        if i != 0:
            # i != 0 because we do not want to calculate the delta unnecessarily.
            weight_delta = np.dot(self.weights[i][1:, :], delta)  # Skip the bias weight

            # Calculate the delta for the subsequent layer
            delta = weight_delta * derivatives[i - 1]
    # end weight adjustment loop

    return np.hstack(reversed(deltas_by_layer))
def gradient(self, weight_vector, training_data, training_targets):
    layer_indexes = range(len(self.layers))[::-1]  # reversed
    self.weights = self.unpack(np.array(weight_vector))

    input_signals, derivatives = self.update(training_data, trace=True)
    out = input_signals[-1]
    error = (out - training_targets).T
    delta = error * derivatives[-1]

    layers = []
    for i in layer_indexes:
        # Loop over the weight layers in reversed order to calculate the deltas

        # Calculate the weight change
        layers.append(np.dot(delta, add_bias(input_signals[i])).T.flat)

        if i != 0:
            """Do not calculate the delta unnecessarily."""
            # Skip the bias weight
            weight_delta = np.dot(self.weights[i][1:, :], delta)

            # Calculate the delta for the subsequent layer
            delta = weight_delta * derivatives[i - 1]
    # end weight adjustment loop

    return np.hstack(reversed(layers))
def gradient(self, weight_vector, training_data, training_targets):
    layer_indexes = range(len(self.layers))[::-1]  # reversed
    self.weights = self.unpack(np.array(weight_vector))

    input_signals, derivatives = self.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = self.cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = self.cost_function(out, training_targets)

    layers = []
    for i in layer_indexes:
        # Loop over the weight layers in reversed order to calculate the deltas

        # Calculate the weight change
        dropped = dropout(
            input_signals[i],
            # dropout probability
            self.hidden_layer_dropout if i > 0 else self.input_layer_dropout)
        layers.append(np.dot(delta, add_bias(dropped)).T.flat)

        if i != 0:
            """Do not calculate the delta unnecessarily."""
            # Skip the bias weight
            weight_delta = np.dot(self.weights[i][1:, :], delta)

            # Calculate the delta for the subsequent layer
            delta = weight_delta * derivatives[i - 1]
    # end weight adjustment loop

    return np.hstack(reversed(layers))
def gradient(self, weight_vector, training_data, training_targets):
    layer_indexes = range(len(self.layers))[::-1]  # reversed
    self.weights = self.unpack(np.array(weight_vector))

    input_signals, derivatives = self.update(training_data, trace=True)
    out = input_signals[-1]
    error = (out - training_targets).T
    delta = error * derivatives[-1]

    layers = []
    for i in layer_indexes:
        # Loop over the weight layers in reversed order to calculate the deltas

        # Calculate the weight change
        layers.append(np.dot(delta, add_bias(input_signals[i])).T.flat)

        if i != 0:
            """Do not calculate the delta unnecessarily."""
            # Skip the bias weight
            weight_delta = np.dot(self.weights[i][1:, :], delta)

            # Calculate the delta for the subsequent layer
            delta = weight_delta * derivatives[i - 1]
    # end weight adjustment loop

    return np.hstack(reversed(layers))
def gradient(self, weight_vector, training_data, training_targets):
    layer_indexes = range(len(self.layers))[::-1]  # reversed
    self.weights = self.unpack(np.array(weight_vector))

    input_signals, derivatives = self.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = self.cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = self.cost_function(out, training_targets)

    layers = []
    for i in layer_indexes:
        # Loop over the weight layers in reversed order to calculate the deltas

        # Calculate the weight change
        dropped = dropout(
            input_signals[i],
            # dropout probability
            self.hidden_layer_dropout if i > 0 else self.input_layer_dropout)
        layers.append(np.dot(delta, add_bias(dropped)).T.flat)

        if i != 0:
            """Do not calculate the delta unnecessarily."""
            # Skip the bias weight
            weight_delta = np.dot(self.weights[i][1:, :], delta)

            # Calculate the delta for the subsequent layer
            delta = weight_delta * derivatives[i - 1]
    # end weight adjustment loop

    return np.hstack(reversed(layers))
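# A hedged usage sketch, not part of the original code: the `gradient` variants
# above take a flat weight vector and return a flat gradient, which is the
# calling convention expected by general-purpose optimizers such as
# scipy.optimize.minimize. `cost` below stands in for a matching scalar-loss
# function with the same (weight_vector, data, targets) signature; neither it
# nor the `get_weights`/`set_weights` accessors are defined in this section.
from scipy.optimize import minimize

def fit_with_lbfgs(network, cost, training_data, training_targets):
    result = minimize(
        cost,                          # scalar loss f(w, X, y)  -- assumed companion function
        network.get_weights(),         # initial flat weight vector  -- assumed accessor
        args=(training_data, training_targets),
        jac=network.gradient,          # the flat gradient defined above
        method="L-BFGS-B",
    )
    network.set_weights(result.x)      # install the optimized weights
    return result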
def parallel_backpropagation_one_process(network, trainingset, block_number,
                                         learning_rate=0.03, momentum_factor=0.9,
                                         max_iterations=()):
    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    momentum = collections.defaultdict(int)
    epoch = 0

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    error = network.cost_function(out, training_targets)
    cost_derivative = network.cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]

    while epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # Perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                network.hidden_layer_dropout if i > 0 else network.input_layer_dropout)

            # Calculate the weight change
            dW = -learning_rate * np.dot(delta, add_bias(dropped)).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        # end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        error = network.cost_function(out, training_targets)
        cost_derivative = network.cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]

    result = []
    result.append(block_number)
    result.append(out)
    result.append(error)
    result.append(cost_derivative)
    result.append(delta)

    return result
def forward(self, X):
    # Forward propagate inputs through the network
    (N, d) = X.shape
    X_bias = add_bias(X)

    # Calculate first activation
    self.Z2 = np.dot(X_bias, self.weight_layers[0])

    if self.n_hidden_layers == 0:
        # Return activation on Z2
        return self.activation_functions[0](self.Z2, False)
    else:
        self.A2 = self.activation_functions[0](self.Z2, False)
        A2_bias = add_bias(self.A2)
        self.Z3 = np.dot(A2_bias, self.weight_layers[1])
        yHat = self.activation_functions[1](self.Z3, False)  # Final layer output
        return yHat
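# The trainers below pass each layer's input signals through a `dropout` helper
# that is not defined in this section. A minimal sketch, assuming inverted
# dropout (the surviving activations are rescaled by the keep probability so no
# correction is needed at prediction time):
import numpy as np

def dropout(X, p=0.0):
    if p != 0:
        retain_p = 1.0 - p
        # Zero out units with probability p and rescale the survivors
        X = X * np.random.binomial(1, retain_p, size=X.shape) / retain_p
    return X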
def backpropagation(self, trainingset, ERROR_LIMIT=1e-3, learning_rate=0.03,
                    momentum_factor=0.9, max_iterations=()):

    assert trainingset[0].features.shape[0] == self.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == self.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    layer_indexes = range(len(self.layers))[::-1]  # reversed
    momentum = collections.defaultdict(int)
    MSE = ()  # inf
    epoch = 0

    input_signals, derivatives = self.update(training_data, trace=True)
    out = input_signals[-1]
    error = (out - training_targets).T
    delta = error * derivatives[-1]
    MSE = np.mean(np.power(error, 2))

    while MSE > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # Perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                self.hidden_layer_dropout if i else self.input_layer_dropout)

            # Calculate the weight change
            dW = -learning_rate * np.dot(delta, add_bias(dropped)).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(self.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            self.weights[i] += dW
        # end weight adjustment loop

        input_signals, derivatives = self.update(training_data, trace=True)
        out = input_signals[-1]
        error = (out - training_targets).T
        delta = error * derivatives[-1]
        MSE = np.mean(np.power(error, 2))

        if epoch % 1000 == 0:
            # Show the current training status
            print "* current network error (MSE):", MSE

    print "* Converged to error bound (%.4g) with MSE = %.4g." % (ERROR_LIMIT, MSE)
    print "* Trained for %d epochs." % epoch

    if self.save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        self.save_to_file()
def backpropagation(network, trainingset, testset, cost_function, evaluation_function=None,
                    ERROR_LIMIT=1e-3, learning_rate=0.03, momentum_factor=0.9,
                    max_iterations=(), batch_size=0, input_layer_dropout=0.0,
                    hidden_layer_dropout=0.0, print_rate=1000, save_trained_network=False):

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss, \
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1], \
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    # Whether to use another function for printing the dataset error than the cost function.
    # This is useful if you train the network with the MSE cost function, but are going to
    # classify rather than regress on your data.
    calculate_print_error = evaluation_function if evaluation_function != None else cost_function

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    batch_size = batch_size if batch_size != 0 else training_data.shape[0]
    batch_training_data = np.array_split(training_data, math.ceil(1.0 * training_data.shape[0] / batch_size))
    batch_training_targets = np.array_split(training_targets, math.ceil(1.0 * training_targets.shape[0] / batch_size))
    batch_indices = range(len(batch_training_data))  # fast reference to batches

    error = calculate_print_error(network.update(test_data), test_targets)
    reversed_layer_indexes = range(len(network.layers))[::-1]
    momentum = collections.defaultdict(int)

    epoch = 0
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        random.shuffle(batch_indices)  # Shuffle the order in which the batches are processed between the iterations

        for batch_index in batch_indices:
            batch_data = batch_training_data[batch_index]
            batch_targets = batch_training_targets[batch_index]
            batch_size = float(batch_data.shape[0])

            input_signals, derivatives = network.update(batch_data, trace=True)
            out = input_signals[-1]
            cost_derivative = cost_function(out, batch_targets, derivative=True).T
            delta = cost_derivative * derivatives[-1]

            for i in reversed_layer_indexes:
                # Loop over the weight layers in reversed order to calculate the deltas

                # Perform dropout
                dropped = dropout(
                    input_signals[i],
                    # dropout probability
                    hidden_layer_dropout if i > 0 else input_layer_dropout)

                # Calculate the weight change
                dW = -learning_rate * (np.dot(delta, add_bias(dropped)) / batch_size).T + momentum_factor * momentum[i]

                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skip the bias weight
                    weight_delta = np.dot(network.weights[i][1:, :], delta)

                    # Calculate the delta for the subsequent layer
                    delta = weight_delta * derivatives[i - 1]

                # Store the momentum
                momentum[i] = dW

                # Update the weights
                network.weights[i] += dW
            # end weight adjustment loop

        error = calculate_print_error(network.update(test_data), test_targets)

        if epoch % print_rate == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % (ERROR_LIMIT, error)
    print "[training] Measured quality: %.4g" % network.measure_quality(training_data, training_targets, cost_function)
    print "[training] Trained for %d epochs." % epoch

    if save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def resilient_backpropagation(self, trainingset, ERROR_LIMIT=1e-3, max_iterations=(),
                              weight_step_max=50., weight_step_min=0., start_step=0.5,
                              learn_max=1.2, learn_min=0.5):
    # Implemented according to iRprop+
    # http://sci2s.ugr.es/keel/pdf/algorithm/articulo/2003-Neuro-Igel-IRprop+.pdf

    assert self.input_layer_dropout == 0 and self.hidden_layer_dropout == 0, \
        "ERROR: dropout should not be used with resilient backpropagation"
    assert trainingset[0].features.shape[0] == self.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == self.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    # Data structure to store the previous derivative
    last_dEdW = [1] * len(self.weights)

    # Storing the current / previous weight step size
    weight_step = [np.full(weight_layer.shape, start_step) for weight_layer in self.weights]

    # Storing the current / previous weight update
    dW = [np.ones(shape=weight_layer.shape) for weight_layer in self.weights]

    input_signals, derivatives = self.update(training_data, trace=True)
    out = input_signals[-1]
    error = (out - training_targets).T
    delta = error * derivatives[-1]
    MSE = np.mean(np.power(error, 2))

    layer_indexes = range(len(self.layers))[::-1]  # reversed
    prev_MSE = ()  # inf
    epoch = 0

    while MSE > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # Calculate the delta with respect to the weights
            dEdW = np.dot(delta, add_bias(input_signals[i])).T

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(self.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Calculate sign changes and note where they have changed
            diffs = np.multiply(dEdW, last_dEdW[i])
            pos_indexes = np.where(diffs > 0)
            neg_indexes = np.where(diffs < 0)
            zero_indexes = np.where(diffs == 0)

            # positive
            if np.any(pos_indexes):
                # Calculate the weight step size
                weight_step[i][pos_indexes] = np.minimum(weight_step[i][pos_indexes] * learn_max, weight_step_max)

                # Calculate the weight step direction
                dW[i][pos_indexes] = np.multiply(-np.sign(dEdW[pos_indexes]), weight_step[i][pos_indexes])

                # Apply the weight deltas
                self.weights[i][pos_indexes] += dW[i][pos_indexes]

            # negative
            if np.any(neg_indexes):
                weight_step[i][neg_indexes] = np.maximum(weight_step[i][neg_indexes] * learn_min, weight_step_min)

                if MSE > prev_MSE:
                    # iRprop+ version of resilient backpropagation
                    self.weights[i][neg_indexes] -= dW[i][neg_indexes]  # backtrack

                dEdW[neg_indexes] = 0

            # zeros
            if np.any(zero_indexes):
                dW[i][zero_indexes] = np.multiply(-np.sign(dEdW[zero_indexes]), weight_step[i][zero_indexes])
                self.weights[i][zero_indexes] += dW[i][zero_indexes]

            # Store the previous weight step
            last_dEdW[i] = dEdW
        # end weight adjustment loop

        prev_MSE = MSE

        input_signals, derivatives = self.update(training_data, trace=True)
        out = input_signals[-1]
        error = (out - training_targets).T
        delta = error * derivatives[-1]
        MSE = np.mean(np.power(error, 2))

        if epoch % 1000 == 0:
            print "* current network error (MSE):", MSE

    print "* Converged to error bound (%.3g) with MSE = %.3g." % (ERROR_LIMIT, MSE)
    print "* Trained for %d epochs." % epoch

    if self.save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        self.save_to_file()
def resilient_backpropagation(network, trainingset, testset, cost_function, ERROR_LIMIT=1e-3,
                              max_iterations=(), weight_step_max=50., weight_step_min=0.,
                              start_step=0.5, learn_max=1.2, learn_min=0.5,
                              print_rate=1000, save_trained_network=False):
    # Implemented according to iRprop+
    # http://sci2s.ugr.es/keel/pdf/algorithm/articulo/2003-Neuro-Igel-IRprop+.pdf

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss, \
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1], \
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    # Storing the current / previous weight step size
    weight_step = [np.full(weight_layer.shape, start_step) for weight_layer in network.weights]

    # Storing the current / previous weight update
    dW = [np.ones(shape=weight_layer.shape) for weight_layer in network.weights]

    # Storing the previous derivative
    previous_dEdW = [1] * len(network.weights)

    # Storing the previous error measurement
    prev_error = ()  # inf

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = cost_function(network.update(test_data), test_targets)

    n_samples = float(training_data.shape[0])
    layer_indexes = range(len(network.layers))[::-1]  # reversed
    epoch = 0

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # Calculate the delta with respect to the weights
            dEdW = (np.dot(delta, add_bias(input_signals[i])) / n_samples).T

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Calculate sign changes and note where they have changed
            diffs = np.multiply(dEdW, previous_dEdW[i])
            pos_indexes = np.where(diffs > 0)
            neg_indexes = np.where(diffs < 0)
            zero_indexes = np.where(diffs == 0)

            # positive
            if np.any(pos_indexes):
                # Calculate the weight step size
                weight_step[i][pos_indexes] = np.minimum(weight_step[i][pos_indexes] * learn_max, weight_step_max)

                # Calculate the weight step direction
                dW[i][pos_indexes] = np.multiply(-np.sign(dEdW[pos_indexes]), weight_step[i][pos_indexes])

                # Apply the weight deltas
                network.weights[i][pos_indexes] += dW[i][pos_indexes]

            # negative
            if np.any(neg_indexes):
                weight_step[i][neg_indexes] = np.maximum(weight_step[i][neg_indexes] * learn_min, weight_step_min)

                if error > prev_error:
                    # iRprop+ version of resilient backpropagation
                    network.weights[i][neg_indexes] -= dW[i][neg_indexes]  # backtrack

                dEdW[neg_indexes] = 0

            # zeros
            if np.any(zero_indexes):
                dW[i][zero_indexes] = np.multiply(-np.sign(dEdW[zero_indexes]), weight_step[i][zero_indexes])
                network.weights[i][zero_indexes] += dW[i][zero_indexes]

            # Store the previous weight step
            previous_dEdW[i] = dEdW
        # end weight adjustment loop

        prev_error = error

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        cost_derivative = cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = cost_function(network.update(test_data), test_targets)

        if epoch % print_rate == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % (ERROR_LIMIT, error)
    print "[training] Measured quality: %.4g" % network.measure_quality(training_data, training_targets, cost_function)
    print "[training] Trained for %d epochs." % epoch

    if save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def resilient_backpropagation(network, trainingset, ERROR_LIMIT=1e-3, max_iterations=(),
                              weight_step_max=50., weight_step_min=0., start_step=0.5,
                              learn_max=1.2, learn_min=0.5):
    # Implemented according to iRprop+
    # http://sci2s.ugr.es/keel/pdf/algorithm/articulo/2003-Neuro-Igel-IRprop+.pdf

    assert network.input_layer_dropout == 0 and network.hidden_layer_dropout == 0, \
        "ERROR: dropout should not be used with resilient backpropagation"
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    # Data structure to store the previous derivative
    previous_dEdW = [1] * len(network.weights)

    # Storing the current / previous weight step size
    weight_step = [np.full(weight_layer.shape, start_step) for weight_layer in network.weights]

    # Storing the current / previous weight update
    dW = [np.ones(shape=weight_layer.shape) for weight_layer in network.weights]

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = network.cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = network.cost_function(out, training_targets)

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    prev_error = ()  # inf
    epoch = 0

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # Calculate the delta with respect to the weights
            dEdW = np.dot(delta, add_bias(input_signals[i])).T

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Calculate sign changes and note where they have changed
            diffs = np.multiply(dEdW, previous_dEdW[i])
            pos_indexes = np.where(diffs > 0)
            neg_indexes = np.where(diffs < 0)
            zero_indexes = np.where(diffs == 0)

            # positive
            if np.any(pos_indexes):
                # Calculate the weight step size
                weight_step[i][pos_indexes] = np.minimum(weight_step[i][pos_indexes] * learn_max, weight_step_max)

                # Calculate the weight step direction
                dW[i][pos_indexes] = np.multiply(-np.sign(dEdW[pos_indexes]), weight_step[i][pos_indexes])

                # Apply the weight deltas
                network.weights[i][pos_indexes] += dW[i][pos_indexes]

            # negative
            if np.any(neg_indexes):
                weight_step[i][neg_indexes] = np.maximum(weight_step[i][neg_indexes] * learn_min, weight_step_min)

                if error > prev_error:
                    # iRprop+ version of resilient backpropagation
                    network.weights[i][neg_indexes] -= dW[i][neg_indexes]  # backtrack

                dEdW[neg_indexes] = 0

            # zeros
            if np.any(zero_indexes):
                dW[i][zero_indexes] = np.multiply(-np.sign(dEdW[zero_indexes]), weight_step[i][zero_indexes])
                network.weights[i][zero_indexes] += dW[i][zero_indexes]

            # Store the previous weight step
            previous_dEdW[i] = dEdW
        # end weight adjustment loop

        prev_error = error

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        cost_derivative = network.cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = network.cost_function(out, training_targets)

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % (ERROR_LIMIT, error)
    print "[training] Trained for %d epochs." % epoch

    if network.save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_to_file()
def backpropagation(network, trainingset, ERROR_LIMIT=1e-3, learning_rate=0.03,
                    momentum_factor=0.9, max_iterations=()):

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    momentum = collections.defaultdict(int)
    epoch = 0

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    error = network.cost_function(out, training_targets)
    cost_derivative = network.cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # Perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                network.hidden_layer_dropout if i > 0 else network.input_layer_dropout)

            # Calculate the weight change
            dW = -learning_rate * np.dot(delta, add_bias(dropped)).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        # end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        error = network.cost_function(out, training_targets)
        cost_derivative = network.cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % (ERROR_LIMIT, error)
    print "[training] Trained for %d epochs." % epoch

    if network.save_trained_network:
        network.save_to_file()
def backpropagation(self, trainingset, ERROR_LIMIT=1e-3, learning_rate=0.3, momentum_factor=0.9):

    assert trainingset[0].features.shape[0] == self.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == self.n_outputs, \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    MSE = ()  # inf
    neterror = None
    momentum = collections.defaultdict(int)
    batch_size = self.batch_size if self.batch_size != 0 else training_data.shape[0]

    epoch = 0
    while MSE > ERROR_LIMIT:
        epoch += 1

        for start in xrange(0, len(training_data), batch_size):
            batch = training_data[start:start + batch_size]

            # NOTE: the full training set is propagated here; `batch` is selected but not used
            input_layers = self.update(training_data, trace=True)
            out = input_layers[-1]

            error = out - training_targets
            delta = error
            MSE = np.mean(np.power(error, 2))

            loop = itertools.izip(
                xrange(len(self.weights) - 1, -1, -1),
                reversed(self.weights),
                reversed(input_layers[:-1]),
            )

            for i, weight_layer, input_signals in loop:
                # Loop over the weight layers in reversed order to calculate the deltas

                if i == 0:
                    dropped = dropout(add_bias(input_signals).T, self.input_layer_dropout)
                else:
                    dropped = dropout(add_bias(input_signals).T, self.hidden_layer_dropout)

                # Calculate weight change
                dW = learning_rate * np.dot(dropped, delta) + momentum_factor * momentum[i]

                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skipping the bias weight during calculation.
                    weight_delta = np.dot(delta, weight_layer[1:, :].T)

                    # Calculate the delta for the subsequent layer
                    delta = np.multiply(weight_delta, self.activation_functions[i - 1](input_signals, derivative=True))

                # Store the momentum
                momentum[i] = dW

                # Update the weights
                self.weights[i] -= dW

        if epoch % 1000 == 0:
            # Show the current training status
            print "* current network error (MSE):", MSE

    print "* Converged to error bound (%.4g) with MSE = %.4g." % (ERROR_LIMIT, MSE)
    print "* Trained for %d epochs." % epoch
def backpropagation(network, trainingset, testset, cost_function, ERROR_LIMIT=1e-3,
                    learning_rate=0.03, momentum_factor=0.9, max_iterations=(),
                    input_layer_dropout=0.0, hidden_layer_dropout=0.0, save_trained_network=False):

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss, \
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1], \
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    momentum = collections.defaultdict(int)

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = cost_function(network.update(test_data), test_targets)

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    epoch = 0
    n_samples = float(training_data.shape[0])

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # Perform dropout
            # NOTE: `dropped` is computed here, but the weight update below uses the raw input_signals[i]
            dropped = dropout(
                input_signals[i],
                # dropout probability
                hidden_layer_dropout if i > 0 else input_layer_dropout)

            # Calculate the weight change
            dW = -learning_rate * (np.dot(delta, add_bias(input_signals[i])) / n_samples).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        # end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        cost_derivative = cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = cost_function(network.update(test_data), test_targets)

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % (ERROR_LIMIT, error)
    print "[training] Measured quality: %.4g" % network.measure_quality(training_data, training_targets, cost_function)
    print "[training] Trained for %d epochs." % epoch

    if save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def backpropagation(network, trainingset, testset, cost_function, evaluation_function=None,
                    ERROR_LIMIT=1e-3, learning_rate=0.03, momentum_factor=0.9,
                    max_iterations=(), batch_size=0, input_layer_dropout=0.0,
                    hidden_layer_dropout=0.0, print_rate=1000, save_trained_network=False):

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss, \
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1], \
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    # Whether to use another function for printing the dataset error than the cost function.
    # This is useful if you train the network with the MSE cost function, but are going to
    # classify rather than regress on your data.
    calculate_print_error = evaluation_function if evaluation_function != None else cost_function

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    batch_size = batch_size if batch_size != 0 else training_data.shape[0]
    batch_training_data = np.array_split(training_data, math.ceil(1.0 * training_data.shape[0] / batch_size))
    batch_training_targets = np.array_split(training_targets, math.ceil(1.0 * training_targets.shape[0] / batch_size))
    batch_indices = range(len(batch_training_data))  # fast reference to batches

    error = calculate_print_error(network.update(test_data), test_targets)
    reversed_layer_indexes = range(len(network.layers))[::-1]
    momentum = collections.defaultdict(int)

    epoch = 0
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        random.shuffle(batch_indices)  # Shuffle the order in which the batches are processed between the iterations

        for batch_index in batch_indices:
            batch_data = batch_training_data[batch_index]
            batch_targets = batch_training_targets[batch_index]
            batch_size = float(batch_data.shape[0])

            input_signals, derivatives = network.update(batch_data, trace=True)
            out = input_signals[-1]
            cost_derivative = cost_function(out, batch_targets, derivative=True).T
            delta = cost_derivative * derivatives[-1]

            for i in reversed_layer_indexes:
                # Loop over the weight layers in reversed order to calculate the deltas

                # Perform dropout
                dropped = dropout(
                    input_signals[i],
                    # dropout probability
                    hidden_layer_dropout if i > 0 else input_layer_dropout)

                # Calculate the weight change
                dW = -learning_rate * (np.dot(delta, add_bias(dropped)) / batch_size).T + momentum_factor * momentum[i]

                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skip the bias weight
                    weight_delta = np.dot(network.weights[i][1:, :], delta)

                    # Calculate the delta for the subsequent layer
                    delta = weight_delta * derivatives[i - 1]

                # Store the momentum
                momentum[i] = dW

                # Update the weights
                network.weights[i] += dW
            # end weight adjustment loop

        error = calculate_print_error(network.update(test_data), test_targets)

        if epoch % print_rate == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % (ERROR_LIMIT, error)
    print "[training] Measured quality: %.4g" % network.measure_quality(training_data, training_targets, cost_function)
    print "[training] Trained for %d epochs." % epoch

    if save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def backpropagation(network, trainingset, testset, cost_function, ERROR_LIMIT=1e-3,
                    learning_rate=0.03, momentum_factor=0.9, max_iterations=(),
                    input_layer_dropout=0.0, hidden_layer_dropout=0.0, save_trained_network=False):

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss, \
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1], \
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    momentum = collections.defaultdict(int)

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = cost_function(network.update(test_data), test_targets)

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    epoch = 0
    n_samples = float(training_data.shape[0])

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # Perform dropout
            # NOTE: `dropped` is computed here, but the weight update below uses the raw input_signals[i]
            dropped = dropout(
                input_signals[i],
                # dropout probability
                hidden_layer_dropout if i > 0 else input_layer_dropout)

            # Calculate the weight change
            dW = -learning_rate * (np.dot(delta, add_bias(input_signals[i])) / n_samples).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        # end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        cost_derivative = cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = cost_function(network.update(test_data), test_targets)

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % (ERROR_LIMIT, error)
    print "[training] Measured quality: %.4g" % network.measure_quality(training_data, training_targets, cost_function)
    print "[training] Trained for %d epochs." % epoch

    if save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def backpropagation(network, trainingset, ERROR_LIMIT=1e-3, learning_rate=0.03,
                    momentum_factor=0.9, max_iterations=()):

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    layer_indexes = range(len(network.layers))[::-1]  # reversed
    momentum = collections.defaultdict(int)
    epoch = 0

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    error = network.cost_function(out, training_targets)
    cost_derivative = network.cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes:
            # Loop over the weight layers in reversed order to calculate the deltas

            # Perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                network.hidden_layer_dropout if i > 0 else network.input_layer_dropout)

            # Calculate the weight change
            dW = -learning_rate * np.dot(delta, add_bias(dropped)).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        # end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        error = network.cost_function(out, training_targets)
        cost_derivative = network.cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % (ERROR_LIMIT, error)
    print "[training] Trained for %d epochs." % epoch

    if network.save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_to_file()
def backpropagation(self, trainingset, ERROR_LIMIT=1e-3, learning_rate=0.3, momentum_factor=0.9):

    assert trainingset[0].features.shape[0] == self.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == self.n_outputs, \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    MSE = ()  # inf
    neterror = None
    momentum = collections.defaultdict(int)
    batch_size = self.batch_size if self.batch_size != 0 else training_data.shape[0]

    epoch = 0
    while MSE > ERROR_LIMIT:
        epoch += 1

        for start in xrange(0, len(training_data), batch_size):
            batch = training_data[start:start + batch_size]

            # NOTE: the full training set is propagated here; `batch` is selected but not used
            input_layers = self.update(training_data, trace=True)
            out = input_layers[-1]

            error = out - training_targets
            delta = error
            MSE = np.mean(np.power(error, 2))

            loop = itertools.izip(
                xrange(len(self.weights) - 1, -1, -1),
                reversed(self.weights),
                reversed(input_layers[:-1]),
            )

            for i, weight_layer, input_signals in loop:
                # Loop over the weight layers in reversed order to calculate the deltas

                if i == 0:
                    dropped = dropout(add_bias(input_signals).T, self.input_layer_dropout)
                else:
                    dropped = dropout(add_bias(input_signals).T, self.hidden_layer_dropout)

                # Calculate weight change
                dW = learning_rate * np.dot(dropped, delta) + momentum_factor * momentum[i]

                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skipping the bias weight during calculation.
                    weight_delta = np.dot(delta, weight_layer[1:, :].T)

                    # Calculate the delta for the subsequent layer
                    delta = np.multiply(weight_delta, self.activation_functions[i - 1](input_signals, derivative=True))

                # Store the momentum
                momentum[i] = dW

                # Update the weights
                self.weights[i] -= dW

        if epoch % 1000 == 0:
            # Show the current training status
            print "* current network error (MSE):", MSE

    print "* Converged to error bound (%.4g) with MSE = %.4g." % (ERROR_LIMIT, MSE)
    print "* Trained for %d epochs." % epoch
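# Every trainer in this section iterates over training instances that expose
# `.features` and `.targets` NumPy arrays. A minimal stand-in for such an
# instance container (the original class is not shown here); the XOR example
# below is only illustrative:
import numpy as np

class Instance:
    def __init__(self, features, targets):
        self.features = np.array(features)
        self.targets = np.array(targets)

# Example training set: the XOR mapping
trainingset = [
    Instance([0, 0], [0]),
    Instance([0, 1], [1]),
    Instance([1, 0], [1]),
    Instance([1, 1], [0]),
]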