def scipyoptimize(self, trainingset, method = "Newton-CG", ERROR_LIMIT = 1e-6, max_iterations = () ):
    from scipy.optimize import minimize

    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )

    options = {}
    if max_iterations < ():
        options["maxiter"] = max_iterations

    results = minimize(
        self.error,
        self.get_weights(),
        args = (training_data, training_targets),
        method = method,
        jac = self.gradient,
        options = options
    )

    optimized_weights = results.x
    self.weights = self.unpack( np.array(optimized_weights) )

    if not results.success:
        print "* ERROR: did not converge"

    print "* Error = %.3g." % results.fun
    print "* Trained for %d epochs." % results.nfev

    if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        self.save_to_file()
def scipyoptimize(self, trainingset, method = "Newton-CG", ERROR_LIMIT = 1e-6, max_iterations = () ):
    from scipy.optimize import minimize

    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )

    minimization_options = {}
    if max_iterations < ():
        minimization_options["maxiter"] = max_iterations

    results = minimize(
        self.error,                                 # The function we are minimizing
        self.get_weights(),                         # The vector (parameters) we are minimizing
        args = (training_data, training_targets),   # Additional arguments to the error and gradient function
        method = method,                            # The minimization strategy specified by the user
        jac = self.gradient,                        # The gradient calculating function
        tol = ERROR_LIMIT,                          # The error limit
        options = minimization_options,             # Additional options
    )

    self.weights = self.unpack( results.x )

    if not results.success:
        print "[training] WARNING:", results.message
        print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, results.fun )
    else:
        print "[training] Finished:"
        print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, results.fun )

    if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        self.save_to_file()
def scipyoptimize(network, trainingset, method = "Newton-CG", ERROR_LIMIT = 1e-6, max_iterations = () ):
    from scipy.optimize import minimize

    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )

    minimization_options = {}
    if max_iterations < ():
        minimization_options["maxiter"] = max_iterations

    results = minimize(
        network.error,                              # The function we are minimizing
        network.get_weights(),                      # The vector (parameters) we are minimizing
        args = (training_data, training_targets),   # Additional arguments to the error and gradient function
        method = method,                            # The minimization strategy specified by the user
        jac = network.gradient,                     # The gradient calculating function
        tol = ERROR_LIMIT,                          # The error limit
        options = minimization_options,             # Additional options
    )

    network.weights = network.unpack( results.x )

    if not results.success:
        print "[training] WARNING:", results.message
        print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, results.fun )
    else:
        print "[training] Finished:"
        print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, results.fun )

    if network.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_to_file()
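# The wrappers above all funnel into the same scipy.optimize.minimize calling
# pattern (objective, initial weight vector, args, method, jac, tol, options).
# Below is a minimal, self-contained sketch of that pattern on a toy
# least-squares problem instead of a network error function; the helper names
# here are illustrative only and not part of the library code above.
import numpy as np
from scipy.optimize import minimize

def toy_error( weights, data, targets ):
    residual = np.dot( data, weights ) - targets
    return np.mean( residual ** 2 )

def toy_gradient( weights, data, targets ):
    residual = np.dot( data, weights ) - targets
    return 2.0 * np.dot( data.T, residual ) / data.shape[0]

data = np.random.randn( 50, 3 )
targets = np.dot( data, np.array([ 1.0, -2.0, 0.5 ]) )

results = minimize( toy_error, np.zeros(3), args = (data, targets),
                    method = "Newton-CG", jac = toy_gradient,
                    tol = 1e-6, options = { "maxiter": 200 } )
print "converged:", results.success, "error: %.4g" % results.fun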
def check_gradient(self, trainingset, cost_function, epsilon=1e-4):
    assert trainingset[0].features.shape[0] == self.n_inputs, \
        "ERROR: input size varies from the configuration. Configured as %d, instance had %d" % (self.n_inputs, trainingset[0].features.shape[0])
    assert trainingset[0].targets.shape[0] == self.layers[-1][0], \
        "ERROR: output size varies from the configuration. Configured as %d, instance had %d" % (self.layers[-1][0], trainingset[0].targets.shape[0])

    training_data = np.array([ instance.features for instance in trainingset ][:100]) # perform the test with at most 100 instances
    training_targets = np.array([ instance.targets for instance in trainingset ][:100])

    # assign the weight_vector as the network topology
    initial_weights = np.array(self.get_weights())
    numeric_gradient = np.zeros(initial_weights.shape)
    perturbed = np.zeros(initial_weights.shape)
    n_samples = float(training_data.shape[0])

    print "[gradient check] Running gradient check..."

    for i in xrange(self.n_weights):
        perturbed[i] = epsilon
        right_side = self.error(initial_weights + perturbed, training_data, training_targets, cost_function)
        left_side = self.error(initial_weights - perturbed, training_data, training_targets, cost_function)
        numeric_gradient[i] = (right_side - left_side) / (2 * epsilon)
        perturbed[i] = 0
    #end loop

    # Reset the weights
    self.set_weights(initial_weights)

    # Calculate the analytic gradient
    analytic_gradient = self.gradient(self.get_weights(), training_data, training_targets, cost_function)

    # Compare the numeric and the analytic gradient
    ratio = np.linalg.norm(analytic_gradient - numeric_gradient) / np.linalg.norm(analytic_gradient + numeric_gradient)

    if not ratio < 1e-6:
        print "[gradient check] WARNING: The numeric gradient check failed! Analytical gradient differed by %g from the numerical." % ratio
        if not confirm("[gradient check] Do you want to continue?"):
            print "[gradient check] Exiting."
            import sys
            sys.exit(2)
    else:
        print "[gradient check] Passed!"

    return ratio
def scipyoptimize(network, trainingset, testset, cost_function, method="Newton-CG", save_trained_network=False):
    from scipy.optimize import minimize

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    error_function_wrapper = lambda weights, training_data, training_targets, test_data, test_targets, cost_function: network.error( weights, test_data, test_targets, cost_function )
    gradient_function_wrapper = lambda weights, training_data, training_targets, test_data, test_targets, cost_function: network.gradient( weights, training_data, training_targets, cost_function )

    results = minimize(
        error_function_wrapper,             # The function we are minimizing
        network.get_weights(),              # The vector (parameters) we are minimizing
        method=method,                      # The minimization strategy specified by the user
        jac=gradient_function_wrapper,      # The gradient calculating function
        args=(training_data, training_targets, test_data, test_targets, cost_function), # Additional arguments to the error and gradient function
    )

    network.set_weights(results.x)

    if not results.success:
        print "[training] WARNING:", results.message
        print "[training] Terminated with error %.4g." % results.fun
    else:
        print "[training] Finished:"
        print "[training] Completed with error %.4g." % results.fun
        print "[training] Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )

    if save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def check_gradient(self, trainingset, cost_function, epsilon=1e-4):
    check_network_structure(self, cost_function) # check for special case topology requirements, such as softmax

    training_data, training_targets = verify_dataset_shape_and_modify(self, trainingset)

    # assign the weight_vector as the network topology
    initial_weights = np.array(self.get_weights())
    numeric_gradient = np.zeros(initial_weights.shape)
    perturbed = np.zeros(initial_weights.shape)
    n_samples = float(training_data.shape[0])

    print "[gradient check] Running gradient check..."

    for i in xrange(self.n_weights):
        perturbed[i] = epsilon
        right_side = self.error(initial_weights + perturbed, training_data, training_targets, cost_function)
        left_side = self.error(initial_weights - perturbed, training_data, training_targets, cost_function)
        numeric_gradient[i] = (right_side - left_side) / (2 * epsilon)
        perturbed[i] = 0
        print i, "/", self.n_weights
    #end loop

    # Reset the weights
    self.set_weights(initial_weights)

    # Calculate the analytic gradient
    analytic_gradient = self.gradient(self.get_weights(), training_data, training_targets, cost_function)

    # Compare the numeric and the analytic gradient
    ratio = np.linalg.norm(analytic_gradient - numeric_gradient) / np.linalg.norm(analytic_gradient + numeric_gradient)

    if not ratio < 1e-6:
        print "[gradient check] WARNING: The numeric gradient check failed! Analytical gradient differed by %g from the numerical." % ratio
        if not confirm("[gradient check] Do you want to continue?"):
            print "[gradient check] Exiting."
            import sys
            sys.exit(2)
    else:
        print "[gradient check] Passed!"

    return ratio
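# A self-contained sketch of the central-difference check that check_gradient
# performs above, applied to a plain function of a weight vector rather than a
# network; everything below is illustrative and independent of the code above.
import numpy as np

def f( w ):
    return np.sum( w ** 2 ) + np.prod( w )

def analytic_gradient( w ):
    grad = 2 * w
    for i in xrange( len(w) ):
        grad[i] += np.prod( np.delete( w, i ) )
    return grad

epsilon = 1e-4
w = np.random.randn( 5 )
perturbed = np.zeros( w.shape )
numeric = np.zeros( w.shape )

for i in xrange( len(w) ):
    perturbed[i] = epsilon
    numeric[i] = ( f( w + perturbed ) - f( w - perturbed ) ) / ( 2 * epsilon )
    perturbed[i] = 0

ratio = np.linalg.norm( analytic_gradient( w ) - numeric ) / np.linalg.norm( analytic_gradient( w ) + numeric )
print "gradient check ratio: %g" % ratio   # expected to be far below the 1e-6 threshold used above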
print "1.Implant Trojan." print "2.Implant File." print "3.Get flag." print "4.Store flag." print "5.Get Score." print "6.Singe Rce." print "7.Multi Rce." print "8.Confirm Config." print "9.Monitor Trojan." print "*******************************************************" choose = int(raw_input("Please Input:")) if choose == 1: tools.trojan_implant() tools.living_check() if choose == 2: tools.file_implant() if choose == 3: tools.catch_flag() if choose == 4: tools.store_flag() if choose == 5: tools.upload_flag() if choose == 6: tools.remote_command() if choose == 7: tools.remote_command_multi() if choose == 8: tools.confirm() if choose == 9: tools.living_check()
def scaled_conjugate_gradient(network, trainingset, ERROR_LIMIT = 1e-6, max_iterations = () ):
    # Implemented according to the paper by Martin F. Moller
    # http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.38.3391

    assert network.input_layer_dropout == 0 and network.hidden_layer_dropout == 0, \
        "ERROR: dropout should not be used with scaled conjugated gradients training"

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )

    ## Variables
    sigma0 = 1.e-6
    lamb = 1.e-6
    lamb_ = 0

    vector = network.get_weights() # The (weight) vector we will use SCG to optimize
    N = len(vector)
    grad_new = -network.gradient( vector, training_data, training_targets )
    r_new = grad_new
    # end

    success = True
    k = 0
    while k < max_iterations:
        k += 1

        r = np.copy( r_new )
        grad = np.copy( grad_new )
        mu = np.dot( grad, grad )

        if success:
            success = False
            sigma = sigma0 / math.sqrt(mu)
            s = (network.gradient(vector + sigma * grad, training_data, training_targets) - network.gradient(vector, training_data, training_targets)) / sigma
            delta = np.dot( grad.T, s )
        #end

        # scale s
        zetta = lamb - lamb_
        s += zetta * grad
        delta += zetta * mu

        if delta < 0:
            s += (lamb - 2 * delta / mu) * grad
            lamb_ = 2 * (lamb - delta / mu)
            delta -= lamb * mu
            delta *= -1
            lamb = lamb_
        #end

        phi = np.dot( grad.T, r )
        alpha = phi / delta

        vector_new = vector + alpha * grad
        f_old, f_new = network.error(vector, training_data, training_targets), network.error(vector_new, training_data, training_targets)

        comparison = 2 * delta * (f_old - f_new) / np.power( phi, 2 )

        if comparison >= 0:
            if f_new < ERROR_LIMIT:
                break # done!

            vector = vector_new
            f_old = f_new
            r_new = -network.gradient( vector, training_data, training_targets )

            success = True
            lamb_ = 0

            if k % N == 0:
                grad_new = r_new
            else:
                beta = (np.dot( r_new, r_new ) - np.dot( r_new, r )) / phi
                grad_new = r_new + beta * grad

            if comparison > 0.75:
                lamb = 0.5 * lamb
        else:
            lamb_ = lamb
        # end

        if comparison < 0.25:
            lamb = 4 * lamb

        if k % 1000 == 0:
            print "[training] Current error:", f_new, "\tEpoch:", k
    #end

    network.weights = network.unpack( np.array(vector_new) )

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, f_new )
    print "[training] Trained for %d epochs." % k

    if network.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_to_file()
def resilient_backpropagation(network, trainingset, ERROR_LIMIT=1e-3, max_iterations = (), weight_step_max = 50., weight_step_min = 0., start_step = 0.5, learn_max = 1.2, learn_min = 0.5 ):
    # Implemented according to iRprop+
    # http://sci2s.ugr.es/keel/pdf/algorithm/articulo/2003-Neuro-Igel-IRprop+.pdf

    assert network.input_layer_dropout == 0 and network.hidden_layer_dropout == 0, \
        "ERROR: dropout should not be used with resilient backpropagation"

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )

    # Data structure to store the previous derivative
    previous_dEdW = [ 1 ] * len( network.weights )

    # Storing the current / previous weight step size
    weight_step = [ np.full( weight_layer.shape, start_step ) for weight_layer in network.weights ]

    # Storing the current / previous weight update
    dW = [ np.ones(shape=weight_layer.shape) for weight_layer in network.weights ]

    input_signals, derivatives = network.update( training_data, trace=True )
    out = input_signals[-1]
    cost_derivative = network.cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = network.cost_function(out, training_targets )

    layer_indexes = range( len(network.layers) )[::-1] # reversed
    prev_error = ( ) # inf
    epoch = 0

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes: # Loop over the weight layers in reversed order to calculate the deltas
            # Calculate the delta with respect to the weights
            dEdW = np.dot( delta, add_bias(input_signals[i]) ).T

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]

            # Calculate sign changes and note where they have changed
            diffs = np.multiply( dEdW, previous_dEdW[i] )
            pos_indexes = np.where( diffs > 0 )
            neg_indexes = np.where( diffs < 0 )
            zero_indexes = np.where( diffs == 0 )

            # positive
            if np.any(pos_indexes):
                # Calculate the weight step size
                weight_step[i][pos_indexes] = np.minimum( weight_step[i][pos_indexes] * learn_max, weight_step_max )

                # Calculate the weight step direction
                dW[i][pos_indexes] = np.multiply( -np.sign( dEdW[pos_indexes] ), weight_step[i][pos_indexes] )

                # Apply the weight deltas
                network.weights[i][ pos_indexes ] += dW[i][pos_indexes]

            # negative
            if np.any(neg_indexes):
                weight_step[i][neg_indexes] = np.maximum( weight_step[i][neg_indexes] * learn_min, weight_step_min )

                if error > prev_error:
                    # iRprop+ version of resilient backpropagation
                    network.weights[i][ neg_indexes ] -= dW[i][neg_indexes] # backtrack

                dEdW[ neg_indexes ] = 0

            # zeros
            if np.any(zero_indexes):
                dW[i][zero_indexes] = np.multiply( -np.sign( dEdW[zero_indexes] ), weight_step[i][zero_indexes] )
                network.weights[i][ zero_indexes ] += dW[i][zero_indexes]

            # Store the previous weight step
            previous_dEdW[i] = dEdW
        #end weight adjustment loop

        prev_error = error

        input_signals, derivatives = network.update( training_data, trace=True )
        out = input_signals[-1]
        cost_derivative = network.cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = network.cost_function(out, training_targets )

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training] Trained for %d epochs." % epoch

    if network.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_to_file()
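# A standalone sketch of the step-size adaptation used above: the per-weight
# step grows by learn_max while the derivative keeps its sign and shrinks by
# learn_min when it flips. The backtracking and derivative-zeroing of the full
# iRprop+ update are omitted here; the numbers are purely illustrative.
import numpy as np

learn_max, learn_min = 1.2, 0.5
weight_step_max, weight_step_min = 50., 0.

dEdW = np.array([ 0.3, -0.1, 0.02 ])            # current derivatives
previous_dEdW = np.array([ 0.2, 0.4, -0.01 ])   # previous derivatives
weight_step = np.full( 3, 0.5 )                 # current per-weight step sizes

diffs = dEdW * previous_dEdW
weight_step = np.where( diffs > 0, np.minimum( weight_step * learn_max, weight_step_max ), weight_step )
weight_step = np.where( diffs < 0, np.maximum( weight_step * learn_min, weight_step_min ), weight_step )

print "adapted step sizes:", weight_step   # -> roughly [ 0.6, 0.25, 0.25 ]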
def backpropagation(network, trainingset, ERROR_LIMIT=1e-3, learning_rate=0.03, momentum_factor=0.9, max_iterations=()):
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])

    layer_indexes = range(len(network.layers))[::-1] # reversed
    momentum = collections.defaultdict(int)

    epoch = 0

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    error = network.cost_function(out, training_targets)
    cost_derivative = network.cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes: # Loop over the weight layers in reversed order to calculate the deltas
            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                network.hidden_layer_dropout if i > 0 else network.input_layer_dropout )

            # calculate the weight change
            dW = -learning_rate * np.dot(delta, add_bias(dropped)).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        #end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        error = network.cost_function(out, training_targets)
        cost_derivative = network.cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error)
    print "[training] Trained for %d epochs." % epoch

    if network.save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_to_file()
def backpropagation(network, trainingset, testset, cost_function, ERROR_LIMIT=1e-3, learning_rate=0.03, momentum_factor=0.9, max_iterations=(), input_layer_dropout=0.0, hidden_layer_dropout=0.0, save_trained_network=False):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    momentum = collections.defaultdict(int)

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = cost_function(network.update(test_data), test_targets)

    layer_indexes = range(len(network.layers))[::-1] # reversed
    epoch = 0
    n_samples = float(training_data.shape[0])

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes: # Loop over the weight layers in reversed order to calculate the deltas
            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                hidden_layer_dropout if i > 0 else input_layer_dropout )

            # calculate the weight change from the dropped-out signals
            dW = -learning_rate * (np.dot(delta, add_bias(dropped)) / n_samples).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            network.weights[i] += dW
        #end weight adjustment loop

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        cost_derivative = cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = cost_function(network.update(test_data), test_targets)

        if epoch % 1000 == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error)
    print "[training] Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function)
    print "[training] Trained for %d epochs." % epoch

    if save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def scaled_conjugate_gradient(network, trainingset, testset, cost_function, ERROR_LIMIT = 1e-6, max_iterations = (), save_trained_network = False ):
    # Implemented according to the paper by Martin F. Moller
    # http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.38.3391

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )
    test_data = np.array( [instance.features for instance in testset ] )
    test_targets = np.array( [instance.targets for instance in testset ] )

    ## Variables
    sigma0 = 1.e-6
    lamb = 1.e-6
    lamb_ = 0

    vector = network.get_weights() # The (weight) vector we will use SCG to optimize
    N = len(vector)
    grad_new = -network.gradient( vector, training_data, training_targets, cost_function )
    r_new = grad_new
    # end

    success = True
    k = 0
    while k < max_iterations:
        k += 1

        r = np.copy( r_new )
        grad = np.copy( grad_new )
        mu = np.dot( grad, grad )

        if success:
            success = False
            sigma = sigma0 / math.sqrt(mu)
            s = (network.gradient(vector + sigma * grad, training_data, training_targets, cost_function) - network.gradient(vector, training_data, training_targets, cost_function)) / sigma
            delta = np.dot( grad.T, s )
        #end

        # scale s
        zetta = lamb - lamb_
        s += zetta * grad
        delta += zetta * mu

        if delta < 0:
            s += (lamb - 2 * delta / mu) * grad
            lamb_ = 2 * (lamb - delta / mu)
            delta -= lamb * mu
            delta *= -1
            lamb = lamb_
        #end

        phi = np.dot( grad.T, r )
        alpha = phi / delta

        vector_new = vector + alpha * grad
        f_old, f_new = network.error(vector, test_data, test_targets, cost_function), network.error(vector_new, test_data, test_targets, cost_function)

        comparison = 2 * delta * (f_old - f_new) / np.power( phi, 2 )

        if comparison >= 0:
            if f_new < ERROR_LIMIT:
                break # done!

            vector = vector_new
            f_old = f_new
            r_new = -network.gradient( vector, training_data, training_targets, cost_function )

            success = True
            lamb_ = 0

            if k % N == 0:
                grad_new = r_new
            else:
                beta = (np.dot( r_new, r_new ) - np.dot( r_new, r )) / phi
                grad_new = r_new + beta * grad

            if comparison > 0.75:
                lamb = 0.5 * lamb
        else:
            lamb_ = lamb
        # end

        if comparison < 0.25:
            lamb = 4 * lamb

        if k % 1000 == 0:
            print "[training] Current error:", f_new, "\tEpoch:", k
    #end

    network.set_weights( np.array(vector_new) )

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, f_new )
    print "[training] Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )
    print "[training] Trained for %d epochs." % k

    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_network_to_file()
#end scg


# NOT YET IMPLEMENTED
#def generalized_hebbian(network, trainingset, testset, cost_function, ERROR_LIMIT = 1e-3, learning_rate = 0.001, max_iterations = (), save_trained_network = False ):
#    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
#        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
#    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
#        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
#
#    assert trainingset[0].features.shape[0] == network.n_inputs, \
#        "ERROR: input size varies from the defined input setting"
#    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
#        "ERROR: output size varies from the defined output setting"
#
#    training_data = np.array( [instance.features for instance in trainingset ] )
#    training_targets = np.array( [instance.targets for instance in trainingset ] )
#    test_data = np.array( [instance.features for instance in testset ] )
#    test_targets = np.array( [instance.targets for instance in testset ] )
#
#    layer_indexes = range( len(network.layers) )
#    epoch = 0
#
#    input_signals, derivatives = network.update( training_data, trace=True )
#
#    out = input_signals[-1]
#    error = cost_function( out, training_targets )
#
#    input_signals[-1] = out - training_targets
#
#    while error > ERROR_LIMIT and epoch < max_iterations:
#        epoch += 1
#
#        for i in layer_indexes:
#            forgetting_term = np.dot(network.weights[i], np.tril(np.dot( input_signals[i+1].T, input_signals[i+1] )))
#            activation_product = np.dot(add_bias(input_signals[i]).T, input_signals[i+1])
#            network.weights[i] += learning_rate * (activation_product - forgetting_term)
#        #end weight adjustment loop
#
#        # normalize the weight to prevent the weights from growing unbounded
#        network.weights[i] /= np.sqrt(np.sum(network.weights[i]**2))
#
#        input_signals, derivatives = network.update( training_data, trace=True )
#        out = input_signals[-1]
#        error = cost_function(out, training_targets )
#
#        if epoch%1000==0:
#            # Show the current training status
#            print "[training] Current error:", error, "\tEpoch:", epoch
#
#    print "[training] Finished:"
#    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
#    print "[training] Trained for %d epochs." % epoch
#
#    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
#        network.save_network_to_file()
## end backprop
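# A standalone sketch of the lambda (trust-region-like) schedule used in the
# SCG loop above: lambda is halved when the quadratic approximation is very
# good (comparison > 0.75) and quadrupled when it is poor (comparison < 0.25).
# The bookkeeping for rejected steps (lamb_ = lamb) is left out for brevity.
def update_lambda( comparison, lamb ):
    if comparison > 0.75:
        lamb = 0.5 * lamb
    if comparison < 0.25:
        lamb = 4 * lamb
    return lamb

for comparison in ( 0.9, 0.5, 0.1 ):
    print "comparison %.2f -> lambda %g" % ( comparison, update_lambda( comparison, 1e-6 ) )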
def resilient_backpropagation(network, trainingset, testset, cost_function, ERROR_LIMIT=1e-3, max_iterations=(), weight_step_max=50., weight_step_min=0., start_step=0.5, learn_max=1.2, learn_min=0.5, print_rate=1000, save_trained_network=False):
    # Implemented according to iRprop+
    # http://sci2s.ugr.es/keel/pdf/algorithm/articulo/2003-Neuro-Igel-IRprop+.pdf

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    # Storing the current / previous weight step size
    weight_step = [ np.full(weight_layer.shape, start_step) for weight_layer in network.weights ]

    # Storing the current / previous weight update
    dW = [ np.ones(shape=weight_layer.shape) for weight_layer in network.weights ]

    # Storing the previous derivative
    previous_dEdW = [1] * len(network.weights)

    # Storing the previous error measurement
    prev_error = () # inf

    input_signals, derivatives = network.update(training_data, trace=True)
    out = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta = cost_derivative * derivatives[-1]
    error = cost_function(network.update(test_data), test_targets)

    n_samples = float(training_data.shape[0])
    layer_indexes = range(len(network.layers))[::-1] # reversed
    epoch = 0

    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes: # Loop over the weight layers in reversed order to calculate the deltas
            # Calculate the delta with respect to the weights
            dEdW = (np.dot(delta, add_bias(input_signals[i])) / n_samples).T

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot(network.weights[i][1:, :], delta)

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i - 1]

            # Calculate sign changes and note where they have changed
            diffs = np.multiply(dEdW, previous_dEdW[i])
            pos_indexes = np.where(diffs > 0)
            neg_indexes = np.where(diffs < 0)
            zero_indexes = np.where(diffs == 0)

            # positive
            if np.any(pos_indexes):
                # Calculate the weight step size
                weight_step[i][pos_indexes] = np.minimum(weight_step[i][pos_indexes] * learn_max, weight_step_max)

                # Calculate the weight step direction
                dW[i][pos_indexes] = np.multiply(-np.sign(dEdW[pos_indexes]), weight_step[i][pos_indexes])

                # Apply the weight deltas
                network.weights[i][pos_indexes] += dW[i][pos_indexes]

            # negative
            if np.any(neg_indexes):
                weight_step[i][neg_indexes] = np.maximum(weight_step[i][neg_indexes] * learn_min, weight_step_min)

                if error > prev_error:
                    # iRprop+ version of resilient backpropagation
                    network.weights[i][neg_indexes] -= dW[i][neg_indexes] # backtrack

                dEdW[neg_indexes] = 0

            # zeros
            if np.any(zero_indexes):
                dW[i][zero_indexes] = np.multiply(-np.sign(dEdW[zero_indexes]), weight_step[i][zero_indexes])
                network.weights[i][zero_indexes] += dW[i][zero_indexes]

            # Store the previous weight step
            previous_dEdW[i] = dEdW
        #end weight adjustment loop

        prev_error = error

        input_signals, derivatives = network.update(training_data, trace=True)
        out = input_signals[-1]
        cost_derivative = cost_function(out, training_targets, derivative=True).T
        delta = cost_derivative * derivatives[-1]
        error = cost_function(network.update(test_data), test_targets)

        if epoch % print_rate == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error)
    print "[training] Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function)
    print "[training] Trained for %d epochs." % epoch

    if save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def scg(self, trainingset, ERROR_LIMIT = 1e-6, max_iterations = () ):
    # Implemented according to the paper by Martin F. Moller
    # http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.38.3391

    assert self.input_layer_dropout == 0 and self.hidden_layer_dropout == 0, \
        "ERROR: dropout should not be used with scaled conjugated gradients training"

    assert trainingset[0].features.shape[0] == self.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == self.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )

    ## Variables
    sigma0 = 1.e-6
    lamb = 1.e-6
    lamb_ = 0

    vector = self.get_weights() # The (weight) vector we will use SCG to optimize
    N = len(vector)
    grad_new = -self.gradient( vector, training_data, training_targets )
    r_new = grad_new
    # end

    success = True
    k = 0
    while k < max_iterations:
        k += 1

        r = np.copy( r_new )
        grad = np.copy( grad_new )
        mu = np.dot( grad, grad )

        if success:
            success = False
            sigma = sigma0 / math.sqrt(mu)
            s = (self.gradient(vector + sigma * grad, training_data, training_targets) - self.gradient(vector, training_data, training_targets)) / sigma
            delta = np.dot( grad.T, s )
        #end

        # scale s
        zetta = lamb - lamb_
        s += zetta * grad
        delta += zetta * mu

        if delta < 0:
            s += (lamb - 2 * delta / mu) * grad
            lamb_ = 2 * (lamb - delta / mu)
            delta -= lamb * mu
            delta *= -1
            lamb = lamb_
        #end

        phi = np.dot( grad.T, r )
        alpha = phi / delta

        vector_new = vector + alpha * grad
        f_old, f_new = self.error(vector, training_data, training_targets), self.error(vector_new, training_data, training_targets)

        comparison = 2 * delta * (f_old - f_new) / np.power( phi, 2 )

        if comparison >= 0:
            if f_new < ERROR_LIMIT:
                break # done!

            vector = vector_new
            f_old = f_new
            r_new = -self.gradient( vector, training_data, training_targets )

            success = True
            lamb_ = 0

            if k % N == 0:
                grad_new = r_new
            else:
                beta = (np.dot( r_new, r_new ) - np.dot( r_new, r )) / phi
                grad_new = r_new + beta * grad

            if comparison > 0.75:
                lamb = 0.5 * lamb
        else:
            lamb_ = lamb
        # end

        if comparison < 0.25:
            lamb = 4 * lamb

        if k % 1000 == 0:
            print "* current network error (MSE):", f_new
    #end

    self.weights = self.unpack( np.array(vector_new) )

    print "* Converged to error bound (%.3g) with MSE = %.3g." % ( ERROR_LIMIT, f_new )
    print "* Trained for %d epochs." % k

    if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        self.save_to_file()
def backpropagation(self, trainingset, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = () ):
    assert trainingset[0].features.shape[0] == self.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == self.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )

    layer_indexes = range( len(self.layers) )[::-1] # reversed
    momentum = collections.defaultdict( int )

    MSE = ( ) # inf
    epoch = 0

    input_signals, derivatives = self.update( training_data, trace=True )

    out = input_signals[-1]
    error = (out - training_targets).T
    delta = error * derivatives[-1]
    MSE = np.mean( np.power(error,2) )

    while MSE > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        for i in layer_indexes: # Loop over the weight layers in reversed order to calculate the deltas
            # perform dropout
            dropped = dropout(
                input_signals[i],
                # dropout probability
                self.hidden_layer_dropout if i else self.input_layer_dropout )

            # calculate the weight change
            dW = -learning_rate * np.dot( delta, add_bias(dropped) ).T + momentum_factor * momentum[i]

            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( self.weights[ i ][1:,:], delta )

                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]

            # Store the momentum
            momentum[i] = dW

            # Update the weights
            self.weights[ i ] += dW
        #end weight adjustment loop

        input_signals, derivatives = self.update( training_data, trace=True )
        out = input_signals[-1]
        error = (out - training_targets).T
        delta = error * derivatives[-1]
        MSE = np.mean( np.power(error,2) )

        if epoch%1000==0:
            # Show the current training status
            print "* current network error (MSE):", MSE

    print "* Converged to error bound (%.4g) with MSE = %.4g." % ( ERROR_LIMIT, MSE )
    print "* Trained for %d epochs." % epoch

    if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        self.save_to_file()
def backpropagation(network, trainingset, testset, cost_function, evaluation_function=None, ERROR_LIMIT=1e-3, learning_rate=0.03, momentum_factor=0.9, max_iterations=(), batch_size=0, input_layer_dropout=0.0, hidden_layer_dropout=0.0, print_rate=1000, save_trained_network=False):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    # Whether to use another function for printing the dataset error than the cost function.
    # This is useful if you train the network with the MSE cost function, but are going to
    # classify rather than regress on your data.
    calculate_print_error = evaluation_function if evaluation_function != None else cost_function

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    batch_size = batch_size if batch_size != 0 else training_data.shape[0]
    batch_training_data = np.array_split(training_data, math.ceil(1.0 * training_data.shape[0] / batch_size))
    batch_training_targets = np.array_split(training_targets, math.ceil(1.0 * training_targets.shape[0] / batch_size))
    batch_indices = range(len(batch_training_data)) # fast reference to batches

    error = calculate_print_error(network.update(test_data), test_targets)
    reversed_layer_indexes = range(len(network.layers))[::-1]
    momentum = collections.defaultdict(int)

    epoch = 0
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1

        random.shuffle(batch_indices) # Shuffle the order in which the batches are processed between the iterations

        for batch_index in batch_indices:
            batch_data = batch_training_data[batch_index]
            batch_targets = batch_training_targets[batch_index]
            batch_size = float(batch_data.shape[0])

            input_signals, derivatives = network.update(batch_data, trace=True)
            out = input_signals[-1]
            cost_derivative = cost_function(out, batch_targets, derivative=True).T
            delta = cost_derivative * derivatives[-1]

            for i in reversed_layer_indexes: # Loop over the weight layers in reversed order to calculate the deltas
                # perform dropout
                dropped = dropout(
                    input_signals[i],
                    # dropout probability
                    hidden_layer_dropout if i > 0 else input_layer_dropout )

                # calculate the weight change
                dW = -learning_rate * (np.dot(delta, add_bias(dropped)) / batch_size).T + momentum_factor * momentum[i]

                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skip the bias weight
                    weight_delta = np.dot(network.weights[i][1:, :], delta)

                    # Calculate the delta for the subsequent layer
                    delta = weight_delta * derivatives[i - 1]

                # Store the momentum
                momentum[i] = dW

                # Update the weights
                network.weights[i] += dW
            #end weight adjustment loop

        error = calculate_print_error(network.update(test_data), test_targets)

        if epoch % print_rate == 0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error)
    print "[training] Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function)
    print "[training] Trained for %d epochs." % epoch

    if save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
def scaled_conjugate_gradient(network, trainingset, testset, cost_function, ERROR_LIMIT=1e-6, max_iterations=(), print_rate=1000, save_trained_network=False):
    # Implemented according to the paper by Martin F. Moller
    # http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.38.3391

    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."

    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"

    training_data = np.array([instance.features for instance in trainingset])
    training_targets = np.array([instance.targets for instance in trainingset])
    test_data = np.array([instance.features for instance in testset])
    test_targets = np.array([instance.targets for instance in testset])

    ## Variables
    sigma0 = 1.e-6
    lamb = 1.e-6
    lamb_ = 0

    vector = network.get_weights() # The (weight) vector we will use SCG to optimize
    grad_new = -network.gradient(vector, training_data, training_targets, cost_function)
    r_new = grad_new
    # end

    success = True
    k = 0
    while k < max_iterations:
        k += 1

        r = np.copy(r_new)
        grad = np.copy(grad_new)
        mu = np.dot(grad, grad)

        if success:
            success = False
            sigma = sigma0 / math.sqrt(mu)
            s = (network.gradient(vector + sigma * grad, training_data, training_targets, cost_function) - network.gradient(vector, training_data, training_targets, cost_function)) / sigma
            delta = np.dot(grad.T, s)
        #end

        # scale s
        zetta = lamb - lamb_
        s += zetta * grad
        delta += zetta * mu

        if delta < 0:
            s += (lamb - 2 * delta / mu) * grad
            lamb_ = 2 * (lamb - delta / mu)
            delta -= lamb * mu
            delta *= -1
            lamb = lamb_
        #end

        phi = np.dot(grad.T, r)
        alpha = phi / delta

        vector_new = vector + alpha * grad
        f_old, f_new = network.error(vector, test_data, test_targets, cost_function), network.error(vector_new, test_data, test_targets, cost_function)

        comparison = 2 * delta * (f_old - f_new) / np.power(phi, 2)

        if comparison >= 0:
            if f_new < ERROR_LIMIT:
                break # done!

            vector = vector_new
            f_old = f_new
            r_new = -network.gradient(vector, training_data, training_targets, cost_function)

            success = True
            lamb_ = 0

            if k % network.n_weights == 0:
                grad_new = r_new
            else:
                beta = (np.dot(r_new, r_new) - np.dot(r_new, r)) / phi
                grad_new = r_new + beta * grad

            if comparison > 0.75:
                lamb = 0.5 * lamb
        else:
            lamb_ = lamb
        # end

        if comparison < 0.25:
            lamb = 4 * lamb

        if k % print_rate == 0:
            print "[training] Current error:", f_new, "\tEpoch:", k
    #end

    network.set_weights(np.array(vector_new))

    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, f_new)
    print "[training] Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function)
    print "[training] Trained for %d epochs." % k

    if save_trained_network and confirm(promt="Do you wish to store the trained network?"):
        network.save_network_to_file()
#end scg


## NOT YET IMPLEMENTED
#def generalized_hebbian(network, trainingset, testset, cost_function, learning_rate = 0.001, max_iterations = (), save_trained_network = False ):
#    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
#        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
#    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
#        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
#
#    assert trainingset[0].features.shape[0] == network.n_inputs, \
#        "ERROR: input size varies from the defined input setting"
#    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
#        "ERROR: output size varies from the defined output setting"
#
#    training_data = np.array( [instance.features for instance in trainingset ] )
#    training_targets = np.array( [instance.targets for instance in trainingset ] )
#
#    layer_indexes = range( len(network.layers) )
#    epoch = 0
#
#    input_signals, derivatives = network.update( training_data, trace=True )
#    error = cost_function(input_signals[-1], training_targets )
#    input_signals[-1] -= training_targets
#
#    while error > 0.01 and epoch < max_iterations:
#        epoch += 1
#
#        for i in layer_indexes:
#            forgetting_term = np.dot(network.weights[i], np.tril(np.dot( input_signals[i+1].T, input_signals[i+1] )))
#            activation_product = np.dot(add_bias(input_signals[i]).T, input_signals[i+1])
#            dW = learning_rate * (activation_product - forgetting_term)
#            network.weights[i] += dW
#
#            # normalize the weight to prevent the weights from growing unbounded
#            #network.weights[i] /= np.sqrt(np.sum(network.weights[i]**2))
#        #end weight adjustment loop
#
#        input_signals, derivatives = network.update( training_data, trace=True )
#        error = cost_function(input_signals[-1], training_targets )
#        input_signals[-1] -= training_targets
#
#        if epoch % 1000 == 0:
#            print "[training] Error:", error
#
#    print "[training] Finished:"
#    print "[training] Trained for %d epochs." % epoch
#
#    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
#        network.save_network_to_file()
## end hebbian
def resilient_backpropagation(self, trainingset, ERROR_LIMIT=1e-3, max_iterations = (), weight_step_max = 50., weight_step_min = 0., start_step = 0.5, learn_max = 1.2, learn_min = 0.5 ):
    # Implemented according to iRprop+
    # http://sci2s.ugr.es/keel/pdf/algorithm/articulo/2003-Neuro-Igel-IRprop+.pdf
    
    assert self.input_layer_dropout == 0 and self.hidden_layer_dropout == 0, \
        "ERROR: dropout should not be used with resilient backpropagation"
    
    assert trainingset[0].features.shape[0] == self.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == self.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"
    
    training_data    = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )
    
    # Data structure to store the previous derivative
    last_dEdW = [ 1 ] * len( self.weights )
    
    # Storing the current / previous weight step size
    weight_step = [ np.full( weight_layer.shape, start_step ) for weight_layer in self.weights ]
    
    # Storing the current / previous weight update
    dW = [ np.ones(shape=weight_layer.shape) for weight_layer in self.weights ]
    
    input_signals, derivatives = self.update( training_data, trace=True )
    out   = input_signals[-1]
    error = (out - training_targets).T
    delta = error * derivatives[-1]
    MSE   = np.mean( np.power(error,2) )
    
    layer_indexes = range( len(self.layers) )[::-1] # reversed
    prev_MSE      = ( ) # inf
    epoch         = 0
    
    while MSE > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1
        
        # Loop over the weight layers in reversed order to calculate the deltas
        for i in layer_indexes:
            # Calculate the delta with respect to the weights
            dEdW = np.dot( delta, add_bias(input_signals[i]) ).T
            
            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( self.weights[ i ][1:,:], delta )
                
                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]
            
            # Calculate sign changes and note where they have changed
            diffs        = np.multiply( dEdW, last_dEdW[i] )
            pos_indexes  = np.where( diffs > 0 )
            neg_indexes  = np.where( diffs < 0 )
            zero_indexes = np.where( diffs == 0 )
            
            # positive
            if np.any(pos_indexes):
                # Calculate the weight step size
                weight_step[i][pos_indexes] = np.minimum( weight_step[i][pos_indexes] * learn_max, weight_step_max )
                
                # Calculate the weight step direction
                dW[i][pos_indexes] = np.multiply( -np.sign( dEdW[pos_indexes] ), weight_step[i][pos_indexes] )
                
                # Apply the weight deltas
                self.weights[i][ pos_indexes ] += dW[i][pos_indexes]
            
            # negative
            if np.any(neg_indexes):
                weight_step[i][neg_indexes] = np.maximum( weight_step[i][neg_indexes] * learn_min, weight_step_min )
                
                if MSE > prev_MSE:
                    # iRprop+ version of resilient backpropagation
                    self.weights[i][ neg_indexes ] -= dW[i][neg_indexes] # backtrack
                
                dEdW[ neg_indexes ] = 0
            
            # zeros
            if np.any(zero_indexes):
                dW[i][zero_indexes] = np.multiply( -np.sign( dEdW[zero_indexes] ), weight_step[i][zero_indexes] )
                self.weights[i][ zero_indexes ] += dW[i][zero_indexes]
            
            # Store the previous weight step
            last_dEdW[i] = dEdW
        #end weight adjustment loop
        
        prev_MSE = MSE
        
        input_signals, derivatives = self.update( training_data, trace=True )
        out   = input_signals[-1]
        error = (out - training_targets).T
        delta = error * derivatives[-1]
        MSE   = np.mean( np.power(error,2) )
        
        if epoch%1000==0:
            print "* current network error (MSE):", MSE
    
    print "* Converged to error bound (%.3g) with MSE = %.3g." % ( ERROR_LIMIT, MSE )
    print "* Trained for %d epochs." % epoch
    
    if self.save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        self.save_to_file()
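
# The method above implements iRprop+: each weight keeps its own step size, which grows while
# the sign of the partial derivative stays the same, shrinks when the sign flips, and the last
# update is undone ("backtracked") on a sign flip if the error has increased. The function below
# is a standalone sketch of that rule on a toy quadratic error; it is illustrative only and not
# part of the library, and all names in it are hypothetical.
def _irprop_plus_sketch():
    import numpy as np
    target = np.array([ 1.0, -2.0, 0.5 ])
    w      = np.zeros(3)
    step   = np.full(3, 0.5)                   # start_step
    dW     = np.zeros(3)
    last_g = np.zeros(3)
    last_f = np.inf
    for _ in xrange(100):
        g    = w - target                      # gradient of f(w) = 0.5*||w - target||^2
        f    = 0.5 * np.sum((w - target)**2)   # current error
        sign = g * last_g
        
        grow = sign > 0                        # same sign: enlarge the step, move against the gradient
        step[grow] = np.minimum(step[grow] * 1.2, 50.)   # learn_max, weight_step_max
        dW[grow]   = -np.sign(g[grow]) * step[grow]
        w[grow]   += dW[grow]
        
        flip = sign < 0                        # sign changed: shrink the step, backtrack if error rose
        step[flip] = np.maximum(step[flip] * 0.5, 0.)    # learn_min, weight_step_min
        if f > last_f:
            w[flip] -= dW[flip]
        g[flip] = 0
        
        zero = sign == 0                       # no sign information yet: plain step against the gradient
        dW[zero] = -np.sign(g[zero]) * step[zero]
        w[zero] += dW[zero]
        
        last_g, last_f = g, f
    return w                                   # approaches `target`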
def backpropagation(network, trainingset, testset, cost_function, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = (), input_layer_dropout = 0.0, hidden_layer_dropout = 0.0, save_trained_network = False ):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
    
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"
    
    training_data    = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )
    test_data        = np.array( [instance.features for instance in testset ] )
    test_targets     = np.array( [instance.targets for instance in testset ] )
    
    momentum = collections.defaultdict( int )
    
    input_signals, derivatives = network.update( training_data, trace=True )
    out             = input_signals[-1]
    cost_derivative = cost_function(out, training_targets, derivative=True).T
    delta           = cost_derivative * derivatives[-1]
    error           = cost_function(network.update( test_data ), test_targets )
    
    layer_indexes = range( len(network.layers) )[::-1] # reversed
    epoch         = 0
    n_samples     = float(training_data.shape[0])
    
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1
        
        # Loop over the weight layers in reversed order to calculate the deltas
        for i in layer_indexes:
            # perform dropout
            dropped = dropout(
                        input_signals[i],
                        # dropout probability
                        hidden_layer_dropout if i > 0 else input_layer_dropout
                    )
            
            # calculate the weight change from the dropped activations,
            # so that the dropout parameters actually take effect
            dW = -learning_rate * (np.dot( delta, add_bias(dropped) )/n_samples).T + momentum_factor * momentum[i]
            
            if i != 0:
                """Do not calculate the delta unnecessarily."""
                # Skip the bias weight
                weight_delta = np.dot( network.weights[ i ][1:,:], delta )
                
                # Calculate the delta for the subsequent layer
                delta = weight_delta * derivatives[i-1]
            
            # Store the momentum
            momentum[i] = dW
            
            # Update the weights
            network.weights[ i ] += dW
        #end weight adjustment loop
        
        input_signals, derivatives = network.update( training_data, trace=True )
        out             = input_signals[-1]
        cost_derivative = cost_function(out, training_targets, derivative=True).T
        delta           = cost_derivative * derivatives[-1]
        error           = cost_function(network.update( test_data ), test_targets )
        
        if epoch%1000==0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch
    
    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training] Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )
    print "[training] Trained for %d epochs." % epoch
    
    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_network_to_file()
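
# Usage sketch (illustrative only): the module-level trainer above takes the network object
# explicitly instead of being a bound method. The `NeuralNet`, `Instance` and `cross_entropy_cost`
# names below are assumptions about the surrounding library, not definitions made in this file;
# substitute whatever your build actually exposes.
#
#   training_set = [ Instance( [0,0], [0] ), Instance( [1,1], [0] ),
#                    Instance( [0,1], [1] ), Instance( [1,0], [1] ) ]
#   backpropagation( network, training_set, training_set, cross_entropy_cost,
#                    ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9 )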
def backpropagation(network, trainingset, testset, cost_function, evaluation_function = None, ERROR_LIMIT = 1e-3, learning_rate = 0.03, momentum_factor = 0.9, max_iterations = (), batch_size = 0, input_layer_dropout = 0.0, hidden_layer_dropout = 0.0, print_rate = 1000, save_trained_network = False ):
    assert softmax_function != network.layers[-1][1] or cost_function == softmax_neg_loss,\
        "When using the `softmax` activation function, the cost function MUST be `softmax_neg_loss`."
    assert cost_function != softmax_neg_loss or softmax_function == network.layers[-1][1],\
        "When using the `softmax_neg_loss` cost function, the activation function in the final layer MUST be `softmax`."
    
    assert trainingset[0].features.shape[0] == network.n_inputs, \
        "ERROR: input size varies from the defined input setting"
    assert trainingset[0].targets.shape[0] == network.layers[-1][0], \
        "ERROR: output size varies from the defined output setting"
    
    # Whether to use another function for printing the dataset error than the cost function.
    # This is useful if you train the network with the MSE cost function, but are going to
    # classify rather than regress on your data.
    calculate_print_error = evaluation_function if evaluation_function != None else cost_function
    
    training_data    = np.array( [instance.features for instance in trainingset ] )
    training_targets = np.array( [instance.targets for instance in trainingset ] )
    test_data        = np.array( [instance.features for instance in testset ] )
    test_targets     = np.array( [instance.targets for instance in testset ] )
    
    batch_size             = batch_size if batch_size != 0 else training_data.shape[0]
    batch_training_data    = np.array_split(training_data, math.ceil(1.0 * training_data.shape[0] / batch_size))
    batch_training_targets = np.array_split(training_targets, math.ceil(1.0 * training_targets.shape[0] / batch_size))
    batch_indices          = range(len(batch_training_data)) # fast reference to batches
    
    error                  = calculate_print_error(network.update( test_data ), test_targets )
    reversed_layer_indexes = range( len(network.layers) )[::-1]
    momentum               = collections.defaultdict( int )
    
    epoch = 0
    while error > ERROR_LIMIT and epoch < max_iterations:
        epoch += 1
        
        random.shuffle(batch_indices) # Shuffle the order in which the batches are processed between the iterations
        
        for batch_index in batch_indices:
            batch_data    = batch_training_data[ batch_index ]
            batch_targets = batch_training_targets[ batch_index ]
            batch_size    = float( batch_data.shape[0] )
            
            input_signals, derivatives = network.update( batch_data, trace=True )
            out             = input_signals[-1]
            cost_derivative = cost_function( out, batch_targets, derivative=True ).T
            delta           = cost_derivative * derivatives[-1]
            
            # Loop over the weight layers in reversed order to calculate the deltas
            for i in reversed_layer_indexes:
                # perform dropout
                dropped = dropout(
                            input_signals[i],
                            # dropout probability
                            hidden_layer_dropout if i > 0 else input_layer_dropout
                        )
                
                # calculate the weight change
                dW = -learning_rate * (np.dot( delta, add_bias(dropped) )/batch_size).T + momentum_factor * momentum[i]
                
                if i != 0:
                    """Do not calculate the delta unnecessarily."""
                    # Skip the bias weight
                    weight_delta = np.dot( network.weights[ i ][1:,:], delta )
                    
                    # Calculate the delta for the subsequent layer
                    delta = weight_delta * derivatives[i-1]
                
                # Store the momentum
                momentum[i] = dW
                
                # Update the weights
                network.weights[ i ] += dW
            #end weight adjustment loop
        
        error = calculate_print_error(network.update( test_data ), test_targets )
        
        if epoch%print_rate==0:
            # Show the current training status
            print "[training] Current error:", error, "\tEpoch:", epoch
    
    print "[training] Finished:"
    print "[training] Converged to error bound (%.4g) with error %.4g." % ( ERROR_LIMIT, error )
    print "[training] Measured quality: %.4g" % network.measure_quality( training_data, training_targets, cost_function )
    print "[training] Trained for %d epochs." % epoch
    
    if save_trained_network and confirm( promt = "Do you wish to store the trained network?" ):
        network.save_network_to_file()