def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
        e_lims, n_repeats):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   input_dim: positive integer number of input dimensions
    -   output_dim: positive integer number of output dimensions
    -   n_train: positive integer number of points in the training set
    -   batch_size: positive integer batch size to use for training
    -   t_lim: positive float, length of time to train for each experiment
    -   num_hidden_units: list of positive integers, number of hidden units in
        each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    -   e_lims: list of 2 floats, used as axis limits in the output plots
    -   n_repeats: positive integer number of repeats to perform of each
        experiment
    """
    np.random.seed(1913)

    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Do something useful
    pass
def main(args):
    # Get name of output directory, and create it if it doesn't exist
    param_str = (
        "input_dim = %i, output_dim = %i, n_train = %i, t_lim = %.2f, "
        "num_repeats = %i, find_best_params = %s"
        % (
            args.input_dim,
            args.output_dim,
            args.n_train,
            args.t_lim,
            args.n_repeats,
            args.find_best_params,
        )
    )
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Gradient descent",
        param_str,
    )
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Initialise data set
    np.random.seed(6763)
    sin_data = data.Sinusoidal(
        args.input_dim,
        args.output_dim,
        args.n_train,
        x_lo=-2,
        x_hi=2,
        freq=(1 if args.input_dim == 1 else None),
    )

    # Define function to be run for each experiment
    def run_experiment(
        num_units,
        num_layers,
        log10_s0,
        alpha,
        beta,
        act_func,
        max_steps,
        batch_size,
        batch_replace,
        plot_preds=False,
    ):
        # Initialise network and batch getter
        model = models.NeuralNetwork(
            input_dim=args.input_dim,
            output_dim=args.output_dim,
            num_hidden_units=[num_units for _ in range(num_layers)],
            act_funcs=[act_func, models.activations.identity],
        )
        if (batch_size is None) or (batch_size >= args.n_train):
            batch_getter = optimisers.batch.FullTrainingSet()
        else:
            batch_getter = optimisers.batch.ConstantBatchSize(
                batch_size,
                batch_replace,
            )

        # Perform gradient descent
        result = optimisers.gradient_descent(
            model,
            sin_data,
            terminator=optimisers.Terminator(t_lim=args.t_lim),
            evaluator=optimisers.Evaluator(t_interval=args.t_eval),
            line_search=optimisers.LineSearch(
                s0=pow(10, log10_s0),
                alpha=alpha,
                beta=beta,
                max_its=max_steps,
            ),
            batch_getter=batch_getter,
        )

        # If specified, plot the final model predictions
        if plot_preds:
            print("Plotting final predictions...")
            plotting.plot_data_predictions(
                plot_name="Final predictions",
                dir_name=output_dir,
                dataset=sin_data,
                output_dim=args.output_dim,
                model=model,
            )

        # Return the final test error
        TestError = optimisers.results.columns.TestError
        final_test_error = result.get_values(TestError)[-1]
        return final_test_error

    # Initialise the Experiment object, and add parameters
    experiment = Experiment(run_experiment, output_dir, args.n_repeats)
    addp = lambda *args: experiment.add_parameter(Parameter(*args))
    addp("num_units",       10,     [5, 10, 15, 20]                         )
    addp("num_layers",      1,      [1, 2, 3]                               )
    addp("log10_s0",        0,      np.linspace(-1, 3, 5)                   )
    addp("alpha",           0.5,    np.linspace(0.1, 1, 9, endpoint=False)  )
    addp("beta",            0.5,    np.linspace(0.1, 1, 9, endpoint=False)  )
    addp("max_steps",       10,     [5, 10, 15, 20]                         )
    addp("batch_size",      100,    [25, 50, 75, 100, 150, 200, 300, None]  )
    addp("batch_replace",   True,   [True, False]                           )
    addp(
        "act_func",
        models.activations.gaussian,
        [
            models.activations.gaussian,
            models.activations.cauchy,
            models.activations.logistic,
            models.activations.relu,
        ],
    )

    # Call warmup function
    optimisers.warmup()

    # Call function to run all experiments
    if args.find_best_params:
        experiment.find_best_parameters()
    else:
        experiment.sweep_all_parameters()

    # Write the results of all experiments to a text file
    experiment.save_results_as_text()

    # Open the output plot directory
    os.system("explorer \"%s\"" % output_dir)

    # Plot the predictions using the model with the optimal hyper-parameters
    default_param_dict = experiment.get_default_param_dictionary()
    run_experiment(**default_param_dict, plot_preds=True)
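# A minimal sketch (not this script's actual CLI) of an argparse wrapper that
# could drive the hyper-parameter sweep above. Only the attribute names are
# taken from what main() reads (input_dim, output_dim, n_train, t_lim, t_eval,
# n_repeats, find_best_params); the flag spellings and default values below
# are illustrative assumptions.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Hyper-parameter sweep for gradient descent",
    )
    parser.add_argument("--input_dim",          type=int,   default=1)
    parser.add_argument("--output_dim",         type=int,   default=1)
    parser.add_argument("--n_train",            type=int,   default=100)
    parser.add_argument("--t_lim",              type=float, default=5.0)
    parser.add_argument("--t_eval",             type=float, default=0.1)
    parser.add_argument("--n_repeats",          type=int,   default=3)
    parser.add_argument("--find_best_params",   action="store_true")
    main(parser.parse_args())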
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
        e_lims, n_repeats):
    """
    Main function for this script, wrapped by argparse for command-line
    arguments.
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, time limit, and results list
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    t_interval = t_lim / 50
    results_list = []

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)

        # Generate random network
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.cauchy,
                models.activations.identity,
            ],
            initialiser=models.initialisers.ConstantPreActivationStatistics(
                sin_data.x_train,
                sin_data.y_train,
            ),
        )
        # Set name for experiment
        name = "Constant pre-activation statistics"
        # Call gradient descent function
        result = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name=name, verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size),
        )
        results_list.append(result)

        # Generate random network
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.cauchy,
                models.activations.identity,
            ],
            initialiser=models.initialisers.ConstantParameterStatistics(),
        )
        # Set name for experiment
        name = "Constant parameter statistics"
        # Call gradient descent function
        result = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name=name, verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size),
        )
        results_list.append(result)

    # Get name of output directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")

    # Compare training curves
    plotting.plot_training_curves(
        results_list,
        "Comparing initialisers for gradient descent on 2D sinusoidal data",
        output_dir,
        e_lims=e_lims,
        tp=0.5,
    )
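# The docstring above says this main function is wrapped by argparse. As a
# hedged illustration of the expected argument types only (the values below
# are assumptions, not the script's defaults), a direct call could look like:
if __name__ == "__main__":
    main(
        input_dim=2,
        output_dim=3,
        n_train=2500,
        batch_size=50,
        t_lim=5.0,
        num_hidden_units=[20, 20],  # two hidden layers of 20 units each
        e_lims=[0.0, 0.5],          # axis limits for the training-curve plot
        n_repeats=3,
    )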
import os
import numpy as np

if __name__ == "__main__":
    import __init__

from models import NeuralNetwork
import activations, data, optimisers, plotting

# Get name of output directory
current_dir = os.path.dirname(os.path.abspath(__file__))
output_dir = os.path.join(current_dir, "Outputs")

optimisers.warmup()

# Initialise data, number of iterations, and results list
np.random.seed(9251)
sin_data = data.SinusoidalDataSet1D1D(xlim=[-2, 2], freq=1)
n_iters = 10000
eval_every = n_iters // 20
results_list = []

for seed in [2295, 6997, 7681]:
    # Set the random seed
    np.random.seed(seed)

    # Generate random network and store initial parameters
    n = NeuralNetwork(
        1,
        1,
        [10],
        [activations.Gaussian(), activations.Identity()],
    )
    w0 = n.get_parameter_vector().copy()

    # Call gradient descent function
    result_ls = optimisers.gradient_descent(
        n,
        sin_data,
def main(args):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   args: object containing modified command line arguments as attributes
    """
    np.random.seed(args.seed)

    # Get output directory which is specific to the relevant script parameters
    param_str = " ".join([
        "d%s" % args.dataset_type.__name__[:3],
        "i%s" % args.input_dim,
        "o%s" % args.output_dim,
        "t%s" % args.t_lim,
        "n%s" % args.n_train,
        "b%s" % args.batch_size,
        "u%s" % args.num_hidden_units,
    ])
    if args.dynamic_terminator:
        dt_str = "dyn"
        if args.dt_smooth_output:
            dt_str += "sOut"
        if args.dt_smooth_mrse:
            dt_str += "sMrStd"
        param_str += " %s%i" % (dt_str, args.dt_buffer_length)
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Train gradient descent",
        param_str,
    )
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Perform warmup experiment
    optimisers.warmup()

    # Initialise lists of objects that will be stored for each repeat
    result_list = []
    model_list = []
    prediction_column_list = []

    # Initialise dataset object and corresponding error function
    dataset_kwargs = {
        "input_dim": args.input_dim,
        "n_train": args.n_train,
    }
    if not issubclass(args.dataset_type, data.BinaryClassification):
        dataset_kwargs["output_dim"] = args.output_dim
    dataset = args.dataset_type(**dataset_kwargs)
    if isinstance(dataset, data.Regression):
        error_func = models.errors.sum_of_squares
        act_funcs = None
        print("Using regression data set with sum of squares error function")
    elif isinstance(dataset, data.BinaryClassification):
        error_func = models.errors.binary_cross_entropy
        act_funcs = [models.activations.gaussian, models.activations.logistic]
        print(
            "Using binary classification data set with binary cross-entropy "
            "error function, and logistic activation function in the output "
            "layer"
        )
    elif isinstance(dataset, data.Classification):
        error_func = models.errors.softmax_cross_entropy
        act_funcs = None
        print(
            "Using classification data set with softmax cross entropy error "
            "function"
        )
    else:
        raise ValueError(
            "Data set must be either a binary-classification, multi-class "
            "classification or regression data set"
        )

    # Iterate through repeats
    for i in range(args.n_repeats):
        # Initialise model and Result object
        model = models.NeuralNetwork(
            input_dim=args.input_dim,
            output_dim=args.output_dim,
            num_hidden_units=args.num_hidden_units,
            error_func=error_func,
            act_funcs=act_funcs,
        )
        result = optimisers.Result(name="Repeat %i" % (i + 1))
        if args.line_search is not None:
            args.line_search_col = columns.StepSize(args.line_search)
            result.add_column(args.line_search_col)
        if args.plot_pred_gif or args.plot_hidden_gif:
            pred_column = columns.Predictions(
                dataset=dataset,
                store_hidden_layer_outputs=args.plot_hidden_gif,
                store_hidden_layer_preactivations=(
                    args.plot_hidden_preactivations_gif
                ),
            )
            result.add_column(pred_column)
        if args.plot_test_set_improvement_probability:
            test_set_improvement_column = (
                columns.TestSetImprovementProbabilitySimple(
                    model,
                    dataset,
                    smoother=optimisers.smooth.MovingAverage(1, n=10),
                )
            )
            result.add_column(test_set_improvement_column)
        if args.dynamic_terminator:
            dynamic_terminator = optimisers.DynamicTerminator(
                model=model,
                dataset=dataset,
                batch_size=args.batch_size,
                replace=False,
                t_lim=args.t_lim,
                smooth_n=args.dt_buffer_length,
                smooth_x0=args.dt_x0,
                smooth_output=args.dt_smooth_output,
                smooth_mean_reduction=args.dt_smooth_mrse,
                smooth_std=args.dt_smooth_mrse,
            )
            terminator = dynamic_terminator
            batch_getter = dynamic_terminator
            dynamic_terminator_column = columns.BatchImprovementProbability(
                dynamic_terminator,
            )
            result.add_column(dynamic_terminator_column)
        else:
            terminator = optimisers.Terminator(t_lim=args.t_lim)
            batch_getter = optimisers.batch.ConstantBatchSize(
                args.batch_size,
                True,
            )

        # Perform gradient descent
        optimisers.gradient_descent(
            model,
            dataset,
            line_search=args.line_search,
            result=result,
            evaluator=optimisers.Evaluator(t_interval=args.t_eval),
            terminator=terminator,
            batch_getter=batch_getter,
        )

        # Store results
        result_list.append(result)
        model_list.append(model)
        if args.plot_pred_gif or args.plot_hidden_gif:
            prediction_column_list.append(pred_column)

    # Make output plots
    print("Plotting output plots in \"%s\"..." % output_dir)
    os.system("explorer \"%s\"" % output_dir)
    print("Plotting training curves...")
    plotting.plot_training_curves(
        result_list,
        dir_name=output_dir,
        e_lims=args.error_lims,
    )
    if args.plot_test_set_improvement_probability or args.dynamic_terminator:
        attribute_list = [
            columns.TrainError,
            columns.TestError,
            columns.StepSize,
        ]
        if args.plot_test_set_improvement_probability:
            print("Plotting test set improvement probability...")
            attribute_list.append(columns.TestSetImprovementProbabilitySimple)
        if args.dynamic_terminator:
            print("Plotting batch improvement probability...")
            attribute_list.append(columns.BatchImprovementProbability)
        plotting.plot_result_attributes_subplots(
            plot_name="Improvement probability\n%s" % param_str,
            dir_name=output_dir,
            result_list=result_list,
            attribute_list=attribute_list,
            log_axes_attributes=[columns.StepSize],
            iqr_axis_scaling=True,
        )
    for i, model in enumerate(model_list):
        output_dir_repeat = os.path.join(output_dir, "Repeat %i" % (i + 1))
        if args.plot_preds:
            print("Plotting final predictions...")
            plotting.plot_data_predictions(
                plot_name="Final predictions",
                dir_name=output_dir_repeat,
                dataset=dataset,
                output_dim=args.output_dim,
                model=model,
            )
        if args.plot_pred_gif:
            print("Plotting gif of predictions during training...")
            plotting.plot_predictions_gif(
                plot_name="Model predictions during training",
                dir_name=output_dir_repeat,
                result=result_list[i],
                prediction_column=prediction_column_list[i],
                dataset=dataset,
                output_dim=args.output_dim,
                duration=args.t_eval * 1000,
            )
        if args.plot_hidden_gif:
            print("Plotting gif of hidden layers during training...")
            if args.plot_hidden_preactivations_gif:
                plot_name = "Hidden layer preactivations during training"
            else:
                plot_name = "Hidden layer outputs during training"
            plotting.plot_hidden_outputs_gif(
                plot_name=plot_name,
                dir_name=output_dir_repeat,
                result=result_list[i],
                prediction_column=prediction_column_list[i],
                dataset=dataset,
                output_dim=args.output_dim,
                duration=args.t_eval * 1000,
            )
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
        e_lims, n_repeats):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   input_dim: positive integer number of input dimensions
    -   output_dim: positive integer number of output dimensions
    -   n_train: positive integer number of points in the training set
    -   batch_size: positive integer batch size to use for training
    -   t_lim: positive float, length of time to train for each experiment
    -   num_hidden_units: list of positive integers, number of hidden units in
        each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    -   e_lims: list of 2 floats, used as axis limits in the output plots
    -   n_repeats: positive integer number of repeats to perform of each
        experiment
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    t_interval = t_lim / 50

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)

        # Generate random network and store initial parameters
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.gaussian,
                models.activations.identity,
            ],
        )
        w0 = n.get_parameter_vector().copy()

        # Call gradient descent function
        result_gd_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="SGD with line search",
                verbose=True,
            ),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size),
        )
        results_list.append(result_gd_ls)

        # Try again without line search
        n.set_parameter_vector(w0)
        result_gd_no_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="SGD without line search",
                verbose=True,
            ),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size),
        )
        results_list.append(result_gd_no_ls)

        # Call generalised Newton function
        n.set_parameter_vector(w0)
        result_pbgn_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="PBGN with line search",
                verbose=True,
            ),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size),
        )
        results_list.append(result_pbgn_ls)

        # Try again without line search
        n.set_parameter_vector(w0)
        result_pbgn_no_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="PBGN without line search",
                verbose=True,
            ),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size),
        )
        results_list.append(result_pbgn_no_ls)

    # Get name of output directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")

    # Compare training curves
    plot_name = "Comparing gradient descent vs generalised Newton"
    plot_name += ", %iD-%iD data" % (input_dim, output_dim)
    plot_name += ", %.2g s training time" % t_lim
    plot_name += ", %s hidden units" % str(num_hidden_units)
    plotting.plot_training_curves(
        results_list,
        plot_name,
        output_dir,
        e_lims=e_lims,
    )
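# A hedged sketch of how the final test error of each experiment could be
# read back from results_list, using the same Result.get_values / TestError
# column API that the hyper-parameter sweep script above uses. This snippet
# would live at the end of main(), where results_list is in scope, and it
# assumes every Result records the TestError column by default.
TestError = optimisers.results.columns.TestError
for i, result in enumerate(results_list):
    final_test_error = result.get_values(TestError)[-1]
    print("Result %i: final test error = %.6f" % (i, final_test_error))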
def main(input_dim, output_dim, n_train, t_lim, num_hidden_units, e_lims,
        n_repeats, alpha_smooth, p_c, min_batch_size):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   input_dim: positive integer number of input dimensions
    -   output_dim: positive integer number of output dimensions
    -   n_train: positive integer number of points in the training set
    -   t_lim: positive float, length of time to train for each experiment
    -   num_hidden_units: list of positive integers, number of hidden units in
        each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    -   e_lims: list of 2 floats, used as axis limits in the output plots
    -   n_repeats: positive integer number of repeats to perform of each
        experiment
    -   alpha_smooth: float in (0, 1), amount of smoothing to apply to the DBS
        batch size
    -   p_c: float, used as prob_correct_direction for the DBS batch getter
    -   min_batch_size: positive integer, minimum batch size used by the DBS
        batch getter
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    t_interval = t_lim / 50

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)

        # Generate random network
        model = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.gaussian,
                models.activations.identity,
            ],
        )

        # Call gradient descent function
        result = optimisers.Result("Repeat = %i" % i)
        batch_getter = optimisers.batch.DynamicBatchSize(
            model,
            sin_data,
            alpha_smooth=alpha_smooth,
            prob_correct_direction=p_c,
            min_batch_size=min_batch_size,
        )
        batch_col = optimisers.results.columns.BatchSize(batch_getter)
        dbs_col = optimisers.results.columns.DbsMetric()
        result.add_column(batch_col)
        result.add_column(dbs_col)
        result = optimisers.gradient_descent(
            model,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=result,
            line_search=optimisers.LineSearch(),
            batch_getter=batch_getter,
        )
        results_list.append(result)

    # Compare training curves (output_dir is assumed to be defined at module
    # level; it is not set inside this function)
    plot_name_suffix = "\n%iD-%iD data" % (input_dim, output_dim)
    plot_name_suffix += ", %.2g s training time" % t_lim
    plot_name_suffix += ", %s hidden units" % str(num_hidden_units)
    plot_name_suffix += "\nalpha_smooth = %.3f" % alpha_smooth
    plot_name_suffix += ", p_c = %.3f" % p_c
    plot_name_suffix += ", min_batch_size = %.3f" % min_batch_size
    this_test_output_dir = os.path.join(
        output_dir,
        plot_name_suffix.replace("\n", ""),
    )
    plotting.plot_training_curves(
        results_list,
        "DBS learning curves" + plot_name_suffix,
        this_test_output_dir,
        e_lims=e_lims,
    )
    for col in [dbs_col, batch_col]:
        plot_name = "%s against iteration for dynamic batch size" % col.name
        plot_name += plot_name_suffix
        plotting.plot_result_attribute(
            plot_name,
            this_test_output_dir,
            results_list,
            type(col),
        )
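# Illustrative usage of the DBS script above (the values below are
# assumptions, not the script's defaults). main() reads output_dir from
# module scope, so it is defined here before the call, following the same
# "Outputs" convention as the other scripts in this section.
if __name__ == "__main__":
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")
    main(
        input_dim=1,
        output_dim=1,
        n_train=100,
        t_lim=5.0,
        num_hidden_units=[10],
        e_lims=[0.0, 0.5],
        n_repeats=3,
        alpha_smooth=0.9,
        p_c=0.99,
        min_batch_size=10,
    )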