Example No. 1
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
         e_lims, n_repeats):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   input_dim: positive integer number of input dimensions
    -   output_dim: positive integer number of output dimensions
    -   n_train: positive integer number of points in the training set
    -   batch_size: positive integer batch size to use for training
    -   t_lim: positive float, length of time to train for each experiment
    -   num_hidden_units: list of positive integers, number of hidden units in
        each hidden layer of the NeuralNetwork, e.g. [10] or [20, 20]
    -   e_lims: list of 2 floats, used as axis limits in the output plots
    -   n_repeats: positive integer number of repeats to perform of each
        experiment
    """
    np.random.seed(1913)

    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Do something useful
    pass
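
A minimal invocation sketch for the signature above, with argument values that are purely illustrative but match the types described in the docstring (note that the body of this example is a stub in the source):

if __name__ == "__main__":
    # Hypothetical call; values chosen only to satisfy the documented types
    main(
        input_dim=1,
        output_dim=1,
        n_train=100,
        batch_size=50,
        t_lim=5.0,
        num_hidden_units=[10],
        e_lims=[0.0, 0.5],
        n_repeats=3,
    )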
Example No. 2
def main(args):
    # Get name of output directory, and create it if it doesn't exist
    param_str = (
        "input_dim = %i, output_dim = %i, n_train = %i, t_lim = %.2f, "
        "num_repeats = %i, find_best_params = %s"
        % (
            args.input_dim,
            args.output_dim,
            args.n_train,
            args.t_lim,
            args.n_repeats,
            args.find_best_params,
        )
    )
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Gradient descent",
        param_str,
    )
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Initialise data set
    np.random.seed(6763)
    sin_data = data.Sinusoidal(
        args.input_dim,
        args.output_dim,
        args.n_train,
        x_lo=-2,
        x_hi=2,
        freq=(1 if args.input_dim == 1 else None),
    )

    # Define function to be run for each experiment
    def run_experiment(
        num_units,
        num_layers,
        log10_s0,
        alpha,
        beta,
        act_func,
        max_steps,
        batch_size,
        batch_replace,
        plot_preds=False,
    ):
        # Initialise network and batch getter
        model = models.NeuralNetwork(
            input_dim=args.input_dim,
            output_dim=args.output_dim,
            num_hidden_units=[num_units for _ in range(num_layers)],
            act_funcs=[act_func, models.activations.identity],
        )

        if (batch_size is None) or (batch_size >= args.n_train):
            batch_getter = optimisers.batch.FullTrainingSet()
        else:
            batch_getter = optimisers.batch.ConstantBatchSize(
                batch_size,
                batch_replace,
            )
        
        # Perform gradient descent
        result = optimisers.gradient_descent(
            model,
            sin_data,
            terminator=optimisers.Terminator(t_lim=args.t_lim),
            evaluator=optimisers.Evaluator(t_interval=args.t_eval),
            line_search=optimisers.LineSearch(
                s0=pow(10, log10_s0), 
                alpha=alpha, 
                beta=beta,
                max_its=max_steps,
            ),
            batch_getter=batch_getter,
        )

        # If specified, plot the final model predictions
        if plot_preds:
            print("Plotting final predictions...")
            plotting.plot_data_predictions(
                plot_name="Final predictions",
                dir_name=output_dir,
                dataset=sin_data,
                output_dim=args.output_dim,
                model=model,
            )

        # Return the final test error
        TestError = optimisers.results.columns.TestError
        final_test_error = result.get_values(TestError)[-1]
        return final_test_error

    # Initialise the Experiment object, and add parameters
    experiment = Experiment(run_experiment, output_dir, args.n_repeats)
    def addp(*param_args):
        experiment.add_parameter(Parameter(*param_args))
    addp("num_units",       10,     [5, 10, 15, 20]                         )
    addp("num_layers",      1,      [1, 2, 3]                               )
    addp("log10_s0",        0,      np.linspace(-1, 3, 5)                   )
    addp("alpha",           0.5,    np.linspace(0.1, 1, 9, endpoint=False)  )
    addp("beta",            0.5,    np.linspace(0.1, 1, 9, endpoint=False)  )
    addp("max_steps",       10,     [5, 10, 15, 20]                         )
    addp("batch_size",      100,    [25, 50, 75, 100, 150, 200, 300, None]  )
    addp("batch_replace",   True,   [True, False]                           )
    addp(
        "act_func",
        models.activations.gaussian,
        [
            models.activations.gaussian,
            models.activations.cauchy,
            models.activations.logistic,
            models.activations.relu,
        ],
    )

    # Call warmup function
    optimisers.warmup()

    # Call function to run all experiments
    if args.find_best_params:
        experiment.find_best_parameters()
    else:
        experiment.sweep_all_parameters()
    
    # Write the results of all experiments to a text file
    experiment.save_results_as_text()

    # Open the output plot directory
    os.system("explorer \"%s\"" % output_dir)

    # Plot the predictions using the model with the optimal hyper-parameters
    default_param_dict = experiment.get_default_param_dictionary()
    run_experiment(**default_param_dict, plot_preds=True)
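
The Experiment and Parameter classes are not shown on this page. Judging from the calls above, Parameter takes a name, a default value, and a range of values to sweep, and sweep_all_parameters varies one parameter at a time over its range while holding the others at their defaults. A minimal sketch of that assumed interface follows; the names and behaviour are inferred from usage, not taken from the repository:

import statistics

class Parameter:
    # Assumed signature, matching the addp(name, default, range) calls above
    def __init__(self, name, default, val_range):
        self.name = name
        self.default = default
        self.val_range = val_range

class Experiment:
    def __init__(self, func, output_dir, n_repeats):
        self._func = func
        self._output_dir = output_dir
        self._n_repeats = n_repeats
        self._params = []

    def add_parameter(self, param):
        self._params.append(param)

    def get_default_param_dictionary(self):
        return {p.name: p.default for p in self._params}

    def sweep_all_parameters(self):
        # Vary one parameter over its range while holding all others at
        # their defaults, repeating each configuration n_repeats times
        for param in self._params:
            defaults = self.get_default_param_dictionary()
            for val in param.val_range:
                kwargs = dict(defaults, **{param.name: val})
                errors = [
                    self._func(**kwargs) for _ in range(self._n_repeats)
                ]
                print(param.name, val, statistics.mean(errors))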
Example No. 3
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
         e_lims, n_repeats):
    """
    Main function for this script, wrapped by argparse for command-line
    arguments.
    """

    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, time limit, and results list
    np.random.seed(9251)
    sin_data = data.Sinusoidal(input_dim=input_dim,
                               output_dim=output_dim,
                               n_train=n_train)
    t_interval = t_lim / 50
    results_list = []

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)
        # Generate random network
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[models.activations.cauchy, models.activations.identity],
            initialiser=models.initialisers.ConstantPreActivationStatistics(
                sin_data.x_train, sin_data.y_train))
        # Set name for experiment
        name = "Constant pre-activation statistics"
        # Call gradient descent function
        result = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name=name, verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result)

        # Generate random network
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[models.activations.cauchy, models.activations.identity],
            initialiser=models.initialisers.ConstantParameterStatistics())
        # Set name for experiment
        name = "Constant parameter statistics"
        # Call gradient descent function
        result = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name=name, verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result)

    # Get name of output directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")

    # Compare training curves
    plotting.plot_training_curves(
        results_list,
        "Comparing initialisers for gradient descent on 2D sinusoidal data",
        output_dir,
        e_lims=e_lims,
        tp=0.5)
Example No. 4
import os
import numpy as np
if __name__ == "__main__":
    import __init__
from models import NeuralNetwork
import activations, data, optimisers, plotting

# Get name of output directory
current_dir = os.path.dirname(os.path.abspath(__file__))
output_dir = os.path.join(current_dir, "Outputs")

optimisers.warmup()

# Initialise data, number of iterations, and results list
np.random.seed(9251)
sin_data = data.SinusoidalDataSet1D1D(xlim=[-2, 2], freq=1)
n_iters = 10000
eval_every = n_iters // 20
results_list = []

for seed in [2295, 6997, 7681]:
    # Set the random seed
    np.random.seed(seed)
    # Generate random network and store initial parameters
    n = NeuralNetwork(1, 1, [10],
                      [activations.Gaussian(),
                       activations.Identity()])
    w0 = n.get_parameter_vector().copy()
    # Call gradient descent function
    result_ls = optimisers.gradient_descent(
        n,
        sin_data,
        # NB: this example is truncated at this point in the source; the
        # arguments below are an assumed completion, following the pattern
        # of the other examples on this page and assuming the Terminator
        # and Evaluator also accept iteration-based limits
        terminator=optimisers.Terminator(i_lim=n_iters),
        evaluator=optimisers.Evaluator(i_interval=eval_every),
        result=optimisers.Result(name="SGD with line search", verbose=True),
        line_search=optimisers.LineSearch(),
    )
    results_list.append(result_ls)
Example No. 5
def main(args):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   args: object containing modified command line arguments as attributes
    """
    np.random.seed(args.seed)

    # Get output directory which is specific to the relevant script parameters
    param_str = " ".join([
        "d%s" % args.dataset_type.__name__[:3],
        "i%s" % args.input_dim,
        "o%s" % args.output_dim,
        "t%s" % args.t_lim,
        "n%s" % args.n_train,
        "b%s" % args.batch_size,
        "u%s" % args.num_hidden_units,
    ])
    if args.dynamic_terminator:
        dt_str = "dyn"
        if args.dt_smooth_output:
            dt_str += "sOut"
        if args.dt_smooth_mrse:
            dt_str += "sMrStd"
        param_str += " %s%i" % (dt_str, args.dt_buffer_length)

    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Train gradient descent",
        param_str,
    )
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Perform warmup experiment
    optimisers.warmup()

    # Initialise lists of objects that will be stored for each repeat
    result_list = []
    model_list = []
    prediction_column_list = []

    # Initialise dataset object and corresponding error function
    dataset_kwargs = {
        "input_dim": args.input_dim,
        "n_train": args.n_train,
    }
    if not issubclass(args.dataset_type, data.BinaryClassification):
        dataset_kwargs["output_dim"] = args.output_dim
    dataset = args.dataset_type(**dataset_kwargs)

    if isinstance(dataset, data.Regression):
        error_func = models.errors.sum_of_squares
        act_funcs = None
        print("Using regression data set with sum of squares error function")
    elif isinstance(dataset, data.BinaryClassification):
        error_func = models.errors.binary_cross_entropy
        act_funcs = [models.activations.gaussian, models.activations.logistic]
        print("Using binary classification data set with binary cross-entropy "
              "error function, and logistic activation function in the output "
              "layer")
    elif isinstance(dataset, data.Classification):
        error_func = models.errors.softmax_cross_entropy
        act_funcs = None
        print("Using classification data set with softmax cross entropy error "
              "function")
    else:
        raise ValueError(
            "Data set must be either a binary-classification, multi-class "
            "classification or regression data set")

    # Iterate through repeats
    for i in range(args.n_repeats):
        # Initialise model and Result object
        model = models.NeuralNetwork(
            input_dim=args.input_dim,
            output_dim=args.output_dim,
            num_hidden_units=args.num_hidden_units,
            error_func=error_func,
            act_funcs=act_funcs,
        )

        result = optimisers.Result(name="Repeat %i" % (i + 1))

        if args.line_search is not None:
            args.line_search_col = columns.StepSize(args.line_search)
            result.add_column(args.line_search_col)

        if args.plot_pred_gif or args.plot_hidden_gif:
            pred_column = columns.Predictions(
                dataset=dataset,
                store_hidden_layer_outputs=args.plot_hidden_gif,
                store_hidden_layer_preactivations=(
                    args.plot_hidden_preactivations_gif),
            )
            result.add_column(pred_column)

        if args.plot_test_set_improvement_probability:
            test_set_improvement_column = (
                columns.TestSetImprovementProbabilitySimple(
                    model,
                    dataset,
                    smoother=optimisers.smooth.MovingAverage(1, n=10),
                ))
            result.add_column(test_set_improvement_column)

        if args.dynamic_terminator:
            dynamic_terminator = optimisers.DynamicTerminator(
                model=model,
                dataset=dataset,
                batch_size=args.batch_size,
                replace=False,
                t_lim=args.t_lim,
                smooth_n=args.dt_buffer_length,
                smooth_x0=args.dt_x0,
                smooth_output=args.dt_smooth_output,
                smooth_mean_reduction=args.dt_smooth_mrse,
                smooth_std=args.dt_smooth_mrse,
            )
            terminator = dynamic_terminator
            batch_getter = dynamic_terminator

            dynamic_terminator_column = columns.BatchImprovementProbability(
                dynamic_terminator,
            )
            result.add_column(dynamic_terminator_column)
        else:
            terminator = optimisers.Terminator(t_lim=args.t_lim)
            batch_getter = optimisers.batch.ConstantBatchSize(
                args.batch_size,
                True,
            )

        # Perform gradient descent
        optimisers.gradient_descent(
            model,
            dataset,
            line_search=args.line_search,
            result=result,
            evaluator=optimisers.Evaluator(t_interval=args.t_eval),
            terminator=terminator,
            batch_getter=batch_getter,
        )

        # Store results
        result_list.append(result)
        model_list.append(model)
        if args.plot_pred_gif or args.plot_hidden_gif:
            prediction_column_list.append(pred_column)

    # Make output plots
    print("Plotting output plots in \"%s\"..." % output_dir)
    os.system("explorer \"%s\"" % output_dir)
    print("Plotting training curves...")
    plotting.plot_training_curves(
        result_list,
        dir_name=output_dir,
        e_lims=args.error_lims,
    )
    if args.plot_test_set_improvement_probability or args.dynamic_terminator:
        attribute_list = [
            columns.TrainError,
            columns.TestError,
            columns.StepSize,
        ]
        if args.plot_test_set_improvement_probability:
            print("Plotting test set improvement probability...")
            attribute_list.append(columns.TestSetImprovementProbabilitySimple)
        if args.dynamic_terminator:
            print("Plotting batch improvement probability...")
            attribute_list.append(columns.BatchImprovementProbability)
        plotting.plot_result_attributes_subplots(
            plot_name="Improvement probability\n%s" % param_str,
            dir_name=output_dir,
            result_list=result_list,
            attribute_list=attribute_list,
            log_axes_attributes=[columns.StepSize],
            iqr_axis_scaling=True,
        )

    for i, model in enumerate(model_list):
        output_dir_repeat = os.path.join(output_dir, "Repeat %i" % (i + 1))
        if args.plot_preds:
            print("Plotting final predictions...")
            plotting.plot_data_predictions(
                plot_name="Final predictions",
                dir_name=output_dir_repeat,
                dataset=dataset,
                output_dim=args.output_dim,
                model=model,
            )
        if args.plot_pred_gif:
            print("Plotting gif of predictions during training...")
            plotting.plot_predictions_gif(
                plot_name="Model predictions during training",
                dir_name=output_dir_repeat,
                result=result_list[i],
                prediction_column=prediction_column_list[i],
                dataset=dataset,
                output_dim=args.output_dim,
                duration=args.t_eval * 1000,
            )
        if args.plot_hidden_gif:
            print("Plotting gif of hidden layers during training...")
            if args.plot_hidden_preactivations_gif:
                plot_name = "Hidden layer preactivations during training"
            else:
                plot_name = "Hidden layer outputs during training"

            plotting.plot_hidden_outputs_gif(
                plot_name=plot_name,
                dir_name=output_dir_repeat,
                result=result_list[i],
                prediction_column=prediction_column_list[i],
                dataset=dataset,
                output_dim=args.output_dim,
                duration=args.t_eval * 1000,
            )
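
Note how the DynamicTerminator in the loop above is passed as both the terminator and the batch_getter: a single object that samples the batches can also track how often each batch improves the error, and stop training when that probability stalls. A rough sketch of this dual-role pattern follows; it illustrates the idea only, and is not the repository's implementation:

import numpy as np

class DualRoleTerminator:
    """Sketch: one object that both yields batches and decides when to
    stop, based on a smoothed estimate of per-batch improvement."""

    def __init__(self, dataset_size, batch_size, smooth_n=100):
        self._dataset_size = dataset_size
        self._batch_size = batch_size
        self._smooth_n = smooth_n
        self._recent_improvements = []
        self._prev_error = np.inf

    def get_batch(self):
        # Batch-getter role: sample a batch of indices without replacement
        return np.random.choice(
            self._dataset_size, self._batch_size, replace=False)

    def update(self, batch_error):
        # Record whether the latest batch improved on the previous error,
        # keeping only the most recent smooth_n observations
        self._recent_improvements.append(batch_error < self._prev_error)
        self._recent_improvements = self._recent_improvements[-self._smooth_n:]
        self._prev_error = batch_error

    def ready_to_terminate(self):
        # Terminator role: stop once the smoothed improvement probability
        # drops to chance level
        if len(self._recent_improvements) < self._smooth_n:
            return False
        return np.mean(self._recent_improvements) <= 0.5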
Example No. 6
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
         e_lims, n_repeats):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   input_dim: positive integer number of input dimensions
    -   output_dim: positive integer number of output dimensions
    -   n_train: positive integer number of points in the training set
    -   batch_size: positive integer batch size to use for training
    -   t_lim: positive float, length of time to train for each experiment
    -   num_hidden_units: list of positive integers, number of hidden units in
        each hidden layer of the NeuralNetwork, e.g. [10] or [20, 20]
    -   e_lims: list of 2 floats, used as axis limits in the output plots
    -   n_repeats: positive integer number of repeats to perform of each
        experiment
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    t_interval = t_lim / 50

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)
        # Generate random network and store initial parameters
        n = models.NeuralNetwork(input_dim=input_dim,
                                 output_dim=output_dim,
                                 num_hidden_units=num_hidden_units,
                                 act_funcs=[
                                     models.activations.gaussian,
                                     models.activations.identity
                                 ])
        w0 = n.get_parameter_vector().copy()
        # Call gradient descent function
        result_gd_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="SGD with line search",
                                     verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_gd_ls)
        # Try again without line search
        n.set_parameter_vector(w0)
        result_gd_no_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="SGD without line search",
                                     verbose=True),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_gd_no_ls)
        # Call generalised Newton function
        n.set_parameter_vector(w0)
        result_pbgn_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="PBGN with line search",
                                     verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_pbgn_ls)
        # Try again without line search
        n.set_parameter_vector(w0)
        result_pbgn_no_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="PBGN without line search",
                                     verbose=True),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_pbgn_no_ls)

    # Get name of output directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")

    # Compare training curves
    plot_name = "Comparing gradient descent vs generalised Newton"
    plot_name += ", %iD-%iD data" % (input_dim, output_dim)
    plot_name += ", %.2g s training time" % t_lim
    plot_name += ", %s hidden units" % str(num_hidden_units)
    plotting.plot_training_curves(results_list,
                                  plot_name,
                                  output_dir,
                                  e_lims=e_lims)
Example No. 7
def main(input_dim, output_dim, n_train, t_lim, num_hidden_units, e_lims,
         n_repeats, alpha_smooth, p_c, min_batch_size):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   input_dim: positive integer number of input dimensions
    -   output_dim: positive integer number of output dimensions
    -   n_train: positive integer number of points in the training set
    -   t_lim: positive float, length of time to train for each experiment
    -   num_hidden_units: list of positive integers, number of hidden units in
        each hidden layer of the NeuralNetwork, e.g. [10] or [20, 20]
    -   e_lims: list of 2 floats, used as axis limits in the output plots
    -   n_repeats: positive integer number of repeats to perform of each
        experiment
    -   alpha_smooth: float in (0, 1), amount of smoothing to apply to the DBS
        batch size
    -   p_c: float in (0, 1), passed to the DynamicBatchSize batch getter as
        prob_correct_direction
    -   min_batch_size: positive number, lower limit on the dynamic batch size
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    t_interval = t_lim / 50

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)
        # Generate random network
        model = models.NeuralNetwork(input_dim=input_dim,
                                     output_dim=output_dim,
                                     num_hidden_units=num_hidden_units,
                                     act_funcs=[
                                         models.activations.gaussian,
                                         models.activations.identity
                                     ])
        # Call gradient descent function
        result = optimisers.Result("Repeat = %i" % i)
        batch_getter = optimisers.batch.DynamicBatchSize(
            model,
            sin_data,
            alpha_smooth=alpha_smooth,
            prob_correct_direction=p_c,
            min_batch_size=min_batch_size)
        batch_col = optimisers.results.columns.BatchSize(batch_getter)
        dbs_col = optimisers.results.columns.DbsMetric()
        result.add_column(batch_col)
        result.add_column(dbs_col)
        result = optimisers.gradient_descent(
            model,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=result,
            line_search=optimisers.LineSearch(),
            batch_getter=batch_getter)
        results_list.append(result)

    # Compare training curves
    plot_name_suffix = "\n%iD-%iD data" % (input_dim, output_dim)
    plot_name_suffix += ", %.2g s training time" % t_lim
    plot_name_suffix += ", %s hidden units" % str(num_hidden_units)
    plot_name_suffix += "\nalpha_smooth = %.3f" % alpha_smooth
    plot_name_suffix += ", p_c = %.3f" % p_c
    plot_name_suffix += ", min_batch_size = %.3f" % min_batch_size
    # NB: output_dir is not defined in this function, and is assumed to be a
    # module-level constant (as in the other examples on this page)
    this_test_output_dir = os.path.join(output_dir,
                                        plot_name_suffix.replace("\n", ""))
    plotting.plot_training_curves(results_list,
                                  "DBS learning curves" + plot_name_suffix,
                                  this_test_output_dir,
                                  e_lims=e_lims)
    for col in [dbs_col, batch_col]:
        plot_name = "%s against iteration for dynamic batch size" % col.name
        plot_name += plot_name_suffix
        plotting.plot_result_attribute(plot_name, this_test_output_dir,
                                       results_list, type(col))
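
The DynamicBatchSize getter used above is configured by p_c (passed as prob_correct_direction), alpha_smooth, and min_batch_size. One plausible reading, sketched below as a guess at the idea rather than the repository's algorithm, is that the batch size is grown until the estimated probability that a batch gradient points in a descent direction exceeds p_c, with exponential smoothing controlled by alpha_smooth and a floor at min_batch_size:

import numpy as np

def estimate_batch_size(sample_grads, p_c=0.99, alpha_smooth=0.2,
                        min_batch_size=10, prev_size=50):
    """Sketch of a dynamic batch-size rule: enlarge the batch when too few
    per-sample gradients agree with the mean gradient direction. This is
    an illustration, not the repository's DynamicBatchSize class."""
    mean_grad = sample_grads.mean(axis=0)
    # Fraction of per-sample gradients with a positive projection onto the
    # mean gradient (a crude proxy for "pointing in the correct direction")
    agreement = np.mean(sample_grads @ mean_grad > 0)
    # Grow the batch when agreement is below the target probability p_c,
    # shrink it otherwise
    raw_size = prev_size * (2.0 if agreement < p_c else 0.5)
    # Exponentially smooth the batch size and apply the lower limit
    smoothed = alpha_smooth * raw_size + (1 - alpha_smooth) * prev_size
    return max(int(smoothed), min_batch_size)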