def run_experiment(dataset_path, model_architecture, model_params=None, num_epochs=5000, batch_size=100, chunk_size=0,
                   verbose=False, reshape_to=None, update_func_name='nesterov_momentum', learning_rate=0.01,
                   update_func_kwargs=None, adapt_learning_rate=False, subtract_mean=True, labels_to_keep=None,
                   snapshot_every=0, snapshot_prefix='model', start_from_snapshot=None, snapshot_final_model=True,
                   num_crops=0, crop_shape=None, mirror_crops=True, test_only=False):
    """Run a deep learning experiment, reporting results to standard output.

    Command line or in-process arguments:
     * dataset_path (str) - path of dataset pickle zip (see data.create_datasets)
     * model_architecture (str) - the name of the architecture to use (subclass of architectures.AbstractModelBuilder)
     * model_params (str) - colon-separated list of equals-separated key-value pairs to pass to the model builder.
                            All keys are assumed to be strings, while values are evaluated as Python literals
     * num_epochs (int) - number of training epochs to run
     * batch_size (int) - number of examples to feed to the network in each batch
     * chunk_size (int) - number of examples to copy to the GPU in each chunk. If it's zero, the chunk size is set to
                          the number of training examples, which results in faster training. However, it's impossible
                          when the size of the example set is larger than the GPU's memory
     * verbose (bool) - if True, extra debugging information will be printed
     * reshape_to (str) - if given, the data will be reshaped to match this string, which should evaluate to a Python
                          tuple of ints (e.g., may be required to make the dataset fit into a convnet input layer)
     * update_func_name (str) - update function to use to train the network. See functions with signature
                                lasagne.updates.<update_func_name>(loss_or_grads, params, learning_rate, **kwargs)
     * learning_rate (float) - learning rate to use with the update function
     * update_func_kwargs (str) - keyword arguments to pass to the update function in addition to learning_rate.
                                  This string has the same format as model_params
     * adapt_learning_rate (bool) - if True, the learning rate will be reduced by a factor of 10 when the validation
                                    loss hasn't decreased within _LEARNING_RATE_GRACE_PERIOD, down to a minimum of
                                    _MIN_LEARNING_RATE
     * subtract_mean (bool) - if True, the mean RGB value in the training set will be subtracted from all subsets of
                              the dataset
     * labels_to_keep (str) - comma-separated list of labels to keep -- all other labels will be dropped
     * snapshot_every (int) - if nonzero, a model snapshot will be saved every snapshot_every number of epochs
     * snapshot_prefix (str) - prefix for saved snapshot files
     * start_from_snapshot (str) - path of model snapshot to start training from. Note: currently, the snapshot doesn't
                                   contain all the original hyperparameters, so running this command with
                                   start_from_snapshot still requires passing all the original command arguments
     * snapshot_final_model (bool) - if True, the final model snapshot will be saved
     * num_crops (int) - if non-zero, this number of random crops of the images will be used
     * crop_shape (str) - if given, specifies the shape of the crops to be created (converted to tuple like reshape_to)
     * mirror_crops (bool) - if True, every random crop will be mirrored horizontally, making the effective number of
                             crops 2 * num_crops
     * test_only (bool) - if True, no training will be performed, and results on the testing subset will be reported

    Returns None in all cases; results are printed.

    Raises:
     * AssertionError - if theano.config.floatX is not 'float32'
     * ValueError - if model_architecture is not a key of ARCHITECTURE_NAME_TO_CLASS
    """
    # pylint: disable=too-many-locals,too-many-arguments
    assert theano.config.floatX == 'float32', 'Theano floatX must be float32 to ensure consistency with pickled dataset'
    if model_architecture not in ARCHITECTURE_NAME_TO_CLASS:
        raise ValueError('Unknown architecture %s (valid values: %s)' % (model_architecture,
                                                                         sorted(ARCHITECTURE_NAME_TO_CLASS)))
    # Set a static random seed for reproducibility
    np.random.seed(572893204)
    dataset, label_to_index = _load_data(dataset_path, reshape_to, subtract_mean, labels_to_keep=labels_to_keep)
    # The learning rate lives in a Theano shared variable, and the very same variable is handed to both the model
    # builder and the training loop below (presumably so that adapt_learning_rate can change it in place -- confirm
    # in _run_training_loop).
    learning_rate_var = theano.shared(lasagne.utils.floatX(learning_rate))
    model_builder = ARCHITECTURE_NAME_TO_CLASS[model_architecture](
        dataset,
        output_dim=len(label_to_index),
        batch_size=batch_size,
        chunk_size=chunk_size,
        verbose=verbose,
        update_func_name=update_func_name,
        learning_rate=learning_rate_var,
        update_func_kwargs=parse_param_str(update_func_kwargs),
        num_crops=num_crops,
        crop_shape=literal_eval(crop_shape) if crop_shape else None,
        mirror_crops=mirror_crops
    )
    # When resuming, the snapshot supplies the epoch to continue from and the output layer to build upon;
    # otherwise training starts at epoch 0 and the builder is given no pre-existing output layer.
    start_epoch, output_layer = _load_model_snapshot(start_from_snapshot) if start_from_snapshot else (0, None)
    output_layer, training_iter, validation_eval = model_builder.build(
        output_layer=output_layer, **parse_param_str(model_params)
    )

    if test_only:
        # Evaluate on the testing subset and stop: no network info is printed and no training is run.
        testing_loss, testing_accuracy = model_builder.create_eval_function('testing', output_layer)()
        print('Testing loss & accuracy:\t %.6f\t%.2f%%' % (testing_loss, testing_accuracy * 100))
        return

    _print_network_info(output_layer)
    try:
        _run_training_loop(output_layer, training_iter, validation_eval, num_epochs, snapshot_every, snapshot_prefix,
                           snapshot_final_model, start_epoch, learning_rate_var, adapt_learning_rate)
    # "except X as e" is valid on Python 2.6+ and Python 3, unlike the legacy "except X, e" form.
    except OverflowError as e:
        # Best effort: an OverflowError is reported as divergence and the experiment ends without re-raising.
        print('Divergence detected (OverflowError: %s). Stopping now.' % e)
def run_experiment(dataset_path, model_architecture, model_params=None, num_epochs=5000, batch_size=100, chunk_size=0,
                   verbose=False, reshape_to=None, update_func_name='nesterov_momentum', learning_rate=0.01,
                   update_func_kwargs=None, adapt_learning_rate=False, subtract_mean=True, labels_to_keep=None,
                   snapshot_every=0, snapshot_prefix='model', start_from_snapshot=None, snapshot_final_model=True,
                   num_crops=0, crop_shape=None, mirror_crops=True, test_only=False):
    """Run a deep learning experiment, reporting results to standard output.

    Command line or in-process arguments:
     * dataset_path (str) - path of dataset pickle zip (see data.create_datasets)
     * model_architecture (str) - the name of the architecture to use (subclass of architectures.AbstractModelBuilder)
     * model_params (str) - colon-separated list of equals-separated key-value pairs to pass to the model builder.
                            All keys are assumed to be strings, while values are evaluated as Python literals
     * num_epochs (int) - number of training epochs to run
     * batch_size (int) - number of examples to feed to the network in each batch
     * chunk_size (int) - number of examples to copy to the GPU in each chunk. If it's zero, the chunk size is set to
                          the number of training examples, which results in faster training. However, it's impossible
                          when the size of the example set is larger than the GPU's memory
     * verbose (bool) - if True, extra debugging information will be printed
     * reshape_to (str) - if given, the data will be reshaped to match this string, which should evaluate to a Python
                          tuple of ints (e.g., may be required to make the dataset fit into a convnet input layer)
     * update_func_name (str) - update function to use to train the network. See functions with signature
                                lasagne.updates.<update_func_name>(loss_or_grads, params, learning_rate, **kwargs)
     * learning_rate (float) - learning rate to use with the update function
     * update_func_kwargs (str) - keyword arguments to pass to the update function in addition to learning_rate.
                                  This string has the same format as model_params
     * adapt_learning_rate (bool) - if True, the learning rate will be reduced by a factor of 10 when the validation
                                    loss hasn't decreased within _LEARNING_RATE_GRACE_PERIOD, down to a minimum of
                                    _MIN_LEARNING_RATE
     * subtract_mean (bool) - if True, the mean RGB value in the training set will be subtracted from all subsets of
                              the dataset
     * labels_to_keep (str) - comma-separated list of labels to keep -- all other labels will be dropped
     * snapshot_every (int) - if nonzero, a model snapshot will be saved every snapshot_every number of epochs
     * snapshot_prefix (str) - prefix for saved snapshot files
     * start_from_snapshot (str) - path of model snapshot to start training from. Note: currently, the snapshot doesn't
                                   contain all the original hyperparameters, so running this command with
                                   start_from_snapshot still requires passing all the original command arguments
     * snapshot_final_model (bool) - if True, the final model snapshot will be saved
     * num_crops (int) - if non-zero, this number of random crops of the images will be used
     * crop_shape (str) - if given, specifies the shape of the crops to be created (converted to tuple like reshape_to)
     * mirror_crops (bool) - if True, every random crop will be mirrored horizontally, making the effective number of
                             crops 2 * num_crops
     * test_only (bool) - if True, no training will be performed, and results on the testing subset will be reported

    Nothing is returned: the outcome is printed to standard output. An unknown model_architecture raises ValueError,
    and a Theano floatX setting other than 'float32' trips an assertion.
    """
    # pylint: disable=too-many-locals,too-many-arguments
    assert theano.config.floatX == 'float32', 'Theano floatX must be float32 to ensure consistency with pickled dataset'
    if model_architecture not in ARCHITECTURE_NAME_TO_CLASS:
        raise ValueError(
            'Unknown architecture %s (valid values: %s)' % (model_architecture, sorted(ARCHITECTURE_NAME_TO_CLASS)))

    # Set a static random seed for reproducibility
    np.random.seed(572893204)
    dataset, label_to_index = _load_data(dataset_path, reshape_to, subtract_mean, labels_to_keep=labels_to_keep)

    # One shared variable holds the learning rate; it is passed both to the model builder and to the training loop.
    learning_rate_var = theano.shared(lasagne.utils.floatX(learning_rate))
    model_builder = ARCHITECTURE_NAME_TO_CLASS[model_architecture](
        dataset,
        output_dim=len(label_to_index),
        batch_size=batch_size,
        chunk_size=chunk_size,
        verbose=verbose,
        update_func_name=update_func_name,
        learning_rate=learning_rate_var,
        update_func_kwargs=parse_param_str(update_func_kwargs),
        num_crops=num_crops,
        # crop_shape arrives as a string and is evaluated as a Python literal; an empty/None value means no shape.
        crop_shape=literal_eval(crop_shape) if crop_shape else None,
        mirror_crops=mirror_crops)

    # Resume from a snapshot when one is given, otherwise start at epoch 0 with no existing output layer.
    start_epoch, output_layer = _load_model_snapshot(start_from_snapshot) if start_from_snapshot else (0, None)
    output_layer, training_iter, validation_eval = model_builder.build(
        output_layer=output_layer, **parse_param_str(model_params))

    if test_only:
        # Test-only mode reports the testing subset's loss and accuracy, then exits before any training.
        testing_loss, testing_accuracy = model_builder.create_eval_function('testing', output_layer)()
        print('Testing loss & accuracy:\t %.6f\t%.2f%%' % (testing_loss, testing_accuracy * 100))
        return

    _print_network_info(output_layer)
    try:
        _run_training_loop(output_layer, training_iter, validation_eval, num_epochs, snapshot_every, snapshot_prefix,
                           snapshot_final_model, start_epoch, learning_rate_var, adapt_learning_rate)
    # The "as" form of the except clause parses on both Python 2.6+ and Python 3; the comma form is Python 2 only.
    except OverflowError as e:
        # An OverflowError during training is treated as divergence: report it and finish without re-raising.
        print('Divergence detected (OverflowError: %s). Stopping now.' % e)
def search_hyperparams(base_cmd, log_dir, base_model_params=None, model_params_space=None, max_evals=10,
                       learning_rate_range=None, disabled_hyperparams=None):
    """Run a sequential hyperparameter search using hyperopt.fmin().

    To enable restartability and reproducibility, each hyperparameter combination is evaluated by calling base_cmd
    with the extra hyperparameters, and logging the results to a (hopefully) unique file in log_dir.

    The experiment-running command is called in a separate shell rather than in-process, because it may depend on
    global random state. Calling run_experiment() successively in-process would hurt reproducibility.

    Arguments:
     * base_cmd (str) - base command line to call, typically "python manage.py run_experiment ..." with a small number
                        of epochs (see experiment.run_experiment())
     * log_dir (str) - path where the outputs of individual runs will be stored. Created if it doesn't exist; a
                       warning is issued if it does. Note: each run's filename is
                       "experiment.<command_line_hash>.log", meaning that collisions are possible in very rare cases
     * base_model_params (str) - model_params to pass to the run_experiment command that are not part of the search
     * model_params_space (str) - model_params to experiment with, in the same format as normal model_params, except
                                  that each key is of the form <param_name>__<hp_func_name>, where hp_func_name is a
                                  member of hyperopt.hp, and the parameter value is interpreted as arguments to pass
                                  to hp_func_name. For example, if the architecture is ConvNet,
                                  "ld0_dropout__uniform=0.0,0.75" will experiment with dropout values drawn from
                                  uniform(0.0, 0.75) for the dropout layer that comes after the first dense layer.
                                  The key is split on its last "__", so param_name may itself contain "__".
     * max_evals (int) - number of experiments to run. Note: it's possible to run once with a small value of max_evals,
                         and then do a subsequent run with an increased number of experiments. The second run will
                         read the results of the first run and continue from the point where that run stopped.
     * learning_rate_range (str) - a pair of comma-separated values that specifies the range from which the
                                   learning_rate will be drawn, according to hyperopt.hp.loguniform. Defaults to
                                   -12,-5 when not given
     * disabled_hyperparams (str) - comma-separated list of hyperparameters with which no experimentation should be
                                    done. Each name must be a top-level key of the search space (update_func,
                                    learning_rate, mirror_crops, num_crops, model_params); an unknown name raises
                                    KeyError

    Returns None; the best command line and its error rate are printed once the search finishes.
    """
    if os.path.exists(log_dir):
        warn('Log directory %s exists. Existing log files may be read to avoid repeating experiments.' % log_dir)
    else:
        os.makedirs(log_dir)

    learning_rate_range = literal_eval(learning_rate_range) if learning_rate_range else (-12, -5)

    # Start from the fixed model parameters, then overlay one hyperopt expression per searched parameter.
    model_params = parse_param_str(base_model_params)
    # .items() rather than the Python-2-only .iteritems(), so this loop runs on both Python 2 and Python 3.
    for param_name_and_hp_func, hp_func_args in parse_param_str(model_params_space).items():
        # Split on the LAST '__' only: the hp function name is the suffix, and a parameter name that itself contains
        # a double underscore must not break the unpacking.
        param_name, hp_func_name = param_name_and_hp_func.rsplit('__', 1)
        model_params[param_name] = getattr(hyperopt.hp, hp_func_name)(param_name, *hp_func_args)

    space = dict(
        update_func=hyperopt.hp.choice('update_func', [
            dict(name='adam',
                 beta1=hyperopt.hp.uniform('beta1', 0.0, 0.9),
                 beta2=hyperopt.hp.uniform('beta2', 0.99, 1.0)),
            dict(name='nesterov_momentum',
                 momentum=hyperopt.hp.uniform('momentum', 0.5, 1.0))
        ]),
        learning_rate=hyperopt.hp.loguniform('learning_rate', *learning_rate_range),
        mirror_crops=hyperopt.hp.choice('mirror_crops', [False, True]),
        num_crops=hyperopt.hp.choice('num_crops', [1, 5]),
        model_params=model_params
    )
    # Disabled hyperparameters are removed from the space entirely, so nothing is sampled for them.
    for hyperparam in (disabled_hyperparams.split(',') if disabled_hyperparams else ()):
        del space[hyperparam]

    trials = hyperopt.Trials()
    hyperopt.fmin(lambda param_dict: _eval_objective(param_dict, log_dir, base_cmd),
                  space=space, algo=hyperopt.tpe.suggest, trials=trials, max_evals=max_evals)
    # The best trial's result dict is expected to carry 'cmd' and 'loss' keys (both are read by the format string).
    print('---\nBest command line: %(cmd)s\nError rate: %(loss).2f%%' % trials.best_trial['result'])