def assess_prediction_functions(test_pairs, functions, costs, print_results=False, prediction_minibatches=None):
    """
    :param test_pairs: A list<pair_name, (x, y)>, where x, y are equal-length vectors representing the samples in a dataset.
        Eg. [('training', (x_train, y_train)), ('test', (x_test, y_test))]
    :param functions: A list<function_name, function> of functions for computing the forward pass.
    :param costs: A list<(cost_name, cost_function)> or dict<cost_name: cost_function> of cost functions, where
        cost_function has the form: cost = cost_fcn(guess, y), where cost is a scalar, and guess is the output of the
        prediction function given one of the inputs (x) in test_pairs.
    :param prediction_minibatches: Size of minibatches to predict in.
    :return: A ModelTestScore object
    """
    if isinstance(test_pairs, DataSet):
        test_pairs = _dataset_to_test_pair(test_pairs)
    assert isinstance(test_pairs, list)
    assert all(len(_) == 2 for _ in test_pairs)
    assert all(len(pair) == 2 for name, pair in test_pairs)
    if isinstance(functions, dict):
        functions = functions.items()
    if callable(functions):
        functions = [(functions.__name__ if hasattr(functions, '__name__') else None, functions)]
    else:
        assert all(callable(f) for name, f in functions)
    if callable(costs):
        costs = [(costs.__name__, costs)]
    elif isinstance(costs, string_types):
        costs = [(costs, get_evaluation_function(costs))]
    elif isinstance(costs, dict):
        costs = costs.items()
    else:
        costs = [(cost, get_evaluation_function(cost)) if isinstance(cost, string_types)
                 else (cost.__name__, cost) if callable(cost)
                 else cost
                 for cost in costs]
    assert all(callable(cost) for name, cost in costs)

    results = ModelTestScore()
    for test_pair_name, (x, y) in test_pairs:
        for function_name, function in functions:
            if prediction_minibatches is None:
                predictions = function(x)
            else:
                predictions = minibatch_process(function, minibatch_size=prediction_minibatches, mb_args=(x, ))
            for cost_name, cost_function in costs:
                results[test_pair_name, function_name, cost_name] = cost_function(predictions, y)

    if print_results:
        print(results.get_table())

    return results
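# Illustrative usage sketch (not part of the original module): scores a toy linear predictor on random data.
# It assumes only names already used elsewhere in this module (numpy as np, and the 'mse' cost name being
# resolvable via get_evaluation_function).
def demo_assess_prediction_functions():
    rng = np.random.RandomState(1234)
    w = rng.randn(5, 3)
    x_train = rng.randn(20, 5)
    y_train = x_train.dot(w)
    x_test = rng.randn(10, 5)
    y_test = x_test.dot(w)

    def predict(x):
        return x.dot(w)

    return assess_prediction_functions(
        test_pairs=[('training', (x_train, y_train)), ('test', (x_test, y_test))],
        functions=[('linear', predict)],
        costs=['mse'],
        print_results=True,
        )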
def evaluate_predictor(predictor, test_set, evaluation_function):
    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)
    output = predictor.predict(test_set.input)
    score = evaluation_function(actual=output, target=test_set.target)
    return score
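# Illustrative usage sketch (not part of the original module): evaluate_predictor only needs an object with a
# .predict method and a test set exposing .input and .target, so a namedtuple stand-in is enough for a quick check.
# 'mse' is the same evaluation-function name used as a default elsewhere in this module.
def demo_evaluate_predictor():
    from collections import namedtuple
    Subset = namedtuple('Subset', ['input', 'target'])

    class IdentityPredictor(object):
        def predict(self, inp):
            return inp

    x = np.random.RandomState(0).randn(10, 4)
    return evaluate_predictor(IdentityPredictor(), Subset(input=x, target=x), 'mse')  # Identical actual/target, so expect a score of 0.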
def assess_online_predictor(predictor, dataset, evaluation_function, test_epochs, minibatch_size, test_on='training+test',
        accumulator=None, report_test_scores=True, test_batch_size=None, test_callback=None):
    """
    DEPRECATED: use assess_prediction_functions_on_generator in train_and_test_old.py

    Train an online predictor and return the LearningCurveData.

    :param predictor: An IPredictor object
    :param dataset: A DataSet object
    :param evaluation_function: A function of the form: score=fcn(actual_values, target_values)
    :param test_epochs: List of epochs to test at.  Eg. [0.5, 1, 2, 4]
    :param minibatch_size: Number of samples per minibatch, or:
        'full' to do full-batch.
        'stretch' to stretch the size of each batch so that we make just one call to "train" between each test.
            Use this, for instance, if your predictor trains on one sample at a time in sequence anyway.
    :param report_test_scores: Print out the test scores as they're computed (T/F)
    :param test_callback: A callback which takes the predictor, and is called every time a test is done.  This can be
        useful for plotting/debugging the state.
    :return: LearningCurveData containing the score on the test sets
    """
    # TODO: Remove this function, as it is deprecated

    record = LearningCurveData()

    testing_sets = dataset_to_testing_sets(dataset, test_on)
    if accumulator is None:
        prediction_functions = {k: predictor.predict for k in testing_sets}
    else:
        accum_constructor = {'avg': RunningAverage}[accumulator] if isinstance(accumulator, str) else accumulator
        accumulators = {k: accum_constructor() for k in testing_sets}
        # Beware the in-loop lambda: binding kp=k as a default argument keeps each key's accumulator distinct.
        prediction_functions = {k: lambda inp, kp=k: accumulators[kp](predictor.predict(inp)) for k in testing_sets}

    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)

    def do_test(current_epoch):
        scores = [(k, evaluation_function(process_in_batches(prediction_functions[k], x, test_batch_size), y))
                  for k, (x, y) in testing_sets.items()]
        if report_test_scores:
            print('Scores at Epoch %s: %s, after %.2fs' % (current_epoch,
                ', '.join('%s: %.3f' % (set_name, score) for set_name, score in scores), time.time()-start_time))
        record.add(current_epoch, scores)
        if test_callback is not None:
            record.add(current_epoch, ('callback', test_callback(predictor)))

    start_time = time.time()
    if minibatch_size == 'stretch':
        test_samples = (np.array(test_epochs) * dataset.training_set.n_samples).astype(int)
        i = 0
        if test_samples[0] == 0:
            do_test(i)
            i += 1
        for indices in checkpoint_minibatch_index_generator(n_samples=dataset.training_set.n_samples,
                checkpoints=test_samples, slice_when_possible=True):
            predictor.train(dataset.training_set.input[indices], dataset.training_set.target[indices])
            do_test(test_epochs[i])
            i += 1
    else:
        checker = CheckPointCounter(test_epochs)
        last_n_samples_seen = 0
        for (n_samples_seen, input_minibatch, target_minibatch) in \
                dataset.training_set.minibatch_iterator(minibatch_size=minibatch_size, epochs=float('inf'), single_channel=True):
            current_epoch = float(last_n_samples_seen) / dataset.training_set.n_samples
            last_n_samples_seen = n_samples_seen
            time_for_a_test, done = checker.check(current_epoch)
            if time_for_a_test:
                do_test(current_epoch)
            if done:
                break
            predictor.train(input_minibatch, target_minibatch)

    return record
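# Illustrative usage sketch (not part of the original module).  `my_dataset` and `my_online_predictor` are
# hypothetical placeholders for a DataSet and an IPredictor with train/predict methods; the call mirrors the
# signature documented above.
#
#     learning_curve = assess_online_predictor(
#         predictor=my_online_predictor,
#         dataset=my_dataset,
#         evaluation_function='mse',
#         test_epochs=[0, 0.5, 1, 2, 4],
#         minibatch_size=20,            # or 'full' / 'stretch', as described in the docstring
#         test_on='training+test',
#         )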
def compare_predictors(dataset, online_predictors={}, offline_predictors={}, minibatch_size='full',
        evaluation_function='mse', test_epochs=sqrtspace(0, 1, 10), report_test_scores=True,
        test_on='training+test', test_batch_size=None, accumulators=None, online_test_callbacks={}):
    """
    DEPRECATED: use train_and_test_online_predictor instead.

    Compare a set of predictors by running them on a dataset, and return the learning curves for each predictor.

    :param dataset: A DataSet object
    :param online_predictors: A dict<str:IPredictor> of online predictors.  An online predictor is sequentially fed
        minibatches of data and updates its parameters with each minibatch.
    :param offline_predictors: A dict<str:object> of offline predictors.  Offline predictors obey sklearn's
        Estimator/Predictor interfaces - i.e. they have methods
        estimator = object.fit(data, targets) and prediction = object.predict(data)
    :param minibatch_size: Size of the minibatches to use for online predictors.  Can be:
        An int, in which case it represents the minibatch size for all classifiers.
        A dict<str: int>, in which case you can set the minibatch size per-classifier.
        In place of the int, you can put 'all' if you want to train on the whole dataset in each iteration.
    :param test_epochs: Test points to use for online predictors.  Can be:
        A list of integers - in which case the classifier is tested after seeing this many samples.
        A list of floats - in which case the classifier is tested after seeing this many epochs.
        'always' - in which case a test is performed after every training step.
        The final test point determines the end of training.
    :param evaluation_function: Function used to evaluate output of predictors
    :param report_test_scores: Boolean indicating whether you'd like to report results online.
    :param test_on: 'training', 'test', or 'training+test'
    :param test_batch_size: When the test set is too large to process in one step, use this to break it up into chunks.
    :param accumulators: A dict<str: accum_fcn>, where accum_fcn is a stateful function of the form:
        accumulated_output = accum_fcn(this_output)
        Special case: accum_fcn can be 'avg' to make a running average.
    :param online_test_callbacks: A dict<str: fcn> where fcn is a callback that takes an online predictor as an
        argument.  Useful for logging/plotting/debugging progress during training.
    :return: An OrderedDict<LearningCurveData>
    """
    all_keys = list(online_predictors.keys()) + list(offline_predictors.keys())
    assert len(all_keys) > 0, 'You have to give at least one predictor.  Is that too much to ask?'
    assert len(all_keys) == len(np.unique(all_keys)), "You have multiple predictors using the same names.  Change that."
    type_constructor_dict = OrderedDict(
        [(k, ('offline', offline_predictors[k])) for k in sorted(offline_predictors.keys())] +
        [(k, ('online', online_predictors[k])) for k in sorted(online_predictors.keys())]
        )
    minibatch_size = _pack_into_dict(minibatch_size, expected_keys=online_predictors.keys())
    accumulators = _pack_into_dict(accumulators, expected_keys=online_predictors.keys())
    online_test_callbacks = _pack_into_dict(online_test_callbacks, expected_keys=online_predictors.keys(), allow_subset=True)
    test_epochs = np.array(test_epochs)
    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)

    records = OrderedDict()

    # Run each predictor, using the offline (fit/predict) or online (train/predict) protocol as appropriate.
    for predictor_name, (predictor_type, predictor) in type_constructor_dict.items():
        print('%s\nRunning predictor %s\n%s' % ('='*20, predictor_name, '-'*20))
        records[predictor_name] = \
            assess_offline_predictor(
                predictor=predictor,
                dataset=dataset,
                evaluation_function=evaluation_function,
                report_test_scores=report_test_scores,
                test_on=test_on,
                test_batch_size=test_batch_size
                ) if predictor_type == 'offline' else \
            assess_online_predictor(
                predictor=predictor,
                dataset=dataset,
                evaluation_function=evaluation_function,
                test_epochs=test_epochs,
                accumulator=accumulators[predictor_name],
                minibatch_size=minibatch_size[predictor_name],
                report_test_scores=report_test_scores,
                test_on=test_on,
                test_batch_size=test_batch_size,
                test_callback=online_test_callbacks[predictor_name] if predictor_name in online_test_callbacks else None
                ) if predictor_type == 'online' else \
            bad_value(predictor_type)

    print('Done!')
    return records
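# Illustrative usage sketch (not part of the original module).  `my_dataset`, `MyOnlineNet` and `SKLearnStyleModel`
# are hypothetical placeholders for a DataSet, an IPredictor, and an sklearn-style fit/predict estimator respectively.
#
#     learning_curves = compare_predictors(
#         dataset=my_dataset,
#         online_predictors={'online-net': MyOnlineNet()},
#         offline_predictors={'sklearn-model': SKLearnStyleModel()},
#         minibatch_size=20,
#         test_epochs=[0, 0.5, 1, 2, 4],
#         evaluation_function='mse',
#         )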