import time

import numpy as np


def test_checkpoint_minibatch_generator():
    n_samples = 48
    data = np.arange(n_samples)
    for checkpoints in ([0, 20, 30, 63, 100], [20, 30, 63, 100]):
        for slice_when_possible in (True, False):
            iterator = checkpoint_minibatch_index_generator(n_samples=n_samples, checkpoints=checkpoints, slice_when_possible=slice_when_possible)
            assert np.array_equal(data[next(iterator)], np.arange(20))
            assert np.array_equal(data[next(iterator)], np.arange(20, 30))
            assert np.array_equal(data[next(iterator)], np.arange(30, 63) % 48)
            assert np.array_equal(data[next(iterator)], np.arange(63, 100) % 48)
            # The iterator should now be exhausted: next() must raise StopIteration.
            try:
                next(iterator)
            except StopIteration:
                pass
            else:
                raise Exception("Failed to stop iteration")
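
# For readers of the test above: a hedged, minimal sketch of what
# checkpoint_minibatch_index_generator could look like, reconstructed purely from
# the assertions in the test (it is NOT the library's actual implementation).
# The assumed contract: yield one index batch per interval between consecutive
# checkpoints (implicitly starting from sample 0), as a plain slice when the
# interval fits within a single pass over the data, and as a modulo-wrapped
# index array when it runs past the end of the dataset.
def _sketch_checkpoint_minibatch_index_generator(n_samples, checkpoints, slice_when_possible=True):
    checkpoints = list(checkpoints)
    if checkpoints[0] != 0:
        checkpoints = [0] + checkpoints  # Intervals are measured from sample 0.
    for start, stop in zip(checkpoints[:-1], checkpoints[1:]):
        if slice_when_possible and stop <= n_samples:
            # The interval lies entirely within one pass over the data, so a
            # cheap slice (no index-array allocation) suffices.
            yield slice(start, stop)
        else:
            # The interval wraps past the end of the dataset (or slices are
            # disabled): wrap the indices around with a modulo.
            yield np.arange(start, stop) % n_samples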
def assess_online_predictor(predictor, dataset, evaluation_function, test_epochs, minibatch_size, test_on='training+test',
        accumulator=None, report_test_scores=True, test_batch_size=None, test_callback=None):
    """
    DEPRECATED: use assess_prediction_functions_on_generator in train_and_test_old.py

    Train an online predictor and return the LearningCurveData.

    :param predictor: An IPredictor object
    :param dataset: A DataSet object
    :param evaluation_function: A function of the form: score=fcn(actual_values, target_values), or the name of a
        built-in evaluation function (resolved via get_evaluation_function).
    :param test_epochs: List of epochs to test at.  Eg. [0.5, 1, 2, 4]
    :param minibatch_size: Number of samples per minibatch, or:
        'full' to do full-batch.
        'stretch': to stretch the size of each batch so that we make just one call to "train" between each test.
            Use this, for instance, if your predictor trains on one sample at a time in sequence anyway.
    :param test_on: Which sets to test on, e.g. 'training+test' (passed through to dataset_to_testing_sets).
    :param accumulator: Optionally, a constructor for an object that accumulates predictions across tests
        ('avg' selects RunningAverage), or None to use raw predictions.
    :param report_test_scores: Print out the test scores as they're computed (T/F)
    :param test_batch_size: Optionally, the number of test samples to feed to the prediction function per call.
    :param test_callback: A callback which takes the predictor, and is called every time a test
        is done.  This can be useful for plotting/debugging the state.
    :return: LearningCurveData containing the score on the test sets
    """
    # TODO: Remove this function, as it is deprecated
    record = LearningCurveData()

    testing_sets = dataset_to_testing_sets(dataset, test_on)
    if accumulator is None:
        prediction_functions = {k: predictor.predict for k in testing_sets}
    else:
        accum_constructor = {'avg': RunningAverage}[accumulator] if isinstance(accumulator, str) else accumulator
        accumulators = {k: accum_constructor() for k in testing_sets}
        # Beware the in-loop lambda: the kp=k default argument binds each set's
        # accumulator at definition time, so the closures don't all share one k.
        prediction_functions = {k: lambda inp, kp=k: accumulators[kp](predictor.predict(inp)) for k in testing_sets}

    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)

    def do_test(current_epoch):
        scores = [(k, evaluation_function(process_in_batches(prediction_functions[k], x, test_batch_size), y)) for k, (x, y) in testing_sets.items()]
        if report_test_scores:
            print('Scores at Epoch %s: %s, after %.2fs' % (current_epoch, ', '.join('%s: %.3f' % (set_name, score) for set_name, score in scores), time.time() - start_time))
        record.add(current_epoch, scores)
        if test_callback is not None:
            record.add(current_epoch, ('callback', test_callback(predictor)))

    start_time = time.time()
    if minibatch_size == 'stretch':
        # Convert test epochs to sample counts, and train in one stretch between tests.
        test_samples = (np.array(test_epochs) * dataset.training_set.n_samples).astype(int)
        i = 0
        if test_samples[0] == 0:
            do_test(i)
            i += 1
        for indices in checkpoint_minibatch_index_generator(n_samples=dataset.training_set.n_samples, checkpoints=test_samples, slice_when_possible=True):
            predictor.train(dataset.training_set.input[indices], dataset.training_set.target[indices])
            do_test(test_epochs[i])
            i += 1
    else:
        checker = CheckPointCounter(test_epochs)
        last_n_samples_seen = 0
        for (n_samples_seen, input_minibatch, target_minibatch) in \
                dataset.training_set.minibatch_iterator(minibatch_size=minibatch_size, epochs=float('inf'), single_channel=True):
            current_epoch = float(last_n_samples_seen) / dataset.training_set.n_samples
            last_n_samples_seen = n_samples_seen
            time_for_a_test, done = checker.check(current_epoch)
            if time_for_a_test:
                do_test(current_epoch)
            if done:
                break
            predictor.train(input_minibatch, target_minibatch)

    return record
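
# Hedged usage sketch for assess_online_predictor. The predictor and dataset
# arguments are placeholders standing in for any IPredictor / DataSet from this
# library; the mean-squared-error lambda matches the documented
# evaluation_function signature score=fcn(actual_values, target_values).
def _demo_assess_online_predictor(predictor, dataset):
    record = assess_online_predictor(
        predictor=predictor,
        dataset=dataset,
        evaluation_function=lambda actual, target: np.mean((actual - target) ** 2),
        test_epochs=[0, 0.5, 1, 2, 4],  # Test before training, then at these epochs.
        minibatch_size=20,
        report_test_scores=True,
        )
    return record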