def main():
    p = Pipeline([
        ('step1', MultiplyByN()),
        ('step2', MultiplyByN()),
        Pipeline([
            Identity(),
            Identity(),
            PCA(n_components=4)
        ])
    ])

    p.set_hyperparams_space({
        'step1__multiply_by': RandInt(42, 50),
        'step2__multiply_by': RandInt(-10, 0),
        'Pipeline__PCA__n_components': RandInt(2, 3)
    })

    samples = p.get_hyperparams_space().rvs()
    p.set_hyperparams(samples)

    samples = p.get_hyperparams().to_flat_as_dict_primitive()
    assert 42 <= samples['step1__multiply_by'] <= 50
    assert -10 <= samples['step2__multiply_by'] <= 0
    assert samples['Pipeline__PCA__n_components'] in [2, 3]
    assert p['Pipeline']['PCA'].get_wrapped_sklearn_predictor().n_components in [2, 3]

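# Typical entry point so the example above can be run as a script:
if __name__ == '__main__':
    main()
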
def test_choose_one_step_of_invalid_chosen_step():
    with pytest.raises(ValueError):
        Pipeline([
            ChooseOneStepOf([
                ('a', Identity()),
                ('b', Identity())
            ]).set_hyperparams({'choice': 'c'}),
        ])

def test_has_children_mixin_apply_should_return_recursive_dict_to_direct_childrends():
    p = Pipeline([
        ('a', Identity().set_hyperparams(HyperparameterSamples({'hp': 0}))),
        ('b', Identity().set_hyperparams(HyperparameterSamples({'hp': 1})))
    ])

    results = p.apply('_get_hyperparams', ra=None)

    assert results.to_flat_as_dict_primitive()['a__hp'] == 0
    assert results.to_flat_as_dict_primitive()['b__hp'] == 1

def test_feature_union_should_transform_with_numpy_transpose():
    p = Pipeline([
        FeatureUnion([
            Identity(),
            Identity(),
        ], joiner=NumpyTranspose())
    ])
    data_inputs = np.random.randint(low=0, high=100, size=(2, 20))

    outputs = p.transform(data_inputs)

    assert np.array_equal(outputs, np.array([data_inputs, data_inputs]).transpose())

def test_feature_union_should_transform_with_zip_features():
    p = Pipeline([
        FeatureUnion([
            Identity(),
            Identity(),
        ], joiner=ZipFeatures())
    ])
    data_inputs = np.random.randint(low=0, high=100, size=(2, 20))

    outputs = p.transform(data_inputs)

    assert np.array_equal(outputs, np.stack([data_inputs, data_inputs], axis=1))

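# Shape sketch (inferred from the assert above, not from ZipFeatures' own docs):
# with two branches each returning an array of shape (2, 20), ZipFeatures pairs
# the branch outputs per sample, yielding shape (2, 2, 20), which is equivalent
# to np.stack([branch_a, branch_b], axis=1).
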
def test_feature_union_should_transform_with_concatenate_inner_features():
    p = Pipeline([
        FeatureUnion([
            Identity(),
            Identity(),
        ], joiner=NumpyConcatenateInnerFeatures())
    ])
    data_inputs = np.random.randint(low=0, high=100, size=(2, 20))

    outputs = p.transform(data_inputs)

    assert np.array_equal(outputs, np.concatenate([data_inputs, data_inputs], axis=-1))

def test_feature_union_should_fit_transform_with_numpy_transpose():
    p = Pipeline([
        FeatureUnion([
            Identity(),
            Identity(),
        ], joiner=NumpyTranspose())
    ])
    data_inputs = np.random.randint(low=0, high=100, size=(2, 20))
    expected_outputs = None

    p, outputs = p.fit_transform(data_inputs, expected_outputs)

    assert np.array_equal(outputs, np.array([data_inputs, data_inputs]).transpose())

def test_feature_union_should_apply_to_self_and_sub_steps():
    p = Pipeline([
        FeatureUnion([
            Identity(),
            Identity(),
        ], joiner=NumpyTranspose())
    ])

    p.apply(lambda step: step._set_hyperparams(HyperparameterSamples({'applied': True})))

    assert p.hyperparams['applied']
    assert p['FeatureUnion'].hyperparams['applied']
    assert p['FeatureUnion'][0].hyperparams['applied']
    assert p['FeatureUnion'][1].hyperparams['applied']
    assert p['FeatureUnion'][2].hyperparams['applied']  # index 2 is the NumpyTranspose joiner

def test_has_children_mixin_apply_should_return_recursive_dict_to_recursive_childrends():
    p = Pipeline([
        Pipeline([
            ('c', Identity().set_hyperparams(HyperparameterSamples({'hp': 3}))),
            ('d', Identity().set_hyperparams(HyperparameterSamples({'hp': 4})))
        ]).set_hyperparams(HyperparameterSamples({'hp': 2})),
    ])

    results = p.apply('_get_hyperparams', ra=None)

    assert results['Pipeline__hp'] == 2
    assert results['Pipeline__c__hp'] == 3
    assert results['Pipeline__d__hp'] == 4

def test_feature_union_should_fit_transform_with_concatenate_inner_features():
    p = Pipeline([
        FeatureUnion([
            Identity(),
            Identity(),
        ], joiner=NumpyConcatenateInnerFeatures())
    ])
    data_inputs = np.random.randint(low=0, high=100, size=(2, 20))
    expected_outputs = None

    p, outputs = p.fit_transform(data_inputs, expected_outputs)

    assert np.array_equal(outputs, np.concatenate([data_inputs, data_inputs], axis=-1))

def test_when_hyperparams_and_saved_no_pipeline_should_not_load_checkpoint_pickle(tmpdir: LocalPath):
    # Given
    tape = TapeCallbackFunction()
    pickle_checkpoint_step = DefaultCheckpoint()

    # When
    pipeline_save = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=Identity(),
        tape=TapeCallbackFunction(),
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
        different=True,
        save_pipeline=False
    )
    pipeline_save.fit_transform(data_inputs, expected_outputs)

    pipeline_load = create_pipeline(
        tmpdir=tmpdir,
        pickle_checkpoint_step=pickle_checkpoint_step,
        tape=tape,
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1})
    )
    pipeline_load, actual_data_inputs = pipeline_load.fit_transform(data_inputs, expected_outputs)

    # Then
    actual_tape = tape.get_name_tape()
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert actual_tape == ["1", "2", "3"]

def given_failed_trial_split(trial):
    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)

        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=0.7, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=0.7, higher_score_is_better=False)

        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=0.4, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=0.4, higher_score_is_better=False)

        error = IndexError('index error')
        trial_split.set_failed(error)
        trial.set_failed(error)

    return trial_split

def test_trial_split_is_new_best_score_should_return_true_with_a_new_best_score_after_multiple_scores():
    hp = HyperparameterSamples({'a': 2})
    repo = InMemoryHyperparamsRepository()
    trial = Trial(save_trial_function=repo.save_trial, hyperparams=hp, main_metric_name=MAIN_METRIC_NAME)

    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)

        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=0.7, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=0.7, higher_score_is_better=False)

        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=0.4, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=0.4, higher_score_is_better=False)

        assert trial_split.is_new_best_score()

def given_success_trial_validation_split(trial, best_score=0.4):
    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)

        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=0.7, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=0.7, higher_score_is_better=False)

        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=best_score, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=best_score, higher_score_is_better=False)

        trial_split.set_success()
        trial.set_success()

    return trial_split

def test_load_full_dump_from_path(tmpdir):
    # Given
    tape_fit_callback_function = TapeCallbackFunction()
    tape_transform_callback_function = TapeCallbackFunction()
    pipeline = Pipeline([
        ('step_a', Identity()),
        ('step_b', OutputTransformerWrapper(
            FitTransformCallbackStep(tape_fit_callback_function, tape_transform_callback_function)))
    ], cache_folder=tmpdir).set_name(PIPELINE_NAME)

    # When
    pipeline, outputs = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS)
    pipeline.save(ExecutionContext(tmpdir), full_dump=True)

    # Then
    loaded_pipeline = ExecutionContext(tmpdir).load(os.path.join(PIPELINE_NAME, 'step_b'))

    assert isinstance(loaded_pipeline, OutputTransformerWrapper)
    loaded_step_b_wrapped_step = loaded_pipeline.wrapped
    assert np.array_equal(
        loaded_step_b_wrapped_step.transform_callback_function.data[0], EXPECTED_OUTPUTS)
    assert np.array_equal(
        loaded_step_b_wrapped_step.fit_callback_function.data[0][0], EXPECTED_OUTPUTS)
    assert np.array_equal(
        loaded_step_b_wrapped_step.fit_callback_function.data[0][1],
        [None] * len(EXPECTED_OUTPUTS))

def __init__(self, wrapped, max_plotted_predictions=None):
    """
    Build a ``FeatureUnion`` of ``[Identity(), wrapped]`` whose joiner plots up to
    ``max_plotted_predictions`` predictions (default: 10) while joining the outputs.
    """
    if max_plotted_predictions is None:
        max_plotted_predictions = 10
    FeatureUnion.__init__(
        self,
        [Identity(), wrapped],
        joiner=PlotPredictionsJoiner(plot_predictions, max_plotted_predictions),
        n_jobs=1
    )

def test_trial_should_have_end_time_later_than_start_time(self):
    with self.trial.new_validation_split(Identity()) as trial_split:
        time.sleep(0.001)  # TODO: maybe remove sleep?
        trial_split.set_success()

    assert isinstance(trial_split.start_time, datetime.datetime)
    assert isinstance(trial_split.end_time, datetime.datetime)
    assert trial_split.start_time < trial_split.end_time

def __init__(self, steps_as_tuple: NamedTupleList, **kwargs):
    """
    Create a ``FeatureUnion`` in which ``Identity()`` is prepended as the first step,
    so that the original inputs are kept and concatenated with the outputs of the
    other steps.

    :param steps_as_tuple: The steps to send to the ``FeatureUnion``. ``Identity()`` is prepended.
    :param kwargs: Other arguments to send to ``FeatureUnion``.
    """
    FeatureUnion.__init__(self, [Identity()] + steps_as_tuple, **kwargs)

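# Minimal usage sketch of the pattern this constructor implements: prepending
# Identity() keeps the raw inputs as a pass-through branch that the joiner
# concatenates with the other branches' outputs. The MultiplyByN branch and the
# explicit joiner choice below are illustrative assumptions, not taken from the
# surrounding code.
import numpy as np
from neuraxle.base import Identity
from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import MultiplyByN, NumpyConcatenateInnerFeatures
from neuraxle.union import FeatureUnion

p = Pipeline([
    FeatureUnion([
        Identity(),                   # pass-through branch: the raw inputs
        ('doubled', MultiplyByN(2)),  # an extra feature branch
    ], joiner=NumpyConcatenateInnerFeatures())
])
outputs = p.transform(np.array([[1, 2], [3, 4]]))
# outputs ~ [[1, 2, 2, 4], [3, 4, 6, 8]]: raw inputs concatenated with doubled inputs
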
def test_has_children_mixin_apply_should_apply_method_to_recursive_childrends():
    p = Pipeline([
        ('a', Identity()),
        ('b', Identity()),
        Pipeline([('c', Identity()), ('d', Identity())]),
    ])

    p.apply('_set_hyperparams', ra=None, hyperparams=HyperparameterSamples({
        'Pipeline__c__hp': 3,
        'Pipeline__d__hp': 4
    }))

    assert p['Pipeline']['c'].hyperparams.to_flat_dict()['hp'] == 3
    assert p['Pipeline']['d'].hyperparams.to_flat_dict()['hp'] == 4

def test_has_children_mixin_apply_should_apply_method_to_direct_childrends():
    p = Pipeline([
        ('a', Identity()),
        ('b', Identity()),
        Pipeline([('c', Identity()), ('d', Identity())]),
    ])

    p.apply('_set_hyperparams', ra=None, hyperparams=HyperparameterSamples({
        'a__hp': 0,
        'b__hp': 1,
        'Pipeline__hp': 2
    }))

    assert p['a'].hyperparams.to_flat_as_dict_primitive()['hp'] == 0
    assert p['b'].hyperparams.to_flat_as_dict_primitive()['hp'] == 1
    assert p['Pipeline'].hyperparams.to_flat_as_dict_primitive()['hp'] == 2

def main():
    p = Pipeline([
        NonFittableStep(),
        NonTransformableStep(),
        Identity()  # Note: Identity does nothing: it inherits from both NonFittableMixin and NonTransformableMixin.
    ])

    p = p.fit(np.array([0, 1]), np.array([0, 1]))

    out = p.transform(np.array([0, 1]))

def test_trial_split_is_new_best_score_should_return_true_with_one_score(self):
    with self.trial.new_validation_split(Identity()) as trial_split:
        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)

        assert trial_split.is_new_best_score()

def test_trial_should_create_new_split():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name=MAIN_METRIC_NAME)

    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.set_success()

    assert isinstance(trial_split.start_time, datetime.datetime)
    assert isinstance(trial_split.end_time, datetime.datetime)
    assert trial_split.start_time < trial_split.end_time
    assert trial.validation_splits[0] == trial_split

def main():
    np.random.seed(42)
    X = np.random.randint(5, size=(100, 5))

    # Create and fit the pipeline:
    pipeline = Pipeline([
        StandardScaler(),
        Identity(),
        Pipeline([
            Identity(),
            Identity(),  # Note: an Identity step is a step that does nothing.
            Identity(),  # We use it here for demonstration purposes.
            Pipeline([
                Identity(),
                PCA(n_components=2)
            ])
        ])
    ])
    pipeline, X_t = pipeline.fit_transform(X)

    # Get the components:
    pca_components = pipeline["Pipeline"]["Pipeline"][-1].get_wrapped_sklearn_predictor().components_
    assert pca_components.shape == (2, 5)

def test_trial_split_is_new_best_score_should_return_true_with_one_score():
    hp = HyperparameterSamples({'a': 2})
    trial = Trial(hyperparams=hp, main_metric_name=MAIN_METRIC_NAME)

    with trial.new_validation_split(Identity()) as trial_split:
        trial_split.add_metric_results_train(
            name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)
        trial_split.add_metric_results_validation(
            name=MAIN_METRIC_NAME, score=0.5, higher_score_is_better=False)

        assert trial_split.is_new_best_score()

def test_can_update_scipy_distribution():
    p = Identity().set_hyperparams_space(HyperparameterSpace({
        'rand_int_neuraxle': RandInt(2, 5)  # neuraxle
    }))

    p.update_hyperparams_space(HyperparameterSpace({
        'rand_int_scipy': randint(low=2, high=5),  # scipy
        'gamma_scipy': gamma(0.2),  # scipy
    }))

    assert isinstance(p.get_hyperparams_space()['rand_int_scipy'], ScipyDiscreteDistributionWrapper)
    assert isinstance(p.get_hyperparams_space()['gamma_scipy'], ScipyContinuousDistributionWrapper)
    randint_sample = p.get_hyperparams_space()['rand_int_scipy'].rvs()
    gamma_sample = p.get_hyperparams_space()['gamma_scipy'].rvs()
    assert 2 <= randint_sample <= 5
    assert isinstance(gamma_sample, float)

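# Note (inferred from the behavior asserted above): update_hyperparams_space()
# behaves like dict.update(), merging the new entries into the existing space
# rather than replacing it, and raw scipy distributions are wrapped so they
# expose Neuraxle's distribution interface (e.g. rvs()).
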
def main():
    p = Pipeline([
        NonFittableStep(),
        NonTransformableStep(),
        Identity()  # Note: Identity does nothing: it inherits from both NonFittableMixin and NonTransformableMixin.
    ])

    some_data = np.array([0, 1])
    p = p.fit(some_data)
    # Out:
    # NonFittableStep: I transformed.
    # NonTransformableStep: I fitted.

    out = p.transform(some_data)
    # Out:
    # NonFittableStep: I transformed.

    assert np.array_equal(out, some_data)

def test_sklearn_wrapper_with_an_invalid_step():
    with pytest.raises(ValueError):
        SKLearnWrapper(Identity())

def _get_expected_output_checkpoint_path(self, context):
    return context.push(Identity(name=DataCheckpointType.EXPECTED_OUTPUT.value)).get_path()

def _get_data_input_checkpoint_path(self, context):
    return context.push(Identity(name=DataCheckpointType.DATA_INPUT.value)).get_path()

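# Both checkpoint path helpers follow the same pattern: push a named Identity()
# onto the execution context so that get_path() resolves to a subdirectory named
# after the DataCheckpointType enum value, keeping data-input and expected-output
# checkpoints in separate folders under the current step's cache directory.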