示例#1
0
def test_hyperparam_space():
    """Check the nested layout of hyperparameters for AddFeatures / ModelStacking.

    The pipeline's hyperparameter space is read back and handed to
    ``set_hyperparams``; the result of ``get_hyperparams`` must then contain
    one entry per sub-step under the keys ``SomeStep``, ``SomeStep1``, ...
    """
    # Each step gets its own freshly built space / distribution instance.
    feature_steps = [
        SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)}))
        for _ in range(2)
    ]
    add_features = AddFeatures(feature_steps)

    stacked_params = (
        ("n_estimators", 1000),
        ("n_estimators", 1000),
        ("max_depth", 100),
        ("max_depth", 100),
    )
    stacked_steps = [
        SomeStep(hyperparams_space=HyperparameterSpace({param_name: RandInt(1, upper_bound)}))
        for param_name, upper_bound in stacked_params
    ]
    joiner = NumpyTranspose()
    judge = SomeStep(hyperparams_space=HyperparameterSpace({"alpha": LogUniform(0.1, 10.0)}))
    model_stacking = ModelStacking(stacked_steps, joiner=joiner, judge=judge)

    p = Pipeline([add_features, model_stacking])

    # The space object itself (no sample is drawn here) is passed to set_hyperparams.
    space = p.get_hyperparams_space()
    p.set_hyperparams(space)

    hyperparams = p.get_hyperparams()

    assert "AddFeatures" in hyperparams.keys()
    add_features_hp = hyperparams["AddFeatures"]
    for step_name in ("SomeStep", "SomeStep1"):
        assert step_name in add_features_hp
        assert "n_components" in add_features_hp[step_name]

    # NOTE(review): only a subset of the ModelStacking sub-steps is checked
    # (SomeStep1's parameter and SomeStep2/SomeStep3 membership are not asserted).
    stacking_hp = hyperparams["ModelStacking"]
    assert "SomeStep" in stacking_hp
    assert "n_estimators" in stacking_hp["SomeStep"]
    assert "SomeStep1" in stacking_hp
    assert "max_depth" in stacking_hp["SomeStep2"]
示例#2
0
def main():
    """Sample hyperparameters for a nested pipeline and verify they were applied.

    A space is declared with flat ``parent__child__param`` keys, one sample is
    drawn with ``rvs()`` and set on the pipeline, and the resulting values are
    checked against the bounds used to declare the space.
    """
    first_multiplier = MultiplyByN()
    second_multiplier = MultiplyByN()
    nested_pipeline = Pipeline([
        Identity(),
        Identity(),
        PCA(n_components=4),
    ])
    p = Pipeline([
        ('step1', first_multiplier),
        ('step2', second_multiplier),
        nested_pipeline,
    ])

    space = {
        'step1__multiply_by': RandInt(42, 50),
        'step2__multiply_by': RandInt(-10, 0),
        'Pipeline__PCA__n_components': RandInt(2, 3)
    }
    p.set_hyperparams_space(space)

    drawn = p.get_hyperparams_space().rvs()
    p.set_hyperparams(drawn)

    flat = p.get_hyperparams().to_flat_as_dict_primitive()

    step1_value = flat['step1__multiply_by']
    assert 42 <= step1_value <= 50

    step2_value = flat['step2__multiply_by']
    assert -10 <= step2_value <= 0

    allowed_components = [2, 3]
    assert flat['Pipeline__PCA__n_components'] in allowed_components

    # The sampled value must also have reached the wrapped sklearn estimator.
    wrapped_pca = p['Pipeline']['PCA'].get_wrapped_sklearn_predictor()
    assert wrapped_pca.n_components in allowed_components
示例#3
0
def test_pipeline_set_one_hyperparam_level_two_dict():
    """Set hyperparameters through a nested dict and check where they land.

    ``learning_rate`` is set to 7 on step ``b -> a`` and to 9 on the nested
    pipeline ``b`` itself; the other steps checked must stay empty.
    """
    outer_first = ("a", SomeStep())
    nested = Pipeline([
        ("a", SomeStep()),
        ("b", SomeStep()),
        ("c", SomeStep()),
    ])
    outer_last = ("c", SomeStep())
    p = Pipeline([outer_first, ("b", nested), outer_last])

    nested_values = {
        "a": {"learning_rate": 7},
        "learning_rate": 9,
    }
    p.set_hyperparams({"b": nested_values})
    print(p.get_hyperparams())

    nested_pipeline = p["b"]
    assert nested_pipeline["a"].hyperparams["learning_rate"] == 7
    assert nested_pipeline["c"].hyperparams == {}
    assert nested_pipeline.hyperparams["learning_rate"] == 9
    assert p["c"].hyperparams == {}
示例#4
0
def test_apply_on_pipeline_with_meta_step_and_positional_argument_should_call_method_on_each_steps():
    """``apply('set_hyperparams', ...)`` must reach the pipeline, the wrapped step and the plain step."""
    meta_step = OutputTransformerWrapper(MultiplyByN(1))
    plain_step = MultiplyByN(1)
    pipeline = Pipeline([meta_step, plain_step])

    new_values = HyperparameterSamples({'multiply_by': 2})
    pipeline.apply('set_hyperparams', hyperparams=new_values)

    assert pipeline.get_hyperparams()['multiply_by'] == 2

    # Steps are looked up again through the pipeline rather than via the locals above.
    wrapped = pipeline['OutputTransformerWrapper'].wrapped
    assert wrapped.get_hyperparams()['multiply_by'] == 2

    multiplier = pipeline['MultiplyByN']
    assert multiplier.get_hyperparams()['multiply_by'] == 2
示例#5
0
def test_apply_on_pipeline_with_positional_argument_should_call_method_on_each_steps():
    """``apply('set_hyperparams', ...)`` must update the pipeline and both ``MultiplyByN`` steps."""
    steps = [MultiplyByN(1), MultiplyByN(1)]
    pipeline = Pipeline(steps)

    new_values = HyperparameterSamples({'multiply_by': 2})
    pipeline.apply('set_hyperparams', hyperparams=new_values)

    assert pipeline.get_hyperparams()['multiply_by'] == 2
    # The two unnamed steps are addressed as 'MultiplyByN' and 'MultiplyByN1'.
    for step_name in ('MultiplyByN', 'MultiplyByN1'):
        assert pipeline[step_name].get_hyperparams()['multiply_by'] == 2
示例#6
0
def test_apply_method_on_pipeline_should_call_method_on_each_steps():
    """``apply_method`` with a callable must update the pipeline and both ``MultiplyByN`` steps."""
    def _set_multiply_by(step):
        # A fresh HyperparameterSamples is built on every invocation.
        return step.set_hyperparams(HyperparameterSamples({'multiply_by': 2}))

    steps = [MultiplyByN(1), MultiplyByN(1)]
    pipeline = Pipeline(steps)

    pipeline.apply_method(_set_multiply_by)

    assert pipeline.get_hyperparams()['multiply_by'] == 2
    for step_name in ('MultiplyByN', 'MultiplyByN1'):
        assert pipeline[step_name].get_hyperparams()['multiply_by'] == 2
示例#7
0
def test_apply_method_on_pipeline_with_meta_step_should_call_method_on_each_steps():
    """``apply_method`` with a callable must reach the pipeline, the wrapped step and the plain step."""
    def _set_multiply_by(step):
        # A fresh HyperparameterSamples is built on every invocation.
        return step.set_hyperparams(HyperparameterSamples({'multiply_by': 2}))

    meta_step = OutputTransformerWrapper(MultiplyByN(1))
    plain_step = MultiplyByN(1)
    pipeline = Pipeline([meta_step, plain_step])

    pipeline.apply_method(_set_multiply_by)

    assert pipeline.get_hyperparams()['multiply_by'] == 2

    wrapped = pipeline['OutputTransformerWrapper'].wrapped
    assert wrapped.get_hyperparams()['multiply_by'] == 2

    multiplier = pipeline['MultiplyByN']
    assert multiplier.get_hyperparams()['multiply_by'] == 2
示例#8
0
def test_pipeline_set_one_hyperparam_level_two_flat():
    """Set one hyperparameter through a flat ``b__a__learning_rate`` key.

    The value must be visible on step ``b -> a`` and in the flattened
    hyperparameters of the nested pipeline ``b``; the other steps checked must
    stay empty.
    """
    outer_first = ("a", SomeStep())
    nested = Pipeline([
        ("a", SomeStep()),
        ("b", SomeStep()),
        ("c", SomeStep()),
    ])
    outer_last = ("c", SomeStep())
    p = Pipeline([outer_first, ("b", nested), outer_last])

    p.set_hyperparams({"b__a__learning_rate": 7})
    print(p.get_hyperparams())

    nested_pipeline = p["b"]
    assert nested_pipeline["a"].hyperparams["learning_rate"] == 7
    assert nested_pipeline["c"].hyperparams.to_flat_dict() == {}
    assert nested_pipeline.hyperparams.to_flat_dict() == {'a__learning_rate': 7}
    assert p["c"].hyperparams.to_flat_dict() == {}
示例#9
0
def test_hyperparam_space():
    """Check nested and flat views of hyperparameters for AddFeatures / ModelStacking.

    The pipeline's hyperparameter space is read back and handed to
    ``set_hyperparams``; ``get_hyperparams`` must then expose every sub-step
    both as nested entries and as ``parent__step__param`` flat keys.
    """
    # Each step gets its own freshly built space / distribution instance.
    feature_steps = [
        SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)}))
        for _ in range(2)
    ]
    add_features = AddFeatures(feature_steps)

    stacked_params = (
        ("n_estimators", 1000),
        ("n_estimators", 1000),
        ("max_depth", 100),
        ("max_depth", 100),
    )
    stacked_steps = [
        SomeStep(hyperparams_space=HyperparameterSpace({param_name: RandInt(1, upper_bound)}))
        for param_name, upper_bound in stacked_params
    ]
    joiner = NumpyTranspose()
    judge = SomeStep(hyperparams_space=HyperparameterSpace({"alpha": LogUniform(0.1, 10.0)}))
    model_stacking = ModelStacking(stacked_steps, joiner=joiner, judge=judge)

    p = Pipeline([add_features, model_stacking])

    # The space object itself (no sample is drawn here) is passed to set_hyperparams.
    space = p.get_hyperparams_space()
    p.set_hyperparams(space)

    hyperparams = p.get_hyperparams()
    flat_hyperparams_keys = hyperparams.to_flat_dict().keys()

    # Nested view: AddFeatures sub-steps.
    assert 'AddFeatures' in hyperparams
    add_features_hp = hyperparams["AddFeatures"]
    for step_name in ('SomeStep', 'SomeStep1'):
        assert step_name in add_features_hp
        assert "n_components" in add_features_hp[step_name]

    # Nested view: ModelStacking sub-steps, in declaration order.
    assert 'ModelStacking' in hyperparams
    stacking_hp = hyperparams["ModelStacking"]
    expected_stacking = (
        ('SomeStep', 'n_estimators'),
        ('SomeStep1', 'n_estimators'),
        ('SomeStep2', 'max_depth'),
        ('SomeStep3', 'max_depth'),
    )
    for step_name, param_name in expected_stacking:
        assert step_name in stacking_hp
        assert param_name in stacking_hp[step_name]

    # Flat view: double-underscore separated keys.
    expected_flat_keys = (
        'AddFeatures__SomeStep1__n_components',
        'AddFeatures__SomeStep__n_components',
        'ModelStacking__SomeStep__n_estimators',
        'ModelStacking__SomeStep1__n_estimators',
        'ModelStacking__SomeStep2__max_depth',
        'ModelStacking__SomeStep3__max_depth',
    )
    for flat_key in expected_flat_keys:
        assert flat_key in flat_hyperparams_keys
def test_pipeline_should_get_hyperparams():
    """``get_hyperparams`` must return a ``HyperparameterSamples`` holding pipeline- and step-level values."""
    first_step = SomeStep().set_name('step_1')
    second_step = SomeStep().set_name('step_2')
    p = Pipeline([first_step, second_step])

    values_to_set = {
        'hp': 1,
        'step_1__hp': 2,
        'step_2__hp': 3
    }
    p.set_hyperparams(values_to_set)

    hyperparams = p.get_hyperparams()

    assert isinstance(hyperparams, HyperparameterSamples)
    # Expected values are spelled out separately from the dict handed to the pipeline.
    for key, expected_value in (('hp', 1), ('step_1__hp', 2), ('step_2__hp', 3)):
        assert hyperparams[key] == expected_value
示例#11
0
def test_apply_on_pipeline_with_meta_step_and_positional_argument():
    """``apply('_set_hyperparams', ...)`` must route each flat key to the matching level.

    Four distinct values are supplied: one for the pipeline, one for the
    wrapper, one for the step inside the wrapper and one for the plain step.
    """
    meta_step = OutputTransformerWrapper(MultiplyByN(1))
    plain_step = MultiplyByN(1)
    pipeline = Pipeline([meta_step, plain_step])

    new_values = HyperparameterSamples({
        'multiply_by': 2,
        'OutputTransformerWrapper__multiply_by': 3,
        'OutputTransformerWrapper__MultiplyByN__multiply_by': 4,
        'MultiplyByN__multiply_by': 5,
    })
    pipeline.apply('_set_hyperparams', hyperparams=new_values)

    assert pipeline.get_hyperparams()['multiply_by'] == 2

    wrapper = pipeline['OutputTransformerWrapper']
    assert wrapper.get_hyperparams()['multiply_by'] == 3

    wrapped = pipeline['OutputTransformerWrapper'].wrapped
    assert wrapped.get_hyperparams()['multiply_by'] == 4

    multiplier = pipeline['MultiplyByN']
    assert multiplier.get_hyperparams()['multiply_by'] == 5
def _fit_automl_and_predict(pipeline, cache_folder, n_iter, data_inputs, expected_outputs):
    """Run an AutoML random search on ``pipeline`` and predict with the best model.

    :param pipeline: pipeline handed to ``AutoML``
    :param cache_folder: folder passed as ``cache_folder_when_no_handle``
    :param n_iter: number of trials (``n_trials``)
    :param data_inputs: data used for fitting and for the final prediction
    :param expected_outputs: targets used for fitting
    :return: tuple ``(outputs, elapsed_seconds)``; the elapsed time covers
        AutoML construction, fitting and prediction
    """
    time_a = time.time()
    auto_ml = AutoML(
        pipeline,
        refit_trial=True,
        n_trials=n_iter,
        cache_folder_when_no_handle=cache_folder,
        validation_splitter=ValidationSplitter(0.2),
        hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
        scoring_callback=ScoringCallback(mean_squared_error, higher_score_is_better=False),
        callbacks=[
            MetricCallback('mse', metric_function=mean_squared_error, higher_score_is_better=False)
        ],
    )
    auto_ml = auto_ml.fit(data_inputs, expected_outputs)
    outputs = auto_ml.get_best_model().predict(data_inputs)
    time_b = time.time()
    return outputs, time_b - time_a


def _report_automl_score(pipeline, expected_outputs, outputs, elapsed_seconds):
    """Print timing, outputs, MSE and hyperparameters; return the MSE.

    :param pipeline: pipeline whose ``get_hyperparams()`` is printed
    :param expected_outputs: ground-truth targets
    :param outputs: predictions to score
    :param elapsed_seconds: duration to print
    :return: mean squared error between ``expected_outputs`` and ``outputs``
    """
    actual_score = mean_squared_error(expected_outputs, outputs)
    print('{0} seconds'.format(elapsed_seconds))
    print('output: {0}'.format(outputs))
    print('smallest mse: {0}'.format(actual_score))
    # NOTE(review): this prints the hyperparameters of the pipeline object that
    # was passed to AutoML, not those of auto_ml.get_best_model() -- confirm
    # that this is the intended thing to report.
    print('best hyperparams: {0}'.format(pipeline.get_hyperparams()))
    return actual_score


def main(tmpdir, sleep_time: float = 0.001, n_iter: int = 10):
    """Run the same AutoML search on a classic and on a resumable pipeline.

    Both pipelines multiply the inputs three times with sleeping steps in
    between; the resumable variant additionally contains two checkpoints and
    has its cache flushed after the run.

    :param tmpdir: base directory; ``classic`` and ``resumable`` sub-folders
        are used as cache folders
    :param sleep_time: argument given to each ``Sleep`` step
    :param n_iter: number of AutoML trials per pipeline
    """
    data_inputs = np.array(range(100))
    expected_outputs = np.array(range(100, 200))

    # The same space object is shared by both pipelines.
    hyperparameter_space = HyperparameterSpace({
        'multiplication_1__multiply_by': RandInt(1, 2),
        'multiplication_2__multiply_by': RandInt(1, 2),
        'multiplication_3__multiply_by': RandInt(1, 2),
    })

    print('Classic Pipeline:')
    classic_pipeline_folder = os.path.join(str(tmpdir), 'classic')

    pipeline = Pipeline([
        ('multiplication_1', MultiplyByN()),
        ('sleep_1', ForEachDataInput(Sleep(sleep_time))),
        ('multiplication_2', MultiplyByN()),
        ('sleep_2', ForEachDataInput(Sleep(sleep_time))),
        ('multiplication_3', MultiplyByN()),
    ], cache_folder=classic_pipeline_folder).set_hyperparams_space(hyperparameter_space)

    outputs, elapsed_seconds = _fit_automl_and_predict(
        pipeline, classic_pipeline_folder, n_iter, data_inputs, expected_outputs)
    actual_score = _report_automl_score(pipeline, expected_outputs, outputs, elapsed_seconds)

    assert isinstance(actual_score, float)

    print('Resumable Pipeline:')
    resumable_pipeline_folder = os.path.join(str(tmpdir), 'resumable')

    pipeline = ResumablePipeline([
        ('multiplication_1', MultiplyByN()),
        ('ForEach(sleep_1)', ForEachDataInput(Sleep(sleep_time))),
        ('checkpoint1', ExpandDim(DefaultCheckpoint())),
        ('multiplication_2', MultiplyByN()),
        ('sleep_2', ForEachDataInput(Sleep(sleep_time))),
        ('checkpoint2', ExpandDim(DefaultCheckpoint())),
        ('multiplication_3', MultiplyByN())
    ], cache_folder=resumable_pipeline_folder).set_hyperparams_space(hyperparameter_space)

    outputs, elapsed_seconds = _fit_automl_and_predict(
        pipeline, resumable_pipeline_folder, n_iter, data_inputs, expected_outputs)
    # Flush after timing has stopped and before anything is reported.
    pipeline.flush_all_cache()
    actual_score = _report_automl_score(pipeline, expected_outputs, outputs, elapsed_seconds)

    assert isinstance(actual_score, float)
示例#13
0
def _random_search_and_transform(pipeline, n_iter, data_inputs, expected_outputs):
    """Fit a ``RandomSearch`` over ``pipeline`` and transform the inputs with the result.

    :param pipeline: pipeline handed to ``RandomSearch``
    :param n_iter: number of search iterations
    :param data_inputs: data used for fitting and for the final transform
    :param expected_outputs: targets used for fitting
    :return: tuple ``(outputs, elapsed_seconds)``; the elapsed time covers
        search construction, fitting and the transform
    """
    time_a = time.time()
    search = RandomSearch(pipeline, n_iter=n_iter, higher_score_is_better=True)
    best_model = search.fit(data_inputs, expected_outputs)
    outputs = best_model.transform(data_inputs)
    time_b = time.time()
    return outputs, time_b - time_a


def _report_random_search_score(pipeline, expected_outputs, outputs, elapsed_seconds):
    """Print timing, outputs, MSE and hyperparameters; return the MSE.

    :param pipeline: pipeline whose ``get_hyperparams()`` is printed
    :param expected_outputs: ground-truth targets
    :param outputs: transformed data to score
    :param elapsed_seconds: duration to print
    :return: mean squared error between ``expected_outputs`` and ``outputs``
    """
    actual_score = mean_squared_error(expected_outputs, outputs)
    print('{0} seconds'.format(elapsed_seconds))
    print('output: {0}'.format(outputs))
    # NOTE(review): the search is configured with higher_score_is_better=True
    # while this label reads "smallest mse" -- confirm which metric the search
    # actually optimises.
    print('smallest mse: {0}'.format(actual_score))
    # NOTE(review): this prints the hyperparameters of the pipeline object that
    # was passed to RandomSearch, not those of the returned best model.
    print('best hyperparams: {0}'.format(pipeline.get_hyperparams()))
    return actual_score


def main(tmpdir, sleep_time: float = 0, n_iter: int = 10):
    """Run the same random search on a classic and on a resumable pipeline.

    Both pipelines multiply the inputs three times with sleeping steps in
    between; the resumable variant additionally contains two checkpoints,
    caches into ``tmpdir`` and has its cache flushed after the run.

    :param tmpdir: cache folder for the resumable pipeline
    :param sleep_time: argument given to each ``Sleep`` step
    :param n_iter: number of random-search iterations per pipeline
    """
    data_inputs = np.array(range(100))
    expected_outputs = np.array(range(100, 200))

    # The same space object is shared by both pipelines.
    hyperparameter_space = HyperparameterSpace({
        'multiplication_1__multiply_by': RandInt(1, 2),
        'multiplication_2__multiply_by': RandInt(1, 2),
        'multiplication_3__multiply_by': RandInt(1, 2),
    })

    print('Classic Pipeline:')

    pipeline = Pipeline([
        ('multiplication_1', MultiplyByN()),
        ('sleep_1', ForEachDataInput(Sleep(sleep_time))),
        ('multiplication_2', MultiplyByN()),
        ('sleep_2', ForEachDataInput(Sleep(sleep_time))),
        ('multiplication_3', MultiplyByN()),
    ]).set_hyperparams_space(hyperparameter_space)

    outputs, elapsed_seconds = _random_search_and_transform(
        pipeline, n_iter, data_inputs, expected_outputs)
    actual_score = _report_random_search_score(
        pipeline, expected_outputs, outputs, elapsed_seconds)

    assert isinstance(actual_score, float)

    print('Resumable Pipeline:')

    # NOTE(review): tmpdir is passed as-is (not wrapped in str()) -- confirm
    # the pipeline accepts the object the caller provides.
    pipeline = ResumablePipeline([
        ('multiplication_1', MultiplyByN()),
        ('ForEach(sleep_1)', ForEachDataInput(Sleep(sleep_time))),
        ('checkpoint1', ExpandDim(DefaultCheckpoint())),
        ('multiplication_2', MultiplyByN()),
        ('sleep_2', ForEachDataInput(Sleep(sleep_time))),
        ('checkpoint2', ExpandDim(DefaultCheckpoint())),
        ('multiplication_3', MultiplyByN())
    ], cache_folder=tmpdir).set_hyperparams_space(hyperparameter_space)

    outputs, elapsed_seconds = _random_search_and_transform(
        pipeline, n_iter, data_inputs, expected_outputs)
    # Flush after timing has stopped and before anything is reported.
    pipeline.flush_all_cache()
    actual_score = _report_random_search_score(
        pipeline, expected_outputs, outputs, elapsed_seconds)

    assert isinstance(actual_score, float)