Example #1
def test_train_test_provided():
    """Assert that input train, test works."""
    dataset = pd.concat([bin_train, bin_test]).reset_index(drop=True)

    trainer = DirectClassifier("LR", random_state=1)
    trainer.run(bin_train, bin_test)
    assert trainer.dataset.equals(dataset)
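Note: the snippets in this listing rely on shared fixtures (bin_train, bin_test, class_train, class_test, reg_train, reg_test, FILE_DIR) and imports that are defined elsewhere in the test suite. The sketch below shows one plausible way to build the binary fixtures and run a trainer; the dataset choice, split, and import paths are assumptions for illustration, not part of the original tests.

import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

from atom.training import DirectClassifier  # assumed import path

# Build a binary classification dataset and split it into the
# bin_train / bin_test frames used throughout the examples (assumption).
X, y = load_breast_cancer(return_X_y=True, as_frame=True)
data = pd.concat([X, y], axis=1)
bin_train, bin_test = train_test_split(data, test_size=0.3, random_state=1)

trainer = DirectClassifier("LR", random_state=1)
trainer.run(bin_train, bin_test)
print(trainer.results)  # summary of the fitted model(s)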
Example #2
def test_input_data_in_training():
    """Assert that the data does not change once in a training pipeline."""
    train = bin_train.copy()
    trainer = DirectClassifier("LR", random_state=1)
    trainer.run(train, bin_test)
    train.iloc[3, 2] = 99  # Change an item of the original variable
    assert 99 not in trainer.dataset  # Is unchanged in the pipeline
Example #3
def test_load():
    """Assert that a trainer is loaded correctly."""
    trainer = DirectClassifier("LR", random_state=1)
    trainer.save(FILE_DIR + "trainer")

    trainer2 = ATOMLoader(FILE_DIR + "trainer")
    assert trainer2.__class__.__name__ == "DirectClassifier"
Example #4
def test_plot():
    """Assert that plotting the BO runs without errors."""
    trainer = DirectClassifier(
        models=["lSVM", "kSVM", "MLP"],
        n_calls=45,
        n_initial_points=20,
        bo_params={"plot": True},
        random_state=1,
    )
    trainer.run(bin_train, bin_test)
Example #5
def test_est_params_per_model():
    """Assert that est_params passes the parameters per model."""
    trainer = DirectClassifier(
        models=["XGB", "LGB"],
        est_params={"xgb": {"n_estimators": 100}, "lgb": {"n_estimators": 200}},
        random_state=1,
    )
    trainer.run(bin_train, bin_test)
    assert trainer.xgb.estimator.get_params()["n_estimators"] == 100
    assert trainer.lgb.estimator.get_params()["n_estimators"] == 200
Example #6
def test_all_callbacks():
    """Assert that all callbacks predefined callbacks work as intended."""
    trainer = DirectClassifier(
        models="LR",
        n_calls=2,
        n_initial_points=2,
        bo_params={"max_time": 50, "delta_x": 5, "delta_y": 5},
        random_state=1,
    )
    trainer.run(bin_train, bin_test)
Example #7
def test_callbacks(callbacks):
    """Assert that custom callbacks are accepted."""
    trainer = DirectClassifier(
        models="LR",
        n_calls=2,
        n_initial_points=2,
        bo_params={"callbacks": callbacks},
        random_state=1,
    )
    trainer.run(bin_train, bin_test)
Example #8
def test_close_plot_after_error():
    """Assert that the BO plot is closed after an error."""
    trainer = DirectClassifier(
        models=["LR", "LDA"],
        n_calls=4,
        n_initial_points=[2, 5],
        bo_params={"plot": True},
        random_state=1,
    )
    trainer.run(bin_train, bin_test)
    assert PlotCallback.c == 1  # First model is 0, after error passes to 1
Example #9
def test_est_params_all_models():
    """Assert that est_params passes the parameters to all models."""
    trainer = DirectClassifier(
        models=["XGB", "LGB"],
        n_calls=5,
        est_params={"n_estimators": 220},
        random_state=1,
    )
    trainer.run(bin_train, bin_test)
    assert trainer.lgb.estimator.get_params()["n_estimators"] == 220
    assert trainer.xgb.estimator.get_params()["n_estimators"] == 220
Example #10
def test_optimizer_kwargs():
    """Assert that the kwargs provided are passed to the optimizer."""
    trainer = DirectClassifier(
        models="LR",
        n_calls=2,
        n_initial_points=2,
        bo_params={"acq_func": "EI"},
        random_state=1,
    )
    trainer.run(bin_train, bin_test)
    assert trainer._bo_kwargs.get("acq_func") == "EI"
Example #11
def test_4_data_provided():
    """Assert that input X_train, X_test, y_train, y_test works."""
    dataset = pd.concat([bin_train, bin_test]).reset_index(drop=True)
    X_train = bin_train.iloc[:, :-1]
    X_test = bin_test.iloc[:, :-1]
    y_train = bin_train.iloc[:, -1]
    y_test = bin_test.iloc[:, -1]

    trainer = DirectClassifier("LR", random_state=1)
    trainer.run(X_train, X_test, y_train, y_test)
    assert trainer.dataset.equals(dataset)
Example #12
def test_error_handling():
    """Assert that models with errors are removed from the pipeline."""
    trainer = DirectClassifier(
        models=["LR", "LDA"],
        n_calls=4,
        n_initial_points=[2, 5],
        random_state=1,
    )
    trainer.run(bin_train, bin_test)
    assert trainer.errors.get("LDA")
    assert "LDA" not in trainer.models
    assert "LDA" not in trainer.results.index
Example #13
def test_custom_dimensions_all_models():
    """Assert that the custom dimensions are for all models if not dict."""
    trainer = DirectClassifier(
        models=["LR1", "LR2"],
        n_calls=2,
        n_initial_points=2,
        bo_params={"dimensions": [Integer(100, 1000, name="max_iter")]},
        random_state=1,
    )
    trainer.run(bin_train, bin_test)
    assert list(trainer.lr1.best_params.keys()) == ["max_iter"]
    assert list(trainer.lr2.best_params.keys()) == ["max_iter"]
Example #14
def test_sequence_parameters():
    """Assert that every model get his corresponding parameters."""
    trainer = DirectClassifier(
        models=["LR", "Tree", "LGB"],
        n_calls=(2, 3, 4),
        n_initial_points=(1, 2, 3),
        bagging=[2, 5, 7],
        random_state=1,
    )
    trainer.run(bin_train, bin_test)
    assert len(trainer.LR.bo) == 2
    assert sum(trainer.tree.bo.index.str.startswith("Initial")) == 2
    assert len(trainer.lgb.metric_bagging) == 7
Example #15
def test_goals_trainers():
    """Assert that the goal of every Trainer class is set correctly."""
    trainer = DirectClassifier("LR")
    assert trainer.goal == "classification"

    trainer = DirectRegressor("OLS")
    assert trainer.goal == "regression"
Example #16
def test_only_task_models():
    """Assert that an error is raised for models at invalid task."""
    trainer = DirectClassifier("OLS", random_state=1)  # Only regression
    pytest.raises(ValueError, trainer.run, bin_train, bin_test)

    trainer = DirectRegressor("LDA", random_state=1)  # Only classification
    pytest.raises(ValueError, trainer.run, reg_train, reg_test)
Example #17
def test_custom_dimensions_per_model():
    """Assert that the custom dimensions are distributed over the models."""
    trainer = DirectClassifier(
        models=["LR1", "LR2"],
        n_calls=2,
        n_initial_points=2,
        bo_params={
            "dimensions": {
                "lr1": [Integer(100, 200, name="max_iter")],
                "lr2": [Integer(300, 400, name="max_iter")],
            },
        },
        random_state=1,
    )
    trainer.run(bin_train, bin_test)
    assert 100 <= trainer.lr1.best_params["max_iter"] <= 200
    assert 300 <= trainer.lr2.best_params["max_iter"] <= 400
Example #18
def test_invalid_sequence_parameter():
    """Assert that an error is raised for parameters with the wrong length."""
    trainer = DirectClassifier(
        models="LR",
        metric=f1_score,
        needs_proba=[True, False],
        random_state=1,
    )
    pytest.raises(ValueError, trainer.run, bin_train, bin_test)
Example #19
def test_data_already_set():
    """Assert that if there already is data, the call to run can be empty."""
    dataset = pd.concat([bin_train, bin_test]).reset_index(drop=True)

    trainer = DirectClassifier("LR", random_state=1)
    trainer.run(bin_train, bin_test)
    trainer.run()
    assert trainer.dataset.equals(dataset)
    assert trainer.branch.idx == [len(bin_train), len(bin_test)]
Example #20
def test_default_metric():
    """Assert that a default metric is assigned depending on the task."""
    trainer = DirectClassifier("LR", random_state=1)
    trainer.run(bin_train, bin_test)
    assert trainer.metric == "f1"

    trainer = DirectClassifier("LR", random_state=1)
    trainer.run(class_train, class_test)
    assert trainer.metric == "f1_weighted"

    trainer = DirectRegressor("LGB", random_state=1)
    trainer.run(reg_train, reg_test)
    assert trainer.metric == "r2"
Example #21
def test_duplicate_models():
    """Assert that duplicate inputs are ignored."""
    trainer = DirectClassifier(["lr", "LR", "lgb"], random_state=1)
    trainer.run(bin_train, bin_test)
    assert len(trainer.models) == 2
Example #22
def test_invalid_model_name():
    """Assert that an error is raised when the model is unknown."""
    trainer = DirectClassifier(models="invalid", random_state=1)
    pytest.raises(ValueError, trainer.run, bin_train, bin_test)
Example #23
def test_multiple_same_models():
    """Assert that the same model can used with different names."""
    trainer = DirectClassifier(["lr", "lr2", "lr_3"], random_state=1)
    trainer.run(bin_train, bin_test)
    assert trainer.models == ["LR", "LR2", "LR_3"]
Example #24
def test_models_get_right_name():
    """Assert that the model names are transformed to the correct acronyms."""
    trainer = DirectClassifier(["lR", "tReE"], random_state=1)
    trainer.run(bin_train, bin_test)
    assert trainer.models == ["LR", "Tree"]
Example #25
def test_all_models_failed():
    """Assert that an error is raised when all models failed."""
    trainer = DirectClassifier("LR", n_calls=4, n_initial_points=5, random_state=1)
    pytest.raises(RuntimeError, trainer.run, bin_train, bin_test)
Example #26
def test_default_mapping_assignment():
    """Assert that a default mapping is assigned."""
    trainer = DirectClassifier("LR", random_state=1)
    trainer.run(bin_train, bin_test)
    assert trainer.mapping == {"0": 0, "1": 1}
Example #27
def test_scorer_metric_parameter():
    """Assert that a scorer metric works."""
    trainer = DirectClassifier("LR", metric=get_scorer("f1"), random_state=1)
    trainer.run(bin_train, bin_test)
    assert trainer.metric == "f1"
Example #28
def test_model_is_custom():
    """Assert that custom models are accepted."""
    trainer = DirectClassifier(RandomForestClassifier, random_state=1)
    trainer.run(bin_train, bin_test)
    assert trainer.models == "RFC"
Example #29
def test_sequence_parameters_invalid_length():
    """Assert that an error is raised when the length is invalid."""
    trainer = DirectClassifier("LR", n_calls=(2, 2), random_state=1)
    pytest.raises(ValueError, trainer.run, bin_train, bin_test)
Example #30
def test_function_metric_parameter():
    """Assert that a function metric works."""
    trainer = DirectClassifier("LR", metric=f1_score, random_state=1)
    trainer.run(bin_train, bin_test)
    assert trainer.metric == "f1_score"