# Example no. 1
# 0
def test_grid_search_tuner(tmpdir):
    """Check the length of the log returned by ``grid_search_cv``.

    Two searches are run over the same four-row frame, sharing one splitter
    and one evaluator. The asserted log length is the product of the number
    of options per hyper-parameter: 3 * 2 for the first grid and 4 * 2 for
    the second (``silent`` contributes a single option).

    ``tmpdir`` is not used in the body (presumably the pytest fixture).
    """
    dates = pd.to_datetime(
        ["2016-01-01", "2016-02-01", "2016-03-01", "2016-04-01"])
    frame = pd.DataFrame({
        'id': ["id1", "id2", "id3", "id3"],
        'date': dates,
        'x': [.2, .9, .3, .3],
        'target': [0, 1, 0, 1]
    })

    auc_eval = roc_auc_evaluator(target_column="target")

    splitter = out_of_time_and_space_splitter(
        n_splits=2,
        in_time_limit="2016-05-01",
        space_column="id",
        time_column="date",
    )

    @curry
    def param_train_fn(space, train_set):
        # Only the learning rate and the number of estimators are read from
        # the sampled point; 'silent' is present in the grid but not used.
        learner = xgb_classification_learner(
            features=["x"],
            target="target",
            learning_rate=space["learning_rate"],
            num_estimators=space["num_estimators"],
        )
        return learner(train_set)

    def run_search(learning_rates):
        # Each entry is a zero-argument callable returning a fresh list of
        # candidate values, as in the literal grids this helper replaces.
        grid = {
            'learning_rate': lambda: list(learning_rates),
            'num_estimators': lambda: [1, 2],
            'silent': lambda: [True]
        }
        return grid_search_cv(
            space=grid,
            train_set=frame,
            param_train_fn=param_train_fn,
            split_fn=splitter,
            eval_fn=auc_eval,
        )

    first_log = run_search([1e-3, 1e-2, 1e-1])
    assert len(first_log) == 3 * 2

    second_log = run_search([1e-3, 1e-2, 1e-1, 1])
    assert len(second_log) == 4 * 2
# Example no. 2
# 0
def test_roc_auc_evaluator():
    """Check the score reported by ``roc_auc_evaluator`` on a tiny frame.

    The evaluator is configured with explicit prediction/target columns and
    an ``eval_name``; the value stored under that name in its result must
    equal 0.875.
    """
    scored = pd.DataFrame({
        'target': [0, 1, 0, 1],
        'prediction': [.2, .9, .3, .3]
    })

    evaluator = roc_auc_evaluator(
        prediction_column="prediction",
        target_column="target",
        eval_name="eval_name",
    )

    # Assuming a standard ROC AUC: of the 4 positive/negative pairs, 3 are
    # ordered correctly and 1 is tied (counted as half) -> 3.5 / 4 = 0.875.
    assert evaluator(scored)["eval_name"] == 0.875