示例#1
0
def test_experiment_lgb_regressor():
    """LightGBM regression via experiment_gbdt saves predictions and metrics."""
    X, y = make_regression_df(n_samples=1024, n_num_features=10,
                              n_cat_features=2, random_state=0,
                              id_column='user_id')

    train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.5,
                                                        random_state=0)

    lgb_params = {'objective': 'regression', 'max_depth': 8}

    with get_temp_directory() as work_dir:
        result = experiment_gbdt(lgb_params, train_x, train_y, test_x, work_dir)

        # Regression output should be continuous — many distinct values,
        # not a handful of binarized class labels.
        assert len(np.unique(result.oof_prediction)) > 5
        assert len(np.unique(result.test_prediction)) > 5
        # The last logged metric is the overall OOF score.
        assert mean_squared_error(train_y, result.oof_prediction) == result.metrics[-1]

        _check_file_exists(
            work_dir,
            ('oof_prediction.npy', 'test_prediction.npy', 'metrics.txt'))
示例#2
0
def test_experiment_sklearn_regressor(tmpdir_name):
    """run_experiment with a scikit-learn estimator class (LinearRegression)."""
    X, y = make_regression_df(n_samples=1024, n_num_features=10,
                              n_cat_features=0, random_state=0,
                              id_column='user_id')

    train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.5,
                                                        random_state=0)

    lr_params = {'fit_intercept': True}

    result = run_experiment(lr_params, train_x, train_y, test_x, tmpdir_name,
                            with_auto_prep=False,
                            algorithm_type=LinearRegression)

    # Regression output should be continuous, not binarized class labels.
    assert len(np.unique(result.oof_prediction)) > 5
    assert len(np.unique(result.test_prediction)) > 5
    # The last logged metric is the overall OOF score.
    assert mean_squared_error(train_y, result.oof_prediction) == result.metrics[-1]

    _check_file_exists(tmpdir_name)
示例#3
0
def test_experiment_cat_custom_eval():
    """CatBoost via experiment_gbdt with a custom evaluation function (MAE)."""
    X, y = make_regression_df(n_samples=1024, n_num_features=10,
                              n_cat_features=2, random_state=0,
                              id_column='user_id')

    train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.5,
                                                        random_state=0)

    cat_params = {'max_depth': 8, 'num_boost_round': 100, 'eval_metric': 'MAE'}

    with get_temp_directory() as work_dir:
        result = experiment_gbdt(cat_params, train_x, train_y, test_x, work_dir,
                                 gbdt_type='cat',
                                 eval_func=mean_absolute_error)

        # eval_func drives the recorded score: last metric equals OOF MAE.
        assert mean_absolute_error(train_y, result.oof_prediction) == result.metrics[-1]
        _check_file_exists(
            work_dir,
            ('oof_prediction.npy', 'test_prediction.npy', 'metrics.txt'))
示例#4
0
def test_experiment_cat_custom_eval(tmpdir_name):
    """CatBoost via run_experiment with a custom evaluation function (MAE).

    NOTE(review): a function with this exact name is also defined earlier in
    the file; Python keeps only the later definition, so one of them is never
    collected — consider renaming one of the two.
    """
    X, y = make_regression_df(n_samples=1024, n_num_features=10,
                              n_cat_features=2, random_state=0,
                              id_column='user_id')

    train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.5,
                                                        random_state=0)

    cat_params = {'max_depth': 8, 'num_boost_round': 100, 'eval_metric': 'MAE'}

    result = run_experiment(cat_params, train_x, train_y, test_x, tmpdir_name,
                            algorithm_type='cat',
                            eval_func=mean_absolute_error)

    # eval_func drives the recorded score: last metric equals OOF MAE.
    assert mean_absolute_error(train_y, result.oof_prediction) == result.metrics[-1]
    _check_file_exists(tmpdir_name)
示例#5
0
def test_experiment_xgb_regressor(tmpdir_name):
    """XGBoost regression via run_experiment with automatic preprocessing."""
    X, y = make_regression_df(n_samples=1024, n_num_features=10,
                              n_cat_features=2, random_state=0,
                              id_column='user_id')

    train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.5,
                                                        random_state=0)

    xgb_params = {'max_depth': 8, 'num_boost_round': 100}

    result = run_experiment(xgb_params, train_x, train_y, test_x, tmpdir_name,
                            algorithm_type='xgb',
                            with_auto_prep=True)

    # The last logged metric is the overall OOF score.
    assert mean_squared_error(train_y, result.oof_prediction) == result.metrics[-1]
    _check_file_exists(tmpdir_name)
示例#6
0
def test_experiment_cat_regressor():
    """CatBoost regression via run_experiment inside a temporary directory."""
    X, y = make_regression_df(n_samples=1024, n_num_features=10,
                              n_cat_features=2, random_state=0,
                              id_column='user_id')

    train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.5,
                                                        random_state=0)

    cat_params = {'max_depth': 8, 'num_boost_round': 100}

    with get_temp_directory() as work_dir:
        result = run_experiment(cat_params, train_x, train_y, test_x, work_dir,
                                algorithm_type='cat')

        # The last logged metric is the overall OOF score.
        assert mean_squared_error(train_y, result.oof_prediction) == result.metrics[-1]
        _check_file_exists(
            work_dir,
            ('oof_prediction.npy', 'test_prediction.npy', 'metrics.txt'))
示例#7
0
def test_averaging_regression():
    """Plain averaging equals the arithmetic mean of the three base models."""
    X, y = make_regression_df()
    train_x, test_x, train_y, test_y = train_test_split(X, y, random_state=0)

    oof, test = _make_1st_stage_preds(train_x, train_y, test_x)

    result = averaging(test, oof, train_y)

    expected_test = (test[0] + test[1] + test[2]) / 3
    expected_oof = (oof[0] + oof[1] + oof[2]) / 3
    assert_array_almost_equal(expected_test, result.test_prediction)
    assert_array_almost_equal(expected_oof, result.oof_prediction)
    # No eval_func was supplied, so no score is computed.
    assert result.score is None
示例#8
0
def test_averaging_opt_minimize():
    """Optimized averaging is no worse than the best single model or plain mean."""
    X, y = make_regression_df(n_samples=1024)
    train_x, test_x, train_y, test_y = train_test_split(X, y, random_state=0)

    oof, test = _make_1st_stage_preds(train_x, train_y, test_x)

    # MSE of the strongest of the three first-stage models.
    best_single_model = min(mean_squared_error(train_y, oof[i]) for i in range(3))

    result = averaging_opt(test, oof, train_y, mean_squared_error,
                           higher_is_better=False)

    assert result.score <= best_single_model

    baseline = averaging(test, oof, train_y, eval_func=mean_squared_error)

    # Weight optimization should also beat (or match) the unweighted mean.
    assert result.score <= baseline.score
示例#9
0
def test_experiment_lgb_regressor(tmpdir_name):
    """Default (LightGBM) run_experiment on a synthetic regression problem.

    NOTE(review): a function with this exact name is also defined earlier in
    the file; Python keeps only the later definition, so one of them is never
    collected — consider renaming one of the two.
    """
    X, y = make_regression_df(n_samples=1024, n_num_features=10,
                              n_cat_features=2, random_state=0,
                              id_column='user_id')

    train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.5,
                                                        random_state=0)

    lgb_params = {'objective': 'regression', 'max_depth': 8}

    result = run_experiment(lgb_params, train_x, train_y, test_x, tmpdir_name)

    # Regression output should be continuous, not binarized class labels.
    assert len(np.unique(result.oof_prediction)) > 5
    assert len(np.unique(result.test_prediction)) > 5
    # The last logged metric is the overall OOF score.
    assert mean_squared_error(train_y, result.oof_prediction) == result.metrics[-1]

    _check_file_exists(tmpdir_name)
示例#10
0
def test_averaging_opt_minimize_with_method():
    """averaging_opt's default optimizer matches SLSQP and differs from Nelder-Mead."""
    X, y = make_regression_df(n_samples=1024)
    train_x, test_x, train_y, test_y = train_test_split(X, y, random_state=0)

    oof, test = _make_1st_stage_preds(train_x, train_y, test_x)

    # MSE of the strongest of the three first-stage models.
    best_single_model = min(mean_squared_error(train_y, oof[i]) for i in range(3))

    result_default = averaging_opt(test, oof, train_y, mean_squared_error,
                                   higher_is_better=False)
    result_nm = averaging_opt(test, oof, train_y, mean_squared_error,
                              higher_is_better=False, method='Nelder-Mead')
    result_slsqp = averaging_opt(test, oof, train_y, mean_squared_error,
                                 higher_is_better=False, method='SLSQP')

    # Default method behaves like SLSQP; Nelder-Mead lands on a different optimum.
    assert result_default.score != result_nm.score
    assert result_default.score == result_slsqp.score

    # Either way, the optimized blend beats the best single model.
    assert result_default.score <= best_single_model
    assert result_nm.score <= best_single_model