Пример #1
0
def test_folding_regressor(n_samples=100, n_features=3):
    """
    Compare mean squared errors of FoldingRegressor predictions on random data.

    Features and targets are drawn independently from a normal distribution,
    a two-fold FoldingRegressor is fitted on them, and three kinds of
    predictions for the training set are compared: the default ones, the ones
    obtained for a shuffled copy of the data, and the ones obtained with a
    mean vote function.

    :param n_samples: number of generated samples
    :param n_features: number of generated features
    """
    from sklearn.metrics import mean_squared_error as mse

    features = numpy.random.normal(size=[n_samples, n_features])
    target = numpy.random.normal(size=n_samples)

    base_estimator = SklearnRegressor(GradientBoostingRegressor())
    folding = FoldingRegressor(base_estimator, n_folds=2)
    folding.fit(features, target)

    default_preds = folding.predict(features)
    default_error = mse(target, default_preds)

    # sanity check of the fit: error against the real target must exceed
    # half of the error against an all-zero target
    zero_target_error = mse(target * 0., default_preds)
    assert default_error > zero_target_error * 0.5

    # predict for a shuffled copy of the data, then undo the permutation
    # so that the predictions are aligned with `target` again
    order = numpy.random.permutation(n_samples)
    inverse_order = numpy.argsort(order)
    shuffled_preds = folding.predict(features[order])[inverse_order]

    # default predictions vs. predictions for the shuffled data
    shuffled_error = mse(target, shuffled_preds)
    assert default_error > shuffled_error * 0.5

    def average_vote(fold_predictions):
        return numpy.mean(fold_predictions, axis=0)

    # default predictions vs. predictions averaged by the vote function
    averaged_preds = folding.predict(features, vote_function=average_vote)
    averaged_error = mse(target, averaged_preds)
    assert default_error > averaged_error
Пример #2
0
def test_folding_regressor_functions():
    """
    Smoke-test auxiliary FoldingRegressor functionality.

    For the generated data, both as returned by the generator and converted
    to a numpy array, the test fits a two-fold regressor with sample weights,
    checks that the last staged prediction matches the plain prediction, and
    requests feature importances through both available interfaces.
    """
    data, y, sample_weight = generate_classification_data()

    for X in (data, numpy.array(data)):
        base_estimator = SklearnRegressor(GradientBoostingRegressor(n_estimators=5))
        folding = FoldingRegressor(base_estimator, n_folds=2)
        folding.fit(X, y, sample_weight=sample_weight)

        final_preds = folding.predict(X)
        # exhaust the generator: only the last stage is of interest
        for stage_preds in folding.staged_predict(X):
            pass
        assert numpy.allclose(stage_preds, final_preds)

        # only checking that both ways of getting importances do not fail
        attribute_importances = folding.feature_importances_
        method_importances = folding.get_feature_importances()
Пример #3
0
def test_folding_regressor_functions():
    """
    Exercise staged predictions and feature importances of FoldingRegressor.

    The same scenario runs twice: once on the data exactly as generated and
    once on its numpy array copy.
    """
    data, y, sample_weight = generate_classification_data()
    as_array = numpy.array(data)

    for X in [data, as_array]:
        regressor = FoldingRegressor(
            SklearnRegressor(GradientBoostingRegressor(n_estimators=5)),
            n_folds=2,
        )
        regressor.fit(X, y, sample_weight=sample_weight)
        expected = regressor.predict(X)

        # walk through all stages, keeping the final one in `staged`
        for staged in regressor.staged_predict(X):
            pass
        # the final stage has to coincide with the ordinary prediction
        assert numpy.allclose(staged, expected)

        # importances are requested only to make sure the calls succeed
        importances_attr = regressor.feature_importances_
        importances_call = regressor.get_feature_importances()
Пример #4
0
def test_folding_regressor(n_samples=100, n_features=3):
    """
    Check prediction quality of a two-fold FoldingRegressor on random data.

    Three mean-squared-error comparisons are asserted: default predictions
    against an all-zero target, default predictions against predictions made
    for permuted data, and default predictions against mean-voted ones.

    :param n_samples: number of random samples to generate
    :param n_features: number of random features to generate
    """
    from sklearn.metrics import mean_squared_error

    X = numpy.random.normal(size=[n_samples, n_features])
    y = numpy.random.normal(size=n_samples)

    regressor = FoldingRegressor(
        SklearnRegressor(GradientBoostingRegressor()),
        n_folds=2,
    )
    regressor.fit(X, y)
    plain = regressor.predict(X)
    plain_mse = mean_squared_error(y, plain)

    # the fit is considered fine when the error w.r.t. the true target is
    # larger than half of the error w.r.t. zeros
    zeros_mse = mean_squared_error(y * 0., plain)
    assert plain_mse > zeros_mse * 0.5

    # predictions for permuted rows, mapped back to the original row order
    permutation = numpy.random.permutation(n_samples)
    permuted = regressor.predict(X[permutation])
    restored = permuted[numpy.argsort(permutation)]

    # comparison with predictions obtained for the shuffled data
    restored_mse = mean_squared_error(y, restored)
    assert plain_mse > restored_mse * 0.5

    # comparison with predictions combined by taking the mean over axis 0
    voted = regressor.predict(
        X,
        vote_function=lambda x: numpy.mean(x, axis=0),
    )
    voted_mse = mean_squared_error(y, voted)
    assert plain_mse > voted_mse
Пример #5
0
def test_folding_regressor_with_check_model():
    """Run the shared check_regression routine on a two-fold FoldingRegressor."""
    folding_regressor = FoldingRegressor(
        SklearnRegressor(GradientBoostingRegressor(n_estimators=4)),
        n_folds=2,
    )
    # the three positional flags are forwarded to check_regression as-is
    check_regression(folding_regressor, True, True, True)