示例#1
0
def test_chickweight_raise_error_group_col_missing():
    """Predicting without the grouping column must raise a ``ValueError``.

    The model is fitted on ``time``/``diet`` with ``diet`` as the group and is
    then asked to predict on a frame that carries ``chick`` instead of ``diet``.
    """
    df = load_chicken(as_frame=True)
    mod = GroupedPredictor(estimator=LinearRegression(), groups="diet")
    mod.fit(df[["time", "diet"]], df["weight"])
    # The message is verified through ``match``: an assert written after the
    # raising call inside the ``with`` block would never be executed.
    with pytest.raises(ValueError, match="not in columns"):
        mod.predict(df[["time", "chick"]])
示例#2
0
def test_chickweight_raise_error_cols_missing2():
    """Predicting on ``diet``/``chick`` after fitting on ``time``/``diet`` raises.

    The expected failure is a ``ValueError`` whose message mentions
    "not in columns".
    """
    df = load_chicken(give_pandas=True)
    mod = GroupedEstimator(estimator=LinearRegression(), groups="diet")
    mod.fit(df[['time', 'diet']], df['weight'])
    # The message is verified through ``match``: an assert written after the
    # raising call inside the ``with`` block would never be executed.
    with pytest.raises(ValueError, match="not in columns"):
        mod.predict(df[['diet', 'chick']])
示例#3
0
def test_bad_shrinkage_value_error():
    """An unknown shrinkage name must be rejected with a ``ValueError``."""
    # Loading the data is not the behaviour under test, so it happens outside
    # the ``raises`` block where an error from it cannot mask a real failure.
    df = load_chicken(as_frame=True)
    # Construction and fit both stay inside the block, as in the original
    # test, so the error is caught wherever the validation takes place.
    # The message is verified through ``match``: an assert written after the
    # raising call inside the ``with`` block would never be executed.
    with pytest.raises(ValueError, match="shrinkage function"):
        mod = GroupedPredictor(
            estimator=LinearRegression(), groups="diet", shrinkage="dinosaurhead"
        )
        mod.fit(df[["time", "diet"]], df["weight"])
def test_has_decision_function():
    """``decision_function`` can be called on a fitted grouped classifier.

    Needed because, for example,
    ``cross_val_score(pipe, X, y, cv=5, scoring="roc_auc", error_score='raise')``
    may fail otherwise; see https://github.com/koaning/scikit-lego/issues/511
    """
    frame = load_chicken(as_frame=True)
    features = frame.drop(columns='weight')
    target = frame['weight']

    # This should NOT raise errors
    classifier = GroupedPredictor(LogisticRegression(), groups=["diet"])
    fitted = classifier.fit(features, target)
    fitted.decision_function(features)
示例#5
0
def test_chickweight_can_do_fallback():
    """Groups absent from the fitted keys are still predicted, and identically."""
    frame = load_chicken(give_pandas=True)
    model = GroupedEstimator(estimator=LinearRegression(), groups="diet")
    model.fit(frame[['time', 'diet']], frame['weight'])
    assert set(model.estimators_.keys()) == {1, 2, 3, 4}

    # Diets 5 and 6 are not among the fitted group keys checked above.
    unseen = pd.DataFrame({"time": [21, 21], "diet": [5, 6]})
    assert model.predict(unseen).shape == (2, )
    first = model.predict(unseen)[0]
    second = model.predict(unseen)[1]
    assert first == second
示例#6
0
def test_chickweight_raise_error_value_col_missing():
    """Predicting on a frame holding only the group column raises ``ValueError``."""
    frame = load_chicken(as_frame=True)
    model = GroupedPredictor(estimator=LinearRegression(), groups="diet")
    model.fit(frame[["time", "diet"]], frame["weight"])

    only_groups = frame[["diet"]]
    # The former input ``df[["diet", "chick"]]`` is not a valid failing case
    # anymore because value columns are no longer checked.
    with pytest.raises(ValueError):
        model.predict(only_groups)
示例#7
0
def test_fallback_can_raise_error():
    """With ``use_fallback=False``, predicting diets 5 and 6 raises ``ValueError``."""
    frame = load_chicken(give_pandas=True)
    model = GroupedEstimator(
        estimator=LinearRegression(),
        groups="diet",
        use_fallback=False,
    )
    model.fit(frame[['time', 'diet']], frame['weight'])

    unseen = pd.DataFrame({"time": [21, 21], "diet": [5, 6]})
    with pytest.raises(ValueError):
        model.predict(unseen)
示例#8
0
def test_bad_shrinkage_value_error():
    """An unknown shrinkage name must be rejected with a ``ValueError``."""
    # Loading the data is not the behaviour under test, so it happens outside
    # the ``raises`` block where an error from it cannot mask a real failure.
    df = load_chicken(give_pandas=True)
    # Construction and fit both stay inside the block, as in the original
    # test, so the error is caught wherever the validation takes place.
    # The message is verified through ``match``: an assert written after the
    # raising call inside the ``with`` block would never be executed.
    with pytest.raises(ValueError, match="shrinkage function"):
        mod = GroupedEstimator(
            estimator=LinearRegression(),
            groups="diet",
            shrinkage="dinosaurhead",
        )
        mod.fit(df[['time', 'diet']], df['weight'])
def test_chickweight_can_do_fallback_proba():
    """``predict_proba`` also works for groups absent from the fitted keys."""
    frame = load_chicken(as_frame=True)
    # Binary target: 1 where the weight exceeds the mean weight, else 0.
    above_mean = np.where(frame.weight > frame.weight.mean(), 1, 0)
    model = GroupedPredictor(estimator=LogisticRegression(), groups="diet")
    model.fit(frame[["time", "diet"]], above_mean)
    assert set(model.estimators_.keys()) == {1, 2, 3, 4}

    # Diets 5 and 6 are not among the fitted group keys checked above.
    unseen = pd.DataFrame({"time": [21, 21], "diet": [5, 6]})
    assert model.predict_proba(unseen).shape == (2, 2)
    first_row = model.predict_proba(unseen)[0]
    second_row = model.predict_proba(unseen)[1]
    assert (first_row == second_row).all()
示例#10
0
def test_fallback_can_raise_error():
    """Without a global model, predicting diets 5 and 6 raises ``ValueError``.

    The estimator is built with ``use_global_model=False`` and
    ``shrinkage=None`` and fitted on diets whose keys do not include 5 or 6.
    """
    df = load_chicken(give_pandas=True)
    mod = GroupedEstimator(estimator=LinearRegression(),
                           groups="diet",
                           use_global_model=False,
                           shrinkage=None)
    mod.fit(df[['time', 'diet']], df['weight'])
    to_predict = pd.DataFrame({"time": [21, 21], "diet": [5, 6]})
    # The message is verified through ``match``: an assert written after the
    # raising call inside the ``with`` block would never be executed.
    with pytest.raises(ValueError, match="found a group"):
        mod.predict(to_predict)
示例#11
0
def test_chickweigt_string_groups():
    """Fit and predict work when the group labels are strings.

    Exercised both with a DataFrame (group given by column name) and with the
    equivalent NumPy array (group given by column index).
    """
    frame = load_chicken(give_pandas=True)
    frame['diet'] = ['omgomgomg' + label for label in frame['diet'].astype(str)]

    features = frame[['time', 'diet']]
    features_np = np.array(features)
    target = frame['weight']

    # This should NOT raise errors
    by_name = GroupedEstimator(LinearRegression(), groups=['diet'])
    by_name.fit(features, target).predict(features)
    by_index = GroupedEstimator(LinearRegression(), groups=1)
    by_index.fit(features_np, target).predict(features_np)
示例#12
0
def test_chickweigt_string_groups():
    """Fit and predict work when the group labels are strings.

    Exercised both with a DataFrame (group given by column name) and with the
    equivalent NumPy array (group given by column index).
    """
    frame = load_chicken(as_frame=True)
    frame["diet"] = ["omgomgomg" + label for label in frame["diet"].astype(str)]

    features = frame[["time", "diet"]]
    features_np = np.array(features)
    target = frame["weight"]

    # This should NOT raise errors
    by_name = GroupedPredictor(LinearRegression(), groups=["diet"])
    by_name.fit(features, target).predict(features)
    by_index = GroupedPredictor(LinearRegression(), groups=1)
    by_index.fit(features_np, target).predict(features_np)
def test_missing_check():
    """``check_X=False`` lets a NaN through; the default setting rejects it.

    A NaN is injected into the ``chick`` column and the wrapped pipeline starts
    with a ``SimpleImputer``.
    """
    df = load_chicken(as_frame=True)

    X, y = df.drop(columns='weight'), df['weight']
    # create missing value
    X.loc[0, 'chick'] = np.nan
    model = make_pipeline(SimpleImputer(), LinearRegression())

    # Should not raise error, check is disabled
    m = GroupedPredictor(model, groups=['diet'], check_X=False).fit(X, y)
    m.predict(X)

    # Should raise error, check is still enabled.
    # The message is verified through ``match``: an assert written after the
    # raising call inside the ``with`` block would never be executed.
    with pytest.raises(ValueError, match="contains NaN"):
        GroupedPredictor(model, groups=['diet']).fit(X, y)
示例#14
0
def test_chickweight1():
    """``load_chicken(return_X_y=True)`` gives 578 rows with 3 feature columns."""
    features, target = load_chicken(return_X_y=True)
    expected_rows = 578
    assert features.shape == (expected_rows, 3)
    assert target.shape[0] == expected_rows
示例#15
0
def test_chickweight_df2_keys():
    """Grouping on ``chick`` produces one estimator per key 1 through 50."""
    frame = load_chicken(give_pandas=True)
    model = GroupedEstimator(estimator=LinearRegression(), groups="chick")
    model.fit(frame[['time', 'chick']], frame['weight'])

    expected_keys = set(range(1, 50 + 1))
    fitted_keys = set(model.estimators_.keys())
    assert fitted_keys == expected_keys
示例#16
0
def test_chickweight_df1_keys():
    """Grouping on ``diet`` produces estimators keyed 1, 2, 3 and 4."""
    frame = load_chicken(give_pandas=True)
    model = GroupedEstimator(estimator=LinearRegression(), groups="diet")
    model.fit(frame[['time', 'diet']], frame['weight'])

    fitted_keys = set(model.estimators_.keys())
    assert fitted_keys == {1, 2, 3, 4}
示例#17
0
def test_chickweight_np_keys():
    """Grouping NumPy input on column indices 1 and 2 yields 50 estimators."""
    frame = load_chicken(as_frame=True)
    features = frame[["time", "chick", "diet"]].values
    target = frame["weight"].values

    model = GroupedPredictor(estimator=LinearRegression(), groups=[1, 2])
    model.fit(features, target)
    # there should still only be 50 groups on this dataset
    n_groups = len(model.estimators_.keys())
    assert n_groups == 50
def test_chickweight2():
    """``load_chicken(as_frame=True)`` returns an object of shape (578, 4)."""
    frame = load_chicken(as_frame=True)
    expected_shape = (578, 4)
    assert frame.shape == expected_shape
示例#19
0
def test_chickweight2():
    """``load_chicken(give_pandas=True)`` returns an object of shape (578, 4)."""
    frame = load_chicken(give_pandas=True)
    expected_shape = (578, 4)
    assert frame.shape == expected_shape
示例#20
0
def test_chickweight_df2_keys():
    """Grouping on ``chick`` produces one estimator per key 1 through 50."""
    frame = load_chicken(as_frame=True)
    model = GroupedPredictor(estimator=LinearRegression(), groups="chick")
    model.fit(frame[["time", "chick"]], frame["weight"])

    expected_keys = set(range(1, 50 + 1))
    fitted_keys = set(model.estimators_.keys())
    assert fitted_keys == expected_keys
示例#21
0
def test_chickweight_df1_keys():
    """Grouping on ``diet`` produces estimators keyed 1, 2, 3 and 4."""
    frame = load_chicken(as_frame=True)
    model = GroupedPredictor(estimator=LinearRegression(), groups="diet")
    model.fit(frame[["time", "diet"]], frame["weight"])

    fitted_keys = set(model.estimators_.keys())
    assert fitted_keys == {1, 2, 3, 4}
示例#22
0
def test_chickweight1():
    """``load_chicken()`` unpacks into features of shape (578, 3) and 578 targets."""
    features, target = load_chicken()
    expected_rows = 578
    assert features.shape == (expected_rows, 3)
    assert target.shape[0] == expected_rows
示例#23
0
def test_chickweight_np_keys():
    """Grouping NumPy input on column indices 1 and 2 yields 50 estimators."""
    frame = load_chicken(give_pandas=True)
    features = frame[['time', 'chick', 'diet']].values
    target = frame['weight'].values

    model = GroupedEstimator(estimator=LinearRegression(), groups=[1, 2])
    model.fit(features, target)
    # there should still only be 50 groups on this dataset
    n_groups = len(model.estimators_.keys())
    assert n_groups == 50
示例#24
0
def test_chickweight_raise_error_cols_missing1():
    """Predicting on a frame without the ``diet`` group column raises ``KeyError``."""
    frame = load_chicken(give_pandas=True)
    model = GroupedEstimator(estimator=LinearRegression(), groups="diet")
    model.fit(frame[['time', 'diet']], frame['weight'])

    without_group = frame[['time', 'chick']]
    with pytest.raises(KeyError):
        model.predict(without_group)