示例#1
0
def test_refit_always():
    """Fitting with ``refit=True`` gives predictions shaped like the labels."""
    np.random.seed(42)
    features = np.random.normal(0, 1, (100, 3))
    labels = np.random.normal(0, 1, (100, )) < 0

    base_model = LogisticRegression()
    thresholder = Thresholder(base_model, threshold=0.5, refit=True)
    predictions = thresholder.fit(features, labels).predict(features)

    assert predictions.shape == labels.shape
示例#2
0
def test_raise_error2():
    """Fitting on a target with three classes is expected to raise ``ValueError``.

    A classifier is wrapped here (rather than a regressor) so that the error
    can only be attributed to the number of classes in ``y``, not to the kind
    of model handed to ``Thresholder``.
    """
    mod = Thresholder(LogisticRegression(), threshold=0.7)
    np.random.seed(42)
    X = np.random.normal(0, 1, (1000, 3))
    # we only support two classes
    y = np.random.choice(["a", "b", "c"], 1000)
    # Only the fit call is expected to raise; keeping the setup outside the
    # block stops an unrelated ValueError from making the test pass.
    with pytest.raises(ValueError):
        mod.fit(X, y)
示例#3
0
def test_raise_error1():
    """Fitting a ``Thresholder`` around a regressor is expected to raise ``ValueError``."""
    # we only support classification models
    mod = Thresholder(LinearRegression(), threshold=0.7)
    np.random.seed(42)
    X = np.random.normal(0, 1, (100, 3))
    y = np.random.normal(0, 1, (100, )) < 0
    # Only the fit call is expected to raise; keeping the setup outside the
    # block stops an unrelated ValueError from making the test pass.
    with pytest.raises(ValueError):
        mod.fit(X, y)
示例#4
0
def test_diff_threshold():
    """Raising the threshold must never increase the number of positive predictions."""
    np.random.seed(42)
    X = np.random.normal(0, 1, (100, 3))
    y = np.random.normal(0, 1, (100, )) < 0

    # Fit and predict once per threshold, then compare neighbouring counts.
    positives = []
    for threshold in (0.5, 0.7, 0.9):
        mod = Thresholder(LogisticRegression(), threshold=threshold)
        positives.append(mod.fit(X, y).predict(X).sum())

    low, mid, high = positives
    assert low >= mid
    assert mid >= high
示例#5
0
def test_refit_fits_underlying():
    """Check the prediction after refitting on different labels.

    The dummy classifier is first fitted on labels that are mostly ones; the
    thresholder (``refit=True``) is then fitted on labels that are mostly
    zeros, and the test expects the resulting prediction to be 0.
    """
    features = np.array([1, 2, 3, 4]).reshape(-1, 1)
    mostly_ones = np.array([0, 1, 1, 1])
    mostly_zeros = np.array([0, 0, 0, 1])

    base = DummyClassifier(strategy="most_frequent")
    base.fit(features, mostly_ones)

    thresholder = Thresholder(base, threshold=0.2, refit=True)
    thresholder.fit(features, mostly_zeros)

    single_row = np.array([[1]])
    assert thresholder.predict(single_row) == 0
示例#6
0
def test_passes_sample_weight(refit):
    """``Thresholder.fit`` is expected to hand ``sample_weight`` to the wrapped model.

    ``refit`` is forwarded to ``Thresholder``; it is presumably supplied by a
    pytest fixture or parametrization defined outside this block.
    """
    class CustomLR(LogisticRegression):
        def fit(self, X, y, sample_weight=None):
            # Fail the test if the weights were dropped before reaching here.
            assert sample_weight is not None
            # Forward the weights and return the fitted estimator, following
            # the scikit-learn convention that ``fit`` returns ``self``.
            return super().fit(X, y, sample_weight=sample_weight)

    mod = Thresholder(CustomLR(), threshold=0.5, refit=refit)
    np.random.seed(42)
    X = np.random.normal(0, 1, (100, 3))
    y = np.random.normal(0, 1, (100, )) < 0
    weight = np.random.random(100)

    mod.fit(X, y, sample_weight=weight)
示例#7
0
def test_stacking_classifier():
    """
    Regression test for https://github.com/koaning/scikit-lego/issues/501

    There are no asserts: the test passes as long as nothing raises.
    Cloning the model inside Thresholder produces an unfitted model
    on which no predict_proba exists.
    """
    base_estimators = [("dummy", DummyClassifier(strategy="constant", constant=0))]

    features = np.random.normal(0, 1, (100, 3))
    labels = np.random.normal(0, 1, (100, )) < 0

    meta_learner = DummyClassifier(strategy="constant", constant=0)
    stack = StackingClassifier(estimators=base_estimators,
                               final_estimator=meta_learner)
    stack.fit(features, labels)

    thresholder = Thresholder(stack, threshold=0.2)
    thresholder.fit(features, labels)
    thresholder.predict(features)
示例#8
0
def test_same_threshold():
    """At threshold 0.5 the thresholder is expected to predict like the plain model."""
    mod1 = Thresholder(LogisticRegression(), threshold=0.5)
    mod2 = LogisticRegression()
    # Seed the generator, as the sibling tests do, so that a failure can be
    # reproduced on the exact same data.
    np.random.seed(42)
    X = np.random.normal(0, 1, (100, 3))
    y = np.random.normal(0, 1, (100, )) < 0
    assert (mod1.fit(X, y).predict(X) == mod2.fit(X, y).predict(X)).all()
示例#9
0
def test_standard_checks(test_fn):
    """Run the supplied check ``test_fn`` on a ``Thresholder`` wrapping ``LogisticRegression``.

    ``test_fn`` is called with the class name and the estimator instance.
    """
    estimator = Thresholder(LogisticRegression(), threshold=0.5)
    estimator_name = Thresholder.__name__
    test_fn(estimator_name, estimator)
示例#10
0
def test_nans_could_work():
    """Fitting must not fail when the feature matrix contains a NaN.

    There are no asserts: the test passes as long as ``fit`` does not raise.
    """
    rows = [[np.nan, 4], [7, 3], [5, 5], [7, 2], [5, 7]]
    features = np.array(rows)
    targets = np.array([1, 0, 1, 0, 1])

    thresholder = Thresholder(HistGradientBoostingClassifier(), 0.6)
    thresholder.fit(features, targets)