Пример #1
0
def test_online_batch_consistent():
    """Check that mini-batch and per-sample learning yield the same macro F1.

    Two pipelines built the same way (standard scaling piped into a
    one-vs-rest logistic regression) are evaluated in predict-then-learn
    order on the first 31 rows of the ImageSegments dataset: one through
    ``predict_many``/``fit_many`` on single-row chunks, the other through
    ``predict_one``/``fit_one``. Their MacroF1 scores must be equal.
    """

    def build_pipeline():
        # Return a freshly constructed pipeline so the learners share no state.
        scaler = preprocessing.StandardScaler()
        classifier = multiclass.OneVsRestClassifier(
            linear_model.LogisticRegression()
        )
        return scaler | classifier

    # Both loops stop right after the row with this (0-based) index.
    last_index = 30

    # --- Mini-batch learner, fed single-row chunks -------------------------

    batch = build_pipeline()
    dataset = datasets.ImageSegments()
    batch_metric = metrics.MacroF1()

    chunks = pd.read_csv(dataset.path, chunksize=1)
    for index, chunk in enumerate(chunks):
        labels = chunk.pop('category')
        predictions = batch.predict_many(chunk)
        batch.fit_many(chunk, labels)

        for truth, guess in zip(labels, predictions):
            # A None prediction is not scored.
            if guess is None:
                continue
            batch_metric.update(truth, guess)

        if index == last_index:
            break

    # --- Online learner, fed one sample at a time --------------------------

    online = build_pipeline()
    online_metric = metrics.MacroF1()

    features = pd.read_csv(dataset.path)
    targets = features.pop('category')

    samples = stream.iter_pandas(features, targets)
    for index, (sample, label) in enumerate(samples):
        guess = online.predict_one(sample)
        online.fit_one(sample, label)

        # Same rule as above: a None prediction is not scored.
        if guess is not None:
            online_metric.update(label, guess)

        if index == last_index:
            break

    assert online_metric.get() == batch_metric.get()
Пример #2
0
def test_one_many_consistent():
    """Verify that per-sample and mini-batch training learn matching weights.

    One linear regression is trained with ``fit_one`` on every row of the
    TrumpApproval dataset, another with ``fit_many`` on single-row chunks of
    the same data. The weight learned for each feature column must then agree
    according to ``math.isclose``.
    """

    features = pd.read_csv(datasets.TrumpApproval().path)
    target = features.pop('five_thirty_eight')

    # Learner fed one observation at a time.
    one = lm.LinearRegression()
    for sample, value in stream.iter_pandas(features, target):
        one.fit_one(sample, value)

    # Learner fed the same observations as mini-batches; splitting into as
    # many sections as there are rows gives one row per chunk.
    many = lm.LinearRegression()
    feature_chunks = np.array_split(features, len(features))
    target_chunks = np.array_split(target, len(target))
    for feature_chunk, target_chunk in zip(feature_chunks, target_chunks):
        many.fit_many(feature_chunk, target_chunk)

    # Iterating a DataFrame yields its column labels.
    for column in features:
        assert math.isclose(one.weights[column], many.weights[column])
Пример #3
0
def test_standard_scaler_one_many_consistent():
    """Verify that a scaler keeps the same statistics via fit_one or fit_many.

    One ``StandardScaler`` is updated row by row over the TrumpApproval
    dataset, another is updated with the same data split into 10 chunks. For
    every column, the ``counts``, ``means`` and ``vars`` entries of both
    scalers must agree according to ``math.isclose``.
    """

    data = pd.read_csv(datasets.TrumpApproval().path)

    # Scaler updated one observation at a time; no target is passed, so the
    # second element yielded by the stream is ignored.
    one = preprocessing.StandardScaler()
    for sample, _ in stream.iter_pandas(data):
        one.fit_one(sample)

    # Scaler updated with 10 mini-batches covering the same rows.
    many = preprocessing.StandardScaler()
    for chunk in np.array_split(data, 10):
        many.fit_many(chunk)

    # Iterating a DataFrame yields its column labels.
    for column in data:
        for statistic in ('counts', 'means', 'vars'):
            assert math.isclose(
                getattr(one, statistic)[column],
                getattr(many, statistic)[column],
            )