예제 #1
0
def test_sklearn_incremental_predictor_classification():
    """Classifier predictions must survive a dataframe state transfer.

    An ``IncrementalPredictor`` wrapping an ``SGDClassifier`` is fitted on the
    train split of the dataframe returned by ``load_iris_1e5``. The state of
    the transformed train dataframe is then applied to the test split, and
    the resulting ``pred`` column is compared with the output of a direct
    ``predict`` call.
    """
    iris = vaex.ml.datasets.load_iris_1e5()
    train, test = iris.ml.train_test_split(test_size=0.1, verbose=False)

    feature_names = train.column_names[:4]
    label = 'class_'

    classifier = SGDClassifier(learning_rate='constant', eta0=0.01)
    predictor = IncrementalPredictor(model=classifier,
                                     features=feature_names,
                                     batch_size=10_000,
                                     num_epochs=3,
                                     shuffle=False,
                                     prediction_name='pred',
                                     partial_fit_kwargs={'classes': [0, 1, 2]})
    predictor.fit(df=train, target=label)
    train = predictor.transform(train)

    # Copy the state of the transformed train dataframe onto the test split.
    test.state_set(train.state_get())

    assert test.column_count() == 6
    assert test.pred.values.shape == (10050, )

    # The column obtained through the state must equal a direct prediction.
    expected = predictor.predict(test)
    np.testing.assert_array_equal(expected, test.pred.values)
예제 #2
0
def test_sklearn_incremental_predictor_serialize(tmpdir):
    """Regressor predictions must survive a state round trip through a file.

    An ``IncrementalPredictor`` wrapping an ``SGDRegressor`` is fitted on the
    train split of ``vaex.example()``. The state of the transformed train
    dataframe is written to ``test.json`` inside ``tmpdir`` and loaded into
    the test split, whose ``pred`` column is then compared (to one decimal)
    with the output of a direct ``predict`` call.
    """
    example = vaex.example()
    train, test = example.ml.train_test_split(test_size=0.1, verbose=False)

    feature_names = train.column_names[:6]
    label = 'FeH'

    regressor = SGDRegressor()
    predictor = IncrementalPredictor(
        model=regressor,
        features=feature_names,
        batch_size=10000,
        num_epochs=5,
        shuffle=True,
        prediction_name='pred',
    )
    predictor.fit(df=train, target=label)
    train = predictor.transform(train)

    # Write the train state to a file, then load that file into the test split.
    state_file = str(tmpdir.join('test.json'))
    train.state_write(state_file)
    test.state_load(state_file)

    assert train.column_count() == test.column_count()
    assert test.pred.values.shape == (33000, )

    expected = predictor.predict(test)
    np.testing.assert_array_almost_equal(expected, test.pred.values, decimal=1)
예제 #3
0
def test_sklearn_incremental_predictor_regression(df_example):
    """Regressor predictions must survive a dataframe state transfer.

    An ``IncrementalPredictor`` wrapping an ``SGDRegressor`` (with the target
    given at construction time) is fitted on the train split of the
    ``df_example`` fixture. The state of the transformed train dataframe is
    applied to the test split, whose ``pred`` column is then compared (to one
    decimal) with the output of a direct ``predict`` call.
    """
    train, test = df_example.ml.train_test_split(test_size=0.1, verbose=False)

    feature_names = train.column_names[:6]
    label = 'FeH'

    regressor = SGDRegressor()
    predictor = IncrementalPredictor(
        model=regressor,
        features=feature_names,
        target=label,
        batch_size=10000,
        num_epochs=5,
        shuffle=True,
        prediction_name='pred',
    )
    predictor.fit(df=train)
    train = predictor.transform(train)

    # Copy the state of the transformed train dataframe onto the test split.
    test.state_set(train.state_get())

    assert train.column_count() == test.column_count()
    assert test.pred.values.shape == (33000, )

    expected = predictor.predict(test)
    np.testing.assert_array_almost_equal(expected, test.pred.values, decimal=1)