Exemplo n.º 1
0
def test_fit_predict_standard_stack(teardown):
    """Compare a baikal stacking graph with sklearn's ``StackingClassifier``.

    This uses the "standard" stacking protocol, where the 2nd level features
    are the out-of-fold predictions of the 1st level models, with the
    original data appended to them.
    See for example: https://www.kdnuggets.com/2017/02/stacking-models-imropved-predictions.html
    """
    seed = 42
    split = train_test_split(
        breast_cancer.data,
        breast_cancer.target,
        test_size=0.2,
        random_state=0,
    )
    X_train, X_test, y_t_train, y_t_test = split

    # ---- baikal way ----
    x = Input()
    y_t = Input()

    # 1st level, model 1: random forest emitting class probabilities.
    forest_step = RandomForestClassifierOOF(n_estimators=10, random_state=seed)
    forest_proba = forest_step(x, y_t, compute_func="predict_proba")
    # remove collinear feature (drop the first probability column)
    forest_feature = Lambda(lambda proba: proba[:, 1:])(forest_proba)

    # 1st level, model 2: linear SVC on standardized inputs, emitting the
    # decision function values.
    x_scaled = StandardScaler()(x)
    svc_step = LinearSVCOOF(random_state=seed)
    svc_feature = svc_step(x_scaled, y_t, compute_func="decision_function")

    # 2nd level: original data plus both 1st level outputs, column-wise.
    stacked_features = ColumnStack()([x, forest_feature, svc_feature])
    meta_step = LogisticRegression(solver="liblinear", random_state=seed)
    y_p = meta_step(stacked_features, y_t)

    model = Model(x, y_p, y_t)
    model.fit(X_train, y_t_train)
    y_pred_baikal = model.predict(X_test)

    # ---- traditional way ----
    forest = RandomForestClassifier(n_estimators=10, random_state=seed)
    scaled_svc = make_pipeline(StandardScaler(), LinearSVC(random_state=seed))
    meta_estimator = LogisticRegression(solver="liblinear", random_state=seed)
    clf = sklearn.ensemble.StackingClassifier(
        estimators=[("rf", forest), ("svr", scaled_svc)],
        final_estimator=meta_estimator,
        passthrough=True,
    )
    clf.fit(X_train, y_t_train)
    y_pred_traditional = clf.predict(X_test)

    # Both implementations must produce the same predictions.
    assert_array_equal(y_pred_baikal, y_pred_traditional)
Exemplo n.º 2
0
 def dataplaceholders(self):
     """Build a small two-branch graph and return its data placeholders.

     Branch 1 standardizes ``x1`` and feeds it, together with the target
     ``y1_t``, to a logistic regression. Branch 2 applies PCA to ``x2``.

     Returns:
         The tuple ``(x1, x2, x1_rescaled, y1, y2, y1_t)``.
     """
     # Named graph inputs.
     x1 = Input(name="x1")
     x2 = Input(name="x2")
     y1_t = Input(name="y1_t")

     # Branch 1: scale, then classify against the target.
     scaler = StandardScaler()
     x1_rescaled = scaler(x1)
     classifier = LogisticRegression()
     y1 = classifier(x1_rescaled, y1_t)

     # Branch 2: decomposition of the second input.
     pca = PCA()
     y2 = pca(x2)

     return x1, x2, x1_rescaled, y1, y2, y1_t
Exemplo n.º 3
0
 def test_fit_with_shared_step(self, teardown):
     """Fit a model in which one scaler step is called twice.

     The first call (``transform``) is trainable, the second call
     (``inverse_transform``) is not. After fitting on the column
     ``[1, 3, 1, 3]`` the scaler statistics are checked.
     """
     x = Input()
     scaler = StandardScaler()
     # Same step instance, used through two different compute functions.
     forward = scaler(x, compute_func="transform", trainable=True)
     backward = scaler(forward, compute_func="inverse_transform", trainable=False)
     model = Model(x, backward)

     column = np.array([1, 3, 1, 3]).reshape(-1, 1)
     model.fit(column)

     fitted_stats = (scaler.mean_, scaler.var_)
     assert fitted_stats == (2.0, 1.0)
Exemplo n.º 4
0
def test_fit_predict_with_shared_step(teardown):
    """Check that transform followed by inverse_transform round-trips the data.

    One scaler step is called twice: first as a trainable ``transform``,
    then as a non-trainable ``inverse_transform`` on its own output. The
    fitted model's predictions are compared with the input data.
    """
    X_data = np.array([1, 3, 1, 3]).reshape(-1, 1)

    x = Input()
    scaler = StandardScaler()
    # Same step instance, used through two different compute functions.
    forward = scaler(x, compute_func="transform", trainable=True)
    backward = scaler(forward, compute_func="inverse_transform", trainable=False)

    model = Model(x, backward)
    model.fit(X_data)

    round_trip = model.predict(X_data)
    assert_array_equal(round_trip, X_data)
Exemplo n.º 5
0
def test_get_set_params_invariance(teardown):
    """Check that feeding ``get_params()`` back via ``set_params`` is a no-op.

    The model scales the target, regresses on the scaled target, and maps the
    prediction back with the same scaler step (non-trainable
    ``inverse_transform``).
    """
    scaler = StandardScaler(name="scaler")
    regressor = LinearRegression(name="regressor")

    x = Input()
    y_t = Input()

    # target -> scaled target -> scaled prediction -> prediction
    scaled_target = scaler(y_t)
    scaled_prediction = regressor(x, scaled_target)
    y_p = scaler(
        scaled_prediction,
        compute_func="inverse_transform",
        trainable=False,
    )
    model = Model(x, y_p, y_t)

    params_before = model.get_params()
    model.set_params(**params_before)
    params_after = model.get_params()

    assert params_after == params_before