Exemplo n.º 1
0
def test_nested_model_stack(teardown):
    """Check a nested baikal stacked model against a hand-built equivalent.

    The baikal model comes from ``make_naive_stacked_model``. The reference
    pipeline is assembled step by step from plain estimators with the same
    hyper-parameters and seed, fitted and evaluated on the iris data. Both
    must yield identical predictions.

    ``teardown`` is not used in the body; presumably it is a pytest fixture
    requested for its side effects -- confirm against the test suite's
    conftest.
    """
    seed = 123
    n_pca_components = 2
    inputs = iris.data
    targets = iris.target

    def new_logreg():
        # Every logistic regression in the reference pipeline is configured
        # the same way, so build them all from one place.
        return LogisticRegression(
            multi_class="multinomial", solver="lbfgs", random_state=seed
        )

    # ----------- baikal way
    baikal_model = make_naive_stacked_model(n_pca_components, seed, inputs, targets)
    baikal_predictions = baikal_model.predict(inputs)

    # ----------- traditional way
    # Submodel 1: PCA projection followed by logistic regression.
    projector = PCA(n_components=n_pca_components, random_state=seed)
    projected = projector.fit(inputs).transform(inputs)
    first_predictions = new_logreg().fit(projected, targets).predict(projected)

    # Submodel 2 (itself a stacked model): the predictions of two tree
    # ensembles become the features of another logistic regression.
    ensemble_predictions = [
        ensemble_cls(random_state=seed).fit(inputs, targets).predict(inputs)
        for ensemble_cls in (RandomForestClassifier, ExtraTreesClassifier)
    ]
    inner_features = np.stack(ensemble_predictions, axis=1)
    second_predictions = (
        new_logreg().fit(inner_features, targets).predict(inner_features)
    )

    # Final stage: stack the two submodel predictions and classify them.
    outer_features = np.stack([first_predictions, second_predictions], axis=1)
    reference_predictions = (
        new_logreg().fit(outer_features, targets).predict(outer_features)
    )

    assert_array_equal(baikal_predictions, reference_predictions)
Exemplo n.º 2
0
def make_naive_stacked_model(n_components, random_state, x_data, y_t_data):
    """Build and fit a deliberately over-engineered nested stacked Model.

    Structure:
      * ``submodel1``: PCA followed by a logistic regression.
      * ``submodel2``: a random forest and an extra-trees classifier whose
        outputs are stacked and fed to a logistic regression.
      * ``stacked``: the outputs of both submodels are stacked and fed to a
        final logistic regression.

    Args:
        n_components: Value passed as ``n_components`` to the PCA step.
        random_state: Seed passed to every step that accepts one.
        x_data: Data the outer model is fitted on.
        y_t_data: Targets the outer model is fitted on.

    Returns:
        The outer ``Model`` after ``fit(x_data, y_t_data)`` has been called.
    """
    # Keyword arguments common to all three logistic-regression steps; only
    # the step name differs between them.
    logreg_kwargs = dict(
        multi_class="multinomial", solver="lbfgs", random_state=random_state
    )

    # Sub-model 1
    sub1_in = Input(name="x1")
    sub1_target = Input(name="y1_t")
    sub1_projected = PCA(
        n_components=n_components, random_state=random_state, name="pca_sub1"
    )(sub1_in)
    sub1_out = LogisticRegression(name="logreg_sub1", **logreg_kwargs)(
        sub1_projected, sub1_target
    )
    submodel1 = Model(sub1_in, sub1_out, sub1_target, name="submodel1")

    # Sub-model 2 (a nested stacked model)
    sub2_in = Input(name="x2")
    sub2_target = Input(name="y2_t")
    forest_out = RandomForestClassifier(
        random_state=random_state, name="rforest_sub2"
    )(sub2_in, sub2_target)
    extra_trees_out = ExtraTreesClassifier(
        random_state=random_state, name="extrees_sub2"
    )(sub2_in, sub2_target)
    sub2_features = Stack(axis=1, name="stack_sub2")([forest_out, extra_trees_out])
    sub2_out = LogisticRegression(name="logreg_sub2", **logreg_kwargs)(
        sub2_features, sub2_target
    )
    submodel2 = Model(sub2_in, sub2_out, sub2_target, name="submodel2")

    # Stack of submodels: both receive the same outer input and target.
    outer_in = Input(name="x")
    outer_target = Input(name="y_t")
    submodel_outputs = [
        submodel1(outer_in, outer_target),
        submodel2(outer_in, outer_target),
    ]
    outer_features = Stack(axis=1, name="stack")(submodel_outputs)
    outer_out = LogisticRegression(name="logreg_stacked", **logreg_kwargs)(
        outer_features, outer_target
    )
    stacked_model_baikal = Model(outer_in, outer_out, outer_target, name="stacked")

    stacked_model_baikal.fit(x_data, y_t_data)

    return stacked_model_baikal