def test_nested_model_stack(teardown):
    """Compare a nested baikal stacked model against hand-wired estimators.

    The same pipeline is evaluated twice on ``iris.data`` / ``iris.target``:
    once through ``make_naive_stacked_model`` and once by fitting each
    estimator manually and joining their predictions with ``np.stack``.
    The two prediction arrays must be equal.

    ``teardown`` is not referenced in the body; it is presumably a pytest
    fixture requested only for its side effects (defined outside this view).
    """
    seed = 123
    pca_dims = 2
    inputs = iris.data
    targets = iris.target

    # Settings shared by every hand-built logistic regression below.
    logreg_kwargs = {
        "multi_class": "multinomial",
        "solver": "lbfgs",
        "random_state": seed,
    }

    # --- Predictions obtained through the baikal model
    baikal_model = make_naive_stacked_model(pca_dims, seed, inputs, targets)
    baikal_pred = baikal_model.predict(inputs)

    # --- Same computation, wired by hand one estimator at a time

    # First sub-model: PCA projection feeding a logistic regression.
    first_clf = LogisticRegression(**logreg_kwargs)
    projector = PCA(n_components=pca_dims, random_state=seed)
    projector.fit(inputs)
    projected = projector.transform(inputs)
    first_clf.fit(projected, targets)
    first_pred = first_clf.predict(projected)

    # Second sub-model: itself a stack of two tree ensembles whose
    # predictions become the features of another logistic regression.
    forest = RandomForestClassifier(random_state=seed)
    forest.fit(inputs, targets)
    forest_pred = forest.predict(inputs)

    trees = ExtraTreesClassifier(random_state=seed)
    trees.fit(inputs, targets)
    trees_pred = trees.predict(inputs)

    inner_features = np.stack([forest_pred, trees_pred], axis=1)
    second_clf = LogisticRegression(**logreg_kwargs)
    second_clf.fit(inner_features, targets)
    second_pred = second_clf.predict(inner_features)

    # Top level: stack both sub-model predictions and classify once more.
    outer_features = np.stack([first_pred, second_pred], axis=1)
    final_clf = LogisticRegression(**logreg_kwargs)
    final_clf.fit(outer_features, targets)
    traditional_pred = final_clf.predict(outer_features)

    assert_array_equal(baikal_pred, traditional_pred)
def make_naive_stacked_model(n_components, random_state, x_data, y_t_data):
    """Build and fit a deliberately over-complicated stacked model.

    Two sub-models are wrapped as ``Model`` instances and then reused as
    steps of an outer model, whose final step is a logistic regression
    over the stacked sub-model outputs.

    Args:
        n_components: Forwarded to ``PCA`` in the first sub-model.
        random_state: Forwarded to every estimator created here.
        x_data: Passed as the first argument to the outer model's ``fit``.
        y_t_data: Passed as the second argument to the outer model's ``fit``.

    Returns:
        The outer ``Model`` (named ``"stacked"``) after ``fit`` was called.
    """
    # Settings shared by the three logistic regression steps.
    logreg_kwargs = {
        "multi_class": "multinomial",
        "solver": "lbfgs",
        "random_state": random_state,
    }

    # Sub-model 1: PCA feeding a logistic regression.
    sub1_in = Input(name="x1")
    sub1_target = Input(name="y1_t")
    sub1_hidden = PCA(
        n_components=n_components, random_state=random_state, name="pca_sub1"
    )(sub1_in)
    sub1_out = LogisticRegression(name="logreg_sub1", **logreg_kwargs)(
        sub1_hidden, sub1_target
    )
    submodel1 = Model(sub1_in, sub1_out, sub1_target, name="submodel1")

    # Sub-model 2: a nested stack of two tree ensembles feeding a
    # logistic regression.
    sub2_in = Input(name="x2")
    sub2_target = Input(name="y2_t")
    forest_out = RandomForestClassifier(
        random_state=random_state, name="rforest_sub2"
    )(sub2_in, sub2_target)
    trees_out = ExtraTreesClassifier(
        random_state=random_state, name="extrees_sub2"
    )(sub2_in, sub2_target)
    sub2_features = Stack(axis=1, name="stack_sub2")([forest_out, trees_out])
    sub2_out = LogisticRegression(name="logreg_sub2", **logreg_kwargs)(
        sub2_features, sub2_target
    )
    submodel2 = Model(sub2_in, sub2_out, sub2_target, name="submodel2")

    # Outer model: both sub-models are applied to the same inputs and
    # their outputs are stacked for the final classifier.
    x = Input(name="x")
    y_t = Input(name="y_t")
    branch1 = submodel1(x, y_t)
    branch2 = submodel2(x, y_t)
    stacked_features = Stack(axis=1, name="stack")([branch1, branch2])
    y = LogisticRegression(name="logreg_stacked", **logreg_kwargs)(
        stacked_features, y_t
    )

    stacked_model_baikal = Model(x, y, y_t, name="stacked")
    stacked_model_baikal.fit(x_data, y_t_data)
    return stacked_model_baikal