def test_equivalence_blend():
    """[SequentialEnsemble] Test ensemble equivalence with BlendEnsemble.

    A SequentialEnsemble with a single 'blend' layer is expected to produce
    exactly the predictions of a BlendEnsemble given the same estimators.
    """
    reference = BlendEnsemble()
    sequential = SequentialEnsemble()

    reference.add(ECM, dtype=np.float64)
    sequential.add('blend', ECM, dtype=np.float64)

    # The reference ensemble is fitted first, then the sequential one.
    expected = reference.fit(X, y).predict(X)
    actual = sequential.fit(X, y).predict(X)

    np.testing.assert_array_equal(actual, expected)
def test_equivalence_super_learner():
    """[SequentialEnsemble] Test ensemble equivalence with SuperLearner.

    A SequentialEnsemble with a single 'stack' layer is expected to produce
    exactly the predictions of a SuperLearner given the same estimators.
    """
    reference = SuperLearner()
    sequential = SequentialEnsemble()

    reference.add(ECM, dtype=np.float64)
    sequential.add('stack', ECM, dtype=np.float64)

    # The reference ensemble is fitted first, then the sequential one.
    expected = reference.fit(X, y).predict(X)
    actual = sequential.fit(X, y).predict(X)

    np.testing.assert_array_equal(actual, expected)
def test_equivalence_subsemble():
    """[SequentialEnsemble] Test ensemble equivalence with Subsemble.

    Both ensembles are created with ``n_jobs=1``. A SequentialEnsemble with a
    single 'subsemble' layer is expected to produce exactly the predictions
    of a Subsemble given the same estimators.
    """
    reference = Subsemble(n_jobs=1)
    sequential = SequentialEnsemble(n_jobs=1)

    reference.add(ECM, dtype=np.float64)
    sequential.add('subsemble', ECM, dtype=np.float64)

    # The reference ensemble is fitted first, then the sequential one.
    expected = reference.fit(X, y).predict(X)
    actual = sequential.fit(X, y).predict(X)

    np.testing.assert_array_equal(actual, expected)
def test_equivalence_subsemble():
    """[Sequential] Test ensemble equivalence with Subsemble.

    Uses default constructor arguments and the 'subset' layer key; the
    predictions of both ensembles are expected to be identical.
    """
    reference = Subsemble()
    sequential = SequentialEnsemble()

    reference.add(ECM)
    sequential.add('subset', ECM)

    # The reference ensemble is fitted first, then the sequential one.
    expected = reference.fit(X, y).predict(X)
    actual = sequential.fit(X, y).predict(X)

    np.testing.assert_array_equal(actual, expected)
def test_predict():
    """[SequentialEnsemble] Test multilayer prediction.

    Chains the module-level layer fixtures (``lc_s`` -> ``lc_b`` -> ``lc_u``)
    by hand and checks that a stack/blend/subsemble SequentialEnsemble
    predicts the same array.
    """
    # Manual pipeline: each fixture predicts on the previous one's output.
    stack_out = lc_s.predict(X)
    blend_out = lc_b.predict(stack_out)
    expected = lc_u.predict(blend_out)

    sequential = SequentialEnsemble()
    sequential.add('stack', ESTIMATORS, PREPROCESSING, dtype=np.float64)
    sequential.add('blend', ECM, dtype=np.float64)
    sequential.add('subsemble', ECM, dtype=np.float64)

    actual = sequential.fit(X, y).predict(X)

    np.testing.assert_array_equal(expected, actual)
def test_fit():
    """[SequentialEnsemble] Test multilayer fitting.

    Chains ``fit_transform`` over the module-level layer fixtures by hand and
    checks that ``SequentialEnsemble.fit_transform`` returns the same array.
    """
    stack_out = lc_s.fit_transform(X, y)
    blend_out = lc_b.fit_transform(stack_out, y)

    # Drop as many leading targets as the blend output has fewer rows than y,
    # so the last fixture sees inputs and targets of matching length.
    n_dropped = y.shape[0] - blend_out.shape[0]
    expected = lc_u.fit_transform(blend_out, y[n_dropped:])

    sequential = SequentialEnsemble()
    sequential.add('stack', ESTIMATORS, PREPROCESSING, dtype=np.float64)
    sequential.add('blend', ECM, dtype=np.float64)
    sequential.add('subsemble', ECM, dtype=np.float64)

    actual = sequential.fit_transform(X, y)

    np.testing.assert_array_equal(expected, actual)
def test_predict():
    """[Sequential] Test multilayer prediction.

    Chains the module-level layer fixtures (``lc_s`` -> ``lc_b`` -> ``lc_u``)
    by hand, passing ``y`` to every ``predict`` call, and checks that a
    stack/blend/subset SequentialEnsemble predicts the same array.
    """
    # Manual pipeline: each fixture predicts on the previous one's output.
    stack_out = lc_s.predict(X, y)
    blend_out = lc_b.predict(stack_out, y)
    expected = lc_u.predict(blend_out, y)

    sequential = SequentialEnsemble()
    sequential.add('stack', ESTIMATORS, PREPROCESSING)
    sequential.add('blend', ECM)
    sequential.add('subset', ECM)

    actual = sequential.fit(X, y).predict(X)

    np.testing.assert_array_equal(expected, actual)
def test_fit():
    """[Sequential] Test multilayer fitting.

    Fits the module-level layer fixtures one after another, feeding the last
    element of each ``fit`` result into the next, and compares the final
    element with the one returned by fitting the ensemble's ``layers``.
    """
    # Each fit(...) result is indexed with [-1]; that element is the next input.
    stack_out = lc_s.fit(X, y, -1)[-1]
    blend_out = lc_b.fit(stack_out, y, -1)[-1]
    expected = lc_u.fit(blend_out, y, -1)[-1]

    sequential = SequentialEnsemble()
    sequential.add('stack', ESTIMATORS, PREPROCESSING)
    sequential.add('blend', ECM)
    sequential.add('subset', ECM)

    actual = sequential.layers.fit(X, y, -1)[-1]

    np.testing.assert_array_equal(expected, actual)
def build_ensemble(**kwargs):
    """Assemble a blend -> stack -> meta ``SequentialEnsemble``.

    Parameters
    ----------
    **kwargs
        Forwarded to the ``SequentialEnsemble`` constructor. ``shuffle`` is
        always passed as ``False`` by this function, so supplying it here
        raises ``TypeError`` (duplicate keyword argument).

    Returns
    -------
    SequentialEnsemble
        The unfitted ensemble with both layers and the meta learner added.
    """
    # One scaler instance is handed to both layers as their preprocessing step.
    scaler = StandardScaler()

    blend_learners = [
        ('rfr', RandomForestRegressor(n_estimators=5)),
        ('rdg', Ridge(tol=1e-4, max_iter=4000)),
        ('mlr', MLPRegressor((100, 20), max_iter=1000)),
    ]
    stack_learners = [
        ('rdg', Ridge(tol=1e-4, max_iter=4000)),
        ('svr', SVR(tol=1e-4, kernel='linear', degree=5, max_iter=4000)),
    ]
    meta_learners = [('etc', ExtraTreesClassifier(n_estimators=5))]

    ensemble = SequentialEnsemble(shuffle=False, **kwargs)
    ensemble.add("blend", blend_learners, preprocessing=[scaler])
    ensemble.add("stack", stack_learners, preprocessing=[scaler])
    ensemble.add_meta(meta_learners)
    return ensemble
nmf = NMF()

# Preprocessing pipelines keyed by case name; only 'case-1' is enabled.
pre_cases = {
    'case-1': [sc],
    # 'case-2': [sc],
    # 'case-3': [pca],
    # 'case-4': [fa]
}

# Accuracy-based scorer used by the evaluator below.
score = make_scorer(
    score_func=accuracy_score,
    greater_is_better=True,
    needs_proba=False,
    needs_threshold=False,
)

# Single stacked layer over ``ests_1`` with a linear-kernel SVC meta learner.
ensemble = SequentialEnsemble(
    model_selection=True,
    n_jobs=1,
    shuffle=False,
    random_state=seed,
)
ensemble.add('stack', ests_1, preprocessing=pre_cases)
ensemble.add_meta(SVC(kernel='linear', degree=5, tol=1e-4))

# ensemble.fit(X_train, y_train)
# y_pred = ensemble.predict(X_test)
# ens = ensemble

# The evaluator is fitted with an empty estimator list, so only the
# preprocessing cases and ``pars_1`` are handed over here.
evaluator = Evaluator(scorer=score, random_state=seed, verbose=True)
evaluator.fit(
    data_pix,
    spacial_pix,
    estimators=[],
    param_dicts=pars_1,
    n_iter=5,
    preprocessing=pre_cases,
)
def add_sequential(name, models, X_train, Y_train, X_test, Y_test):
    """Fit a two-layer ``SequentialEnsemble`` with an SVC meta learner and score it.

    The ensemble consists of a subsemble layer followed by a stacked layer,
    both built from ``models``, with ``SVC()`` as the meta estimator.

    Parameters
    ----------
    name
        Label stored under the ``"Ensemble"`` key of the result.
    models
        Base estimators passed to both layers.
    X_train, Y_train
        Data the ensemble is fitted on.
    X_test, Y_test
        Data the fitted ensemble is evaluated on.

    Returns
    -------
    dict
        ``"Ensemble"`` (``name``), ``"Meta_Classifier"`` (always ``"SVC"``),
        ``"Accuracy_Score"`` (accuracy on the test data) and ``"Runtime"``
        (seconds spent on fitting, predicting and scoring).
    """
    ensemble = SequentialEnsemble(scorer=accuracy_score, random_state=seed)

    # First layer: a subsemble with 10 partitions and 10 folds.
    ensemble.add('subsemble', models, partitions=10, folds=10)

    # Second layer: a stacked (super learner) layer with 20 folds.
    ensemble.add('stack', models, folds=20)

    ensemble.add_meta(SVC())

    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    ensemble.fit(X_train, Y_train)
    preds = ensemble.predict(X_test)
    # accuracy_score expects (y_true, y_pred).
    acc_score = accuracy_score(Y_test, preds)
    runtime = time.perf_counter() - start

    return {
        "Ensemble": name,
        "Meta_Classifier": "SVC",
        "Accuracy_Score": acc_score,
        "Runtime": runtime,
    }
}, 'case-1.bag': { 'n_estimators': f, } } pre_cases = { # 'case-1': [], 'case-1': [StandardScaler()] } # scorer = make_scorer(r2_score, greater_is_better=False, needs_proba=False, needs_threshold=False) ensemble = SequentialEnsemble(model_selection=False, n_jobs=3, shuffle=True, random_state=seed, scorer=mean_absolute_error) ensemble.add('blend', ests, preprocessing=pre_cases) ensemble.add_meta(SVR()) # ensemble.fit(X_train, y_train) # y_pred = ensemble.predict(X_test) # print(mean_absolute_error(y_test, y_pred)) # ests = [ensemble] # evaluator = Evaluator(scorer=scorer, random_state=seed, verbose=3, cv=4, n_jobs=1) # evaluator.fit(X=image_set, y=label_set, estimators=ests, param_dicts=pars_1, # n_iter=40, preprocessing=pre_cases) # # print(evaluator.results)
#
# General multi-layer ensemble learning
# -------------------------------------
#
# .. currentmodule:: mlens.ensemble
#
# The :class:`SequentialEnsemble` class lets every ``add`` call specify the
# *type* of layer to append (i.e. stacked, blended, subsemble-style), so the
# layer type can alternate from one layer to the next. This is particularly
# powerful when facing a large dataset: the first layer can use a fast
# approach such as blending, while subsequent layers, fitted on the remaining
# data, can use more computationally intensive approaches.

from mlens.ensemble import SequentialEnsemble

ensemble = SequentialEnsemble()

# The initial layer is a blended layer, same as a layer in the BlendEnsemble
ensemble.add(
    'blend',
    [SVC(), RandomForestClassifier(random_state=seed)],
)

# The second layer is a stacked layer, same as a layer of the SuperLearner
ensemble.add(
    'stack',
    [SVC(), RandomForestClassifier(random_state=seed)],
)

# The third layer is a subsembled layer, same as a layer of the Subsemble
ensemble.add(
    'subsemble',
    [SVC(), RandomForestClassifier(random_state=seed)],
)

# The meta estimator is added as in any other ensemble
ensemble.add_meta(SVC())

##############################################################################
# The table below maps each type of layer available in the
# :class:`SequentialEnsemble` to the corresponding ensemble.