def test_subset_fit():
    """[Subsemble] 'fit' and 'predict' runs correctly."""
    # Reference prediction: a plain OLS meta learner fit on the
    # precomputed fold matrix F and evaluated on P.
    reference = OLS()
    reference.fit(F, y)
    expected = reference.predict(P)

    # Build the subsemble with the same meta learner and compare.
    subsemble = Subsemble()
    subsemble.add(ECM, partitions=2, folds=3, dtype=np.float64)
    subsemble.add_meta(OLS(), dtype=np.float64)
    subsemble.fit(X, y)

    actual = subsemble.predict(X)
    np.testing.assert_array_equal(actual, expected)
def add_subsemble(name, models, X_train, Y_train, X_test, Y_test):
    """Fit a Subsemble with an SVC meta learner and score it on held-out data.

    Parameters
    ----------
    name : str
        Label for this ensemble, echoed back in the result dict.
    models : list
        Base estimators passed to ``Subsemble.add``.
    X_train, Y_train : array-like
        Training features and labels.
    X_test, Y_test : array-like
        Held-out features and labels used for the accuracy score.

    Returns
    -------
    dict
        Keys ``"Ensemble"``, ``"Meta_Classifier"``, ``"Accuracy_Score"``
        and ``"Runtime"`` (wall-clock fit+predict time in seconds).
    """
    ensemble = Subsemble(scorer=accuracy_score, random_state=seed)
    ensemble.add(models)
    # Attach the final meta estimator
    ensemble.add(SVC(), meta=True)

    start = time.time()
    ensemble.fit(X_train, Y_train)
    preds = ensemble.predict(X_test)
    # sklearn's convention is accuracy_score(y_true, y_pred); the original
    # swapped order gave the same number (accuracy is symmetric) but was
    # misleading to readers.
    acc_score = accuracy_score(Y_test, preds)
    time_ = time.time() - start

    return {
        "Ensemble": name,
        "Meta_Classifier": "SVC",
        "Accuracy_Score": acc_score,
        "Runtime": time_,
    }
else:
    # NOTE(review): the matching `if` branch lies outside this chunk — it
    # presumably builds an alternative `model`; confirm against the full file.
    # Fallback: a stacked Subsemble regressor over three base learners with
    # a Lasso meta estimator.
    model = Subsemble(partitions=2, random_state=42, n_jobs=1)
    model.add(KNeighborsRegressor())
    model.add(RandomForestRegressor())
    model.add(BayesianRidge())
    model.add_meta(Lasso())

# train and predict
# One single-output model is fit per target column of Y; its train/test
# predictions are accumulated column-by-column into two DataFrames.
train_predict = pd.DataFrame()
test_predict = pd.DataFrame()
for j in Y.columns:
    # train the model
    model.fit(X.iloc[train_idx, :], Y.loc[train_idx, j])
    # predict training and testing data
    train_predict_j = pd.DataFrame(model.predict(X.iloc[train_idx, :]), columns=[j])
    test_predict_j = pd.DataFrame(model.predict(X.iloc[test_idx, :]), columns=[j])
    train_predict = pd.concat([train_predict, train_predict_j], axis=1)
    test_predict = pd.concat([test_predict, test_predict_j], axis=1)
    # Progress marker for the just-finished target column.
    print("-- " + j + " --")

# In[2]: Collect the predictions
# reshape all of the predictions into a single table
predictions = pd.DataFrame()
for j in range(outputs):
    # collect training data
    # NOTE(review): chunk ends mid-loop — predict_j/actual_j/name_j are
    # consumed by code beyond this view.
    predict_j = np.array(train_predict.iloc[:,j])
    actual_j = np.array(Y.iloc[train_idx, j])
    name_j = Y.columns[j]