def test_w_prep_set_params():
    """[Model Selection] Test run with preprocessing, sep param dists.

    Every (preprocessing case, estimator) pair is given its own ``offset``
    distribution. After fitting, the summarised mean test score and the
    recorded ``offset`` are checked for both pairs.
    """
    evaluator = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    # Search spaces are keyed by (case name, estimator name) tuples.
    search_space = {
        ('no', 'ols'): {'offset': randint(3, 6)},
        ('pr', 'ols'): {'offset': randint(1, 3)},
    }
    learners = {'pr': [OLS()], 'no': [OLS()]}
    pipelines = {'pr': [Scale()], 'no': []}

    evaluator.fit(
        X,
        y,
        estimators=learners,
        param_dicts=search_space,
        preprocessing=pipelines,
        n_iter=3,
    )

    no_prep = ('no', 'ols')
    with_prep = ('pr', 'ols')

    np.testing.assert_approx_equal(
        evaluator.summary['test_score_mean'][no_prep],
        -18.684229451043198,
    )
    # The preprocessed case is only compared to one significant digit.
    np.testing.assert_approx_equal(
        evaluator.summary['test_score_mean'][with_prep],
        -7.2594502123869491,
        significant=1,
    )

    assert evaluator.summary['params'][no_prep]['offset'] == 3
    assert evaluator.summary['params'][with_prep]['offset'] == 1
def test_bench_equality():
    """[Model Selection] Test benchmark correspondence with eval.

    Runs the same estimators and preprocessing cases through ``Evaluator``
    and through ``benchmark`` and checks that both report the same mean test
    score for the ``no.ols`` case.
    """
    # Anything written to stderr while fitting is discarded.
    with open(os.devnull, 'w') as sink:
        with redirect_stderr(sink):
            evaluator = Evaluator(mape_scorer, cv=5)
            evaluator.fit(
                X,
                y,
                estimators={'pr': [OLS()], 'no': [OLS()]},
                param_dicts={},
                preprocessing={'pr': [Scale()], 'no': []},
            )

            bench_out = benchmark(
                X,
                y,
                mape_scorer,
                5,
                {'pr': [OLS()], 'no': [OLS()]},
                {'pr': [Scale()], 'no': []},
                None,
            )

    from_benchmark = bench_out['test_score-m']['no.ols']
    from_evaluator = evaluator.results['test_score-m']['no.ols']
    np.testing.assert_approx_equal(from_benchmark, from_evaluator)
def test_w_prep_fit():
    """[Model Selection] Test run with preprocessing, single step.

    A single ``offset`` distribution is shared by both preprocessing cases.
    The mean test score and recorded ``offset`` are read from
    ``Evaluator.results`` using dotted ``case.estimator`` keys.
    """
    evaluator = Evaluator(
        mape_scorer,
        cv=5,
        shuffle=False,
        random_state=100,
        verbose=True,
    )

    # Anything written to stdout during the fit is discarded.
    with open(os.devnull, 'w') as sink:
        with redirect_stdout(sink):
            evaluator.fit(
                X,
                y,
                estimators=[OLS()],
                param_dicts={'ols': {'offset': randint(1, 10)}},
                preprocessing={'pr': [Scale()], 'no': []},
                n_iter=3,
            )

    np.testing.assert_approx_equal(
        evaluator.results['test_score-m']['no.ols'],
        -24.903229451043195,
    )
    # The preprocessed case is only compared to one significant digit.
    np.testing.assert_approx_equal(
        evaluator.results['test_score-m']['pr.ols'],
        -26.510708862278072,
        significant=1,
    )

    assert evaluator.results['params']['no.ols']['offset'] == 4
    assert evaluator.results['params']['pr.ols']['offset'] == 4
def base_hyperparam_tuning(X, y, base_learners, param_dicts, n_iterations=100):
    """Tune base-layer model hyperparameters; the metric is currently AUC.

    Parameters
    ----------
    X, y :
        Objects exposing a ``.values`` attribute; the underlying values are
        what is passed to the evaluator.
    base_learners :
        Estimators handed to ``Evaluator.fit`` as ``estimators``.
    param_dicts :
        Parameter distributions handed to ``Evaluator.fit``.
    n_iterations : int, default 100
        Forwarded to ``Evaluator.fit`` as ``n_iter``.

    Returns
    -------
    pandas.DataFrame
        ``Evaluator.results`` wrapped in a data frame.
    """
    features = X.values
    target = y.values

    auc_scorer = make_scorer(metrics.roc_auc_score, greater_is_better=True)
    evaluator = Evaluator(
        auc_scorer,
        cv=5,
        verbose=20,
        backend='multiprocessing',
    )
    evaluator.fit(
        features,
        target,
        estimators=base_learners,
        param_dicts=param_dicts,
        n_iter=n_iterations,
    )

    return pd.DataFrame(evaluator.results)
def test_no_prep():
    """[Model Selection] Test run without preprocessing.

    Fits a single estimator with a random ``offset`` search and checks the
    summarised mean test score and the recorded ``offset``.
    """
    evaluator = Evaluator(
        mape_scorer,
        verbose=True,
        cv=5,
        shuffle=False,
        random_state=100,
    )

    # Anything written to stderr during the fit is discarded.
    with open(os.devnull, 'w') as sink:
        with redirect_stderr(sink):
            evaluator.fit(
                X,
                y,
                estimators=[OLS()],
                param_dicts={'ols': {'offset': randint(1, 10)}},
                n_iter=3,
            )

    np.testing.assert_approx_equal(
        evaluator.summary['test_score_mean']['ols'],
        -24.903229451043195,
    )
    assert evaluator.summary['params']['ols']['offset'] == 4
def test_w_prep_fit():
    """[Model Selection] Test run with preprocessing, single step.

    A single ``offset`` distribution is shared by both preprocessing cases.
    The mean test score and recorded ``offset`` are read from
    ``Evaluator.summary`` using (case, estimator) tuple keys.
    """
    evaluator = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    evaluator.fit(
        X,
        y,
        estimators=[OLS()],
        param_dicts={'ols': {'offset': randint(1, 10)}},
        preprocessing={'pr': [Scale()], 'no': []},
        n_iter=3,
    )

    no_prep = ('no', 'ols')
    with_prep = ('pr', 'ols')

    np.testing.assert_approx_equal(
        evaluator.summary['test_score_mean'][no_prep],
        -24.903229451043195,
    )
    # The preprocessed case is only compared to one significant digit.
    np.testing.assert_approx_equal(
        evaluator.summary['test_score_mean'][with_prep],
        -26.510708862278072,
        significant=1,
    )

    assert evaluator.summary['params'][no_prep]['offset'] == 4
    assert evaluator.summary['params'][with_prep]['offset'] == 4
def layer_hyperparam_tuning(X, y, pre_layer_learners, local_layer_learners,
                            param_dicts_layer, n_iterations=50,
                            pre_params='params_base.csv'):
    """Tune hyperparameters for an intermediate layer.

    Parameters stored in the ``pre_params`` CSV are applied to the matching
    estimators in ``pre_layer_learners``. Those learners are then wrapped in
    a ``SuperLearner`` that serves as the ``'meta'`` preprocessing case while
    ``local_layer_learners`` are evaluated with ROC AUC as the score.
    Layers must be added in order (note carried over from the original
    docstring).

    Parameters
    ----------
    X, y :
        Objects exposing a ``.values`` attribute.
    pre_layer_learners :
        Iterable of ``(name, estimator)`` pairs for the preceding layer; the
        estimators are updated in place via ``set_params``.
    local_layer_learners :
        Estimators of the layer being tuned.
    param_dicts_layer :
        Parameter distributions handed to ``Evaluator.fit``.
    n_iterations : int, default 50
        Forwarded to ``Evaluator.fit`` as ``n_iter``.
    pre_params : str, default 'params_base.csv'
        CSV with an ``Unnamed: 0`` column of case names and a ``params``
        column holding the textual form of each parameter dict.

    Returns
    -------
    tuple
        ``(in_layer, df_params_layer)``: the ``SuperLearner`` instance and
        ``Evaluator.results`` wrapped in a data frame.
    """
    features = X.values
    target = y.values
    auc_scorer = make_scorer(metrics.roc_auc_score, greater_is_better=True)

    stored = pd.read_csv(pre_params).set_index(['Unnamed: 0'])

    for case_name, raw_params in stored["params"].items():
        # NOTE(review): eval() executes whatever text is stored in the CSV.
        # Only use files produced by a trusted tuning run; consider a safer
        # parser if the stored values are plain literals.
        tuned = eval(raw_params)
        for est_name, est in pre_layer_learners:
            if est_name != case_name:
                continue
            est.set_params(**tuned)

    in_layer = SuperLearner(
        folds=10,
        backend='multiprocessing',
        model_selection=True,
    )
    in_layer.add(pre_layer_learners, proba=True)

    evaluator = Evaluator(
        auc_scorer,
        cv=5,
        verbose=20,
        backend='multiprocessing',
    )
    evaluator.fit(
        features,
        target,
        local_layer_learners,
        param_dicts=param_dicts_layer,
        preprocessing={'meta': [in_layer]},
        n_iter=n_iterations,
    )

    return in_layer, pd.DataFrame(evaluator.results)
def test_w_prep_set_params():
    """[Model Selection] Test run with preprocessing, sep param dists.

    Every ``case.estimator`` key is given its own ``offset`` distribution.
    After fitting, the mean test score and the recorded ``offset`` are read
    from ``Evaluator.results`` and checked for both keys.
    """
    evaluator = Evaluator(
        mape_scorer,
        cv=5,
        shuffle=False,
        random_state=100,
        verbose=2,
    )

    # Search spaces are keyed by dotted "case.estimator" names.
    search_space = {
        'no.ols': {'offset': randint(3, 6)},
        'pr.ols': {'offset': randint(1, 3)},
    }

    # Anything written to stdout during the fit is discarded.
    with open(os.devnull, 'w') as sink:
        with redirect_stdout(sink):
            evaluator.fit(
                X,
                y,
                estimators={'pr': [OLS()], 'no': [OLS()]},
                param_dicts=search_space,
                preprocessing={'pr': [Scale()], 'no': []},
                n_iter=10,
            )

    np.testing.assert_approx_equal(
        evaluator.results['test_score-m']['no.ols'],
        -18.684229451043198,
    )
    np.testing.assert_approx_equal(
        evaluator.results['test_score-m']['pr.ols'],
        -7.2594502123869491,
    )

    assert evaluator.results['params']['no.ols']['offset'] == 3
    assert evaluator.results['params']['pr.ols']['offset'] == 1
def evaluateSecondLayer(base_learners, x_train, y_train, meta_learners,
                        param_dicts):
    """Evaluate meta learners on top of a stacked base-learner transformer.

    The base learners are added to an ``EnsembleTransformer`` as a ``'stack'``
    layer, which is used as the ``'meta'`` preprocessing case while the meta
    learners are evaluated with mean absolute error (lower is better).
    The evaluator summary is written to ``iteration5.html`` and
    ``iteration5.csv`` in the current working directory; nothing is returned.

    Parameters
    ----------
    base_learners :
        Learners added to the transformer's ``'stack'`` layer.
    x_train, y_train :
        Objects exposing a ``.values`` attribute.
    meta_learners :
        Estimators passed to ``Evaluator.fit``.
    param_dicts :
        Parameter distributions passed to ``Evaluator.fit``.
    """
    transformer = EnsembleTransformer()
    print("adding base learners to transformer")
    transformer.add('stack', base_learners)

    print("creating scorer")
    mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)
    evaluator = Evaluator(mae_scorer, cv=4, verbose=1)

    print("fitting evaluator")
    evaluator.fit(
        x_train.values,
        y_train.values,
        meta_learners,
        param_dicts,
        preprocessing={'meta': [transformer]},
        n_iter=40,  # raise this to sample more parameter draws
    )

    summary = pd.DataFrame(evaluator.summary)
    summary.to_html('iteration5.html')
    summary.to_csv('iteration5.csv', index=False, header=False, sep='\t')
from scipy.stats import randint

# Here we name the estimators ourselves
ests = [
    ('gnb', GaussianNB()),
    ('knn', KNeighborsClassifier()),
]

# Now we map parameters to these names.
# No parameters are tuned for the gnb, so it gets no entry.
pars = {'n_neighbors': randint(2, 20)}
params = {'knn': pars}

##############################################################################
# We can now run an evaluation over these estimators and parameter
# distributions by calling the ``fit`` method.

evaluator = Evaluator(accuracy_scorer, cv=10, random_state=seed, verbose=1)
evaluator.fit(X, y, estimators=ests, param_dicts=params, n_iter=10)

##############################################################################
# The full history of the evaluation can be found in ``cv_results``. To compare
# models with their best parameters, we can pass the ``results`` attribute to
# a :obj:`pandas.DataFrame` or print it as a table. We use ``m`` to denote
# mean values and ``s`` to denote standard deviation across folds for brevity.
# Note that the timed prediction is for the training set, for comparability
# with training time.

print("Score comparison with best params founds:\n\n%r" % evaluator.results)

##############################################################################
# Preprocessing
# ^^^^^^^^^^^^^
#
evl = Evaluator(scorer, cv=2, random_state=SEED, verbose=5)


# In[ ]:


# Every base learner is evaluated under both preprocessing cases:
# 'sc' (standard scaling) and 'none' (no preprocessing).
evl.fit(
    xtrain,
    ytrain,
    estimators=base_learners,
    param_dicts=param_dicts,
    preprocessing={'sc': [StandardScaler()], 'none': []},
    n_iter=2,  # bump this up to do a larger grid search
)


# In[ ]:


pd.DataFrame(evl.results)


# There you have it, a comparison of tuned models in one grid search!
#
# Optimal parameters are then easily accessed.

# In[ ]:
needs_threshold=False)  # closes a call whose opening lies above this chunk

# Sequential ensemble with one 'stack' layer (using the preprocessing cases
# in pre_cases) and an SVC meta learner.
# NOTE(review): per the scikit-learn SVC docs, `degree` only applies to the
# 'poly' kernel, so degree=5 presumably has no effect with kernel='linear'.
ensemble = SequentialEnsemble(model_selection=True, n_jobs=1, shuffle=False,
                              random_state=seed)
ensemble.add('stack', ests_1, preprocessing=pre_cases)
ensemble.add_meta(SVC(kernel='linear', degree=5, tol=1e-4))
# ensemble.fit(X_train, y_train)
# y_pred = ensemble.predict(X_test)
# ens = ensemble

# NOTE(review): the evaluator is fitted with an empty estimator list, and
# the `ensemble` built above is not passed to it here - confirm intended.
evaluator = Evaluator(scorer=score, random_state=seed, verbose=True)
evaluator.fit(data_pix, spacial_pix, estimators=[], param_dicts=pars_1,
              n_iter=5, preprocessing=pre_cases)
print(evaluator.results)

# The integer cast happens only after the evaluator has used spacial_pix.
spacial_pix = spacial_pix.astype('int')

# Print each distinct value in y_test next to its count, one pair per row.
unique, counts = np.unique(y_test, return_counts=True)
print(np.asarray((unique, counts)).T)
# print(confusion_matrix(y_test, y_pred, labels=unique))
# print(precision_score(y_test, y_pred, average='micro', labels=unique))
# print(mean_absolute_error(y_test, y_pred))
# print(mean_squared_error(y_test, y_pred))
# Set parameter mapping
# Here, we differentiate distributions between cases for the random forest:
# the 'class' and 'proba' cases each get their own entry, while the svc entry
# is keyed by estimator name only.
params = {
    'svc': {
        'C': uniform(0, 10),
    },
    'class.rf': {
        'max_depth': randint(2, 10),
    },
    'proba.rf': {
        'max_depth': randint(2, 10),
        'max_features': uniform(0.5, 0.5),
    },
}

scorer = make_scorer(accuracy_score)
evaluator = Evaluator(scorer=scorer, random_state=seed, cv=2)

evaluator.fit(
    X,
    y,
    estimators=meta_learners,
    param_dicts=params,
    preprocessing=preprocessing,
    n_iter=2,
)

##############################################################################
# We can now compare the performance of the best fit for each candidate
# meta learner.

print("Results:\n%s" % evaluator.results)