def test_w_prep_set_params():
    """[Model Selection] Test run with preprocessing, sep param dists."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    params = {
        ('no', 'ols'): {'offset': randint(3, 6)},
        ('pr', 'ols'): {'offset': randint(1, 3)},
    }

    # Fitting
    evl.fit(X, y,
            estimators={'pr': [OLS()], 'no': [OLS()]},
            param_dicts=params,
            preprocessing={'pr': [Scale()], 'no': []},
            n_iter=3)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('no', 'ols')], -18.684229451043198)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('pr', 'ols')], -7.2594502123869491, 1)

    assert evl.summary['params'][('no', 'ols')]['offset'] == 3
    assert evl.summary['params'][('pr', 'ols')]['offset'] == 1
def test_w_prep_fit():
    """[Model Selection] Test run with preprocessing, single step."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100,
                    verbose=True)

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        evl.fit(X, y,
                estimators=[OLS()],
                param_dicts={'ols': {'offset': randint(1, 10)}},
                preprocessing={'pr': [Scale()], 'no': []},
                n_iter=3)

    np.testing.assert_approx_equal(
        evl.results['test_score-m']['no.ols'], -24.903229451043195)

    np.testing.assert_approx_equal(
        evl.results['test_score-m']['pr.ols'], -26.510708862278072, 1)

    assert evl.results['params']['no.ols']['offset'] == 4
    assert evl.results['params']['pr.ols']['offset'] == 4
def test_bench_equality():
    """[Model Selection] Test benchmark correspondence with eval."""
    with open(os.devnull, 'w') as f, redirect_stderr(f):
        evl = Evaluator(mape_scorer, cv=5)
        evl.fit(X, y,
                estimators={'pr': [OLS()], 'no': [OLS()]},
                param_dicts={},
                preprocessing={'pr': [Scale()], 'no': []})

        out = benchmark(X, y, mape_scorer, 5,
                        {'pr': [OLS()], 'no': [OLS()]},
                        {'pr': [Scale()], 'no': []},
                        None)

    np.testing.assert_approx_equal(
        out['test_score-m']['no.ols'], evl.results['test_score-m']['no.ols'])
def base_hyperparam_tuning(X, y, base_learners, param_dicts, n_iterations=100):
    """Tune the base-layer learners' hyperparameters; the current
    evaluation metric is AUC."""
    X = X.values
    y = y.values
    scorer = make_scorer(metrics.roc_auc_score, greater_is_better=True)
    evl = Evaluator(scorer, cv=5, verbose=20, backend='multiprocessing')
    evl.fit(X, y, estimators=base_learners, param_dicts=param_dicts,
            n_iter=n_iterations)
    df_params = pd.DataFrame(evl.results)
    return df_params
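# A minimal usage sketch for ``base_hyperparam_tuning`` (not from the original
# code): the toy data, learner names and parameter distributions below are
# illustrative assumptions, and the helper plus its module-level imports
# (Evaluator, make_scorer, metrics, pd) are assumed to be in scope.
import pandas as pd
from scipy.stats import randint
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Toy data in the pandas format the helper expects (it calls ``.values``).
X_arr, y_arr = make_classification(n_samples=500, random_state=0)
X_df, y_sr = pd.DataFrame(X_arr), pd.Series(y_arr)

# Named base learners and matching parameter distributions.
base_learners = [
    ('rf', RandomForestClassifier(n_estimators=50, random_state=0)),
    ('lr', LogisticRegression(max_iter=1000)),
]
param_dicts = {
    'rf': {'max_depth': randint(2, 10)},
    'lr': {'C': randint(1, 10)},
}

df_params = base_hyperparam_tuning(X_df, y_sr, base_learners, param_dicts,
                                   n_iterations=10)
print(df_params)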
def test_no_prep():
    """[Model Selection] Test run without preprocessing."""
    evl = Evaluator(mape_scorer, verbose=True, cv=5, shuffle=False,
                    random_state=100)

    with open(os.devnull, 'w') as f, redirect_stderr(f):
        evl.fit(X, y,
                estimators=[OLS()],
                param_dicts={'ols': {'offset': randint(1, 10)}},
                n_iter=3)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean']['ols'], -24.903229451043195)

    assert evl.summary['params']['ols']['offset'] == 4
def test_raises():
    """[Model Selection] Test raises on error."""
    evl = Evaluator(bad_scorer)

    np.testing.assert_raises(ValueError, evl.fit, X, y, [OLS()],
                             {'ols': {'offset': randint(1, 10)}}, n_iter=1)
def test_params():
    """[Model Selection] Test raises on bad params."""
    evl = Evaluator(mape_scorer)

    np.testing.assert_raises(ValueError, evl.fit, X, y, [OLS()],
                             {('bad', 'ols'): {'offset': randint(1, 10)}},
                             preprocessing={'prep': [Scale()]})
def test_passes():
    """[Model Selection] Test sets error score on failed scoring."""
    evl = Evaluator(bad_scorer, error_score=0, n_jobs=1)

    evl = np.testing.assert_warns(FitFailedWarning, evl.fit, X, y, [OLS()],
                                  {'ols': {'offset': randint(1, 10)}},
                                  n_iter=1)

    assert evl.summary['test_score_mean']['ols'] == 0
def test_w_prep_fit():
    """[Model Selection] Test run with preprocessing, single step."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    evl.fit(X, y,
            estimators=[OLS()],
            param_dicts={'ols': {'offset': randint(1, 10)}},
            preprocessing={'pr': [Scale()], 'no': []},
            n_iter=3)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('no', 'ols')], -24.903229451043195)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('pr', 'ols')], -26.510708862278072, 1)

    assert evl.summary['params'][('no', 'ols')]['offset'] == 4
    assert evl.summary['params'][('pr', 'ols')]['offset'] == 4
def layer_hyperparam_tuning(X, y, pre_layer_learners, local_layer_learners,
                            param_dicts_layer, n_iterations=50,
                            pre_params='params_base.csv'):
    """Tune the hyperparameters of an intermediate layer; layers must be
    added in order."""
    X = X.values
    y = y.values
    scorer = make_scorer(metrics.roc_auc_score, greater_is_better=True)

    # Load the tuned parameters of the previous layer and apply them.
    params_pre = pd.read_csv(pre_params)
    params_pre.set_index(['Unnamed: 0'], inplace=True)
    for case_name, params in params_pre["params"].items():
        case_est = case_name
        params = eval(params)
        for est_name, est in pre_layer_learners:
            if est_name == case_est:
                est.set_params(**params)

    # Build the previous layer as a preprocessing step for the evaluation.
    in_layer = SuperLearner(folds=10, backend='multiprocessing',
                            model_selection=True)
    in_layer.add(pre_layer_learners, proba=True)
    preprocess = [in_layer]

    evl = Evaluator(scorer, cv=5, verbose=20, backend='multiprocessing')
    evl.fit(X, y, local_layer_learners, param_dicts=param_dicts_layer,
            preprocessing={'meta': preprocess}, n_iter=n_iterations)
    df_params_layer = pd.DataFrame(evl.results)
    return in_layer, df_params_layer
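# A hedged usage sketch (not from the original code), continuing from the
# ``base_hyperparam_tuning`` example above: ``df_params``, ``base_learners``,
# ``X_df`` and ``y_sr`` are the assumed outputs of that sketch, and the meta
# learner and its search distribution are illustrative.
# Persist the base-layer results so the layer tuner can reload the best
# parameters (it expects an unnamed index column and a 'params' column).
df_params.to_csv('params_base.csv')

local_layer_learners = [('meta_lr', LogisticRegression(max_iter=1000))]
param_dicts_layer = {'meta_lr': {'C': randint(1, 10)}}

in_layer, df_params_layer = layer_hyperparam_tuning(
    X_df, y_sr,
    pre_layer_learners=base_learners,
    local_layer_learners=local_layer_learners,
    param_dicts_layer=param_dicts_layer,
    n_iterations=5,
    pre_params='params_base.csv')
print(df_params_layer)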
def test_w_prep():
    """[Model Selection] Test run with preprocessing, double step."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    # Preprocessing
    with open(os.devnull, 'w') as f, redirect_stderr(f):
        evl.preprocess(X, y, {'pr': [Scale()], 'no': []})

    # Fitting
    evl.evaluate(X, y,
                 estimators=[OLS()],
                 param_dicts={'ols': {'offset': randint(1, 10)}},
                 n_iter=3)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('no', 'ols')], -24.903229451043195)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('pr', 'ols')], -26.510708862278072, 1)

    assert evl.summary['params'][('no', 'ols')]['offset'] == 4
    assert evl.summary['params'][('pr', 'ols')]['offset'] == 4
def test_w_prep_set_params():
    """[Model Selection] Test run with preprocessing, sep param dists."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100,
                    verbose=2)

    params = {
        'no.ols': {'offset': randint(3, 6)},
        'pr.ols': {'offset': randint(1, 3)},
    }

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        evl.fit(X, y,
                estimators={'pr': [OLS()], 'no': [OLS()]},
                param_dicts=params,
                preprocessing={'pr': [Scale()], 'no': []},
                n_iter=10)

    np.testing.assert_approx_equal(
        evl.results['test_score-m']['no.ols'], -18.684229451043198)

    np.testing.assert_approx_equal(
        evl.results['test_score-m']['pr.ols'], -7.2594502123869491)

    assert evl.results['params']['no.ols']['offset'] == 3
    assert evl.results['params']['pr.ols']['offset'] == 1
def evaluateSecondLayer(base_learners, x_train, y_train, meta_learners,
                        param_dicts):
    in_layer = EnsembleTransformer()

    print("adding base learners to transformer")
    in_layer.add('stack', base_learners)
    preprocess = [in_layer]

    print("creating scorer")
    scorer = make_scorer(mean_absolute_error, greater_is_better=False)

    evl = Evaluator(scorer, cv=4, verbose=1)

    print("fitting evaluator")
    evl.fit(
        x_train.values, y_train.values,
        meta_learners,
        param_dicts,
        preprocessing={'meta': preprocess},
        n_iter=40  # bump this up to do a larger grid search
    )

    table = pd.DataFrame(evl.summary)
    table.to_html('iteration5.html')
    table.to_csv('iteration5.csv', index=False, header=False, sep='\t')
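# A hedged usage sketch for ``evaluateSecondLayer`` (not from the original
# code): the synthetic regression data, learner names and the Lasso search
# distribution are illustrative assumptions; the helper and its module-level
# imports (EnsembleTransformer, Evaluator, make_scorer, mean_absolute_error,
# pd) are assumed to be in scope.
import pandas as pd
from scipy.stats import uniform
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso, Ridge

X_arr, y_arr = make_regression(n_samples=500, noise=5.0, random_state=0)
x_train, y_train = pd.DataFrame(X_arr), pd.Series(y_arr)

base_learners = [('rf', RandomForestRegressor(n_estimators=50, random_state=0)),
                 ('rdg', Ridge())]
meta_learners = [('las', Lasso(max_iter=4000))]
param_dicts = {'las': {'alpha': uniform(0, 5)}}

# Writes iteration5.html and iteration5.csv with the evaluation summary.
evaluateSecondLayer(base_learners, x_train, y_train, meta_learners, param_dicts)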
def test_params():
    """[Model Selection] Test raises on bad params."""
    evl = Evaluator(mape_scorer, verbose=2)

    np.testing.assert_raises(
        ValueError, evl.fit, X, y,
        estimators=[OLS()],
        param_dicts={'bad.ols': {'offset': randint(1, 10)}},
        preprocessing={'prep': [Scale()]})
def test_raises():
    """[Model Selection] Test raises on error."""
    evl = Evaluator(bad_scorer, verbose=1)

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        np.testing.assert_raises(
            ValueError, evl.fit, X, y,
            estimators=[OLS()],
            param_dicts={'ols': {'offset': randint(1, 10)}},
            n_iter=1)
def test_passes():
    """[Model Selection] Test sets error score on failed scoring."""
    evl = Evaluator(bad_scorer, error_score=0, n_jobs=1, verbose=5)

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        evl = np.testing.assert_warns(
            FitFailedWarning, evl.fit, X, y,
            estimators=[OLS()],
            param_dicts={'ols': {'offset': randint(1, 10)}},
            n_iter=1)

    assert evl.results['test_score-m']['ols'] == 0
from sklearn.neighbors import KNeighborsClassifier
from scipy.stats import randint

# Here we name the estimators ourselves
ests = [('gnb', GaussianNB()), ('knn', KNeighborsClassifier())]

# Now we map parameters to these
# The gnb doesn't have any parameters so we can skip it
pars = {'n_neighbors': randint(2, 20)}
params = {'knn': pars}

##############################################################################
# We can now run an evaluation over these estimators and parameter distributions
# by calling the ``fit`` method.

evaluator = Evaluator(accuracy_scorer, cv=10, random_state=seed, verbose=1)
evaluator.fit(X, y, ests, params, n_iter=10)

##############################################################################
# The full history of the evaluation can be found in ``cv_results``. To compare
# models with their best parameters, we can pass the ``results`` attribute to
# a :obj:`pandas.DataFrame` or print it as a table. We use ``m`` to denote
# mean values and ``s`` to denote standard deviation across folds for brevity.
# Note that the timed prediction is for the training set, for comparability with
# training time.

print("Score comparison with best params found:\n\n%r" % evaluator.results)

##############################################################################
# Preprocessing
# ^^^^^^^^^^^^^
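##############################################################################
# A hedged sketch, not part of the original script: preprocessing cases can be
# passed to ``fit`` as a dict mapping case names to transformer lists, and each
# estimator is then evaluated once per case. ``StandardScaler`` is used here as
# an assumed, illustrative transformer.
from sklearn.preprocessing import StandardScaler

preprocess_cases = {'none': [], 'sc': [StandardScaler()]}

evaluator = Evaluator(accuracy_scorer, cv=10, random_state=seed, verbose=1)
evaluator.fit(X, y, ests, params, preprocessing=preprocess_cases, n_iter=10)
print("Score comparison across preprocessing cases:\n\n%r" % evaluator.results)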
    'rf': {
        'max_depth': randint(2, 5),
        'min_samples_split': randint(5, 20),
        'min_samples_leaf': randint(10, 20),
        'n_estimators': randint(50, 100),
        'max_features': uniform(0.6, 0.3)
    }
}


# In[ ]:

scorer = make_scorer(mean_absolute_error, greater_is_better=False)

evl = Evaluator(
    scorer,
    cv=2,
    random_state=SEED,
    verbose=5,
)


# In[ ]:

evl.fit(
    xtrain, ytrain,
    estimators=base_learners,
    param_dicts=param_dicts,
    preprocessing={'sc': [StandardScaler()], 'none': []},
    n_iter=2  # bump this up to do a larger grid search
# Set parameter mapping
# Here, we differentiate distributions between cases for the random forest
params = {
    'svc': {'C': uniform(0, 10)},
    'class.rf': {'max_depth': randint(2, 10)},
    'proba.rf': {'max_depth': randint(2, 10),
                 'max_features': uniform(0.5, 0.5)}
}

scorer = make_scorer(accuracy_score)
evaluator = Evaluator(scorer=scorer, random_state=seed, cv=2)

evaluator.fit(X, y, meta_learners, params,
              preprocessing=preprocessing, n_iter=2)

##############################################################################
# We can now compare the performance of the best fit for each candidate
# meta learner.

print("Results:\n%s" % evaluator.results)
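##############################################################################
# A hedged follow-on sketch (not from the original script): as in the helper
# functions elsewhere in this section, the ``results`` attribute can also be
# rendered as a :obj:`pandas.DataFrame` for a side-by-side comparison table.
import pandas as pd

comparison = pd.DataFrame(evaluator.results)
print(comparison)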
score = make_scorer(score_func=accuracy_score, greater_is_better=True,
                    needs_proba=False, needs_threshold=False)

ensemble = SequentialEnsemble(model_selection=True, n_jobs=1, shuffle=False,
                              random_state=seed)
ensemble.add('stack', ests_1, preprocessing=pre_cases)
ensemble.add_meta(SVC(kernel='linear', degree=5, tol=1e-4))
# ensemble.fit(X_train, y_train)
# y_pred = ensemble.predict(X_test)
# ens = ensemble

evaluator = Evaluator(scorer=score, random_state=seed, verbose=True)
evaluator.fit(data_pix, spacial_pix, estimators=[], param_dicts=pars_1,
              n_iter=5, preprocessing=pre_cases)
print(evaluator.results)

spacial_pix = spacial_pix.astype('int')
unique, counts = np.unique(y_test, return_counts=True)
print(np.asarray((unique, counts)).T)
# print(confusion_matrix(y_test, y_pred, labels=unique))
# print(precision_score(y_test, y_pred, average='micro', labels=unique))
ests = [('rdg', Ridge(max_iter=4000)), ('las', Lasso(max_iter=4000))]

a = uniform(0, 10)
params = {
    'rdg': {'alpha': a},
    'las': {'alpha': a}
}

preproc = {
    'none': [],
    'sc': [StandardScaler()]
}

evaluator = Evaluator(r2_scorer, cv=2, random_state=seed, verbose=1)
# evaluator.fit(image_set, label_set, ests, params, 40, preproc)
# print(evaluator.results)


def sig(z):
    return 1 / (1 + np.exp(-z))


def hyp(th, x):
    return sig(x @ th)


def cost_func(x, y, th, m):
    hi = hyp(th, x)
    y_ = y.reshape(-1, 1)
    j = 1 / float(m) * np.sum(-y_ * np.log(hi) - (1 - y_) * np.log(1 - hi))
    return j
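# A minimal sketch (toy data, not from the original script) showing how the
# helpers above fit together: the logistic-regression cost for an all-zero
# parameter vector on a random design matrix, which should come out near
# log(2) ~= 0.693.
import numpy as np

rng = np.random.RandomState(0)
x_toy = np.hstack([np.ones((100, 1)), rng.randn(100, 3)])  # bias column + 3 features
y_toy = (rng.rand(100) > 0.5).astype(float)
theta = np.zeros((x_toy.shape[1], 1))

print("initial cost:", cost_func(x_toy, y_toy, theta, m=x_toy.shape[0]))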
from sklearn.metrics import r2_score
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler

from mlens.model_selection import Evaluator
from mlens.metrics import make_scorer
# from mlens.preprocessing import Subset

# image_set = np.genfromtxt('../testdata/c1_gn.csv', delimiter=',')
# label_set = np.genfromtxt('../testdata/c1_L_gn.csv', delimiter=',')
#
# X_train, X_test, y_train, y_test = train_test_split(image_set, label_set, test_size=0.33)

score_f = make_scorer(score_func=r2_score, greater_is_better=False)

evaluator = Evaluator(scorer=score_f, shuffle=True, verbose=True)

estimators = [
    ('las', Lasso(copy_X=True, max_iter=4000)),
    ('rdg', Ridge(copy_X=True, max_iter=4000)),
    # ('rfr', RandomForestRegressor()),
]

params = {
    'las': {'alpha': uniform(0, 5)},
    'rdg': {'alpha': uniform(0, 5)},
    'rfr': {