Example #1
def test_w_prep_set_params():
    """[Model Selection] Test run with preprocessing, sep param dists."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    params = {
        ('no', 'ols'): {
            'offset': randint(3, 6)
        },
        ('pr', 'ols'): {
            'offset': randint(1, 3)
        },
    }

    # Fitting
    evl.fit(X,
            y,
            estimators={
                'pr': [OLS()],
                'no': [OLS()]
            },
            param_dicts=params,
            preprocessing={
                'pr': [Scale()],
                'no': []
            },
            n_iter=3)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('no', 'ols')], -18.684229451043198)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('pr', 'ols')], -7.2594502123869491, 1)

    assert evl.summary['params'][('no', 'ols')]['offset'] == 3
    assert evl.summary['params'][('pr', 'ols')]['offset'] == 1
Example #2
def test_w_prep_fit():
    """[Model Selection] Test run with preprocessing, single step."""
    evl = Evaluator(mape_scorer,
                    cv=5,
                    shuffle=False,
                    random_state=100,
                    verbose=True)

    with open(os.devnull, 'w') as f, redirect_stdout(f):

        evl.fit(X,
                y,
                estimators=[OLS()],
                param_dicts={'ols': {
                    'offset': randint(1, 10)
                }},
                preprocessing={
                    'pr': [Scale()],
                    'no': []
                },
                n_iter=3)

    np.testing.assert_approx_equal(evl.results['test_score-m']['no.ols'],
                                   -24.903229451043195)

    np.testing.assert_approx_equal(evl.results['test_score-m']['pr.ols'],
                                   -26.510708862278072, 1)

    assert evl.results['params']['no.ols']['offset'] == 4
    assert evl.results['params']['pr.ols']['offset'] == 4
Example #3
def test_bench_equality():
    """[Model Selection] Test benchmark correspondence with eval."""

    with open(os.devnull, 'w') as f, redirect_stderr(f):
        evl = Evaluator(mape_scorer, cv=5)
        evl.fit(X,
                y,
                estimators={
                    'pr': [OLS()],
                    'no': [OLS()]
                },
                param_dicts={},
                preprocessing={
                    'pr': [Scale()],
                    'no': []
                })

        out = benchmark(X, y, mape_scorer, 5, {
            'pr': [OLS()],
            'no': [OLS()]
        }, {
            'pr': [Scale()],
            'no': []
        }, None)

    np.testing.assert_approx_equal(out['test_score-m']['no.ols'],
                                   evl.results['test_score-m']['no.ols'])
Example #4
def base_hyperparam_tuning(X, y, base_learners, param_dicts, n_iterations=100):
    '''Tune the base-layer model hyperparameters; the current evaluation metric is AUC.'''
    X = X.values
    y = y.values
    # Random search over param_dicts, scored by AUC
    scorer = make_scorer(metrics.roc_auc_score, greater_is_better=True)
    evl = Evaluator(scorer, cv=5, verbose=20, backend='multiprocessing')
    evl.fit(X, y, estimators=base_learners, param_dicts=param_dicts,
            n_iter=n_iterations)
    df_params = pd.DataFrame(evl.results)
    return df_params
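
A minimal usage sketch for base_hyperparam_tuning, assuming X is a pandas DataFrame and y a pandas Series. The base learners and parameter distributions below are hypothetical placeholders, not part of the original snippet; the output filename matches the default read back by layer_hyperparam_tuning in Example #10.

from scipy.stats import randint, uniform
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Hypothetical base layer and search distributions
base_learners = [('rf', RandomForestClassifier()),
                 ('lr', LogisticRegression(max_iter=1000))]
param_dicts = {
    'rf': {'max_depth': randint(2, 10), 'n_estimators': randint(50, 200)},
    'lr': {'C': uniform(0.1, 10.0)},
}

df_params = base_hyperparam_tuning(X, y, base_learners, param_dicts,
                                   n_iterations=20)
df_params.to_csv('params_base.csv')  # later consumed by layer_hyperparam_tuning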
Example #5
def test_no_prep():
    """[Model Selection] Test run without preprocessing."""
    evl = Evaluator(mape_scorer, verbose=True, cv=5, shuffle=False,
                    random_state=100)

    with open(os.devnull, 'w') as f, redirect_stderr(f):
        evl.fit(X, y,
                estimators=[OLS()],
                param_dicts={'ols': {'offset': randint(1, 10)}},
                n_iter=3)

    np.testing.assert_approx_equal(
            evl.summary['test_score_mean']['ols'],
            -24.903229451043195)

    assert evl.summary['params']['ols']['offset'] == 4
Example #6
def test_raises():
    """[Model Selection] Test raises on error."""

    evl = Evaluator(bad_scorer)

    np.testing.assert_raises(ValueError,
                             evl.fit, X, y, [OLS()],
                             {'ols': {'offset': randint(1, 10)}},
                             n_iter=1)
Example #7
def test_params():
    """[Model Selection] Test raises on bad params."""
    evl = Evaluator(mape_scorer)

    np.testing.assert_raises(ValueError,
                             evl.fit, X, y, [OLS()],
                             {('bad', 'ols'):
                                  {'offset': randint(1, 10)}},
                             preprocessing={'prep': [Scale()]})
Example #8
def test_passes():
    """[Model Selection] Test sets error score on failed scoring."""

    evl = Evaluator(bad_scorer, error_score=0, n_jobs=1)

    evl = np.testing.assert_warns(FitFailedWarning,
                                  evl.fit, X, y, [OLS()],
                                  {'ols': {'offset': randint(1, 10)}},
                                  n_iter=1)

    assert evl.summary['test_score_mean']['ols'] == 0
Example #9
def test_w_prep_fit():
    """[Model Selection] Test run with preprocessing, single step."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    evl.fit(X, y,
            estimators=[OLS()],
            param_dicts={'ols': {'offset': randint(1, 10)}},
            preprocessing={'pr': [Scale()], 'no': []},
            n_iter=3)

    np.testing.assert_approx_equal(
            evl.summary['test_score_mean'][('no', 'ols')],
            -24.903229451043195)

    np.testing.assert_approx_equal(
            evl.summary['test_score_mean'][('pr', 'ols')],
            -26.510708862278072, 1)

    assert evl.summary['params'][('no', 'ols')]['offset'] == 4
    assert evl.summary['params'][('pr', 'ols')]['offset'] == 4
Example #10
def layer_hyperparam_tuning(X, y, pre_layer_learners, local_layer_learners,
                            param_dicts_layer, n_iterations=50,
                            pre_params='params_base.csv'):
    '''Tune intermediate-layer hyperparameters; learners must be added in order.'''
    X = X.values
    y = y.values
    scorer = make_scorer(metrics.roc_auc_score, greater_is_better=True)
    # Load the best base-layer parameters saved earlier and apply them to the
    # matching pre-layer learners. Each entry is the string repr of a
    # parameter dict, hence the eval().
    params_pre = pd.read_csv(pre_params)
    params_pre.set_index(['Unnamed: 0'], inplace=True)
    for case_name, params in params_pre["params"].items():
        params = eval(params)
        for est_name, est in pre_layer_learners:
            if est_name == case_name:
                est.set_params(**params)
    # Wrap the tuned base layer as a preprocessing step for the evaluator
    in_layer = SuperLearner(folds=10, backend='multiprocessing', model_selection=True)
    in_layer.add(pre_layer_learners, proba=True)
    preprocess = [in_layer]
    evl = Evaluator(scorer, cv=5, verbose=20, backend='multiprocessing')
    evl.fit(X, y, local_layer_learners, param_dicts=param_dicts_layer,
            preprocessing={'meta': preprocess}, n_iter=n_iterations)
    df_params_layer = pd.DataFrame(evl.results)
    return in_layer, df_params_layer
Example #11
def test_w_prep():
    """[Model Selection] Test run with preprocessing, double step."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    # Preprocessing
    with open(os.devnull, 'w') as f, redirect_stderr(f):

        evl.preprocess(X, y, {'pr': [Scale()], 'no': []})

        # Fitting
        evl.evaluate(X,
                     y,
                     estimators=[OLS()],
                     param_dicts={'ols': {
                         'offset': randint(1, 10)
                     }},
                     n_iter=3)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('no', 'ols')], -24.903229451043195)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('pr', 'ols')], -26.510708862278072, 1)

    assert evl.summary['params'][('no', 'ols')]['offset'] == 4
    assert evl.summary['params'][('pr', 'ols')]['offset'] == 4
Example #12
def test_w_prep_set_params():
    """[Model Selection] Test run with preprocessing, sep param dists."""
    evl = Evaluator(mape_scorer,
                    cv=5,
                    shuffle=False,
                    random_state=100,
                    verbose=2)

    params = {
        'no.ols': {
            'offset': randint(3, 6)
        },
        'pr.ols': {
            'offset': randint(1, 3)
        },
    }

    with open(os.devnull, 'w') as f, redirect_stdout(f):

        evl.fit(X,
                y,
                estimators={
                    'pr': [OLS()],
                    'no': [OLS()]
                },
                param_dicts=params,
                preprocessing={
                    'pr': [Scale()],
                    'no': []
                },
                n_iter=10)

    np.testing.assert_approx_equal(evl.results['test_score-m']['no.ols'],
                                   -18.684229451043198)

    np.testing.assert_approx_equal(evl.results['test_score-m']['pr.ols'],
                                   -7.2594502123869491)
    assert evl.results['params']['no.ols']['offset'] == 3
    assert evl.results['params']['pr.ols']['offset'] == 1
Example #13
def evaluateSecondLayer(base_learners, x_train, y_train, meta_learners,
                        param_dicts):
    in_layer = EnsembleTransformer()
    print("adding base learners to transformer")
    in_layer.add('stack', base_learners)

    preprocess = [in_layer]
    print("creating scorer")
    scorer = make_scorer(mean_absolute_error, greater_is_better=False)
    evl = Evaluator(scorer, cv=4, verbose=1)
    print("fitting evaluator")
    evl.fit(
        x_train.values,
        y_train.values,
        meta_learners,
        param_dicts,
        preprocessing={'meta': preprocess},
        n_iter=40  # bump this up to do a larger grid search
    )

    table = pd.DataFrame(evl.summary)
    table.to_html('iteration5.html')
    table.to_csv('iteration5.csv', index=False, header=False, sep='\t')
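
A minimal usage sketch for evaluateSecondLayer, assuming x_train and y_train are pandas objects with a numeric regression target. The learners and parameter distributions below are hypothetical placeholders; the 'meta.las' key follows the case.estimator naming used in the other examples on this page.

from scipy.stats import uniform
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso, Ridge

# Hypothetical base and meta learners
base_learners = [('rf', RandomForestRegressor()), ('rdg', Ridge())]
meta_learners = [('las', Lasso(max_iter=4000))]

# Distributions for the meta learners only, keyed by preprocessing case and name
param_dicts = {'meta.las': {'alpha': uniform(0, 5)}}

evaluateSecondLayer(base_learners, x_train, y_train, meta_learners, param_dicts)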
Example #14
def test_params():
    """[Model Selection] Test raises on bad params."""
    evl = Evaluator(mape_scorer, verbose=2)

    np.testing.assert_raises(
        ValueError,
        evl.fit,
        X,
        y,
        estimators=[OLS()],
        param_dicts={'bad.ols': {
            'offset': randint(1, 10)
        }},
        preprocessing={'prep': [Scale()]})
Example #15
def test_raises():
    """[Model Selection] Test raises on error."""

    evl = Evaluator(bad_scorer, verbose=1)

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        np.testing.assert_raises(
            ValueError,
            evl.fit,
            X,
            y,
            estimators=[OLS()],
            param_dicts={'ols': {
                'offset': randint(1, 10)
            }},
            n_iter=1)
Example #16
def test_passes():
    """[Model Selection] Test sets error score on failed scoring."""

    evl = Evaluator(bad_scorer, error_score=0, n_jobs=1, verbose=5)

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        evl = np.testing.assert_warns(
            FitFailedWarning,
            evl.fit,
            X,
            y,
            estimators=[OLS()],
            param_dicts={'ols': {
                'offset': randint(1, 10)
            }},
            n_iter=1)
    assert evl.results['test_score-m']['ols'] == 0
Example #17
from sklearn.neighbors import KNeighborsClassifier
from scipy.stats import randint

# Here we name the estimators ourselves
ests = [('gnb', GaussianNB()), ('knn', KNeighborsClassifier())]

# Now we map parameters to these
# The gnb has no hyperparameters worth tuning, so we can skip it
pars = {'n_neighbors': randint(2, 20)}
params = {'knn': pars}

##############################################################################
# We can now run an evaluation over these estimators and parameter distributions
# by calling the ``fit`` method.

evaluator = Evaluator(accuracy_scorer, cv=10, random_state=seed, verbose=1)
evaluator.fit(X, y, ests, params, n_iter=10)

##############################################################################
# The full history of the evaluation can be found in ``cv_results``. To compare
# models with their best parameters, we can pass the ``results`` attribute to
# a :obj:`pandas.DataFrame` or print it as a table. We use ``m`` to denote
# mean values and ``s`` to denote standard deviation across folds for brevity.
# Note that the timed prediction is for the training set, for comparability with
# training time.

print("Score comparison with best params founds:\n\n%r" % evaluator.results)

##############################################################################
# Preprocessing
# ^^^^^^^^^^^^^
Example #18
    'rf': {
        'max_depth': randint(2, 5),
        'min_samples_split': randint(5, 20),
        'min_samples_leaf': randint(10, 20),
        'n_estimators': randint(50, 100),
        'max_features': uniform(0.6, 0.3)
    }
}

# In[ ]:

scorer = make_scorer(mean_absolute_error, greater_is_better=False)

evl = Evaluator(
    scorer,
    cv=2,
    random_state=SEED,
    verbose=5,
)

# In[ ]:

evl.fit(
    xtrain,
    ytrain,
    estimators=base_learners,
    param_dicts=param_dicts,
    preprocessing={
        'sc': [StandardScaler()],
        'none': []
    },
    n_iter=2  # bump this up to do a larger grid search
Example #19
# Set parameter mapping
# Here, we differentiate distributions between cases for the random forest
params = {
    'svc': {
        'C': uniform(0, 10)
    },
    'class.rf': {
        'max_depth': randint(2, 10)
    },
    'proba.rf': {
        'max_depth': randint(2, 10),
        'max_features': uniform(0.5, 0.5)
    }
}

scorer = make_scorer(accuracy_score)
evaluator = Evaluator(scorer=scorer, random_state=seed, cv=2)

evaluator.fit(X,
              y,
              meta_learners,
              params,
              preprocessing=preprocessing,
              n_iter=2)

##############################################################################
# We can now compare the performance of the best fit for each candidate
# meta learner.

print("Results:\n%s" % evaluator.results)
Example #20
score = make_scorer(score_func=accuracy_score,
                    greater_is_better=True,
                    needs_proba=False,
                    needs_threshold=False)

ensemble = SequentialEnsemble(model_selection=True,
                              n_jobs=1,
                              shuffle=False,
                              random_state=seed)

ensemble.add('stack', ests_1, preprocessing=pre_cases)
ensemble.add_meta(SVC(kernel='linear', degree=5, tol=1e-4))
# ensemble.fit(X_train, y_train)
# y_pred = ensemble.predict(X_test)
# ens = ensemble
evaluator = Evaluator(scorer=score, random_state=seed, verbose=True)
evaluator.fit(data_pix,
              spacial_pix,
              estimators=[],
              param_dicts=pars_1,
              n_iter=5,
              preprocessing=pre_cases)

print(evaluator.results)

spacial_pix = spacial_pix.astype('int')
unique, counts = np.unique(y_test, return_counts=True)
print(np.asarray((unique, counts)).T)

# print(confusion_matrix(y_test, y_pred, labels=unique))
# print(precision_score(y_test, y_pred, average='micro', labels=unique))
Example #21
ests = [('rdg', Ridge(max_iter=4000)), ('las', Lasso(max_iter=4000))]

a = uniform(0, 10)

params = {
    'rdg': {'alpha': a},
    'las': {'alpha': a}
}

preproc = {
    'none': [],
    'sc': [StandardScaler()]
}

evaluator = Evaluator(r2_scorer, cv=2, random_state=seed, verbose=1)
# evaluator.fit(image_set, label_set, ests, params, 40, preproc)
# print(evaluator.results)

def sig(z):
    return 1/(1 + np.exp(-z))

def hyp(th, x):
    return sig(x @ th)

def cost_func(x, y, th, m):
    hi = hyp(th, x)
    y_ = y.reshape(-1, 1)
    j = 1/float(m) * np.sum(-y_ * np.log(hi) - (1 - y_) * np.log(1 - hi))
    return j
Example #22
from sklearn.metrics import r2_score
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler

from mlens.model_selection import Evaluator
from mlens.metrics import make_scorer
# from mlens.preprocessing import Subset

# image_set = np.genfromtxt('../testdata/c1_gn.csv', delimiter=',')
# label_set = np.genfromtxt('../testdata/c1_L_gn.csv', delimiter=',')
#
# X_train, X_test, y_train, y_test = train_test_split(image_set, label_set, test_size=0.33)

score_f = make_scorer(score_func=r2_score, greater_is_better=False)

evaluator = Evaluator(scorer=score_f, shuffle=True, verbose=True)

estimators = [
    ('las', Lasso(copy_X=True, max_iter=4000)),
    ('rdg', Ridge(copy_X=True, max_iter=4000)),
    # ('rfr', RandomForestRegressor()),
]

params = {
    'las': {
        'alpha': uniform(0, 5)
    },
    'rdg': {
        'alpha': uniform(0, 5)
    },
    'rfr': {