Пример #1
0
def make_regressors(subset=None):
    """Build a Pipeline whose final ``finmodel`` step is grid-searched
    over a set of candidate regressors.

    Parameters
    ----------
    subset : iterable of str or None
        Keys of the candidate regressors to include ('gbrt', 'lasso');
        ``None`` selects all of them.

    Returns
    -------
    Pipeline
        A pipeline with the chosen candidates attached via ``set_grid``.
    """
    candidates = {
        'gbrt': set_grid(
            GradientBoostingRegressor(),
            n_estimators=[2**i for i in range(1, 11)],
            learning_rate=[0.1, 0.01, 0.001],
        ),
        'lasso': set_grid(
            Lasso(),
            alpha=np.exp(np.linspace(-8, 8)),
        ),
    }

    keys = list(candidates) if subset is None else subset
    chosen = [candidates[key] for key in keys]

    # The DummyRegressor is only a placeholder; set_grid swaps in the
    # candidate list as alternatives for the 'finmodel' step.
    pipeline = Pipeline([('finmodel', DummyRegressor())])
    return set_grid(pipeline, finmodel=chosen)
Пример #2
0
def test_build_param_grid_set_estimator():
    """Alternative 'clf' estimators carrying their own grids must expand
    into separate entries of the built parameter grid, while grid-less
    alternatives are merged into a single entry."""
    svc_linear = SVC()
    logreg = LogisticRegression()
    svc_poly = SVC()
    sgd = SGDClassifier()

    estimator = set_grid(
        Pipeline([('sel', set_grid(SelectKBest(), k=[2, 3])),
                  ('clf', None)]),
        clf=[
            set_grid(svc_linear, kernel=['linear']),
            logreg,
            set_grid(svc_poly, kernel=['poly'], degree=[2, 3]),
            sgd,
        ])

    expected = [
        {'clf': [svc_linear],
         'clf__kernel': ['linear'],
         'sel__k': [2, 3]},
        {'clf': [svc_poly],
         'clf__kernel': ['poly'],
         'clf__degree': [2, 3],
         'sel__k': [2, 3]},
        # logreg and sgd carry no grid of their own, so they share one entry.
        {'clf': [logreg, sgd],
         'sel__k': [2, 3]},
    ]
    assert build_param_grid(estimator) == expected
Пример #3
0
def test_rnn(datafnc):
    """Grid-search a CNN1DClassifier on the data returned by *datafnc*
    and print the held-out test score.

    Parameters
    ----------
    datafnc : callable
        Zero-argument function returning an ``(X, y)`` dataset pair.
    """
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import make_pipeline
    from searchgrid import set_grid, build_param_grid

    X, y = datafnc()

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # NOTE(review): the original code first built a GRUClassifier pipeline
    # and immediately rebound `estimator` to the CNN1D pipeline below, so
    # the GRU variant was never searched.  The dead assignment (and the
    # unused MLPClassifier/StandardScaler/PadSubsequence imports) have been
    # removed; if the GRU model was meant to be compared too, it needs its
    # own GridSearchCV like in test_dnn_v_dnn.
    estimator = make_pipeline(
        set_grid(CNN1DClassifier(epochs=64),
                 alpha=[0.01],
                 n_layers=[1],
                 n_neurons=[32],
                 dropout=[0.2, 0.3, 0.4]))

    model = GridSearchCV(estimator=estimator,
                         param_grid=build_param_grid(estimator),
                         verbose=1000,
                         cv=3,
                         n_jobs=1)

    model.fit(X_train, y_train)
    print(datafnc.__name__)
    print(model.score(X_test, y_test))
Пример #4
0
def make_dummy_regressor(subset=None):
    """Build a Pipeline whose 'model' step is grid-searched over dummy
    regressor variants.

    Parameters
    ----------
    subset : iterable of str or None
        Keys of the candidate table to use (currently only 'dummy');
        ``None`` selects every entry.

    Returns
    -------
    Pipeline
        A pipeline with the candidates attached as a search grid.
    """
    table = {
        'dummy': set_grid(DummyRegressor(), strategy=['mean', 'median']),
    }

    chosen_keys = list(table) if subset is None else subset
    models = [table[name] for name in chosen_keys]

    # The Lasso here is just a placeholder for the 'model' step; the
    # grid substitutes the chosen candidates in its place.
    pipe = Pipeline([('model', Lasso())])
    return set_grid(pipe, model=models)
Пример #5
0
def grid_regressors(subset=None):
    """Build regressors with parameter grids attached for GridSearchCV.

    Parameters
    ----------
    subset : array-like or None
        Names of the models ('gbrt', 'svr', 'tree', 'lasso') to include
        in the grid search; ``None`` selects all of them.

    Returns
    -------
    list
        Regressor instances with grids attached via ``set_grid``.
    """
    catalogue = {
        'gbrt': set_grid(GradientBoostingRegressor(),
                         n_estimators=[2**i for i in range(1, 11)],
                         learning_rate=[0.1, 0.01, 0.001]),
        'svr': set_grid(SVR(),
                        C=np.logspace(-5, 5, 20),
                        epsilon=[0.1, 0.01, 0.001],
                        gamma=np.logspace(-5, 5, 20)),
        'tree': set_grid(DecisionTreeRegressor(),
                         min_samples_split=np.logspace(-5, 0, 20),
                         max_depth=list(range(1, 20))),
        'lasso': set_grid(Lasso(),
                          alpha=np.exp(np.linspace(-8, 8))),
    }

    wanted = list(catalogue) if subset is None else subset
    return [catalogue[name] for name in wanted]
Пример #6
0
def test_make_grid_search():
    """make_grid_search must honour ``cv`` and expand attached grids into
    the expected number of candidate parameter settings."""
    X, y = load_iris(return_X_y=True)
    logreg = LogisticRegression()
    svc = set_grid(SVC(), kernel=['poly'], degree=[2, 3])

    cases = [
        (make_grid_search(logreg, cv=5), 1),  # no grid -> one candidate
        (make_grid_search(svc, cv=5), 2),     # degree in {2, 3}
        (make_grid_search([logreg, svc], cv=5), 3),  # union of the above
    ]
    for search, expected_count in cases:
        search.fit(X, y)
        assert search.cv == 5
        assert len(search.cv_results_['params']) == expected_count

    # In the multi-estimator search, the SVC rows carry the degree grid
    # and the remaining row is the bare LogisticRegression.
    gs3 = cases[2][0]
    svc_mask = gs3.cv_results_['param_root'] == svc
    assert svc_mask.sum() == 2
    assert gs3.cv_results_['param_root__degree'][svc_mask].tolist() == [2, 3]
    assert gs3.cv_results_['param_root'][~svc_mask].tolist() == [logreg]
Пример #7
0
def test_dnn_v_dnn(datafnc):
    """Grid-search the project's PMLPClassifier and compare its held-out
    score against a plain sklearn MLPClassifier baseline (both scores
    are printed, nothing is asserted).

    Parameters
    ----------
    datafnc : callable
        Zero-argument function returning an ``(X, y)`` dataset pair.
    """
    from sklearn.neural_network import MLPClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from searchgrid import set_grid, build_param_grid

    X, y = datafnc()
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    pmlp_pipe = make_pipeline(
        StandardScaler(),
        set_grid(PMLPClassifier(),
                 epochs=[2**i for i in range(1, 8)],
                 n_layers=list(range(1, 4)),
                 n_neurons=[2**i for i in range(1, 8)],
                 alpha=[1e-4, 1e-3, 1e-2]))

    pmlp_search = GridSearchCV(estimator=pmlp_pipe,
                               param_grid=build_param_grid(pmlp_pipe),
                               verbose=1000,
                               cv=3,
                               n_jobs=2)

    baseline = GridSearchCV(
        estimator=make_pipeline(StandardScaler(), MLPClassifier()),
        param_grid={'mlpclassifier__max_iter': [2**i for i in range(1, 8)]},
        verbose=1000,
        cv=3)

    pmlp_search.fit(X_train, y_train)
    baseline.fit(X_train, y_train)
    print(datafnc.__name__)
    print(pmlp_search.score(X_test, y_test))
    print(baseline.score(X_test, y_test))
Пример #8
0
# Feature extraction: select columns from the raw records and encode them
# either as plain numbers or as categoricals.  The index comments name the
# Titanic-dataset columns the positions presumably map to — TODO confirm
# against the actual input format.
features = make_union(
    number(1),  # Pclass
    category(3),  # Gender
    number(4),  # Age
    number(5),  # SibSp
    category(6),  # Parch
    number(8),  # Fare
    category(10),  # Embarked
)

# Base pipeline: features -> scaling -> a placeholder final model.
estimator = Pipeline([('features', features), ('scaler', StandardScaler()),
                      ('model', LinearSVC())])

# NOTE(review): `models` is built here but never passed to set_grid below —
# only the GradientBoostingClassifier list is searched in the visible code.
# Either include these in the `model=` list or remove this block.
models = [
    set_grid(DecisionTreeClassifier(),
             min_samples_split=[2**-i for i in range(1, 8)],
             max_depth=list(range(1, 21))),
    set_grid(LinearSVC(), C=np.logspace(-6, 6, num=20)),
]

# Attach the search space: the 'model' step iterates over the boosted-tree
# grid, the 'scaler' step over a single StandardScaler.
estimator = set_grid(estimator,
                     model=[
                         set_grid(GradientBoostingClassifier(),
                                  n_estimators=[2**i for i in range(1, 11)],
                                  learning_rate=np.logspace(-4, 0, num=10)),
                     ],
                     scaler=[StandardScaler()])

# Expand the attached grids into a GridSearchCV-compatible param_grid.
param_grid = build_param_grid(estimator)

gsearch = GridSearchCV(estimator=estimator,
Пример #9
0
import pytest
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.datasets import load_iris
from searchgrid import set_grid, build_param_grid, make_grid_search


@pytest.mark.parametrize(('estimator', 'param_grid'), [
    # single-parameter grid on a bare estimator
    (set_grid(SVC(), C=[1, 2]),
     {'C': [1, 2]}),
    # two independent parameters on one estimator
    (set_grid(SVC(), C=[1, 2], gamma=[1, 2]),
     {'C': [1, 2], 'gamma': [1, 2]}),
    # same grid nested inside a pipeline gains the 'svc__' prefix
    (make_pipeline(set_grid(SVC(), C=[1, 2], gamma=[1, 2])),
     {'svc__C': [1, 2], 'svc__gamma': [1, 2]}),
])
def test_build_param_grid(estimator, param_grid):
    """build_param_grid must reproduce the grid attached via set_grid."""
    assert build_param_grid(estimator) == param_grid


#    pytest.mark.xfail(
#        (set_grid(SVC(), [{'kernel': ['linear']},
#                          {'kernel': 'rbf', 'gamma': [1, 2]}]),
#         [{'kernel': ['linear']}, {'kernel': 'rbf', 'gamma': [1, 2]}])),
#    pytest.mark.xfail(