def test_sequentialSearchCV_equivalence():
    # Test the functional equivalence of SequentialSearchCV to a manual sequence
    iris = datasets.load_iris()
    X = iris.data[:, [0, 2]]
    y = iris.target
    cv = KFold(2, shuffle=True, random_state=42)
    svm1 = SVC(random_state=42)
    svm2 = SVC(random_state=42)
    param_grid1 = {'C': [1, 2], 'kernel': ['rbf', 'linear']}
    param_grid2 = {'shrinking': [True, False]}
    gs1 = GridSearchCV(svm1, param_grid1, cv=cv).fit(X, y)
    gs2 = RandomizedSearchCV(gs1.best_estimator_,
                             param_grid2,
                             cv=cv,
                             random_state=42).fit(X, y)

    ss = SequentialSearchCV(
        svm2,
        searches=[('gs1', GridSearchCV, param_grid1, {'cv': cv}),
                  ('gs2', RandomizedSearchCV, param_grid2,
                   {'cv': cv, 'random_state': 42})]).fit(X, y)
    assert gs1.best_params_ == ss.best_params_['gs1']
    assert gs2.best_params_ == ss.best_params_['gs2']
Example #2
def test_sequentialSearchCV_equivalence() -> None:
    """Test the equivalence of SequentialSearchCV to a manual sequence."""
    iris = datasets.load_iris()
    X = iris.data[:, [0, 2]]
    y = iris.target
    cv = KFold(2, shuffle=True, random_state=42)
    svm1 = SVC(random_state=42)
    svm2 = SVC(random_state=42)
    param_grid1 = {'C': [1, 2], 'kernel': ['rbf', 'linear']}
    param_grid2 = {'shrinking': [True, False]}
    gs1 = GridSearchCV(svm1, param_grid1, cv=cv).fit(X, y)
    gs2 = RandomizedSearchCV(gs1.best_estimator_,
                             param_grid2,
                             cv=cv,
                             random_state=42).fit(X, y)

    ss = SequentialSearchCV(
        svm2,
        searches=[('gs1', GridSearchCV, param_grid1, {'cv': cv}),
                  ('gs2', RandomizedSearchCV, param_grid2,
                   {'cv': cv, 'random_state': 42, 'refit': True}),
                  ('gs3', GridSearchCV, param_grid1)]).fit(X, y)
    assert gs1.best_params_ == ss.all_best_params_['gs1']
    assert gs2.best_params_ == ss.all_best_params_['gs2']
    assert isinstance(ss.cv_results_, dict)
    assert ss.best_estimator_ is not None
    assert isinstance(ss.best_score_, float)
    print(ss.best_index_)
    assert isinstance(ss.n_splits_, int)
    assert isinstance(ss.refit_time_, float)
    assert isinstance(ss.multimetric, bool)
Example #3
# The snippet begins mid-definition; the head of kwargs_step3 below is
# reconstructed from the parallel kwargs_step4 and the pattern in the other
# examples, so the exact values are assumptions.
kwargs_step3 = {
    'verbose': 1, 'n_jobs': -1,
    'scoring': make_scorer(mean_squared_error, greater_is_better=False,
                           needs_proba=True)
}
kwargs_step4 = {
    'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1,
    'scoring': make_scorer(mean_squared_error, greater_is_better=False,
                           needs_proba=True)
}

searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = ESNClassifier(**initially_fixed_params)
sequential_search = \
    SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)

# ## Test the ESN
#
# For the test case, we train the ESN on the entire training set as seen
# before. Next, we compute the predicted outputs on the training and test set
# and fix a threshold of 0.5, above which a note is assumed to be active.
#
# We report the accuracy score for each frame in order to follow the
# reference paper.
param_grid = {'hidden_layer_size': [500, 1000, 2000, 4000, 5000]}
base_esn = sequential_search.best_estimator_

for params in ParameterGrid(param_grid):
    print(params)
    esn = clone(base_esn).set_params(**params)
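    # The snippet ends here; the rest of the loop body is a hedged sketch of
    # the evaluation described above (threshold 0.5, frame-wise accuracy).
    # X_train, y_train, X_test, y_test and sklearn's accuracy_score are
    # assumed to be available in the surrounding example.
    esn.fit(X_train, y_train)
    y_train_pred = esn.predict_proba(X_train) > 0.5
    y_test_pred = esn.predict_proba(X_test) > 0.5
    print("Accuracy (train): {0}".format(accuracy_score(y_train,
                                                        y_train_pred)))
    print("Accuracy (test): {0}".format(accuracy_score(y_test, y_test_pred)))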
Example #4
# The searches are defined similarly to the steps of a
# sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', RandomizedSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = ESNClassifier(**initially_fixed_params)

# Optimization
# We provide a SequentialSearchCV that iterates through the list of searches
# defined above. It can be combined with any model selection tool from
# scikit-learn.
sequential_search = SequentialSearchCV(base_esn,
                                       searches=searches).fit(X_tr, y_tr)

# Use the ESN with final hyper-parameters
#
# After the optimization, we extract the ESN with the final hyper-parameters
# as the result of the optimization.
base_esn = sequential_search.best_estimator_

# Test the ESN
# Finally, we increase the reservoir size and compare the impact of uni- and
# bidirectional ESNs. Notice that the ESN benefits strongly both from an
# increased reservoir size and from the bidirectional working mode.
param_grid = {
    'hidden_layer_size': [50, 100, 200, 400, 500],
    'bidirectional': [False, True]
}
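
# The example is truncated here; below is a hedged sketch of the comparison
# loop described above, following the pattern of the other examples.
# ParameterGrid and clone (from sklearn) and the held-out X_te/y_te are
# assumed to be available.
for params in ParameterGrid(param_grid):
    esn = clone(base_esn).set_params(**params).fit(X_tr, y_tr)
    print(params, esn.score(X_te, y_te))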
Example #5
    'scoring': scorer,
    'cv': TimeSeriesSplit()
}
step_2_params = {'leakage': [0.2, 0.4, 0.7, 0.9, 1.0]}
kwargs_2 = {
    'verbose': 5,
    'scoring': scorer,
    'n_jobs': -1,
    'cv': TimeSeriesSplit()
}

searches = [('step1', RandomizedSearchCV, step_1_params, kwargs_1),
            ('step2', GridSearchCV, step_2_params, kwargs_2)]

# Perform the search
esn_opti = SequentialSearchCV(esn, searches).fit(X_train.reshape(-1, 1),
                                                 y_train)
print(esn_opti)
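
# A minimal usage sketch for the tuned model; X_test and y_test are assumed
# to exist with the same shape conventions as the training data above, and
# mean_squared_error to be imported from sklearn.metrics.
y_pred = esn_opti.predict(X_test.reshape(-1, 1))
print("Test MSE: {0}".format(mean_squared_error(y_test, y_pred)))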

# Programming pattern for sequence processing
# Load the dataset
X, y = load_digits(return_X_y=True, as_sequence=True)
print("Number of digits: {0}".format(len(X)))
print("Shape of digits {0}".format(X[0].shape))
# Divide the dataset into training and test subsets
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)
print("Number of digits in training set: {0}".format(len(X_tr)))
print("Shape of the first digit: {0}".format(X_tr[0].shape))
print("Number of digits in test set: {0}".format(len(X_te)))
print("Shape of the first digit: {0}".format(X_te[0].shape))

# These parameters were optimized using SequentialSearchCV
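# Hedged sketch of applying such a model; the tuned values are not shown in
# this snippet, so the parameters below are placeholders, not the actual
# search results.
esn = ESNClassifier(hidden_layer_size=500,  # hypothetical value
                    random_state=42).fit(X_tr, y_tr)
print("Test accuracy: {0}".format(esn.score(X_te, y_te)))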
Example #6
scorer = make_scorer(score_func=mean_squared_error, greater_is_better=False)

kwargs = {
    'verbose': 5,
    'scoring': scorer,
    'n_jobs': -1,
    'cv': TimeSeriesSplit()
}

esn = ESNRegressor(regressor=Ridge(), **initially_fixed_params)

searches = [('step1', GridSearchCV, step1_esn_params, kwargs),
            ('step2', GridSearchCV, step2_esn_params, kwargs),
            ('step3', GridSearchCV, step3_esn_params, kwargs)]

sequential_search_esn = SequentialSearchCV(esn, searches=searches).fit(
    X_train.reshape(-1, 1), y_train)

# Hyperparameter optimization ELM
initially_fixed_elm_params = {
    'hidden_layer_size': 100,
    'activation': 'tanh',
    'k_in': 1,
    'alpha': 1e-5,
    'random_state': 42
}

step1_elm_params = {'input_scaling': np.linspace(0.1, 5.0, 50)}
step2_elm_params = {'bias_scaling': np.linspace(0.0, 1.5, 16)}

scorer = make_scorer(score_func=mean_squared_error, greater_is_better=False)
Example #7
# The snippet begins mid-definition; the head of kwargs_step2 below is
# reconstructed from the parallel kwargs_step3/kwargs_step4, so the exact
# values are assumptions.
kwargs_step2 = {
    'n_iter': 50,
    'random_state': 42,
    'verbose': 1,
    'n_jobs': -1,
    'scoring': make_scorer(accuracy_score)
}
kwargs_step3 = {
    'verbose': 1,
    'n_jobs': -1,
    'scoring': make_scorer(accuracy_score)
}
kwargs_step4 = {
    'n_iter': 50,
    'random_state': 42,
    'verbose': 1,
    'n_jobs': -1,
    'scoring': make_scorer(accuracy_score)
}

searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = ESNClassifier(
    input_to_node=PredefinedWeightsInputToNode(
        predefined_input_weights=w_in.T),
    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_fsdd_km_sparse_200.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(
        X_train_scaled, y_train)
    dump(sequential_search, "../sequential_search_fsdd_km_sparse_200.joblib")
Example #8
# The snippet begins mid-definition; step1_esn_params below is reconstructed
# from the step2/step3 pattern seen in the other examples, so the values are
# assumptions.
step1_esn_params = {
    'input_scaling': np.linspace(0.1, 5.0, 50),
    'spectral_radius': np.linspace(0.0, 1.5, 16)
}
step2_esn_params = {'leakage': np.linspace(0.1, 1.0, 10)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.5, 16)}

scorer = make_scorer(score_func=mean_squared_error, greater_is_better=False)

kwargs = {'verbose': 5, 'scoring': scorer, 'n_jobs': -1}

esn = ESNRegressor(regressor=Ridge(), **initially_fixed_esn_params)

ts_split = TimeSeriesSplit()
searches = [('step1', GridSearchCV, step1_esn_params, kwargs),
            ('step2', GridSearchCV, step2_esn_params, kwargs),
            ('step3', GridSearchCV, step3_esn_params, kwargs)]

sequential_search_esn = SequentialSearchCV(esn, searches=searches).fit(
    X_train, y_train)

esn_step1 = sequential_search_esn.all_best_estimator_["step1"]
esn_step2 = sequential_search_esn.all_best_estimator_["step2"]
esn_step3 = sequential_search_esn.all_best_estimator_["step3"]

esn_step1.predict(X=unit_impulse)
fig = plt.figure()
im = plt.imshow(np.abs(esn_step1.hidden_layer_state[:, 1:].T),
                vmin=0,
                vmax=1.0)
plt.xlim([0, 100])
plt.ylim([0, esn_step1.hidden_layer_state.shape[1] - 1])
plt.xlabel('n')
plt.ylabel('R[n]')
plt.colorbar(im)
Example #9
# The snippet begins mid-definition; the dict name and opener below are
# reconstructed (the searches further down reference kwargs1 and kwargs2).
kwargs2 = {'n_jobs': -1,
           'scoring': 'accuracy'}

elm = ELMClassifier(regressor=Ridge(), **initially_fixed_params)

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_params, kwargs1),
            ('step2', GridSearchCV, step2_params, kwargs2)]


# # Perform the sequential search

sequential_search = SequentialSearchCV(elm, searches=searches).fit(X_train, y_train)


# # Extract the final results

# Copy so that initially_fixed_params is not mutated in place by the updates.
final_fixed_params = dict(initially_fixed_params)
final_fixed_params.update(sequential_search.all_best_params_["step1"])
final_fixed_params.update(sequential_search.all_best_params_["step2"])


# # Test
# Increase the reservoir size and compare different regression methods. Make
# sure that you have enough RAM, because all regression types without a chunk
# size require a lot of memory. This is why, especially for large datasets,
# the incremental regression is recommended.
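
# A hedged sketch of that comparison. IncrementalRegression is PyRCN's
# chunk-wise readout; swapping it in for Ridge, the reservoir size of 2000,
# and the use of clone (from sklearn.base) are illustrative assumptions, not
# code from the original notebook.
from pyrcn.linear_model import IncrementalRegression

for reg in (Ridge(), IncrementalRegression()):
    elm = clone(final_elm).set_params(regressor=reg, hidden_layer_size=2000)
    elm.fit(X_train, y_train)
    print(type(reg).__name__, elm.score(X_test, y_test))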