from sklearn import datasets
from sklearn.model_selection import KFold, GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC
# Import path for SequentialSearchCV assumed from the PyRCN package layout.
from pyrcn.model_selection import SequentialSearchCV


def test_sequentialSearchCV_equivalence():
    # Test the functional equivalence of SequentialSearchCV to a manual
    # sequence of searches.
    iris = datasets.load_iris()
    X = iris.data[:, [0, 2]]
    y = iris.target
    cv = KFold(2, shuffle=True, random_state=42)
    svm1 = SVC(random_state=42)
    svm2 = SVC(random_state=42)
    param_grid1 = {'C': [1, 2], 'kernel': ['rbf', 'linear']}
    param_grid2 = {'shrinking': [True, False]}
    gs1 = GridSearchCV(svm1, param_grid1, cv=cv).fit(X, y)
    gs2 = RandomizedSearchCV(gs1.best_estimator_, param_grid2, cv=cv,
                             random_state=42).fit(X, y)
    ss = SequentialSearchCV(svm2, searches=[
        ('gs1', GridSearchCV, param_grid1, {'cv': cv}),
        ('gs2', RandomizedSearchCV, param_grid2,
         {'cv': cv, 'random_state': 42})
    ]).fit(X, y)
    assert gs1.best_params_ == ss.best_params_['gs1']
    assert gs2.best_params_ == ss.best_params_['gs2']
def test_sequentialSearchCV_equivalence() -> None:
    """Test the equivalence of SequentialSearchCV to a manual sequence."""
    iris = datasets.load_iris()
    X = iris.data[:, [0, 2]]
    y = iris.target
    cv = KFold(2, shuffle=True, random_state=42)
    svm1 = SVC(random_state=42)
    svm2 = SVC(random_state=42)
    param_grid1 = {'C': [1, 2], 'kernel': ['rbf', 'linear']}
    param_grid2 = {'shrinking': [True, False]}
    gs1 = GridSearchCV(svm1, param_grid1, cv=cv).fit(X, y)
    gs2 = RandomizedSearchCV(gs1.best_estimator_, param_grid2, cv=cv,
                             random_state=42).fit(X, y)
    ss = SequentialSearchCV(svm2, searches=[
        ('gs1', GridSearchCV, param_grid1, {'cv': cv}),
        ('gs2', RandomizedSearchCV, param_grid2,
         {'cv': cv, 'random_state': 42, 'refit': True}),
        ('gs3', GridSearchCV, param_grid1)
    ]).fit(X, y)
    assert gs1.best_params_ == ss.all_best_params_['gs1']
    assert gs2.best_params_ == ss.all_best_params_['gs2']
    assert isinstance(ss.cv_results_, dict)
    assert ss.best_estimator_ is not None
    assert isinstance(ss.best_score_, float)
    print(ss.best_index_)
    assert isinstance(ss.n_splits_, int)
    assert isinstance(ss.refit_time_, float)
    assert isinstance(ss.multimetric, bool)
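# The per-step results of a fitted SequentialSearchCV can be inspected through
# the attributes exercised above and in the examples below (all_best_params_,
# all_best_estimator_, best_estimator_).  A minimal sketch, assuming a fitted
# SequentialSearchCV instance named `ss` as in the tests above:
for step_name, step_params in ss.all_best_params_.items():
    print(step_name, step_params)

# The estimator refitted after the final search serves as the overall result.
final_svm = ss.best_estimator_
print(final_svm.get_params())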
                           needs_proba=True)
}
kwargs_step4 = {
    'n_iter': 50,
    'random_state': 42,
    'verbose': 1,
    'n_jobs': -1,
    'scoring': make_scorer(mean_squared_error, greater_is_better=False,
                           needs_proba=True)
}

searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = ESNClassifier(**initially_fixed_params)

sequential_search = \
    SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)

# ## Test the ESN
#
# In the test case, we train the ESN using the entire training set as seen
# before. Next, we compute the predicted outputs on the training and test set
# and fix a threshold of 0.5, above which a note is assumed to be predicted.
#
# We report the accuracy score for each frame in order to follow the reference
# paper.
param_grid = {'hidden_layer_size': [500, 1000, 2000, 4000, 5000]}
base_esn = sequential_search.best_estimator_

for params in ParameterGrid(param_grid):
    print(params)
    esn = clone(base_esn).set_params(**params)
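    # Hedged continuation sketch of the evaluation loop above, following the
    # description in the comment: the ESN is re-trained on the entire training
    # set, the predicted probabilities are thresholded at 0.5, and the
    # frame-wise accuracy is reported.  The names X_train, y_train, X_test,
    # y_test and the use of predict_proba are assumptions, not taken from the
    # excerpt.
    esn.fit(X_train, y_train)
    y_train_pred = (esn.predict_proba(X=X_train) >= 0.5).astype(int)
    y_test_pred = (esn.predict_proba(X=X_test) >= 0.5).astype(int)
    print(accuracy_score(y_train.ravel(), y_train_pred.ravel()))
    print(accuracy_score(y_test.ravel(), y_test_pred.ravel()))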
# The searches are defined similarly to the steps of a
# sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', RandomizedSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = ESNClassifier(**initially_fixed_params)

# Optimization
#
# We provide a SequentialSearchCV that iterates through the list of searches
# defined above. It can be combined with any model selection tool from
# scikit-learn.
sequential_search = SequentialSearchCV(base_esn,
                                       searches=searches).fit(X_tr, y_tr)

# Use the ESN with final hyper-parameters
#
# After the optimization, we extract the ESN with the final hyper-parameters
# as the result of the optimization.
base_esn = sequential_search.best_estimator_

# Test the ESN
#
# Finally, we increase the reservoir size and compare the impact of uni- and
# bidirectional ESNs. Notice that the ESN strongly benefits both from
# increasing the reservoir size and from the bidirectional working mode.
param_grid = {
    'hidden_layer_size': [50, 100, 200, 400, 500],
    'bidirectional': [False, True]
}
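# A sketch of how the comparison over param_grid could be run, mirroring the
# ParameterGrid loop used in the other examples.  The held-out arrays X_te and
# y_te and the use of score() are assumptions, not part of this excerpt.
for params in ParameterGrid(param_grid):
    esn = clone(base_esn).set_params(**params)
    esn.fit(X_tr, y_tr)
    print(params, esn.score(X_te, y_te))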
    'scoring': scorer,
    'cv': TimeSeriesSplit()
}
step_2_params = {'leakage': [0.2, 0.4, 0.7, 0.9, 1.0]}
kwargs_2 = {
    'verbose': 5,
    'scoring': scorer,
    'n_jobs': -1,
    'cv': TimeSeriesSplit()
}

searches = [('step1', RandomizedSearchCV, step_1_params, kwargs_1),
            ('step2', GridSearchCV, step_2_params, kwargs_2)]

# Perform the search
esn_opti = SequentialSearchCV(esn, searches).fit(X_train.reshape(-1, 1),
                                                 y_train)
print(esn_opti)

# Programming pattern for sequence processing
#
# Load the dataset
X, y = load_digits(return_X_y=True, as_sequence=True)
print("Number of digits: {0}".format(len(X)))
print("Shape of digits: {0}".format(X[0].shape))

# Divide the dataset into training and test subsets
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2,
                                          random_state=42)
print("Number of digits in training set: {0}".format(len(X_tr)))
print("Shape of the first digit: {0}".format(X_tr[0].shape))
print("Number of digits in test set: {0}".format(len(X_te)))
print("Shape of the first digit: {0}".format(X_te[0].shape))

# These parameters were optimized using SequentialSearchCV
scorer = make_scorer(score_func=mean_squared_error, greater_is_better=False)

kwargs = {
    'verbose': 5,
    'scoring': scorer,
    'n_jobs': -1,
    'cv': TimeSeriesSplit()
}

esn = ESNRegressor(regressor=Ridge(), **initially_fixed_params)

searches = [('step1', GridSearchCV, step1_esn_params, kwargs),
            ('step2', GridSearchCV, step2_esn_params, kwargs),
            ('step3', GridSearchCV, step3_esn_params, kwargs)]

sequential_search_esn = SequentialSearchCV(esn, searches=searches).fit(
    X_train.reshape(-1, 1), y_train)

# Hyperparameter optimization ELM
initially_fixed_elm_params = {
    'hidden_layer_size': 100,
    'activation': 'tanh',
    'k_in': 1,
    'alpha': 1e-5,
    'random_state': 42
}

step1_elm_params = {'input_scaling': np.linspace(0.1, 5.0, 50)}
step2_elm_params = {'bias_scaling': np.linspace(0.0, 1.5, 16)}

scorer = make_scorer(score_func=mean_squared_error, greater_is_better=False)
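# The ELM search can then be assembled in the same way as the ESN search
# above.  This is a sketch rather than part of the original excerpt: the use
# of ELMRegressor and the kwargs dictionary are assumed to follow the same
# pattern as for the ESN.
kwargs_elm = {'verbose': 5, 'scoring': scorer, 'n_jobs': -1,
              'cv': TimeSeriesSplit()}

elm = ELMRegressor(regressor=Ridge(), **initially_fixed_elm_params)

searches_elm = [('step1', GridSearchCV, step1_elm_params, kwargs_elm),
                ('step2', GridSearchCV, step2_elm_params, kwargs_elm)]

sequential_search_elm = SequentialSearchCV(elm, searches=searches_elm).fit(
    X_train.reshape(-1, 1), y_train)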
    'scoring': make_scorer(accuracy_score)
}
kwargs_step3 = {
    'verbose': 1,
    'n_jobs': -1,
    'scoring': make_scorer(accuracy_score)
}
kwargs_step4 = {
    'n_iter': 50,
    'random_state': 42,
    'verbose': 1,
    'n_jobs': -1,
    'scoring': make_scorer(accuracy_score)
}

searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = ESNClassifier(
    input_to_node=PredefinedWeightsInputToNode(
        predefined_input_weights=w_in.T),
    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_fsdd_km_sparse_200.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(
        base_km_esn, searches=searches).fit(X_train_scaled, y_train)
    dump(sequential_search, "../sequential_search_fsdd_km_sparse_200.joblib")
}
step2_esn_params = {'leakage': np.linspace(0.1, 1.0, 10)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.5, 16)}

scorer = make_scorer(score_func=mean_squared_error, greater_is_better=False)

kwargs = {'verbose': 5, 'scoring': scorer, 'n_jobs': -1}

esn = ESNRegressor(regressor=Ridge(), **initially_fixed_esn_params)
ts_split = TimeSeriesSplit()

searches = [('step1', GridSearchCV, step1_esn_params, kwargs),
            ('step2', GridSearchCV, step2_esn_params, kwargs),
            ('step3', GridSearchCV, step3_esn_params, kwargs)]

sequential_search_esn = SequentialSearchCV(esn, searches=searches).fit(
    X_train, y_train)

esn_step1 = sequential_search_esn.all_best_estimator_["step1"]
esn_step2 = sequential_search_esn.all_best_estimator_["step2"]
esn_step3 = sequential_search_esn.all_best_estimator_["step3"]

esn_step1.predict(X=unit_impulse)
fig = plt.figure()
im = plt.imshow(np.abs(esn_step1.hidden_layer_state[:, 1:].T),
                vmin=0, vmax=1.0)
plt.xlim([0, 100])
plt.ylim([0, esn_step1.hidden_layer_state.shape[1] - 1])
plt.xlabel('n')
plt.ylabel('R[n]')
plt.colorbar(im)
           'n_jobs': -1,
           'scoring': 'accuracy'}

elm = ELMClassifier(regressor=Ridge(), **initially_fixed_params)

# The searches are defined similarly to the steps of a
# sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_params, kwargs1),
            ('step2', GridSearchCV, step2_params, kwargs2)]

# # Perform the sequential search

# In[ ]:

sequential_search = SequentialSearchCV(elm, searches=searches).fit(X_train,
                                                                   y_train)

# # Extract the final results

# In[ ]:

final_fixed_params = initially_fixed_params
final_fixed_params.update(sequential_search.all_best_params_["step1"])
final_fixed_params.update(sequential_search.all_best_params_["step2"])

# # Test
# Increase the reservoir size and compare different regression methods. Make
# sure that you have enough RAM for that, because all regression types without
# a chunk size require a lot of memory. This is why, especially for large
# datasets, the incremental regression is recommended.
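# A hedged sketch of the comparison described above.  IncrementalRegression is
# assumed to be importable from pyrcn.linear_model; the held-out arrays
# X_test/y_test, the grid values, and the parameter names are illustrative
# assumptions, not taken from the original notebook.
from pyrcn.linear_model import IncrementalRegression

param_grid = {'hidden_layer_size': [500, 1000, 2000],
              'regressor': [Ridge(alpha=1e-5),
                            IncrementalRegression(alpha=1e-5)]}

for params in ParameterGrid(param_grid):
    elm = clone(sequential_search.best_estimator_).set_params(**params)
    elm.fit(X_train, y_train)
    print(params, elm.score(X_test, y_test))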