def test_input_errors_randomized(params, expected_error_message):
    # tests specific to HalvingRandomSearchCV

    base_estimator = FastClassifier()
    param_grid = {'a': [1]}
    X, y = make_classification(100)

    sh = HalvingRandomSearchCV(base_estimator, param_grid, **params)

    with pytest.raises(ValueError, match=expected_error_message):
        sh.fit(X, y)
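# Illustration (ours, not part of the original test): the kind of invalid
# input the parametrization above feeds in. Asking for a ``resource`` that is
# not a parameter of the estimator makes ``fit`` raise a ValueError; the
# estimator and the match pattern here are assumptions, not the test's cases.
import pytest
from sklearn.datasets import make_classification
from sklearn.experimental import enable_halving_search_cv  # noqa: F401
from sklearn.model_selection import HalvingRandomSearchCV
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(100)
bad_sh = HalvingRandomSearchCV(DecisionTreeClassifier(), {'max_depth': [1, 2]},
                               resource='not_a_parameter')
with pytest.raises(ValueError, match='resource'):
    bad_sh.fit(X, y)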
def test_random_search_discrete_distributions(param_distributions,
                                              expected_n_candidates):
    # Make sure random search samples the appropriate number of candidates
    # when we ask for more than what's possible. How many parameters are
    # sampled depends on whether the distributions are 'all lists' or not
    # (see ParameterSampler for details). This is somewhat redundant with the
    # checks in ParameterSampler, but interaction bugs were discovered during
    # development of SH.

    n_samples = 1024
    X, y = make_classification(n_samples=n_samples, random_state=0)
    base_estimator = FastClassifier()
    sh = HalvingRandomSearchCV(base_estimator, param_distributions,
                               n_candidates=10)
    sh.fit(X, y)
    assert sh.n_candidates_[0] == expected_n_candidates
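# Minimal sketch (ours) of the ParameterSampler behavior referenced above:
# when every distribution is a list, sampling is without replacement and is
# capped at the size of the grid; a single scipy distribution in the mix
# lifts that cap and exactly n_iter candidates are drawn.
from scipy.stats import norm
from sklearn.model_selection import ParameterSampler

all_lists = {'a': [1, 2], 'b': [True, False]}  # a grid of 4 combinations
print(len(list(ParameterSampler(all_lists, n_iter=10, random_state=0))))  # 4

mixed = {'a': [1, 2], 'b': norm()}  # continuous distribution for 'b'
print(len(list(ParameterSampler(mixed, n_iter=10, random_state=0))))  # 10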
def test_random_search(max_resources, n_candidates, expected_n_candidates):
    # Test random search and make sure the number of generated candidates is
    # as expected

    n_samples = 1024
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {'a': norm, 'b': norm}
    base_estimator = FastClassifier()
    sh = HalvingRandomSearchCV(base_estimator, param_grid,
                               n_candidates=n_candidates, cv=2,
                               max_resources=max_resources, factor=2,
                               min_resources=4)
    sh.fit(X, y)
    assert sh.n_candidates_[0] == expected_n_candidates
    if n_candidates == 'exhaust':
        # Make sure 'exhaust' makes the last iteration use as many resources
        # as possible
        assert sh.n_resources_[-1] == max_resources
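# Back-of-the-envelope sketch (ours) of what 'exhaust' does: sklearn starts
# from roughly max_resources // min_resources candidates so that the final
# iteration can spend close to max_resources. Assuming max_resources=64 with
# the test's min_resources=4 and factor=2:
n_cands, n_res = 64 // 4, 4  # 16 candidates, 4 resources each
while n_cands > 1:
    print(f'{n_cands:2d} candidates x {n_res:2d} resources')
    n_cands //= 2  # keep the best 1/factor of the candidates
    n_res *= 2     # give the survivors factor times more resources
print(f'{n_cands:2d} candidate  x {n_res:2d} resources')  # ends at 64 = max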
clf = RandomForestClassifier(n_estimators=20, random_state=rng)

param_dist = {
    "max_depth": [3, None],
    "max_features": randint(1, 11),
    "min_samples_split": randint(2, 11),
    "bootstrap": [True, False],
    "criterion": ["gini", "entropy"]
}

rsh = HalvingRandomSearchCV(estimator=clf,
                            param_distributions=param_dist,
                            factor=2,
                            random_state=rng)
rsh.fit(X, y)

# %%
# We can now use the `cv_results_` attribute of the search estimator to inspect
# and plot the evolution of the search.

results = pd.DataFrame(rsh.cv_results_)
results['params_str'] = results.params.apply(str)
results.drop_duplicates(subset=('params_str', 'iter'), inplace=True)
mean_scores = results.pivot(index='iter',
                            columns='params_str',
                            values='mean_test_score')
ax = mean_scores.plot(legend=False, alpha=.6)

labels = [
    f'iter={i}\nn_samples={rsh.n_resources_[i]}\n'
    f'n_candidates={rsh.n_candidates_[i]}'
    for i in range(rsh.n_iterations_)
]
ax.set_xticks(range(rsh.n_iterations_))
ax.set_xticklabels(labels, rotation=45, multialignment='left')
Example #5
# Fit rf model with RandomizedSearchCV (the opening of this call was
# truncated; pipe, param_grid and cv are assumed to mirror the
# HalvingRandomSearchCV call below)
clf = RandomizedSearchCV(pipe,
                         param_grid,
                         cv=cv,
                         n_iter=5,
                         scoring='roc_auc',
                         n_jobs=-1)
clf = clf.fit(train_features, train_labels)
# Score model
score_randomized = roc_auc_score(test_labels,
                                 clf.predict_proba(test_features)[:, 1])
print(f'ROC AUC Score for RandomizedSearchCV model: {score_randomized}')
print(clf.best_params_)

# Fit rf model with HalvingRandomSearchCV
clf_halving = HalvingRandomSearchCV(pipe,
                                    param_grid,
                                    cv=cv,
                                    verbose=1,
                                    scoring='roc_auc',
                                    n_jobs=-1,
                                    aggressive_elimination=True,
                                    factor=2,
                                    min_resources=20)
clf_halving = clf_halving.fit(train_features, train_labels)
# Score model
score_halving = roc_auc_score(test_labels,
                              clf_halving.predict_proba(test_features)[:, 1])
print(f'ROC AUC Score for HalvingRandomSearchCV model: {score_halving}')
print(clf_halving.best_params_)

print(f'ROC AUC Score for out of the box model: {score_rf}')
print(f'ROC AUC Score for RandomizedSearchCV model: {score_randomized}')
print(f'ROC AUC Score for HalvingRandomSearchCV model: {score_halving}')
Example #6
_ = halving_cv.fit(X, y)


# deal with class imbalance

counts = pd.Series(y.flatten()).value_counts()

scale_pos_weight = counts["No"] / counts["Yes"]
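# Aside (illustrative counts, not from this dataset): XGBoost's guidance is
# scale_pos_weight ~ n_negative / n_positive with "Yes" as the positive
# class, which is exactly the ratio computed above.
demo_counts = pd.Series(["No"] * 9000 + ["Yes"] * 1000).value_counts()
print(demo_counts["No"] / demo_counts["Yes"])  # 9.0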


param_grid_2 = {
    "max_depth": [3, 4, 5],
    "gamma": [5, 30, 50],
    "learning_rate": [0.01, 0.1, 0.3, 0.5],
    "min_child_weight": [1, 3, 5],
    "reg_lambda": [50, 100, 300],
    "scale_pos_weight": [scale_pos_weight],  # Fix scale_pos_weight
    "subsample": [0.7, 0.8, 0.9],
    "colsample_bytree": [0.7, 0.8, 0.9],
}
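# Just arithmetic (our aside): the grid above spans
# 3 * 3 * 4 * 3 * 3 * 1 * 3 * 3 = 2916 distinct combinations, the pool that
# HalvingRandomSearchCV samples from below.
from math import prod
print(prod(len(v) for v in param_grid_2.values()))  # 2916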

# HalvingRandomSearchCV is still experimental, so it must be enabled first
from sklearn.experimental import enable_halving_search_cv  # noqa: F401
from sklearn.model_selection import HalvingRandomSearchCV

halving_random_cv = HalvingRandomSearchCV(
    xgb_cl, param_grid_2, scoring="roc_auc", n_jobs=-1, n_candidates="exhaust", factor=4
)

_ = halving_random_cv.fit(X, y)
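# Quick inspection (ours) of the fitted search: HalvingRandomSearchCV keeps
# per-iteration bookkeeping, handy for checking what n_candidates="exhaust"
# and factor=4 actually did. Printed values depend on the data.
print(halving_random_cv.n_candidates_)  # candidates evaluated per iteration
print(halving_random_cv.n_resources_)   # resources allotted per iteration
print(halving_random_cv.best_params_)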


        sampler=["mala", "langevin", "tempered mala", "tempered langevin"],
        weight_decay=expon(1e-3),
        #     max_iter=poisson(30),
        replay_prob=beta(a=9, b=1),
        adversary_weight=beta(a=1, b=1),
        num_units=poisson(32),
        num_layers=poisson(3),
        max_replay=poisson(10),
    )
    clf_cv = HalvingRandomSearchCV(clf,
                                   distributions,
                                   random_state=0,
                                   n_jobs=5,
                                   resource="max_iter",
                                   max_resources=max_resources)
    search = clf_cv.fit(X.values)
    clf = clf_cv.best_estimator_
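    # Sketch (ours) of what resource="max_iter" implies: each halving
    # iteration retrains the surviving candidates with a larger max_iter, so
    # the search's bookkeeping doubles as a training-budget log.
    for i, (n_cand, n_res) in enumerate(zip(clf_cv.n_candidates_,
                                            clf_cv.n_resources_)):
        print(f'iter {i}: {n_cand} candidates, max_iter={n_res}')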
elif do_search == "bohb":
    distributions = CS.ConfigurationSpace(seed=42)
    distributions.add_hyperparameter(
        CSH.UniformFloatHyperparameter("lr",
                                       1e-4,
                                       3e-1,
                                       log=True,
                                       default_value=6e-3))
    distributions.add_hyperparameter(
        CSH.UniformFloatHyperparameter("sampler_lr",
                                       1e-4,
                                       3e-1,
                                       log=True,
                                       default_value=1e-3))