def execute(): """This is going to be a very simple example to illustrate what exactly HyperparameterHunter does, and how it revolutionizes hyperparameter optimization.""" # Start by creating an `Environment` - This is where you define how Experiments (and optimization) will be conducted env = Environment( train_dataset=get_breast_cancer_data(target="target"), results_path="HyperparameterHunterAssets", metrics=["roc_auc_score"], cv_type="StratifiedKFold", cv_params=dict(n_splits=10, shuffle=True, random_state=32), ) # Now, conduct an `Experiment` # This tells HyperparameterHunter to use the settings in the active `Environment` to train a model with these hyperparameters experiment = CVExperiment(model_initializer=XGBClassifier, model_init_params=dict(objective="reg:linear", max_depth=3)) # That's it. No annoying boilerplate code to fit models and record results # Now, the `Environment`'s `results_path` directory will contain new files describing the Experiment just conducted # Time for the fun part. We'll set up some hyperparameter optimization by first defining the `OptimizationProtocol` we want optimizer = BayesianOptPro(verbose=1) # Now we're going to say which hyperparameters we want to optimize. # Notice how this looks just like our `experiment` above optimizer.forge_experiment( model_initializer=XGBClassifier, model_init_params=dict( objective= "reg:linear", # We're setting this as a constant guideline - Not one to optimize max_depth=Integer( 2, 10 ), # Instead of using an int like the `experiment` above, we provide a space to search ), ) # Notice that our range for `max_depth` includes the `max_depth=3` value we used in our `experiment` earlier optimizer.go() # Now, we go assert experiment.experiment_id in [ _[2] for _ in optimizer.similar_experiments ] # Here we're verifying that the `experiment` we conducted first was found by `optimizer` and used as learning material # You can also see via the console that we found `experiment`'s saved files, and used it to start optimization last_experiment_id = optimizer.current_experiment.experiment_id # Let's save the id of the experiment that was just conducted by `optimizer` optimizer.go() # Now, we'll start up `optimizer` again... # And we can see that this second optimization round learned from both our first `experiment` and our first optimization round assert experiment.experiment_id in [ _[2] for _ in optimizer.similar_experiments ] assert last_experiment_id in [_[2] for _ in optimizer.similar_experiments]
def _execute():
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        results_path="HyperparameterHunterAssets",
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type=StratifiedKFold,
        cv_params=dict(n_splits=10, shuffle=True, random_state=32),
        runs=2,
    )

    optimizer = BayesianOptPro(iterations=10, read_experiments=True, random_state=None)
    optimizer.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            n_estimators=200,
            subsample=0.5,
            booster=Categorical(["gbtree", "gblinear", "dart"]),
        ),
        model_extra_params=dict(fit=dict(eval_metric=Categorical(["auc", "rmse", "mae"]))),
    )
    optimizer.go()
def _execute():
    #################### Environment ####################
    env = Environment(
        train_dataset=get_breast_cancer_data(target="target"),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    #################### Experimentation ####################
    experiment = CVExperiment(
        model_initializer=KerasClassifier,
        model_init_params=dict(build_fn=_build_fn_experiment),
        model_extra_params=dict(
            callbacks=[ReduceLROnPlateau(patience=5)], batch_size=32, epochs=10, verbose=0
        ),
    )

    #################### Optimization ####################
    optimizer = BayesianOptPro(iterations=10)
    optimizer.forge_experiment(
        model_initializer=KerasClassifier,
        model_init_params=dict(build_fn=_build_fn_optimization),
        model_extra_params=dict(
            callbacks=[ReduceLROnPlateau(patience=Integer(5, 10))],
            batch_size=Categorical([32, 64], transform="onehot"),
            epochs=10,
            verbose=0,
        ),
    )
    optimizer.go()
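# `_build_fn_experiment` and `_build_fn_optimization` are referenced above but not defined here.
# The sketch below is an assumption (not the library's own example code) of what such Keras
# `build_fn`s could look like: the experiment version fixes every value, while the optimization
# version embeds search dimensions (`Integer`, `Real`, `Categorical`) directly inside the
# architecture for HyperparameterHunter to optimize.
from hyperparameter_hunter import Real, Integer, Categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout


def _build_fn_experiment(input_shape):
    model = Sequential([
        Dense(100, activation="relu", input_shape=input_shape),
        Dropout(0.5),
        Dense(1, activation="sigmoid"),
    ])
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model


def _build_fn_optimization(input_shape):
    model = Sequential([
        Dense(Integer(50, 150), activation="relu", input_shape=input_shape),  # Search layer width
        Dropout(Real(0.2, 0.7)),  # Search dropout rate
        Dense(1, activation="sigmoid"),
    ])
    model.compile(
        optimizer=Categorical(["adam", "rmsprop"]),  # Search the optimizer
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return model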
def fe_optimizer(request):
    if request.param is not None:
        request.param = FeatureEngineer(request.param)
    opt = BayesianOptPro()
    opt.forge_experiment(
        model_initializer=Ridge, model_init_params={}, feature_engineer=request.param
    )
    opt.go()
    return opt
def do_optimization():
    optimizer = BayesianOptPro(iterations=5, random_state=1337)
    optimizer.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            subsample=0.5,
            booster=Categorical(["gbtree", "dart"]),
        ),
    )
    optimizer.go()
def opt_svc_0(request):
    optimizer = BayesianOptPro(target_metric=request.param, iterations=2, random_state=32)
    optimizer.forge_experiment(
        model_initializer=SVC,
        model_init_params=dict(
            C=Real(0.9, 1.1),
            kernel=Categorical(["linear", "poly", "rbf"]),
            max_iter=Integer(50, 125),
            tol=1e-3,
        ),
    )
    optimizer.go()
    yield optimizer

    assert optimizer.target_metric == ("oof", (request.param or "roc_auc"))
def opt_lgb_0(request):
    optimizer = BayesianOptPro(target_metric=request.param, iterations=2, random_state=32)
    optimizer.forge_experiment(
        model_initializer=LGBMClassifier,
        model_init_params=dict(
            boosting_type=Categorical(["gbdt", "dart"]),
            num_leaves=Integer(2, 8),
            n_estimators=10,
            max_depth=5,
            min_child_samples=1,
            subsample=Real(0.4, 0.7),
            verbose=-1,
        ),
    )
    optimizer.go()
    yield optimizer

    assert optimizer.target_metric == ("oof", (request.param or "roc_auc"))
def execute():
    train_df, holdout_df = prep_data()

    env = Environment(
        train_dataset=train_df,
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        target_column=[f"target_{_}" for _ in range(10)],  # 10 classes (one-hot-encoded output)
        holdout_dataset=holdout_df,
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=3, shuffle=True, random_state=True),
    )

    exp = CVExperiment(KerasClassifier, build_fn_exp, dict(batch_size=64, epochs=10, verbose=1))

    opt = BayesianOptPro(iterations=10, random_state=32)
    opt.forge_experiment(KerasClassifier, build_fn_opt, dict(batch_size=64, epochs=10, verbose=0))
    opt.go()
def test_similar_experiments_unordered():
    """Check that an experiment with a single `EngineerStep` is considered "similar" by an
    Optimization Protocol with two `optional` `EngineerStep`s, where the second step is identical
    to the single step used by the standalone experiment. As of v3.0.0alpha2, this is expected to
    fail because the otherwise-identical engineer steps occur at different indexes in
    `FeatureEngineer.steps` for the experiment and the OptPro. The experiment has
    `sqr_sum_feature` at index=0, while the same step in the OptPro is at index=1. Note that the
    step index in the OptPro is still 1 despite the fact that the other step immediately
    preceding it is `optional`"""
    env = Environment(
        train_dataset=get_breast_cancer_data(),
        results_path=assets_dir,
        target_column="diagnosis",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )

    exp = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear", subsample=0.5, max_depth=3),
        feature_engineer=FeatureEngineer([EngineerStep(sqr_sum_feature)]),
    )

    opt = BayesianOptPro(iterations=1)
    opt.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear", subsample=0.5, max_depth=3),
        feature_engineer=FeatureEngineer([
            Categorical([standard_scale, normalize, min_max_scale], optional=True),
            Categorical([sqr_sum_feature], optional=True),
        ]),
    )
    opt.go()

    assert exp.experiment_id in [_[2] for _ in opt.similar_experiments]
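# The engineer step functions used above (`sqr_sum_feature`, `standard_scale`, `normalize`,
# `min_max_scale`) are defined elsewhere in the test suite. The sketch below is an assumption of
# what two of them might look like, following HyperparameterHunter's convention of inferring which
# datasets a step receives from its parameter names (e.g. `all_inputs`, `train_inputs`,
# `non_train_inputs`).
import numpy as np
from sklearn.preprocessing import StandardScaler


def sqr_sum_feature(all_inputs):
    # Add a feature: the square root of the sum of squares of each row's input values
    all_inputs["sqr_sum"] = all_inputs.agg(
        lambda row: np.sqrt(np.sum([np.square(_) for _ in row])), axis="columns"
    )
    return all_inputs


def standard_scale(train_inputs, non_train_inputs):
    # Fit a scaler on the training inputs only, then apply it to all non-train inputs
    scaler = StandardScaler()
    train_inputs[train_inputs.columns] = scaler.fit_transform(train_inputs.values)
    non_train_inputs[non_train_inputs.columns] = scaler.transform(non_train_inputs.values)
    return train_inputs, non_train_inputs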
def execute():
    #################### Environment ####################
    env = Environment(
        train_dataset=get_iris_data(),
        results_path="HyperparameterHunterAssets",
        target_column="species",
        metrics=["hamming_loss"],
        cv_params=dict(n_splits=5, random_state=32),
    )

    #################### Experiment ####################
    # Just a reference for normal `class_weight` usage outside of optimization
    CVExperiment(RandomForestClassifier, dict(n_estimators=10, class_weight={0: 1, 1: 1, 2: 1}))

    #################### Optimization ####################
    opt = BayesianOptPro(iterations=10, random_state=32)
    opt.forge_experiment(
        model_initializer=RandomForestClassifier,
        model_init_params=dict(
            # Weight values for each class can be optimized with `Categorical`/`Integer`
            class_weight={
                0: Categorical([1, 3]),
                1: Categorical([1, 4]),
                2: Integer(1, 9),  # You can also use `Integer` for low/high ranges
            },
            criterion=Categorical(["gini", "entropy"]),
            n_estimators=Integer(5, 100),
        ),
    )
    opt.go()
def test_categorical_tuple_match(env_digits):
    """Test that optimization of a `Categorical` space, whose values are tuples, can be performed
    and that saved results from such a space are correctly identified as similar Experiments"""
    model_extra_params = dict(batch_size=32, epochs=3, verbose=0, shuffle=True)

    exp_0 = CVExperiment(KerasClassifier, build_fn_digits_exp, model_extra_params)

    #################### First OptPro ####################
    opt_0 = BayesianOptPro(iterations=1, random_state=32, n_initial_points=1)
    opt_0.forge_experiment(KerasClassifier, build_fn_digits_opt, model_extra_params)
    opt_0.go()
    assert len(opt_0.similar_experiments) == 1  # Should match `exp_0`

    #################### Second OptPro ####################
    opt_1 = BayesianOptPro(iterations=1, random_state=32, n_initial_points=1)
    opt_1.forge_experiment(KerasClassifier, build_fn_digits_opt, model_extra_params)
    opt_1.go()
    assert len(opt_1.similar_experiments) == 2  # Should match `exp_0` and `opt_0`
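# `build_fn_digits_exp` and `build_fn_digits_opt` are fixtures defined elsewhere. The sketch below
# is an assumption of what the tuple-valued `Categorical` search might look like, e.g. searching
# Conv2D kernel sizes on the 8x8 digits images; the experiment-side `build_fn_digits_exp` would
# use a concrete tuple (such as `(3, 3)`) that falls inside this space so the OptPro can match it.
from hyperparameter_hunter import Categorical
from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten


def build_fn_digits_opt(input_shape):
    model = Sequential([
        Conv2D(32, kernel_size=Categorical([(3, 3), (5, 5)]), activation="relu", input_shape=input_shape),
        Flatten(),
        Dense(10, activation="softmax"),
    ])
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    return model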
    metrics=['roc_auc_score'],
    cv_type='StratifiedKFold',
    cv_params=dict(n_splits=10, shuffle=True, random_state=32),
)

# Now, conduct an `Experiment`
# This tells HyperparameterHunter to use the settings in the active `Environment` to train a model with these hyperparameters
experiment = CVExperiment(
    model_initializer=XGBClassifier,
    model_init_params=dict(objective='reg:linear', max_depth=3),
)
# That's it. No annoying boilerplate code to fit models and record results

# Now, the `Environment`'s `results_path` directory will contain new files describing the Experiment just conducted

# Time for the fun part. We'll set up some hyperparameter optimization by first defining the `OptPro` (Optimization Protocol) we want
optimizer = BayesianOptPro(verbose=1)

# Now we're going to say which hyperparameters we want to optimize.
# Notice how this looks just like our `experiment` above
optimizer.forge_experiment(
    model_initializer=XGBClassifier,
    model_init_params=dict(
        objective='reg:linear',  # We're setting this as a constant guideline - Not one to optimize
        max_depth=Integer(2, 10),  # Instead of using an int like the `experiment` above, we provide a space to search
    ),
)
# Notice that our range for `max_depth` includes the `max_depth=3` value we used in our `experiment` earlier

optimizer.go()  # Now, we go
        model_init_params=dict(objective="reg:linear", max_depth=3, n_estimators=100, subsample=0.5),
        model_extra_params=dict(
            fit=dict(
                eval_set=[
                    (env.train_input, env.train_target),
                    (env.validation_input, env.validation_target),
                ],
                early_stopping_rounds=5,
                eval_metric="mae",
            )
        ),
    )

    # And/or...
    #################### 2. Hyperparameter Optimization ####################
    optimizer = BayesianOptPro(iterations=30, random_state=1337)
    optimizer.forge_experiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(
            objective="reg:linear",
            max_depth=Integer(2, 20),
            learning_rate=Real(0.0001, 0.5),
            subsample=0.5,
            booster=Categorical(["gbtree", "dart"]),
        ),
        model_extra_params=dict(
            fit=dict(
                eval_set=[
                    (env.train_input, env.train_target),
                    (env.validation_input, env.validation_target),
                ],