# Assumed imports for the snippets below; the module paths follow the
# hyperparameter_hunter repo, but verify them against your installed version.
# Note that the snippets mix two generations of the library's API: the pre-3.0 names
# (`CrossValidationExperiment`, `root_results_path`, `metrics_map`, `cross_validation_type`,
# `cross_validation_params`) and the 3.0 names (`CVExperiment`, `results_path`, `metrics`,
# `cv_type`, `cv_params`); each snippet uses whichever API it was written against.
from hyperparameter_hunter import Environment, CVExperiment
from hyperparameter_hunter import CrossValidationExperiment  # pre-3.0 name of `CVExperiment`
from hyperparameter_hunter import Real, Integer, Categorical
from hyperparameter_hunter import ExtraTreesOptimization, GradientBoostedRegressionTreeOptimization
from hyperparameter_hunter.callbacks.bases import lambda_callback
from hyperparameter_hunter.callbacks.recipes import confusion_matrix_oof, confusion_matrix_holdout
from hyperparameter_hunter.utils.learning_utils import get_toy_classification_data
from sklearn.model_selection import StratifiedKFold, RepeatedStratifiedKFold
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from rgf.sklearn import RGFClassifier

# Assumption: `assets_dir` is the test assets path spelled out literally in `env_fixture_0` below
assets_dir = "hyperparameter_hunter/__TEST__HyperparameterHunterAssets__"


def env_3():
    def printer_callback():
        # Print the current repetition/fold/run and latest evaluation at every callback stage
        def printer_helper(_rep, _fold, _run, last_evaluation_results):
            print(f"{_rep}.{_fold}.{_run} {last_evaluation_results}")

        return lambda_callback(
            on_experiment_start=printer_helper,
            on_experiment_end=printer_helper,
            on_repetition_start=printer_helper,
            on_repetition_end=printer_helper,
            on_fold_start=printer_helper,
            on_fold_end=printer_helper,
            on_run_start=printer_helper,
            on_run_end=printer_helper,
        )

    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path=assets_dir,
        metrics=["roc_auc_score"],
        holdout_dataset=get_toy_classification_data(),
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=3, n_repeats=2, random_state=32),
        runs=2,
        experiment_callbacks=[
            printer_callback(),
            confusion_matrix_oof(),
            confusion_matrix_holdout(),
        ],
    )
def env_2():
    # noinspection PyUnusedLocal
    def get_holdout_set(train, target_column):
        # Use a copy of the full train set as the holdout set
        return train, train.copy()

    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path=assets_dir,
        holdout_dataset=get_holdout_set,
        test_dataset=get_toy_classification_data(),
        metrics=["roc_auc_score"],
        cv_type=StratifiedKFold,
        cv_params=dict(n_splits=3, shuffle=True, random_state=32),
    )
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=3, n_repeats=2, random_state=32),
        do_full_save=do_full_save,
    )

    # Pro tip: by setting XGBoost's `subsample` ridiculously low, we can get bad scores on purpose
    experiment_0 = CVExperiment(model_initializer=XGBClassifier, model_init_params=dict(subsample=0.01))

    # Upon completion of this Experiment, we see a warning that not all result files will be saved,
    # because the Experiment's final score was below our threshold of 0.75.
    # Specifically, we skipped saving the prediction files (OOF, holdout, test, and in-fold) and the heartbeat file.
    # What still got saved is the Experiment's key information, leaderboard position, and description file.
    # These are saved so the information can be used for future hyperparameter optimization, and to detect repeated Experiments.
    # Additionally, the Experiment's script backup is saved, but only because that's one of the first things that happens.
    # For even finer control over what gets saved, use `do_full_save` together with `file_blacklist`.

    # Now, let's perform another Experiment that does a bit better than our intentionally miserable one
    experiment_1 = CVExperiment(model_initializer=XGBClassifier, model_init_params=dict(subsample=0.5))
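# `execute` above references a module-level `do_full_save` that is not defined in that
# snippet. A minimal sketch, mirroring the nested helper in the `env_0` fixture further
# below: it receives the Experiment's result description dict and returns True only when
# the OOF ROC-AUC clears a threshold, so sub-threshold Experiments skip the heavyweight
# result files.
def do_full_save(experiment_result):
    # Only perform a full result save if the final OOF ROC-AUC exceeds 0.75
    return experiment_result["final_evaluations"]["oof"]["roc_auc_score"] > 0.75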
def _execute():
    env = Environment(
        train_dataset=get_toy_classification_data(target='diagnosis'),
        root_results_path='HyperparameterHunterAssets',
        target_column='diagnosis',
        metrics_map=['roc_auc_score'],
        cross_validation_type=RepeatedStratifiedKFold,
        cross_validation_params=dict(n_splits=5, n_repeats=2, random_state=32),
    )

    optimizer = ExtraTreesOptimization(iterations=10, read_experiments=True, random_state=None)
    optimizer.set_experiment_guidelines(
        model_initializer=RGFClassifier,
        model_init_params=dict(
            max_leaf=1000,
            algorithm=Categorical(['RGF', 'RGF_Opt', 'RGF_Sib']),
            l2=Real(0.01, 0.3),
            normalize=Categorical([True, False]),
            learning_rate=Real(0.3, 0.7),
            loss=Categorical(['LS', 'Expo', 'Log', 'Abs']),
        ),
    )
    optimizer.go()
def _execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        root_results_path='HyperparameterHunterAssets',
        metrics_map=['roc_auc_score'],
        cross_validation_type='StratifiedKFold',
        cross_validation_params=dict(n_splits=5, shuffle=True, random_state=32),
        runs=1,
    )

    optimizer = GradientBoostedRegressionTreeOptimization(iterations=10, read_experiments=True, random_state=None)
    optimizer.set_experiment_guidelines(
        model_initializer=CatBoostClassifier,
        model_init_params=dict(
            iterations=100,
            eval_metric=Categorical(['Logloss', 'Accuracy', 'AUC'], transform='onehot'),
            learning_rate=Real(low=0.0001, high=0.5),
            depth=Integer(4, 7),
            save_snapshot=False,
        ),
    )
    optimizer.go()
    print('')
def env_fixture_1():
    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path=None,
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )
def env_0():
    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path=assets_dir,
        metrics=["roc_auc_score"],
        cv_type="RepeatedStratifiedKFold",
        cv_params=dict(n_splits=3, n_repeats=2, random_state=32),
    )
def env_fixture_0():
    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path="hyperparameter_hunter/__TEST__HyperparameterHunterAssets__",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )
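# The `execute` snippet below passes a callable as `holdout_dataset`, but never defines it.
# A minimal sketch of such a callable, modeled on the nested helper in `env_2` above: it
# receives the train DataFrame and the target column name, and must return the
# (train, holdout) pair. The 80/20 split here is purely hypothetical.
def get_holdout_set(train, target_column):
    # Hypothetical split: hold out the last 20% of rows; any (train, holdout) pair works
    split_index = int(len(train) * 0.8)
    return train.iloc[:split_index], train.iloc[split_index:]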
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        # Both `holdout_dataset` and `train_dataset` can be any of the following: a pandas.DataFrame, a filepath, or None.
        # If a filepath is provided, it will be passed to :meth:`pandas.read_csv`.
        # In addition to the above types, `holdout_dataset` can also be a callable (see :func:`get_holdout_set` above)
        holdout_dataset=get_holdout_set,
        test_dataset=get_toy_classification_data(),
        # By default, `holdout_dataset` will be scored with the provided metrics, just like OOF predictions.
        # However, you can provide the additional `metrics_params` kwarg to specify which metrics are calculated for each dataset.
        # See the documentation in :class:`environment.Environment` and :class:`metrics.ScoringMixIn` for more information
        metrics=["roc_auc_score"],
        cv_type=StratifiedKFold,
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )
    experiment = CVExperiment(model_initializer=XGBClassifier, model_init_params=dict(subsample=0.5))
def env_0():
    def do_full_save(experiment_result):
        # Only perform a full result save if the Experiment's final OOF ROC-AUC exceeds 0.75
        return experiment_result["final_evaluations"]["oof"]["roc_auc_score"] > 0.75

    return Environment(
        train_dataset=get_toy_classification_data(),
        results_path=assets_dir,
        metrics=["roc_auc_score"],
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=3, n_repeats=2, random_state=32),
        do_full_save=do_full_save,
    )
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type="StratifiedKFold",
        cv_params=dict(n_splits=5, shuffle=True, random_state=32),
    )
    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective="reg:linear", max_depth=3, n_estimators=100, subsample=0.5),
    )
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        root_results_path='HyperparameterHunterAssets',
        metrics_map=['roc_auc_score'],
        cross_validation_type='StratifiedKFold',
        cross_validation_params=dict(n_splits=5, shuffle=True, random_state=32),
    )
    experiment = CrossValidationExperiment(
        model_initializer=XGBClassifier,
        model_init_params=dict(objective='reg:linear', max_depth=3, n_estimators=100, subsample=0.5),
    )
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(target='diagnosis'),
        root_results_path='HyperparameterHunterAssets',
        target_column='diagnosis',
        metrics_map=['roc_auc_score'],
        cross_validation_type=RepeatedStratifiedKFold,
        cross_validation_params=dict(n_splits=5, n_repeats=2, random_state=32),
    )
    experiment = CrossValidationExperiment(
        model_initializer=RGFClassifier,
        model_init_params=dict(max_leaf=1000, algorithm='RGF', min_samples_leaf=10),
    )
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        root_results_path='HyperparameterHunterAssets',
        metrics_map=['roc_auc_score'],
        cross_validation_type=RepeatedStratifiedKFold,
        cross_validation_params=dict(n_splits=5, n_repeats=2, random_state=32),
        runs=2,
        # Just instantiate `Environment` with your list of callbacks, and go about business as usual.
        # See the sketch of `printer_callback` just below this function.
        experiment_callbacks=[printer_callback()],
    )
    experiment = CrossValidationExperiment(
        model_initializer=XGBClassifier,
        model_init_params={},
        model_extra_params=dict(fit=dict(verbose=False)),
    )
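# The `execute` snippets on either side call a module-level `printer_callback` that they
# never define. A minimal sketch, mirroring the nested helper in `env_3` above: it uses
# `lambda_callback` to print the current repetition/fold/run and the latest evaluation
# results at each stage of the Experiment.
def printer_callback():
    def printer_helper(_rep, _fold, _run, last_evaluation_results):
        print(f"{_rep}.{_fold}.{_run} {last_evaluation_results}")

    return lambda_callback(
        on_experiment_start=printer_helper,
        on_experiment_end=printer_helper,
        on_repetition_start=printer_helper,
        on_repetition_end=printer_helper,
        on_fold_start=printer_helper,
        on_fold_end=printer_helper,
        on_run_start=printer_helper,
        on_run_end=printer_helper,
    )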
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=5, n_repeats=2, random_state=32),
        runs=2,
        # Just instantiate `Environment` with your list of callbacks, and go about business as usual.
        # In addition to the `printer_callback` made above, we're also adding the `confusion_matrix_oof` callback.
        # This, and other callbacks, can be found in `hyperparameter_hunter.callbacks.recipes`
        experiment_callbacks=[printer_callback(), confusion_matrix_oof()],
    )
    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params={},
        model_extra_params=dict(fit=dict(verbose=False)),
    )
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(target='diagnosis'),
        root_results_path='HyperparameterHunterAssets',
        target_column='diagnosis',
        metrics_map=['roc_auc_score'],
        cross_validation_type=StratifiedKFold,
        cross_validation_params=dict(n_splits=5, shuffle=True, random_state=32),
        runs=2,
        # file_blacklist='ALL',
    )
    experiment = CrossValidationExperiment(
        model_initializer=CatBoostClassifier,
        # NOTE: `model_init_params` can include any of the many kwargs accepted by :meth:`CatBoostClassifier.__init__`
        model_init_params=dict(iterations=500, learning_rate=0.01, depth=7, allow_writing_files=False),
        model_extra_params=dict(fit=dict(verbose=True)),
    )