def test_experiment_with_blood_full_features():
    """Run the full-featured experiment pipeline on the blood dataset.

    Enables feature generation, collinearity detection, drift detection,
    feature selection/reselection, down-sample search and pseudo labeling,
    then asserts which steps survive into the fitted pipeline.
    """
    df = dsutils.load_blood()
    target = 'Class'
    df_train, df_test = train_test_split(df, train_size=0.8, random_state=335)
    df_test.pop(target)  # test_data must not carry the target column

    experiment = make_experiment(
        # BUG FIX: was `df` — the experiment trained on the full frame,
        # including the held-out rows passed as test_data, which leaks the
        # test split into training and defeats the drift-detection setup.
        PlainModel, df_train,
        target=target,
        search_space=PlainSearchSpace(),
        test_data=df_test,
        feature_generation=True,
        collinearity_detection=True,
        drift_detection=True,
        feature_selection=True,
        down_sample_search=True,
        down_sample_search_size=0.2,
        feature_reselection=True,
        pseudo_labeling=True,
        random_state=335,
        early_stopping_time_limit=1200,
        # log_level='info',
    )
    estimator = experiment.run(max_trials=3)
    print(estimator)
    assert estimator is not None

    step_names = [step[0] for step in estimator.steps]
    assert step_names == [
        StepNames.DATA_CLEAN,
        StepNames.MULITICOLLINEARITY_DETECTION,
        'estimator',
    ]
def test_experiment_with_blood_simple():
    """Smoke test: a minimal experiment on the blood dataset yields an estimator."""
    data = dsutils.load_blood()
    exp = make_experiment(PlainModel, data, target='Class', search_space=PlainSearchSpace())
    fitted = exp.run(max_trials=3)
    print(fitted)
    assert fitted is not None
def maker(df_train, target, df_eval, file_path):
    """Build an experiment that renders an Excel report (by name) to *file_path*."""
    options = dict(
        target=target,
        test_data=df_eval.copy(),
        drift_detection_threshold=0.4,
        drift_detection_min_features=3,
        drift_detection_remove_size=0.5,
        search_space=PlainSearchSpace(enable_lr=False, enable_nn=False),
        report_render='excel',
        report_render_options={'file_path': file_path},
    )
    return make_experiment(PlainModel, df_train, **options)
def maker(df_train, target, df_eval, file_path):
    """Build an experiment that renders an Excel report via an explicit renderer instance."""
    from hypernets.experiment.report import ExcelReportRender

    options = dict(
        target=target,
        eval_data=df_eval,
        test_data=df_eval.copy(),
        drift_detection_threshold=0.4,
        drift_detection_min_features=3,
        drift_detection_remove_size=0.5,
        search_space=PlainSearchSpace(enable_lr=False, enable_nn=False),
        report_render=ExcelReportRender(file_path),
    )
    return make_experiment(PlainModel, df_train, **options)
def test_regression_task_report():
    """Regression experiment end-to-end: report/evaluate callbacks fire and the Excel file is written."""
    data = dsutils.load_boston()
    data['Constant'] = [0] * data.shape[0]
    data['Id'] = list(range(data.shape[0]))
    target = 'target'

    train_data, eval_data = train_test_split(data, test_size=0.2)
    # Give the two splits visibly different distributions on one column so
    # drift detection has something to remove.
    train_data['Drifted'] = np.random.random(train_data.shape[0])
    eval_data['Drifted'] = np.random.random(eval_data.shape[0]) * 100

    report_path = common_util.get_temp_file_path(prefix="report_excel_", suffix=".xlsx")
    print(report_path)

    experiment = make_experiment(
        PlainModel, train_data,
        target=target,
        eval_data=eval_data.copy(),
        test_data=eval_data.copy(),
        drift_detection_threshold=0.4,
        drift_detection_min_features=3,
        drift_detection_remove_size=0.5,
        search_space=PlainSearchSpace(enable_lr=False, enable_nn=False,
                                      enable_dt=False, enable_dtr=True),
        report_render='excel',
        report_render_options={'file_path': report_path},
    )
    estimator = experiment.run(max_trials=3)
    assert estimator is not None

    report_callback = None
    evaluate_callback = None
    for cb in experiment.callbacks:
        if isinstance(cb, MLReportCallback):
            report_callback = cb
        if isinstance(cb, MLEvaluateCallback):
            evaluate_callback = cb

    assert report_callback is not None
    meta: ExperimentMeta = report_callback.experiment_meta_
    assert len(meta.resource_usage) > 0
    assert len(meta.steps) == 5
    assert os.path.exists(report_path)

    assert evaluate_callback is not None
    assert meta.evaluation_metric is not None
    assert len(meta.prediction_stats) == 1
    assert len(meta.datasets) == 3
def run_experiment(train_df, check_as_local=True, **kwargs):
    """Run a plain experiment over *train_df*; optionally verify the estimator converts to local."""
    exp = make_experiment(PlainModel, train_df, search_space=PlainSearchSpace(), **kwargs)
    fitted = exp.run()
    print(exp.random_state, fitted)
    assert fitted is not None

    if not check_as_local:
        return
    # A distributed estimator exposes as_local(); the local copy must not.
    assert hasattr(fitted, 'as_local')
    local_est = fitted.as_local()
    assert not hasattr(local_est, 'as_local')
def test_experiment_with_data_adaption():
    """Data adaption runs when the memory limit is below the frame's footprint."""
    data = MultiLabelEncoder().fit_transform(dsutils.load_bank())
    frame_bytes = int(data.memory_usage().sum())
    exp = make_experiment(
        PlainModel, data,
        target='y',
        search_space=PlainSearchSpace(),
        # Half the actual footprint forces the data_adaption step to trigger.
        data_adaption_memory_limit=frame_bytes // 2,
        log_level='info',
    )
    fitted = exp.run(max_trials=3)
    assert fitted is not None
    assert fitted.steps[0][0] == 'data_adaption'
def test_experiment_with_blood_down_sample():
    """Down-sample search options are accepted and still yield an estimator."""
    data = dsutils.load_blood()
    exp = make_experiment(
        PlainModel, data,
        target='Class',
        search_space=PlainSearchSpace(),
        down_sample_search=True,
        down_sample_search_size=0.1,
        down_sample_search_time_limit=300,
        down_sample_search_max_trials=10,
        # log_level='info',
    )
    fitted = exp.run(max_trials=3)
    print(fitted)
    assert fitted is not None
def main():
    """Demo entry point: fit a regression experiment on boston and print the pipeline."""
    data = dsutils.load_boston()
    train_data, _ = train_test_split(data, test_size=0.2)
    space = PlainSearchSpace(enable_lr=False, enable_nn=False, enable_dt=False, enable_dtr=True)
    exp = make_experiment(PlainModel, train_data, target='target',
                          search_space=space, report_render='excel')
    fitted = exp.run(max_trials=3)
    print(fitted)
def maker_(*args, **kwargs):
    """Factory: build a PlainModel experiment, defaulting random_state to 1234."""
    kwargs.setdefault('random_state', 1234)
    return make_experiment(PlainModel, *args, **kwargs)
def maker_(*args, **kwargs):
    """Thin factory: forward everything to make_experiment with PlainModel."""
    return make_experiment(PlainModel, *args, **kwargs)
def _create_experiment(self, make_options):
    """Build a blood-dataset experiment from the given make_experiment options."""
    from hypernets.experiment import make_experiment

    data = dsutils.load_blood()
    return make_experiment(PlainModel, data, **make_options)