def test_read_supported_file(self):
    """Read a bundled CSV via the job creator, then round-trip it through parquet.

    Verifies that `_read_file` loads a non-empty frame and that the parquet
    round trip preserves the frame's shape.
    """
    from hypernets.tabular.datasets.dsutils import basedir

    # Load the sample CSV through the helper under test.
    source_csv = f'{basedir}/heart-disease-uci.csv'
    loaded = ExperimentJobCreator._read_file(source_csv)
    assert loaded.shape[0] > 1

    # Persist to a temp parquet file and read it back; shapes must match.
    parquet_path = common_util.get_temp_file_path(prefix="heart-disease-uci", suffix=".parquet")
    loaded.to_parquet(parquet_path)
    round_tripped = pd.read_parquet(parquet_path)
    assert round_tripped.shape == loaded.shape
def test_make_tempfile():
    """Temp-path helpers: file paths are never created; dir paths honor `create`.

    - `get_temp_file_path` returns a path (with prefix/suffix applied) that
      does not yet exist on disk.
    - `get_temp_dir_path` creates the directory only when `create=True`.
    """
    # File path: named correctly but not materialized.
    file_path: str = common_util.get_temp_file_path(prefix='prefix', suffix='.txt')
    assert not os.path.exists(file_path)
    base_name = os.path.basename(file_path)
    assert base_name.startswith('prefix')
    assert base_name.endswith('.txt')

    # Directory path with create=True exists on disk.
    created_dir = common_util.get_temp_dir_path(prefix='prefix', suffix='prefix', create=True)
    assert os.path.exists(created_dir)

    # Directory path with create=False is only a name.
    uncreated_dir = common_util.get_temp_dir_path(prefix='prefix', suffix='prefix', create=False)
    assert not os.path.exists(uncreated_dir)
def test_regression_task_report():
    """End-to-end regression experiment that renders an Excel report.

    Builds a boston-housing frame with deliberately problematic columns
    (constant, id-like, drifted), runs the experiment with drift detection
    enabled, and checks the report callback's collected metadata plus the
    rendered .xlsx file on disk.
    """
    df = dsutils.load_boston()
    n_rows = df.shape[0]
    df['Constant'] = [0] * n_rows          # constant column, should be dropped
    df['Id'] = list(range(n_rows))         # id-like column
    target = 'target'

    df_train, df_eval = train_test_split(df, test_size=0.2)
    # Same feature on wildly different scales in train vs eval -> drift.
    df_train['Drifted'] = np.random.random(df_train.shape[0])
    df_eval['Drifted'] = np.random.random(df_eval.shape[0]) * 100

    file_path = common_util.get_temp_file_path(prefix="report_excel_", suffix=".xlsx")
    print(file_path)

    experiment = make_experiment(
        PlainModel, df_train,
        target=target,
        eval_data=df_eval.copy(),
        test_data=df_eval.copy(),
        drift_detection_threshold=0.4,
        drift_detection_min_features=3,
        drift_detection_remove_size=0.5,
        search_space=PlainSearchSpace(enable_lr=False, enable_nn=False,
                                      enable_dt=False, enable_dtr=True),
        report_render='excel',
        report_render_options={'file_path': file_path})
    estimator = experiment.run(max_trials=3)
    assert estimator is not None

    # Scan callbacks, keeping the last match of each kind (mirrors registration).
    report_cb = None
    evaluate_cb = None
    for cb in experiment.callbacks:
        if isinstance(cb, MLReportCallback):
            report_cb = cb
        if isinstance(cb, MLEvaluateCallback):
            evaluate_cb = cb

    assert report_cb is not None
    _experiment_meta: ExperimentMeta = report_cb.experiment_meta_
    assert len(_experiment_meta.resource_usage) > 0
    assert len(_experiment_meta.steps) == 5
    assert os.path.exists(file_path)

    assert evaluate_cb is not None
    assert _experiment_meta.evaluation_metric is not None
    assert len(_experiment_meta.prediction_stats) == 1
    assert len(_experiment_meta.datasets) == 3
def run_export_excel_report(maker, has_eval_data=True, str_label=True):
    """Drive a classification experiment built by `maker` and validate its report.

    Args:
        maker: callable `(df_train, target, df_eval, file_path) -> experiment`.
        has_eval_data: whether eval-dependent metadata (confusion matrix,
            classification report, prediction timing) is expected.
        str_label: map the integer target to "no"/"yes" string labels first.

    Returns:
        The `ExperimentMeta` collected by the report callback.
    """
    df = dsutils.load_blood()
    n_rows = df.shape[0]
    df['Constant'] = [0] * n_rows          # constant column
    df['Id'] = list(range(n_rows))         # id-like column
    target = 'Class'
    if str_label:
        labels = ["no", "yes"]
        df[target] = df[target].map(lambda v: labels[v])

    df_train, df_eval = train_test_split(df, test_size=0.2)
    # Same feature on wildly different scales in train vs eval -> drift.
    df_train['Drifted'] = np.random.random(df_train.shape[0])
    df_eval['Drifted'] = np.random.random(df_eval.shape[0]) * 100

    file_path = common_util.get_temp_file_path(prefix="report_excel_", suffix=".xlsx")
    print(file_path)

    experiment = maker(df_train, target, df_eval, file_path)
    estimator = experiment.run(max_trials=3)
    assert estimator is not None

    # Scan callbacks, keeping the last match of each kind (mirrors registration).
    report_cb = None
    evaluate_cb = None
    for cb in experiment.callbacks:
        if isinstance(cb, MLReportCallback):
            report_cb = cb
        if isinstance(cb, MLEvaluateCallback):
            evaluate_cb = cb

    assert report_cb is not None
    _experiment_meta: ExperimentMeta = report_cb.experiment_meta_
    assert len(_experiment_meta.resource_usage) > 0
    assert os.path.exists(file_path)

    if has_eval_data:
        assert evaluate_cb is not None
        assert _experiment_meta.confusion_matrix is not None
        assert _experiment_meta.classification_report is not None
        assert len(_experiment_meta.prediction_elapsed) == 2
        assert _experiment_meta.confusion_matrix.data.shape == (2, 2)  # binary classification
        assert len(_experiment_meta.datasets) == 3
    else:
        assert len(_experiment_meta.datasets) == 2

    return _experiment_meta
def test_render(self):
    """Render a hand-assembled ExperimentMeta to an Excel file and check it exists."""
    # Assemble a minimal binary-classification experiment metadata object
    # from the fixture factories on this test class.
    step_metas = [
        self.create_data_clean_step_meta(),
        self.create_ensemble_step_meta(),
    ]
    meta = ExperimentMeta(
        task=const.TASK_BINARY,
        datasets=self.create_dataset_meta(),
        steps=step_metas,
        evaluation_metric=self.create_binary_metric_data(),
        confusion_matrix=self.create_confusion_matrix_data(),
        resource_usage=self.create_resource_monitor_df(),
        prediction_stats=self.create_prediction_stats_df())

    output_path = common_util.get_temp_file_path(prefix="report_excel_", suffix=".xlsx")
    print(output_path)
    ExcelReportRender(file_path=output_path).render(meta)
    assert os.path.exists(output_path)