Example No. 1
    def test_creator(self):
        test_data = dsutils.load_blood()
        eval_data = dsutils.load_blood()
        make_options = {
            'test_data': test_data,
            'eval_data': eval_data,
            'task': const.TASK_BINARY,
            'target': 'Class',
            'feature_selection': True,
            'feature_selection_strategy': 'threshold',
            'evaluation_metrics': 'auto',
            'evaluation_persist_prediction': True,
            'report_render': 'excel',
            'search_space': PlainSearchSpace(),
        }
        job_working_dir = common_util.get_temp_dir_path(
            prefix="hyn_job_creator_test_")

        exp = BloodDatasetJobEngine().create_experiment_with_params(
            make_options, job_working_dir)
        assert exp
        assert isinstance(exp, CompeteExperiment)

        run_options = {"max_trials": 2}

        exp.run(**run_options)
        assert (Path(job_working_dir) / "report.xlsx").exists()
Example No. 2
def foo():
    # exercise fn_foo with a variety of argument types:
    # scalars, dicts, lists, ranges, DataFrames, ndarrays, functions and lambdas
    fn_foo(1, 2, k1='lalala')
    fn_foo('dict', {'a': 'aaa', 'b': 345})
    fn_foo('list', list(range(5)))
    fn_foo('big-list', list(range(100)))
    fn_foo('big-range', range(100))
    fn_foo('df', dsutils.load_blood())
    fn_foo('ndarray', dsutils.load_blood().values)
    fn_foo('fn', foo)
    fn_foo('lambda', lambda: print('lambda'))
    fn_foo(['aaa', 3, 4, ['aaa', 'bbb']], 2, k2='lalala')
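The helper fn_foo is defined elsewhere in the original test module and is not shown here. A minimal hypothetical stand-in such as the one below (not the project's actual helper) is enough to run the snippet on its own and observe the argument types it is fed:

def fn_foo(*args, **kwargs):
    # hypothetical stub: simply echo whatever the snippet passes in
    print('fn_foo called with', args, kwargs)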
Example No. 3
def test_experiment_with_blood_full_features():
    df = dsutils.load_blood()
    target = 'Class'
    df_train, df_test = train_test_split(df, train_size=0.8, random_state=335)
    df_test.pop(target)

    experiment = make_experiment(
        PlainModel,
        df,
        target=target,
        search_space=PlainSearchSpace(),
        test_data=df_test,
        feature_generation=True,
        collinearity_detection=True,
        drift_detection=True,
        feature_selection=True,
        down_sample_search=True,
        down_sample_search_size=0.2,
        feature_reselection=True,
        pseudo_labeling=True,
        random_state=335,
        early_stopping_time_limit=1200,
        # log_level='info',
    )
    estimator = experiment.run(max_trials=3)
    print(estimator)
    assert estimator is not None

    step_names = [step[0] for step in estimator.steps]
    assert step_names == [
        StepNames.DATA_CLEAN, StepNames.MULITICOLLINEARITY_DETECTION,
        'estimator'
    ]
Example No. 4
    def setup_class(cls):
        if is_dask_installed:
            import dask.dataframe as dd
            setup_dask(cls)

            cls.boston = dd.from_pandas(dsutils.load_boston(), npartitions=1)
            cls.blood = dd.from_pandas(dsutils.load_blood(), npartitions=1)
            cls.bike_sharing = dd.from_pandas(dsutils.load_Bike_Sharing(), npartitions=1)
Example No. 5
def test_experiment_with_blood_simple():
    df = dsutils.load_blood()
    experiment = make_experiment(PlainModel,
                                 df,
                                 target='Class',
                                 search_space=PlainSearchSpace())
    estimator = experiment.run(max_trials=3)
    print(estimator)
    assert estimator is not None
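The snippets on this page omit their imports. A plausible preamble for running this simplest example standalone is sketched below; the module paths are assumptions based on the Hypernets package layout and may differ between versions:

from hypernets.tabular.datasets import dsutils                            # assumed location of the bundled datasets
from hypernets.experiment import make_experiment                          # assumed location of make_experiment
from hypernets.examples.plain_model import PlainModel, PlainSearchSpace   # assumed location of the plain demo model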
Example No. 6
    def experiment_with_blood(self,
                              init_kwargs,
                              run_kwargs,
                              row_count=3000,
                              with_dask=False):
        if with_dask:
            X = self.blood.copy()
            y = X.pop('Class')
        else:
            X = dsutils.load_blood()
            if row_count is not None:
                X = X.head(row_count)
            X['Class'] = LabelEncoder().fit_transform(X['Class'])
            y = X.pop('Class')

        hyper_model = create_plain_model(with_encoder=True)

        tb = get_tool_box(X, y)
        X_train, X_test, y_train, y_test = \
            tb.train_test_split(X, y, test_size=0.3, random_state=9527)
        X_train, X_eval, y_train, y_eval = \
            tb.train_test_split(X_train, y_train, test_size=0.3, random_state=9527)

        init_kwargs = {
            'X_eval': X_eval,
            'y_eval': y_eval,
            'X_test': X_test,
            **init_kwargs
        }

        compete_experiment = CompeteExperiment(hyper_model, X_train, y_train,
                                               **init_kwargs)
        base_experiment = Experiment(hyper_model, X_train, y_train,
                                     **init_kwargs)

        mydict_compete = compete_experiment.get_data_character()
        mydict_base = base_experiment.get_data_character()

        assert mydict_base
        assert mydict_compete
        assert mydict_base['experimentType'] == 'base'
        assert mydict_compete['experimentType'] == 'compete'
        assert mydict_base['target']['taskType'] == 'binary'
        assert mydict_base['target']['freq'] is not None
        assert mydict_base['target']['unique'] == 2
        assert mydict_base['target']['mean'] is None
        assert mydict_base['target']['max'] is None
        assert mydict_base['target']['min'] is None
        assert mydict_base['target']['stdev'] is None
        assert mydict_base['target']['dataType']
        assert len(mydict_base['targetDistribution']) <= 10
        assert mydict_base['datasetShape']['X_train']
        assert mydict_base['datasetShape']['y_train']
        assert mydict_base['datasetShape']['X_eval']
        assert mydict_base['datasetShape']['y_eval']
        assert mydict_base['datasetShape']['X_test']
        assert mydict_compete['featureDistribution']
Example No. 7
    def setup_class(cls):
        from sklearn.preprocessing import LabelEncoder
        df = dsutils.load_bank()
        df['y'] = LabelEncoder().fit_transform(df['y'])  # binary task target
        df['education'] = LabelEncoder().fit_transform(
            df['education'])  # multiclass task target
        cls.bank_data = df
        cls.bank_data_cudf = cudf.from_pandas(df)

        cls.boston_data = dsutils.load_blood()
        cls.boston_data_cudf = cudf.from_pandas(cls.boston_data)

        cls.movie_lens = dsutils.load_movielens()

        os.makedirs(cls.work_dir)
Example No. 8
def test_experiment_with_blood_down_sample():
    df = dsutils.load_blood()
    experiment = make_experiment(
        PlainModel,
        df,
        target='Class',
        search_space=PlainSearchSpace(),
        down_sample_search=True,
        down_sample_search_size=0.1,
        down_sample_search_time_limit=300,
        down_sample_search_max_trials=10,
        # log_level='info',
    )
    estimator = experiment.run(max_trials=3)
    print(estimator)
    assert estimator is not None
Example No. 9
def run_export_excel_report(maker, has_eval_data=True, str_label=True):
    df = dsutils.load_blood()
    df['Constant'] = [0 for i in range(df.shape[0])]
    df['Id'] = [i for i in range(df.shape[0])]

    target = 'Class'
    labels = ["no", "yes"]
    if str_label:
        df[target] = df[target].map(lambda v: labels[v])

    df_train, df_eval = train_test_split(df, test_size=0.2)

    df_train['Drifted'] = np.random.random(df_train.shape[0])
    df_eval['Drifted'] = np.random.random(df_eval.shape[0]) * 100

    file_path = common_util.get_temp_file_path(prefix="report_excel_",
                                               suffix=".xlsx")
    print(file_path)
    experiment = maker(df_train, target, df_eval, file_path)
    estimator = experiment.run(max_trials=3)
    assert estimator is not None
    mlr_callback = None
    mle_callback = None
    for callback in experiment.callbacks:
        if isinstance(callback, MLReportCallback):
            mlr_callback = callback
        if isinstance(callback, MLEvaluateCallback):
            mle_callback = callback

    assert mlr_callback is not None
    _experiment_meta: ExperimentMeta = mlr_callback.experiment_meta_

    assert len(_experiment_meta.resource_usage) > 0
    assert os.path.exists(file_path)

    if has_eval_data:
        assert mle_callback is not None
        assert _experiment_meta.confusion_matrix is not None
        assert _experiment_meta.classification_report is not None
        assert len(_experiment_meta.prediction_elapsed) == 2
        assert _experiment_meta.confusion_matrix.data.shape == (
            2, 2)  # binary classification
        assert len(_experiment_meta.datasets) == 3
    else:
        assert len(_experiment_meta.datasets) == 2
    return _experiment_meta
Example No. 10
    def _create_experiment(self, make_options):
        from hypernets.experiment import make_experiment
        train_data = dsutils.load_blood()
        experiment = make_experiment(PlainModel, train_data, **make_options)
        return experiment