示例#1
0
def test_experiment_with_blood_full_features():
    """Run a PlainModel experiment on the blood data with many optional steps on.

    The experiment is built from the full loaded frame; the test data is the
    part of the split left over by ``train_size=0.8`` with the target column
    removed.  After a three-trial run the estimator must exist and its step
    names must be exactly data clean, multicollinearity detection and
    ``'estimator'``.
    """
    target = 'Class'
    df = dsutils.load_blood()
    # Only the held-out half of the split is used below; the training half
    # is discarded because the experiment receives the full frame.
    _, df_test = train_test_split(df, train_size=0.8, random_state=335)
    df_test.pop(target)

    options = dict(
        target=target,
        search_space=PlainSearchSpace(),
        test_data=df_test,
        feature_generation=True,
        collinearity_detection=True,
        drift_detection=True,
        feature_selection=True,
        down_sample_search=True,
        down_sample_search_size=0.2,
        feature_reselection=True,
        pseudo_labeling=True,
        random_state=335,
        early_stopping_time_limit=1200,
    )
    experiment = make_experiment(PlainModel, df, **options)

    estimator = experiment.run(max_trials=3)
    print(estimator)
    assert estimator is not None

    expected_steps = [
        StepNames.DATA_CLEAN,
        StepNames.MULITICOLLINEARITY_DETECTION,
        'estimator',
    ]
    actual_steps = [entry[0] for entry in estimator.steps]
    assert actual_steps == expected_steps
示例#2
0
def test_experiment_with_blood_simple():
    """Run a minimal PlainModel experiment on the blood dataset.

    Only the target column and a default ``PlainSearchSpace`` are supplied;
    the test passes when a three-trial run returns an estimator.
    """
    blood = dsutils.load_blood()
    experiment = make_experiment(
        PlainModel,
        blood,
        target='Class',
        search_space=PlainSearchSpace(),
    )

    fitted = experiment.run(max_trials=3)
    print(fitted)
    assert fitted is not None
示例#3
0
 def maker(df_train, target, df_eval, file_path):
     """Build a PlainModel experiment whose report is rendered via 'excel'.

     A copy of ``df_eval`` is passed as the test data, the search space is
     created with ``enable_lr`` and ``enable_nn`` switched off, and
     ``file_path`` is forwarded through ``report_render_options``.

     Returns the experiment produced by ``make_experiment`` (not yet run).
     """
     drift_options = {
         'drift_detection_threshold': 0.4,
         'drift_detection_min_features': 3,
         'drift_detection_remove_size': 0.5,
     }
     return make_experiment(
         PlainModel,
         df_train,
         target=target,
         test_data=df_eval.copy(),
         **drift_options,
         search_space=PlainSearchSpace(enable_lr=False, enable_nn=False),
         report_render='excel',
         report_render_options={'file_path': file_path},
     )
示例#4
0
 def maker(df_train, target, df_eval, file_path):
     """Build a PlainModel experiment that reports through an ExcelReportRender.

     ``df_eval`` itself is passed as the evaluation data and a copy of it as
     the test data.  The search space is created with ``enable_lr`` and
     ``enable_nn`` switched off, and the render object is constructed from
     ``file_path``.

     Returns the experiment produced by ``make_experiment`` (not yet run).
     """
     from hypernets.experiment.report import ExcelReportRender

     drift_options = {
         'drift_detection_threshold': 0.4,
         'drift_detection_min_features': 3,
         'drift_detection_remove_size': 0.5,
     }
     return make_experiment(
         PlainModel,
         df_train,
         target=target,
         eval_data=df_eval,
         test_data=df_eval.copy(),
         **drift_options,
         search_space=PlainSearchSpace(enable_lr=False, enable_nn=False),
         report_render=ExcelReportRender(file_path),
     )
示例#5
0
def test_regression_task_report():
    """Run a regression experiment on the boston data and check its report.

    A constant column, a row-index column and a 'Drifted' column (scaled by
    100 in the eval split relative to the train split) are added before the
    experiment is built.  After a three-trial run the test checks that the
    report callback collected resource usage, exactly five steps, an
    evaluation metric, one prediction-stats entry and three datasets, that an
    evaluate callback is registered, and that the Excel file exists.
    """
    df = dsutils.load_boston()
    row_count = df.shape[0]
    df['Constant'] = [0] * row_count
    df['Id'] = list(range(row_count))

    target = 'target'

    df_train, df_eval = train_test_split(df, test_size=0.2)

    # Same random source for both splits; the eval values are multiplied by
    # 100 so the column differs in scale between train and eval.
    df_train['Drifted'] = np.random.random(df_train.shape[0])
    df_eval['Drifted'] = np.random.random(df_eval.shape[0]) * 100

    file_path = common_util.get_temp_file_path(
        prefix="report_excel_", suffix=".xlsx")
    print(file_path)

    search_space = PlainSearchSpace(
        enable_lr=False,
        enable_nn=False,
        enable_dt=False,
        enable_dtr=True,
    )
    experiment = make_experiment(
        PlainModel,
        df_train,
        target=target,
        eval_data=df_eval.copy(),
        test_data=df_eval.copy(),
        drift_detection_threshold=0.4,
        drift_detection_min_features=3,
        drift_detection_remove_size=0.5,
        search_space=search_space,
        report_render='excel',
        report_render_options={'file_path': file_path},
    )
    estimator = experiment.run(max_trials=3)
    assert estimator is not None

    # Keep the last callback of each kind found on the experiment.
    mlr_callback = mle_callback = None
    for candidate in experiment.callbacks:
        if isinstance(candidate, MLReportCallback):
            mlr_callback = candidate
        if isinstance(candidate, MLEvaluateCallback):
            mle_callback = candidate

    assert mlr_callback is not None
    meta: ExperimentMeta = mlr_callback.experiment_meta_

    assert len(meta.resource_usage) > 0
    assert len(meta.steps) == 5
    assert os.path.exists(file_path)

    assert mle_callback is not None
    assert meta.evaluation_metric is not None
    assert len(meta.prediction_stats) == 1
    assert len(meta.datasets) == 3
示例#6
0
def run_experiment(train_df, check_as_local=True, **kwargs):
    """Build and run a PlainModel experiment on ``train_df`` and check the result.

    Args:
        train_df: training data handed to ``make_experiment``.
        check_as_local: when true, additionally assert that the estimator
            exposes ``as_local`` and that the object it returns does not.
        **kwargs: forwarded unchanged to ``make_experiment``.
    """
    experiment = make_experiment(
        PlainModel, train_df, search_space=PlainSearchSpace(), **kwargs)
    estimator = experiment.run()
    print(experiment.random_state, estimator)

    assert estimator is not None

    if not check_as_local:
        return

    assert hasattr(estimator, 'as_local')

    converted = estimator.as_local()
    assert not hasattr(converted, 'as_local')
示例#7
0
def test_experiment_with_data_adaption():
    """Run an experiment on the bank data with a memory limit of half its size.

    The frame is label-encoded first; half of its summed ``memory_usage()``
    (integer division) is passed as ``data_adaption_memory_limit``.  The test
    expects the first step of the resulting estimator to be named
    ``'data_adaption'``.
    """
    df = MultiLabelEncoder().fit_transform(dsutils.load_bank())
    total_bytes = int(df.memory_usage().sum())

    options = dict(
        target='y',
        search_space=PlainSearchSpace(),
        data_adaption_memory_limit=total_bytes // 2,
        log_level='info',
    )
    experiment = make_experiment(PlainModel, df, **options)

    estimator = experiment.run(max_trials=3)
    assert estimator is not None

    first_step_name = estimator.steps[0][0]
    assert first_step_name == 'data_adaption'
示例#8
0
def test_experiment_with_blood_down_sample():
    """Run a PlainModel experiment on the blood data with down-sample search on.

    The down-sample search is configured with size 0.1, a time limit of 300
    and at most 10 trials; the test passes when a three-trial run returns an
    estimator.
    """
    down_sample_options = {
        'down_sample_search': True,
        'down_sample_search_size': 0.1,
        'down_sample_search_time_limit': 300,
        'down_sample_search_max_trials': 10,
    }
    blood = dsutils.load_blood()
    experiment = make_experiment(
        PlainModel,
        blood,
        target='Class',
        search_space=PlainSearchSpace(),
        **down_sample_options,
    )

    fitted = experiment.run(max_trials=3)
    print(fitted)
    assert fitted is not None
def main():
    """Run a three-trial PlainModel experiment on the boston data and print it.

    Only the training part of an 80/20 split is used; the search space is
    created with ``enable_dtr`` on and ``enable_lr``, ``enable_nn`` and
    ``enable_dt`` off, and the report render is requested as ``'excel'``.
    """
    boston = dsutils.load_boston()

    # The evaluation part of the split is not used by this script.
    df_train, _ = train_test_split(boston, test_size=0.2)

    space_flags = dict(
        enable_lr=False,
        enable_nn=False,
        enable_dt=False,
        enable_dtr=True,
    )
    search_space = PlainSearchSpace(**space_flags)

    experiment = make_experiment(
        PlainModel,
        df_train,
        target='target',
        search_space=search_space,
        report_render='excel',
    )
    fitted = experiment.run(max_trials=3)
    print(fitted)
示例#10
0
 def maker_(*args, **kwargs):
     """Create a PlainModel experiment, defaulting ``random_state`` to 1234.

     A ``random_state`` supplied by the caller is left untouched; all
     arguments are otherwise forwarded to ``make_experiment`` as given.
     """
     kwargs.setdefault('random_state', 1234)
     return make_experiment(PlainModel, *args, **kwargs)
示例#11
0
    def maker_(*args, **kwargs):
        """Create a PlainModel experiment, forwarding every argument unchanged."""
        experiment = make_experiment(PlainModel, *args, **kwargs)
        return experiment
示例#12
0
 def _create_experiment(self, make_options):
     """Build a PlainModel experiment on the blood dataset.

     Args:
         make_options: mapping of keyword arguments expanded into the
             ``make_experiment`` call.

     Returns:
         The experiment returned by ``make_experiment`` (not yet run).
     """
     from hypernets.experiment import make_experiment

     blood = dsutils.load_blood()
     return make_experiment(PlainModel, blood, **make_options)