Example #1
    def setup_class(cls):
        # Skip the dask-backed fixtures entirely when dask is not installed.
        if is_dask_installed:
            import dask.dataframe as dd
            setup_dask(cls)

            cls.boston = dd.from_pandas(dsutils.load_boston(), npartitions=1)
            cls.blood = dd.from_pandas(dsutils.load_blood(), npartitions=1)
            cls.bike_sharing = dd.from_pandas(dsutils.load_Bike_Sharing(), npartitions=1)
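These fixtures all use the same conversion idiom. A minimal, self-contained sketch of it, assuming only pandas and dask are installed (the toy DataFrame stands in for the dsutils loaders):

import pandas as pd
import dask.dataframe as dd

pdf = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})
ddf = dd.from_pandas(pdf, npartitions=1)  # wrap as a single-partition dask DataFrame
print(ddf.compute())  # compute() materializes the result back into pandas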
Example #2
def experiment_with_movie_lens(init_kwargs,
                               run_kwargs,
                               row_count=None,
                               with_dask=False):
    hyper_model = create_plain_model(reward_metric='f1',
                                     with_encoder=True,
                                     with_dask=with_dask)

    X = dsutils.load_movielens()
    # X['genres'] = X['genres'].apply(lambda s: s.replace('|', ' '))
    X['timestamp'] = X['timestamp'].apply(datetime.fromtimestamp)  # unix seconds -> datetime
    if row_count is not None:
        X = X.head(row_count)

    if with_dask:
        setup_dask(None)
        X = dd.from_pandas(X, npartitions=1)  # switch to a dask-backed DataFrame

    y = X.pop('rating')

    tb = get_tool_box(X, y)

    X_train, X_test, y_train, y_test = \
        tb.train_test_split(X, y, test_size=0.3, random_state=9527)
    X_train, X_eval, y_train, y_eval = \
        tb.train_test_split(X_train, y_train, test_size=0.3, random_state=9527)

    init_kwargs = {
        'X_eval': X_eval,
        'y_eval': y_eval,
        'X_test': X_test,
        'ensemble_size': 0,
        'drift_detection': False,
        **init_kwargs
    }
    run_kwargs = {'max_trials': 3, **run_kwargs}
    experiment = CompeteExperiment(hyper_model, X_train, y_train,
                                   **init_kwargs)
    estimator = experiment.run(**run_kwargs)

    assert estimator

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)

    if with_dask:
        preds, proba = tb.to_local(preds, proba)

    score = tb.metrics.calc_score(
        y_test,
        preds,
        proba,
        metrics=['auc', 'accuracy', 'f1', 'recall', 'precision'],
        task=experiment.task)
    print('evaluate score:', score)
    assert score
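A hypothetical invocation of the helper above; the empty dicts fall back to the defaults merged inside the function (e.g. max_trials=3, ensemble_size=0), and the module-level imports of the original test file are assumed:

experiment_with_movie_lens(init_kwargs={}, run_kwargs={}, row_count=1000, with_dask=False)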
Example #3
def experiment_with_bank_data(init_kwargs,
                              run_kwargs,
                              row_count=3000,
                              with_dask=False):
    hyper_model = create_plain_model(with_encoder=True, with_dask=with_dask)
    X = dsutils.load_bank()
    if row_count is not None:
        X = X.head(row_count)
    X['y'] = LabelEncoder().fit_transform(X['y'])

    if with_dask:
        setup_dask(None)
        X = dd.from_pandas(X, npartitions=1)

    y = X.pop('y')

    tb = get_tool_box(X, y)
    scorer = tb.metrics.metric_to_scoring(hyper_model.reward_metric)

    X_train, X_test, y_train, y_test = \
        tb.train_test_split(X, y, test_size=0.3, random_state=9527)
    X_train, X_eval, y_train, y_eval = \
        tb.train_test_split(X_train, y_train, test_size=0.3, random_state=9527)

    init_kwargs = {
        'X_eval': X_eval,
        'y_eval': y_eval,
        'X_test': X_test,
        'scorer': scorer,
        'ensemble_size': 0,
        'drift_detection': False,
        **init_kwargs
    }
    run_kwargs = {'max_trials': 3, **run_kwargs}
    experiment = CompeteExperiment(hyper_model, X_train, y_train,
                                   **init_kwargs)
    estimator = experiment.run(**run_kwargs)

    assert estimator

    preds = estimator.predict(X_test)
    proba = estimator.predict_proba(X_test)

    if with_dask:
        preds, proba = tb.to_local(preds, proba)

    score = tb.metrics.calc_score(
        y_test,
        preds,
        proba,
        metrics=['auc', 'accuracy', 'f1', 'recall', 'precision'])
    print('evaluate score:', score)
    assert score
Example #4
    def setup_class(cls):
        setup_dask(cls)

        print("Loading datasets...")
        row_count = 1000
        df = dsutils.load_adult().head(row_count)

        cls.df = dd.from_pandas(df, npartitions=2)
        cls.df_row_count = row_count
        cls.target = 14  # the label column is referenced by its positional index

        print(f'Class {cls.__name__} setup.')
Example #5
    def setup_class(self):
        setup_dask(self)

        print("Loading datasets...")
        data = dd.from_pandas(dsutils.load_glass_uci(), npartitions=2)
        self.y = data.pop(10).values  # pop the label column by positional index
        self.X = data

        conf = deeptable.ModelConfig(metrics=['AUC'], apply_gbm_features=False)
        self.dt = deeptable.DeepTable(config=conf)
        self.X_train, self.X_test, self.y_train, self.y_test = \
            [t.persist() for t in get_tool_box(data).train_test_split(self.X, self.y, test_size=0.2, random_state=42)]
        self.model, self.history = self.dt.fit(self.X_train, self.y_train, batch_size=32, epochs=3)
Example #6
def run_compete_experiment_with_heart_disease(init_kwargs,
                                              run_kwargs,
                                              with_dask=False):
    df = dsutils.load_heart_disease_uci()
    target = 'target'

    if with_dask:
        setup_dask(None)
        df = dd.from_pandas(df, npartitions=1)

    tb = get_tool_box(df)
    train_data, test_data = tb.train_test_split(df,
                                                test_size=0.2,
                                                random_state=7)
    train_data, eval_data = tb.train_test_split(train_data,
                                                test_size=0.3,
                                                random_state=7)
    y_test = test_data.pop(target)

    init_kwargs = {
        'searcher': 'random',
        'search_space': tiny_dt_space,
        'reward_metric': 'AUC',
        'ensemble_size': 0,
        'drift_detection': False,
        **init_kwargs
    }
    run_kwargs = {'max_trials': 3, 'batch_size': 16, 'epochs': 1, **run_kwargs}
    experiment = make_experiment(train_data,
                                 target=target,
                                 eval_data=eval_data,
                                 test_data=test_data,
                                 **init_kwargs)
    estimator = experiment.run(**run_kwargs)

    assert estimator is not None

    preds = estimator.predict(test_data)
    proba = estimator.predict_proba(test_data)

    score = tb.metrics.calc_score(
        y_test,
        preds,
        proba,
        metrics=['AUC', 'accuracy', 'f1', 'recall', 'precision'])
    print('evaluate score:', score)
    assert score
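As with the other helpers, a hypothetical call, here with the dask code path enabled; any keys passed in are merged over the defaults set inside the function:

run_compete_experiment_with_heart_disease(init_kwargs={}, run_kwargs={'max_trials': 2}, with_dask=True)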
Example #7
    def setup_class(self):
        TestVarLenCategoricalFeature.setup_class(self)

        setup_dask(self)
        self.df = dd.from_pandas(self.df, npartitions=2)
Example #8
    def setup_class(cls):
        setup_dask(cls)
Example #9
    # scoring
    preds = dt2.predict(
        df_test,
        batch_size=512,
    )
    proba = dt2.predict_proba(
        df_test,
        batch_size=512,
    )
    print(
        get_tool_box(y_test).metrics.calc_score(y_test,
                                                preds,
                                                proba,
                                                metrics=['accuracy', 'auc']))


if __name__ == '__main__':
    setup_dask(None)
    gpus = tf.config.list_physical_devices('GPU')
    bs = int(os.environ.get('BATCH_SIZE', '32'))
    es = int(os.environ.get('EPOCHS', '5'))

    if len(gpus) < 2:
        run(batch_size=bs, epochs=es)
    else:
        # Scale the global batch size by the number of GPU replicas.
        strategy = tf.distribute.MirroredStrategy()
        run(distribute_strategy=strategy, batch_size=len(gpus) * bs, epochs=es)

    print('done')
Example #10
    def setup_class(self):
        setup_dask(self)
        super().setup_class(self)