Пример #1
0
    def setup_class(cls):
        """Prepare the shared datasets and the working directory for the tests.

        Stores on the class:
          * ``bank_data``  - the bank dataset with its ``y`` column replaced
            by the output of ``LabelEncoder.fit_transform``.
          * ``movie_lens`` - the MovieLens dataset as returned by ``dsutils``.

        Finally creates ``cls.work_dir``; ``os.makedirs`` is called without
        ``exist_ok``, so an already existing directory raises
        ``FileExistsError``.
        """
        from sklearn.preprocessing import LabelEncoder

        bank = dsutils.load_bank()
        target_encoder = LabelEncoder()
        bank['y'] = target_encoder.fit_transform(bank['y'])
        cls.bank_data = bank

        movie_lens = dsutils.load_movielens()
        cls.movie_lens = movie_lens

        os.makedirs(cls.work_dir)
Пример #2
0
def experiment_with_movie_lens(init_kwargs,
                               run_kwargs,
                               row_count=None,
                               with_dask=False):
    """Run a short ``CompeteExperiment`` on MovieLens and score the result.

    The ``rating`` column is popped from the data and used as the target.
    The data is split twice with ``test_size=0.3`` (first into train/test,
    then the train part into train/eval), the experiment is run, and the
    returned estimator is scored on the test part.

    Args:
        init_kwargs: Mapping of extra keyword arguments for
            ``CompeteExperiment``; its entries override the defaults
            assembled here (eval/test sets, ``ensemble_size=0``,
            ``drift_detection=False``).
        run_kwargs: Mapping of extra keyword arguments for
            ``experiment.run``; its entries override ``max_trials=3``.
        row_count: When not ``None``, only the first ``row_count`` rows of
            the dataset are used.
        with_dask: When true, ``setup_dask(None)`` is called, the data is
            wrapped in a single-partition dask DataFrame, and predictions
            are passed through ``to_local`` before scoring.
    """
    model = create_plain_model(reward_metric='f1',
                               with_encoder=True,
                               with_dask=with_dask)

    data = dsutils.load_movielens()
    # Only 'timestamp' is converted here; 'genres' is passed on unchanged.
    data['timestamp'] = data['timestamp'].apply(datetime.fromtimestamp)
    if row_count is not None:
        data = data.head(row_count)

    if with_dask:
        setup_dask(None)
        data = dd.from_pandas(data, npartitions=1)

    target = data.pop('rating')
    toolbox = get_tool_box(data, target)

    # Both splits share the same ratio and seed.
    split_options = {'test_size': 0.3, 'random_state': 9527}
    X_fit, X_test, y_fit, y_test = toolbox.train_test_split(
        data, target, **split_options)
    X_train, X_eval, y_train, y_eval = toolbox.train_test_split(
        X_fit, y_fit, **split_options)

    # Caller-supplied entries come last so that they win over the defaults.
    experiment_defaults = {
        'X_eval': X_eval,
        'y_eval': y_eval,
        'X_test': X_test,
        'ensemble_size': 0,
        'drift_detection': False,
    }
    experiment_options = {**experiment_defaults, **init_kwargs}
    run_options = {'max_trials': 3, **run_kwargs}

    experiment = CompeteExperiment(model, X_train, y_train,
                                   **experiment_options)
    estimator = experiment.run(**run_options)

    assert estimator

    predictions = estimator.predict(X_test)
    probabilities = estimator.predict_proba(X_test)

    if with_dask:
        predictions, probabilities = toolbox.to_local(predictions,
                                                      probabilities)

    metric_names = ['auc', 'accuracy', 'f1', 'recall', 'precision']
    score = toolbox.metrics.calc_score(y_test,
                                       predictions,
                                       probabilities,
                                       metrics=metric_names,
                                       task=experiment.task)
    print('evaluate score:', score)
    assert score
Пример #3
0
 def test_category_datetime_text(self):
     """Check feature generation over categorical, datetime and text columns.

     MovieLens is loaded, ``genres`` has its ``'|'`` separators replaced by
     spaces and ``timestamp`` is converted with ``datetime.fromtimestamp``.
     The transformer is fitted with ``title`` as a text column and
     ``gender``/``genres`` as categorical columns, and the output must
     contain one cross-categorical, one TF-IDF and one day-of-timestamp
     column.
     """
     movies = dsutils.load_movielens()
     movies['genres'] = movies['genres'].apply(
         lambda genres: genres.replace('|', ' '))
     movies['timestamp'] = movies['timestamp'].apply(datetime.fromtimestamp)

     transformer = FeatureGenerationTransformer(
         task='binary',
         text_cols=['title'],
         categories_cols=['gender', 'genres'])
     transformed = transformer.fit_transform(movies)
     xt_columns = transformed.columns.to_list()

     expected_columns = (
         'CROSS_CATEGORICAL_gender__genres',
         'TFIDF__title____0__',
         'DAY__timestamp__',
     )
     for expected in expected_columns:
         assert expected in xt_columns
Пример #4
0
    def setup_class(cls):
        """Prepare pandas and cudf copies of the datasets plus the work dir.

        Stores on the class:
          * ``bank_data`` / ``bank_data_cudf`` - the bank dataset with the
            ``y`` and ``education`` columns label-encoded, as a pandas frame
            and as its ``cudf.from_pandas`` copy.
          * ``boston_data`` / ``boston_data_cudf`` - the frame returned by
            ``dsutils.load_blood()`` and its cudf copy.
          * ``movie_lens`` - the MovieLens dataset as returned by ``dsutils``.

        Finally creates ``cls.work_dir``; ``os.makedirs`` is called without
        ``exist_ok``, so an already existing directory raises
        ``FileExistsError``.
        """
        from sklearn.preprocessing import LabelEncoder

        bank = dsutils.load_bank()
        # binary task target
        bank['y'] = LabelEncoder().fit_transform(bank['y'])
        # multiclass task target
        bank['education'] = LabelEncoder().fit_transform(bank['education'])
        cls.bank_data = bank
        cls.bank_data_cudf = cudf.from_pandas(bank)

        # NOTE(review): the attribute is called ``boston_data`` but it is
        # filled from ``load_blood()`` - confirm the name is intentional.
        blood = dsutils.load_blood()
        cls.boston_data = blood
        cls.boston_data_cudf = cudf.from_pandas(cls.boston_data)

        movie_lens = dsutils.load_movielens()
        cls.movie_lens = movie_lens

        os.makedirs(cls.work_dir)
Пример #5
0
 def setup_class(cls):
     """Load the bank and MovieLens datasets once and keep them on the class."""
     bank = dsutils.load_bank()
     cls.bank_data = bank

     movie_lens = dsutils.load_movielens()
     cls.movie_lens = movie_lens
Пример #6
0
    def setup_class(cls):
        """Set up dask and keep two-partition dask copies of the datasets.

        ``setup_dask(cls)`` runs first; afterwards the bank and MovieLens
        datasets are loaded and each is wrapped with ``dd.from_pandas``
        using two partitions.
        """
        setup_dask(cls)

        partitions = 2

        bank = dsutils.load_bank()
        cls.bank_data = dd.from_pandas(bank, npartitions=partitions)

        movie_lens = dsutils.load_movielens()
        cls.movie_lens = dd.from_pandas(movie_lens, npartitions=partitions)