Пример #1
0
    def test_multi_config(self):
        """Train DeepTable models for two configs and expect two models back.

        Both configs are passed to ``BatchTrainer`` through ``dt_config`` and
        only the ``'dt'`` models are started, using 2-fold cross validation
        with ``retain_single_model=False``.
        """
        data = dsutils.load_adult().head(1000)
        conf1 = deeptable.ModelConfig(
            name='conf001',
            fixed_embedding_dim=False,
            embeddings_output_dim=0,
            apply_gbm_features=False,
            auto_discrete=False,
        )
        conf2 = deeptable.ModelConfig(
            name='conf002',
            fixed_embedding_dim=False,
            embeddings_output_dim=0,
            apply_gbm_features=False,
            auto_discrete=False,
        )
        bt = batch_trainer.BatchTrainer(
            data,
            'x_14',
            eval_size=0,
            validation_size=0.2,
            eval_metrics=['AUC', 'accuracy', 'recall', 'precision', 'f1'],
            dt_config=[conf1, conf2],
            verbose=0,
            dt_epochs=1,
            cross_validation=True,
            num_folds=2,
            retain_single_model=False,
        )

        ms = bt.start(models=['dt'])
        # Compare explicitly: `assert x, 2` would only test the truthiness of
        # x and use 2 as the failure message.
        assert len(ms.get_models()) == 2
Пример #2
0
    def setup_class(self):
        """Prepare shared dask fixtures and run a 3-fold cross-validated fit.

        Stores the features/target, the split produced by
        ``train_test_split``, the ``DeepTable`` instance and the three values
        returned by ``fit_cross_validation`` as attributes.
        """
        setup_dask(self)

        print("Loading datasets...")
        adult_head = dsutils.load_adult().head(1000)
        df_train = dd.from_pandas(adult_head, npartitions=2)
        self.y = df_train.pop(14)
        self.X = df_train

        self.dt = deeptable.DeepTable(
            config=deeptable.ModelConfig(
                metrics=['AUC'],
                apply_gbm_features=False,
                auto_categorize=False,
                auto_discrete=False,
            ))

        split = train_test_split(self.X, self.y, test_size=0.2, random_state=42)
        self.X_train, self.X_eval, self.y_train, self.y_test = split

        cv_outputs = self.dt.fit_cross_validation(
            self.X_train,
            self.y_train,
            self.X_eval,
            num_folds=3,
            epochs=1,
            n_jobs=1,
        )
        self.oof_proba, self.eval_proba, self.test_proba = cv_outputs
Пример #3
0
 def test_run_binary(self):
     """Run the default BatchTrainer pipeline on the adult dataset.

     Uses the first 1000 rows with ``eval_size`` and ``validation_size`` both
     set to 0.2, then checks the column count of the resulting leaderboard.
     """
     data = dsutils.load_adult().head(1000)
     conf = deeptable.ModelConfig(
         dnn_params={
             'dnn_units': ((256, 0, False), (256, 0, False)),
             'dnn_activation': 'relu'
         },
         fixed_embedding_dim=False,
         embeddings_output_dim=0,
         apply_gbm_features=False,
     )
     bt = batch_trainer.BatchTrainer(
         data,
         'x_14',
         eval_size=0.2,
         validation_size=0.2,
         # Metric names noted by the author:
         # AUC/recall/precision/f1/mse/mae/msle/rmse/r2
         eval_metrics=['AUC', 'accuracy', 'recall', 'precision', 'f1'],
         dt_config=conf,
         verbose=0,
         dt_epochs=1,
     )
     ms = bt.start()
     # Compare explicitly: `assert x, 7` would only test the truthiness of x
     # and use 7 as the failure message.
     # NOTE(review): 7 is the expected column count carried over from the
     # earlier assertion -- confirm it matches the leaderboard layout.
     assert ms.leaderboard().shape[1] == 7
Пример #4
0
    def test_var_categorical_feature(self):
        """Fit a regression ``dnn_nets`` model with a var-len categorical column.

        ``genres`` is declared via ``var_len_categorical_columns`` together
        with ``"|"`` and ``"max"``; after fitting, ``model.model.input_names``
        must contain ``'genres'``.
        """
        features = self.df.copy()
        target = features.pop('rating').values.astype('float32')

        categorical = [
            "movie_id", "user_id", "gender", "occupation", "zip", "title", "age"
        ]
        var_len_categorical = [('genres', "|", "max")]
        conf = deeptable.ModelConfig(
            nets=['dnn_nets'],
            task=consts.TASK_REGRESSION,
            categorical_columns=categorical,
            metrics=['mse'],
            fixed_embedding_dim=True,
            embeddings_output_dim=4,
            apply_gbm_features=False,
            apply_class_weight=True,
            earlystopping_patience=5,
            var_len_categorical_columns=var_len_categorical,
        )
        dt = deeptable.DeepTable(config=conf)

        X_train, X_validation, y_train, y_validation = train_test_split(
            features, target, test_size=0.2)

        model, _ = dt.fit(
            X_train,
            y_train,
            validation_data=(X_validation, y_validation),
            epochs=10,
            batch_size=32,
        )

        assert 'genres' in model.model.input_names
Пример #5
0
 def test_run_binary_heart_disease_CV(self):
     """Run the BatchTrainer with 3-fold cross validation on heart-disease data.

     The target column is ``'target'``; the test checks the column count of
     the resulting leaderboard.
     """
     data = dsutils.load_heart_disease_uci()
     conf = deeptable.ModelConfig(
         dnn_params={
             'dnn_units': ((256, 0, False), (256, 0, False)),
             'dnn_activation': 'relu'
         },
         fixed_embedding_dim=False,
         embeddings_output_dim=0,
         apply_gbm_features=False,
         auto_discrete=True,
         auto_categorize=False,
         cat_exponent=0.4,
         cat_remain_numeric=True,
         monitor_metric='val_loss',
     )
     bt = batch_trainer.BatchTrainer(
         data,
         'target',
         eval_size=0.2,
         validation_size=0.2,
         # Metric names noted by the author:
         # AUC/recall/precision/f1/mse/mae/msle/rmse/r2
         eval_metrics=['AUC', 'accuracy', 'recall', 'precision', 'f1'],
         dt_config=conf,
         verbose=0,
         dt_epochs=1,
         cross_validation=True,
         num_folds=3,
     )
     ms = bt.start()
     # Compare explicitly: `assert x, 7` would only test the truthiness of x
     # and use 7 as the failure message.
     # NOTE(review): 7 is the expected column count carried over from the
     # earlier assertion -- confirm it matches the leaderboard layout.
     assert ms.leaderboard().shape[1] == 7
Пример #6
0
    def test_custom_dnn(self):
        """Fit with ``custom_dnn_D_A_D_B`` and look up the layers by name.

        Four named layers are fetched from ``model.model`` after a one-epoch
        fit and each of them must be truthy.
        """
        frame = dsutils.load_adult().head(100)
        y = frame.pop(14).values
        X = frame

        dnn_params = {
            'custom_dnn_fn': deepnets.custom_dnn_D_A_D_B,
            'hidden_units': ((128, 0.2, True), (64, 0, False)),
        }
        conf = deeptable.ModelConfig(
            nets=['dnn_nets'],
            dnn_params=dnn_params,
            metrics=['AUC'],
            fixed_embedding_dim=True,
            embeddings_output_dim=2,
            apply_gbm_features=False,
            apply_class_weight=True,
        )
        dt = deeptable.DeepTable(config=conf)
        model, _ = dt.fit(X, y, epochs=1)

        layer_names = (
            'dnn_custom_dense_1',
            'dnn_custom_dropout_1',
            'dnn_custom_bn_1',
            'dnn_custom_dense_2',
        )
        # Resolve every layer first, then assert, so lookups happen before
        # any assertion is evaluated.
        found = [model.model.get_layer(name) for name in layer_names]
        for layer in found:
            assert layer
Пример #7
0
 def test_run_lgbm(self):
     """Train only the LightGBM model of a BatchTrainer and check its score.

     ``train_lgbm`` must return a truthy model and a score whose ``'auc'``
     entry is positive.
     """
     adult = dsutils.load_adult().head(1000)
     dnn_params = {
         'dnn_units': ((256, 0, False), (256, 0, False)),
         'dnn_activation': 'relu',
     }
     conf = deeptable.ModelConfig(
         dnn_params=dnn_params,
         fixed_embedding_dim=False,
         embeddings_output_dim=0,
         apply_gbm_features=False,
     )
     gbm_settings = {
         'learning_rate': 0.01,
         'colsample_bytree': 0.95,
         'reg_alpha': 0.04,
         'reg_lambda': 0.07,
     }
     trainer_options = dict(
         eval_size=0.2,
         validation_size=0.2,
         eval_metrics=['AUC', 'accuracy', 'recall', 'precision', 'f1'],
         dt_config=conf,
         verbose=0,
         dt_epochs=1,
         lightgbm_params=gbm_settings,
     )
     bt = batch_trainer.BatchTrainer(adult, 'x_14', **trainer_options)

     lgbm, score = bt.train_lgbm(conf)
     assert lgbm
     assert score['auc'] > 0
Пример #8
0
 def test_probe_evaluation(self):
     """Probe three named layers of a trained DeepTable model.

     Trains the ``'dt'`` model without cross validation, calls
     ``probe_evaluate('all', layers=[...])`` and checks the size of the
     returned result and of its single expected entry.
     """
     data = dsutils.load_adult().head(1000)
     conf = deeptable.ModelConfig(
         fixed_embedding_dim=False,
         embeddings_output_dim=0,
         apply_gbm_features=False,
         auto_discrete=False,
     )
     bt = batch_trainer.BatchTrainer(
         data,
         'x_14',
         eval_size=0.2,
         validation_size=0.2,
         eval_metrics=['AUC', 'accuracy', 'recall', 'precision', 'f1'],
         dt_config=conf,
         verbose=0,
         dt_epochs=1,
         cross_validation=False,
     )
     bt.start(models=['dt'])
     probed_layers = ['flatten_embeddings', 'dnn_dense_1', 'dnn_dense_2']
     result = bt.probe_evaluate('all', layers=probed_layers)
     # Compare explicitly: `assert len(x), n` would only test truthiness and
     # use n as the failure message.
     # NOTE(review): the expected sizes (one entry, one item per probed
     # layer) come from the earlier assertions -- confirm against
     # probe_evaluate's actual return structure.
     assert len(result) == 1
     assert len(result["conf-1 - ['dnn_nets'] - eval"]) == len(probed_layers)
Пример #9
0
    def test_zero_testset_cross_validation(self):
        """Cross-validate with ``eval_size=0`` so no evaluation split exists.

        With no evaluation split ``bt.X_eval`` must be ``None``; the test
        also checks the training row count and the number of trained models.
        """
        data = dsutils.load_adult().head(1000)
        conf = deeptable.ModelConfig(
            fixed_embedding_dim=False,
            embeddings_output_dim=0,
            apply_gbm_features=False,
            auto_discrete=False,
        )
        bt = batch_trainer.BatchTrainer(
            data,
            'x_14',
            eval_size=0,
            validation_size=0.2,
            eval_metrics=['AUC', 'accuracy', 'recall', 'precision', 'f1'],
            dt_config=conf,
            verbose=0,
            dt_epochs=1,
            cross_validation=True,
            num_folds=2,
            retain_single_model=False,
        )
        # Compare explicitly: `assert len(x), n` would only test truthiness
        # and use n as the failure message.
        # NOTE(review): assumes all 1000 input rows end up in X_train when
        # eval_size is 0 -- value carried over from the earlier assertion.
        assert len(bt.X_train) == 1000
        assert bt.X_eval is None

        ms = bt.start(models=['dt'])
        assert len(ms.get_models()) == 1
Пример #10
0
 def test_run_catboost(self):
     """Train only the CatBoost model of a BatchTrainer and check its score.

     ``train_catboost`` must return a truthy model and a score whose
     ``'auc'`` entry is positive.
     """
     adult = dsutils.load_adult().head(1000)
     dnn_params = {
         'dnn_units': ((256, 0, False), (256, 0, False)),
         'dnn_activation': 'relu',
     }
     conf = deeptable.ModelConfig(
         dnn_params=dnn_params,
         fixed_embedding_dim=False,
         embeddings_output_dim=0,
         apply_gbm_features=False,
     )
     trainer_options = dict(
         eval_size=0.2,
         validation_size=0.2,
         eval_metrics=['AUC', 'accuracy', 'recall', 'precision', 'f1'],
         dt_config=conf,
         verbose=0,
         dt_epochs=1,
         catboost_params={'iterations': 5},
     )
     bt = batch_trainer.BatchTrainer(adult, 'x_14', **trainer_options)

     cb, score = bt.train_catboost(conf)
     assert cb
     assert score['auc'] > 0
Пример #11
0
    def test_ensemble_predict_proba(self):
        """Ensemble every trained model and sanity-check the probability output.

        Trains with 5-fold cross validation on the first 1000 rows of the
        adult dataset, then calls ``ensemble_predict_proba('all')``.
        """
        data = dsutils.load_adult().head(1000)
        conf = deeptable.ModelConfig(
            fixed_embedding_dim=False,
            embeddings_output_dim=0,
            apply_gbm_features=False,
            auto_discrete=False,
        )
        bt = batch_trainer.BatchTrainer(
            data,
            'x_14',
            eval_size=0.2,
            validation_size=0.2,
            # Metric names noted by the author:
            # AUC/recall/precision/f1/mse/mae/msle/rmse/r2
            eval_metrics=['AUC', 'accuracy', 'recall', 'precision', 'f1'],
            dt_config=conf,
            verbose=0,
            dt_epochs=1,
            cross_validation=True,
            num_folds=5,
        )
        bt.start()
        proba, preds, score, submission = bt.ensemble_predict_proba('all')

        # An expectation of (6513,) is unreachable from a 1000-row frame, and
        # `assert proba.shape, (...)` never compared anything (the tuple was
        # only the failure message).  Check what the inputs guarantee: a
        # non-empty result with no more rows than the data supplied.
        # NOTE(review): tighten to the exact evaluation-split size once the
        # default prediction set of ensemble_predict_proba is confirmed.
        assert 0 < proba.shape[0] <= len(data)
 def test_class_weights(self):
     """Fit one epoch with ``apply_class_weight=True`` and expect a positive AUC.

     The first ``'AUC'`` value recorded in ``history.history`` is checked.
     """
     dt = deeptable.DeepTable(config=deeptable.ModelConfig(
         metrics=['AUC'],
         apply_gbm_features=False,
         apply_class_weight=True,
     ))
     _, history = dt.fit(self.X_train, self.y_train, epochs=1)
     first_auc = history.history['AUC'][0]
     assert first_auc > 0
Пример #13
0
    def run_nets(self, nets, **kwargs):
        """Fit and evaluate a DeepTable built from ``nets`` on 100 adult rows.

        Args:
            nets: Value passed to ``ModelConfig(nets=...)``.
            **kwargs: Extra keyword arguments forwarded to ``ModelConfig``.

        Returns:
            Tuple ``(dt, result)`` with the fitted ``DeepTable`` and the value
            returned by ``dt.evaluate`` on the held-out split.
        """
        frame = dsutils.load_adult().head(100)
        y = frame.pop(14).values
        X = frame

        dt = deeptable.DeepTable(config=deeptable.ModelConfig(
            nets=nets,
            metrics=['AUC'],
            fixed_embedding_dim=True,
            embeddings_output_dim=2,
            apply_gbm_features=False,
            apply_class_weight=True,
            **kwargs,
        ))

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)
        dt.fit(X_train, y_train, epochs=1)

        result = dt.evaluate(X_test, y_test)
        assert result['AUC'] >= 0.0

        # A save/reload round trip in a separate process was sketched here
        # by the author but is not executed.
        return dt, result
Пример #14
0
def run(distribute_strategy=None, batch_size=32, epochs=5):
    """Train a DeepFM DeepTable on the bank dataset and print its results.

    Args:
        distribute_strategy: Forwarded to
            ``ModelConfig(distribute_strategy=...)``.
        batch_size: Batch size passed to ``fit``.
        epochs: Number of epochs passed to ``fit``.

    Prints the evaluation result, then a mapping from each unique predicted
    value to how many times it occurs.
    """
    # Load the data and hold out 20% via test_size=0.2.
    bank = dsutils.load_bank()
    df_train, df_test = train_test_split(bank, test_size=0.2, random_state=42)
    y, y_test = df_train.pop('y'), df_test.pop('y')

    # Train.
    dt = deeptable.DeepTable(config=deeptable.ModelConfig(
        nets=deepnets.DeepFM,
        earlystopping_patience=999,
        apply_class_weight=True,
        distribute_strategy=distribute_strategy,
    ))
    dt.fit(df_train, y, batch_size=batch_size, epochs=epochs)

    # Evaluate.
    print('score:', dt.evaluate(df_test, y_test, verbose=0))

    # Count the predictions per distinct value.
    values, counts = np.unique(dt.predict(df_test), return_counts=True)
    print(dict(zip(values, counts)))
Пример #15
0
    def test_only_1_categorical(self, net):
        """Fit ``net`` on MovieLens using ``movie_id`` as the only feature.

        Author's notes on which nets cannot run with a single embedding:
          * afm_nets needs an embedding array with at least 2 elements.
          * opnn_nets, ipnn_nets and pnn_nets need at least 2 embeddings to
            build ``layers.InnerProduct``.
          * dnn_nets, cross_dnn_nets, cross_nets and dcn_nets do not use
            embeddings.
          * fibi_nets and fibi_dnn_nets need at least 2 embeddings because of
            ``BilinearInteraction``.
        """
        movielens = dsutils.load_movielens()
        y = movielens['rating'].values.astype('float32')
        X = movielens[['movie_id']]

        dt = deeptable.DeepTable(config=deeptable.ModelConfig(
            nets=[net],
            task=consts.TASK_REGRESSION,
            categorical_columns=["movie_id"],
            metrics=['mse'],
            fixed_embedding_dim=True,
            embeddings_output_dim=4,
            apply_gbm_features=False,
            apply_class_weight=True,
            earlystopping_patience=5,
        ))

        fit_options = dict(validation_split=0.2, epochs=10, batch_size=32)
        model, _ = dt.fit(X, y, **fit_options)
        assert model
Пример #16
0
 def test_transform(self):
     """Run the adult dataset through ``DefaultDaskPreprocessor``.

     Checks the produced column names, that the train and test outputs share
     the same columns, and the row/column counts and label sums of both
     transformed splits.
     """
     df_train = dsutils.load_adult()
     df_train = dd.from_pandas(df_train, npartitions=2)
     y = df_train.pop(14)
     X = df_train
     X_train, X_test, y_train, y_test = get_tool_box(X, y).train_test_split(
         X, y, test_size=0.2, random_state=42)
     conf = deeptable.ModelConfig(auto_discrete=True,
                                  auto_imputation=True,
                                  auto_encode_label=True,
                                  auto_categorize=True,
                                  apply_gbm_features=False)
     processor = DefaultDaskPreprocessor(conf, compute_to_local=True)
     X1, y1 = processor.fit_transform(X_train, y_train)
     X2, y2 = processor.transform(X_test, y_test)

     expected_columns = {
         'x_1', 'x_3', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9', 'x_13',
         'x_0_cat', 'x_4_cat', 'x_10_cat', 'x_11_cat', 'x_12_cat',
         'x_2', 'x_0', 'x_4', 'x_10', 'x_11', 'x_12', 'x_2_discrete',
         'x_0_discrete', 'x_4_discrete', 'x_10_discrete',
         'x_11_discrete', 'x_12_discrete',
     }
     assert not set(X1.columns.tolist()) - expected_columns
     assert not set(X1.columns) - set(X2.columns)

     # Compare explicitly: `assert a, b` would only test a's truthiness and
     # use b as the failure message.  len() is used for the row counts so
     # both sides are concrete integers for dask and pandas objects alike.
     assert len(X1) == len(X_train)
     assert len(X1.columns) == len(expected_columns)
     assert len(X2) == len(X_test)
     assert len(X2.columns) == len(expected_columns)
     # NOTE(review): the label sums are carried over from the earlier
     # assertions -- confirm they hold for this split (random_state=42).
     assert y1.sum() == 6297
     assert y2.sum() == 1544
Пример #17
0
    def test_predict_unseen_data(self):
        """Predict on data whose value ranges exceed those seen during fit.

        Training data draws ``x1`` from [0, 10) and ``x2`` from {'0', '1'};
        the prediction frame draws them from [0, 50) and '0'..'9', so it can
        contain values that never appeared in training.  One prediction per
        input row is expected.
        """
        x1 = np.random.randint(0, 10, size=(100), dtype='int')
        x2 = np.random.randint(0, 2, size=(100)).astype('str')
        x3 = np.random.normal(0.0, 1.0, size=(100))
        y = np.random.randint(0, 2, size=(100), dtype='int')

        df = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'y': y})
        df = dd.from_pandas(df, npartitions=1)
        y = df.pop('y')

        dt = deeptable.DeepTable(config=deeptable.ModelConfig(
            apply_gbm_features=False,
            auto_categorize=True,
            auto_discrete=True,
        ))
        dt.fit(df, y)

        n_unseen = 10
        xt_1 = np.random.randint(0, 50, size=(n_unseen), dtype='int')
        xt_2 = np.random.randint(0, 10, size=(n_unseen)).astype('str')
        xt_3 = np.random.normal(0.0, 2.0, size=(n_unseen))

        dft = pd.DataFrame({'x1': xt_1, 'x2': xt_2, 'x3': xt_3})
        dft = dd.from_pandas(dft, npartitions=2)
        preds = dt.predict(dft)
        # Compare explicitly: `assert len(preds), 10` would only test
        # truthiness and use 10 as the failure message.
        assert len(preds) == n_unseen
Пример #18
0
    def test_fit(self):
        """Smoke-test a one-epoch fit for ``task='multilabel'``.

        Builds 100 random rows with three features and two binary target
        columns, splits them with ``test_size=0.2`` and fits on the training
        part; the test passes if fitting does not raise.
        """
        print("Loading datasets...")
        n_rows = 100
        features = pd.DataFrame({
            'x1': np.random.randint(0, 10, size=n_rows, dtype='int'),
            'x2': np.random.normal(0.0, 1.0, size=n_rows),
            'x3': np.random.normal(0.0, 1.0, size=n_rows),
        })
        targets = pd.DataFrame({
            'y1': np.random.randint(0, 2, size=n_rows, dtype='int'),
            'y2': np.random.randint(0, 2, size=n_rows, dtype='int'),
        })

        dt = deeptable.DeepTable(config=deeptable.ModelConfig(
            metrics=['AUC'],
            nets=['dnn_nets'],
            apply_gbm_features=False,
            task='multilabel',
        ))
        X_train, X_test, y_train, y_test = train_test_split(
            features, targets, test_size=0.2, random_state=42)
        model, history = dt.fit(
            X_train, y_train.values, batch_size=10, epochs=1)
Пример #19
0
    def test_leaderboard(self):
        """Check the ``eval``, ``oof`` and ``val`` leaderboards after a CV run.

        Trains with 2-fold cross validation and ``retain_single_model=True``,
        then requests each leaderboard type from the model set.
        """
        data = dsutils.load_adult().head(1000)
        conf = deeptable.ModelConfig(
            fixed_embedding_dim=False,
            embeddings_output_dim=0,
            apply_gbm_features=False,
            auto_discrete=False,
        )
        bt = batch_trainer.BatchTrainer(
            data,
            'x_14',
            eval_size=0.2,
            validation_size=0.2,
            eval_metrics=['AUC', 'accuracy', 'recall', 'precision', 'f1'],
            dt_config=conf,
            verbose=0,
            dt_epochs=1,
            cross_validation=True,
            num_folds=2,
            retain_single_model=True,
        )

        ms = bt.start()
        eval_lb = ms.leaderboard(type='eval')
        oof_lb = ms.leaderboard(type='oof')
        val_lb = ms.leaderboard(type='val')
        # Compare explicitly: `assert len(x), n` would only test truthiness
        # and use n as the failure message.
        # NOTE(review): the expected row counts (5 and 1) are carried over
        # from the earlier assertions -- confirm against the models that
        # `bt.start()` trains by default.
        assert len(eval_lb) == 5
        assert len(oof_lb) == 1
        assert val_lb is None
Пример #20
0
 def test_run_cross_validation(self):
     """Run 5-fold cross validation with an explicit ``data_test`` frame.

     The same 1000-row frame is passed as both the training data and
     ``data_test``; only the ``'dt'`` models are started and the column count
     of the leaderboard is checked.
     """
     data = dsutils.load_adult().head(1000)
     conf = deeptable.ModelConfig(
         fixed_embedding_dim=False,
         embeddings_output_dim=0,
         apply_gbm_features=False,
         auto_discrete=False,
     )
     bt = batch_trainer.BatchTrainer(
         data,
         'x_14',
         data_test=data,
         eval_size=0.2,
         validation_size=0.2,
         # Metric names noted by the author:
         # AUC/recall/precision/f1/mse/mae/msle/rmse/r2
         eval_metrics=['AUC', 'accuracy', 'recall', 'precision', 'f1'],
         dt_config=conf,
         verbose=0,
         dt_epochs=1,
         cross_validation=True,
         num_folds=5,
     )
     ms = bt.start(models=['dt'])
     # Compare explicitly: `assert x, 7` would only test the truthiness of x
     # and use 7 as the failure message.
     # NOTE(review): 7 is the expected column count carried over from the
     # earlier assertion -- confirm it matches the leaderboard layout.
     assert ms.leaderboard().shape[1] == 7
Пример #21
0
    def setup_class(self):
        """Load the data, split it and fit a regression DeepTable for 100 epochs.

        The features/target, the split, the ``DeepTable`` instance and the
        ``(model, history)`` pair returned by ``fit`` are stored as attributes.
        """
        self.X, self.y = self.load_data()

        self.dt = deeptable.DeepTable(config=deeptable.ModelConfig(
            task=consts.TASK_REGRESSION,
            metrics=[r2_c, 'RootMeanSquaredError'],
            apply_gbm_features=False,
        ))

        tool_box = get_tool_box(self.X)
        split = tool_box.train_test_split(
            self.X, self.y, test_size=0.2, random_state=42)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        fitted = self.dt.fit(
            self.X_train, self.y_train, batch_size=32, epochs=100)
        self.model, self.history = fitted
Пример #22
0
    def test_gbm_feature_embedding(self):
        """GBM leaf features of type EMB should add one column per estimator.

        After ``run_dt`` the preprocessor's categorical columns are filtered
        for names containing ``'lgbm_leaf'`` and counted.
        """
        n_estimators = 10
        conf = deeptable.ModelConfig(
            metrics=['AUC'],
            apply_gbm_features=True,
            gbm_feature_type=consts.GBM_FEATURE_TYPE_EMB,
            gbm_params={'learning_rate': 0.01, 'colsample_bytree': 0.95, 'reg_alpha': 0.04,
                        'reg_lambda': 0.07, 'n_estimators': n_estimators},
        )

        dt, dm, history = self.run_dt(conf)
        lgbm_leaves = [c for c in dt.preprocessor.get_categorical_columns() if 'lgbm_leaf' in c]
        # Compare explicitly: `assert len(x), 10` would only test truthiness
        # and use 10 as the failure message.
        # NOTE(review): assumes one leaf column per estimator, as the
        # earlier expectation of 10 suggests.
        assert len(lgbm_leaves) == n_estimators
Пример #23
0
    def test_cache_preprocessed_data(self):
        """Fit twice through one cache-enabled preprocessor.

        Two separate ``DeepTable`` instances share the same
        ``DefaultPreprocessor`` (``use_cache=True``); the test passes if both
        one-epoch fits complete without raising.
        """
        config = deeptable.ModelConfig(
            metrics=['AUC'],
            apply_gbm_features=False,
            apply_class_weight=True,
        )
        frame = dsutils.load_adult().head(100)
        y = frame.pop(14).values
        X = frame

        cache_home = homedir + '/cache'
        preprocessor = DefaultPreprocessor(
            config, cache_home=cache_home, use_cache=True)

        # Same preprocessor, fresh DeepTable each round.
        for _ in range(2):
            dt = deeptable.DeepTable(config=config, preprocessor=preprocessor)
            dt.fit(X, y, epochs=1)
Пример #24
0
    def setup_class(self):
        """Prepare dask fixtures from the glass dataset and fit for 3 epochs.

        Each part of the split is ``persist()``-ed before being stored; the
        ``(model, history)`` pair returned by ``fit`` is stored as well.
        """
        setup_dask(self)

        print("Loading datasets...")
        glass = dsutils.load_glass_uci()
        data = dd.from_pandas(glass, npartitions=2)
        self.y = data.pop(10).values
        self.X = data

        self.dt = deeptable.DeepTable(
            config=deeptable.ModelConfig(metrics=['AUC'],
                                         apply_gbm_features=False))

        parts = get_tool_box(data).train_test_split(
            self.X, self.y, test_size=0.2, random_state=42)
        persisted = [part.persist() for part in parts]
        self.X_train, self.X_test, self.y_train, self.y_test = persisted

        fitted = self.dt.fit(
            self.X_train, self.y_train, batch_size=32, epochs=3)
        self.model, self.history = fitted
Пример #25
0
    def test_default_settings(self):
        """Train with default settings, then save, reload and predict.

        After ``save`` both ``dt.pkl`` and ``dnn_nets.h5`` must exist under
        the target path; the reloaded model predicts on ``self.df`` with the
        target column removed.
        """
        config = deeptable.ModelConfig(metrics=['AUC'], apply_gbm_features=False, apply_class_weight=True)
        dt, _ = self.run_dt(config)

        # test save and load
        filepath = f'{type(self).__name__}_{time.strftime("%Y%m%d%H%M%S")}'
        dt.save(filepath)
        assert fs.exists(f'{filepath}/dt.pkl')
        assert fs.exists(f'{filepath}/dnn_nets.h5')
        newdt = deeptable.DeepTable.load(filepath)
        X_eval = self.df.copy()
        X_eval.pop(self.target)
        preds = newdt.predict(X_eval)
        # Compare explicitly: `assert preds.shape, (...)` would only test
        # truthiness and use the tuple as the failure message.
        # NOTE(review): assumes predict returns a 1-D array with one entry
        # per row, as the earlier expectation suggests.
        assert preds.shape == (self.df_row_count,)
Пример #26
0
    def setup_class(self):
        """Load 1000 adult rows, split them and fit a DeepTable for one epoch.

        The features/target, the split, the ``DeepTable`` instance and the
        ``(model, history)`` pair returned by ``fit`` are stored as attributes.
        """
        print("Loading datasets...")
        frame = dsutils.load_adult().head(1000)
        self.y = frame.pop(14).values
        self.X = frame

        self.dt = deeptable.DeepTable(config=deeptable.ModelConfig(
            metrics=['AUC'],
            apply_gbm_features=False,
            apply_class_weight=True,
        ))

        split = train_test_split(self.X, self.y, test_size=0.2, random_state=42)
        self.X_train, self.X_test, self.y_train, self.y_test = split

        fitted = self.dt.fit(self.X_train, self.y_train, epochs=1)
        self.model, self.history = fitted
Пример #27
0
    def test_gbm_features_with_params(self):
        """``gbm_params`` must reach the LightGBM estimator used for GBM features.

        Every key of ``params`` is read back from the estimator stored on the
        ``'gbm_features'`` transformer, and the number of ``lgbm_leaf``
        categorical columns is compared with ``n_estimators``.
        """
        params = {'learning_rate': 0.01, 'colsample_bytree': 0.95,
                  'reg_alpha': 0.04, 'reg_lambda': 0.07,
                  'n_estimators': 10}
        config = deeptable.ModelConfig(metrics=['AUC'],
                                       apply_gbm_features=True,
                                       gbm_params=params,
                                       )
        dt, _ = self.run_dt(config)
        lgbm = dt.preprocessor.X_transformers['gbm_features'].lgbm
        assert all(getattr(lgbm, k, None) == v for k, v in params.items())

        lgbm_leaves = [c for c in dt.preprocessor.get_categorical_columns() if 'lgbm_leaf' in c]
        # Compare explicitly: `assert len(x), 10` would only test truthiness
        # and use 10 as the failure message.
        # NOTE(review): assumes one leaf column per estimator, as the
        # earlier expectation of 10 suggests.
        assert len(lgbm_leaves) == params['n_estimators']
Пример #28
0
 def test_gbm_feature_dense(self):
     """GBM features of type DENSE must show up as model layers.

     After ``run_dt`` the model is expected to contain a layer named
     ``consts.INPUT_PREFIX_NUM + 'gbm_leaves'`` and one named
     ``'concat_continuous_inputs'``.
     """
     conf = deeptable.ModelConfig(
         metrics=['AUC'],
         apply_gbm_features=True,
         gbm_feature_type=consts.GBM_FEATURE_TYPE_DENSE,
         gbm_params={'learning_rate': 0.01, 'colsample_bytree': 0.95, 'reg_alpha': 0.04,
                     'reg_lambda': 0.07, 'n_estimators': 10},
     )
     # Only the middle element of the returned triple is inspected here.
     _, dm, _ = self.run_dt(conf)
     dense_lgbm_input = dm.model.get_layer(consts.INPUT_PREFIX_NUM + 'gbm_leaves')
     concat_continuous_inputs = dm.model.get_layer('concat_continuous_inputs')
     assert dense_lgbm_input
     assert concat_continuous_inputs
Пример #29
0
 def test_only_var_len_categorical_feature(self):
     """Train a regression ``dnn_nets`` model with ``genres`` as the only feature.

     The feature frame, the ``rating`` target and the config are handed to
     ``self._train_and_asset``.
     """
     movielens: pd.DataFrame = self.df_movielens.copy()
     features = movielens[['genres']]
     target = movielens['rating']
     settings = dict(
         nets=['dnn_nets'],
         task=consts.TASK_REGRESSION,
         metrics=['mse'],
         fixed_embedding_dim=True,
         embeddings_output_dim=4,
         apply_gbm_features=False,
         apply_class_weight=True,
         earlystopping_patience=3,
     )
     conf = deeptable.ModelConfig(**settings)
     self._train_and_asset(features, target, conf)
Пример #30
0
    def test_embeddings_output_dim(self):
        """Smoke-test a fit with ``fixed_embedding_dim=False`` and output dim 0.

        Passes if a one-epoch fit on the training split of 1000 adult rows
        completes without raising.
        """
        print("Loading datasets...")
        frame = dsutils.load_adult().head(1000)
        y = frame.pop(14).values
        X = frame

        dt = deeptable.DeepTable(config=deeptable.ModelConfig(
            fixed_embedding_dim=False,
            embeddings_output_dim=0,
        ))

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)
        model, history = dt.fit(X_train, y_train, epochs=1)