Пример #1
0
def test_lightgbm_classifier():
    """Check RFE feature selection with a LightGBM (learning API) classifier.

    Features are generated from the BankNote dataset, RFE is asked to keep
    all but five of the candidate columns (and at least one), and the test
    pins both the number and the exact ordered list of selected variables.
    """
    # Classification
    params = {
        'objective': 'binary',
        'metric': ['auc', 'binary_logloss'],
        'boosting_type': 'gbdt',
    }
    classifier_cls = get_lightgbm_learner_learning_api(
        params,
        num_boost_round=10000,
        early_stopping_rounds=5,
        verbose_eval=50,
        split_random_seed=42,
    )
    fs = RFE(classifier_cls)

    dataset = BankNote()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Feature selection
    x_columns = [col for col in features_df.columns if col != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    n_vars = max(x_df.shape[1] - 5, 1)
    # The return value of fit() is not needed; results are read from `fs`.
    fs.fit(x_df, y_df, n_vars)

    # Assertions
    expected_variables = [
        'Variance',
        'Kurtosis',
        'Skewness.ABS(* - MEAN(*))',
        'Skewness',
        'Variance.ABS(* - MEAN(*))',
        'Entropy',
        'Variance.ABS(* - Q25(*))',
        'Kurtosis.ABS(* - MEDIAN(*))',
        'Kurtosis.ABS(* - Q75(*))',
        'Skewness.ABS(* - MEDIAN(*))',
        'Kurtosis.ABS(* - Q25(*))',
        'Kurtosis.ABS(* - MEAN(*))',
        'Variance.ABS(* - MEDIAN(*))',
        'Entropy.ABS(* - MEDIAN(*))',
        'Entropy.ABS(* - Q25(*))',
    ]
    assert len(fs.selected_variables) == n_vars
    assert fs.selected_variables == expected_variables
Пример #2
0
def test_lean_boosted_sklearn_classifier():
    """Check that saved sklearn-based models reload with identical predictions.

    For each listed sklearn classifier, a model is fitted through
    ``features_df.kxy.fit`` with additive learning, the first model of the
    returned predictor is saved to disk, reloaded through the learner
    factory, and the predictions of both instances are compared.
    """
    classifier_paths = (
        'sklearn.neighbors.KNeighborsClassifier',
        'sklearn.ensemble.RandomForestClassifier',
        'sklearn.ensemble.AdaBoostClassifier',
    )
    for clz in classifier_paths:
        # Learner factory for the current classifier
        learner_factory = get_sklearn_learner(clz)

        bank_note = BankNote()
        label = bank_note.y_column
        raw_df = bank_note.df

        # Build the candidate features, leaving the label untouched
        feats = raw_df.kxy.generate_features(
            entity=None, max_lag=None, entity_name='*', exclude=[label])

        # Fit and grab the first underlying model
        fit_results = feats.kxy.fit(
            label,
            learner_factory,
            problem_type='classification',
            additive_learning=True,
            return_scores=True,
            n_down_perf_before_stop=1,
        )
        fitted = fit_results['predictor'].models[0]
        selected = fit_results['Selected Variables']
        inputs = feats[selected].values
        expected = fitted.predict(inputs)

        # Round-trip the model through disk
        save_path = '../kxy/misc/cache/%s-%s.sav' % (bank_note.name, clz)
        fitted.save(save_path)
        restored = learner_factory(path=save_path)
        actual = restored.predict(inputs)

        assert np.allclose(expected, actual)
Пример #3
0
def test_non_additive_lean_boosted_classifier():
    """Check a non-additive fit with a LightGBM (learning API) classifier.

    The BankNote label is cast to int before fitting with
    ``additive_learning=False``; the reported testing accuracy and the
    selected variables are compared with fixed expected values.
    """
    # LightGBM binary-classification settings
    lgbm_params = {
        'objective': 'binary',
        'metric': ['auc', 'binary_logloss'],
        'boosting_type': 'gbdt',
    }
    learner_factory = get_lightgbm_learner_learning_api(
        lgbm_params, num_boost_round=10000, split_random_seed=42)

    bank_note = BankNote()
    label = bank_note.y_column
    raw_df = bank_note.df

    # Build the candidate features, then cast the label column to int
    feats = raw_df.kxy.generate_features(
        entity=None, max_lag=None, entity_name='*', exclude=[label])
    feats[label] = feats[label].astype(int)

    # Fit without additive learning
    fit_results = feats.kxy.fit(
        label,
        learner_factory,
        problem_type='classification',
        additive_learning=False,
        return_scores=True,
        n_down_perf_before_stop=1,
    )

    expected_variables = [
        'Variance',
        'Skewness.ABS(* - Q25(*))',
        'Kurtosis',
        'Skewness',
        'Entropy',
    ]
    assert fit_results['Testing Accuracy'] == '0.964'
    assert fit_results['Selected Variables'] == expected_variables
Пример #4
0
def test_xgboost_classifier():
    """Check RFE feature selection with an XGBoost binary classifier.

    Features are generated from the BankNote dataset, RFE is asked to keep
    all but five of the candidate columns (and at least one), and the test
    pins both the number and the exact ordered list of selected variables.
    """
    # Binary classification
    classifier_cls = get_xgboost_learner('xgboost.XGBClassifier',
                                         use_label_encoder=False,
                                         eval_metric='logloss',
                                         learning_rate=0.1,
                                         max_depth=10)
    fs = RFE(classifier_cls)

    dataset = BankNote()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Feature selection
    x_columns = [col for col in features_df.columns if col != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    n_vars = max(x_df.shape[1] - 5, 1)
    # The return value of fit() is not needed; results are read from `fs`.
    fs.fit(x_df, y_df, n_vars)

    # Assertions
    expected_variables = [
        'Variance',
        'Skewness',
        'Kurtosis',
        'Entropy',
        'Skewness.ABS(* - MEDIAN(*))',
        'Variance.ABS(* - MEAN(*))',
        'Skewness.ABS(* - MEAN(*))',
        'Kurtosis.ABS(* - MEDIAN(*))',
        'Kurtosis.ABS(* - Q25(*))',
        'Entropy.ABS(* - MEDIAN(*))',
        'Skewness.ABS(* - Q25(*))',
        'Entropy.ABS(* - MEAN(*))',
        'Variance.ABS(* - Q25(*))',
        'Kurtosis.ABS(* - MEAN(*))',
        'Kurtosis.ABS(* - Q75(*))',
    ]
    assert len(fs.selected_variables) == n_vars
    assert fs.selected_variables == expected_variables
Пример #5
0
def test_lean_boosted_tensorflow_classifier():
    """Check an additive fit with a dense TensorFlow/Keras classifier.

    The TensorFlow seed is fixed, a three-layer dense classifier learner is
    built, and the testing accuracy and selected variables returned by
    ``features_df.kxy.fit`` are compared with fixed expected values.
    """
    import tensorflow as tf
    tf.random.set_seed(0)

    # Classification: dense network ending in a single sigmoid unit
    dense_layers = [(10, 'relu'), (5, 'relu'), (1, 'sigmoid')]
    loss_name = 'binary_crossentropy'
    optimizer_name = 'adam'
    learner_factory = get_tensorflow_dense_learner(
        'KerasClassifier',
        dense_layers,
        loss_name,
        optimizer=optimizer_name,
        epochs=100,
        batch_size=100,
    )

    bank_note = BankNote()
    label = bank_note.y_column
    raw_df = bank_note.df

    # Build the candidate features, with fill_na enabled
    feats = raw_df.kxy.generate_features(
        entity=None,
        max_lag=None,
        entity_name='*',
        exclude=[label],
        fill_na=True,
    )

    # Fit with additive learning
    fit_results = feats.kxy.fit(
        label,
        learner_factory,
        problem_type='classification',
        additive_learning=True,
        return_scores=True,
        n_down_perf_before_stop=1,
    )

    expected_variables = [
        'Variance',
        'Skewness.ABS(* - Q25(*))',
        'Kurtosis',
        'Skewness',
    ]
    assert fit_results['Testing Accuracy'] == '1.000'
    assert fit_results['Selected Variables'] == expected_variables
Пример #6
0
def test_lean_boosted_xgboost_classifier():
    """Check an additive fit with an XGBoost binary classifier.

    The testing accuracy and selected variables returned by
    ``features_df.kxy.fit`` on BankNote features are compared with fixed
    expected values.
    """
    # Learner factory for a binary XGBoost classifier
    learner_factory = get_xgboost_learner(
        'xgboost.XGBClassifier',
        use_label_encoder=False,
        eval_metric='logloss',
        learning_rate=0.1,
        max_depth=10,
    )

    bank_note = BankNote()
    label = bank_note.y_column
    raw_df = bank_note.df

    # Build the candidate features, leaving the label untouched
    feats = raw_df.kxy.generate_features(
        entity=None, max_lag=None, entity_name='*', exclude=[label])

    # Fit with additive learning
    fit_results = feats.kxy.fit(
        label,
        learner_factory,
        problem_type='classification',
        additive_learning=True,
        return_scores=True,
        n_down_perf_before_stop=1,
    )

    expected_variables = ['Variance', 'Skewness.ABS(* - Q25(*))', 'Kurtosis']
    assert fit_results['Testing Accuracy'] == '0.974'
    assert fit_results['Selected Variables'] == expected_variables
Пример #7
0
def test_autogluon():
    """Check a non-additive fit with an AutoGluon binary learner.

    The BankNote label is cast to int before fitting with
    ``additive_learning=False``; the reported testing accuracy and the
    selected variables are compared with fixed expected values.
    """
    learner_factory = get_autogluon_learner(problem_type='binary')

    bank_note = BankNote()
    label = bank_note.y_column
    raw_df = bank_note.df

    # Build the candidate features, then cast the label column to int
    feats = raw_df.kxy.generate_features(
        entity=None, max_lag=None, entity_name='*', exclude=[label])
    feats[label] = feats[label].astype(int)

    # Fit without additive learning
    fit_results = feats.kxy.fit(
        label,
        learner_factory,
        problem_type='classification',
        additive_learning=False,
        return_scores=True,
        n_down_perf_before_stop=1,
    )

    expected_variables = [
        'Variance',
        'Skewness.ABS(* - Q25(*))',
        'Kurtosis',
        'Skewness',
    ]
    assert fit_results['Testing Accuracy'] == '1.000'
    assert fit_results['Selected Variables'] == expected_variables
Пример #8
0
def test_lean_boosted_lightgbm_classifier():
    """Check an additive fit with a LightGBM (sklearn API) classifier.

    The testing accuracy and selected variables returned by
    ``features_df.kxy.fit`` on BankNote features are compared with fixed
    expected values.
    """
    # Learner factory for the sklearn-style LightGBM classifier
    learner_factory = get_lightgbm_learner_sklearn_api('lightgbm.LGBMClassifier')

    bank_note = BankNote()
    label = bank_note.y_column
    raw_df = bank_note.df

    # Build the candidate features, leaving the label untouched
    feats = raw_df.kxy.generate_features(
        entity=None, max_lag=None, entity_name='*', exclude=[label])

    # Fit with additive learning
    fit_results = feats.kxy.fit(
        label,
        learner_factory,
        problem_type='classification',
        additive_learning=True,
        return_scores=True,
        n_down_perf_before_stop=1,
    )

    expected_variables = ['Variance', 'Skewness.ABS(* - Q25(*))', 'Kurtosis']
    assert fit_results['Testing Accuracy'] == '0.989'
    assert fit_results['Selected Variables'] == expected_variables
Пример #9
0
def test_lean_boosted_pytorch_classifier():
    """Check an additive fit with a dense PyTorch (skorch) classifier.

    The torch seed is fixed, a three-layer dense classifier learner is built
    with ``nn.BCELoss`` as criterion, and the testing accuracy and selected
    variables returned by ``features_df.kxy.fit`` are compared with fixed
    expected values.
    """
    from torch import nn
    import torch
    torch.manual_seed(0)

    # Classification: dense network ending in a single sigmoid unit
    dense_layers = [(10, 'relu'), (5, 'relu'), (1, 'sigmoid')]
    learner_factory = get_pytorch_dense_learner(
        'skorch.NeuralNetClassifier',
        dense_layers,
        max_epochs=100,
        batch_size=100,
        criterion=nn.BCELoss,
    )

    bank_note = BankNote()
    label = bank_note.y_column
    raw_df = bank_note.df

    # Build the candidate features, with fill_na enabled
    feats = raw_df.kxy.generate_features(
        entity=None,
        max_lag=None,
        entity_name='*',
        exclude=[label],
        fill_na=True,
    )

    # Fit with additive learning
    fit_results = feats.kxy.fit(
        label,
        learner_factory,
        problem_type='classification',
        additive_learning=True,
        return_scores=True,
        n_down_perf_before_stop=1,
    )

    expected_variables = []
    assert fit_results['Testing Accuracy'] == '0.573'
    assert fit_results['Selected Variables'] == expected_variables