def test_lightgbm_classifier():
    """RFE feature selection with the LightGBM learning-API learner on the BankNote dataset.

    Checks both the number of selected variables and the exact (ordered)
    selection produced with a fixed split seed.
    """
    # Classification learner. Fix: the original had a duplicated assignment
    # (`params = params = {...}`); a single assignment is sufficient.
    params = {
        'objective': 'binary',
        'metric': ['auc', 'binary_logloss'],
        'boosting_type': 'gbdt',
    }
    classifier_cls = get_lightgbm_learner_learning_api(
        params, num_boost_round=10000, early_stopping_rounds=5,
        verbose_eval=50, split_random_seed=42)
    fs = RFE(classifier_cls)
    dataset = BankNote()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(
        entity=None, max_lag=None, entity_name='*', exclude=[target_column])

    # Feature selection: keep all but 5 features (at least 1).
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    n_vars = max(x_df.shape[1] - 5, 1)
    # Fix: the fitted model was bound to an unused local `m`; drop it.
    fs.fit(x_df, y_df, n_vars)

    # Assertions: selection is deterministic given split_random_seed=42.
    assert len(fs.selected_variables) == n_vars
    assert fs.selected_variables == [
        'Variance', 'Kurtosis', 'Skewness.ABS(* - MEAN(*))', 'Skewness',
        'Variance.ABS(* - MEAN(*))', 'Entropy', 'Variance.ABS(* - Q25(*))',
        'Kurtosis.ABS(* - MEDIAN(*))', 'Kurtosis.ABS(* - Q75(*))',
        'Skewness.ABS(* - MEDIAN(*))', 'Kurtosis.ABS(* - Q25(*))',
        'Kurtosis.ABS(* - MEAN(*))', 'Variance.ABS(* - MEDIAN(*))',
        'Entropy.ABS(* - MEDIAN(*))', 'Entropy.ABS(* - Q25(*))']
def test_lean_boosted_sklearn_classifier():
    """Save/load round-trip for lean-boosted sklearn classifiers on BankNote.

    For each sklearn classifier class: fit a lean-boosted model, persist the
    first underlying model to disk, reload it through the learner factory,
    and verify the reloaded model predicts identically.
    """
    classifier_paths = [
        'sklearn.neighbors.KNeighborsClassifier',
        'sklearn.ensemble.RandomForestClassifier',
        'sklearn.ensemble.AdaBoostClassifier',
    ]
    for clz in classifier_paths:
        # Classification
        sklearn_classifier_cls = get_sklearn_learner(clz)
        dataset = BankNote()
        target_column = dataset.y_column
        df = dataset.df

        # Features generation
        features_df = df.kxy.generate_features(
            entity=None, max_lag=None, entity_name='*',
            exclude=[target_column])

        # Model building
        results = features_df.kxy.fit(
            target_column, sklearn_classifier_cls,
            problem_type='classification', additive_learning=True,
            return_scores=True, n_down_perf_before_stop=1)
        model = results['predictor'].models[0]
        feature_columns = results['Selected Variables']
        x = features_df[feature_columns].values
        predictions = model.predict(x)

        # Persist, reload, and compare predictions.
        path = '../kxy/misc/cache/%s-%s.sav' % (dataset.name, clz)
        model.save(path)
        loaded_model = sklearn_classifier_cls(path=path)
        loaded_predictions = loaded_model.predict(x)
        assert np.allclose(predictions, loaded_predictions)
def test_non_additive_lean_boosted_classifier():
    """Non-additive lean boosting with the LightGBM learning-API learner.

    Pins the testing accuracy and the exact variable selection obtained with
    a fixed split seed on the BankNote dataset.
    """
    # Classification
    params = {
        'objective': 'binary',
        'metric': ['auc', 'binary_logloss'],
        'boosting_type': 'gbdt',
    }
    lightgbm_classifier_cls = get_lightgbm_learner_learning_api(
        params, num_boost_round=10000, split_random_seed=42)
    dataset = BankNote()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation; the target is cast to int for LightGBM.
    features_df = df.kxy.generate_features(
        entity=None, max_lag=None, entity_name='*', exclude=[target_column])
    features_df[target_column] = features_df[target_column].astype(int)

    # Model building (additive_learning=False => non-additive boosting).
    results = features_df.kxy.fit(
        target_column, lightgbm_classifier_cls,
        problem_type='classification', additive_learning=False,
        return_scores=True, n_down_perf_before_stop=1)

    assert results['Testing Accuracy'] == '0.964'
    assert results['Selected Variables'] == [
        'Variance', 'Skewness.ABS(* - Q25(*))', 'Kurtosis', 'Skewness',
        'Entropy']
def test_xgboost_classifier():
    """RFE feature selection with an XGBoost binary classifier on BankNote.

    Checks both the number of selected variables and the exact (ordered)
    selection.
    """
    # Binary classification
    classifier_cls = get_xgboost_learner(
        'xgboost.XGBClassifier', use_label_encoder=False,
        eval_metric='logloss', learning_rate=0.1, max_depth=10)
    fs = RFE(classifier_cls)
    dataset = BankNote()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(
        entity=None, max_lag=None, entity_name='*', exclude=[target_column])

    # Feature selection: keep all but 5 features (at least 1).
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    n_vars = max(x_df.shape[1] - 5, 1)
    # Fix: the fitted model was bound to an unused local `m`; drop it.
    fs.fit(x_df, y_df, n_vars)

    # Assertions
    assert len(fs.selected_variables) == n_vars
    assert fs.selected_variables == [
        'Variance', 'Skewness', 'Kurtosis', 'Entropy',
        'Skewness.ABS(* - MEDIAN(*))', 'Variance.ABS(* - MEAN(*))',
        'Skewness.ABS(* - MEAN(*))', 'Kurtosis.ABS(* - MEDIAN(*))',
        'Kurtosis.ABS(* - Q25(*))', 'Entropy.ABS(* - MEDIAN(*))',
        'Skewness.ABS(* - Q25(*))', 'Entropy.ABS(* - MEAN(*))',
        'Variance.ABS(* - Q25(*))', 'Kurtosis.ABS(* - MEAN(*))',
        'Kurtosis.ABS(* - Q75(*))']
def test_lean_boosted_tensorflow_classifier():
    """Lean boosting with a small dense Keras binary classifier on BankNote."""
    import tensorflow as tf
    tf.random.set_seed(0)  # deterministic weight init for reproducible scores

    # Binary classification network: two hidden relu layers, sigmoid output.
    layers = [(10, 'relu'), (5, 'relu'), (1, 'sigmoid')]
    loss = 'binary_crossentropy'
    optimizer = 'adam'
    tf_classifier_cls = get_tensorflow_dense_learner(
        'KerasClassifier', layers, loss, optimizer=optimizer,
        epochs=100, batch_size=100)
    dataset = BankNote()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation (fill_na=True: the network cannot handle NaNs).
    features_df = df.kxy.generate_features(
        entity=None, max_lag=None, entity_name='*',
        exclude=[target_column], fill_na=True)

    # Model building
    results = features_df.kxy.fit(
        target_column, tf_classifier_cls, problem_type='classification',
        additive_learning=True, return_scores=True,
        n_down_perf_before_stop=1)

    assert results['Testing Accuracy'] == '1.000'
    assert results['Selected Variables'] == [
        'Variance', 'Skewness.ABS(* - Q25(*))', 'Kurtosis', 'Skewness']
def test_lean_boosted_xgboost_classifier():
    """Lean boosting with an XGBoost binary classifier on BankNote.

    Pins the testing accuracy and the exact variable selection.
    """
    # Binary classification
    xgboost_classifier_cls = get_xgboost_learner(
        'xgboost.XGBClassifier', use_label_encoder=False,
        eval_metric='logloss', learning_rate=0.1, max_depth=10)
    dataset = BankNote()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(
        entity=None, max_lag=None, entity_name='*', exclude=[target_column])

    # Model building
    results = features_df.kxy.fit(
        target_column, xgboost_classifier_cls,
        problem_type='classification', additive_learning=True,
        return_scores=True, n_down_perf_before_stop=1)

    assert results['Testing Accuracy'] == '0.974'
    assert results['Selected Variables'] == [
        'Variance', 'Skewness.ABS(* - Q25(*))', 'Kurtosis']
def test_autogluon():
    """Non-additive lean boosting with an AutoGluon binary learner on BankNote."""
    autogluon_learner_func = get_autogluon_learner(problem_type='binary')
    dataset = BankNote()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation; the target is cast to int for AutoGluon.
    features_df = df.kxy.generate_features(
        entity=None, max_lag=None, entity_name='*', exclude=[target_column])
    features_df[target_column] = features_df[target_column].astype(int)

    # Model building (additive_learning=False => non-additive boosting).
    results = features_df.kxy.fit(
        target_column, autogluon_learner_func,
        problem_type='classification', additive_learning=False,
        return_scores=True, n_down_perf_before_stop=1)

    assert results['Testing Accuracy'] == '1.000'
    assert results['Selected Variables'] == [
        'Variance', 'Skewness.ABS(* - Q25(*))', 'Kurtosis', 'Skewness']
def test_lean_boosted_lightgbm_classifier():
    """Lean boosting with the LightGBM sklearn-API classifier on BankNote.

    Pins the testing accuracy and the exact variable selection.
    """
    # Classification
    lightgbm_classifier_cls = get_lightgbm_learner_sklearn_api(
        'lightgbm.LGBMClassifier')
    dataset = BankNote()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(
        entity=None, max_lag=None, entity_name='*', exclude=[target_column])

    # Model building
    results = features_df.kxy.fit(
        target_column, lightgbm_classifier_cls,
        problem_type='classification', additive_learning=True,
        return_scores=True, n_down_perf_before_stop=1)

    assert results['Testing Accuracy'] == '0.989'
    assert results['Selected Variables'] == [
        'Variance', 'Skewness.ABS(* - Q25(*))', 'Kurtosis']
def test_lean_boosted_pytorch_classifier():
    """Lean boosting with a small dense skorch/PyTorch binary classifier on BankNote."""
    from torch import nn
    import torch
    torch.manual_seed(0)  # deterministic weight init for reproducible scores

    # Binary classification network: two hidden relu layers, sigmoid output.
    layers = [(10, 'relu'), (5, 'relu'), (1, 'sigmoid')]
    pt_classifier_cls = get_pytorch_dense_learner(
        'skorch.NeuralNetClassifier', layers, max_epochs=100,
        batch_size=100, criterion=nn.BCELoss)
    dataset = BankNote()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation (fill_na=True: the network cannot handle NaNs).
    features_df = df.kxy.generate_features(
        entity=None, max_lag=None, entity_name='*',
        exclude=[target_column], fill_na=True)

    # Model building
    results = features_df.kxy.fit(
        target_column, pt_classifier_cls, problem_type='classification',
        additive_learning=True, return_scores=True,
        n_down_perf_before_stop=1)

    assert results['Testing Accuracy'] == '0.573'
    assert results['Selected Variables'] == []