def test_evaluate_regression_optimiser():
    """Test evaluate method of Optimiser class for regression."""
    reader = Reader(sep=",")
    # Renamed from `dict` to avoid shadowing the builtin.
    data = reader.train_test_split(
        Lpath=["data_for_tests/train_regression.csv",
               "data_for_tests/test_regression.csv"],
        target_name="SalePrice")
    drift_thresholder = Drift_thresholder()
    # Keep the transformed dataset; the original code assigned the
    # result back to `drift_thresholder` and kept using the raw dict.
    data = drift_thresholder.fit_transform(data)

    # Mean absolute percentage error; greater_is_better=False so
    # scikit-learn negates the score and the optimiser minimises it.
    mape = make_scorer(
        lambda y_true, y_pred: 100 * np.sum(
            np.abs(y_true - y_pred) / y_true) / len(y_true),
        greater_is_better=False,
        needs_proba=False)

    # A custom scorer triggers exactly one UserWarning at construction.
    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring=mape, n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, data)
    assert -np.inf <= score  # np.inf: the np.Inf alias was removed in NumPy 2.0

    # scoring=None makes Optimiser pick a default metric (and warn).
    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring=None, n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, data)
    assert -np.inf <= score

    # An unknown scoring string warns at construction and again when
    # evaluate() falls back to a default metric.
    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring="wrong_scoring", n_folds=3)
    assert len(record) == 1
    with pytest.warns(UserWarning) as record:
        score = opt.evaluate(None, data)
    assert -np.inf <= score
def test_evaluate_classification_optimiser():
    """Test evaluate method of Optimiser class for classification."""
    reader = Reader(sep=",")
    # Renamed from `dict` to avoid shadowing the builtin.
    data = reader.train_test_split(
        Lpath=["data_for_tests/train.csv", "data_for_tests/test.csv"],
        target_name="Survived")
    drift_thresholder = Drift_thresholder()
    # Keep the transformed dataset; the original code assigned the
    # result back to `drift_thresholder` and kept using the raw dict.
    data = drift_thresholder.fit_transform(data)

    # scoring=None makes Optimiser pick a default metric (and warn).
    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring=None, n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, data)
    assert -np.inf <= score  # np.inf: the np.Inf alias was removed in NumPy 2.0

    # roc_auc scores are probabilities-based and bounded in [0, 1].
    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring="roc_auc", n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, data)
    assert 0. <= score <= 1.

    # An unknown scoring string warns and falls back to neg_log_loss.
    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring="wrong_scoring", n_folds=3)
    assert len(record) == 1
    with pytest.warns(UserWarning) as record:
        score = opt.evaluate(None, data)
    assert opt.scoring == "neg_log_loss"
def test_fit_predict_predictor_regression(mock_show):
    """Test fit_predict method of Predictor class for regression."""
    reader = Reader(sep=',')
    data = reader.train_test_split(
        Lpath=["data_for_tests/train_regression.csv",
               "data_for_tests/test_regression.csv"],
        target_name="SalePrice")

    thresholder = Drift_thresholder()
    data = thresholder.fit_transform(data)

    # Mean absolute percentage error (lower is better, hence
    # greater_is_better=False).
    def _mape(y_true, y_pred):
        return 100 * np.sum(np.abs(y_true - y_pred) / y_true) / len(y_true)

    mape = make_scorer(_mape, greater_is_better=False, needs_proba=False)

    opt = Optimiser(scoring=mape, n_folds=3)
    opt.evaluate(None, data)

    # Search space over pipeline steps: numerical encoder (ne),
    # categorical encoder (ce), feature selector (fs), estimator (est).
    space = {
        'ne__numerical_strategy': {"search": "choice", "space": [0]},
        'ce__strategy': {
            "search": "choice",
            "space": ["label_encoding", "random_projection",
                      "entity_embedding"],
        },
        'fs__threshold': {"search": "uniform", "space": [0.01, 0.3]},
        'est__max_depth': {"search": "choice", "space": [3, 4, 5, 6, 7]},
    }
    best_params = opt.optimise(space, data, 1)

    predictor = Predictor(verbose=True)
    predictor.fit_predict(best_params, data)

    # Predictions are dumped to disk by fit_predict; check the file.
    predictions = pd.read_csv("save/SalePrice_predictions.csv")
    assert list(predictions.columns) == ['Unnamed: 0', 'SalePrice_predicted']
    assert predictions.shape == (1459, 2)
def test_evaluate_and_optimise_classification():
    """Test evaluate and optimise methods of Optimiser class."""
    reader = Reader(sep=",")
    # Renamed from `dict` to avoid shadowing the builtin.
    data = reader.train_test_split(
        Lpath=["data_for_tests/train.csv", "data_for_tests/test.csv"],
        target_name="Survived")
    drift_thresholder = Drift_thresholder()
    # Keep the transformed dataset; the original code assigned the
    # result back to `drift_thresholder` and kept using the raw dict.
    data = drift_thresholder.fit_transform(data)

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring='accuracy', n_folds=3)
    assert len(record) == 1

    # A string-typed target must be rejected for accuracy scoring.
    data_error = data.copy()
    data_error["target"] = data_error["target"].astype(str)
    with pytest.raises(ValueError):
        score = opt.evaluate(None, data_error)

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring='accuracy', n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, data)
    assert 0. <= score <= 1.

    # Search space over pipeline steps: numerical encoder (ne),
    # categorical encoder (ce), feature selector (fs), estimator (est).
    space = {
        'ne__numerical_strategy': {"search": "choice", "space": [0]},
        'ce__strategy': {
            "search": "choice",
            "space": ["label_encoding", "random_projection",
                      "entity_embedding"],
        },
        'fs__threshold': {"search": "uniform", "space": [0.01, 0.3]},
        'est__max_depth': {"search": "choice", "space": [3, 4, 5, 6, 7]},
    }
    best = opt.optimise(space, data, 1)
    # optimise returns the best hyper-parameter set as a plain dict
    # (isinstance replaces the original's `type(best) == type(dict)`
    # comparison against the type of a local dict variable).
    assert isinstance(best, dict)
# Demo script: end-to-end MLBox pipeline on a binary classification task.
# NOTE(review): StackingClassifier and Classifier are imported but never
# used in the visible portion of this script.
from mlbox.model.classification import StackingClassifier, Classifier
import pandas as pd

# Input CSVs and the name of the target column to predict.
paths = ["train_1.csv", "test.csv"]
target_name = "Class"

# Read the CSVs and split into the train/test dataset dictionary.
rd = Reader(sep=",")
df = rd.train_test_split(paths, target_name)
print(df["train"].head())

# Remove variables that drift between the train and test sets.
dft = Drift_thresholder()
df = dft.fit_transform(df)

# Baseline evaluation of the default pipeline.
opt = Optimiser()
warnings.filterwarnings('ignore', category=DeprecationWarning)
score = opt.evaluate(None, df)

# Hyper-parameter search space for the pipeline steps
# (ne = numerical encoder, ce = categorical encoder,
#  fs = feature selector, est = estimator).
space = {
    'ne__numerical_strategy':{"search":"choice", "space":[0, "mean"]},
    'ce__strategy':{"search":"choice", "space":["label_encoding", "random_projection", "entity_embedding"]},
    'fs__threshold':{"search":"uniform", "space":[0.001, 0.2]},
    'est__strategy':{"search":"choice", "space":["RandomForest", "ExtraTrees", "LightGBM"]},
    'est__max_depth':{"search":"choice", "space":[8, 9, 10, 11, 12, 13]}
}

# NOTE(review): the triple-quoted string below is opened but never closed
# within this chunk — it appears to start a commented-out section that
# continues past the visible source; confirm against the full file.
"""
#Clf_feature_selector(strategy='l1', threshold=0.3)
# Notebook cell (Colab): hide the input-prompt column in rendered output.
# NOTE(review): `display` and `HTML` come from IPython and are not
# imported in the visible source — presumably injected by the notebook.
display(HTML('<style>.prompt{width: 0px; min-width: 0px; visibility: collapse}</style>'))
import warnings
warnings.filterwarnings("ignore")

# Google Drive paths to the train/test CSVs and the target column name.
paths = ["/content/drive/MyDrive/train.csv","/content/drive/MyDrive/test.csv"]
target_name = "class"

# Read the CSVs and split into the train/test dataset dictionary.
rd = Reader(sep=",")
df = rd.train_test_split(paths, target_name)
df["train"].head()

# NOTE(review): the drift-filtered dataset is stored in `dft` but the
# untransformed `df` keeps being used below — looks like a bug (compare
# the usual `df = Drift_thresholder().fit_transform(df)` pattern); confirm.
dft = Drift_thresholder().fit_transform(df)

# Baseline evaluation of the default pipeline.
opt = Optimiser()
warnings.filterwarnings('ignore', category=DeprecationWarning)
score = opt.evaluate(None, df)

# Hyper-parameter search space for the pipeline steps
# (ne = numerical encoder, ce = categorical encoder,
#  fs = feature selector, est = estimator).
space = {
    'ne__numerical_strategy':{"search":"choice", "space":[0, "mean"]},
    'ce__strategy':{"search":"choice", "space":["label_encoding", "random_projection", "entity_embedding"]},
    'fs__threshold':{"search":"uniform", "space":[0.001, 0.2]},
    'est__strategy':{"search":"choice", "space":["RandomForest", "ExtraTrees", "LightGBM"]},
    # -1 presumably means "no depth limit" for the chosen estimator — confirm.
    'est__max_depth':{"search":"choice", "space":[-1, 9, 10, 11, 12, 13]}
}

# Run 50 iterations of hyper-parameter optimisation.
params = opt.optimise(space, df, 50)