Пример #1
0
def test_parameter_gamma(selection_threshold, create_pool_classifiers):
    """Expect ValueError or TypeError for the supplied ``selection_threshold``.

    The estimator is built and fitted inside the ``pytest.raises`` context,
    so an error raised at either step satisfies the test.
    """
    features = np.random.rand(10, 2)
    labels = np.ones(10)
    accepted_errors = (ValueError, TypeError)
    with pytest.raises(accepted_errors):
        technique = METADES(
            create_pool_classifiers,
            selection_threshold=selection_threshold,
        )
        technique.fit(features, labels)
def test_meta_classifier_not_predict_proba(create_pool_classifiers, model):
    """Expect ValueError when ``model`` is used as METADES' second argument.

    The labels contain two classes: the first five samples are 0 and the
    remaining five are 1.
    """
    features = np.random.rand(10, 2)
    labels = np.ones(10)
    labels[:5] = 0
    with pytest.raises(ValueError):
        technique = METADES(create_pool_classifiers, model)
        technique.fit(features, labels)
def test_meta_classifier_not_none():
    """A user-supplied GaussianNB meta-classifier is kept and fitted by ``fit``."""
    n_samples = 100
    features = np.random.rand(n_samples, 2)
    labels = np.random.randint(0, 2, n_samples)

    technique = METADES(meta_classifier=GaussianNB())
    technique.fit(features, labels)

    # The fitted attribute must exist and still be a GaussianNB instance.
    check_is_fitted(technique.meta_classifier_, "classes_")
    assert isinstance(technique.meta_classifier_, GaussianNB)
Пример #4
0
def test_meta():
    """Check the META-DES test accuracy with DFP enabled.

    Two reference accuracies are accepted; the score is computed a single
    time and compared against both, instead of re-running ``score`` for the
    second comparison.
    """
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    meta_des = METADES(pool_classifiers, DFP=True)
    meta_des.fit(X_dsel, y_dsel)

    # Evaluate once: scoring runs the whole prediction pipeline.
    accuracy = meta_des.score(X_test, y_test)
    assert (np.isclose(accuracy, 0.9121212121212121)
            or np.isclose(accuracy, 0.8909090909090909))
Пример #5
0
def test_not_predict_proba(create_X_y):
    """Expect ValueError when fitting METADES on a pool of Perceptrons."""
    features, labels = create_X_y

    base_clf = Perceptron()
    base_clf.fit(features, labels)

    # The pool holds the same fitted Perceptron instance twice.
    pool = [base_clf, base_clf]
    with pytest.raises(ValueError):
        technique = METADES(pool)
        technique.fit(features, labels)
Пример #6
0
def test_not_predict_proba():
    """Expect ValueError when fitting METADES on a pool of Perceptrons.

    Uses the module-level ``X_dsel_ex1`` / ``y_dsel_ex1`` example data.
    """
    features, labels = X_dsel_ex1, y_dsel_ex1

    base_clf = Perceptron()
    base_clf.fit(features, labels)

    # The pool holds the same fitted Perceptron instance twice.
    pool = [base_clf, base_clf]
    with pytest.raises(ValueError):
        technique = METADES(pool)
        technique.fit(features, labels)
def test_fitted_meta_classifier():
    """A meta-classifier taken from one fitted METADES is reused by another.

    The first instance fits a fresh GaussianNB; its fitted
    ``meta_classifier_`` is then handed to a second instance, and after
    fitting both instances must compare equal on that attribute.
    """
    n_samples = 100
    features = np.random.rand(n_samples, 2)
    labels = np.random.randint(0, 2, n_samples)

    first = METADES(meta_classifier=GaussianNB())
    first.fit(features, labels)

    second = METADES(meta_classifier=first.meta_classifier_)
    second.fit(features, labels)

    assert first.meta_classifier_ == second.meta_classifier_
Пример #8
0
def test_meta_no_pool_of_classifiers(knn_methods):
    """Check META-DES accuracy on breast cancer data with no pool supplied.

    A single seeded ``RandomState`` is shared by the train/test split and by
    METADES, so the order in which they consume it is part of the expected
    result.
    """
    rng = np.random.RandomState(123456)

    dataset = load_breast_cancer()
    features, target = dataset.data, dataset.target

    # Hold out 33% of the samples for testing.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.33, random_state=rng)

    # Standardize using statistics computed on the training split only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    meta_des = METADES(knn_classifier=knn_methods,
                       random_state=rng,
                       DSEL_perc=0.5)
    meta_des.fit(X_train, y_train)

    accuracy = meta_des.score(X_test, y_test)
    assert np.isclose(accuracy, 0.9095744680851063)
Пример #9
0
def test_meta(knn_methods):
    """Check the META-DES test accuracy for the supplied ``knn_classifier``."""
    pool, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    technique = METADES(pool, knn_classifier=knn_methods)
    technique.fit(X_dsel, y_dsel)

    accuracy = technique.score(X_test, y_test)
    assert np.isclose(accuracy, 0.973404255319149)
def test_meta():
    """Check the META-DES test accuracy with default settings."""
    pool, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    technique = METADES(pool)
    technique.fit(X_dsel, y_dsel)

    accuracy = technique.score(X_test, y_test)
    assert np.isclose(accuracy, 0.796969696969697)
Пример #11
0
    # can estimate probabilities
    pool_classifiers = RandomForestClassifier(n_estimators=10, max_depth=5)
    pool_classifiers.fit(X_train, y_train)

    # Initialize a DS technique. Here we specify the size of the region of competence (5 neighbors)
    knorau = KNORAU(pool_classifiers)
    kne = KNORAE(pool_classifiers, k=5)
    desp = DESP(pool_classifiers, k=5)
    ola = OLA(pool_classifiers, k=5)
    mcb = MCB(pool_classifiers, k=5)
    meta = METADES(pool_classifiers, k=5)

    # Fit the DS techniques
    knorau.fit(X_dsel, y_dsel)
    kne.fit(X_dsel, y_dsel)
    desp.fit(X_dsel, y_dsel)
    meta.fit(X_dsel, y_dsel)
    ola.fit(X_dsel, y_dsel)
    mcb.fit(X_dsel, y_dsel)

    # Calculate classification accuracy of each technique
    print('Classification accuracy RF: ', RF.score(X_test, y_test))
    print('Evaluating DS techniques:')
    print('Classification accuracy KNORAU: ', knorau.score(X_test, y_test))
    print('Classification accuracy KNORA-Eliminate: ',
          kne.score(X_test, y_test))
    print('Classification accuracy DESP: ', desp.score(X_test, y_test))
    print('Classification accuracy OLA: ', ola.score(X_test, y_test))
    print('Classification accuracy MCB: ', mcb.score(X_test, y_test))
    print('Classification accuracy META-DES: ', meta.score(X_test, y_test))
Пример #12
0
def test_parameter_Hc(Hc, create_pool_classifiers):
    """Expect ValueError or TypeError for the supplied ``Hc`` value.

    The estimator is built and fitted inside the ``pytest.raises`` context,
    so an error raised at either step satisfies the test.
    """
    features = np.random.rand(10, 2)
    labels = np.ones(10)
    accepted_errors = (ValueError, TypeError)
    with pytest.raises(accepted_errors):
        technique = METADES(create_pool_classifiers, Hc=Hc)
        technique.fit(features, labels)
Пример #13
0
def main():
    """Compare several DS techniques on the breast cancer dataset.

    A Bagging pool of Perceptrons is trained, then each dynamic selection
    technique is fitted on a held-out DSEL split and its test accuracy is
    printed, together with the accuracy of the Bagging pool itself.
    """
    ###############################################################################
    # Preparing the dataset
    # ---------------------
    # In this part we load the breast cancer dataset from scikit-learn and
    # preprocess it in order to pass to the DS models. An important point here is
    # to normalize the data so that it has zero mean and unit variance, which is
    # a common requirement for many machine learning algorithms.
    # This step can be easily done using the StandardScaler class.

    rng = np.random.RandomState(123)
    data = load_breast_cancer()
    X = data.data
    y = data.target
    # split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=rng)

    # Scale the variables to have 0 mean and unit variance
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Split the data into training and DSEL for DS techniques
    X_train, X_dsel, y_train, y_dsel = train_test_split(X_train,
                                                        y_train,
                                                        test_size=0.5,
                                                        random_state=rng)

    # Train a pool of 100 base classifiers
    pool_classifiers = BaggingClassifier(Perceptron(max_iter=10),
                                         n_estimators=100,
                                         random_state=rng)
    pool_classifiers.fit(X_train, y_train)

    # Initialize the DS techniques
    knorau = KNORAU(pool_classifiers)
    kne = KNORAE(pool_classifiers)
    desp = DESP(pool_classifiers)
    ola = OLA(pool_classifiers)
    mcb = MCB(pool_classifiers)

    ###############################################################################
    # Calibrating base classifiers
    # -----------------------------
    # Some dynamic selection techniques require that the base classifiers
    # estimate probabilities in order to estimate their competence level. Since
    # the Perceptron model is not a probabilistic classifier (it does not
    # implement the predict_proba method), it needs to be calibrated for
    # probability estimation before being used by such DS techniques. This step
    # can be conducted using the CalibratedClassifierCV class from scikit-learn.
    # Note that in this example we pass a prefitted pool of classifiers to the
    # calibration method in order to use exactly the same pool used in the other
    # DS methods.
    calibrated_pool = []
    for clf in pool_classifiers:
        # The estimator is passed positionally: its keyword was renamed from
        # `base_estimator` to `estimator` in scikit-learn, so the positional
        # form is the one that works across releases.
        calibrated = CalibratedClassifierCV(clf, cv='prefit')
        calibrated.fit(X_dsel, y_dsel)
        calibrated_pool.append(calibrated)

    apriori = APriori(calibrated_pool)
    meta = METADES(calibrated_pool)

    knorau.fit(X_dsel, y_dsel)
    kne.fit(X_dsel, y_dsel)
    desp.fit(X_dsel, y_dsel)
    ola.fit(X_dsel, y_dsel)
    mcb.fit(X_dsel, y_dsel)
    apriori.fit(X_dsel, y_dsel)
    meta.fit(X_dsel, y_dsel)

    ###############################################################################
    # Evaluating the methods
    # -----------------------
    # Let's now evaluate the methods on the test set. We also use the performance
    # of Bagging (pool of classifiers without any selection) as a baseline
    # comparison. We can see that the majority of DS methods achieve higher
    # classification accuracy.

    print('Evaluating DS techniques:')
    print('Classification accuracy KNORA-Union: ',
          knorau.score(X_test, y_test))
    print('Classification accuracy KNORA-Eliminate: ',
          kne.score(X_test, y_test))
    print('Classification accuracy DESP: ', desp.score(X_test, y_test))
    print('Classification accuracy OLA: ', ola.score(X_test, y_test))
    print('Classification accuracy A priori: ', apriori.score(X_test, y_test))
    print('Classification accuracy MCB: ', mcb.score(X_test, y_test))
    print('Classification accuracy META-DES: ', meta.score(X_test, y_test))
    print('Classification accuracy Bagging: ',
          pool_classifiers.score(X_test, y_test))
Пример #14
0
def test_meta(knne, expected):
    """Check the META-DES accuracy with DFP enabled for the given ``knne`` setting.

    ``expected`` is the reference accuracy paired with ``knne``.
    """
    pool, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    technique = METADES(pool, DFP=True, knne=knne)
    technique.fit(X_dsel, y_dsel)

    accuracy = technique.score(X_test, y_test)
    assert np.isclose(accuracy, expected)
Пример #15
0
# Fit the pool of classifiers on the training split.
pool_classifiers.fit(X_train, y_train)

# DS techniques without DFP
apriori = APriori(pool_classifiers)
aposteriori = APosteriori(pool_classifiers)
ola = OLA(pool_classifiers)
lca = LCA(pool_classifiers)
desp = DESP(pool_classifiers)
meta = METADES(pool_classifiers)

# Fit every technique on the DSEL split.
apriori.fit(X_dsel, y_dsel)
aposteriori.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
lca.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)

# Report the test-set accuracy of each technique.
print('Evaluating DS techniques:')
print('Classification accuracy of OLA: ', ola.score(X_test, y_test))
print('Classification accuracy of LCA: ', lca.score(X_test, y_test))
print('Classification accuracy of A priori: ', apriori.score(X_test, y_test))
print('Classification accuracy of A posteriori: ',
      aposteriori.score(X_test, y_test))
print('Classification accuracy of DES-P: ', desp.score(X_test, y_test))
print('Classification accuracy of META-DES: ', meta.score(X_test, y_test))

# Same techniques, now constructed with DFP=True.
# NOTE(review): "fire" presumably refers to the FIRE-DES framework - confirm.
fire_apriori = APriori(pool_classifiers, DFP=True)
fire_aposteriori = APosteriori(pool_classifiers, DFP=True)
fire_ola = OLA(pool_classifiers, DFP=True)
fire_lca = LCA(pool_classifiers, DFP=True)
Пример #16
0
     bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2, min_samples_split=20, min_samples_leaf=5),
                              algorithm='SAMME', n_estimators=200, learning_rate=0.8)
     bdt.fit(Feature_train, Label_train.ravel())
     Label_predict = bdt.predict(Feature_test)
 elif m == 'SMOTE-AdaBoost-DT':
     sm = SMOTE()
     Feature_train_o, Label_train_o = sm.fit_sample(Feature_train, Label_train.ravel())
     bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2, min_samples_split=20, min_samples_leaf=5),
                              algorithm='SAMME', n_estimators=200, learning_rate=0.8)
     bdt.fit(Feature_train_o, Label_train_o)
     Label_predict = bdt.predict(Feature_test)
 elif m == 'META-DES':
     pool_classifiers = RandomForestClassifier(n_estimators=10)
     pool_classifiers.fit(Feature_train, Label_train.ravel())
     metades = METADES(pool_classifiers)
     metades.fit(Feature_train, Label_train.ravel())
     Label_predict = metades.predict(Feature_test)
 elif m == 'MCB':
     pool_classifiers = RandomForestClassifier(n_estimators=10)
     pool_classifiers.fit(Feature_train, Label_train.ravel())
     mcb = MCB(pool_classifiers)
     mcb.fit(Feature_train, Label_train.ravel())
     Label_predict = mcb.predict(Feature_test)
 elif m == 'DES-MI':
     pool_classifiers = RandomForestClassifier(n_estimators=10)
     pool_classifiers.fit(Feature_train, Label_train.ravel())
     dmi = DESMI(pool_classifiers)
     dmi.fit(Feature_train, Label_train.ravel())
     Label_predict = dmi.predict(Feature_test)
 elif m == 'One_vs_Rest-SMOTE-XGBoost':
     sm = SMOTE()
Пример #17
0
def test_meta_classifier_not_predict_proba(create_pool_classifiers):
    """Expect ValueError when a Perceptron is used as METADES' second argument."""
    features = np.random.rand(10, 2)
    labels = np.ones(10)
    with pytest.raises(ValueError):
        technique = METADES(create_pool_classifiers, Perceptron())
        technique.fit(features, labels)