Example #1
    def predict(self, X):
        """Hard decision."""
        # print("PREDICT")
        # Check if fit has been called
        check_is_fitted(self, "classes_")

        # Input validation
        X = check_array(X)
        if X.shape[1] != self.X_.shape[1]:
            raise ValueError("number of features does not match")

        X_dsel = self.previous_X
        y_dsel = self.previous_y

        if self.oversampled:
            ros = RandomOverSampler(random_state=42)
            X_dsel, y_dsel = ros.fit_resample(X_dsel, y_dsel)

        if self.desMethod == "KNORAE":
            des = KNORAE(self.ensemble_, random_state=42)
        elif self.desMethod == "KNORAU":
            des = KNORAU(self.ensemble_, random_state=42)
        elif self.desMethod == "LCA":
            des = LCA(self.ensemble_, random_state=42)
        elif self.desMethod == "Rank":
            des = Rank(self.ensemble_, random_state=42)
        else:
            des = KNORAE(self.ensemble_, random_state=42)

        des.fit(X_dsel, y_dsel)
        prediction = des.predict(X)

        return prediction
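
The method above assumes an already-fitted ensemble and a stored DSEL split. A minimal, self-contained sketch of the same pattern follows; the synthetic dataset and bagging pool are illustrative assumptions, not part of the original snippet.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from imblearn.over_sampling import RandomOverSampler
from deslib.des import KNORAE

X, y = make_classification(n_samples=1000, weights=[0.9, 0.1], random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    stratify=y, random_state=42)
X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
                                                    test_size=0.5,
                                                    stratify=y_train,
                                                    random_state=42)

# Train the pool of base classifiers on the training split.
pool = BaggingClassifier(DecisionTreeClassifier(), n_estimators=10,
                         random_state=42).fit(X_train, y_train)

# Balance only the DSEL split used for competence estimation.
X_dsel, y_dsel = RandomOverSampler(random_state=42).fit_resample(X_dsel, y_dsel)

# Fit the dynamic selection method on DSEL and evaluate on the test set.
des = KNORAE(pool, random_state=42)
des.fit(X_dsel, y_dsel)
print(des.score(X_test, y_test))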
Example #2
def test_kne_proba(knn_methods):
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    kne = KNORAE(pool_classifiers, knn_classifier=knn_methods, voting='soft')
    kne.fit(X_dsel, y_dsel)
    probas = kne.predict_proba(X_test)
    expected = np.load(
        'deslib/tests/expected_values/kne_proba_integration.npy')
    assert np.allclose(probas, expected)
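
Examples #2 and #5 rely on a setup_classifiers() test helper that is not shown here. A hypothetical stand-in with the same return shape (pool, DSEL split, test split) might look like the code below; the dataset and pool are assumptions for illustration, so the stored expected values in the test would not match its output.

from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split


def setup_classifiers(random_state=42):
    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=random_state)
    X_train, X_dsel, y_train, y_dsel = train_test_split(
        X_train, y_train, test_size=0.5, random_state=random_state)
    pool_classifiers = BaggingClassifier(
        LogisticRegression(max_iter=1000), n_estimators=10,
        random_state=random_state).fit(X_train, y_train)
    return pool_classifiers, X_dsel, y_dsel, X_test, y_test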
Example #3
def test_knorae_subspaces():
    rng = np.random.RandomState(123456)
    X_dsel, X_test, X_train, y_dsel, y_test, y_train = load_dataset(None, rng)
    pool = BaggingClassifier(LogisticRegression(),
                             max_features=0.5,
                             random_state=rng).fit(X_train, y_train)

    knorae = KNORAE(pool)
    knorae.fit(X_dsel, y_dsel)
    y_pred = knorae.predict_proba(X_test).argmax(axis=1)
    assert np.isclose(accuracy_score(y_test, y_pred), 0.9787234042553191)
Example #4
def test_knorae_subspaces():
    rng = np.random.RandomState(123456)
    X_dsel, X_test, X_train, y_dsel, y_test, y_train = load_dataset(None, rng)
    # load_dataset already returns the DSEL, test and training splits
    pool = BaggingClassifier(LogisticRegression(),
                             max_features=0.5,
                             random_state=rng).fit(X_train, y_train)

    knorae = KNORAE(pool)
    knorae.fit(X_dsel, y_dsel)
    assert np.isclose(knorae.score(X_test, y_test), 0.9787234042553191)
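
Both subspace tests call a load_dataset() helper that is not included in these snippets. A hypothetical replacement returning the same six splits could be written as follows; the synthetic dataset is an assumption, so the hard-coded accuracy above would not apply to it.

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split


def load_dataset(encoder, rng):
    # encoder is unused here; it is kept only to match the call signature above.
    X, y = make_classification(n_samples=1000, random_state=rng)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=rng)
    X_train, X_dsel, y_train, y_dsel = train_test_split(
        X_train, y_train, test_size=0.5, random_state=rng)
    return X_dsel, X_test, X_train, y_dsel, y_test, y_train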
Example #5
def test_kne(knn_methods, voting):
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    kne = KNORAE(pool_classifiers, knn_classifier=knn_methods, voting=voting)
    kne.fit(X_dsel, y_dsel)
    assert np.isclose(kne.score(X_test, y_test), 0.9787234042553191)
Example #6
# DCS techniques
ola = OLA(pool_classifiers)
mcb = MCB(pool_classifiers)

##############################################################################
# Adding a stacked classifier as a baseline comparison. The stacked classifier
# can be found in the static module. In this experiment we consider two types
# of stacking: one using logistic regression as the meta-classifier (the
# default configuration) and the other using a decision tree.
stacked_lr = StackedClassifier(pool_classifiers, random_state=rng)
stacked_dt = StackedClassifier(pool_classifiers,
                               random_state=rng,
                               meta_classifier=DecisionTreeClassifier())
# Fitting the DS techniques
knorau.fit(X_dsel, y_dsel)
kne.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
metades.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
mcb.fit(X_dsel, y_dsel)

# Fitting the stacking models
stacked_lr.fit(X_dsel, y_dsel)
stacked_dt.fit(X_dsel, y_dsel)

# Calculate classification accuracy of each technique
print('Evaluating DS techniques:')
print('Classification accuracy of majority voting of the pool: ',
      model_voting.score(X_test, y_test))
print('Classification accuracy of KNORA-U: ', knorau.score(X_test, y_test))
print('Classification accuracy of KNORA-E: ', kne.score(X_test, y_test))
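
The comparison above starts mid-script: rng, pool_classifiers, model_voting and the DES objects knorau, kne, desp and metades are created earlier. A sketch of what that preamble might look like, using deslib's public API, is shown below; the dataset, pool composition and voting baseline are assumptions for illustration.

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from deslib.dcs import MCB, OLA
from deslib.des import DESP, KNORAE, KNORAU, METADES
from deslib.static import StackedClassifier

rng = np.random.RandomState(42)
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=rng)
X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
                                                    test_size=0.5,
                                                    random_state=rng)

# A small heterogeneous pool; every member exposes predict_proba, as METADES
# requires.
pool_classifiers = [LogisticRegression(max_iter=1000),
                    GaussianNB(),
                    DecisionTreeClassifier(max_depth=5, random_state=42)]
for clf in pool_classifiers:
    clf.fit(X_train, y_train)

# Majority-voting baseline over the same kinds of models.
model_voting = VotingClassifier(
    estimators=[('lr', LogisticRegression(max_iter=1000)),
                ('nb', GaussianNB()),
                ('dt', DecisionTreeClassifier(max_depth=5))],
    voting='hard').fit(X_train, y_train)

# DES techniques (the OLA, MCB and StackedClassifier imports above cover the
# constructors used in the snippet).
knorau = KNORAU(pool_classifiers, random_state=rng)
kne = KNORAE(pool_classifiers, random_state=rng)
desp = DESP(pool_classifiers, random_state=rng)
metades = METADES(pool_classifiers, random_state=rng)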
Example #7
    def predict(self, X):
        # Check if fit has been called
        check_is_fitted(self, "classes_")

        # Input validation
        X = check_array(X)
        if X.shape[1] != self.X_.shape[1]:
            raise ValueError("number of features does not match")

        X_dsel = self.previous_X
        y_dsel = self.previous_y

        unique, counts = np.unique(y_dsel, return_counts=True)

        k_neighbors = 5
        if counts[0] - 1 < 5:
            k_neighbors = counts[0] - 1

        if self.oversampler == "SMOTE" and k_neighbors > 0:
            smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
            X_dsel, y_dsel = smote.fit_resample(X_dsel, y_dsel)
        elif self.oversampler == "svmSMOTE" and k_neighbors > 0:
            try:
                svmSmote = SVMSMOTE(random_state=42, k_neighbors=k_neighbors)
                X_dsel, y_dsel = svmSmote.fit_resample(X_dsel, y_dsel)
            except ValueError:
                pass
        elif self.oversampler == "borderline1" and k_neighbors > 0:
            borderlineSmote1 = BorderlineSMOTE(random_state=42,
                                               k_neighbors=k_neighbors,
                                               kind='borderline-1')
            X_dsel, y_dsel = borderlineSmote1.fit_resample(X_dsel, y_dsel)
        elif self.oversampler == "borderline2" and k_neighbors > 0:
            borderlineSmote2 = BorderlineSMOTE(random_state=42,
                                               k_neighbors=k_neighbors,
                                               kind='borderline-2')
            X_dsel, y_dsel = borderlineSmote2.fit_resample(X_dsel, y_dsel)
        elif self.oversampler == "ADASYN" and k_neighbors > 0:
            try:
                adasyn = ADASYN(random_state=42, n_neighbors=k_neighbors)
                X_dsel, y_dsel = adasyn.fit_resample(X_dsel, y_dsel)
            except RuntimeError:
                pass
            except ValueError:
                pass
        elif self.oversampler == "SLS" and k_neighbors > 0:
            sls = Safe_Level_SMOTE(n_neighbors=k_neighbors)
            X_dsel, y_dsel = sls.sample(X_dsel, y_dsel)

        if self.desMethod == "KNORAE":
            des = KNORAE(self.ensemble_, random_state=42)
        elif self.desMethod == "KNORAU":
            des = KNORAU(self.ensemble_, random_state=42)
        elif self.desMethod == "KNN":
            des = DESKNN(self.ensemble_, random_state=42)
        elif self.desMethod == "Clustering":
            des = DESClustering(self.ensemble_, random_state=42)
        else:
            des = KNORAE(self.ensemble_, random_state=42)

        if len(self.ensemble_) < 2:
            prediction = self.ensemble_[0].predict(X)
        else:
            des.fit(X_dsel, y_dsel)
            prediction = des.predict(X)

        return prediction
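
The key pattern in this method is shrinking SMOTE's k_neighbors when the minority class in the DSEL split is very small. A stand-alone sketch of that pattern follows; the dataset and pool are illustrative assumptions, and unlike the method above it caps k by the smallest class count rather than counts[0].

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from imblearn.over_sampling import SMOTE
from deslib.des import KNORAE

X, y = make_classification(n_samples=400, weights=[0.95, 0.05], random_state=42)
X_train, X_dsel, y_train, y_dsel = train_test_split(X, y, test_size=0.5,
                                                    stratify=y, random_state=42)

pool = BaggingClassifier(DecisionTreeClassifier(), n_estimators=10,
                         random_state=42).fit(X_train, y_train)

# SMOTE needs at least k_neighbors samples in the minority class, so cap k.
_, counts = np.unique(y_dsel, return_counts=True)
k_neighbors = min(5, counts.min() - 1)

if k_neighbors > 0:
    X_dsel, y_dsel = SMOTE(random_state=42,
                           k_neighbors=k_neighbors).fit_resample(X_dsel, y_dsel)

des = KNORAE(pool, random_state=42).fit(X_dsel, y_dsel)
print(des.predict(X_dsel[:5]))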