Пример #1
0
    def test_abod(self):
        clf = ABOD(contamination=0.05)
        clf.fit(self.X_train)
        assert_equal(len(clf.decision_scores), self.X_train.shape[0])

        # invert the scores
        pred_scores = clf.decision_function(self.X_test) * -1
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])
        assert_equal(clf.predict(self.X_test).shape[0],
                     self.X_test.shape[0])
        assert_greater(roc_auc_score(self.y_test, pred_scores), 0.5)
Пример #2
0
def fast_abod_pyod_once(X_nor,
                        X_test,
                        y_test,
                        n_neighbors,
                        contamination=0.05):

    fastABOD = ABOD(n_neighbors=n_neighbors,
                    contamination=contamination,
                    method='fast')

    X_train = X_nor.astype(float).values.copy()

    fastABOD.fit(X_train)
    ## now threshold is determined

    y_pred = fastABOD.predict(X_test)
    scoreTable = fastABOD.decision_function(X_test)
    #print(scoreTable)
    scoreTable = np.nan_to_num(scoreTable, copy=True)

    ## confusion matrix
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

    tpr = tp / (tp + fn)
    fpr = fp / (tn + fp)
    #tprW[trail] = tpr
    #fprW[trail] = fpr
    tprW = tpr
    fprW = fpr

    # Auc score
    auc = roc_auc_score(y_test, scoreTable)

    #print(tpr, fpr)
    #print(auc)

    return tprW, fprW, auc, scoreTable
Пример #3
0
class TestFastABOD(unittest.TestCase):
    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination,
            random_state=42)

        self.clf = ABOD(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        check_estimator(self.clf)

    def test_parameters(self):
        assert_true(
            hasattr(self.clf, 'decision_scores_')
            and self.clf.decision_scores_ is not None)
        assert_true(
            hasattr(self.clf, 'labels_') and self.clf.labels_ is not None)
        assert_true(
            hasattr(self.clf, 'threshold_')
            and self.clf.threshold_ is not None)
        assert_true(hasattr(self.clf, '_mu') and self.clf._mu is not None)
        assert_true(
            hasattr(self.clf, '_sigma') and self.clf._sigma is not None)
        assert_true(hasattr(self.clf, 'tree_') and self.clf.tree_ is not None)

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test,
                                       self.y_test,
                                       scoring='something')

    def tearDown(self):
        pass
Пример #4
0
class TestABOD(unittest.TestCase):
    def setUp(self):
        self.n_train = 50
        self.n_test = 50
        self.contamination = 0.2
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination,
            random_state=42)

        self.clf = ABOD(contamination=self.contamination, method='default')
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        check_estimator(self.clf)

    def test_parameters(self):
        if not hasattr(
                self.clf,
                'decision_scores_') or self.clf.decision_scores_ is None:
            self.assertRaises(AttributeError, 'decision_scores_ is not set')
        if not hasattr(self.clf, 'labels_') or self.clf.labels_ is None:
            self.assertRaises(AttributeError, 'labels_ is not set')
        if not hasattr(self.clf, 'threshold_') or self.clf.threshold_ is None:
            self.assertRaises(AttributeError, 'threshold_ is not set')

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test,
                                       self.y_test,
                                       scoring='something')

    # def test_score(self):
    #     self.clf.score(self.X_test, self.y_test)
    #     self.clf.score(self.X_test, self.y_test, scoring='roc_auc_score')
    #     self.clf.score(self.X_test, self.y_test, scoring='prc_n_score')
    #     with assert_raises(NotImplementedError):
    #         self.clf.score(self.X_test, self.y_test, scoring='something')

    def test_predict_rank(self):
        pred_socres = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is reserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3.5)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        pred_socres = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is reserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3.5)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def tearDown(self):
        pass
Пример #5
0
    n_test = 100  # number of testing points

    X_train, y_train, X_test, y_test = generate_data(
        n_train=n_train, n_test=n_test, contamination=contamination)

    # train ABOD detector
    clf_name = 'ABOD'
    clf = ABOD()
    clf.fit(X_train)

    # get the prediction labels and outlier scores of the training data
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier s`cores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)

    # visualize the results
    visualize(clf_name,
              X_train,
              y_train,
              X_test,
              y_test,
              y_train_pred,
Пример #6
0
    # Generate sample data
    X_train, y_train, X_test, y_test = \
        generate_data(n_train=n_train,
                      n_test=n_test,
                      n_features=2,
                      contamination=contamination,
                      random_state=42)

    # train ABOD detector
    clf_name = 'ABOD'
    clf = ABOD()
    clf.fit(X_train)

    # get the prediction labels and outlier scores of the training data
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier s`cores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)

    # visualize the results
    visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
              y_test_pred, show_figure=True, save_figure=False)
Пример #7
0
from pyod.models.abod import ABOD
from pyod.models.auto_encoder import AutoEncoder
from sklearn.decomposition import PCA
from sklearn.preprocessing import RobustScaler

#preprocessing
X_train = on.iloc[:,:]
pca = PCA(n_components = 15)
X_train = pca.fit_transform(X_train)
scale = RobustScaler()
X_train = scale.fit_transform(X_train)

#machine learning model
clf_1 = ABOD(contamination = 0.1, n_neighbors = 100)
clf_1.fit(X_train)
pred_1 = clf_1.predict(X_train)

#output of the ML model
out_1 = []
for i in range(0, len(pred_1)):
  if pred_1[i] == 0:
    out_1.append('Normal')
  else:
    out_1.append('Abnormal')

state_1 = pd.DataFrame(out_1, columns = ['Condition'])
state_1 = state_1.loc[state_1['Condition'] == 'Abnormal'] 
ab_state1 = list(state_1.index.values.tolist())

#Deep Learning model using pyod library
clf_2 = AutoEncoder(hidden_neurons = [15, 64, 32, 64, 15], epochs = 350, 
class AngularBasedOutlier(OutlierStream):
    def __init__(self, inliers, outliers):
        data_total = np.concatenate((inliers, outliers), axis=0)
        self.data_total = data_total
        self.outliers = outliers
        self.inliers = inliers

        OutlierStream.__init__(self, inliers, outliers)
        #self.model = KNN(contamination=0.045)
        self.model = ABOD(n_neighbors=20, contamination=0.2)

    def train_model(self, data):
        self.model.fit(data)
        scores_pred = self.model.decision_function(data) * -1
        self.threshold = stats.scoreatpercentile(scores_pred, 100 * 0.10)

    def update_model(self, data):
        return None

    def predict_model(self, data):
        return self.model.predict(data)

    def summary(self, ground_truth, predictions, is_plot=False):

        predictions = list(map(lambda x: 1 if x > 0 else 0, predictions))

        print(confusion_matrix(predictions, ground_truth))
        print("Acuracia: {}".format(accuracy_score(predictions, ground_truth)))
        print("Precision: {}".format(precision_score(predictions,
                                                     ground_truth)))
        print("Recall: {}".format(recall_score(predictions, ground_truth)))
        print("F1: {}".format(f1_score(predictions, ground_truth)))

        if is_plot:
            self._plot()

    def _plot(self):

        xx, yy = np.meshgrid(np.linspace(-70, 70, 100),
                             np.linspace(-70, 70, 100))
        Z = self.model.decision_function(np.c_[xx.ravel(), yy.ravel()]) * -1
        Z = Z.reshape(xx.shape)

        subplot = plt.subplot(1, 1, 1)
        subplot.contourf(xx,
                         yy,
                         Z,
                         levels=np.linspace(Z.min(), self.threshold, 7),
                         cmap=plt.cm.Blues_r)
        subplot.contourf(xx,
                         yy,
                         Z,
                         levels=[self.threshold, Z.max()],
                         colors='orange')

        a = subplot.contour(xx,
                            yy,
                            Z,
                            levels=[self.threshold],
                            linewidths=2,
                            colors='red')
        subplot.contourf(xx,
                         yy,
                         Z,
                         levels=[self.threshold, Z.max()],
                         colors='orange')
        b = subplot.scatter(self.outliers[:, 0],
                            self.outliers[:, 1],
                            c='red',
                            s=12,
                            edgecolor='k')
        c = subplot.scatter(self.inliers[:, 0],
                            self.inliers[:, 1],
                            c='white',
                            s=12,
                            edgecolor='k')
        subplot.axis('tight')
        subplot.legend([a.collections[0], b, c],
                       ['Borda da funcao', 'Inliers', 'Outliers'],
                       loc='lower right')
        subplot.set_xlim((-70, 70))
        subplot.set_ylim((-70, 70))
        plt.suptitle("Angular based outlier detection")
        plt.show()
Пример #9
0
 df.loc[df['ground.truth'] == 'anomaly', 'ground.truth'] = 1
 df.loc[df['ground.truth'] == 'nominal', 'ground.truth'] = 0
 y = df['ground.truth'].values.reshape(-1)
 df[['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7']] = scaler.fit_transform(
     df[['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7']])
 x1 = df['V1'].values.reshape(-1, 1)
 x2 = df['V2'].values.reshape(-1, 1)
 x3 = df['V3'].values.reshape(-1, 1)
 x4 = df['V4'].values.reshape(-1, 1)
 x5 = df['V5'].values.reshape(-1, 1)
 x6 = df['V6'].values.reshape(-1, 1)
 x7 = df['V7'].values.reshape(-1, 1)
 x = np.concatenate((x1, x2, x3, x4, x5, x6, x7), axis=1)
 abod = ABOD(contamination=outliers_fraction)
 abod.fit(x)
 y_pred = abod.predict(x)
 fpr, tpr, threshold = roc_curve(y, y_pred)  ###计算真阳性率和假阳性率
 roc_auc = auc(fpr, tpr)  ###计算auc的值
 lw = 2
 ax = fig.add_subplot(3, 3, i)
 plt.plot(fpr,
          tpr,
          color='darkorange',
          lw=lw,
          label='ROC curve (area = %0.3f)' % roc_auc)  ###假正率为横坐标,真正率为纵坐标做曲线
 plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
 plt.xlim([0.0, 1.0])
 plt.ylim([0.0, 1.05])
 plt.xlabel('False Positive Rate')
 plt.ylabel('True Positive Rate')
 plt.title('AUC')
Пример #10
0
class TestABOD(unittest.TestCase):
    def setUp(self):
        self.n_train = 50
        self.n_test = 50
        self.contamination = 0.2
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination)

        self.clf = ABOD(contamination=self.contamination, method='default')
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        check_estimator(self.clf)

    def test_parameters(self):
        if not hasattr(
                self.clf,
                'decision_scores_') or self.clf.decision_scores_ is None:
            self.assertRaises(AttributeError, 'decision_scores_ is not set')
        if not hasattr(self.clf, 'labels_') or self.clf.labels_ is None:
            self.assertRaises(AttributeError, 'labels_ is not set')
        if not hasattr(self.clf, 'threshold_') or self.clf.threshold_ is None:
            self.assertRaises(AttributeError, 'threshold_ is not set')

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_evaluate(self):
        self.clf.fit_predict_evaluate(self.X_test, self.y_test)

    def tearDown(self):
        pass
Пример #11
0
    n_train = 200
    n_test = 100

    X_train, y_train, c_train, X_test, y_test, c_test = generate_data(
        n_train=n_train, n_test=n_test, contamination=contamination)

    # train a ABOD detector (default version)
    clf = ABOD(contamination=contamination, fast_method=False)
    clf.fit(X_train)

    # get the prediction on the training data
    y_train_pred = clf.y_pred
    y_train_score = clf.decision_scores * -1

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)
    y_test_score = clf.decision_function(X_test) * -1

    print('Train ROC:{roc}, precision@n:{prn}'.format(
        roc=roc_auc_score(y_train, y_train_score),
        prn=precision_n_scores(y_train, y_train_score)))

    print('Test ROC:{roc}, precision@n:{prn}'.format(
        roc=roc_auc_score(y_test, y_test_score),
        prn=precision_n_scores(y_test, y_test_score)))

    #######################################################################
    # Visualizations
    # initialize the log directory if it does not exist
    pathlib.Path('example_figs').mkdir(parents=True, exist_ok=True)
Пример #12
0
import numpy as np
import pandas as pd
from scipy import stats
import sys
from pyod.models.abod import ABOD
from pyod.utils.data import generate_data, get_outliers_inliers

data = int(sys.argv[1])

# generate random data with two features
X_train, Y_train = generate_data(n_train=2000, train_only=True, n_features=1)

# store outliers and inliers in different numpy arrays
x_outliers, x_inliers = get_outliers_inliers(X_train, Y_train)

clf = ABOD(contamination=0.1)

clf.fit(X_train)

# prediction of a datapoint category outlier or inlier
y_pred = clf.predict(X_train)

output = clf.predict([[data]])

print(output[0])
with open('anomaly.txt', 'w') as file:
    file.write(str(output[0]))