def test_abod(self):
    """Fit ABOD on the training set and sanity-check its outputs.

    Checks that the training scores, test scores and predicted labels
    have one entry per sample, and that the sign-flipped test scores
    reach a ROC AUC above 0.5.
    """
    detector = ABOD(contamination=0.05)
    detector.fit(self.X_train)

    assert_equal(len(detector.decision_scores), self.X_train.shape[0])

    # Flip the sign of the raw scores before they are used for ROC AUC.
    flipped_scores = -1 * detector.decision_function(self.X_test)
    assert_equal(flipped_scores.shape[0], self.X_test.shape[0])

    predicted_labels = detector.predict(self.X_test)
    assert_equal(predicted_labels.shape[0], self.X_test.shape[0])

    test_auc = roc_auc_score(self.y_test, flipped_scores)
    assert_greater(test_auc, 0.5)
def fast_abod_pyod_once(X_nor, X_test, y_test, n_neighbors, contamination=0.05):
    """Fit a fast ABOD detector on ``X_nor`` and evaluate it on a test set.

    Parameters
    ----------
    X_nor : object exposing ``astype(float).values``
        Training data; presumably a pandas DataFrame -- confirm with callers.
    X_test : array-like
        Samples to label and score.
    y_test : array-like
        Ground-truth labels for ``X_test``.
    n_neighbors : int
        Forwarded to ``ABOD(n_neighbors=...)``.
    contamination : float, default 0.05
        Forwarded to ``ABOD(contamination=...)``.

    Returns
    -------
    tuple
        ``(tpr, fpr, auc, scores)``: true-positive rate, false-positive
        rate, ROC AUC, and the test scores with NaNs replaced through
        ``np.nan_to_num``.
    """
    detector = ABOD(method='fast',
                    n_neighbors=n_neighbors,
                    contamination=contamination)
    detector.fit(X_nor.astype(float).values.copy())

    # Labels come from predict(); the scores are sanitised separately so
    # that the AUC computation never sees NaN values.
    predicted = detector.predict(X_test)
    scores = np.nan_to_num(detector.decision_function(X_test), copy=True)

    # Unpack the flattened confusion matrix into its four cells.
    tn, fp, fn, tp = confusion_matrix(y_test, predicted).ravel()
    true_positive_rate = tp / (tp + fn)
    false_positive_rate = fp / (tn + fp)

    area_under_curve = roc_auc_score(y_test, scores)
    return true_positive_rate, false_positive_rate, area_under_curve, scores
class TestFastABOD(unittest.TestCase):
    """Tests for an ABOD detector created without an explicit ``method``."""

    def setUp(self):
        """Generate a seeded data set and fit the detector under test."""
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.6

        generated = generate_data(n_train=self.n_train,
                                  n_test=self.n_test,
                                  contamination=self.contamination,
                                  random_state=42)
        self.X_train, self.y_train, self.X_test, self.y_test = generated

        self.clf = ABOD(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        """Run ``check_estimator`` against the fitted detector."""
        check_estimator(self.clf)

    def test_parameters(self):
        """Every fitted attribute must exist and must not be ``None``."""
        fitted_attributes = ('decision_scores_', 'labels_', 'threshold_',
                             '_mu', '_sigma', 'tree_')
        for attribute in fitted_attributes:
            assert_true(hasattr(self.clf, attribute)
                        and getattr(self.clf, attribute) is not None)

    def test_train_scores(self):
        """There is one training score per training sample."""
        n_samples = self.X_train.shape[0]
        assert_equal(len(self.clf.decision_scores_), n_samples)

    def test_prediction_scores(self):
        """Test scores have the right length and clear the ROC floor."""
        scores = self.clf.decision_function(self.X_test)

        # shape check
        assert_equal(scores.shape[0], self.X_test.shape[0])

        # performance check
        assert_greater(roc_auc_score(self.y_test, scores), self.roc_floor)

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the true labels."""
        labels = self.clf.predict(self.X_test)
        assert_equal(labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        """Default probabilities lie within [0, 1]."""
        proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_linear(self):
        """Probabilities from ``method='linear'`` lie within [0, 1]."""
        proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_unify(self):
        """Probabilities from ``method='unify'`` lie within [0, 1]."""
        proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(proba.min(), 0)
        assert_less_equal(proba.max(), 1)

    def test_prediction_proba_parameter(self):
        """An unknown probability method raises ``ValueError``."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        """``fit_predict`` returns one label per training sample."""
        labels = self.clf.fit_predict(self.X_train)
        assert_equal(labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        """Known scoring names are accepted; an unknown one is rejected."""
        self.clf.fit_predict_score(self.X_test, self.y_test)
        for scoring_name in ('roc_auc_score', 'prc_n_score'):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring=scoring_name)

        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def tearDown(self):
        """Nothing to clean up."""
        pass
class TestABOD(unittest.TestCase):
    """Tests for an ABOD detector created with ``method='default'``."""

    def setUp(self):
        """Generate a seeded data set and fit the detector under test."""
        self.n_train = 50
        self.n_test = 50
        self.contamination = 0.2
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        self.clf = ABOD(contamination=self.contamination, method='default')
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        """Run ``check_estimator`` against the fitted detector."""
        check_estimator(self.clf)

    def test_parameters(self):
        """The fitted attributes must exist and must not be ``None``.

        The previous version called ``assertRaises(AttributeError, '<msg>')``,
        which treats the message string as the callable to invoke; it never
        asserted anything about the attributes. Real assertions are used
        instead, with the original messages kept as failure text.
        """
        for attribute in ('decision_scores_', 'labels_', 'threshold_'):
            message = '{} is not set'.format(attribute)
            self.assertTrue(hasattr(self.clf, attribute), message)
            self.assertIsNotNone(getattr(self.clf, attribute), message)

    def test_train_scores(self):
        """There is one training score per training sample."""
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        """Test scores have the right length and clear the ROC floor."""
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor)

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the true labels."""
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        """Default probabilities lie within [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_linear(self):
        """Probabilities from ``method='linear'`` lie within [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        """Probabilities from ``method='unify'`` lie within [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        """An unknown probability method raises ``ValueError``."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        """``fit_predict`` returns one label per training sample."""
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        """Known scoring names are accepted; an unknown one is rejected."""
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test, self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    # def test_score(self):
    #     self.clf.score(self.X_test, self.y_test)
    #     self.clf.score(self.X_test, self.y_test, scoring='roc_auc_score')
    #     self.clf.score(self.X_test, self.y_test, scoring='prc_n_score')
    #     with assert_raises(NotImplementedError):
    #         self.clf.score(self.X_test, self.y_test, scoring='something')

    def test_predict_rank(self):
        """Ranks follow the score ordering and stay within the expected bounds."""
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved (within the given tolerance)
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3.5)
        # every rank is below n_train + 1 and above -0.1
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        """Normalized ranks follow the score ordering and stay below 1.01."""
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved (within the given tolerance)
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3.5)
        # every normalized rank is below 1.01 and above -0.1
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def tearDown(self):
        """Nothing to clean up."""
        pass
n_test = 100 # number of testing points X_train, y_train, X_test, y_test = generate_data( n_train=n_train, n_test=n_test, contamination=contamination) # train ABOD detector clf_name = 'ABOD' clf = ABOD() clf.fit(X_train) # get the prediction labels and outlier scores of the training data y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) y_train_scores = clf.decision_scores_ # raw outlier scores # get the prediction on the test data y_test_pred = clf.predict(X_test) # outlier labels (0 or 1) y_test_scores = clf.decision_function(X_test) # outlier s`cores # evaluate and print the results print("\nOn Training Data:") evaluate_print(clf_name, y_train, y_train_scores) print("\nOn Test Data:") evaluate_print(clf_name, y_test, y_test_scores) # visualize the results visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
# Build a seeded, two-feature synthetic data set.
X_train, y_train, X_test, y_test = generate_data(
    n_train=n_train,
    n_test=n_test,
    n_features=2,
    contamination=contamination,
    random_state=42,
)

# Fit an ABOD detector using its default arguments.
clf_name = 'ABOD'
clf = ABOD()
clf.fit(X_train)

# Training-set results are read from the fitted detector.
y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_  # raw outlier scores

# Score and label the test set.
y_test_scores = clf.decision_function(X_test)  # outlier scores
y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)

# Report the evaluation for both splits.
print("\nOn Training Data:")
evaluate_print(clf_name, y_train, y_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name, y_test, y_test_scores)

# Show the figure on screen without saving it.
visualize(
    clf_name,
    X_train,
    y_train,
    X_test,
    y_test,
    y_train_pred,
    y_test_pred,
    show_figure=True,
    save_figure=False,
)
from pyod.models.abod import ABOD from pyod.models.auto_encoder import AutoEncoder from sklearn.decomposition import PCA from sklearn.preprocessing import RobustScaler #preprocessing X_train = on.iloc[:,:] pca = PCA(n_components = 15) X_train = pca.fit_transform(X_train) scale = RobustScaler() X_train = scale.fit_transform(X_train) #machine learning model clf_1 = ABOD(contamination = 0.1, n_neighbors = 100) clf_1.fit(X_train) pred_1 = clf_1.predict(X_train) #output of the ML model out_1 = [] for i in range(0, len(pred_1)): if pred_1[i] == 0: out_1.append('Normal') else: out_1.append('Abnormal') state_1 = pd.DataFrame(out_1, columns = ['Condition']) state_1 = state_1.loc[state_1['Condition'] == 'Abnormal'] ab_state1 = list(state_1.index.values.tolist()) #Deep Learning model using pyod library clf_2 = AutoEncoder(hidden_neurons = [15, 64, 32, 64, 15], epochs = 350,
class AngularBasedOutlier(OutlierStream):
    """Outlier-stream detector backed by an ``ABOD`` model.

    Parameters
    ----------
    inliers, outliers : array-like
        Sample arrays, concatenated along axis 0 into ``data_total``.
        ``_plot`` reads columns 0 and 1 of each.
    """

    def __init__(self, inliers, outliers):
        self.data_total = np.concatenate((inliers, outliers), axis=0)
        self.outliers = outliers
        self.inliers = inliers
        OutlierStream.__init__(self, inliers, outliers)
        self.model = ABOD(n_neighbors=20, contamination=0.2)

    def train_model(self, data):
        """Fit the model on ``data`` and store the threshold used by ``_plot``.

        The threshold is the 10th percentile of the negated decision scores.
        """
        self.model.fit(data)
        scores_pred = self.model.decision_function(data) * -1
        # NOTE(review): this 10% cut-off differs from the contamination=0.2
        # given to ABOD, so the plotted boundary may not match predict();
        # confirm which fraction is intended.
        self.threshold = stats.scoreatpercentile(scores_pred, 100 * 0.10)

    def update_model(self, data):
        """Do nothing; always returns ``None``."""
        return None

    def predict_model(self, data):
        """Return ``self.model.predict(data)``."""
        return self.model.predict(data)

    def summary(self, ground_truth, predictions, is_plot=False):
        """Print the confusion matrix and classification metrics.

        Parameters
        ----------
        ground_truth : array-like
            True labels.
        predictions : iterable
            Raw predictions; values greater than 0 are mapped to 1 and
            everything else to 0 before scoring.
        is_plot : bool, default False
            When true, ``_plot`` is called after printing.
        """
        predictions = [1 if value > 0 else 0 for value in predictions]

        # scikit-learn metrics take (y_true, y_pred). The arguments used to
        # be passed the other way round, which transposed the confusion
        # matrix and swapped the reported precision and recall.
        print(confusion_matrix(ground_truth, predictions))
        print("Acuracia: {}".format(accuracy_score(ground_truth, predictions)))
        print("Precision: {}".format(precision_score(ground_truth, predictions)))
        print("Recall: {}".format(recall_score(ground_truth, predictions)))
        print("F1: {}".format(f1_score(ground_truth, predictions)))

        if is_plot:
            self._plot()

    def _plot(self):
        """Draw the negated decision surface, the threshold contour and the data.

        Requires ``train_model`` to have been called, because it reads
        ``self.threshold``.
        """
        # Evaluate the negated decision function on a 100 x 100 grid.
        xx, yy = np.meshgrid(np.linspace(-70, 70, 100),
                             np.linspace(-70, 70, 100))
        Z = self.model.decision_function(np.c_[xx.ravel(), yy.ravel()]) * -1
        Z = Z.reshape(xx.shape)

        subplot = plt.subplot(1, 1, 1)
        # Blue shades below the threshold, solid orange above it, and a red
        # line at the threshold itself.
        subplot.contourf(xx, yy, Z,
                         levels=np.linspace(Z.min(), self.threshold, 7),
                         cmap=plt.cm.Blues_r)
        subplot.contourf(xx, yy, Z, levels=[self.threshold, Z.max()],
                         colors='orange')
        boundary = subplot.contour(xx, yy, Z, levels=[self.threshold],
                                   linewidths=2, colors='red')

        outlier_points = subplot.scatter(self.outliers[:, 0],
                                         self.outliers[:, 1],
                                         c='red', s=12, edgecolor='k')
        inlier_points = subplot.scatter(self.inliers[:, 0],
                                        self.inliers[:, 1],
                                        c='white', s=12, edgecolor='k')

        subplot.axis('tight')
        # The labels follow the handle order: boundary, outliers, inliers.
        # The two scatter labels used to be swapped.
        # NOTE(review): ContourSet.collections is deprecated in recent
        # Matplotlib releases -- confirm against the pinned version.
        subplot.legend([boundary.collections[0], outlier_points, inlier_points],
                       ['Borda da funcao', 'Outliers', 'Inliers'],
                       loc='lower right')
        subplot.set_xlim((-70, 70))
        subplot.set_ylim((-70, 70))
        plt.suptitle("Angular based outlier detection")
        plt.show()
# Encode the ground truth as integers: 'anomaly' -> 1, 'nominal' -> 0.
df.loc[df['ground.truth'] == 'anomaly', 'ground.truth'] = 1
df.loc[df['ground.truth'] == 'nominal', 'ground.truth'] = 0
# The column held strings before the replacement, so cast explicitly to an
# integer array instead of handing the metric functions a non-numeric dtype.
y = df['ground.truth'].values.reshape(-1).astype(int)

# Scale the seven feature columns in place.
df[['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7']] = scaler.fit_transform(
    df[['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7']])

# One column vector per feature, then a single (n_samples, 7) matrix.
x1 = df['V1'].values.reshape(-1, 1)
x2 = df['V2'].values.reshape(-1, 1)
x3 = df['V3'].values.reshape(-1, 1)
x4 = df['V4'].values.reshape(-1, 1)
x5 = df['V5'].values.reshape(-1, 1)
x6 = df['V6'].values.reshape(-1, 1)
x7 = df['V7'].values.reshape(-1, 1)
x = np.concatenate((x1, x2, x3, x4, x5, x6, x7), axis=1)

abod = ABOD(contamination=outliers_fraction)
abod.fit(x)
y_pred = abod.predict(x)

# A ROC curve needs continuous scores: thresholded 0/1 predictions collapse
# it to a single operating point, so use the detector's decision scores.
y_scores = abod.decision_function(x)
fpr, tpr, threshold = roc_curve(y, y_scores)  # true/false positive rates
roc_auc = auc(fpr, tpr)  # area under the ROC curve

lw = 2
ax = fig.add_subplot(3, 3, i)
# False positive rate on the x-axis, true positive rate on the y-axis.
plt.plot(fpr, tpr, color='darkorange', lw=lw,
         label='ROC curve (area = %0.3f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('AUC')
class TestABOD(unittest.TestCase):
    """Tests for an ABOD detector created with ``method='default'``."""

    def setUp(self):
        """Generate a data set and fit the detector under test."""
        self.n_train = 50
        self.n_test = 50
        self.contamination = 0.2
        self.roc_floor = 0.6
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination)

        self.clf = ABOD(contamination=self.contamination, method='default')
        self.clf.fit(self.X_train)

    def test_sklearn_estimator(self):
        """Run ``check_estimator`` against the fitted detector."""
        check_estimator(self.clf)

    def test_parameters(self):
        """The fitted attributes must exist and must not be ``None``.

        The previous version called ``assertRaises(AttributeError, '<msg>')``,
        which treats the message string as the callable to invoke; it never
        asserted anything about the attributes. Real assertions are used
        instead, with the original messages kept as failure text.
        """
        for attribute in ('decision_scores_', 'labels_', 'threshold_'):
            message = '{} is not set'.format(attribute)
            self.assertTrue(hasattr(self.clf, attribute), message)
            self.assertIsNotNone(getattr(self.clf, attribute), message)

    def test_train_scores(self):
        """There is one training score per training sample."""
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        """Test scores have the right length and clear the ROC floor."""
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, pred_scores), self.roc_floor)

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the true labels."""
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        """Default probabilities lie within [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test)
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_linear(self):
        """Probabilities from ``method='linear'`` lie within [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_unify(self):
        """Probabilities from ``method='unify'`` lie within [0, 1]."""
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert_greater_equal(pred_proba.min(), 0)
        assert_less_equal(pred_proba.max(), 1)

    def test_prediction_proba_parameter(self):
        """An unknown probability method raises ``ValueError``."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        """``fit_predict`` returns one label per training sample."""
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_evaluate(self):
        """``fit_predict_evaluate`` runs on the test split without raising."""
        self.clf.fit_predict_evaluate(self.X_test, self.y_test)

    def tearDown(self):
        """Nothing to clean up."""
        pass
n_train = 200
n_test = 100

# generate_data here yields six arrays: features, labels and a third
# array (c_*) for each of the train and test splits.
X_train, y_train, c_train, X_test, y_test, c_test = generate_data(
    n_train=n_train,
    n_test=n_test,
    contamination=contamination,
)

# Fit an ABOD detector with the fast method switched off.
clf = ABOD(contamination=contamination, fast_method=False)
clf.fit(X_train)

# Training-set labels and sign-flipped scores, read from the fitted model.
y_train_pred = clf.y_pred
y_train_score = clf.decision_scores * -1

# Test-set labels and sign-flipped scores.
y_test_pred = clf.predict(X_test)
y_test_score = clf.decision_function(X_test) * -1

# Report ROC AUC and precision@n for each split.
train_roc = roc_auc_score(y_train, y_train_score)
train_prn = precision_n_scores(y_train, y_train_score)
print('Train ROC:{roc}, precision@n:{prn}'.format(roc=train_roc,
                                                  prn=train_prn))

test_roc = roc_auc_score(y_test, y_test_score)
test_prn = precision_n_scores(y_test, y_test_score)
print('Test ROC:{roc}, precision@n:{prn}'.format(roc=test_roc,
                                                 prn=test_prn))

#######################################################################
# Visualizations
# Create the figure output directory when it is missing.
pathlib.Path('example_figs').mkdir(parents=True, exist_ok=True)
import sys

import numpy as np
import pandas as pd
from scipy import stats

from pyod.models.abod import ABOD
from pyod.utils.data import generate_data, get_outliers_inliers

# The first command-line argument is the integer value to classify.
# Exit with a usage message instead of a traceback when it is missing
# or is not an integer.
if len(sys.argv) < 2:
    sys.exit('usage: {} <integer value>'.format(sys.argv[0]))
try:
    data = int(sys.argv[1])
except ValueError:
    sys.exit('error: expected an integer value, got {!r}'.format(sys.argv[1]))

# generate random training data with a single feature
X_train, Y_train = generate_data(n_train=2000, train_only=True, n_features=1)

# store outliers and inliers in different numpy arrays
x_outliers, x_inliers = get_outliers_inliers(X_train, Y_train)

clf = ABOD(contamination=0.1)
clf.fit(X_train)

# predicted label for every training point
y_pred = clf.predict(X_train)

# predicted label for the value given on the command line, passed as a
# one-sample, one-feature input
output = clf.predict([[data]])
print(output[0])

# Save the predicted label in anomaly.txt as well.
with open('anomaly.txt', 'w') as out_file:
    out_file.write(str(output[0]))