def main():
    """Fit an MCD outlier detector on every CSV file found in ``./csv_files``.

    The CSV files are concatenated into one frame, the nominal columns are
    label encoded, zero-variance and perfectly correlated columns are
    dropped, and an ``MCD`` model is fitted on what remains.  The frame with
    the predicted ``label`` column is written to
    ``./processed_csv/processed.csv`` and the fitted model is pickled to
    ``model.sav``.
    """
    # Read all the csv files
    csvPath = "./csv_files"
    csvFiles = [f for f in listdir(csvPath) if isfile(join(csvPath, f))]
    dfs = []
    for cv in csvFiles:
        print("CSV Processing: " + cv)
        dfs.append(pd.read_csv(csvPath + '/' + cv, index_col=False))
    df = pd.concat(dfs, ignore_index=True)

    # Turn the nominal columns into integer codes
    nom_cols = ['ip_flags', 'tcp_udp_flags', 'payload']
    for c in nom_cols:
        le = LabelEncoder()
        df[c] = le.fit_transform(df[c])

    # Remove the columns whose standard deviation is zero
    df = df.loc[:, df.std() > 0.0]

    # Calculate the absolute correlation matrix
    corr_matrix = df.corr().abs()

    # Select upper triangle of correlation matrix.  The builtin ``bool`` is
    # used because the ``np.bool`` alias no longer exists in current NumPy.
    upper = corr_matrix.where(
        np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

    # Find the feature columns that are perfectly correlated (|corr| == 1)
    # with an earlier column and drop them by name.
    to_drop = [column for column in upper.columns if any(upper[column] == 1)]
    df = df.drop(columns=to_drop)
    print(df.head())

    # Fit the model and label every row with its prediction
    clf = MCD().fit(df)
    df['label'] = clf.predict(df)
    print(df)

    totalNormal = len(df[df['label'] == 0])
    totalAnomalies = len(df[df['label'] == 1])
    print("Normal: " + str(totalNormal))
    print("Anomaly: " + str(totalAnomalies))
    # NOTE: this is the share of rows labelled 0, not an accuracy against
    # ground truth; the printed text is kept for output compatibility.
    print('Accuracy: ' + str(totalNormal / float(totalNormal + totalAnomalies)))

    df.to_csv('./processed_csv/' + 'processed.csv', index=False)

    # Save the model; the context manager guarantees the file is closed
    filename = 'model.sav'
    with open(filename, 'wb') as model_file:
        pickle.dump(clf, model_file)
def setUp(self):
    """Generate the train/test data and fit the MCD detector under test."""
    # Fixture configuration
    self.n_train, self.n_test = 200, 100
    self.contamination = 0.1
    self.roc_floor = 0.8

    # generate_data returns the four splits in this exact order
    splits = generate_data(
        n_train=self.n_train,
        n_test=self.n_test,
        contamination=self.contamination,
        random_state=42)
    self.X_train, self.y_train, self.X_test, self.y_test = splits

    # Detector fitted on the training split only
    self.clf = MCD(contamination=self.contamination, random_state=42)
    self.clf.fit(self.X_train)
def getOutlierMCD(dataset):
    '''
    @brief Fits an MCD model on the dataset and returns the label assigned
    to each instance: inlier (0) or outlier (1)
    @param dataset Dataset the model is fitted on
    @return The labels_ attribute of the fitted model; 0 means inlier,
    1 means outlier
    '''
    # Build a detector with its default parameters
    detector = MCD()
    # Fitting populates the labels_ attribute
    detector.fit(dataset)
    return detector.labels_
def define_classifiers(random_state, outliers_fraction):
    """Return a name -> detector mapping of ten unfitted outlier detectors.

    Every detector receives ``outliers_fraction`` as its ``contamination``.
    CBLOF, Feature Bagging, Isolation Forest, MCD and PCA additionally
    receive ``random_state``.
    """
    plain = {'contamination': outliers_fraction}
    seeded = {'contamination': outliers_fraction,
              'random_state': random_state}

    classifiers = {}
    classifiers['Angle-based Outlier Detector (ABOD)'] = ABOD(**plain)
    classifiers['Cluster-based Local Outlier Factor'] = CBLOF(
        check_estimator=False, **seeded)
    classifiers['Feature Bagging'] = FeatureBagging(**seeded)
    classifiers['Histogram-base Outlier Detection (HBOS)'] = HBOS(**plain)
    classifiers['Isolation Forest'] = IForest(**seeded)
    classifiers['K Nearest Neighbors (KNN)'] = KNN(**plain)
    classifiers['Local Outlier Factor (LOF)'] = LOF(**plain)
    classifiers['Minimum Covariance Determinant (MCD)'] = MCD(**seeded)
    classifiers['One-class SVM (OCSVM)'] = OCSVM(**plain)
    classifiers['Principal Component Analysis (PCA)'] = PCA(**seeded)
    return classifiers
def load_classifiers(outliers_fraction):
    """Return a name -> detector mapping of eleven unfitted outlier detectors.

    ``outliers_fraction`` is capped at 0.5 before being used as the
    ``contamination`` of every detector.  A single ``RandomState(42)``
    instance is shared by the detectors that take a ``random_state``.
    """
    outliers_fraction = min(0.5, outliers_fraction)
    random_state = np.random.RandomState(42)

    plain = {'contamination': outliers_fraction}
    seeded = {'contamination': outliers_fraction,
              'random_state': random_state}

    classifiers = {}
    classifiers['Angle-based Outlier Detector (ABOD)'] = ABOD(**plain)
    classifiers['Cluster-based Local Outlier Factor (CBLOF)'] = CBLOF(
        check_estimator=False, **seeded)
    classifiers['Feature Bagging'] = FeatureBagging(
        LOF(n_neighbors=35), **seeded)
    classifiers['Histogram-base Outlier Detection (HBOS)'] = HBOS(**plain)
    classifiers['Isolation Forest'] = IForest(behaviour="new", **seeded)
    classifiers['K Nearest Neighbors (KNN)'] = KNN(**plain)
    classifiers['Average KNN'] = KNN(method='mean', **plain)
    classifiers['Local Outlier Factor (LOF)'] = LOF(n_neighbors=35, **plain)
    classifiers['Minimum Covariance Determinant (MCD)'] = MCD(**seeded)
    classifiers['One-class SVM (OCSVM)'] = OCSVM(**plain)
    classifiers['Principal Component Analysis (PCA)'] = PCA(**seeded)
    return classifiers
def __load_classifiers(self):
    """Return a name -> detector mapping of nine unfitted outlier detectors.

    All detectors use a contamination of 0.05.  A single ``RandomState(0)``
    instance is shared by the detectors that take a ``random_state``.
    """
    outliers_fraction = 0.05
    random_state = np.random.RandomState(0)

    plain = {'contamination': outliers_fraction}
    seeded = {'contamination': outliers_fraction,
              'random_state': random_state}

    classifiers = {}
    classifiers['Cluster-based Local Outlier Factor (CBLOF)'] = CBLOF(
        check_estimator=False, **seeded)
    classifiers['Feature Bagging'] = FeatureBagging(
        LOF(n_neighbors=35), **seeded)
    classifiers['Histogram-base Outlier Detection (HBOS)'] = HBOS(**plain)
    classifiers['Isolation Forest'] = IForest(**seeded)
    classifiers['K Nearest Neighbors (KNN)'] = KNN(**plain)
    classifiers['Average KNN'] = KNN(method='mean', **plain)
    classifiers['Local Outlier Factor (LOF)'] = LOF(n_neighbors=35, **plain)
    classifiers['Minimum Covariance Determinant (MCD)'] = MCD(**seeded)
    classifiers['One-class SVM (OCSVM)'] = OCSVM(**plain)
    return classifiers
def main():
    """Run ``runByScaler`` once per scaler over the 30 configured models.

    One joblib task is created per entry of ``scalers``; each task receives
    the full model mapping, the list of model names passed as
    ``other_models`` and the per-task ``CPUS`` setting.
    """
    # Run configuration
    root = 'Unsupervised_Anamaly_Detection_csv'
    scalers = ['no', 'std', 'minmax']
    start, counts = 0, 90
    CPUS, CPUS_Models = 3, 4
    name = "30_Models"

    # Names handed to runByScaler as ``other_models``
    sklearn_models = [
        'AvgKNN', 'LargestKNN', 'MedKNN', 'PCA', 'COF', 'LODA', 'LOF',
        'HBOS', 'MCD', 'AvgBagging', 'MaxBagging', 'IForest', 'CBLOF',
        'COPOD', 'SOD', 'LSCPwithLODA', 'AveLMDD', 'VarLMDD', 'IqrLMDD',
        'SoGaal', 'MoGaal', 'VAE', 'AutoEncoder'
    ]

    # Model name -> unfitted estimator
    models = dict(
        BRM=BRM(bootstrap_sample_percent=70),
        GM=GaussianMixture(),
        IF=IsolationForest(),
        OCSVM=OneClassSVM(),
        EE=EllipticEnvelope(),
        AvgKNN=KNN(method='mean'),
        LargestKNN=KNN(method='largest'),
        MedKNN=KNN(method='median'),
        PCA=PCA(),
        COF=COF(),
        LODA=LODA(),
        LOF=LOF(),
        HBOS=HBOS(),
        MCD=MCD(),
        AvgBagging=FeatureBagging(combination='average'),
        MaxBagging=FeatureBagging(combination='max'),
        CBLOF=CBLOF(n_clusters=10, n_jobs=4),
        FactorAnalysis=FactorAnalysis(),
        KernelDensity=KernelDensity(),
        COPOD=COPOD(),
        SOD=SOD(),
        LSCPwithLODA=LSCP([LODA(), LODA()]),
        AveLMDD=LMDD(dis_measure='aad'),
        VarLMDD=LMDD(dis_measure='var'),
        IqrLMDD=LMDD(dis_measure='iqr'),
        SoGaal=SO_GAAL(),
        MoGaal=MO_GAAL(),
        VAE=VAE(encoder_neurons=[8, 4, 2]),
        AutoEncoder=AutoEncoder(hidden_neurons=[6, 3, 3, 6]),
        OCKRA=m_OCKRA(),
    )

    # One delayed task per scaler, dispatched by joblib
    tasks = (
        delayed(runByScaler)(root, scaler, models, start, counts,
                             other_models=sklearn_models,
                             CPUS=CPUS_Models,
                             save_name=name)
        for scaler in scalers
    )
    Parallel(n_jobs=CPUS)(tasks)
def outlier_detection(x_raw, y_raw):
    """
    Filter all outlier points with an Isolation Forest

    The detector is fitted on ``x_raw`` with a contamination of 0.04; every
    row it predicts as an outlier is removed from both ``x_raw`` and
    ``y_raw``.

    :param x_raw: feature in ndarray
    :param y_raw: label in ndarray
    :return x_clean, y_clean: cleaned feature and label in ndarray
    """
    print()
    print("Detecting outliers...")
    print("Before outlier detection: {}".format(x_raw.shape))

    outliers_fraction = 0.04
    random_state = np.random.RandomState(42)

    # Isolation Forest is the detector in use; another pyod detector such as
    # ABOD(contamination=outliers_fraction, method='default') can be
    # swapped in here.
    clf = IForest(contamination=outliers_fraction, random_state=random_state)
    clf.fit(x_raw)
    y_pred = clf.predict(x_raw)

    # for pyod, 1 means outliers and 0 means inliers
    # for sklearn, -1 means outliers and 1 means inliers
    # Iterate over the predictions themselves so the function works for any
    # number of rows instead of a fixed dataset size.
    idx_y_pred = [i for i, label in enumerate(y_pred) if label == 1]

    x_clean = del_rowsorcolumns(x_raw, idx_y_pred, axis=0)
    y_clean = del_rowsorcolumns(y_raw, idx_y_pred, axis=0)
    print("After outlier detection: {}".format(x_clean.shape))

    # Features and labels must stay aligned row for row
    assert (x_clean.shape[0] == y_clean.shape[0])
    return x_clean, y_clean
def setUp(self):
    """Generate the train/test data and fit the MCD detector under test."""
    # Fixture configuration
    self.n_train, self.n_test = 100, 50
    self.contamination = 0.1
    self.roc_floor = 0.6

    # generate_data returns the four splits in this exact order
    splits = generate_data(
        n_train=self.n_train,
        n_test=self.n_test,
        contamination=self.contamination,
        random_state=42)
    self.X_train, self.y_train, self.X_test, self.y_test = splits

    # Detector fitted on the training split only
    self.clf = MCD(contamination=self.contamination, random_state=42)
    self.clf.fit(self.X_train)
def choose_model(model, nnet):
    """Return a new, unfitted detector among those implemented in PyOD.

    Only the requested detector is constructed, so ``nnet`` is read solely
    when ``model`` is ``'AE'`` or ``'VAE'``.

    :param model: key naming the detector ('AE', 'VAE', 'ABOD',
        'FeatureBagging', 'HBOS', 'IForest', 'KNN', 'LOF', 'OCSVM', 'PCA',
        'SOS', 'COF', 'CBLOF', 'SOD', 'LOCI' or 'MCD')
    :param nnet: sequence of layer sizes; passed whole as ``hidden_neurons``
        for 'AE', and split as ``nnet[:5]`` / ``nnet[4:]`` into encoder and
        decoder sizes for 'VAE'
    :return: the detector instance for ``model``
    :raises KeyError: if ``model`` is not one of the supported keys
    """
    # Factories instead of instances: nothing is built until it is selected
    factories = {
        'AE': lambda: AutoEncoder(hidden_neurons=nnet,
                                  contamination=0.1,
                                  epochs=15),
        'VAE': lambda: VAE(encoder_neurons=nnet[:5],
                           decoder_neurons=nnet[4:],
                           contamination=0.1,
                           epochs=13),
        'ABOD': ABOD,
        'FeatureBagging': FeatureBagging,
        'HBOS': HBOS,
        'IForest': IForest,
        'KNN': KNN,
        'LOF': LOF,
        'OCSVM': OCSVM,
        'PCA': PCA,
        'SOS': SOS,
        'COF': COF,
        'CBLOF': CBLOF,
        'SOD': SOD,
        'LOCI': LOCI,
        'MCD': MCD,
    }
    return factories[model]()
def train(doc_list, dataset_name, clf_name):
    """Fit one detector on the first ten files and average its scores.

    For each of ``doc_list[0]`` .. ``doc_list[9]`` the CSV is loaded, the
    columns named by the module-level ``drop`` and ``ground_truth`` lists
    are removed to form the features, and the ground-truth values are mapped
    through the module-level ``transfor`` table to form the labels.  The
    detector is refitted on every file and scored on its own training data.

    :param doc_list: sequence of CSV paths; at least ten entries are read
    :param dataset_name: name used only in the printed summary
    :param clf_name: one of "PCA", "MCD", "LOF", "KNN", "LODA"
    :return: (mean ROC AUC, mean precision @ n) over the processed files
    :raises ValueError: if ``clf_name`` is not a supported detector name
    """
    detectors = {
        "PCA": PCA,
        "MCD": MCD,
        "LOF": LOF,
        "KNN": KNN,
        "LODA": LODA,
    }
    # Fail early with a clear message instead of hitting an unbound ``clf``
    # inside the loop.
    if clf_name not in detectors:
        raise ValueError("unsupported clf_name: {!r}; expected one of {}".format(
            clf_name, sorted(detectors)))
    clf = detectors[clf_name]()

    model_roc = []
    model_prc = []
    for i in range(10):
        data = pd.read_csv(doc_list[i], header=0, index_col=0)
        train_x = data.drop(drop + ground_truth, axis=1).values
        train_y = np.array([
            transfor[x]
            for x in list(_flatten(data[ground_truth].values.tolist()))
        ])

        clf.fit(train_x)
        # Outlier scores of the data the detector was just fitted on
        predict = clf.decision_scores_
        roc = roc_auc_score(train_y, predict)
        prc = precision_n_scores(train_y, predict)

        # Progress report every 200 files (never reached with ten files)
        if ((i + 1) % 200 == 0):
            print("第" + str(i + 1) + "个文件结果:")
            evaluate_print(clf_name, train_y, predict)

        model_roc.append(roc)
        model_prc.append(prc)

    model_roc_avg = np.mean(model_roc)
    model_prc_avg = np.mean(model_prc)
    print("模型" + clf_name + "在数据集" + dataset_name + "的平均roc_auc为" +
          str(round(model_roc_avg, 4)) + ",平均prc为" +
          str(round(model_prc_avg, 4)) + "。")
    return model_roc_avg, model_prc_avg
class TestMCD(unittest.TestCase):
    """Unit tests for the MCD detector fitted on generated data."""

    def setUp(self):
        """Generate the train/test data and fit the detector under test."""
        self.n_train, self.n_test = 200, 100
        self.contamination = 0.1
        self.roc_floor = 0.8

        splits = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination,
            random_state=42)
        self.X_train, self.y_train, self.X_test, self.y_test = splits

        self.clf = MCD(contamination=self.contamination, random_state=42)
        self.clf.fit(self.X_train)

    def test_parameters(self):
        """Every fitted attribute must exist and be set."""
        fitted_attributes = (
            'decision_scores_', 'labels_', 'threshold_', '_mu', '_sigma',
            'raw_location_', 'raw_covariance_', 'raw_support_',
            'location_', 'covariance_', 'precision_', 'support_',
        )
        for attribute in fitted_attributes:
            assert (hasattr(self.clf, attribute) and
                    getattr(self.clf, attribute) is not None)

    def test_train_scores(self):
        """One training score per training sample."""
        n_samples = self.X_train.shape[0]
        assert_equal(len(self.clf.decision_scores_), n_samples)

    def test_prediction_scores(self):
        """Test scores have the right length and reach the ROC floor."""
        scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(scores.shape[0], self.X_test.shape[0])

        # check performance
        assert (roc_auc_score(self.y_test, scores) >= self.roc_floor)

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the true labels."""
        labels = self.clf.predict(self.X_test)
        assert_equal(labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        """Default probabilities lie in [0, 1]."""
        proba = self.clf.predict_proba(self.X_test)
        assert (proba.min() >= 0)
        assert (proba.max() <= 1)

    def test_prediction_proba_linear(self):
        """'linear' probabilities lie in [0, 1]."""
        proba = self.clf.predict_proba(self.X_test, method='linear')
        assert (proba.min() >= 0)
        assert (proba.max() <= 1)

    def test_prediction_proba_unify(self):
        """'unify' probabilities lie in [0, 1]."""
        proba = self.clf.predict_proba(self.X_test, method='unify')
        assert (proba.min() >= 0)
        assert (proba.max() <= 1)

    def test_prediction_proba_parameter(self):
        """An unknown probability method raises ValueError."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        """fit_predict returns one label per training sample."""
        labels = self.clf.fit_predict(self.X_train)
        assert_equal(labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        """Known scoring names run; an unknown one raises."""
        self.clf.fit_predict_score(self.X_test, self.y_test)
        for scoring in ('roc_auc_score', 'prc_n_score'):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring=scoring)
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        """Ranks keep the score order and stay within the training size."""
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(ranks), rankdata(scores), atol=2.5)
        assert_array_less(ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, ranks)

    def test_predict_rank_normalized(self):
        """Normalized ranks keep the score order and stay below 1.01."""
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(ranks), rankdata(scores), atol=2.5)
        assert_array_less(ranks, 1.01)
        assert_array_less(-0.1, ranks)

    def tearDown(self):
        """Nothing to clean up."""
        pass
# get the prediction labels and outlier scores of the training data y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) y_train_scores = clf.decision_scores_ # raw outlier scores # get the prediction on the test data y_test_pred = clf.predict(new_origin_all[pos:]) # outlier labels (0 or 1) y_test_scores = clf.decision_function(new_origin_all[pos:]) # outlier scores show_scatter(clf_name, df, y_train_pred, pos) # In[173]: # train MCD detector clf_name = 'MCD' clf = MCD() clf.fit(new_origin_all[:pos]) # get the prediction labels and outlier scores of the training data y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) y_train_scores = clf.decision_scores_ # raw outlier scores # get the prediction on the test data y_test_pred = clf.predict(new_origin_all[pos:]) # outlier labels (0 or 1) y_test_scores = clf.decision_function(new_origin_all[pos:]) # outlier scores show_scatter(clf_name, df, y_train_pred, pos) # In[174]:
X = StandardScaler().fit_transform(X) # load the pre-trained model cost predictor clf = load('rf_predictor.joblib') classifiers = { 1: ABOD(n_neighbors=10), 2: CBLOF(check_estimator=False), 3: FeatureBagging(LOF()), 4: HBOS(), 5: IForest(), 6: KNN(), 7: KNN(method='mean'), 8: LOF(), 9: MCD(), 10: OCSVM(), 11: PCA(), } clfs = np.random.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], size=n_estimators_total) clfs_real = [] for estimator in clfs: clfs_real.append(classifiers[estimator]) X_w = indices_to_one_hot(clfs - 1, 11) X_d1 = np.array([X.shape[0], X.shape[1]]).reshape(1, 2) X_d = np.repeat(X_d1, len(clfs), axis=0) X_c = np.concatenate((X_d, X_w), axis=1)
if __name__ == "__main__": contamination = 0.1 # percentage of outliers n_train = 200 # number of training points n_test = 100 # number of testing points # Generate sample data X_train, y_train, X_test, y_test = \ generate_data(n_train=n_train, n_test=n_test, n_features=2, contamination=contamination, random_state=42) # train LOF detector clf_name = 'MCD' clf = MCD() clf.fit(X_train) # get the prediction labels and outlier scores of the training data y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) y_train_scores = clf.decision_scores_ # raw outlier scores # get the prediction on the test data y_test_pred = clf.predict(X_test) # outlier labels (0 or 1) y_test_scores = clf.decision_function(X_test) # outlier scores # evaluate and print the results print("\nOn Training Data:") evaluate_print(clf_name, y_train, y_train_scores) print("\nOn Test Data:") evaluate_print(clf_name, y_test, y_test_scores)
# NOTE(review): this title applies to a figure created before this excerpt.
plt.title('Test Set Ground Truth')

# new figure: test points split by predicted label
plt.figure()
plt.scatter(X_test_pred_d2_0[:, 0], X_test_pred_d2_0[:, 1], c='g', marker='o')
plt.scatter(X_test_pred_d2_1[:, 0], X_test_pred_d2_1[:, 1], c='r', marker='d')
plt.legend((u'inliers', u'outliers'), loc=2)
plt.title('Test Set Prediction')
plt.show()

# # MCD

# In[8]:

# MCD detector with default parameters
clf_name = 'MCD'
clf = MCD()

# In[9]:

# fit on the training set
clf.fit(X_train)
y_train_pred = clf.labels_
y_train_scores = clf.decision_scores_
y_test_pred = clf.predict(X_test)
y_test_scores = clf.decision_function(X_test)

# evaluate performance: ROC AUC and precision @ n, rounded to 4 digits
roc_train = round(roc_auc_score(y_train, y_train_scores), 4)
prn_train = round(precision_n_scores(y_train, y_train_scores), ndigits=4)
roc_test = round(roc_auc_score(y_test, y_test_scores), 4)
prn_test = round(precision_n_scores(y_test, y_test_scores), ndigits=4)
def main():
    """Rank 30 anomaly detectors: predict, score, then plot the results.

    Predictions are produced by ``ADRanker`` for the ``"datasets"`` data,
    scored with the ``auc`` and ``ave`` metrics, and summarised with the
    basic plots and the critical-difference (CD) diagrams of ``Plots``.
    """
    # PART 1:
    # Getting the predictions for each classifier
    # SK means: The classifier is from sklearn or works like sklearn
    # PY means: The classifier is from pyod or works like pyod
    models = {
        'SK_EE': EllipticEnvelope(),
        'SK_GM': GaussianMixture(),
        'SK_IF': IsolationForest(),
        'SK_OCSVM': OneClassSVM(),
        'SK_FA': FactorAnalysis(),
        'SK_KD': KernelDensity(),
        'PY_PCA': PCA(),
        'PY_COF': COF(),
        'PY_LODA': LODA(),
        'PY_LOF': LOF(),
        'PY_HBOS': HBOS(),
        'PY_MCD': MCD(),
        'PY_AvgKNN': KNN(method='mean'),
        'PY_LargestKNN': KNN(method='largest'),
        'PY_MedKNN': KNN(method='median'),
        'PY_AvgBagging': FeatureBagging(combination='average'),
        'PY_MaxBagging': FeatureBagging(combination='max'),
        'PY_CBLOF': CBLOF(n_clusters=10, n_jobs=4),
        'PY_COPOD': COPOD(),
        'PY_SOD': SOD(),
        'PY_LSCPwithLODA': LSCP([LODA(), LODA()]),
        'PY_AveLMDD': LMDD(dis_measure='aad'),
        'PY_VarLMDD': LMDD(dis_measure='var'),
        'PY_IqrLMDD': LMDD(dis_measure='iqr'),
        'PY_VAE': VAE(encoder_neurons=[8, 4, 2]),
        'PY_AutoEncoder': AutoEncoder(hidden_neurons=[6, 3, 3, 6]),
        'SK_BRM': BRM(bootstrap_sample_percent=70),
        'SK_OCKRA': m_OCKRA(),
        'PY_SoGaal': SO_GAAL(),
        'PY_MoGaal': MO_GAAL()
    }
    ranker = ADRanker(data="datasets", models=models)
    ranker.get_predictions()

    # PART 2:
    # After predictions, we can evaluate our classifiers using different scores
    # You can add manually a new metric by modifying 'metrics.py'
    ranker.get_scores(scores={'auc': Metrics.get_roc,
                              'ave': Metrics.get_ave})

    # PART 3:
    # Finally, it is time to summarize the results by plotting different graphs
    # You can add your own graphs by modifying 'plots.py'
    plot = Plots()
    plot.make_plot_basic(
        paths=[
            'results/scores/auc/no/results.csv',
            'results/scores/auc/minmax/results.csv',
            'results/scores/auc/std/results.csv',
            'results/scores/ave/no/results.csv',
            'results/scores/ave/minmax/results.csv',
            'results/scores/ave/std/results.csv'
        ],
        scalers=[
            'Without scaler',
            'Min max scaler',
            'Standard scaler',
            'Without scaler',
            'Min max scaler',
            'Standard scaler'
        ])

    # paths, names and titles are parallel lists: entry k of each describes
    # the same diagram, so the second path must be the ave/minmax results.
    plot.make_cd_plot(
        paths=[
            'results/scores/auc/minmax/results.csv',
            'results/scores/ave/minmax/results.csv',
            'results/scores/auc/no/results.csv',
            'results/scores/ave/no/results.csv',
            'results/scores/auc/std/results.csv',
            'results/scores/ave/std/results.csv'
        ],
        names=[
            'CD auc minmax scale',
            'CD ave minmax scale',
            'CD auc no scale',
            'CD ave no scale',
            'CD auc std scale',
            'CD ave std scale'
        ],
        titles=[
            'CD diagram - AUC with min max scaling',
            'CD diagram - Average precision with min max scaling',
            'CD diagram - AUC without scaling',
            'CD diagram - Average precision without scaling',
            'CD diagram - AUC with standard scaling',
            'CD diagram - Average precision with standard scaling'
        ])
def get_estimators(contamination): """Internal method to create a list of 600 random base outlier detectors. Parameters ---------- contamination : float in (0., 0.5), optional (default=0.1) The amount of contamination of the data set, i.e. the proportion of outliers in the data set. Used when fitting to define the threshold on the decision function. Returns ------- base_detectors : list A list of initialized random base outlier detectors. """ BASE_ESTIMATORS = [ LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), ABOD(n_neighbors=5, contamination=contamination), ABOD(n_neighbors=10, contamination=contamination), ABOD(n_neighbors=15, contamination=contamination), ABOD(n_neighbors=20, contamination=contamination), ABOD(n_neighbors=25, contamination=contamination), ABOD(n_neighbors=30, contamination=contamination), ABOD(n_neighbors=35, contamination=contamination), ABOD(n_neighbors=40, contamination=contamination), LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, 
contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), KNN(n_neighbors=5, contamination=contamination), KNN(n_neighbors=15, contamination=contamination), KNN(n_neighbors=25, contamination=contamination), KNN(n_neighbors=35, contamination=contamination), KNN(n_neighbors=45, contamination=contamination), KNN(n_neighbors=50, contamination=contamination), KNN(n_neighbors=55, contamination=contamination), KNN(n_neighbors=65, contamination=contamination), KNN(n_neighbors=75, 
contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=95, contamination=contamination), KNN(n_neighbors=100, contamination=contamination), IForest(n_estimators=50, contamination=contamination), IForest(n_estimators=100, contamination=contamination), IForest(n_estimators=150, contamination=contamination), IForest(n_estimators=200, contamination=contamination), IForest(n_estimators=50, contamination=contamination), IForest(n_estimators=100, contamination=contamination), IForest(n_estimators=150, contamination=contamination), IForest(n_estimators=200, contamination=contamination), LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, 
contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), 
PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), KNN(n_neighbors=5, contamination=contamination), KNN(n_neighbors=15, contamination=contamination), KNN(n_neighbors=25, contamination=contamination), KNN(n_neighbors=35, contamination=contamination), KNN(n_neighbors=45, contamination=contamination), KNN(n_neighbors=50, contamination=contamination), KNN(n_neighbors=55, contamination=contamination), KNN(n_neighbors=65, contamination=contamination), KNN(n_neighbors=75, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=95, contamination=contamination), KNN(n_neighbors=100, contamination=contamination), IForest(n_estimators=50, contamination=contamination), IForest(n_estimators=100, contamination=contamination), IForest(n_estimators=150, contamination=contamination), IForest(n_estimators=200, contamination=contamination), IForest(n_estimators=50, contamination=contamination), IForest(n_estimators=100, contamination=contamination), IForest(n_estimators=150, contamination=contamination), IForest(n_estimators=200, contamination=contamination), LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), 
LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), 
LOF(n_neighbors=100, contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), KNN(n_neighbors=5, contamination=contamination), KNN(n_neighbors=15, contamination=contamination), KNN(n_neighbors=25, contamination=contamination), KNN(n_neighbors=35, contamination=contamination), KNN(n_neighbors=45, contamination=contamination), KNN(n_neighbors=50, contamination=contamination), KNN(n_neighbors=55, contamination=contamination), KNN(n_neighbors=65, contamination=contamination), KNN(n_neighbors=75, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=95, contamination=contamination), KNN(n_neighbors=100, contamination=contamination), IForest(n_estimators=50, contamination=contamination), IForest(n_estimators=100, contamination=contamination), IForest(n_estimators=150, contamination=contamination), IForest(n_estimators=200, 
contamination=contamination), IForest(n_estimators=50, contamination=contamination), IForest(n_estimators=100, contamination=contamination), IForest(n_estimators=150, contamination=contamination), IForest(n_estimators=200, contamination=contamination), LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), LOF(n_neighbors=5, 
contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), ABOD(n_neighbors=5, contamination=contamination), ABOD(n_neighbors=10, contamination=contamination), ABOD(n_neighbors=15, contamination=contamination), ABOD(n_neighbors=20, contamination=contamination), ABOD(n_neighbors=25, contamination=contamination), ABOD(n_neighbors=30, contamination=contamination), ABOD(n_neighbors=35, contamination=contamination), ABOD(n_neighbors=40, contamination=contamination), IForest(n_estimators=50, contamination=contamination), 
IForest(n_estimators=100, contamination=contamination), IForest(n_estimators=150, contamination=contamination), IForest(n_estimators=200, contamination=contamination), IForest(n_estimators=50, contamination=contamination), IForest(n_estimators=100, contamination=contamination), IForest(n_estimators=150, contamination=contamination), IForest(n_estimators=200, contamination=contamination), IForest(n_estimators=150, contamination=contamination), IForest(n_estimators=200, contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), KNN(n_neighbors=5, contamination=contamination), KNN(n_neighbors=15, contamination=contamination), KNN(n_neighbors=25, contamination=contamination), KNN(n_neighbors=35, contamination=contamination), KNN(n_neighbors=45, contamination=contamination), KNN(n_neighbors=50, contamination=contamination), KNN(n_neighbors=55, contamination=contamination), KNN(n_neighbors=65, contamination=contamination), KNN(n_neighbors=75, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=95, contamination=contamination), KNN(n_neighbors=100, contamination=contamination), IForest(n_estimators=50, contamination=contamination), IForest(n_estimators=100, contamination=contamination), IForest(n_estimators=150, contamination=contamination), IForest(n_estimators=200, contamination=contamination), IForest(n_estimators=50, contamination=contamination), IForest(n_estimators=100, contamination=contamination), IForest(n_estimators=150, contamination=contamination), IForest(n_estimators=200, contamination=contamination), 
LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), ABOD(n_neighbors=5, contamination=contamination), ABOD(n_neighbors=10, contamination=contamination), ABOD(n_neighbors=15, contamination=contamination), ABOD(n_neighbors=20, contamination=contamination), ABOD(n_neighbors=25, contamination=contamination), ABOD(n_neighbors=30, contamination=contamination), ABOD(n_neighbors=35, contamination=contamination), ABOD(n_neighbors=40, contamination=contamination), LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, 
contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), HBOS(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), PCA(contamination=contamination), 
PCA(contamination=contamination), KNN(n_neighbors=5, contamination=contamination), KNN(n_neighbors=15, contamination=contamination), KNN(n_neighbors=25, contamination=contamination), KNN(n_neighbors=35, contamination=contamination), KNN(n_neighbors=45, contamination=contamination), KNN(n_neighbors=50, contamination=contamination), KNN(n_neighbors=55, contamination=contamination), KNN(n_neighbors=65, contamination=contamination), KNN(n_neighbors=75, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=85, contamination=contamination), KNN(n_neighbors=95, contamination=contamination), KNN(n_neighbors=100, contamination=contamination), IForest(n_estimators=50, contamination=contamination), IForest(n_estimators=100, contamination=contamination), IForest(n_estimators=150, contamination=contamination), IForest(n_estimators=200, contamination=contamination), IForest(n_estimators=50, contamination=contamination), IForest(n_estimators=100, contamination=contamination), IForest(n_estimators=150, contamination=contamination), IForest(n_estimators=200, contamination=contamination), LOF(n_neighbors=5, contamination=contamination), LOF(n_neighbors=10, contamination=contamination), LOF(n_neighbors=15, contamination=contamination), LOF(n_neighbors=25, contamination=contamination), LOF(n_neighbors=35, contamination=contamination), LOF(n_neighbors=45, contamination=contamination), LOF(n_neighbors=50, contamination=contamination), LOF(n_neighbors=55, contamination=contamination), LOF(n_neighbors=60, contamination=contamination), LOF(n_neighbors=65, contamination=contamination), LOF(n_neighbors=70, contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, 
contamination=contamination), ABOD(n_neighbors=5, contamination=contamination), ABOD(n_neighbors=10, contamination=contamination), ABOD(n_neighbors=15, contamination=contamination), ABOD(n_neighbors=20, contamination=contamination), ABOD(n_neighbors=25, contamination=contamination), ABOD(n_neighbors=30, contamination=contamination), ABOD(n_neighbors=35, contamination=contamination), ABOD(n_neighbors=40, contamination=contamination), ABOD(n_neighbors=45, contamination=contamination), OCSVM(contamination=contamination), OCSVM(contamination=contamination), OCSVM(contamination=contamination), OCSVM(contamination=contamination), OCSVM(contamination=contamination), OCSVM(contamination=contamination), OCSVM(contamination=contamination), OCSVM(contamination=contamination), OCSVM(contamination=contamination), OCSVM(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), MCD(contamination=contamination), LOF(n_neighbors=75, contamination=contamination), LOF(n_neighbors=80, contamination=contamination), LOF(n_neighbors=85, contamination=contamination), LOF(n_neighbors=90, contamination=contamination), LOF(n_neighbors=95, contamination=contamination), LOF(n_neighbors=100, contamination=contamination), ABOD(n_neighbors=5, contamination=contamination), ABOD(n_neighbors=10, contamination=contamination), ABOD(n_neighbors=15, contamination=contamination), 
ABOD(n_neighbors=20, contamination=contamination), ABOD(n_neighbors=25, contamination=contamination), ABOD(n_neighbors=30, contamination=contamination), ABOD(n_neighbors=35, contamination=contamination), ABOD(n_neighbors=40, contamination=contamination), ] return BASE_ESTIMATORS
res_df[k].append(res[k]) res_df = pd.DataFrame(data=res_df) res_df.to_csv(os.path.join(res_dir, 'result.csv'), index=False) if __name__ == "__main__": opt = { 'data_dir': '../data/anomaly_detection', 'result_dir': './result', 'dataset': { 'abalone': ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7'], 'skin': ['R', 'G', 'B'], }, } models = { 'KNN largest': KNN(method='largest'), 'KNN mean': KNN(method='mean'), 'KNN median': KNN(method='median'), 'CBLOF': CBLOF(), 'LOF': LOF(), 'FeatureBagging': FeatureBagging(), 'HBOS': HBOS(), 'IForest': IForest(), 'MCD': MCD(), 'OCSVM': OCSVM(), 'PCA': PCA(), } main()
f.write("Model: " + modelname + "\n") f.write("Dataset " + str(datasetnumber) + ": " + datasetname + "\n") f.write("Time taken: " + str(time) + " seg.\n") f.write("Accuracy: " + str(accuracy) + "\n") if accuracy!=None: f.write("@scores\n") for score in model.decision_scores_: f.write(str(score) + "\n") f.close() # This is based on executing the script from the folder experiments ROUTE = "../datasets/outlier_ground_truth/" # List of datasets datasets = ["annthyroid.mat", "arrhythmia.mat", "breastw.mat", "cardio.mat", "glass.mat", "ionosphere.mat", "letter.mat", "lympho.mat", "mammography.mat", "mnist.mat", "musk.mat", "optdigits.mat", "pendigits.mat", "pima.mat", "satellite.mat", "satimage-2.mat", "speech.mat", "thyroid.mat", "vertebral.mat", "vowels.mat", "wbc.mat", "wine.mat"] # List of models and names models = [ABOD(), COF(), HBOS(), KNN(), LOF(), MCD(), OCSVM(), PCA(), SOD(), SOS()] names = ["ABOD", "COF", "HBOS", "KNN", "LOF", "MCD", "OCSVM", "PCA", "SOD", "SOS"] accuracies = [] for name, model in zip(names, models): print("\n\n#################################################################") print("MODEL " + name + " " + str(names.index(name)+1) + "/" + str(len(names))) print("#################################################################") acc = [] for dat in datasets: if name=="ABOD" and dat in ["breastw.mat", "letter.mat", "satellite.mat"]: result = None else: print("Computing dataset " + dat + " " + str(datasets.index(dat)+1) + "/" + str(len(datasets))) # Read dataset dataset, labels = readDataset(ROUTE + dat)
def execute(self) -> None:
    """Train the autoencoder detector, benchmark it, and report the results.

    The input CSV (``self.input_file``, no header row) is read in chunks of
    ``self.chunk_size`` rows. Its last column is the label: ``1`` marks the
    points reported below as "Normal" and ``-1`` the points reported as
    "Anomalies". 70% of the label-1 rows are sampled for training; the
    remaining label-1 rows plus the label -1 rows form the validation set.
    When ``self.neg_cont`` is a positive number, the label -1 rows are
    replaced by a shuffled subset (fixed seed) whose size is
    ``floor(neg_cont * (n_negative + n_positive_validation))``.

    The autoencoder is always trained and evaluated. Depending on
    ``self.eval_cat`` ("linear", "prob", "ensemble", "proximity", "nn"),
    additional baseline detectors are handed to
    ``train_and_evaluate_classifier`` with the validation data and labels.
    All result rows are printed as tables, and written to
    ``self.printout`` as a header-less CSV when that attribute is set.

    Other attributes read: ``device``, ``layers``, ``add_syn``, ``epochs``,
    ``lr``, ``batch_size``.
    """
    result_headers = [
        "ALGO", "TP", "TN", "FP", "FN", "TPR", "TNR", "PPV", "NPV", "TS",
        "PT", "ACC", "F1", "MCC"
    ]
    evaluation_results = []

    print("Loading training data...")
    # Collect the chunks and concatenate once: DataFrame.append was removed
    # in pandas 2.0 and re-copied the whole frame on every iteration.
    chunks = []
    for i, chunk in enumerate(
            pd.read_csv(self.input_file, header=None,
                        chunksize=self.chunk_size)):
        print("Reading chunk: %d" % (i + 1))
        chunks.append(chunk)
    data = pd.concat(chunks) if chunks else pd.DataFrame()

    # With header=None the columns are labelled 0..n-1, so the label column
    # (the last one) carries the same number as the feature count.
    label_col = len(data.columns) - 1
    input_dimensionality = label_col
    print("Input Dimensionality: %d" % (input_dimensionality))

    positive_data = data[data[label_col] == 1].iloc[:, :label_col]
    negative_data = data[data[label_col] == -1].iloc[:, :label_col]

    training_data = positive_data.sample(frac=0.70)
    positive_validation_data = positive_data.drop(training_data.index)

    if self.neg_cont and self.neg_cont > 0:
        print("Negative Contamination: %0.4f" % (self.neg_cont))
        num_negative = math.floor(
            self.neg_cont *
            (len(negative_data) + len(positive_validation_data)))
        # Build the mask from the shuffled frame itself so the selection
        # does not depend on implicit index re-alignment (which warns).
        shuffled = data.sample(frac=1, random_state=200)
        negative_data = shuffled[shuffled[label_col] == -1].iloc[
            :num_negative, :label_col]

    negative_validation_data = negative_data.copy()

    # Re-attach explicit labels so features and labels stay row-aligned
    # after the two validation subsets are concatenated.
    temp_positive = positive_validation_data.copy()
    temp_positive[input_dimensionality] = 1
    temp_negative = negative_data.copy()
    temp_negative[input_dimensionality] = -1
    validation_data_with_labels = pd.concat([temp_positive, temp_negative],
                                            ignore_index=True)
    validation_data = validation_data_with_labels.iloc[:, :label_col]
    validation_labels = validation_data_with_labels.iloc[:, -1:].values

    # Convert to tensor
    training_data = torch.tensor(training_data.values).float()
    validation_data = torch.tensor(validation_data.values).float()
    print("Validation Data:")
    print(validation_data)

    ## AE-D TRAINING ##
    print("Initializing autoencoder...")
    net = Autoencoder(layers=self.layers,
                      device=self.device,
                      add_syn=self.add_syn)
    net.to(self.device)
    print(net)

    print("Training Stochastic Autoencoder...")
    net.fit(training_data,
            epochs=self.epochs,
            lr=self.lr,
            batch_size=self.batch_size)
    predictions = net.predict(validation_data)
    tp, tn, fp, fn, tpr, tnr, ppv, npv, ts, pt, acc, f1, mcc = performance_metrics(
        validation_labels, predictions)
    r = ["AE-D", tp, tn, fp, fn, tpr, tnr, ppv, npv, ts, pt, acc, f1, mcc]
    evaluation_results.append(r)
    print("AE-D Results:")
    print(tabulate([r], result_headers, tablefmt="grid"))

    # Convert back to CPU before other methods
    validation_data = validation_data.cpu()

    def run_detector(message, name, make_detector):
        # The detector is built only after the progress message is printed,
        # and only for the category that was actually selected.
        print(message)
        evaluation_results.append(
            train_and_evaluate_classifier(name, make_detector(),
                                          validation_data,
                                          validation_labels))

    if self.eval_cat == "linear":
        print("Initiating training for linear detectors...")
        run_detector("Training MCD...", "MCD", MCD)
        run_detector("Training Robust Covariance...", "ROB-COV",
                     EllipticEnvelope)
        run_detector("Training OneClassSVM...", "OC-SVM",
                     lambda: svm.OneClassSVM(gamma="auto"))
    elif self.eval_cat == "prob":
        # NOTE: ABOD and SOS are currently disabled for this category.
        run_detector("Training COPOD...", "COPOD", COPOD)
    elif self.eval_cat == "ensemble":
        run_detector("Training Isolation Forest...", "ISO-F",
                     lambda: IsolationForest(random_state=0))
        run_detector("Training LODA...", "LODA", LODA)
        # NOTE: LSCP is currently disabled for this category.
    elif self.eval_cat == "proximity":
        run_detector("Training Local Outlier Factor...", "LOC-OF",
                     lambda: LocalOutlierFactor(novelty=True))
        run_detector("Training CBLOF...", "CBLOF", CBLOF)
        run_detector("Training HBOS...", "HBOS", HBOS)
    elif self.eval_cat == "nn":
        # list.reverse() reverses in place and returns None, so it must not
        # be used as an argument; pass independent forward/reversed copies
        # and leave self.layers untouched.
        run_detector(
            "Training VAE...", "VAE",
            lambda: VAE(encoder_neurons=list(self.layers),
                        decoder_neurons=list(reversed(self.layers))))
        run_detector(
            "Training SO_GAAL...", "SO_GAAL",
            lambda: SO_GAAL(lr_d=self.lr, stop_epochs=self.epochs))
        run_detector(
            "Training MO_GAAL...", "MO_GAAL",
            lambda: MO_GAAL(lr_d=self.lr, stop_epochs=self.epochs))

    ## EVALUATE RESULTS ##
    if self.eval_cat != "none":
        print("Aggregated Results:")
        print(tabulate(evaluation_results, result_headers, tablefmt="grid"))

    ## DATASET METRICS ##
    len_training_data_points = len(training_data)
    len_positive_validations = len(positive_validation_data)
    len_negative_validations = len(negative_validation_data)
    len_validations = len_positive_validations + len_negative_validations
    # Guard the ratio so an empty validation set reports 0 instead of raising.
    if len_validations:
        contamination_percentage = math.floor(
            (len_negative_validations / len_validations) * 100)
    else:
        contamination_percentage = 0
    metrics_results = [
        ["Training Data Points", len_training_data_points],
        ["# Normal Points", len_positive_validations],
        ["# Anomalies", len_negative_validations],
        ["Contamination Percentage", contamination_percentage],
    ]

    ## EVALUATE RESULTS ##
    print(tabulate(metrics_results, ["Metric", "Value"], tablefmt="grid"))

    if self.printout:
        print("Saving results to %s" % (self.printout))
        df = pd.DataFrame(evaluation_results)
        df.to_csv(self.printout, header=False, index=False)
def get_estimators(contamination):
    """Return the fixed pool of base outlier detectors.

    Every detector is constructed with the supplied ``contamination``. The
    pool is a concatenation of "runs": LOF / ABOD / KNN instances that sweep
    ``n_neighbors``, IForest instances that sweep ``n_estimators``, and
    repeated default-configured HBOS, PCA, OCSVM and MCD instances. Both the
    order of the runs and the number of repetitions are significant; they
    mirror the original hand-written list (600 detectors in total).

    :param contamination: value forwarded as ``contamination=`` to every
        detector constructor.
    :return: a new list of unfitted detector instances.
    """
    lof_ks = (5, 10, 15, 25, 35, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95,
              100)
    abod_ks = (5, 10, 15, 20, 25, 30, 35, 40)
    # k=85 really does occur three times in a row in every KNN run.
    knn_ks = (5, 15, 25, 35, 45, 50, 55, 65, 75, 85, 85, 85, 95, 100)
    forest_sizes = (50, 100, 150, 200, 50, 100, 150, 200)

    def by_neighbors(detector, ks):
        # One detector per neighbourhood size, in the given order.
        return [detector(n_neighbors=k, contamination=contamination)
                for k in ks]

    def forests(sizes):
        # One isolation forest per ensemble size, in the given order.
        return [IForest(n_estimators=size, contamination=contamination)
                for size in sizes]

    def defaults(detector, count):
        # `count` independent instances that differ only by identity.
        return [detector(contamination=contamination) for _ in range(count)]

    # Runs are evaluated top to bottom, so detectors are instantiated in
    # exactly the order in which they end up in the returned list.
    runs = [
        by_neighbors(LOF, lof_ks),
        by_neighbors(ABOD, abod_ks),
        by_neighbors(LOF, lof_ks),
        defaults(HBOS, 20),
        defaults(PCA, 8),
        by_neighbors(KNN, knn_ks),
        forests(forest_sizes),

        by_neighbors(LOF, lof_ks),
        by_neighbors(LOF, lof_ks),
        by_neighbors(LOF, lof_ks),
        defaults(HBOS, 20),
        defaults(PCA, 10),
        by_neighbors(KNN, knn_ks),
        forests(forest_sizes),

        by_neighbors(LOF, lof_ks),
        by_neighbors(LOF, lof_ks),
        by_neighbors(LOF, lof_ks),
        defaults(HBOS, 20),
        defaults(PCA, 10),
        by_neighbors(KNN, knn_ks),
        forests(forest_sizes),

        by_neighbors(LOF, lof_ks),
        by_neighbors(LOF, lof_ks),
        by_neighbors(LOF, lof_ks),
        defaults(HBOS, 20),
        by_neighbors(ABOD, abod_ks),
        forests(forest_sizes + (150, 200)),
        defaults(PCA, 10),
        by_neighbors(KNN, knn_ks),
        forests(forest_sizes),

        by_neighbors(LOF, lof_ks),
        by_neighbors(ABOD, abod_ks),
        by_neighbors(LOF, lof_ks),
        by_neighbors(LOF, lof_ks),
        defaults(HBOS, 20),
        defaults(PCA, 10),
        by_neighbors(KNN, knn_ks),
        forests(forest_sizes),

        by_neighbors(LOF, lof_ks),
        by_neighbors(ABOD, abod_ks + (45,)),
        defaults(OCSVM, 10),
        defaults(MCD, 20),
        by_neighbors(LOF, (75, 80, 85, 90, 95, 100)),
        by_neighbors(ABOD, abod_ks),
    ]

    estimators = []
    for run in runs:
        estimators.extend(run)
    return estimators
def outlier_ensemble(df):
    """Score and flag outliers with a weighted ensemble of detectors.

    Ensemble method based on the paper "An unsupervised approach for
    combining scores of outlier detection techniques, based on similarity
    measures".

    Each detector is fitted on a random subset of the numeric columns of
    ``df``. Its scores are standardized, converted into votes, weighted by
    their average correlation with the other detectors' scores and summed
    into one score per row. Rows are then flagged with a two-stage IQR
    threshold on that combined score.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. Only numeric columns are used for detection; all
        columns are kept in the returned frame.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with two extra columns, ``anomaly_score`` and
        ``prediction`` (1 = flagged, 0 = not flagged), sorted by
        ``anomaly_score`` in descending order.

    Raises
    ------
    ValueError
        If ``df`` has no numeric columns.

    Notes
    -----
    Feature sampling draws from the global NumPy random state, so results
    differ between calls unless the caller seeds ``np.random``. The number
    of flagged rows is printed to stdout.
    """
    # Keep only numeric type features.
    df_numeric = df.select_dtypes(include=[np.number])
    d = len(df_numeric.columns)
    if d == 0:
        raise ValueError(
            "outlier_ensemble requires at least one numeric column")

    # NOTE(review): `knn` is lower-case, unlike the other detector names.
    # It is kept as written; presumably it is an alias defined elsewhere in
    # this file - confirm.
    algorithms = [
        MCD(), PCA(), knn(), ABOD(), HBOS(), LOF(), OCSVM(), IForest()
    ]
    n_detectors = len(algorithms)

    score_columns = []
    for clf in algorithms:
        # Sample between d/2 and d-1 features (randint's `high` is
        # exclusive). With a single numeric column that range would be
        # empty, so the only available feature is used instead.
        if d > 1:
            nr_of_features = np.random.randint(low=d // 2, high=d)
        else:
            nr_of_features = 1
        sampled_features = list(
            np.random.choice(d, nr_of_features, replace=False))
        df_numeric_sample = df_numeric.iloc[:, sampled_features]

        # Fit the detector and standardize its scores so that the
        # detectors' outputs are on a comparable scale.
        clf.fit(X=df_numeric_sample)
        raw_scores = clf.decision_function(df_numeric_sample).reshape(-1, 1)
        standardized = preprocessing.StandardScaler().fit_transform(
            raw_scores).flatten()
        score_columns.append(standardized)

    # One row per sample, one column per detector.
    anomaly_scores_matr = pd.DataFrame(np.array(score_columns).T)

    # Votes: a detector votes for a row when the row's standardized score
    # exceeds 1.5x that detector's own inter-quartile range.
    # NOTE(review): the earlier code looped twice over `col`, so the IQR was
    # not tied to the column being thresholded. Votes are now 0/1 per
    # detector - confirm this matches the intended voting scheme.
    iqr_per_detector = (anomaly_scores_matr.quantile(0.75)
                        - anomaly_scores_matr.quantile(0.25))
    votes_matr = anomaly_scores_matr.gt(
        1.5 * iqr_per_detector, axis="columns").astype(int)

    # EDCV weights: mean correlation of each detector's scores with the
    # other detectors' scores (the self-correlation of 1 is removed).
    C = anomaly_scores_matr.corr()
    weights = np.asarray((C.sum(axis=0) - 1) / (n_detectors - 1))

    # Combine scores: weighted, vote-gated sum averaged over the detectors.
    final_score = np.asarray(
        (anomaly_scores_matr * votes_matr * weights).sum(axis=1)
    ) / n_detectors

    # Two-stage thresholding. Stage 1 drops rows above Q3 + 1.5 * IQR of
    # the combined score; stage 2 recomputes the same fence on the
    # remaining rows and uses it as the final cut-off. (Single-stage
    # thresholding would use the stage 1 fence directly.)
    q1, q3 = np.percentile(final_score, [25, 75])
    mask_stage1 = final_score <= q3 + 1.5 * (q3 - q1)
    q1_kept, q3_kept = np.percentile(final_score[mask_stage1], [25, 75])
    threshold = q3_kept + 1.5 * (q3_kept - q1_kept)
    predictions = (final_score > threshold).astype(int)

    df_sorted = df.copy()
    df_sorted['anomaly_score'] = final_score
    df_sorted['prediction'] = predictions
    print(predictions.sum())
    df_sorted = df_sorted.sort_values(by='anomaly_score', ascending=False)
    return df_sorted
random_state=random_state), '(HBOS) Histogram-base Outlier Detection': HBOS( contamination=outliers_fraction), 'Isolation Forest': IForest(contamination=outliers_fraction, random_state=random_state), '(KNN) K Nearest Neighbors ': KNN( contamination=outliers_fraction), 'Average KNN': KNN(method='mean', contamination=outliers_fraction), # 'Median KNN': KNN(method='median', # contamination=outliers_fraction), '(LOF) Local Outlier Factor ': LOF(n_neighbors=35, contamination=outliers_fraction), # 'Local Correlation Integral (LOCI)': # LOCI(contamination=outliers_fraction), '(MCD) Minimum Covariance Determinant ': MCD( contamination=outliers_fraction, random_state=random_state), 'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction), '(PCA) Principal Component Analysis ': PCA( contamination=outliers_fraction, random_state=random_state), # 'Stochastic Outlier Selection (SOS)': SOS( # contamination=outliers_fraction), '(LSCP) Locally Selective Combination ': LSCP( detector_list, contamination=outliers_fraction, random_state=random_state), # 'Connectivity-Based Outlier Factor (COF)': # COF(n_neighbors=35, contamination=outliers_fraction), # 'Subspace Outlier Detection (SOD)': # SOD(contamination=outliers_fraction), } st.subheader('SELECT AN ALGORITHM:')
'Histogram-base Outlier Detection (HBOS)': HBOS(), 'Isolation Forest': IForest(random_state=random_state), 'K Nearest Neighbors (KNN)': KNN(), 'Average KNN': KNN(method='mean'), # 'Median KNN': KNN(method='median', # contamination=outliers_fraction), 'Local Outlier Factor (LOF)': LOF(n_neighbors=35), # 'Local Correlation Integral (LOCI)': # LOCI(contamination=outliers_fraction), 'Minimum Covariance Determinant (MCD)': MCD(random_state=random_state), 'One-class SVM (OCSVM)': OCSVM(), 'Principal Component Analysis (PCA)': PCA(random_state=random_state), # 'Stochastic Outlier Selection (SOS)': SOS( # contamination=outliers_fraction), 'Locally Selective Combination (LSCP)': LSCP(detector_list, random_state=random_state), # 'Connectivity-Based Outlier Factor (COF)': # COF(n_neighbors=35, contamination=outliers_fraction), # 'Subspace Outlier Detection (SOD)': # SOD(contamination=outliers_fraction), } # Show all detectors
end = "2020-02-15" test_date = "2020-02-16" KNN_clf = KNN(contamination=0.05) PCA_clf = PCA(contamination=0.05) VAE_clf = VAE(contamination=0.05, epochs=30, encoder_neurons=[9, 4], decoder_neurons=[4, 9]) LOF_clf = LOF(contamination=0.05) IForest_clf = IForest(contamination=0.05) AutoEncoder_clf = AutoEncoder(contamination=0.05, epochs=30, hidden_neurons=[9, 4, 4, 9]) FeatureBagging_clf = FeatureBagging(contamination=0.05, check_estimator=False) ABOD_clf = ABOD(contamination=0.05) HBOS_clf = HBOS(contamination=0.05) CBLOF_clf = CBLOF(contamination=0.05) LODA_clf = LODA(contamination=0.05) MCD_clf = MCD(contamination=0.05) MO_GAAL_clf = MO_GAAL(k=3, stop_epochs=2, contamination=0.05) SO_GAAL_clf = SO_GAAL(contamination=0.05) KNN_MAH_clf = None S_models = ["KNN", "LOF", "PCA", "IForest", "HBOS", "LODA", "MCD", "CBLOF", "FeatureBagging", "ABOD", "KNN_MAH"] K_models = ["AutoEncoder", "SO_GAAL", "VAE"] def get_train_data(): """ 获取训练样本 :return: x_train 9特征训练样本 df 原训练数据 """ acc_date = pd.date_range(begin, end, freq='1D') for day in acc_date:
classifiers = {'Feature Bagging': FeatureBagging(contamination=outliers_fraction, check_estimator=False, random_state=random_state)} classifiers_indices = {'Feature Bagging': 0} elif sys.argv[1] == 'hbos': classifiers = {'Histogram-base Outlier Detection (HBOS)': HBOS(contamination=outliers_fraction)} classifiers_indices = {'Histogram-base Outlier Detection (HBOS)': 0} elif sys.argv[1] == 'iforest': classifiers = {'Isolation Forest': IForest(contamination=outliers_fraction, random_state=random_state)} classifiers_indices = {'Isolation Forest': 0} elif sys.argv[1] == 'knn': classifiers = {'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction)} classifiers_indices = {'K Nearest Neighbors (KNN)': 0} elif sys.argv[1] == 'lof': classifiers = {'Local Outlier Factor (LOF)': LOF(contamination=outliers_fraction)} classifiers_indices = {'Local Outlier Factor (LOF)': 0} elif sys.argv[1] == 'mcd': classifiers = {'Minimum Covariance Determinant (MCD)': MCD(contamination=outliers_fraction, random_state=random_state)} classifiers_indices = {'Minimum Covariance Determinant (MCD)': 0} for clf_name, clf in classifiers.items(): print("\n\nAlgorithm: ", clf_name) t0 = time() clf.fit(X_train_norm) test_scores = clf.decision_function(X_test_norm) t1 = time() duration = round(t1 - t0, ndigits=4) roc = round(roc_auc_score(y_test, test_scores), ndigits=4) prn = round(precision_n_scores(y_test, test_scores), ndigits=4) print('ROC:{roc}, precision @ rank n:{prn}, ' 'execution time: {duration}s'.format(roc=roc, prn=prn, duration=duration))
# In[6]: data195061 = df[(df['CarId'] == '195061')] x = data195061['Time'] y = data195061['Speed diff'] plt.figure(figsize=(10, 4)) plt.plot(x, y, label='Car 195061') plt.xlabel('Time') plt.ylabel('Speed diff') plt.show() # In[7]: lscp = LSCP(detector_list=[MCD(), MCD()]) lscp.fit(df['Speed diff'].values.reshape(-1, 1)) xx = np.linspace(df['Speed diff'].min(), df['Speed diff'].max(), len(df)).reshape(-1, 1) anomaly_score = lscp.decision_function(xx) outlier = lscp.predict(xx) plt.figure(figsize=(10, 4)) plt.plot(xx, anomaly_score, label='anomaly score') plt.ylabel('anomaly score') plt.xlabel('Speed diff') plt.show() # In[8]: df.loc[df['Speed diff'] > 10]
'V.17', 'V.18', 'V.19', 'V.20' ] x = data[cols].values #把label标签加入,把该问题当成有监督问题来处理 #y=data['original.label'] data['s'] = data['original.label'] data.loc[data['original.label'] != 1, 's'] = 0 y = data['s'] #划分测试集和训练集 X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33) #使用pyod中的MCD算法拟合数据 clf_name = 'MCD' clf = MCD() clf.fit(X_train) #预测得到由0和1组成的数组,1表示离群点,0表示飞离群点 y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) y_train_scores = clf.decision_scores_ # raw outlier scores,The outlier scores of the training data. #预测样本是不是离群点,返回0和1 的数组 y_test_pred = clf.predict(X_test) y_test_scores = clf.decision_function( X_test) # outlier scores,The anomaly score of the input samples. #使用sklearn中的roc_auc_score方法得到auc值,即roc曲线下面的面积 try: sumAuc_train += sklearn.metrics.roc_auc_score(y_train, y_train_scores,
class TestMCD(unittest.TestCase):
    """Tests for the MCD detector fitted on data from ``generate_data``."""

    def setUp(self):
        """Build a seeded train/test split and fit the detector on it."""
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        # Minimum ROC AUC the detector must exceed on the test split.
        self.roc_floor = 0.6

        split = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination,
            random_state=42)
        self.X_train, self.y_train, self.X_test, self.y_test = split

        self.clf = MCD(contamination=self.contamination, random_state=42)
        self.clf.fit(self.X_train)

    def _check_proba_range(self, **predict_kwargs):
        """Assert that predicted probabilities on the test set lie in [0, 1].

        ``predict_kwargs`` are forwarded unchanged to ``predict_proba``.
        """
        probabilities = self.clf.predict_proba(self.X_test, **predict_kwargs)
        assert_greater_equal(probabilities.min(), 0)
        assert_less_equal(probabilities.max(), 1)

    def test_sklearn_estimator(self):
        """Run the estimator through ``check_estimator``."""
        check_estimator(self.clf)

    def test_parameters(self):
        """Every listed fitted attribute exists and is not ``None``."""
        fitted_attributes = (
            'decision_scores_',
            'labels_',
            'threshold_',
            '_mu',
            '_sigma',
            'raw_location_',
            'raw_covariance_',
            'raw_support_',
            'location_',
            'covariance_',
            'precision_',
            'support_',
        )
        for attribute in fitted_attributes:
            assert_true(hasattr(self.clf, attribute)
                        and getattr(self.clf, attribute) is not None)

    def test_train_scores(self):
        """There is one training score per training sample."""
        n_scores = len(self.clf.decision_scores_)
        assert_equal(n_scores, self.X_train.shape[0])

    def test_prediction_scores(self):
        """Test scores have the right length and beat the ROC floor."""
        scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(scores.shape[0], self.X_test.shape[0])

        # check performance
        assert_greater(roc_auc_score(self.y_test, scores), self.roc_floor)

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the test labels."""
        labels = self.clf.predict(self.X_test)
        assert_equal(labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        """Default probability conversion stays within [0, 1]."""
        self._check_proba_range()

    def test_prediction_proba_linear(self):
        """The 'linear' probability conversion stays within [0, 1]."""
        self._check_proba_range(method='linear')

    def test_prediction_proba_unify(self):
        """The 'unify' probability conversion stays within [0, 1]."""
        self._check_proba_range(method='unify')

    def test_prediction_proba_parameter(self):
        """An unknown conversion method raises ``ValueError``."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        """``fit_predict`` returns one label per training sample."""
        labels = self.clf.fit_predict(self.X_train)
        assert_equal(labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        """Supported scorers run; an unknown scorer is rejected."""
        self.clf.fit_predict_score(self.X_test, self.y_test)
        for scoring in ('roc_auc_score', 'prc_n_score'):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring=scoring)
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test, self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        """Ranks follow the score ordering and stay within bounds."""
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
        assert_array_less(ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, ranks)

    def test_predict_rank_normalized(self):
        """Normalized ranks follow the score ordering and stay below 1.01."""
        scores = self.clf.decision_function(self.X_test)
        ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(ranks), rankdata(scores), atol=2)
        assert_array_less(ranks, 1.01)
        assert_array_less(-0.1, ranks)

    def tearDown(self):
        """Nothing to clean up."""
        pass
'Histogram-base Outlier Detection (HBOS)': HBOS(contamination=outliers_fraction), 'Isolation Forest': IForest(contamination=outliers_fraction, random_state=random_state), 'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction), 'Average KNN': KNN(method='mean', contamination=outliers_fraction), # 'Median KNN': KNN(method='median', # contamination=outliers_fraction), 'Local Outlier Factor (LOF)': LOF(n_neighbors=35, contamination=outliers_fraction), # 'Local Correlation Integral (LOCI)': # LOCI(contamination=outliers_fraction), 'Minimum Covariance Determinant (MCD)': MCD(contamination=outliers_fraction, random_state=random_state), 'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction), 'Principal Component Analysis (PCA)': PCA(contamination=outliers_fraction, random_state=random_state), # 'Stochastic Outlier Selection (SOS)': SOS( # contamination=outliers_fraction), 'Locally Selective Combination (LSCP)': LSCP(detector_list, contamination=outliers_fraction, random_state=random_state), # 'Connectivity-Based Outlier Factor (COF)': # COF(n_neighbors=35, contamination=outliers_fraction), # 'Subspace Outlier Detection (SOD)': # SOD(contamination=outliers_fraction), }
mat = sp.io.loadmat(os.path.join('../datasets', mat_file)) X = mat['X'] y = mat['y'] X = StandardScaler().fit_transform(X) classifiers = { 1: ABOD(n_neighbors=10), 2: CBLOF(check_estimator=False), 3: FeatureBagging(LOF()), 4: HBOS(), 5: IForest(), 6: KNN(), 7: LOF(), 8: MCD(), 9: OCSVM(), 10: PCA(), } idx_clf_mapping = { 1: 'ABOD', 2: 'CBLOF', 3: 'FeatureBagging', 4: 'HBOS', 5: 'IForest', 6: 'KNN', 7: 'LOF', 8: 'MCD', 9: 'OCSVM', 10: 'PCA',