示例#1
0
    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.8
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        self.clf = SOD(contamination=self.contamination)
        self.clf.fit(self.X_train)
def getOutlierSOD(dataset):
    '''
    @brief Function that executes SOD algorithm on the dataset and obtains the
    labels of the dataset indicating which instance is an inlier (0) or outlier (1)
    @param dataset Dataset on which to try the algorithm
    @return It returns a list of labels 0 means inlier, 1 means outlier
    '''
    # Initializating the model
    sod = SOD()
    # Fits the data and obtains labels
    sod.fit(dataset)
    # Return labels
    return sod.labels_
示例#3
0
def model_test(model_type, y_train, y_test, X_train, X_test, model_file,
               save_flag):
    if model_type == 'KNN':
        clf_name = 'KNN'
        clf = KNN()
        clf.fit(X_train)
    if model_type == 'XGBOD':
        clf_name = 'XGBOD'
        #set this scale_pos_weight  sum(negative instances) / sum(positive instances).
        clf = XGBOD(random_state=42, scale_pos_weight=50)
        clf.fit(X_train, y_train)
    if model_type == 'SOD':
        # train SOD detector
        # Note that SOD is meant to work in high dimensions d > 2.
        # But here we are using 2D for visualization purpose
        # thus, higher precision is expected in higher dimensions
        clf_name = 'SOD'
        clf = SOD()
        clf.fit(X_train)
    if model_type == 'VAE':
        # train VAE detector (Beta-VAE)
        clf_name = 'VAE'
        contamination = 0.01
        clf = VAE(epochs=30,
                  contamination=contamination,
                  gamma=0.8,
                  capacity=0.2)
        clf.fit(X_train)

    #save model if specified
    if save_flag == '1':
        pickle.dump(clf, open(model_file, "wb"))

    # get the prediction labels and outlier scores of the training data
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier scores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    conf_train = confusion_matrix(y_train, y_train_pred)
    print("<<<< confusion matrix for train: ", conf_train)

    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)
    conf_test = confusion_matrix(y_test, y_test_pred)
    print("<<<< confusion matrix for test: ", conf_test)

    # visualize the results
    #todo: Input data has to be 2-d for visualization.
    #visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
    #         y_test_pred, show_figure=True, save_figure=False)

    return model_file
示例#4
0
def main():

    scalers = ['no', 'std', 'minmax']
    root = 'Unsupervised_Anamaly_Detection_csv'
    start = 0
    counts = 90
    CPUS = 3
    CPUS_Models = 4
    sklearn_models = [
        'AvgKNN', 'LargestKNN', 'MedKNN', 'PCA', 'COF', 'LODA', 'LOF', 'HBOS',
        'MCD', 'AvgBagging', 'MaxBagging', 'IForest', 'CBLOF', 'COPOD', 'SOD',
        'LSCPwithLODA', 'AveLMDD', 'VarLMDD', 'IqrLMDD', 'SoGaal', 'MoGaal',
        'VAE', 'AutoEncoder'
    ]

    models = {
        'BRM': BRM(bootstrap_sample_percent=70),
        'GM': GaussianMixture(),
        'IF': IsolationForest(),
        'OCSVM': OneClassSVM(),
        'EE': EllipticEnvelope(),
        'AvgKNN': KNN(method='mean'),
        'LargestKNN': KNN(method='largest'),
        'MedKNN': KNN(method='median'),
        'PCA': PCA(),
        'COF': COF(),
        'LODA': LODA(),
        'LOF': LOF(),
        'HBOS': HBOS(),
        'MCD': MCD(),
        'AvgBagging': FeatureBagging(combination='average'),
        'MaxBagging': FeatureBagging(combination='max'),
        'CBLOF': CBLOF(n_clusters=10, n_jobs=4),
        'FactorAnalysis': FactorAnalysis(),
        'KernelDensity': KernelDensity(),
        'COPOD': COPOD(),
        'SOD': SOD(),
        'LSCPwithLODA': LSCP([LODA(), LODA()]),
        'AveLMDD': LMDD(dis_measure='aad'),
        'VarLMDD': LMDD(dis_measure='var'),
        'IqrLMDD': LMDD(dis_measure='iqr'),
        'SoGaal': SO_GAAL(),
        'MoGaal': MO_GAAL(),
        'VAE': VAE(encoder_neurons=[8, 4, 2]),
        'AutoEncoder': AutoEncoder(hidden_neurons=[6, 3, 3, 6]),
        'OCKRA': m_OCKRA(),
    }

    name = "30_Models"

    Parallel(n_jobs=CPUS) \
        (delayed(runByScaler)
         (root, scaler, models, start, counts,
          other_models=sklearn_models,
          CPUS=CPUS_Models,
          save_name=name)
         for scaler in scalers)
示例#5
0
 def test_check_parameters(self):
     with assert_raises(ValueError):
         SOD(n_neighbors=None, ref_set=10, alpha=0.8)
     with assert_raises(ValueError):
         SOD(n_neighbors=20, ref_set=None, alpha=0.8)
     with assert_raises(ValueError):
         SOD(n_neighbors=20, ref_set=10, alpha=None)
     with assert_raises(ValueError):
         SOD(n_neighbors=-1, ref_set=10, alpha=0.8)
     with assert_raises(ValueError):
         SOD(n_neighbors=20, ref_set=-1, alpha=0.8)
     with assert_raises(ValueError):
         SOD(n_neighbors=20, ref_set=10, alpha=-1)
     with assert_raises(ValueError):
         SOD(n_neighbors=20, ref_set=25, alpha=0.8)
     with assert_raises(ValueError):
         SOD(n_neighbors='not int', ref_set=25, alpha=0.8)
     with assert_raises(ValueError):
         SOD(n_neighbors=20, ref_set='not int', alpha=0.8)
     with assert_raises(ValueError):
         SOD(n_neighbors=20, ref_set=25, alpha='not float')
示例#6
0
文件: PyodSOD.py 项目: zwbjtu123/tods
    def __init__(
            self,
            *,
            hyperparams: Hyperparams,  #
            random_seed: int = 0,
            docker_containers: Dict[str, DockerContainer] = None) -> None:
        super().__init__(hyperparams=hyperparams,
                         random_seed=random_seed,
                         docker_containers=docker_containers)

        self._clf = SOD(
            contamination=hyperparams['contamination'],
            n_neighbors=hyperparams['n_neighbors'],
            ref_set=hyperparams['ref_set'],
            alpha=hyperparams['alpha'],
        )
def choose_model(model, nnet):
    """ among implemented in PyOD """
    clfs = {
        'AE':
        AutoEncoder(hidden_neurons=nnet, contamination=0.1, epochs=15),
        'VAE':
        VAE(encoder_neurons=nnet[:5],
            decoder_neurons=nnet[4:],
            contamination=0.1,
            epochs=13),
        'ABOD':
        ABOD(),
        'FeatureBagging':
        FeatureBagging(),
        'HBOS':
        HBOS(),
        'IForest':
        IForest(),
        'KNN':
        KNN(),
        'LOF':
        LOF(),
        'OCSVM':
        OCSVM(),
        'PCA':
        PCA(),
        'SOS':
        SOS(),
        'COF':
        COF(),
        'CBLOF':
        CBLOF(),
        'SOD':
        SOD(),
        'LOCI':
        LOCI(),
        'MCD':
        MCD()
    }
    return clfs[model]
    def runMethod(self):
        '''
        @brief This function is the actual implementation of HICS
        '''
        if self.verbose:
            print("Calculating the subspaces\n")
        # First we obtain the high contrast subspaces
        subspaces = self.hicsFramework()

        if self.verbose:
            print("Now calculating the scoring\n")
        # We initialize the scores for each instance as 0
        scores = np.zeros(len(self.dataset))
        # For each subspace
        for sub in subspaces:
            # We place the corresponding scorer according to parameter
            scorer = None
            if self.outlier_rank == "lof":
                scorer = LOF()
            elif self.outlier_rank == "cof":
                scorer = COF()
            elif self.outlier_rank == "cblof":
                scorer = CBLOF()
            elif self.outlier_rank == "loci":
                scorer = LOCI()
            elif self.outlier_rank == "hbos":
                scorer = HBOS()
            elif self.outlier_rank == "sod":
                scorer = SOD()
            # Fits the scorer with the dataset projection
            scorer.fit(self.dataset[:, sub])
            # Adds the scores obtained to the global ones
            scores = scores + scorer.decision_scores_
        # Compute the average
        self.outlier_score = scores / len(subspaces)
        # Marks the calculations as done
        self.calculations_done = True
示例#9
0
    n_test = 100  # number of testing points

    # Generate sample data
    X_train, y_train, X_test, y_test = \
        generate_data(n_train=n_train,
                      n_test=n_test,
                      n_features=2,
                      contamination=contamination,
                      random_state=42)

    # train SOD detector
    # Note that SOD is meant to work in high dimensions d > 2.
    # But here we are using 2D for visualization purpose
    # thus, higher precision is expected in higher dimensions
    clf_name = 'SOD'
    clf = SOD()
    clf.fit(X_train)

    # get the prediction labels and outlier scores of the training data
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier scores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)
示例#10
0
def main():

    # PART 1:
    # Getting the predictions for each classifier
    # SK means: The classifier is from sklearn or works like sklearn
    # PY means: The classifier is from pyod or works like pyod

    models = {
        'SK_EE': EllipticEnvelope(),
        'SK_GM': GaussianMixture(),
        'SK_IF': IsolationForest(),
        'SK_OCSVM': OneClassSVM(),
        'SK_FA': FactorAnalysis(),
        'SK_KD': KernelDensity(),
        'PY_PCA': PCA(),
        'PY_COF': COF(),
        'PY_LODA': LODA(),
        'PY_LOF': LOF(),
        'PY_HBOS': HBOS(),
        'PY_MCD': MCD(),
        'PY_AvgKNN': KNN(method='mean'),
        'PY_LargestKNN': KNN(method='largest'),
        'PY_MedKNN': KNN(method='median'),
        'PY_AvgBagging': FeatureBagging(combination='average'),
        'PY_MaxBagging': FeatureBagging(combination='max'),
        'PY_CBLOF': CBLOF(n_clusters=10, n_jobs=4),
        'PY_COPOD': COPOD(),
        'PY_SOD': SOD(),
        'PY_LSCPwithLODA': LSCP([LODA(), LODA()]),
        'PY_AveLMDD': LMDD(dis_measure='aad'),
        'PY_VarLMDD': LMDD(dis_measure='var'),
        'PY_IqrLMDD': LMDD(dis_measure='iqr'),
        'PY_VAE': VAE(encoder_neurons=[8, 4, 2]),
        'PY_AutoEncoder': AutoEncoder(hidden_neurons=[6, 3, 3, 6]),
        'SK_BRM': BRM(bootstrap_sample_percent=70),
        'SK_OCKRA': m_OCKRA(),
        'PY_SoGaal': SO_GAAL(),
        'PY_MoGaal': MO_GAAL()
    }
    ranker = ADRanker(data="datasets", models=models)
    ranker.get_predictions()

    # PART 2:
    # After predictions, we can evaluate our classifiers using different scores
    # You can add manually a new metric by modifying 'metrics.py'

    ranker.get_scores(scores={'auc': Metrics.get_roc, 'ave': Metrics.get_ave})

    # PART 3:
    # Finally, it is time to summarize the results by plotting different graphs
    # You can add your own graphs by modifying ' plots.py'
    plot = Plots()
    plot.make_plot_basic(paths=[
        'results/scores/auc/no/results.csv',
        'results/scores/auc/minmax/results.csv',
        'results/scores/auc/std/results.csv',
        'results/scores/ave/no/results.csv',
        'results/scores/ave/minmax/results.csv',
        'results/scores/ave/std/results.csv'
    ],
                         scalers=[
                             'Without scaler', 'Min max scaler',
                             'Standard scaler', 'Without scaler',
                             'Min max scaler', 'Standard scaler'
                         ])

    plot.make_cd_plot(
        paths=[
            'results/scores/auc/minmax/results.csv',
            'results/scores/ave/no/results.csv',
            'results/scores/auc/no/results.csv',
            'results/scores/ave/no/results.csv',
            'results/scores/auc/std/results.csv',
            'results/scores/ave/std/results.csv'
        ],
        names=[
            'CD auc minmax scale', 'CD ave minmax scale', 'CD auc no scale',
            'CD ave no scale', 'CD auc std scale', 'CD ave std scale'
        ],
        titles=[
            'CD diagram - AUC with min max scaling',
            'CD diagram - Average precision with min max scaling',
            'CD diagram - AUC without scaling',
            'CD diagram - Average precision without scaling',
            'CD diagram - AUC with standard scaling',
            'CD diagram - Average precision with  standard scaling'
        ])
示例#11
0
class TestSOD(unittest.TestCase):
    def setUp(self):
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.8
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination,
            random_state=42)

        self.clf = SOD(contamination=self.contamination)
        self.clf.fit(self.X_train)

    def test_check_parameters(self):
        with assert_raises(ValueError):
            SOD(n_neighbors=None, ref_set=10, alpha=0.8)
        with assert_raises(ValueError):
            SOD(n_neighbors=20, ref_set=None, alpha=0.8)
        with assert_raises(ValueError):
            SOD(n_neighbors=20, ref_set=10, alpha=None)
        with assert_raises(ValueError):
            SOD(n_neighbors=-1, ref_set=10, alpha=0.8)
        with assert_raises(ValueError):
            SOD(n_neighbors=20, ref_set=-1, alpha=0.8)
        with assert_raises(ValueError):
            SOD(n_neighbors=20, ref_set=10, alpha=-1)
        with assert_raises(ValueError):
            SOD(n_neighbors=20, ref_set=25, alpha=0.8)
        with assert_raises(ValueError):
            SOD(n_neighbors='not int', ref_set=25, alpha=0.8)
        with assert_raises(ValueError):
            SOD(n_neighbors=20, ref_set='not int', alpha=0.8)
        with assert_raises(ValueError):
            SOD(n_neighbors=20, ref_set=25, alpha='not float')

    def test_parameters(self):
        assert (hasattr(self.clf, 'decision_scores_')
                and self.clf.decision_scores_ is not None)
        assert (hasattr(self.clf, 'labels_') and self.clf.labels_ is not None)
        assert (hasattr(self.clf, 'threshold_')
                and self.clf.threshold_ is not None)
        assert (hasattr(self.clf, 'alpha_') and self.clf.alpha_ is not None)
        assert (hasattr(self.clf, 'ref_set_')
                and self.clf.ref_set_ is not None)
        assert (hasattr(self.clf, 'n_neighbors_')
                and self.clf.n_neighbors_ is not None)

    def test_train_scores(self):
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

        # check performance
        assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)

    def test_prediction_labels(self):
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba(self):
        pred_proba = self.clf.predict_proba(self.X_test)
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_linear(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='linear')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_unify(self):
        pred_proba = self.clf.predict_proba(self.X_test, method='unify')
        assert (pred_proba.min() >= 0)
        assert (pred_proba.max() <= 1)

    def test_prediction_proba_parameter(self):
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test,
                                       self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)
        # assert the order is reserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        pred_socres = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is reserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=2)
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    # todo: fix clone issue
    def test_model_clone(self):
        pass
        # clone_clf = clone(self.clf)

    def tearDown(self):
        pass
示例#12
0
            'Connectivity-Based Outlier Factor (COF)': COF(
                contamination=outliers_fraction
            ),
            'K Nearest Neighbors (KNN)': KNN(
                contamination=outliers_fraction
            ),
            'Average K Nearest Neighbors (AvgKNN)': KNN(
                method='mean',
                contamination=outliers_fraction
            ),
            'Median K Nearest Neighbors (MedKNN)': KNN(
                method='median',
                contamination=outliers_fraction
            ),
            'Subspace Outlier Detection (SOD)': SOD(
                contamination=outliers_fraction
            ) 
        }
        classifiers_indices = {
            'Local Outlier Factor (LOF)': 0,
            'Connectivity-Based Outlier Factor (COF)': 1,
            'K Nearest Neighbors (KNN)': 2,
            'Average K Nearest Neighbors (AvgKNN)': 3,
            'Median K Nearest Neighbors (MedKNN)': 4,
            'Subspace Outlier Detection (SOD)': 5
        }

        for clf_name, clf in classifiers.items():
            t0 = time()
            clf.fit(X_norm)
            scores = clf.decision_function(X_norm)
示例#13
0
    def initialise_pyod_classifiers(self, outlier_fraction):
        #Testing every query to every class and then predicting only if it belongs to the same class
        classifiers = {}
        #Proximity based
        classifiers['K Nearest Neighbors (KNN)'] = []
        classifiers['Average K Nearest Neighbors (AvgKNN)'] = []
        classifiers['Median K Nearest Neighbors (MedKNN)'] = []
        classifiers['Local Outlier Factor (LOF)'] = []
        classifiers['Connectivity-Based Outlier Factor (COF)'] = []
        #classifiers['Clustering-Based Local Outlier Factor (CBLOF)'] = []
        classifiers['LOCI'] = []
        #classifiers['Histogram-based Outlier Score (HBOS)'] = []
        classifiers['Subspace Outlier Detection (SOD)'] = []
        #Linear models
        classifiers['Principal Component Analysis (PCA)'] = []
        #classifiers['Minimum Covariance Determinant (MCD)'] = []           #To slow
        classifiers['One-Class Support Vector Machines (OCSVM)'] = []
        classifiers['Deviation-based Outlier Detection (LMDD)'] = []
        #Probabilistic
        classifiers['Angle-Based Outlier Detection (ABOD)'] = []
        classifiers['Stochastic Outlier Selection (SOS)'] = []
        #Outlier Ensembles
        classifiers['Isolation Forest (IForest)'] = []
        classifiers['Feature Bagging'] = []
        classifiers['Lightweight On-line Detector of Anomalies (LODA)'] = []

        for i in range(self.k_way):
            for i in range(self.k_way):
                classifiers['K Nearest Neighbors (KNN)'].append(
                    KNN(method='largest',
                        n_neighbors=int(self.n_shot / 3) + 1,
                        contamination=outlier_fraction))
                classifiers['Average K Nearest Neighbors (AvgKNN)'].append(
                    KNN(method='mean',
                        n_neighbors=int(self.n_shot / 3) + 1,
                        contamination=outlier_fraction))
                classifiers['Median K Nearest Neighbors (MedKNN)'].append(
                    KNN(method='median',
                        n_neighbors=int(self.n_shot / 3) + 1,
                        contamination=outlier_fraction))
                classifiers['Local Outlier Factor (LOF)'].append(
                    LOF(n_neighbors=int(self.n_shot / 3) + 1,
                        contamination=outlier_fraction))
                classifiers['Connectivity-Based Outlier Factor (COF)'].append(
                    COF(n_neighbors=int(self.n_shot / 3) + 1,
                        contamination=outlier_fraction))
                classifiers['LOCI'].append(
                    LOCI(contamination=outlier_fraction))
                classifiers['Subspace Outlier Detection (SOD)'].append(
                    SOD(n_neighbors=int(self.n_shot / 3) + 2,
                        contamination=outlier_fraction,
                        ref_set=max(2, int((int(self.n_shot / 3) + 2) / 3))))
                classifiers['Principal Component Analysis (PCA)'].append(
                    PCA(contamination=outlier_fraction))
                classifiers[
                    'One-Class Support Vector Machines (OCSVM)'].append(
                        OCSVM(contamination=outlier_fraction))
                classifiers['Deviation-based Outlier Detection (LMDD)'].append(
                    LMDD(contamination=outlier_fraction))
                classifiers['Angle-Based Outlier Detection (ABOD)'].append(
                    ABOD(contamination=outlier_fraction))
                classifiers['Stochastic Outlier Selection (SOS)'].append(
                    SOS(contamination=outlier_fraction))
                classifiers['Isolation Forest (IForest)'].append(
                    IForest(contamination=outlier_fraction))
                classifiers['Feature Bagging'].append(
                    FeatureBagging(contamination=outlier_fraction))
                classifiers[
                    'Lightweight On-line Detector of Anomalies (LODA)'].append(
                        LODA(contamination=outlier_fraction))
        self.num_different_models = len(classifiers)
        return classifiers
    f.write("Model: " + modelname + "\n")
    f.write("Dataset " + str(datasetnumber) + ": " + datasetname + "\n")
    f.write("Time taken: " + str(time) + " seg.\n")
    f.write("Accuracy: " + str(accuracy) + "\n")
    if accuracy!=None:
        f.write("@scores\n")
        for score in model.decision_scores_:
            f.write(str(score) + "\n")
    f.close()

# This is based on executing the script from the folder experiments
ROUTE = "../datasets/outlier_ground_truth/"
# List of datasets
datasets = ["annthyroid.mat", "arrhythmia.mat", "breastw.mat", "cardio.mat", "glass.mat", "ionosphere.mat", "letter.mat", "lympho.mat", "mammography.mat", "mnist.mat", "musk.mat", "optdigits.mat", "pendigits.mat", "pima.mat", "satellite.mat", "satimage-2.mat", "speech.mat", "thyroid.mat", "vertebral.mat", "vowels.mat", "wbc.mat", "wine.mat"]
# List of models and names
models = [ABOD(), COF(), HBOS(), KNN(), LOF(), MCD(), OCSVM(), PCA(), SOD(), SOS()]
names = ["ABOD", "COF", "HBOS", "KNN", "LOF", "MCD", "OCSVM", "PCA", "SOD", "SOS"]
accuracies = []

for name, model in zip(names, models):
    print("\n\n#################################################################")
    print("MODEL " + name + " " + str(names.index(name)+1) + "/" + str(len(names)))
    print("#################################################################")
    acc = []
    for dat in datasets:
        if name=="ABOD" and dat in ["breastw.mat", "letter.mat", "satellite.mat"]:
            result = None
        else:
            print("Computing dataset " + dat + " " + str(datasets.index(dat)+1) + "/" + str(len(datasets)))
            # Read dataset
            dataset, labels = readDataset(ROUTE + dat)
示例#15
0
     (FeatureBagging(combination='average',
                     random_state=rs), 'AVE_Bagging'),  # n_jobs
     (LMDD(dis_measure='iqr', random_state=rs), 'IQR_LMDD'),
     (KNN(method='largest'), 'Largest_KNN'),  # n_jobs
     (LODA(), 'LODA'),
     (FeatureBagging(combination='max', n_jobs=-1,
                     random_state=rs), 'MAX_Bagging'),
     (MCD(random_state=rs), 'MCD'),
     (XGBOD(random_state=rs), 'XGBOD'),  # n_jobs
     (GaussianMixture(random_state=rs), 'GMM'),
     (LocalOutlierFactor(novelty=True), 'LOF'),
     (KNN(method='median'), 'Median_KNN'),  # n_jobs
     (KNN(method='mean'), 'Avg_KNN'),  # n_jobs
     (CBLOF(n_clusters=10, random_state=rs), 'CBLOF'),
     (HBOS(), 'HBOS'),
     (SOD(), 'SOD'),
     (PCA(random_state=rs), 'PCA'),
     (VAE(encoder_neurons=[3, 4, 3],
          decoder_neurons=[3, 4, 3],
          random_state=rs), 'VAE'),
     (AutoEncoder(hidden_neurons=[3, 4, 4, 3], verbose=0,
                  random_state=rs), 'AE')
 ]
 # Start the counter of time
 st = time.time()
 # Initialize the pool class with the number of required CPU's
 pool = mp.Pool(mp.cpu_count())
 # StarMap method
 pool.starmap_async(AnomalyTester, [(models[i][0], models[i][1], rootDir)
                                    for i in range(len(models))]).get()
 pool.close()
示例#16
0
def pyod_init(model, n_features=None):
    # initial model set up
    if model == 'abod':
        from pyod.models.abod import ABOD
        clf = ABOD()
    elif model == 'auto_encoder' and n_features:
        #import os
        #os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
        from pyod.models.auto_encoder import AutoEncoder
        clf = AutoEncoder(hidden_neurons=[
            n_features, n_features * 5, n_features * 5, n_features
        ],
                          epochs=5,
                          batch_size=64,
                          preprocessing=False)
    elif model == 'cblof':
        from pyod.models.cblof import CBLOF
        clf = CBLOF(n_clusters=4)
    elif model == 'hbos':
        from pyod.models.hbos import HBOS
        clf = HBOS()
    elif model == 'iforest':
        from pyod.models.iforest import IForest
        clf = IForest()
    elif model == 'knn':
        from pyod.models.knn import KNN
        clf = KNN()
    elif model == 'lmdd':
        from pyod.models.lmdd import LMDD
        clf = LMDD()
    elif model == 'loci':
        from pyod.models.loci import LOCI
        clf = LOCI()
    elif model == 'loda':
        from pyod.models.loda import LODA
        clf = LODA()
    elif model == 'lof':
        from pyod.models.lof import LOF
        clf = LOF()
    elif model == 'mcd':
        from pyod.models.mcd import MCD
        clf = MCD()
    elif model == 'ocsvm':
        from pyod.models.ocsvm import OCSVM
        clf = OCSVM()
    elif model == 'pca':
        from pyod.models.pca import PCA
        clf = PCA()
    elif model == 'sod':
        from pyod.models.sod import SOD
        clf = SOD()
    elif model == 'vae':
        from pyod.models.vae import VAE
        clf = VAE()
    elif model == 'xgbod':
        from pyod.models.xgbod import XGBOD
        clf = XGBOD()
    else:
        #raise ValueError(f"unknown model {model}")
        clf = PyODDefaultModel()
    return clf
示例#17
0
    'LargestKNN': KNN(method='largest'),
    'MedKNN': KNN(method='median'),
    'PCA': PCA(),
    'COF': COF(),
    'LODA': LODA(),
    'LOF': LOF(),
    'HBOS': HBOS(),
    'MCD': MCD(),
    'AvgBagging': FeatureBagging(combination='average'),
    'MaxBagging': FeatureBagging(combination='max'),
    'IForest': IForest(),
    'CBLOF': CBLOF(n_clusters=10, n_jobs=4),
    'FactorAnalysis': FactorAnalysis(),
    'KernelDensity': KernelDensity(),
    'COPOD': COPOD(),
    'SOD': SOD(),
    'LSCPwithLODA': LSCP([LODA(), LODA()]),
    'AveLMDD': LMDD(dis_measure='aad'),
    'VarLMDD': LMDD(dis_measure='var'),
    'IqrLMDD': LMDD(dis_measure='iqr'),
    'SoGaal': SO_GAAL(),
    #'MoGaal':MO_GAAL(),
    'VAE': VAE(encoder_neurons=[8, 4, 2]),
    'AutoEncoder': AutoEncoder(hidden_neurons=[6, 3, 3, 6])
}

models = {
    'XGBOD': XGBOD(),
    'BRM': BRM(),
    'GM': GaussianMixture(),
    'IF': IsolationForest(),