Exemplo n.º 1
0
    def test_invocation(self):
        """Compare ``ROD().decision_function`` with the low-level ROD helpers.

        Two-, three- and four-column views of ``self.X_train`` are scored.
        The two-column view is zero-padded to three columns before being
        handed to ``rod_3D``; the four-column data goes through ``rod_nD``.
        Only element ``[0]`` of each helper's return value is compared.
        """
        full = self.X_train
        two_cols = full[:, 0:2]
        three_cols = full[:, 0:3]

        # Called directly on two columns, rod_3D must raise IndexError.
        with assert_raises(IndexError):
            rod_3D(two_cols)

        # Two columns: the detector output must match rod_3D on padded data.
        scores_2d = ROD().decision_function(two_cols)
        n_rows, n_cols = two_cols.shape
        padding = np.zeros(shape=(n_rows, 3 - n_cols))
        padded = np.hstack((two_cols, padding))
        assert_array_equal(scores_2d, rod_3D(padded)[0])

        # Three columns: no padding needed.
        scores_3d = ROD().decision_function(three_cols)
        assert_array_equal(scores_3d, rod_3D(three_cols)[0])

        # Four columns: compare against rod_nD, passing the fixture state.
        scores_4d = ROD().decision_function(full)
        reference_4d = rod_nD(full, False, self.gm, self.data_scaler,
                              self.angles_scalers1, self.angles_scalers2)
        assert_array_equal(scores_4d, reference_4d[0])
Exemplo n.º 2
0
    def setUp(self):
        """Create the shared fixture: a 4-feature data set and a fitted ROD.

        The data comes from ``generate_data`` with ``random_state=42``; the
        detector is built with default arguments and fitted on the training
        split only.
        """
        self.n_train, self.n_test = 100, 50
        self.contamination = 0.1
        self.roc_floor = 0.8

        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test, n_features=4,
            contamination=self.contamination, random_state=42)

        self.clf = detector = ROD()
        detector.fit(self.X_train)
Exemplo n.º 3
0
    def test_invocation(self):
        """Compare ``ROD().decision_function`` with the low-level ROD helpers.

        Two-, three- and four-column views of ``self.X_train`` are scored.
        The two-column view is zero-padded to three columns before being
        handed to ``rod_3D``; the four-column data goes through ``rod_nD``
        with ``parallel=False``.
        """
        full = self.X_train
        two_cols = full[:, 0:2]
        three_cols = full[:, 0:3]

        # Called directly on two columns, rod_3D must raise IndexError.
        with assert_raises(IndexError):
            rod_3D(two_cols)

        # Two columns: the detector output must match rod_3D on padded data.
        scores_2d = ROD().decision_function(two_cols)
        n_rows, n_cols = two_cols.shape
        padded = np.hstack((two_cols, np.zeros(shape=(n_rows, 3 - n_cols))))
        assert_array_equal(scores_2d, rod_3D(padded))

        # Three columns: no padding needed.
        scores_3d = ROD().decision_function(three_cols)
        assert_array_equal(scores_3d, rod_3D(three_cols))

        # Four columns: compare against the sequential n-dimensional helper.
        scores_4d = ROD().decision_function(full)
        assert_array_equal(scores_4d, rod_nD(full, parallel=False))
Exemplo n.º 4
0
    def setUp(self):
        """Create the shared fixture: a 4-feature data set and a fitted ROD.

        Also initialises ``gm``, ``median``, ``data_scaler``,
        ``angles_scalers1`` and ``angles_scalers2`` to ``None`` on the test
        instance. The data comes from ``generate_data`` with
        ``random_state=42``; the detector is built with default arguments
        and fitted on the training split only.
        """
        self.n_train, self.n_test = 100, 50
        self.contamination = 0.1
        self.roc_floor = 0.8

        # Placeholder attributes, all starting out unset.
        self.gm = self.median = self.data_scaler = None
        self.angles_scalers1 = self.angles_scalers2 = None

        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            n_features=4,
            contamination=self.contamination,
            random_state=42,
        )

        self.clf = detector = ROD()
        detector.fit(self.X_train)
Exemplo n.º 5
0
 def test_parameters(self):
     """Check fitted attributes exist and a bad constructor argument fails.

     ``decision_scores_``, ``labels_`` and ``threshold_`` must each be
     present on ``self.clf`` and not ``None``; constructing ``ROD`` with
     ``parallel_execution='str'`` must raise ``TypeError``.
     """
     for fitted_attr in ('decision_scores_', 'labels_', 'threshold_'):
         # A missing attribute falls back to None and fails the assert too.
         assert getattr(self.clf, fitted_attr, None) is not None

     with assert_raises(TypeError):
         ROD(parallel_execution='str')
Exemplo n.º 6
0
class TestROD(unittest.TestCase):
    """Unit tests for the ``ROD`` detector and its helper functions."""

    def setUp(self):
        """Generate a 4-feature data set and fit a default ``ROD`` on it."""
        self.n_train = 100
        self.n_test = 50
        self.contamination = 0.1
        self.roc_floor = 0.8
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            n_features=4,
            contamination=self.contamination,
            random_state=42)

        self.clf = ROD()
        self.clf.fit(self.X_train)

    def test_parameters(self):
        """Fitted attributes must be set; a bad argument type must fail."""
        assert (hasattr(self.clf, 'decision_scores_')
                and self.clf.decision_scores_ is not None)
        assert (hasattr(self.clf, 'labels_') and self.clf.labels_ is not None)
        assert (hasattr(self.clf, 'threshold_')
                and self.clf.threshold_ is not None)
        with assert_raises(TypeError):
            ROD(parallel_execution='str')

    def test_train_scores(self):
        """There must be one training score per training sample."""
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        """``decision_function`` must return one score per test sample."""
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

    def test_prediction_labels(self):
        """``predict`` must return labels shaped like ``y_test``."""
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_proba_parameter(self):
        """An unknown ``method`` for ``predict_proba`` must raise."""
        with assert_raises(ValueError):
            self.clf.predict_proba(self.X_test, method='something')

    def test_fit_predict(self):
        """``fit_predict`` must return labels shaped like ``y_train``."""
        pred_labels = self.clf.fit_predict(self.X_train)
        assert_equal(pred_labels.shape, self.y_train.shape)

    def test_fit_predict_score(self):
        """Supported scorers must run; an unknown scorer must raise."""
        self.clf.fit_predict_score(self.X_test, self.y_test)
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='roc_auc_score')
        self.clf.fit_predict_score(self.X_test,
                                   self.y_test,
                                   scoring='prc_n_score')
        with assert_raises(NotImplementedError):
            self.clf.fit_predict_score(self.X_test,
                                       self.y_test,
                                       scoring='something')

    def test_predict_rank(self):
        """Ranks must follow the score order and stay within bounds."""
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        # every rank lies strictly between -0.1 and n_train + 1
        assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
        assert_array_less(-0.1, pred_ranks)

    def test_predict_rank_normalized(self):
        """Normalized ranks must follow the score order and stay bounded."""
        pred_scores = self.clf.decision_function(self.X_test)
        pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

        # assert the order is preserved
        assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=2)
        # every normalized rank lies strictly between -0.1 and 1.01
        assert_array_less(pred_ranks, 1.01)
        assert_array_less(-0.1, pred_ranks)

    def test_invocation(self):
        """``decision_function`` must agree with ``rod_3D`` / ``rod_nD``."""
        X_2D = self.X_train[:, 0:2]
        X_3D = self.X_train[:, 0:3]
        X_4D = self.X_train

        # rod_3D itself must reject a two-column input
        with assert_raises(IndexError):
            rod_3D(X_2D)
        # two-column data is compared against rod_3D on zero-padded input
        assert_array_equal(
            ROD().decision_function(X_2D),
            rod_3D(
                np.hstack(
                    (X_2D, np.zeros(shape=(X_2D.shape[0],
                                           3 - X_2D.shape[1]))))))
        assert_array_equal(ROD().decision_function(X_3D), rod_3D(X_3D))
        assert_array_equal(ROD().decision_function(X_4D),
                           rod_nD(X_4D, parallel=False))

    def test_angle(self):
        """The angle between two identical vectors must be 0.0."""
        assert_equal(0.0, angle(v1=[0, 0, 1], v2=[0, 0, 1]))

    def test_sigmoid(self):
        """``sigmoid`` of 0.0 must be 0.5."""
        assert_equal(0.5, sigmoid(np.array([0.0])))

    def test_process_sub(self):
        """``process_sub`` must return 0.5 for each row of this subspace."""
        subspace = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
        assert_equal([0.5, 0.5, 0.5], process_sub(subspace))

    def test_parallel_vs_non_parallel(self):
        """Parallel and sequential ``rod_nD`` must give equal results."""
        assert_equal(rod_nD(self.X_train, parallel=False),
                     rod_nD(self.X_train, parallel=True))

    def test_mad(self):
        """``mad`` of ``[1, 2, 3]`` must equal the expected values."""
        assert_equal([0.6745, 0.0, 0.6745], mad(np.array([1, 2, 3])))

    def tearDown(self):
        """Nothing to clean up; the fixture is rebuilt by ``setUp``."""
        pass
Exemplo n.º 7
0
if __name__ == "__main__":
    # Experiment configuration.
    contamination = 0.1  # percentage of outliers
    n_train, n_test = 200, 100  # number of training / testing points

    # Build a two-feature sample data set (random_state=42).
    X_train, y_train, X_test, y_test = generate_data(
        n_train=n_train,
        n_test=n_test,
        n_features=2,
        contamination=contamination,
        random_state=42,
    )

    # Fit the ROD detector on the training split.
    clf_name = 'ROD'
    clf = ROD()
    clf.fit(X_train)

    # Training results, read from the fitted detector:
    # binary labels (0: inliers, 1: outliers) and raw outlier scores.
    y_train_pred, y_train_scores = clf.labels_, clf.decision_scores_

    # Test results: outlier labels (0 or 1), then outlier scores.
    y_test_pred = clf.predict(X_test)
    y_test_scores = clf.decision_function(X_test)

    # Evaluate and print the results for both splits, training first.
    for heading, y_true, y_scores in (
            ("\nOn Training Data:", y_train, y_train_scores),
            ("\nOn Test Data:", y_test, y_test_scores)):
        print(heading)
        evaluate_print(clf_name, y_true, y_scores)