예제 #1
0
    def test_threshold_minmax(self):
        """Fit a detector with a (min, max) threshold pair and expect five anomalies.

        The actual values are all zero except for five entries set to 10,
        which fall outside the (-1, 1) range passed as the threshold.
        """
        n_samples, n_features = 10, 5
        expected_anomalies = 5

        # Flat positions of the outliers; after the reshape below they land
        # in five different rows (0, 1, 3, 6 and 9).
        outlier_positions = [0, 7, 16, 33, 45]

        # actual value
        actual = np.zeros(n_samples * n_features)
        actual[outlier_positions] = 10
        actual = actual.reshape((n_samples, n_features))

        detector = ThresholdDetector()
        detector.set_params(threshold=(-1, 1))
        detector.fit(actual)

        # Count distinct first-axis indexes that carry a positive score.
        scores = detector.score()
        flagged_rows = set(np.where(scores > 0)[0])
        assert len(flagged_rows) == expected_anomalies

        assert len(detector.anomaly_indexes()) == expected_anomalies
예제 #2
0
    def test_threshold_single(self):
        """Fit a detector with a scalar threshold on (actual, predicted) data.

        Predictions are all 0 and actual values are 0.2 except for five
        entries set to 10; with ``threshold=3`` five anomalies are expected.
        """
        n_samples, n_features = 10, 5
        expected_anomalies = 5

        # predicted value
        predicted = np.full((n_samples, n_features), 0)

        # actual value: the five flat positions below end up in five
        # different rows once the array is reshaped.
        outlier_positions = [0, 7, 16, 33, 45]
        actual = np.full(n_samples * n_features, 0.2)
        actual[outlier_positions] = 10
        actual = actual.reshape((n_samples, n_features))

        detector = ThresholdDetector()
        detector.set_params(threshold=3)
        detector.fit(actual, predicted)

        # Count distinct first-axis indexes that carry a positive score.
        scores = detector.score()
        flagged_rows = set(np.where(scores > 0)[0])
        assert len(flagged_rows) == expected_anomalies

        assert len(detector.anomaly_indexes()) == expected_anomalies
예제 #3
0
    def test_mode_gaussian(self):
        """Check the threshold fitted in ``"gaussian"`` mode against its analytic value.

        The difference between prediction and actual value is drawn from
        N(mu, sigma), so the reference threshold is the ``1 - ratio`` quantile
        of that distribution: ``norm.ppf(1 - ratio) * sigma + mu``. The fitted
        ``td.th`` must be within 0.04 of it.
        """
        from scipy.stats import norm

        sample_num = 500
        mu, sigma, ratio = 3, 0.1, 0.01

        # A private, seeded generator keeps the test reproducible and leaves
        # the global numpy random state untouched for other tests.
        rng = np.random.RandomState(0)

        # actual value
        y_test = np.full(sample_num, 2)
        # predicted value = actual value + gaussian error
        y_pred = y_test + rng.normal(mu, sigma, sample_num)

        td = ThresholdDetector()
        td.set_params(mode="gaussian", ratio=ratio)
        td.fit(y_test, y_pred)

        # check estimated threshold
        expected_th = norm.ppf(1 - ratio) * sigma + mu
        assert abs(td.th - expected_th) < 0.04
예제 #4
0
    def test_fit_score(self):
        """Train an LSTM forecaster and run ThresholdDetector on its test predictions.

        Two scenarios are checked: a manually set threshold of 10, for which
        no anomaly is expected, and a threshold fitted from ``ratio``, for
        which ``int(ratio * number_of_test_samples)`` anomalies are expected.
        """
        window = 4

        # Generate the dataframe, then split it into train and test parts.
        frame = self.gen_data(feature_num=6, sample_num=100)
        train_df, test_df = self.train_test_split(frame, test_num=20, look_back=window)

        # Roll each part to build the model input / target arrays.
        x_train, y_train = self.roll_data(
            dataset=train_df, look_back=window, target_col_indexes=[0]
        )
        x_test, y_test = self.roll_data(
            dataset=test_df, look_back=window, target_col_indexes=[0]
        )

        # Create the model, train it on the train data and predict on the test data.
        forecaster = LSTMForecaster(
            target_dim=1,
            feature_dim=x_train.shape[-1],
            lstm_units=[32, 32],
            lr=0.001,
        )
        forecaster.fit(x=x_train, y=y_train, batch_size=1024, epochs=50, distributed=False)
        y_predict = forecaster.predict(x_test)

        # Scenario 1: manually set threshold.
        manual_detector = ThresholdDetector()
        manual_detector.set_params(threshold=10)
        manual_detector.fit(y_test, y_predict)
        scores = manual_detector.score()
        positive_positions = np.where(scores > 0)[0]
        assert len(positive_positions) == 0
        assert len(manual_detector.anomaly_indexes()) == 0

        # Scenario 2: no threshold given, so the detector fits one to the data.
        ratio = 0.1
        fitted_detector = ThresholdDetector()
        fitted_detector.set_params(ratio=ratio)
        fitted_detector.fit(y_test, y_predict)
        expected_count = int(ratio * y_test.shape[0])
        assert len(fitted_detector.anomaly_indexes()) == expected_count
예제 #5
0
    def test_corner_cases(self):
        """Exercise the error paths of ThresholdDetector.

        Calling ``score`` or ``anomaly_indexes`` on a detector that was never
        fitted must raise RuntimeError; fitting with the parameter
        combinations below must raise ValueError.
        """
        detector = ThresholdDetector()

        # Result accessors on a detector that has not been fitted.
        for unfitted_call in (detector.score, detector.anomaly_indexes):
            with pytest.raises(RuntimeError):
                unfitted_call()

        steps = np.arange(0, 1, 0.5)
        y = np.sin(steps)

        # Mode name "dummy", fitted with both actual and predicted values.
        detector.set_params(mode="dummy")
        with pytest.raises(ValueError):
            detector.fit(y, y)

        # "gaussian" mode fitted with the actual values only.
        detector.set_params(mode="gaussian")
        with pytest.raises(ValueError):
            detector.fit(y)

        # Thresholds that fit() must reject, tried in this order on the same
        # detector; this test does not change the mode again before them.
        rejected_thresholds = (
            "1",
            (1, -1),
            (np.array([-1]), np.array([-1])),
            (np.array([1, 1]), np.array([-1, -1])),
        )
        for bad_threshold in rejected_thresholds:
            detector.set_params(threshold=bad_threshold)
            with pytest.raises(ValueError):
                detector.fit(y)
예제 #6
0
    def test_fit_score(self):
        """Train an LSTM forecaster and run ThresholdDetector on its test predictions.

        Two scenarios are checked: a manually set threshold of 10, for which
        no anomaly is expected, and a threshold fitted from ``ratio``, for
        which ``int(ratio * number_of_test_samples)`` anomalies are expected.
        """
        window = 4

        # Generate the dataframe, then split it into train and test parts.
        frame = self.gen_data(feature_num=6, sample_num=100)
        train_df, test_df = self.train_test_split(frame, test_num=20, look_back=window)

        # Roll each part to build the model input / target arrays.
        x_train, y_train = self.roll_data(
            dataset=train_df, look_back=window, target_col_indexes=[0]
        )
        x_test, y_test = self.roll_data(
            dataset=test_df, look_back=window, target_col_indexes=[0]
        )

        # The training target gets an extra axis at position 1 before fitting;
        # the same axis is squeezed out of the prediction afterwards.
        y_train = np.expand_dims(y_train, axis=1)
        forecaster = LSTMForecaster(
            past_seq_len=window,
            input_feature_num=x_train.shape[-1],
            output_feature_num=1,
            hidden_dim=32,
            layer_num=2,
        )
        forecaster.fit(data=(x_train, y_train), batch_size=1024, epochs=50)
        y_predict = np.squeeze(forecaster.predict(x_test), axis=1)

        # Scenario 1: manually set threshold.
        manual_detector = ThresholdDetector()
        manual_detector.set_params(threshold=10)
        manual_detector.fit(y_test, y_predict)
        scores = manual_detector.score()
        positive_positions = np.where(scores > 0)[0]
        assert len(positive_positions) == 0
        assert len(manual_detector.anomaly_indexes()) == 0

        # Scenario 2: no threshold given, so the detector fits one to the data.
        ratio = 0.1
        fitted_detector = ThresholdDetector()
        fitted_detector.set_params(ratio=ratio)
        fitted_detector.fit(y_test, y_predict)
        expected_count = int(ratio * y_test.shape[0])
        assert len(fitted_detector.anomaly_indexes()) == expected_count