def test_pipe_summary():
    """
    Smoke test of ``Pipenet.summary`` on a six-step pipenet; the test only
    requires that the call completes without raising
    """
    regressor_ab = LinearRegression()
    regressor_ac = LinearRegression()

    # Register the steps one at a time, keeping the wiring order intact.
    steps = {}
    steps["A-B-regression-ad"] = dict(
        model=detector.RegressionAD(regressor=regressor_ab, target="B"),
        input="original",
        subset=["A", "B"],
    )
    steps["A-C-regression-error"] = dict(
        model=transformer.RegressionResidual(
            regressor=regressor_ac, target="C"
        ),
        input="original",
        subset=["A", "C"],
    )
    steps["A-C-regression-ad"] = dict(
        model=detector.InterQuartileRangeAD(),
        input="A-C-regression-error",
        subset="all",
    )
    steps["ABC-ad"] = dict(
        model=aggregator.OrAggregator(),
        input=["A-B-regression-ad", "A-C-regression-ad"],
    )
    steps["D-ad"] = dict(
        model=detector.QuantileAD(high=0.9, low=0.1),
        input="original",
        subset=["D"],
    )
    steps["ABCD-ad"] = dict(
        model=aggregator.OrAggregator(),
        input=["ABC-ad", "D-ad"],
    )

    pipenet = Pipenet(steps)
    pipenet.summary()
# Model instances grouped by input/output arity.
# NOTE(review): judging by the list name, these presumably turn one input
# series into several output series; the tests consuming this list are not
# visible in this chunk, so confirm against them.
one2many_models = [
    transformer.RollingAggregate(
        agg="quantile", agg_params={"q": [0.1, 0.5, 0.9]}
    ),
    transformer.RollingAggregate(
        agg="hist", agg_params={"bins": [20, 50, 80]}
    ),
    transformer.Retrospect(n_steps=3),
]

# NOTE(review): judging by the list name, these presumably reduce a
# multi-column input to a single output series; confirm against the tests
# that consume this list.
many2one_models = [
    detector.MinClusterDetector(KMeans(n_clusters=2)),
    detector.OutlierDetector(
        LocalOutlierFactor(n_neighbors=20, contamination=0.1)
    ),
    detector.RegressionAD(regressor=LinearRegression()),
    detector.PcaAD(),
    transformer.SumAll(),
    transformer.RegressionResidual(LinearRegression()),
    transformer.PcaReconstructionError(),
]


# Runs once per entry of ``one2one_models`` (defined outside this chunk).
@pytest.mark.parametrize("model", one2one_models)
def test_one2one_s2s_w_name(model):
    """
    if a one-to-one model is applied to a Series, it should keep the Series
    name unchanged
    """
    s_name = pd.Series(
        np.arange(100),
def test_skip_fit():
    """
    Test that steps named in ``skip_fit`` keep the state from the previous
    fit while the remaining steps are fitted again
    """
    regressor_ab = LinearRegression()
    regressor_ac = LinearRegression()
    steps = {
        "A-B-regression-ad": {
            "model": detector.RegressionAD(
                regressor=regressor_ab, target="B"
            ),
            "input": "original",
            "subset": ["A", "B"],
        },
        "A-C-regression-error": {
            "model": transformer.RegressionResidual(
                regressor=regressor_ac, target="C"
            ),
            "input": "original",
            "subset": ["A", "C"],
        },
        "A-C-regression-ad": {
            "model": detector.InterQuartileRangeAD(),
            "input": "A-C-regression-error",
            "subset": "all",
        },
        "ABC-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["A-B-regression-ad", "A-C-regression-ad"],
        },
        "D-ad": {
            "model": detector.QuantileAD(high=0.9, low=0.1),
            "input": "original",
            "subset": ["D"],
        },
        "ABCD-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["ABC-ad", "D-ad"],
        },
    }
    pipenet = Pipenet(steps)

    timeline = pd.date_range(start="2017-1-1", periods=10, freq="D")
    column_names = ["A", "B", "C", "D"]

    # First data set: B and C are exact multiples of A (10x and 100x).
    exact_values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 40, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 600, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    ).T
    df_exact = pd.DataFrame(
        exact_values, index=timeline, columns=column_names
    )
    pipenet.fit(df_exact)

    # Second data set: one value of B (41) and one of C (601) are off.
    perturbed_values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    ).T
    df_perturbed = pd.DataFrame(
        perturbed_values, index=timeline, columns=column_names
    )

    # Both regression steps skipped: coefficients stay at the first fit.
    pipenet.fit(
        df_perturbed,
        skip_fit=["A-B-regression-ad", "A-C-regression-error"],
    )
    iqr_model = pipenet.steps["A-C-regression-ad"]["model"]
    assert regressor_ab.coef_[0] == pytest.approx(10)
    assert regressor_ac.coef_[0] == pytest.approx(100)
    assert iqr_model.abs_high_ == 0
    assert iqr_model.abs_low_ == 0

    # Only the A-B step skipped: the A-C regressor is fitted again.
    pipenet.fit(df_perturbed, skip_fit=["A-B-regression-ad"])
    iqr_model = pipenet.steps["A-C-regression-ad"]["model"]
    assert regressor_ab.coef_[0] == pytest.approx(10)
    assert regressor_ac.coef_[0] != pytest.approx(100)
    assert iqr_model.abs_high_ != 0
    assert iqr_model.abs_low_ != 0
def test_pipenet_return_list_return_intermediate():
    """
    Test pipenet with return_list=True and return_intermediate=True: every
    detector and aggregator step should report its anomalies as a list of
    (start, end) timestamp pairs
    """

    def day_span(first_day, last_day):
        # Closed interval from the start of first_day to the last
        # nanosecond of last_day.
        return (
            pd.Timestamp(first_day + " 00:00:00"),
            pd.Timestamp(last_day + " 23:59:59.999999999"),
        )

    values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    ).T
    df = pd.DataFrame(
        values,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )

    regressor_ab = LinearRegression()
    regressor_ac = LinearRegression()
    steps = {
        "A-B-regression-ad": {
            "model": detector.RegressionAD(
                regressor=regressor_ab, target="B"
            ),
            "input": "original",
            "subset": ["A", "B"],
        },
        "A-C-regression-error": {
            "model": transformer.RegressionResidual(
                regressor=regressor_ac, target="C"
            ),
            "input": "original",
            "subset": ["A", "C"],
        },
        "A-C-regression-ad": {
            "model": detector.InterQuartileRangeAD(),
            "input": "A-C-regression-error",
            "subset": "all",
        },
        "ABC-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["A-B-regression-ad", "A-C-regression-ad"],
        },
        "D-ad": {
            "model": detector.QuantileAD(high=0.9, low=0.1),
            "input": "original",
            "subset": ["D"],
        },
        "ABCD-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["ABC-ad", "D-ad"],
        },
    }
    pipenet = Pipenet(steps)

    results = pipenet.fit_detect(
        df, return_list=True, return_intermediate=True
    )

    expected_keys = set(pipenet.steps.keys()) | {"original"}
    assert set(results.keys()) == expected_keys

    assert results["A-B-regression-ad"] == [
        day_span("2017-01-05", "2017-01-05")
    ]
    assert results["A-C-regression-ad"] == [
        day_span("2017-01-07", "2017-01-07")
    ]
    assert results["ABC-ad"] == [
        day_span("2017-01-05", "2017-01-05"),
        day_span("2017-01-07", "2017-01-07"),
    ]
    assert results["D-ad"] == [day_span("2017-01-08", "2017-01-08")]
    # Jan 7 and Jan 8 are expected as one merged interval here.
    assert results["ABCD-ad"] == [
        day_span("2017-01-05", "2017-01-05"),
        day_span("2017-01-07", "2017-01-08"),
    ]
def test_pipenet_return_intermediate():
    """
    Test pipenet with return_intermediate=True, for both ``fit`` and
    ``fit_detect``
    """
    values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    ).T
    df = pd.DataFrame(
        values,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )

    regressor_ab = LinearRegression()
    regressor_ac = LinearRegression()
    steps = {
        "A-B-regression-ad": {
            "model": detector.RegressionAD(
                regressor=regressor_ab, target="B"
            ),
            "input": "original",
            "subset": ["A", "B"],
        },
        "A-C-regression-error": {
            "model": transformer.RegressionResidual(
                regressor=regressor_ac, target="C"
            ),
            "input": "original",
            "subset": ["A", "C"],
        },
        "A-C-regression-ad": {
            "model": detector.InterQuartileRangeAD(),
            "input": "A-C-regression-error",
            "subset": "all",
        },
        "ABC-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["A-B-regression-ad", "A-C-regression-ad"],
        },
        "D-ad": {
            "model": detector.QuantileAD(high=0.9, low=0.1),
            "input": "original",
            "subset": ["D"],
        },
        "ABCD-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["ABC-ad", "D-ad"],
        },
    }
    pipenet = Pipenet(steps)
    expected_keys = set(pipenet.steps.keys()) | {"original"}

    # fit only: the residual transformer is the one step expected to
    # yield an intermediate output; the others are expected to be None.
    fit_results = pipenet.fit(df, return_intermediate=True)
    assert set(fit_results.keys()) == expected_keys
    assert fit_results["A-C-regression-error"] is not None
    for step_name in (
        "A-B-regression-ad",
        "A-C-regression-ad",
        "ABC-ad",
        "D-ad",
        "ABCD-ad",
    ):
        assert fit_results[step_name] is None

    # fit_detect: each detector/aggregator step yields a label series.
    detect_results = pipenet.fit_detect(df, return_intermediate=True)
    assert set(detect_results.keys()) == expected_keys
    expected_labels = {
        "A-B-regression-ad": [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        "A-C-regression-ad": [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        "ABC-ad": [0, 0, 0, 0, 1, 0, 1, 0, 0, 0],
        "D-ad": [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
        "ABCD-ad": [0, 0, 0, 0, 1, 0, 1, 1, 0, 0],
    }
    for step_name, labels in expected_labels.items():
        pd.testing.assert_series_equal(
            detect_results[step_name],
            pd.Series(labels, index=df.index),
            check_dtype=False,
            check_names=False,
        )
def test_pipenet_default():
    """
    Test default setting of pipenet: ``fit_detect`` returns the final
    label series, and scoring against those same labels gives 1 for every
    metric checked
    """
    values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    ).T
    df = pd.DataFrame(
        values,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )

    regressor_ab = LinearRegression()
    regressor_ac = LinearRegression()
    steps = {
        "A-B-regression-ad": {
            "model": detector.RegressionAD(
                regressor=regressor_ab, target="B"
            ),
            "input": "original",
            "subset": ["A", "B"],
        },
        "A-C-regression-error": {
            "model": transformer.RegressionResidual(
                regressor=regressor_ac, target="C"
            ),
            "input": "original",
            "subset": ["A", "C"],
        },
        "A-C-regression-ad": {
            "model": detector.InterQuartileRangeAD(),
            "input": "A-C-regression-error",
            "subset": "all",
        },
        "ABC-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["A-B-regression-ad", "A-C-regression-ad"],
        },
        "D-ad": {
            "model": detector.QuantileAD(high=0.9, low=0.1),
            "input": "original",
            "subset": ["D"],
        },
        "ABCD-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["ABC-ad", "D-ad"],
        },
    }
    pipenet = Pipenet(steps)

    expected_labels = [0, 0, 0, 0, 1, 0, 1, 1, 0, 0]

    detected = pipenet.fit_detect(df)
    pd.testing.assert_series_equal(
        detected,
        pd.Series(expected_labels, index=df.index),
        check_dtype=False,
    )

    # A fresh truth series is built for each metric.
    for metric in ("recall", "precision", "iou", "f1"):
        truth = pd.Series(expected_labels, index=df.index)
        assert pipenet.score(df, truth, scoring=metric) == 1
import numpy as np
import pandas as pd
import pytest
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import LocalOutlierFactor

import adtk.detector as detector
import adtk.transformer as transformer

# Detector and transformer instances shared by this module.
# NOTE(review): presumably used as a pytest parametrize list; the consuming
# tests are outside this chunk, so confirm there.
models = [
    detector.MinClusterDetector(KMeans(n_clusters=2)),
    detector.OutlierDetector(
        LocalOutlierFactor(n_neighbors=20, contamination=0.1)),
    detector.RegressionAD(target="A", regressor=LinearRegression()),
    detector.PcaAD(),
    transformer.RegressionResidual(target="A", regressor=LinearRegression()),
    transformer.PcaReconstructionError(),
    transformer.PcaProjection(),
    transformer.PcaReconstruction(),
]

# Frame of 20 daily rows starting 2017-01-01 with columns "A" and "B",
# filled row by row with the integers 0..39.
df_train = pd.DataFrame(
    np.arange(40).reshape(20, 2),
    columns=["A", "B"],
    index=pd.date_range(start="2017-1-1", periods=20, freq="D"),
)

# The statement below continues past the end of this chunk.
df_test_ok = pd.DataFrame(
    np.arange(0, -60, -1).reshape(20, 3),