def test_correct_data_is_marked_as_change_point(self):
        # Eight steady 5s followed by eight steady 7s: the level shift
        # at index 8 is the one change point the detector should find.
        series = pd.Series([5] * 8 + [7] * 8, name="ts")

        detector = IidChangePointDetector(
            confidence=95, change_history_length=4) << {'result': 'ts'}
        output = detector.fit_transform(series)

        # The first 7 (row 8) must raise the alert...
        self.assertEqual(output.loc[8, 'result.Alert'], 1.0)

        # ...and it must be the only alert in the whole output.
        alerts = output[output['result.Alert'] == 1.0]
        self.assertEqual(len(alerts), 1)
    def test_multiple_user_specified_columns_is_not_allowed(self):
        """IidChangePointDetector must reject more than one input column."""
        path = get_dataset('timeseries').as_filepath()
        data = FileDataStream.read_csv(path)

        pipeline = Pipeline([
            IidChangePointDetector(columns=['t2', 't3'],
                                   change_history_length=5)
        ])

        # assertRaisesRegex checks both the exception type and the message,
        # and fails with a clear diagnostic if nothing is raised -- replacing
        # the manual try/except/self.fail() pattern (whose bare self.fail()
        # gave no hint about what went wrong).
        with self.assertRaisesRegex(RuntimeError,
                                    'Only one column is allowed'):
            pipeline.fit_transform(data)
Exemplo n.º 3
0
###############################################################################
# IidChangePointDetector
import pandas as pd
from nimbusml.timeseries import IidChangePointDetector

# Eight steady values followed by eight higher ones: the level
# shift at index 8 is the change point we expect to detect.
series_values = [5] * 8 + [7] * 8

X_train = pd.Series(series_values, name="ts")

# Map the detector's output columns under the 'result.*' prefix,
# reading from the 'ts' series.
cpd = IidChangePointDetector(
    confidence=95, change_history_length=4) << {'result': 'ts'}
data = cpd.fit_transform(X_train)

print(data)

#     ts  result.Alert  result.Raw Score  result.P-Value Score  result.Martingale Score
# 0    5           0.0               5.0          5.000000e-01                 0.001213
# 1    5           0.0               5.0          5.000000e-01                 0.001213
# 2    5           0.0               5.0          5.000000e-01                 0.001213
# 3    5           0.0               5.0          5.000000e-01                 0.001213
# 4    5           0.0               5.0          5.000000e-01                 0.001213
# 5    5           0.0               5.0          5.000000e-01                 0.001213
# 6    5           0.0               5.0          5.000000e-01                 0.001213
# 7    5           0.0               5.0          5.000000e-01                 0.001213
# 8    7           1.0               7.0          1.000000e-08             10298.666376   <-- alert is on, predicted changepoint
# 9    7           0.0               7.0          1.328455e-01             33950.164799
# 10   7           0.0               7.0          2.613750e-01             60866.342063
# 11   7           0.0               7.0          3.776152e-01             78362.038772
Exemplo n.º 4
0
 'LightGbmRegressor':
 LightGbmRegressor(minimum_example_count_per_group=1,
                   minimum_example_count_per_leaf=1),
 'LightGbmRanker':
 LightGbmRanker(minimum_example_count_per_group=1,
                minimum_example_count_per_leaf=1),
 'NGramFeaturizer':
 NGramFeaturizer(word_feature_extractor=n_gram()),
 'SkipFilter':
 SkipFilter(count=5),
 'TakeFilter':
 TakeFilter(count=100000),
 'IidSpikeDetector':
 IidSpikeDetector(columns=['F0']),
 'IidChangePointDetector':
 IidChangePointDetector(columns=['F0']),
 'SsaSpikeDetector':
 SsaSpikeDetector(columns=['F0'], seasonal_window_size=2),
 'SsaChangePointDetector':
 SsaChangePointDetector(columns=['F0'], seasonal_window_size=2),
 'SsaForecaster':
 SsaForecaster(columns=['F0'],
               window_size=2,
               series_length=5,
               train_size=5,
               horizon=1),
 'TensorFlowScorer':
 TensorFlowScorer(model_location=os.path.join(this, '..', 'nimbusml',
                                              'examples',
                                              'frozen_saved_model.pb'),
                  columns={'c': ['a', 'b']}),
Exemplo n.º 5
0
 'FromKey': Pipeline([
     ToKey(columns=['Sepal_Length']),
     FromKey(columns=['Sepal_Length'])
 ]),
 # GlobalContrastRowScaler currently requires a vector input to work
 'GlobalContrastRowScaler': Pipeline([
     ColumnConcatenator() << {
         'concated_columns': [
             'Petal_Length',
             'Sepal_Width',
             'Sepal_Length']},
     GlobalContrastRowScaler(columns={'normed_columns': 'concated_columns'})
 ]),
 'Handler': Handler(replace_with='Mean', columns={'NewVals': 'Petal_Length'}),
 'IidSpikeDetector': IidSpikeDetector(columns=['Sepal_Length']),
 'IidChangePointDetector': IidChangePointDetector(columns=['Sepal_Length']),
 'Indicator': Indicator(columns={'Has_Nan': 'Petal_Length'}),
 'KMeansPlusPlus': KMeansPlusPlus(n_clusters=3, feature=['Sepal_Width', 'Sepal_Length']),
 'LightGbmRanker': LightGbmRanker(feature=['Class', 'dep_day', 'duration'],
                                  label='rank',
                                  group_id='group'),
 'Loader': Loader(columns={'ImgPath': 'Path'}),
 'LpScaler': Pipeline([
     ColumnConcatenator() << {
         'concated_columns': [
             'Petal_Length',
             'Sepal_Width',
             'Sepal_Length']},
     LpScaler(columns={'normed_columns': 'concated_columns'})
 ]),
 'MutualInformationSelector': Pipeline([
Exemplo n.º 6
0
# Load the sample time-series data as a FileDataStream.
path = get_dataset('timeseries').as_filepath()
data = FileDataStream.read_csv(path)
print(data.head())
#      t1    t2      t3
# 0  0.01  0.01  0.0100
# 1  0.02  0.02  0.0200
# 2  0.03  0.03  0.0200
# 3  0.03  0.03  0.0250
# 4  0.03  0.03  0.0005

# Training pipeline: a single change-point detector that reads column
# 't2' and writes its four output columns under the 't2_cp.*' prefix.
pipeline = Pipeline([
    IidChangePointDetector(columns={'t2_cp': 't2'},
                           change_history_length=4),
])

result = pipeline.fit_transform(data)
print(result)

#      t1     t2       t3  t2_cp.Alert  t2_cp.Raw Score  t2_cp.P-Value Score  t2_cp.Martingale Score
# 0  0.01   0.01   0.0100          0.0             0.01         5.000000e-01            1.212573e-03
# 1  0.02   0.02   0.0200          0.0             0.02         4.960106e-01            1.221347e-03
# 2  0.03   0.03   0.0200          0.0             0.03         1.139087e-02            3.672914e-02
# 3  0.03   0.03   0.0250          0.0             0.03         2.058296e-01            8.164447e-02
# 4  0.03   0.03   0.0005          0.0             0.03         2.804577e-01            1.373786e-01
# 5  0.03   0.05   0.0100          1.0             0.05         1.448886e-06            1.315014e+04
# 6  0.05   0.07   0.0500          0.0             0.07         2.616611e-03            4.941587e+04
# 7  0.07   0.09   0.0900          0.0             0.09         3.053187e-02            2.752614e+05
# 8  0.09  99.00  99.0000          0.0            99.00         1.000000e-08            1.389396e+12
# 9  1.10   0.10   0.1000          1.0             0.10         3.778296e-01            1.854344e+07