def test__compute_perc_cutoff(self):
    """Check the percentage cutoff against hand-set scores 1..100.

    With the outlier scores overwritten by the integers 1..100, a cutoff
    of 10 is expected to yield the value 100 - 10 + 1 = 91.
    """
    data = self._gen_data()
    cutoff = 10
    expected = 100 - cutoff + 1

    detector = TOF().fit(data)
    # Replace the fitted scores with a known ramp so the expected
    # cutoff value can be computed by hand.
    detector.outlier_score_ = np.arange(1, 101).astype(int)

    computed = detector._compute_perc_cutoff(cutoff).astype(int)
    self.assertTrue(computed == expected)
def test__find_nearest_neighbors(self):
    """Smoke test: neighbor search runs with the default k and with k=7."""
    data = self._gen_data()
    # Each variant gets its own freshly fitted detector.
    for extra_kwargs in ({}, {"k": 7}):
        detector = TOF().fit(data)
        detector._find_nearest_neighbors(data, **extra_kwargs)
def test__compute_tof(self):
    """Check ``_compute_tof`` against a hand-computed reference.

    The reference score for each point is the root of the mean squared
    difference between its own index and the indices of its neighbors.
    """
    X = self._gen_data()
    nn_ids = np.array([[1, 3], [0, 2], [3, 2], [0, 1]])
    ids = np.arange(4).reshape([4, 1])

    expected = np.sqrt(np.mean((nn_ids - ids) ** 2, axis=1))
    computed = TOF().fit(X)._compute_tof(nn_ids, ids)

    # Compare with a tolerance: exact float equality would make the test
    # depend on the last-bit rounding of the implementation's arithmetic.
    self.assertTrue(np.allclose(computed, expected))
def detect_outlier(
    time_series,
    cutoff_n=1.0,
    k=None,
    in_percent=False,
    embedding_dimension=3,
    embedding_delay=1,
    **other_method_kwargs
):
    """Detect outliers in a time series with TOF.

    The first column of ``time_series`` is time-delay embedded, a TOF
    model is fitted on the embedded points, and the result columns are
    concatenated to the input frame.

    :param pandas.DataFrame time_series: pandas dataframe with the time
        series (only the first column is used)
    :param float cutoff_n: the threshold for the detector (max event
        length, or % of #datapoints)
    :param int k: number of neighbors to use (default is
        embedding_dimension + 1)
    :param bool in_percent: if True, the threshold is drawn at the given
        percentage instead of at an event length
    :param int embedding_dimension: embedding dimension value (>=1)
        [default: 3]
    :param int embedding_delay: embedding delay (>=1) [default: 1]
    :param other_method_kwargs: extra keyword arguments passed on to the
        ``TOF`` constructor
    :return: ``time_series`` concatenated column-wise with the result
        DataFrame
    :rtype: pandas.DataFrame
    """
    # Plain numpy view of the first column
    signal = time_series.values[:, 0]

    # Embed the signal and transform the time axis with the same
    # dimension and delay
    embedder = TimeDelayEmbedder(d=embedding_dimension, tau=embedding_delay)
    embedded = embedder.fit_transform(signal)
    axis_transformer = TransformYTrue(
        d=embedding_dimension, tau=embedding_delay
    )
    new_time_axis = pd.DataFrame(
        axis_transformer.fit_transform(time_series.index)
    ).values

    # Build and fit the detector
    detector = TOF(cutoff_n=cutoff_n, k=k, **other_method_kwargs).fit(embedded)
    if in_percent:
        # Override the fitted cutoff with the percentage-based one
        detector.cutoff_ = detector._compute_perc_cutoff(cutoff_n)

    y_pred = detector.predict(embedded)

    # Per-point outlier scores, read after prediction
    res_df = _make_result_df(
        new_time_axis,
        detector.outlier_score_,
        y_pred,
        inv_it=True,
        prefix="TOF",
    )
    return pd.concat([time_series, res_df], axis=1, sort=False)
from uniqed.models.tof import TOF
from uniqed.data.gen_logmap import generate_logmapdata
from uniqed.transformers.transformers import TimeDelayEmbedder
import matplotlib.pyplot as plt

# Build a sample dataset with a fixed random seed
data = generate_logmapdata(rseed=231)
x = data['value'].values
t = data.index.values

# Embed both the values and the time stamps with default settings
X = TimeDelayEmbedder().fit_transform(x)
T = TimeDelayEmbedder().fit_transform(t)

# Fit TOF and label the embedded points in one step
mytof = TOF(cutoff_n=100)
y = mytof.fit_predict(X)
# The plotted score is the reciprocal of the fitted outlier score
tof_score = 1.0 / mytof.outlier_score_

# Top panel: the raw data; bottom panel: score coloured by prediction
plt.figure()

plt.subplot(2, 1, 1)
plt.plot(data)
plt.legend(['time series', 'anomaly'], loc='upper left')
plt.ylabel('values')
plt.xlim(0, 2000)

plt.subplot(2, 1, 2)
plt.scatter(T[:, 0], tof_score, c=y)
plt.ylabel("TOF score")
plt.xlabel("t")
def test__compute_outlier_score(self):
    """Smoke test: scoring the training data on a fitted TOF runs."""
    data = self._gen_data()
    detector = TOF().fit(data)
    detector._compute_outlier_score(data)
def test__compute_p_value(self):
    """Check p-values for 0..99 against 0.01, 0.02, ..., 1.00.

    Both sides are rounded to two decimals before being compared.
    """
    samples = np.arange(100)
    expected = np.arange(0.01, 1.01, 0.01)

    computed = TOF()._compute_p_value(samples)

    matches = np.round(expected, 2) == np.round(computed, 2)
    self.assertTrue(np.all(matches))
def test__compute_cutoff2(self):
    """Smoke test: cutoff computation runs on a TOF fitted with k=21."""
    data = self._gen_data()
    detector = TOF(k=21)
    detector.fit(data)._compute_cutoff(cutoff_n=100)
def test__compute_cutoff(self):
    """Cutoff computation accepts a number and rejects a string.

    A numeric ``cutoff_n`` must run cleanly; a non-numeric one is
    expected to raise ``ValueError``.
    """
    data = self._gen_data()
    detector = TOF().fit(data)

    detector._compute_cutoff(cutoff_n=100)

    self.assertRaises(
        ValueError, detector._compute_cutoff, cutoff_n='goosebump'
    )
def test__get_outliers_inds(self):
    """Smoke test: outlier indices can be extracted from predictions."""
    data = self._gen_data()
    # The extraction is called on a fresh, unfitted instance, while the
    # predictions come from a separately fitted one.
    extractor = TOF()
    predictions = TOF().fit(data).predict(data)
    extractor._get_outliers_inds(predictions)
def test_predict(self):
    """Smoke test: predict runs with the default and a custom cutoff_n."""
    data = self._gen_data()
    for ctor_kwargs in ({}, {"cutoff_n": 70}):
        TOF(**ctor_kwargs).fit(data).predict(data)
def test_fit(self):
    """Smoke test: a default TOF can be fitted on the generated data."""
    data = self._gen_data()
    detector = TOF()
    detector.fit(data)