def predict(self, dt, x_units):
    '''Diagnose every target unit from the latest data-point of each unit.

    The raw samples are appended to the per-unit history (dfs_original), each
    sample is passed through the transformer at the same position in
    self.transformers, and the transformed samples are appended to dfs.
    Then, for every uid in ids_target_units, the detector of that unit is
    fitted on the reference data returned by
    self.pg.get_target_and_reference and asked to score the unit's sample.

    Parameters:
    -----------
    dt : datetime
        Current datetime period

    x_units : array-like, shape (n_units, n_features)
        x_units[i] is the data-point of the i'th unit at time dt.

    Returns:
    --------
    deviations : list of DeviationContext
        One entry per uid in ids_target_units, in the same order. Each entry
        is the result of the unit detector's predict(), or
        DeviationContext(0, 0.5, 0, False) when TestUnitError or
        NoRefGroupError was raised for that unit.
    '''
    # Extend the untransformed history of every unit with its new sample.
    raw_history = []
    for idx, sample in enumerate(x_units):
        raw_history.append(append_to_df(self.dfs_original[idx], dt, sample))
    self.dfs_original = raw_history

    # Transform each sample with the transformer at the same position.
    transformed = []
    for sample, transformer in zip(x_units, self.transformers):
        transformed.append(transformer.transform(sample))

    # Extend the transformed history used to build reference groups.
    self.dfs = [
        append_to_df(self.dfs[idx], dt, sample)
        for idx, sample in enumerate(transformed)
    ]

    def diagnose(uid):
        # Fit the unit's detector on its reference data, then score its sample.
        detector = self.detectors[uid]
        try:
            target, reference = self.pg.get_target_and_reference(uid, dt, self.dfs)
            detector.fit(reference)
            return detector.predict(dt, target)
        except (TestUnitError, NoRefGroupError):
            # no deviation by default
            return DeviationContext(0, 0.5, 0, False)

    return [diagnose(uid) for uid in self.ids_target_units]
def _fit(self, dtime, x, external=None):
    '''
    Private method for internal use only.

    Select a reference dataset from the stored history (self.df) according
    to ref_group, fit the strangeness model (self.strg) on it and store the
    resulting fit scores in self.scores. Afterwards the sample x is appended
    to the history and external is appended to self.externals.

    When ref_group is "external", the reference rows are those whose stored
    external value lies within +/- 2*std/10 of the given external value.
    Otherwise ref_group is iterated as a sequence of criteria and the history
    is narrowed, one criterion at a time, to the rows whose dt2num value
    equals the one of dtime. If no row is selected, x alone is used.

    Raises InputValidationError when ref_group is "external" and external
    is None.
    '''
    if self.ref_group == "external":
        if external is None:
            raise InputValidationError(
                "When ref_group is set to 'external', "
                "the parameter external must be specified."
            )

        past_externals = np.array(self.externals)
        # Half-width of the acceptance window around the current external value.
        tolerance = 2 * np.std(past_externals) / 10 if len(past_externals) > 0 else 0
        lower_bound = external - tolerance
        upper_bound = external + tolerance
        in_window = (lower_bound <= past_externals) & (past_externals <= upper_bound)
        X = self.df.loc[in_window].values
    else:
        selected = self.df
        # Each criterion further restricts the rows kept by the previous one.
        for criterion in self.ref_group:
            wanted = dt2num(dtime, criterion)
            observed = np.array([dt2num(stamp, criterion) for stamp in selected.index])
            selected = selected.loc[wanted == observed]
        X = selected.values

    # Fall back to the current sample when the reference group is empty.
    if not len(X):
        X = [x]

    self.strg.fit(X)
    self.scores = self.strg.get_fit_scores()

    # The new sample only becomes part of the history after fitting.
    self.df = append_to_df(self.df, dtime, x)
    self.externals.append(external)
def _fit(self, dtime, x, external=None):
    '''
    Private method for internal use only.

    Select a reference dataset from the stored history (self.df) according
    to ref_group, fit the strangeness model (self.strg) on it and store the
    resulting fit scores in self.scores. Afterwards the sample x is appended
    to the history and external is appended to self.externals.

    ref_group values:
      - "week":   rows whose ISO week number equals the one of dtime
      - "month":  rows whose month equals the one of dtime
      - "season": rows in the same season as dtime (Dec-Feb, Mar-May,
                  Jun-Aug, Sep-Nov)
      - anything else is handled as "external": rows whose stored external
        value lies within +/- 2*std/10 of the given external value

    If no row is selected, x alone is used as reference data.

    Raises InputValidationError when the "external" branch is taken and
    external is None.
    '''
    season_of_month = {
        12: 1, 1: 1, 2: 1,
        3: 2, 4: 2, 5: 2,
        6: 3, 7: 3, 8: 3,
        9: 4, 10: 4, 11: 4,
    }

    def iso_week(dt):
        return dt.isocalendar()[1]

    def month(dt):
        return dt.month

    def season(dt):
        return season_of_month[dt.month]

    # Pick the function mapping a datetime to its period key; None means
    # that the reference group is defined by the external variable instead.
    if self.ref_group == "week":
        period_of = iso_week
    elif self.ref_group == "month":
        period_of = month
    elif self.ref_group == "season":
        period_of = season
    else:
        period_of = None

    if period_of is not None:
        current = period_of(dtime)
        historical = np.array([period_of(dt) for dt in self.df.index])
        X = self.df.loc[current == historical].values
    else:  # self.ref_group == "external"
        if external is None:
            raise InputValidationError(
                "When ref_group is set to 'external', the parameter external must be specified."
            )

        current = external
        historical = np.array(self.externals)
        # Half-width of the acceptance window around the current external value.
        pm = 2 * np.std(historical) / 10 if len(historical) > 0 else 0
        X = self.df.loc[(current - pm <= historical)
                        & (historical <= current + pm)].values

    # Fall back to the current sample when the reference group is empty.
    if len(X) == 0:
        X = [x]

    self.strg.fit(X)
    self.scores = self.strg.get_fit_scores()

    # The new sample only becomes part of the history after fitting.
    self.df = append_to_df(self.df, dtime, x)
    self.externals.append(external)
def _fit(self, dtime, x, external=None):
    '''
    Private method for internal use only.

    Select a reference dataset from the initial data (df_init) together with
    the stored history (df) according to ref_group and fit the strangeness
    model (self.strg) on it. Afterwards the sample x is appended to the
    history and external is appended to self.externals.

    ref_group values:
      - "external": the k rows whose stored external value is closest to the
        given external value, with
        k = int(number_of_stored_externals * external_percentage)
      - a callable: called as
        ref_group(history_times, history_data, current_time, current_data)
        and expected to return the reference data; it is not called when
        there is no history at all
      - otherwise ref_group is iterated as a sequence of criteria and the
        data is narrowed, one criterion at a time, to the rows whose dt2num
        value equals the one of dtime

    If no row is selected, x alone is used as reference data.

    Raises InputValidationError when ref_group is "external" and external
    is None.
    '''
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # equivalent and keeps the same row order. Imported locally so that this
    # method does not depend on how the module imports pandas.
    # NOTE(review): assumes df_init and df are pandas DataFrames - confirm.
    import pandas as pd

    if self.ref_group == "external":
        if external is None:
            raise InputValidationError(
                "When ref_group is set to 'external', the parameter external must be specified."
            )

        all_externals = np.array(
            list(self.externals_init) + list(self.externals))
        all_X = np.array(list(self.df_init.values) + list(self.df.values))

        # Keep the k samples whose external value is nearest to the current one.
        k = int(len(all_externals) * self.external_percentage)
        ids = np.argsort(np.abs(all_externals - external))[:k]
        X = all_X[ids]
    elif callable(self.ref_group):
        df = pd.concat([self.df_init, self.df])
        if len(df) == 0:
            X = []
        else:
            history_times = df.index.to_pydatetime()
            history_data = df.values
            X = self.ref_group(history_times, history_data, dtime, x)
    else:
        df_sub = pd.concat([self.df, self.df_init])
        # Each criterion further restricts the rows kept by the previous one.
        for criterion in self.ref_group:
            current = dt2num(dtime, criterion)
            historical = np.array(
                [dt2num(dt, criterion) for dt in df_sub.index])
            df_sub = df_sub.loc[(current == historical)]
        X = df_sub.values

    # Fall back to the current sample when the reference group is empty.
    if len(X) == 0:
        X = [x]

    self.strg.fit(X)

    # The new sample only becomes part of the history after fitting.
    self.df = append_to_df(self.df, dtime, x)
    self.externals.append(external)
def predict(self, dtime, x):
    '''Score the new test sample x and update the deviation level.

    The sample is appended to the stored history (self.df) and every
    intermediate result is recorded in the corresponding history list
    (T, S, diffs, representatives, P, M).

    Parameters:
    -----------
    dtime : datetime
        datetime corresponding to the sample x

    x : array-like, shape (n_features,)
        Sample for which the strangeness, p-value and deviation level are computed

    Returns:
    --------
    DeviationContext built from, in this order:

    strangeness : strangeness of x as returned by self.strg.predict
    pval : p-value of that strangeness as returned by self.strg.pvalue
    deviation : deviation level returned by the martingale update for pval
    is_deviating : True if deviation is above dev_threshold
    '''
    # Record the sample itself before scoring it.
    self.T.append(dtime)
    self.df = append_to_df(self.df, dtime, x)

    # Strangeness of the sample, plus the auxiliary outputs of the model.
    score, difference, closest = self.strg.predict(x)
    self.S.append(score)
    self.diffs.append(difference)
    self.representatives.append(closest)

    p_value = self.strg.pvalue(score)
    self.P.append(p_value)

    level = self._update_martingale(p_value)
    self.M.append(level)

    return DeviationContext(score, p_value, level, level > self.dev_threshold)