Exemplo n.º 1
0
    def predict(self, dt, x_units):
        '''Diagnose every target unit at time dt against its reference group.

        The raw samples are first recorded in self.dfs_original. Each sample is then passed
        through the transformer at the same position in self.transformers and the transformed
        samples are recorded in self.dfs. Finally, for each uid in self.ids_target_units, the
        unit's detector is fitted on the reference data returned by
        self.pg.get_target_and_reference and asked to score the target sample.

        Parameters:
        -----------
        dt : datetime
            Current datetime period

        x_units : array-like, shape (n_units, n_features)
            x_units[i] is the data-point of the i'th unit at time dt.
            It is indexed in parallel with self.dfs_original, self.dfs and self.transformers.

        Returns:
        --------
        deviations : list of DeviationContext
            One entry per uid in self.ids_target_units, in iteration order. Each entry is
            whatever the unit's detector.predict returns; when TestUnitError or NoRefGroupError
            is raised for a unit, the entry is DeviationContext(0, 0.5, 0, False) instead.
        '''

        # Record the untransformed samples, one history frame per unit.
        raw_frames = []
        for idx, sample in enumerate(x_units):
            raw_frames.append(append_to_df(self.dfs_original[idx], dt, sample))
        self.dfs_original = raw_frames

        # Transform each sample with the transformer at the same position.
        transformed = []
        for sample, transformer in zip(x_units, self.transformers):
            transformed.append(transformer.transform(sample))

        # Record the transformed samples; these frames feed the reference-group lookup.
        updated_frames = []
        for idx, sample in enumerate(transformed):
            updated_frames.append(append_to_df(self.dfs[idx], dt, sample))
        self.dfs = updated_frames

        results = []
        for uid in self.ids_target_units:
            unit_detector = self.detectors[uid]

            try:
                target, reference = self.pg.get_target_and_reference(
                    uid, dt, self.dfs)
                unit_detector.fit(reference)
                context = unit_detector.predict(dt, target)
            except (TestUnitError, NoRefGroupError):
                # Fall back to a neutral "no deviation" result for this unit.
                context = DeviationContext(0, 0.5, 0, False)

            results.append(context)

        return results
Exemplo n.º 2
0
    def _fit(self, dtime, x, external=None):
        ''' Private method for internal use only.
        Selects the reference rows from the history in self.df according to self.ref_group,
        fits self.strg on them, then records the new sample.

        When ref_group is "external", the reference rows are those whose recorded external
        value lies within +/- pm of the current one, where pm is one fifth of the standard
        deviation of the recorded externals (0 when none are recorded).
        Otherwise ref_group is iterated as a sequence of criteria: rows are kept only if
        dt2num(index, criterion) equals dt2num(dtime, criterion) for every criterion.

        If no reference row is found, the model is fitted on the single sample x.
        After fitting, the fit scores are stored in self.scores, (dtime, x) is appended to
        self.df and external is appended to self.externals.

        Raises InputValidationError when ref_group is "external" and external is None.
        '''

        use_external = self.ref_group == "external"

        if use_external:
            if external is None:
                raise InputValidationError("When ref_group is set to 'external', the parameter external must be specified.")

            past_externals = np.array(self.externals)
            if len(past_externals) > 0:
                pm = 2 * np.std(past_externals) / 10
            else:
                pm = 0

            lower, upper = external - pm, external + pm
            in_window = (lower <= past_externals) & (past_externals <= upper)
            X = self.df.loc[in_window].values
        else:
            subset = self.df
            # Narrow the history one criterion at a time.
            for criterion in self.ref_group:
                now_key = dt2num(dtime, criterion)
                past_keys = np.array(
                    [dt2num(stamp, criterion) for stamp in subset.index])
                subset = subset.loc[(now_key == past_keys)]
            X = subset.values

        # Nothing comparable in the history: fit on the current sample alone.
        if len(X) == 0:
            X = [x]

        self.strg.fit(X)
        self.scores = self.strg.get_fit_scores()

        self.df = append_to_df(self.df, dtime, x)
        self.externals.append(external)
Exemplo n.º 3
0
    def _fit(self, dtime, x, external=None):
        ''' Private method for internal use only.
        Constructs a reference dataset from the history in self.df according to
        self.ref_group and fits self.strg on it.

        ref_group handling:
          - "week":   rows whose ISO week number equals that of dtime
          - "month":  rows whose month equals that of dtime
          - "season": rows whose season equals that of dtime, with the mapping
                      Dec-Feb -> 1, Mar-May -> 2, Jun-Aug -> 3, Sep-Nov -> 4
          - anything else is handled as "external": rows whose recorded external value lies
            within +/- pm of the current one, where pm is one fifth of the standard deviation
            of the recorded externals (0 when none are recorded).

        If no reference row is found, the model is fitted on the single sample x.
        After fitting, the fit scores are stored in self.scores, (dtime, x) is appended to
        self.df and external is appended to self.externals.

        Raises InputValidationError when the external branch is taken and external is None.
        '''

        if self.ref_group == "week":
            current = dtime.isocalendar()[1]
            historical = np.array(
                [dt.isocalendar()[1] for dt in self.df.index])
            mask = (current == historical)

        elif self.ref_group == "month":
            current = dtime.month
            historical = np.array([dt.month for dt in self.df.index])
            mask = (current == historical)

        elif self.ref_group == "season":
            season = {
                12: 1, 1: 1, 2: 1,
                3: 2, 4: 2, 5: 2,
                6: 3, 7: 3, 8: 3,
                9: 4, 10: 4, 11: 4,
            }
            current = season[dtime.month]
            historical = np.array([season[dt.month] for dt in self.df.index])
            mask = (current == historical)

        else:  # self.ref_group == "external":
            if external is None:
                # Message wording aligned with the other _fit variants ("must be specified").
                raise InputValidationError(
                    "When ref_group is set to 'external', the parameter external must be specified."
                )

            current = external
            historical = np.array(self.externals)

            pm = 2 * np.std(historical) / 10 if len(historical) > 0 else 0
            mask = (current - pm <= historical) & (historical <= current + pm)

        # Single selection point shared by all branches.
        X = self.df.loc[mask].values

        # Nothing comparable in the history: fit on the current sample alone.
        if len(X) == 0:
            X = [x]

        self.strg.fit(X)
        self.scores = self.strg.get_fit_scores()

        self.df = append_to_df(self.df, dtime, x)
        self.externals.append(external)
Exemplo n.º 4
0
    def _fit(self, dtime, x, external=None):
        ''' Private method for internal use only.
        Constructs a reference dataset from the initial data (self.df_init) together with the
        accumulated history (self.df) according to self.ref_group, and fits self.strg on it.

        ref_group handling:
          - "external": keeps the k samples whose recorded external value is closest to the
            current one, with k = int(n_samples * self.external_percentage).
          - callable: called as ref_group(history_times, history_data, current_time,
            current_data); its return value is used as the reference data. It is not called
            when there is no history at all.
          - otherwise: iterated as a sequence of criteria; rows are kept only if
            dt2num(index, criterion) equals dt2num(dtime, criterion) for every criterion.

        If the reference data is empty, the model is fitted on the single sample x.
        After fitting, (dtime, x) is appended to self.df and external to self.externals.

        Raises InputValidationError when ref_group is "external" and external is None.
        '''
        # Local import so this block does not rely on a module-level alias.
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        import pandas as pd

        if self.ref_group == "external":
            if external is None:
                raise InputValidationError(
                    "When ref_group is set to 'external', the parameter external must be specified."
                )

            all_externals = np.array(
                list(self.externals_init) + list(self.externals))
            all_X = np.array(list(self.df_init.values) + list(self.df.values))

            # Keep the k samples with the smallest absolute distance in external value.
            k = int(len(all_externals) * self.external_percentage)
            ids = np.argsort(np.abs(all_externals - external))[:k]
            X = all_X[ids]
        elif callable(self.ref_group):
            df = pd.concat([self.df_init, self.df])
            if len(df) == 0:
                X = []
            else:
                history_times = df.index.to_pydatetime()
                history_data = df.values
                current_time, current_data = dtime, x
                X = self.ref_group(history_times, history_data, current_time,
                                   current_data)
        else:
            # Same row order as before: accumulated history first, then initial data.
            df_sub = pd.concat([self.df, self.df_init])
            for criterion in self.ref_group:
                current = dt2num(dtime, criterion)
                historical = np.array(
                    [dt2num(dt, criterion) for dt in df_sub.index])
                df_sub = df_sub.loc[(current == historical)]
            X = df_sub.values

        # Nothing comparable available: fit on the current sample alone.
        if len(X) == 0:
            X = [x]

        self.strg.fit(X)

        self.df = append_to_df(self.df, dtime, x)
        self.externals.append(external)
    def predict(self, dtime, x):
        '''Score the new test sample x and update the deviation level.

        The sample is appended to self.df, scored with self.strg, and the resulting p-value
        is fed to self._update_martingale. Every intermediate quantity is also appended to
        the corresponding history list (T, S, diffs, representatives, P, M).

        Parameters:
        -----------
        dtime : datetime
            datetime corresponding to the sample x

        x : array-like, shape (n_features,)
            Sample for which the strangeness, p-value and deviation level are computed

        Returns:
        --------
        DeviationContext built from, in order:

        strangeness : float
            Strangeness of x as returned by self.strg.predict

        pval : float, in [0, 1]
            p-value of that strangeness as returned by self.strg.pvalue

        deviation : float, in [0, 1]
            Deviation level returned by self._update_martingale

        is_deviating : boolean
            True if deviation is strictly above self.dev_threshold
        '''

        # Record the timestamp and the sample before scoring.
        self.T.append(dtime)
        self.df = append_to_df(self.df, dtime, x)

        score, delta, nearest = self.strg.predict(x)
        self.S.append(score)
        self.diffs.append(delta)
        self.representatives.append(nearest)

        p_value = self.strg.pvalue(score)
        self.P.append(p_value)

        dev_level = self._update_martingale(p_value)
        self.M.append(dev_level)

        return DeviationContext(score, p_value, dev_level,
                                dev_level > self.dev_threshold)