def test_win_base():
    """Visual smoke test of window-based change point detection.

    Builds a synthetic piecewise-constant signal, runs ``rpt.Window`` with
    the "l2" cost twice and displays the result each time: first against the
    true breakpoints, then with the detected breakpoints alone.
    """
    # The column is read but its value is never used: the synthetic signal
    # below replaces it. The access is kept because it can raise.
    data_month['sku_num_sum'].values

    sample_count, dimension = 500, 3
    true_change_count, noise_sigma = 3, 5  # noise standard deviation
    signal, bkps = rpt.pw_constant(sample_count, dimension, true_change_count,
                                   noise_std=noise_sigma)

    def run_window_search():
        # Cost model "l2"; ruptures also offers "l1", "rbf", "linear",
        # "normal" and "ar".
        detector = rpt.Window(width=40, model="l2").fit(signal)
        return detector.predict(n_bkps=3)

    # First pass: true breakpoints vs. detected breakpoints.
    detected = run_window_search()
    rpt.show.display(signal, bkps, detected, figsize=(10, 6))
    plt.show()

    # Second pass: the detected breakpoints are passed in the position of
    # the true breakpoints, so only one set is drawn.
    detected = run_window_search()
    rpt.show.display(signal, detected, figsize=(10, 6))
    plt.show()
示例#2
0
def window(datos):
    '''
    Detect change points in a closing-price series with window-based search.

    The window width and then the jump are chosen by a small grid search:
    every candidate is run, the resulting breakpoint lists are stacked into a
    DataFrame (shorter lists are padded with NaN), and the first candidate
    whose row has no NaN - i.e. one that yields the largest number of
    breakpoints - is selected.

    datos: object exposing a ``Close`` column and an ``index`` (the asset
        data, EURUSD according to the original note).

    Returns a tuple ``(fecha, changes, feature)``:
        fecha: list of ``datos.index`` entries at the detected change points.
        changes: integer array of shape (k, 1) with the change point
            positions (the trailing end-of-signal breakpoint is dropped).
        feature: result of ``boolean_change_point(data, changes)``.
    '''
    data = np.array(datos.Close)  # closing prices as an array

    n = len(data)  # number of samples
    sigma = data.std()  # standard deviation of the data
    p = np.log(n) * sigma**2  # penalty used by every prediction below

    # Grid search over the window width (10 .. 109).
    width_runs = []
    for i in range(0, 100):
        algo = rpt.Window(width=i + 10).fit(data)
        width_runs.append(algo.predict(pen=p))
    width_table = pd.DataFrame(width_runs).dropna()
    # Row i was produced with width i + 10, so map the row label back to
    # the parameter value instead of using the label itself.
    width = int(width_table.index[0]) + 10

    # Grid search over the jump (1 .. 100) using the selected width. The
    # results are collected in their own list so that this search actually
    # drives the choice of jump.
    jump_runs = []
    for i in range(0, 100):
        algo = rpt.Window(width=width, jump=i + 1).fit(data)
        jump_runs.append(algo.predict(pen=p))
    jump_table = pd.DataFrame(jump_runs).dropna()
    jump = int(jump_table.index[0]) + 1  # row i was produced with jump i + 1

    # Final run with the selected parameters.
    algo = rpt.Window(width=width, jump=jump).fit(data)
    my_bkps = algo.predict(pen=p)

    # The last breakpoint is dropped: it is not a real change point.
    inner_bkps = my_bkps[:-1]
    changes = np.array(inner_bkps, dtype=int).reshape(-1, 1)

    # Dates at which the change points occurred.
    fecha = [datos.index[position] for position in inner_bkps]

    # Feature used later by the model.
    feature = boolean_change_point(data, changes)

    return fecha, changes, feature
示例#3
0
    def plot(self):
        """Detect change points in the daily tweet counts and display them.

        Reads the names ``unique_days`` and ``tweets_per_day`` from the
        enclosing scope. The detection runs on the raw counts; the curve that
        is displayed is a Savitzky-Golay smoothed version of them.
        """
        import matplotlib.pyplot as plt
        import ruptures as rpt

        breakpoint_total = 4  # number of placeholder "true" breakpoints
        bkps = np.zeros(breakpoint_total)
        for _ in bkps:
            # The drawn value is not used; the call is kept because it
            # advances the global NumPy random state.
            np.random.randint(0, len(unique_days))

        counts_array = np.asarray(tweets_per_day)

        from scipy.signal import savgol_filter
        # Window length is one seventh of the series length; the polynomial
        # order is 4.
        smoothing_window = int(len(tweets_per_day) / 7)
        yhat_day = savgol_filter(tweets_per_day, smoothing_window, 4)

        # Window-based search with the "l2" cost, asking for 3 breakpoints.
        detector = rpt.Window(width=40, model="l2").fit(counts_array)
        my_bkps = detector.predict(n_bkps=3)

        # The all-zero placeholder array is passed as the true breakpoints.
        rpt.show.display(yhat_day, bkps, my_bkps, figsize=(10, 6))
        plt.show()
示例#4
0
def shift_detect(data, model="l2", width=40, noise_std=4, debug=False):
    """
    Locate shifts in ``data`` with the window-based search method.

    The penalty handed to the search is ``log(len(data)) * noise_std ** 2``.

    Args:
        data (Array)    : values in which to look for shifts
        model (String)  : cost model (distance) used by the search
        width (int)     : window width
        noise_std(float): assumed standard deviation of the noise
        debug (Bool)    : when true, display the signal with the shifts

    Returns:
        List: shift starting points (the final breakpoint returned by the
        search is dropped)
    """
    penalty = __np.log(len(data)) * noise_std ** 2
    detector = __rpt.Window(width=width, model=model).fit(data)
    detected = detector.predict(pen=penalty)

    if not debug:
        return detected[:-1]

    # Debug mode: show the signal together with every detected breakpoint.
    __rpt.show.display(data, detected, figsize=(10, 6))
    __plt.show()
    return detected[:-1]
示例#5
0
    def _window_sliding_segmentation(self, ping, n_bkps, start_idx, end_idx,
                                     width):
        """Segment ``ping[start_idx:end_idx]`` with the window sliding method.

        The slice is split into (n_bkps + 1) segments using the "l2" cost.
        Break points are returned as a list, shifted by ``start_idx`` so they
        refer to positions in ``ping`` rather than in the slice."""

        segment = ping[start_idx:end_idx]
        detector = rpt.Window(width=width, model='l2').fit(segment)
        local_bkps = detector.predict(n_bkps=n_bkps)
        return [local + start_idx for local in local_bkps]
示例#6
0
    def detect_change_points(self, ys: np.ndarray, **kwargs) -> Sequence[int]:
        '''
        Run a window-based change point search on ``ys`` with penalty 1.

        @param ys: signal to segment
        @param width: window size (default is 10)
        @param model: "l1", "rbf", "linear", "normal", "ar" (default is "l2")
        :return: list of estimated change points
        '''
        cost_model = kwargs.get("model", "l2")
        window_width = kwargs.get("width", 10)

        detector = ruptures.Window(width=window_width, model=cost_model)
        return detector.fit(ys).predict(pen=1)
示例#7
0
def find_break_points(weight_series: pd.Series,
                      estimated_breaks: int,
                      window_width: int,
                      model: str = "l1") -> list:
    """
    Find break points in a weight measurement series using Ruptures Windowing.

    Args:
        weight_series: Weight measurements as a Pandas Series.
        estimated_breaks: Number of estimated breakpoints (i.e. number of product arrivals to customer)
        window_width: Minimum width to be used for Ruptures Windowing function.
            Required: it previously "defaulted" to the ``int`` type object,
            which always failed the integer check below.
        model: Model selected to be used with the Window Sliding Segmentation search method.

    Returns: List of break point timestamps.

    Raises:
        TypeError: if ``estimated_breaks`` or ``window_width`` is not an
            integer, or ``model`` is not a string.
        ValueError: if ``model`` is not one of the accepted model names.
        Any error raised by the ``pas.weight_series`` schema check propagates
        unchanged.
    """
    acceptable_models = ["l2", "l1", "rbf", "linear", "normal", "ar"]

    # Schema validation; failures propagate to the caller as they are.
    pas.weight_series(weight_series)

    # logging.error is used (not logging.exception) because no exception is
    # being handled at these points, so there is no traceback to attach.
    if not isinstance(estimated_breaks, int):
        message = "estimated_breaks must be an integer."
        logging.error(message)
        raise TypeError(message)
    if not isinstance(window_width, int):
        message = "window_width must be an integer."
        logging.error(message)
        raise TypeError(message)
    if not isinstance(model, str):
        message = "model must be a string."
        logging.error(message)
        raise TypeError(message)
    if model not in acceptable_models:
        message = (
            f"model must be an acceptable model type for Ruptures Windowing: {acceptable_models}"
        )
        logging.error(message)
        raise ValueError(message)

    algorithm = rpt.Window(width=window_width,
                           model=model).fit(weight_series.values)
    break_points = algorithm.predict(n_bkps=estimated_breaks)
    if break_points and break_points[-1] >= len(weight_series):
        del break_points[-1]  # ruptures adds a breakpoint at end of series
    break_point_time_stamps = list(weight_series.index[break_points])
    return break_point_time_stamps
示例#8
0
def changePointDetection(glacier, attr, startdate=None, enddate=None,
                         n_breakpoints=1, method='window', model='l1',
                         wwidth=5):
    """Use ruptures package to identify change points in glacier time series.

    Acceptable methods are 'window' (sliding window), 'binseg' (binary
    segmentation), and 'bottomup' (bottom-up). See
    https://centre-borelli.github.io/ruptures-docs/user-guide for further
    information.

    Parameters
    ----------
    glacier : object providing ``filterDates(attr, startdate, enddate)``,
        which returns ``(attrs, dates)``.
    attr : passed through to ``glacier.filterDates``.
    startdate, enddate : passed through to ``glacier.filterDates``.
    n_breakpoints : number of breakpoints requested from the search.
    method : 'window', 'binseg' or 'bottomup'.
    model : ruptures cost model name.
    wwidth : window width, used only by the 'window' method.

    Returns
    -------
    tuple ``(breakpoint_dates, signal, breakpoints)``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names (previously this
        surfaced as an UnboundLocalError further down).
    """
    supported_methods = ('window', 'binseg', 'bottomup')
    if method not in supported_methods:
        raise ValueError(
            f"Unknown method {method!r}; expected one of {supported_methods}")

    attrs, dates = glacier.filterDates(attr, startdate, enddate)
    signal = attrs.values

    if method == 'window':
        algo = rpt.Window(width=wwidth, model=model).fit(signal)
    elif method == 'binseg':
        algo = rpt.Binseg(model=model).fit(signal)
    else:  # 'bottomup'
        algo = rpt.BottomUp(model=model).fit(signal)
    breakpoints = algo.predict(n_bkps=n_breakpoints)

    # remove breakpoints at beginning/end of time series
    # NOTE(review): breakpoints are compared with labels of ``dates.index``;
    # this presumably relies on an integer index - confirm against callers.
    leading_edge = dates.index[0] - 1
    if leading_edge in breakpoints:
        breakpoints.remove(leading_edge)
    trailing_edge = dates.index[-1]
    if trailing_edge in breakpoints:
        breakpoints.remove(trailing_edge)

    breakpoint_dates = dates[breakpoints]
    return breakpoint_dates, signal, breakpoints
示例#9
0
def get_change_point(series, jump=5, n_bkps=5, pen=10):
    """
    Return the change points on which the most detection algorithms agree.

    Several ruptures searches (Dynp, Pelt, Binseg, BottomUp, Window) and
    ``change_point_detection`` are run on the same data; each reported point
    counts as one vote. Points 0 and ``len(series)`` are discarded and the
    points with the highest vote count are returned.

    series: object exposing ``.values`` (e.g. a pandas Series)
    jump: sample size (grid step) passed to the ruptures searches
    n_bkps: number of breakpoints requested from each search that takes it
    pen: penalty for Pelt

    Returns a list of the most-voted change points, in order of first
    appearance; an empty list when no interior point was reported.
    """
    from collections import Counter

    series = series.values
    alg_dynp = rpt.Dynp(jump=jump).fit_predict(series, n_bkps=n_bkps)

    alg_pelt = rpt.Pelt(jump=jump).fit_predict(series, pen=pen)

    alg_bin = rpt.Binseg(jump=jump).fit_predict(series, n_bkps=n_bkps)

    alg_bot = rpt.BottomUp(jump=jump).fit_predict(series, n_bkps=n_bkps)

    alg_win = rpt.Window(jump=jump).fit_predict(series, n_bkps=n_bkps)

    alg_cumsum = change_point_detection(series.tolist())

    # Count how many times each point was predicted across all algorithms.
    votes = Counter(
        alg_dynp + alg_pelt + alg_bin + alg_bot + alg_win + alg_cumsum)

    # Drop the series boundaries. pop() with a default is used because a
    # boundary is not guaranteed to be present among the votes.
    votes.pop(0, None)
    votes.pop(len(series), None)

    if not votes:
        return []

    top_count = max(votes.values())
    return [point for point, count in votes.items() if count == top_count]
import numpy as np
import matplotlib.pylab as plt
import ruptures as rpt

from sklearn.decomposition import PCA as pca
from sklearn.decomposition import FastICA as ica

# Synthetic data: a noisy piecewise-constant signal and its true breakpoints
n, dim = 500, 3  # number of samples, dimension
n_bkps, sigma = 3, 5  # number of change points, noise standard deviation
signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma)

# Change point detection with the window-based search method
model = 'normal'  # other cost models: "l2", "l1", "rbf", "linear", "ar"
algo = rpt.Window(width=40, model=model).fit(signal)
# NOTE(review): 2 breakpoints are requested although the signal was generated
# with n_bkps = 3 - confirm this is intended.
my_bkps = algo.predict(n_bkps=2)
# Alternative stopping rules (penalty / epsilon), currently disabled:
#my_bkps = algo.predict(pen=np.log(n)*dim*sigma**2)
#my_bkps = algo.predict(epsilon=3*n*sigma**2)

# Show the signal with the true and the detected breakpoints
rpt.show.display(signal, bkps, my_bkps, figsize=(10, 6))
plt.show()

# Reduce the signal to its first principal component and repeat the
# change point detection on the reduced signal
pca_obj = pca(n_components=1).fit(signal)
reduced_signal = pca_obj.transform(signal)

algo = rpt.Window(width=40, model=model).fit(reduced_signal)
my_bkps = algo.predict(n_bkps=3)
# NOTE(review): no plt.show() follows this display call in the visible code.
rpt.show.display(reduced_signal, bkps, my_bkps, figsize=(10, 6))
示例#11
0
def change_point_analysis_and_plot(path=None, estimator_hawkes=None,
                                   type_analysis="optimal",
                                   parameters_for_analysis=(1, "l2", 1),
                                   true_breakpoints=None,
                                   column_for_multi_plot_name=None):
    '''
    Detect change points in the estimates of an Estimator_Hawkes and plot them.

    The estimator's DataFrame is grouped by ('parameter', 'm', 'n'); for each
    group the mean of 'value' per 'time estimation' forms the signal (one
    column per value of ``column_for_multi_plot_name`` when it is given).

    Args:
        path: location of a file from which the Estimator_Hawkes is read.
            Used only when ``estimator_hawkes`` is None.
        estimator_hawkes: an Estimator_Hawkes instance. Takes precedence over
            ``path`` when both are given.
        type_analysis: "optimal" (dynamic programming, rpt.Dynp) or "window"
            (rpt.Window).
        parameters_for_analysis: ``(number_of_breakpoints, model, min_size)``
            for "optimal", ``(number_of_breakpoints, model, width)`` for
            "window". The same number of breakpoints is used for every group.
        true_breakpoints: optional dict keyed by the (parameter, m, n) group
            key, each value a list of fractions; every fraction is multiplied
            by the last detected breakpoint and rounded. When None, only the
            final boundary is drawn as a true breakpoint.
        column_for_multi_plot_name: optional column name (a string) used to
            split each group into several series.

    Returns:
        list with the detected breakpoints of each group, in group order.

    Raises:
        Error_not_yet_allowed: unsupported ``type_analysis``.
        Error_not_allowed_input: ``estimator_hawkes`` is not an
            Estimator_Hawkes.
        Error_not_enough_information: ``path`` and ``estimator_hawkes`` are
            both None.
    '''
    if type_analysis == "optimal":
        number_of_breakpoints, model, min_size = parameters_for_analysis
    elif type_analysis == "window":
        number_of_breakpoints, model, width = parameters_for_analysis
    else:
        raise Error_not_yet_allowed("Not good type of analysis.")

    # Resolve the estimator. An explicit estimator wins; the error about
    # missing information is raised only when nothing at all was supplied.
    if estimator_hawkes is not None:
        if not isinstance(estimator_hawkes, Estimator_Hawkes):
            raise Error_not_allowed_input("Function needs estimator Hawkes for estimator_hawkes.")
        the_estimator = estimator_hawkes
    elif path is not None:
        the_estimator = Estimator_Hawkes.from_path(path)
    else:
        raise Error_not_enough_information("Path and Estimator_Hawkes can't be both None.")

    SEPARATORS = ['parameter', 'm', 'n']

    dict_serie = {}
    global_dict = the_estimator.DF.groupby(SEPARATORS)
    for k1, k2, k3 in global_dict.groups.keys():
        if column_for_multi_plot_name is not None:
            super_dict = global_dict.get_group((k1, k2, k3)).groupby([column_for_multi_plot_name])
            for k4 in super_dict.groups.keys():
                # discrimination of whether the serie already exists.
                if (k1, k2, k3) not in dict_serie:  # not yet crossed those values
                    dict_serie[(k1, k2, k3)] = super_dict.get_group(k4).groupby(['time estimation'])[
                        'value'].mean().values.reshape((1, -1))
                else:  # the condition already seen, so I aggregate to what was already done.
                    dict_serie[(k1, k2, k3)] = np.vstack((dict_serie[(k1, k2, k3)],
                                                          super_dict.get_group(k4).groupby(['time estimation'])[
                                                              'value'].mean()
                                                          ))
        else:
            dict_serie[(k1, k2, k3)] = global_dict.get_group((k1, k2, k3)).groupby(['time estimation'])[
                'value'].mean().values.reshape((1, -1))

    # The series were stacked one per row; transpose them so that each
    # row is a time step.
    for k in dict_serie.keys():
        dict_serie[k] = np.transpose(dict_serie[k])

    ans = []
    for k, serie in dict_serie.items():
        if type_analysis == "optimal":
            algo = rpt.Dynp(model=model, min_size=min_size, jump=1).fit(serie)
        else:  # "window"; any other value was rejected at the top
            algo = rpt.Window(width=width, model=model).fit(serie)
        my_bkps1 = algo.predict(n_bkps=number_of_breakpoints)

        last_value = my_bkps1[-1]  # last value, equal to number of time estimates.
        if true_breakpoints is None:
            true_bkpts = []
        else:
            # A new list is built, so the caller's dict is not altered.
            true_bkpts = [round(fraction * last_value)
                          for fraction in true_breakpoints[k]]
        # Each list gets the last breakpoint appended, i.e. the number of
        # points of estimation.
        true_bkpts.append(last_value)

        rpt.show.display(serie, computed_chg_pts=my_bkps1, true_chg_pts=true_bkpts, figsize=(10, 6))
        ans.append(my_bkps1)
    print(true_breakpoints)
    return ans
示例#12
0
def detect_data_shifts(series,
                       filtering=True,
                       use_default_models=True,
                       method=None,
                       cost=None,
                       penalty=40):
    """
    Detect data shifts in a time series of daily values.

    .. warning:: If the passed time series is less than 2 years in length,
        it will not be corrected for seasonality. Data shift detection will
        be run on the min-max normalized time series with no seasonality
        correction.

    Parameters
    ----------
    series : Pandas series with datetime index.
        Time series of daily PV data values, which can include irradiance
        and power data.
    filtering : Boolean, default True.
        Whether or not to filter out outliers and stale data from the time
        series. If True, then this data is filtered out before running the
        data shift detection sequence. If False, this data is not filtered
        out. Default set to True.
    use_default_models: Boolean, default True
        If True, then default change point detection search parameters are
        used. For time series shorter than 2 years in length, the search
        function is `rpt.Window`  with `model='rbf'`, `width=50` and
        `penalty=30`. For time series 2 years or longer in length, the
        search function is `rpt.BottomUp` with `model='rbf'`
        and `penalty=40`.
    method: ruptures search method instance or None, default None.
        Ruptures search method instance. See
        https://centre-borelli.github.io/ruptures-docs/user-guide/.
        Used only when `use_default_models` is False.
    cost: str or None, default None
        Cost function passed to the ruptures changepoint search instance.
        See https://centre-borelli.github.io/ruptures-docs/user-guide/
        Used only when `use_default_models` is False.
    penalty: int, default 40
        Penalty value passed to the ruptures changepoint detection method.
        Default set to 40. Used only when `use_default_models` is False.

    Returns
    -------
    Pandas Series
        Series of boolean values with the input Series' datetime index, where
        detected changepoints are labeled as True, and all other values are
        labeled as False.

    References
    ----------
    .. [1] Perry K., and Muller, M. "Automated shift detection in sensor-based
       PV power and irradiance time series", 2022 IEEE 48th Photovoltaic
       Specialists Conference (PVSC).
    """
    try:
        import ruptures as rpt
    except ImportError:
        raise ImportError("data_shifts() requires ruptures.")
    # Run data checks on cleaned data to make sure that the data can be run
    # successfully through the routine
    _run_data_checks(series)
    # Run the filtering sequence, if marked as True. Otherwise continue with
    # the series as passed, so that filtering=False is a usable option.
    if filtering:
        series_filtered = _erroneous_filter(series)
    else:
        series_filtered = series
    # Drop any duplicated data from the time series
    series_filtered = series_filtered.drop_duplicates()
    # Check if the time series is more than 2 years long. If so, remove
    # seasonality. If not, run analysis on the normalized time series
    span_days = (series_filtered.index.max()
                 - series_filtered.index.min()).days
    seasonality_rmv = span_days > 730
    series_processed = _preprocess_data(series_filtered,
                                        remove_seasonality=seasonality_rmv)
    points = np.array(series_processed.dropna())
    if use_default_models and seasonality_rmv:
        # Seasonality removed and default model requested: BottomUp method
        algo = rpt.BottomUp(model='rbf').fit(points)
        result = algo.predict(pen=40)
    elif use_default_models:
        # No seasonality removal but default model requested: Window method
        algo = rpt.Window(model='rbf', width=50).fit(points)
        result = algo.predict(pen=30)
    else:
        # Otherwise run changepoint detection with the passed parameters
        algo = method(model=cost).fit(points)
        result = algo.predict(pen=penalty)
    # Remove the last index of the time series, if present
    if len(points) in result:
        result.remove(len(points))
    # Build a boolean mask that is True at the detected changepoints
    series_processed.index.name = "datetime"
    mask = pd.Series(False, index=series_processed.index)
    mask.iloc[result] = True
    # Re-index the mask to include any timestamps that were
    # filtered out as outliers
    mask = mask.reindex(series.index, fill_value=False)
    return mask
示例#13
0
# Finish and save the figure that was being drawn before this point.
plt.xlabel("RankedCells")
plt.ylabel("Moving Avg Topic Probability")
plt.savefig(os.path.join(sc.settings.figdir, name + "_TopicMovingAvg.png"))
plt.clf()
# Moving average (window argument 300) of the ribosomal percentage per cell.
convolvedSD = moving_average(adata.obs['percent_ribo'].tolist(), 300)
plt.plot(range(len(convolvedSD)), convolvedSD)
plt.title("Moving Average Percent Ribo")
plt.savefig(os.path.join(sc.settings.figdir, name + "_RiboCounts.png"))
plt.clf()
# Same plot for the mitochondrial percentage.
convolvedSD = moving_average(adata.obs['percent_mito'].tolist(), 300)
plt.plot(range(len(convolvedSD)), convolvedSD)
plt.title("Moving Average Percent Mito")
plt.savefig(os.path.join(sc.settings.figdir, name + "_MitoCounts.png"))
plt.clf()
# Four-column signal: per-row std of doc_topic, percent_ribo, percent_mito
# and n_counts.
signal = np.column_stack(
    (np.std(doc_topic, axis=1), adata.obs['percent_ribo'].tolist(),
     adata.obs['percent_mito'].tolist(), np.array(list(adata.obs.n_counts))))

# Window-based change point search with the "l1" cost and penalty 50.
algo = rpt.Window(width=2500, model="l1").fit(signal)
result = algo.predict(pen=50)
# For every breakpoint except the last, ask the fitted cost object for the
# cost of the pair [that breakpoint, last breakpoint].
costs = []
for i in range(len(result) - 1):
    costs.append(algo.cost.sum_of_costs([result[i], result[len(result) - 1]]))
# The detected breakpoints are passed as both the true and the computed set.
rpt.display(signal=signal, true_chg_pts=result, computed_chg_pts=result)
# NOTE(review): np.argmin(costs) is a position in `costs`, not a sample
# index, and raises when `result` holds a single breakpoint - confirm.
plt.title(str(np.argmin(costs)) + ' <- Best cPoint')
plt.savefig(os.path.join(sc.settings.figdir, name + "_Changepoints.png"))
plt.clf()
# Report the top words per topic and save the 25-word table.
print_top_words(ldaM, adata.var.index, 15)
table_top_words(ldaM, adata.var.index, 25).to_csv(
    os.path.join(sc.settings.figdir, name + "_TopicMarkers.txt"))
示例#14
0
  def get_decomp_plus_cp(self,signal, dates, decomp_algo='STL', cp_algo='bayes', config=None):
    '''
    task function

    description: applies decomposition, and gets the change points

    Args:
      signal: sequence of observations; it is wrapped in a DataFrame column
        named 'signal'.
      dates: index assigned to that DataFrame.
      decomp_algo: 'STL' runs self.extract_climate_trend and keeps its
        'signal' column as an array; None converts the DataFrame to an array.
        Any other value leaves the DataFrame as it is.
      cp_algo: 'bayes', 'pelt', 'binseg' or 'window'.
      config: optional dict of overrides. Keys read: 'decomp_algo';
        'distribution' and 'log_odds_threshold' (bayes, defaults 'log_normal'
        and 0); 'model' and 'pen' (pelt, model default 'rbf'); 'width' and
        'model' (window, defaults 10 and 'l2').

    Returns:
      The predicted break points; the last entry is the signal length for
      'bayes' (appended here).

    Raises:
      ValueError: unknown cp_algo, or no penalty is available for a
        penalty-based search (self.pen is neither 'aic' nor 'bic' and, for
        pelt, config has no 'pen').
    '''
    if cp_algo not in ('bayes', 'pelt', 'binseg', 'window'):
      raise ValueError(f"Unknown cp_algo: {cp_algo!r}")

    # An absent or empty config behaves like an empty dict.
    settings = config if config else {}

    #formatting the np.array to dataframe for trend extraction
    signal = pd.DataFrame({'signal': signal})
    signal.index = dates

    #trend extraction
    decomp_algo = settings.get('decomp_algo', decomp_algo)
    if decomp_algo == 'STL':
      signal_trend = self.extract_climate_trend(signal, 'STL')
      signal = np.array(signal_trend['signal'])
    elif decomp_algo is None:
      signal = np.array(signal)

    #bayesian change point detection
    if cp_algo == 'bayes':
      # Each setting falls back to its default on its own, so a config
      # that provides only one of them no longer fails.
      detector = cpDetector(
        [signal],
        distribution=settings.get('distribution', 'log_normal'),
        log_odds_threshold=settings.get('log_odds_threshold', 0))
      detector.detect_cp()

      #gets the breakpoints via idx from the detector
      predicted_breaks = detector.change_points['traj_0']['ts'].values
      return np.append(predicted_breaks, len(signal))

    #penalty for the ruptures searches
    pen = None
    if self.pen == 'aic':
      pen = self.aic_penalty(signal)
    elif self.pen == 'bic':
      pen = self.bic_penalty(signal)

    if cp_algo == 'pelt':
      pen = settings.get('pen', pen)
      searcher = rpt.Pelt(model=settings.get('model', 'rbf'))
    elif cp_algo == 'binseg':
      searcher = rpt.Binseg(model='rbf')
    else:
      # window: a model given in config is now honoured. Without one the
      # search keeps using 'l2', which is what it effectively ran with
      # before, because no model used to be passed at all.
      searcher = rpt.Window(width=settings.get('width', 10),
                            model=settings.get('model', 'l2'))

    if pen is None:
      raise ValueError(
        f"No penalty available for cp_algo={cp_algo!r}: self.pen must be "
        "'aic' or 'bic' (or config must provide 'pen' for pelt)")

    #gets the breakpoints via idx from the search
    return searcher.fit(signal).predict(pen=pen)
示例#15
0
            for idx in result[:-1]:
                x.append(indexes[idx])
            y = []
            for idx in x:
                y.append(df.loc[df.index == idx]['p1_current'].values[0]) 

            plt.plot(df.loc[df['category_column'] == category].index, df.loc[df['category_column'] == category]['p1_current'], label='normal')
            plt.scatter(x, y, label='outlier', color='red', marker='o')
            plt.title("Change Finder Bottom Up p1_current")
            plt.xlabel('Date Time')
            plt.ylabel('p1_current')
            plt.savefig( ofn + "_BottomUp_p1_current.png")
            plt.show()
            plt.close()

            algo = rpt.Window(model="l2")
            result = algo.fit_predict(X, n_bkps=n_bkps)
            x = []
            for idx in result[:-1]:
                x.append(indexes[idx])
            y = []
            for idx in x:
                y.append(df.loc[df.index == idx]['p1_current'].values[0]) 

            plt.plot(df.loc[df['category_column'] == category].index, df.loc[df['category_column'] == category]['p1_current'], label='normal')
            plt.scatter(x, y, label='outlier', color='red', marker='o')
            plt.title("Change Finder Window Segmentation p1_current")
            plt.xlabel('Date Time')
            plt.ylabel('p1_current')
            plt.savefig(ofn + "_Window_p1_current.png")
            plt.show()
def windows(series, window_size=20, pen=2):
    """Run a window-based "l2" change point search and display the result.

    Args:
        series: signal to segment.
        window_size: width of the sliding window.
        pen: penalty passed to the prediction (previously ignored in favour
            of a hard-coded 2, which is still the default).

    Returns:
        The list of breakpoints returned by the search.
    """
    algo = rpt.Window(width=window_size, model="l2").fit(series)
    result = algo.predict(pen=pen)
    rpt.display(series, result)
    plt.show()
    return result
示例#17
0
def make_neighborhood_rank_divergence_plot(rank_df, adj_df):
    """Plot how far each county's rank is from the ranks of its neighbours.

    ``rank_df`` is sorted by 'rank' in place and gains a 'rank_div' column
    holding, per county, the mean absolute difference between its rank and
    the ranks of its neighbours in ``adj_df`` (columns 'source' and
    'destination'). Change points of the 100-row rolling mean of that column
    are estimated with Pelt, Window and Binseg; their mean is drawn as the
    estimated breakpoint. Four PNG files are written under '../output/'.
    """
    rank_df.sort_values('rank', inplace=True, ascending=True)

    divergences = np.zeros(len(rank_df.index))
    county_ranks = zip(rank_df['County'], rank_df['rank'])
    for position, (county, rank) in enumerate(county_ranks):
        neighbors = adj_df.loc[adj_df.source == county, 'destination']
        if not len(neighbors):
            # No row lists the county as a source: look it up as a
            # destination instead.
            neighbors = adj_df.loc[adj_df.destination == county, 'source']

        is_neighbor = rank_df.County.isin(neighbors).values
        neighbor_ranks = rank_df.loc[is_neighbor, 'rank']
        divergence = np.abs(rank - neighbor_ranks).mean()
        divergences[position] = divergence

        if np.isnan(divergence):
            # Print the inputs that produced an undefined divergence.
            for item in (county, neighbors, neighbor_ranks):
                print(item)

    rank_df['rank_div'] = divergences

    # Change point detection on the rolling mean
    # (available cost models: 'l1', 'l2', 'rbf', 'linear', 'normal', 'ar')
    rolling_mean = rank_df['rank_div'].rolling(100).mean()
    signal = rolling_mean.dropna().values
    pelt_bkps = rpt.Pelt(model='rbf').fit(signal).predict(pen=100)
    window_bkps = rpt.Window(width=1000,
                             model='l2').fit(signal).predict(n_bkps=1)
    bin_bkps = rpt.Binseg(model='l2').fit(signal).predict(n_bkps=1)
    # The last entry of each result is left out of the ensemble.
    ensemble_bkp = np.mean(
        [*pelt_bkps[:-1], *window_bkps[:-1], *bin_bkps[:-1]])

    print('Identified Breakpoints:'
          f'\n\tPelt Breakpoints:    {pelt_bkps[:-1]}'
          f'\n\tWindow Breakpoints:  {window_bkps[:-1]}'
          f'\n\tBinary Breakpoints:  {bin_bkps[:-1]}'
          f'\n\tEnsemble Breakpoint: {ensemble_bkp}')

    # Overview figure: raw divergences, rolling mean, estimated breakpoint.
    ranks = rank_df['rank'].values
    plt.scatter(
        ranks,
        rank_df['rank_div'].values,
        facecolor='None',
        edgecolor=sns.xkcd_rgb['denim blue'],
        linewidth=2,
        label='Data',
    )
    plt.plot(ranks, rolling_mean, color='darkorange', label='Rolling Mean')

    y_min, y_max = divergences.min(), divergences.max()
    margin = 0.1 * (y_max - y_min)
    plt.plot([ensemble_bkp, ensemble_bkp],
             [y_min - margin, y_max + margin],
             'k--',
             label='Estimated Breakpoint')
    plt.legend()
    plt.title('Mean Neighborhood Rank Divergence')
    plt.xlabel('Quality of Life Rank (Lower is better)')
    plt.ylabel('Rank Divergence')
    plt.tight_layout()
    # Remember the axis limits and figure size for the per-method figures.
    ymin, ymax = plt.gca().get_ylim()
    figsize = plt.gcf().get_size_inches()
    plt.savefig('../output/neighborhood_rank_divergence.png', dpi=600)
    plt.close('all')

    # Visualize change points: one figure per search method, all drawn the
    # same way and without true breakpoints.
    bkps = []
    per_method_figures = (
        (rpt.display, pelt_bkps, 'Pelt Change Point Detection',
         '../output/rank_div_change_point_pelt.png'),
        (rpt.show.display, window_bkps, 'Window Change Point Detection',
         '../output/rank_div_change_point_window.png'),
        (rpt.show.display, bin_bkps, 'Binary Change Point Detection',
         '../output/rank_div_change_point_binary.png'),
    )
    for display, detected, title, out_path in per_method_figures:
        display(
            signal,
            bkps,
            detected,
            figsize=figsize,
        )
        plt.ylim(ymin, ymax)
        plt.gca().get_lines()[0].set_color('darkorange')
        plt.title(title)
        plt.xlabel('Quality of Life Rank')
        plt.ylabel('Local Rank Divergence')
        plt.tight_layout()
        plt.savefig(out_path, dpi=600)
        plt.close('all')
示例#18
0
文件: test1.py 项目: kartikdube/BBDC
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 18 19:00:18 2020

@author: dubek
"""

import pandas as pd
import numpy as np
import ruptures as rpt
import matplotlib.pyplot as plt

# Load one EMG recording.
# NOTE(review): absolute, machine-specific Windows path - the script only
# runs where this file exists.
df1 = pd.read_csv('K:\\AA JU DE\\BBDC\\bbdc_2020\\train\\emg\\s01t01.emg.csv')

# Keep a single column (position 6) as a 2-D array of shape (rows, 1).
X = np.array(df1.iloc[:, 6:7])

# Window-based change point search: "l2" cost, window width 1500,
# 40 breakpoints requested.
model = "l2"
algo = rpt.Window(width=1500, model=model).fit(X)
my_bkps = algo.predict(n_bkps=40)
# Display the signal with the detected breakpoints.
rpt.show.display(X, my_bkps, figsize=(25, 20))
plt.title('Change Point Detection: Window-Based Search Method')
plt.show()
示例#19
0
# Change point detection with the Pelt search method ("rbf" cost, penalty 10).
# `signal` is defined before this point, outside the visible code.
algo = rpt.Pelt(model="rbf").fit(signal)
result = algo.predict(pen=10)
rpt.display(signal, result)
plt.title('Change Point Detection: Pelt Search Method')
plt.show()

# Change point detection with the Binary Segmentation search method
# ("l2" cost, 10 breakpoints requested)
model = "l2"
algo = rpt.Binseg(model=model).fit(signal)
my_bkps = algo.predict(n_bkps=10)
# show results
rpt.show.display(signal, my_bkps)
plt.title('Change Point Detection: Binary Segmentation Search Method')
plt.show()

# Change point detection with the window-based search method
# ("l2" cost, window width 40, 10 breakpoints requested)
model = "l2"
algo = rpt.Window(width=40, model=model).fit(signal)
my_bkps = algo.predict(n_bkps=10)
rpt.show.display(signal, my_bkps)
plt.title('Change Point Detection: Window-Based Search Method')
plt.show()

# Change point detection with the dynamic programming search method
# ("l1" cost, min_size 3, jump 5, 10 breakpoints requested)
model = "l1"
algo = rpt.Dynp(model=model, min_size=3, jump=5).fit(signal)
my_bkps = algo.predict(n_bkps=10)
rpt.show.display(signal, my_bkps)
plt.title('Change Point Detection: Dynamic Programming Search Method')
plt.show()
def _ruptures_flag_slices( search, kernel_distance_seq, policy_params, method_label ):
    """Flag slices using an (unfitted) ruptures search object.

    Shared implementation of the "ruptures_binary_segmentation" and
    "ruptures_window_based" policies.

    Parameters
    ----------
    search : ruptures estimator exposing ``fit`` and ``predict``.
    kernel_distance_seq : sequence of per-slice kernel distance matrices.
    policy_params : dict with keys "n_change_points", "penalty", "epsilon".
        "n_change_points" is either the string "unknown" or a number of
        breakpoints; when it is "unknown", exactly one of "penalty" /
        "epsilon" must be True.
    method_label : human-readable method name used in the error message.

    Returns
    -------
    list of slice indices, each being ``breakpoint - 1``. An empty input
    sequence yields an empty list.

    Raises
    ------
    ValueError : "n_change_points" is "unknown" and the penalty/epsilon flags
        do not select exactly one stopping rule.
    """
    n_change_points = policy_params[ "n_change_points" ]
    penalty = policy_params[ "penalty" ]
    epsilon = policy_params[ "epsilon" ]

    # One flattened distance distribution per slice
    distance_distribution_seq = [ get_flat_distances( distance_mat )
                                  for distance_mat in kernel_distance_seq ]
    if not distance_distribution_seq:
        # Nothing to segment, hence nothing to flag
        return []

    # Properties of the distances needed to scale the penalty / threshold
    n_distributions = len( distance_distribution_seq )
    dim = len( distance_distribution_seq[-1] )
    all_distances = []
    for d in distance_distribution_seq:
        all_distances.extend( d )
    sigma = np.std( all_distances )

    # Make into ndarray for ruptures: one row per slice
    signal = np.array( [ np.array(d) for d in distance_distribution_seq ] )
    algo = search.fit( signal )

    # Find change-points
    if n_change_points == "unknown":
        # Explicit comparison against the booleans is kept on purpose so that
        # non-boolean flag values are still rejected below.
        if penalty == True and epsilon == False:
            penalty_value = np.log( n_distributions ) * dim * sigma**2
            change_points = algo.predict( pen=penalty_value )
        elif penalty == False and epsilon == True:
            threshold = 3 * n_distributions * sigma**2
            change_points = algo.predict( epsilon=threshold )
        else:
            raise ValueError("Invalid policy for {} change-point detection: {}".format(method_label, policy_params))
    else:
        change_points = algo.predict( n_bkps=n_change_points )

    # Breakpoints are converted to slice indices by shifting them down by one
    return [ cp-1 for cp in change_points ]


def detect_anomalies( kernel_distance_seq, policy ):
    """Return the indices of the slices flagged as anomalous by ``policy``.

    Parameters
    ----------
    kernel_distance_seq : sequence with one kernel distance matrix per slice.
    policy : dict with keys "name" (policy identifier) and "params" (dict of
        policy-specific parameters; must be present even when unused).

    Supported policy names
    ----------------------
    "naive_max"                    : slice holding the largest distance.
    "increasing_median"            : median grew by more than params["threshold"].
    "kolmogorov_smirnov"           : distribution differs from both neighbours.
    "median_exceeds_threshold"     : median above params["threshold"].
    "random"                       : params["n_samples"] distinct random slices.
    "all"                          : every slice.
    "ruptures_binary_segmentation" : ruptures Binseg change points.
    "ruptures_window_based"        : ruptures Window change points.

    Returns
    -------
    list of flagged slice indices.

    Raises
    ------
    ValueError : "random" asked for more samples than there are slices, or an
        inconsistent penalty/epsilon choice for a ruptures policy.
    NotImplementedError : unknown policy name.
    """
    # Unpack policy
    policy_name = policy["name"]
    policy_params = policy["params"]

    # Do a truly naive anomaly detection policy where we just define the slice
    # containing the max kernel distance as anomalous and all others as not
    # anomalous. This is not really "anomaly detection" in any meaningful sense
    # But it suffices for testing the basic workflow
    if policy_name == "naive_max":
        max_dist_slice_idx = 0
        max_dist = 0
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            slice_max = max( get_flat_distances( distance_mat ) )
            # Strict comparison: the earliest slice wins ties
            if slice_max > max_dist:
                max_dist = slice_max
                max_dist_slice_idx = slice_idx
        return [ max_dist_slice_idx ]

    # Detect anomalies based on whether the median kernel distance increases
    # from slice to slice (by more than the threshold) or not
    elif policy_name == "increasing_median":
        threshold = policy_params["threshold"]
        flagged_slice_indices = []
        prev_median_distance = 0
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            curr_median_distance = np.median( get_flat_distances( distance_mat ) )
            if curr_median_distance - prev_median_distance > threshold:
                flagged_slice_indices.append( slice_idx )
            prev_median_distance = curr_median_distance
        return flagged_slice_indices

    # Flag interior slices whose distance distribution differs significantly
    # (two-sample KS test) from both the previous and the next slice
    elif policy_name == "kolmogorov_smirnov":
        # NOTE(review): this branch flattens with flatten_distance_matrix while
        # every other branch uses get_flat_distances -- confirm this is intended.
        thresh = 0.0001
        flagged_slice_indices = []
        for slice_idx in range( 1, len(kernel_distance_seq) - 1 ):
            prev_dist = flatten_distance_matrix(kernel_distance_seq[ slice_idx - 1 ])
            curr_dist = flatten_distance_matrix(kernel_distance_seq[ slice_idx  ])
            next_dist = flatten_distance_matrix(kernel_distance_seq[ slice_idx + 1 ])
            _, p_val_prev = ks_2samp( prev_dist, curr_dist )
            _, p_val_next = ks_2samp( next_dist, curr_dist )
            if p_val_prev < thresh and p_val_next < thresh:
                flagged_slice_indices.append( slice_idx )
        return flagged_slice_indices

    # Flag slices if the median kernel distance exceeds a user-supplied
    # threshold
    elif policy_name == "median_exceeds_threshold":
        threshold = policy_params[ "threshold" ]
        flagged_slice_indices = []
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            median_distance = np.median( get_flat_distances( distance_mat ) )
            if median_distance > threshold:
                flagged_slice_indices.append( slice_idx )
        return flagged_slice_indices

    # Randomly choose slices. This isn't really an anomaly detection policy, but
    # we use it to check whether the distribution of callstacks from a random
    # sample of slices looks different than the distribution of callstacks from
    # the flagged slices
    elif policy_name == "random":
        n_samples = policy_params["n_samples"]
        n_slices = len(kernel_distance_seq)
        if n_samples > n_slices:
            # Rejection sampling below could never finish in this case
            raise ValueError("Cannot draw {} distinct slices out of {}".format(n_samples, n_slices))
        flagged_slice_indices = set()
        while len( flagged_slice_indices ) < n_samples:
            # generate uniform random number between 0 and n_slices-1
            rand_slice_idx = np.random.randint( 0, n_slices, size=1 )[0]
            flagged_slice_indices.add( rand_slice_idx )
        return list( flagged_slice_indices )

    elif policy_name == "all":
        return list( range( len(kernel_distance_seq) ) )

    elif policy_name == "ruptures_binary_segmentation":
        search = rpt.Binseg( model=policy_params[ "model" ] )
        return _ruptures_flag_slices( search, kernel_distance_seq, policy_params, "binary-segmentation" )

    elif policy_name == "ruptures_window_based":
        search = rpt.Window( width=policy_params[ "width" ], model=policy_params[ "model" ] )
        return _ruptures_flag_slices( search, kernel_distance_seq, policy_params, "window-based" )

    else:
        raise NotImplementedError("Anomaly detection policy: {} is not implemented".format(policy_name))
示例#21
0
    def change_point(self,
                     width: int,
                     cut_off: list,
                     custom_cost,
                     jump: int,
                     pen: float,
                     results_show: bool,
                     title=None,
                     save_path=None,
                     fig_name=None):
        '''
        ----------------
        DESCRIPTION
        ----------------
        Check whether a selected interval of the resistance signal contains a
        sufficiently large sudden change, using the window-based search of
        the ruptures library:

        https://centre-borelli.github.io/ruptures-docs/index.html#documentation

        If at least one change point is found, the mean of the samples before
        the first change point is compared with the mean of the samples after
        it (before minus after) --> delta R. Without a change point delta R
        is 0. A negative delta R is also reported as 0, because the
        resistance curve is expected to go down over time.
        ----------------
        PARAMETER
        ----------------
        width: int, window width (e.g. 40)
        cut_off: list [float, float], start and end of the selected interval
                 as fractions in 0...1 (e.g. [0.15, 0.45])
        custom_cost: ruptures cost object, see
                 https://centre-borelli.github.io/ruptures-docs/costs/index.html
        jump: int, subsampling step, one candidate every jump points (e.g. 5)
        pen: float, penalty value (>0) passed to the search (e.g. 2)
        results_show: bool, display the signal with the detected change points
        title: title of the result image
        save_path: directory in which the result image is saved
        fig_name: file name of the result image
        ----------------
        RETURN
        ----------------
        delta_R: non-negative drop of the mean resistance across the first
                 change point, 0 when no change point was detected
        '''

        # NOTE(review): the interval bounds are scaled by len(self.valley_id)
        # but applied to self.R_data -- confirm both have the same length.
        n_ref = len(self.valley_id)
        start = round(n_ref * cut_off[0])
        stop = round(n_ref * cut_off[1])
        ab_R = self.R_data[start:stop].values

        # Use the caller's penalty (a constant 2 used to be passed instead).
        bkps = rpt.Window(width=width, custom_cost=custom_cost,
                          jump=jump).fit_predict(ab_R, pen=pen)

        # ruptures always reports the end of the signal as the last
        # breakpoint, so two or more entries mean a real change point exists.
        if len(bkps) >= 2:
            first_bkp = bkps[0]
            delta_R = np.mean(ab_R[:first_bkp]) - np.mean(ab_R[first_bkp:])
            if delta_R < 0:
                # a rise of the mean is not counted, the resistance curve is
                # expected to go down over time
                delta_R = 0
        else:
            delta_R = 0

        if results_show:
            rpt.display(ab_R, bkps)
            if title is not None:
                plt.title(title)
            if save_path and fig_name is not None:
                save_fig(image_path=save_path, fig_name=fig_name)
            plt.show()

        return delta_R
    rpt.display(points, result, figsize=(10, 6))
    plt.title('Change Point Detection: Pelt Search Method')
    plt.show()

    #Changepoint detection with the Binary Segmentation search method
    model = "l2"
    algo = rpt.Binseg(model=model).fit(points)
    my_bkps = algo.predict(n_bkps=10)
    # show results
    rpt.show.display(points, my_bkps, figsize=(10, 6))
    plt.title('Change Point Detection: Binary Segmentation Search Method')
    plt.show()

    #Changepoint detection with window-based search method
    model = "l2"
    algo = rpt.Window(width=40, model=model).fit(points)
    my_bkps = algo.predict(n_bkps=10)
    rpt.show.display(points, my_bkps, figsize=(10, 6))
    plt.title('Change Point Detection: Window-Based Search Method')
    plt.show()

    #Changepoint detection with dynamic programming search method
    model = "l1"
    algo = rpt.Dynp(model=model, min_size=3, jump=5).fit(points)
    my_bkps = algo.predict(n_bkps=10)
    rpt.show.display(points, my_bkps, figsize=(10, 6))
    plt.title('Change Point Detection: Dynamic Programming Search Method')
    plt.show()

    #Create a synthetic data set to test against
    points = np.concatenate([
示例#23
0


# Pre-computed training arrays; only shift_train_data is used below.
shift_train_data = np.load("../data/Electricity/elect_pre_train_data.npy")
shift_train_onehot = np.load("../data/Electricity/elect_train_onehot.npy")
v_all = np.load("../data/Electricity/elect_train_v.npy")
shift_train_label = np.load("../data/Electricity/elect_train_label.npy")
param = np.load("../data/Electricity/elect_train_param.npy")
index_list = np.load("../data/Electricity/elect_train_index.npy")
indexs_pred = np.load("../data/Electricity/elect_train_pred_index.npy")

shift_train_pvalue = []  # one 0/1 change-point indicator vector per window

n, dim = 192, 1  # number of samples, dimension
model = "rbf"  # "l1", "rbf", "linear", "normal", "ar"
penalty_scale = 2*np.log(n)*dim  # signal-independent part of the penalty
for window_idx in range(shift_train_data.shape[0]):
    # first feature channel of this window
    signal = shift_train_data[window_idx, :, 0]

    # change point detection, fitted without the last 24 samples
    detector = rpt.Window(width=48, model=model)
    detector.fit(signal[:-24])
    sigma = np.std(signal)
    my_bkps = detector.predict(pen=penalty_scale*sigma**2)

    # mark the sample just before each breakpoint located below index 160
    change_points = np.zeros(192)
    for bkp in my_bkps:
        if bkp >= 160:
            continue
        change_points[bkp-1] = 1

    # save results
    shift_train_pvalue.append(change_points)
shift_train_pvalue = np.array(shift_train_pvalue)

np.save("../data/Electricity/elect_train_p_value", shift_train_pvalue)
示例#24
0
def find_changepoints_for_time_series(series,
                                      modeltype="binary",
                                      number_breakpoints=10,
                                      plot_flag=True,
                                      plot_with_dates=False,
                                      show_time_flag=False):
    """Detect change points in a series with the selected search method.

    Parameters
    ----------
    series : object exposing ``.values``, ``.shape``, ``.index``, ``.plot``,
        ``.max`` and ``.min`` (presumably a pandas Series -- the code only
        relies on these attributes).
    modeltype : one of "binary" (ruptures Binseg, l2 cost), "pelt" (ruptures
        Pelt, rbf cost, fixed penalty 10 -- ``number_breakpoints`` is not
        used), "window" (ruptures Window, width 40, l2 cost), "Dynamic"
        (ruptures Dynp, l1 cost) or "online" (changefinder scores, keeping
        the ``number_breakpoints`` highest-scoring positions).
    number_breakpoints : number of breakpoints requested from the method.
    plot_flag : plot the result when True.
    plot_with_dates : plot against ``series.index`` with alternately shaded
        segments instead of using ``rpt.display``.
    show_time_flag : print the elapsed detection time when True.

    Returns
    -------
    list of breakpoint positions; for "online" the series length is appended
    when it is not already part of the result.

    Raises
    ------
    ValueError : ``modeltype`` is not one of the supported names.
    """
    known_modeltypes = ("binary", "pelt", "window", "Dynamic", "online")
    if modeltype not in known_modeltypes:
        # Fail early with a clear message instead of an unbound `result` later
        raise ValueError("Unknown modeltype {!r}; expected one of {}".format(
            modeltype, known_modeltypes))

    #RUPTURES PACKAGE
    points = series.values

    t0 = time.time()
    if modeltype == "binary":
        title = "Change Point Detection: Binary Segmentation Search Method"
        changepoint_model = rpt.Binseg(model="l2").fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    elif modeltype == "pelt":
        title = "Change Point Detection: Pelt Search Method"
        changepoint_model = rpt.Pelt(model="rbf").fit(points)
        result = changepoint_model.predict(pen=10)
    elif modeltype == "window":
        title = "Change Point Detection: Window-Based Search Method"
        changepoint_model = rpt.Window(width=40, model="l2").fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    elif modeltype == "Dynamic":
        title = "Change Point Detection: Dynamic Programming Search Method"
        changepoint_model = rpt.Dynp(model="l1", min_size=3,
                                     jump=5).fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    else:
        # CHANGEFINDER PACKAGE ("online")
        title = "Simulates the working of finding changepoints in online fashion"
        cf = changefinder.ChangeFinder()
        scores = [cf.update(p) for p in points]
        # positions of the highest scores, reported in increasing order
        result = sorted((-np.array(scores)).argsort()[:number_breakpoints])
        # append the series length as closing breakpoint, like the ruptures
        # branches whose results are consumed the same way below
        if series.shape[0] not in result:
            result.append(series.shape[0])

    if show_time_flag:
        elapsed_time = time.time() - t0
        print("[exp msg] elapsed time for process: " +
              str(time.strftime("%H:%M:%S", time.gmtime(elapsed_time))))

    if plot_flag:
        if not plot_with_dates:
            rpt.display(points, result, figsize=(18, 6))
            plt.title(title)
            plt.show()
        else:
            series.plot(figsize=(18, 6))
            plt.title(title)
            # Shade the span between consecutive breakpoints in alternating
            # colours (the span before the first breakpoint is left unshaded).
            for i, (seg_start, seg_end) in enumerate(zip(result, result[1:])):
                if i % 2 == 0:
                    current_color = 'xkcd:salmon'
                else:
                    current_color = 'xkcd:sky blue'
                plt.fill_between(series.index[seg_start:seg_end],
                                 y1=series.max() * 1.1,
                                 y2=series.min() * 0.9,
                                 color=current_color,
                                 alpha=0.3)
            plt.show()

    return result