Example #1
def change_point(connect_df, pen=None, epsilon=None, nbkp=1):
    '''
    Change-point analysis for either the connectivity method or graph metrics.

    Cost models: least squares ("l1", "l2") or radial basis functions ("rbf").

    Example:
        a.create_posjac()
        t = connectivity(a, inorganics, plot=2)
        r = np.log10(t[t.sum().sort_values(ascending=False).index])
        change_point(np.array(r).T[:, ::50])
    '''
    import numpy as np
    import ruptures as rpt
    import matplotlib.pyplot as plt
    print('calculating change points')
    # detection (note: the pen and epsilon arguments are currently unused;
    # the search simply stops after nbkp breakpoints)
    signal = np.array(connect_df)
    algo = rpt.Binseg(model='rbf').fit(signal)
    # alternative: rpt.Binseg(model='l2', custom_cost=None, min_size=1, jump=1, params=None).fit(signal)
    result = algo.predict(n_bkps=nbkp)
    print(result)

    # display the detected breakpoints
    rpt.display(signal, result)
    plt.show()
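For orientation, a minimal self-contained sketch of the same Binseg + 'rbf' pattern on synthetic data (rpt.pw_constant stands in for the connectivity matrix above; none of this comes from the original project):

import matplotlib.pyplot as plt
import ruptures as rpt

# Synthetic piecewise-constant signal: 300 samples, 3 features, 2 true breakpoints.
signal, true_bkps = rpt.pw_constant(300, 3, 2, noise_std=1.0)

algo = rpt.Binseg(model='rbf').fit(signal)
result = algo.predict(n_bkps=2)  # breakpoint indexes; the last entry is always len(signal)

rpt.display(signal, true_bkps, result)
plt.show()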
Example #2
def binary(data):
    '''
    data: EURUSD asset values.
    '''
    datos = np.array(data.Close)

    n = len(datos)  # Number of samples in the array.
    sigma = datos.std()  # Standard deviation of the data.
    p = np.log(n) * sigma**2  # Penalty used by the model.

    # Binary segmentation steps.
    algo = rpt.Binseg().fit(datos)
    my_bkps = algo.predict(pen=p)
    senal = pd.DataFrame(my_bkps)

    # Drop the last value from the series, since it is not a real change point
    # (ruptures always appends the sample count).
    mean = senal.drop([len(my_bkps) - 1])
    mean = np.array(mean)  # Model output converted to an array.

    changes = mean.astype(int)  # Breakpoint indexes converted to integers.

    feature = boolean_change_point(datos, changes)

    # The function returns the dates and the numeric change points.
    return changes, feature
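The log(n) * sigma**2 rule above is the usual BIC-style penalty. A small self-contained sketch of just that step, on toy random-walk prices (boolean_change_point and the original DataFrame layout are project-specific and not reproduced):

import numpy as np
import ruptures as rpt

rng = np.random.default_rng(0)
close = 1.10 + np.cumsum(rng.normal(0, 1e-4, 500))  # toy EURUSD-like close series

n = len(close)
sigma = close.std()
pen = np.log(n) * sigma**2          # same penalty rule as in binary() above

bkps = rpt.Binseg().fit(close).predict(pen=pen)
print(bkps)                         # breakpoint indexes; the last one is just n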
def plot_changePoint(array):
    model = "l2"  # a cost model string is required; "l2" is the common default in these examples
    algo = rpt.Binseg(model=model).fit(array)
    my_bkps = algo.predict(n_bkps=1)
    # show results
    rpt.show.display(array, my_bkps, figsize=(10, 6))
    plt.title('Change Point Detection: Binary Segmentation Search Method')
    plt.show()
    return None
Example #4
    def detect_change_points(self, ys: np.ndarray, **kwargs) -> Sequence[int]:
        '''
        :param kwargs: may contain "model": one of "l1", "rbf", "linear", "normal", "ar" (default is "l2")
        :return: list of estimated change points
        '''
        model = kwargs.get("model", "l2")
        estimator = ruptures.Binseg(model=model).fit(ys)

        return estimator.predict(pen=3)
Example #5
def cp_detection_binary_segmentation(points):
    # Changepoint detection with the Binary Segmentation search method
    model = "l2"
    algo = rpt.Binseg(model=model).fit(points)
    my_bkps = algo.predict(n_bkps=2)
    # show results
    rpt.show.display(points, my_bkps, figsize=(10, 6))
    plt.title('Change Point Detection: Binary Segmentation Search Method')
    plt.show()
    return my_bkps
Example #6
def find_changepoints(lista_datos):
    #Convert the time series values to a numpy 1D array
    points=np.array(lista_datos)

    #Changepoint detection with the Binary Segmentation search method
    model = "l2"
    algo = rpt.Binseg(model=model).fit(points)
    my_bkps = algo.predict(pen=np.log(len(lista_datos)) * 6**2)  # BIC-style penalty with an assumed noise std of 6
    output = pd.DataFrame(my_bkps, columns=['step'])
    return output.to_dict(orient='records')
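The 6**2 factor above is a hard-coded variance guess. A hedged variant (the helper name is made up; this is not part of the original code) derives the penalty from the series' own standard deviation:

import numpy as np
import ruptures as rpt

def find_changepoints_datadriven(lista_datos):
    # Same Binseg/"l2" search as find_changepoints(), but the BIC-style penalty
    # uses the estimated standard deviation instead of the fixed value of 6.
    points = np.array(lista_datos)
    sigma = points.std()
    algo = rpt.Binseg(model="l2").fit(points)
    return algo.predict(pen=np.log(len(points)) * sigma**2)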
Example #7
def changePoint(value, model='rbf', penalty=1.0, brakepts=None, plot=False):
    # change point detection
    # available cost models: "rbf", "l1", "l2", "linear", "normal", "ar", "mahalanobis"
    signal = np.array(value)
    algo = rpt.Binseg(model=model).fit(signal)
    my_bkps = algo.predict(pen=penalty, n_bkps=brakepts)
    if plot:
        # show results
        rpt.show.display(signal, my_bkps, figsize=(10, 3))
        plt.show()
    # define regions from breaking points
    sections = my_bkps
    sections.insert(0, 0)
    # check last point
    sections[-1] -= 1
    if plot: print('model = ', model, ' - sections = ', sections)
    return (sections)
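A usage sketch for changePoint (synthetic two-regime data, assuming the function above and its module-level numpy/ruptures imports are in scope), showing how the returned sections slice the signal into regimes:

import numpy as np

rng = np.random.default_rng(1)
value = np.r_[rng.normal(0, 1, 200), rng.normal(4, 1, 200)]  # toy series with one mean shift

sections = changePoint(value, model='rbf', penalty=10.0)     # e.g. [0, 200, 399]
for start, stop in zip(sections[:-1], sections[1:]):
    segment = value[start:stop]
    print(f'segment {start}:{stop}  mean = {segment.mean():.2f}')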
Example #8
def filter_dwelltimes_smpl(N_smpl, t_smpl=[], pen=1, plots=False):
    ''' return filtered version of trace N_smpl(t) (sampled),
    using ruptures with parameter pen 
    '''
    import time
    import ruptures as rpt
    t0 = time.time()
    # find rupture points in N_smpl (can be slow):
    #bkpts = rpt.Pelt(model='rbf', jump=1).fit_predict(N_smpl, pen=pen)
    bkpts = rpt.Binseg(model='l2', jump=1).fit_predict(N_smpl, pen=pen)
    print(f'filter_dwelltimes_smpl(): rupture p.ts done in {time.time()-t0:.1f}s')
    bkpts = np.append(0, bkpts)
    N_filt = np.zeros(len(N_smpl))
    # between rpt points, choose for N_filt the most frequent value of N_smpl: 
    for i in range(len(bkpts)-1):
        Ns = N_smpl[bkpts[i]:bkpts[i+1]]
        m = np.array(list(zip(set(Ns), [list(Ns).count(s) for s in set(Ns)]))) # [(Ni, counts(Ni))]
        N_filt[bkpts[i]:bkpts[i+1]] = m[np.argmax(m[:,1])][0]
    if plots:
        if len(t_smpl)==0:
            t_smpl = np.arange(len(N_smpl))
        ts_orig, dwts_orig = find_dwelltimes_smpl(t_smpl, N_smpl)
        ts_filt, dwts_filt = find_dwelltimes_smpl(t_smpl, N_filt)
        bins_orig = np.logspace(np.log10(np.min(dwts_orig)), np.log10(np.max(dwts_orig)), 50)
        bins_filt = np.logspace(np.log10(np.min(dwts_filt)), np.log10(np.max(dwts_filt)), 50)
        bins = np.max([bins_filt, bins_orig], axis=0)
        fig = plt.figure('filter_dwelltimes_smpl', clear=True)
        ax1 = fig.add_subplot(311)
        ax2 = fig.add_subplot(312)
        ax3 = fig.add_subplot(313)
        ax1.plot(t_smpl, N_smpl, '-', lw=3, label='orig')
        ax1.vlines(t_smpl[bkpts[:-1]], 0, np.max(N_smpl), ls='--', alpha=0.2)
        ax1.plot(t_smpl, N_filt, label='filt')
        ax1.legend()
        ax1.set_xlabel('time or index')
        ax2.plot(t_smpl, N_smpl-N_filt, label='orig-filt')
        ax2.set_title(f'|err| = {np.sum(np.abs(N_smpl-N_filt))}', fontsize=10)
        ax2.legend()
        ax2.set_xlabel('time or index')
        ax3.hist([dwts_orig,dwts_filt], bins, label=['orig','filt'])
        ax3.legend()
        ax3.set_xscale('log')
        ax3.set_xlabel('dwell times')
        ax3.set_ylabel('counts')
        fig.tight_layout()
    return N_filt
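A quick usage sketch for filter_dwelltimes_smpl (synthetic integer trace; plots=False, so the external find_dwelltimes_smpl helper is not needed; numpy is assumed to be imported as np at module level):

import numpy as np

rng = np.random.default_rng(2)
N_true = np.repeat([0, 2, 1, 3], 250)                      # piecewise-constant occupancy trace
N_smpl = N_true + rng.integers(-1, 2, size=N_true.size)    # add small integer noise

N_filt = filter_dwelltimes_smpl(N_smpl, pen=1, plots=False)
print(np.mean(N_filt == N_true))                           # fraction of samples recovered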
Example #9
def R_breakouts_detection(points):
    #Changepoint detection with the Pelt search method
    model = "rbf"
    algo = rpt.Pelt(model=model).fit(points)
    result = algo.predict(pen=10)
    rpt.display(points, result, figsize=(10, 6))
    plt.title('Change Point Detection: Pelt Search Method')
    plt.tight_layout()
    plt.show()

    #Changepoint detection with the Binary Segmentation search method
    model = "l2"
    algo = rpt.Binseg(model=model).fit(points)
    my_bkps = algo.predict(n_bkps=10)
    # show results
    rpt.show.display(points, my_bkps, figsize=(10, 6))
    plt.title('Change Point Detection: Binary Segmentation Search Method')
    plt.tight_layout()
    plt.show()
Example #10
def changePointDetection(glacier, attr, startdate=None, enddate=None, \
    n_breakpoints=1, method='window', model='l1', wwidth=5):
    """Use ruptures package to identify change points in glacier time series. Acceptable methods are 'window' (sliding window), 'binseg' (binary segmentation), and bottomup (bottom-up). See https://centre-borelli.github.io/ruptures-docs/user-guide for further information."""
    attrs, dates = glacier.filterDates(attr, startdate, enddate)
    signal = attrs.values
    sigma = signal.std()
    n = len(signal)
    if method == 'window':
        algo = rpt.Window(width=wwidth, model=model).fit(signal)
    elif method == 'binseg':
        algo = rpt.Binseg(model=model).fit(signal)
    elif method == 'bottomup':
        algo = rpt.BottomUp(model=model).fit(signal)
    breakpoints = algo.predict(n_bkps=n_breakpoints)
    # remove breakpoints at beginning/end of time series
    if dates.index[0] - 1 in breakpoints:
        breakpoints.remove(dates.index[0] - 1)
    if dates.index[-1] in breakpoints:
        breakpoints.remove(dates.index[-1])
    breakpoint_dates = dates[breakpoints]
    return breakpoint_dates, signal, breakpoints
Example #11
def get_change_point(series, jump=5, n_bkps=5, pen=10):
    """

    series: numpy array please
    jump: размер сэмпла
    n_bkps: количество возвращаемых остановок
    pen: пенальти для Pelt

    """
    series = series.values
    alg_dynp = rpt.Dynp(jump=jump).fit_predict(series, n_bkps=n_bkps)

    alg_pelt = rpt.Pelt(jump=jump).fit_predict(series, pen=pen)

    alg_bin = rpt.Binseg(jump=jump).fit_predict(series, n_bkps=n_bkps)

    alg_bot = rpt.BottomUp(jump=jump).fit_predict(series, n_bkps=n_bkps)

    alg_win = rpt.Window(jump=jump).fit_predict(series, n_bkps=n_bkps)

    alg_cumsum = change_point_detection(series.tolist())

    # We now have change points from several algorithms.
    # Next, find the points that were predicted the largest number of times.
    res = {}
    for i in alg_dynp + alg_pelt + alg_bin + alg_bot + alg_win + alg_cumsum:
        if i in res:
            res[i] += 1
        else:
            res[i] = 1

    res.pop(0, None)            # drop the trivial start index if it appears
    res.pop(len(series), None)  # drop the series length, which every ruptures algorithm appends

    itemMaxValue = max(res.items(), key=lambda x: x[1])
    listOfKeys = []
    for key, value in res.items():
        if value == itemMaxValue[1]:
            listOfKeys.append(key)
    return listOfKeys
Example #12
def get_breakpoints(df: pd.DataFrame,
                    model: str = "rbf",
                    min_size: int = 5,
                    jump: int = 1,
                    pen: int = 2) -> List[int]:
    """
    Calculate the breakpoints of a time series or a group of time series using binary segmentation.

    For more info http://ctruong.perso.math.cnrs.fr/ruptures-docs/build/html/detection/binseg.html.

    :param df: DataFrame containing the target time series as columns.
    :param model: segment cost model ("l1", "l2", "rbf", ...).
    :param min_size: minimum segment length. Defaults to 5 samples.
    :param jump: subsample (one every jump points). Defaults to 1 sample.
    :param pen:  penalty value (>0).
    :return: list containing the indexes where breakpoints happen.
    """
    signal = (df.values - df.values.mean(axis=0)) / df.values.std(axis=0)
    algo = ruptures.Binseg(model=model, min_size=min_size,
                           jump=jump).fit(signal)
    result = algo.predict(pen=pen)
    return result
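A usage sketch for get_breakpoints, assuming the function above with its module-level ruptures import; the column names and data are made up:

import numpy as np
import pandas as pd

rng = np.random.default_rng(3)
df = pd.DataFrame({
    'sensor_a': np.r_[rng.normal(0, 1, 150), rng.normal(3, 1, 150)],  # mean shift at index 150
    'sensor_b': rng.normal(0, 1, 300),                                # no change
})
print(get_breakpoints(df, model='rbf', pen=5))  # e.g. [150, 300]; the last index is len(df)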
Example #13
def breakpoint_detection(raw_data,fname,estimated_breaks,n_feat=feat):
    ruptures_cpts={} # PELT,BinSeg,Dynp
    bocpd_l1={} # BOCPD l=200
    bocpd_l2={} # BOCPD l=400
    exo_cpd_offline={} #EXO CPD offline
    exo_cpd_online={} #EXO CPD online
    for ind in range(n_feat):
        temp_data=raw_data[:,ind] # Going per feature/column

        start=time.time()
        # Ruptures
        rpt_pelt=rpt.Pelt(model='rbf').fit(temp_data)
        pelt_result=rpt_pelt.predict(pen=5)
        print("Pelt: ",time.time()-start)

        start=time.time()
        rpt_binseg=rpt.Binseg(model='rbf').fit(temp_data)
        bin_result=rpt_binseg.predict(n_bkps=estimated_breaks)
        print("Binseg: ",time.time()-start)

#         start=time.time()
#         rpt_dynp=rpt.Dynp(model='normal',min_size=2,jump=5).fit(temp_data)
#         dynp_result=rpt_dynp.predict(n_bkps=estimated_breaks)
#         print("Dynp: ",time.time()-start)


#         ruptures_cpts[ind]=list(set().union(pelt_result,bin_result,dynp_result))
        ruptures_cpts[ind]=list(set().union(pelt_result,bin_result))

        #BOCPD
        start=time.time()
        hazard_func_l1=lambda r: bcp.constant_hazard(r, _lambda=200)
        beliefs_l1,maxes_l1=bcp.inference(temp_data, hazard_func_l1)
        log_bel_l1=-np.log(beliefs_l1)
        index_changes_l1=np.where(np.diff(maxes_l1.T[0])<0)[0]
        print("BOCPD_l1: ",time.time()-start)


        bocpd_l1[ind]=[index_changes_l1,log_bel_l1]

        start=time.time()
        hazard_func_l2=lambda r: bcp.constant_hazard(r, _lambda=400)
        beliefs_l2,maxes_l2=bcp.inference(temp_data, hazard_func_l2)
        log_bel_l2=-np.log(beliefs_l2)
        index_changes_l2=np.where(np.diff(maxes_l2.T[0])<0)[0]
        print("BOCPD_l2: ",time.time()-start)

        bocpd_l2[ind]=[index_changes_l2,log_bel_l2]

        #Offline/Online Exact and Efficient Bayesian Inference
        #Offline
#         start=time.time()
#         Q,P,Pcp = offcd.offline_changepoint_detection(temp_data,partial(offcd.const_prior, l=(len(temp_data)+1)), offcd.gaussian_obs_log_likelihood, truncate=-40)
#         offline_cpts=data=np.exp(Pcp).sum(0)
#         offline_peaks=find_peaks(offline_cpts)
#         print("Offline EXO: ",time.time()-start)


#         exo_cpd_offline[ind]=[offline_peaks,offline_cpts]

        #Online
        start=time.time()
        Nw=10
        R,maxes=oncd.online_changepoint_detection(temp_data, partial(oncd.constant_hazard, 250), oncd.StudentT(0.1, .01, 1, 0))
        online_cpts=R[Nw,Nw:-1]
        online_peaks=sig.find_peaks(online_cpts)
        print("Online EXO: ",time.time()-start)

        exo_cpd_online[ind]=[online_peaks,online_cpts]

        print("\t\t\t Breakpoint detection Index {} done.".format(ind))

    ruptures_fname=fname+'_ruptures.csv'
    bocpdl1_fname=fname+'_bocpdl1.csv'
    bocpdl2_fname=fname+'_bocpdl2.csv'
    ofexo_fname=fname+'_ofexo.csv'
    onexo_fname=fname+'_onexo.csv'


    with open(ruptures_fname,'w') as f:
        writer=csv.writer(f)
        for k,v in ruptures_cpts.items():
            writer.writerow([k,v])
    f.close()

    with open(bocpdl1_fname,'w') as f:
        writer=csv.writer(f)
        for k,v in bocpd_l1.items():
            writer.writerow([k,v])
    f.close()

    with open(bocpdl2_fname,'w') as f:
        writer=csv.writer(f)
        for k,v in bocpd_l2.items():
            writer.writerow([k,v])
    f.close()

#     with open(ofexo_fname,'w') as f:
#         writer=csv.writer(f)
#         for k,v in exo_cpd_offline.items():
#             writer.writerow([k,v])
#     f.close()

    with open(onexo_fname,'w') as f:
        writer=csv.writer(f)
        for k,v in exo_cpd_online.items():
            writer.writerow([k,v])
    f.close()

    print("\t\t Saved changepoint detection")
Example #14
            for idx in result[:-1]:
                x.append(indexes[idx])
            y = []
            for idx in x:
                y.append(df.loc[df.index == idx]['p1_current'].values[0]) 

            plt.plot(df.loc[df['category_column'] == category].index, df.loc[df['category_column'] == category]['p1_current'], label='normal')
            plt.scatter(x, y, label='outlier', color='red', marker='o')
            plt.title("Change Finder Window Segmentation p1_current")
            plt.xlabel('Date Time')
            plt.ylabel('p1_current')
            plt.savefig(ofn + "_Window_p1_current.png")
            plt.show()
            plt.close()

            algo = rpt.Binseg(model="l2")
            result = algo.fit_predict(X, n_bkps=n_bkps)
            x = []
            for idx in result[:-1]:
                x.append(indexes[idx])
            y = []
            for idx in x:
                y.append(df.loc[df.index == idx]['p1_current'].values[0]) 

            plt.plot(df.loc[df['category_column'] == category].index, df.loc[df['category_column'] == category]['p1_current'], label='normal')
            plt.scatter(x, y, label='outlier', color='red', marker='o')
            plt.title("Change Finder Binseg p1_current")
            plt.xlabel('Date Time')
            plt.ylabel('p1_current')
            plt.savefig(ofn + "_BinarySeg_p1_current.png")
            plt.show()
Example #15
    #Convert the time series values to a numpy 1D array
    points = np.array(price_df['WTI_Price'])

    #RUPTURES PACKAGE
    #Changepoint detection with the Pelt search method
    model = "rbf"
    algo = rpt.Pelt(model=model).fit(points)
    result = algo.predict(pen=10)
    rpt.display(points, result, figsize=(10, 6))
    plt.title('Change Point Detection: Pelt Search Method')
    plt.show()

    #Changepoint detection with the Binary Segmentation search method
    model = "l2"
    algo = rpt.Binseg(model=model).fit(points)
    my_bkps = algo.predict(n_bkps=10)
    # show results
    rpt.show.display(points, my_bkps, figsize=(10, 6))
    plt.title('Change Point Detection: Binary Segmentation Search Method')
    plt.show()

    #Changepoint detection with window-based search method
    model = "l2"
    algo = rpt.Window(width=40, model=model).fit(points)
    my_bkps = algo.predict(n_bkps=10)
    rpt.show.display(points, my_bkps, figsize=(10, 6))
    plt.title('Change Point Detection: Window-Based Search Method')
    plt.show()

    #Changepoint detection with dynamic programming search method
Example #16
    playerids = np.unique(list(c[:-2] for c in team.columns if c[:4] in ['Home', 'Away']))
    playerids = np.unique(list(map(lambda x: split_at(x, '_', 2)[0], playerids)))

    # for player in playerids:  (loop disabled; analyse a single player instead)
    player = 'Home_6'
    mc_temp = list(map(lambda x: metabolic_cost(team[player + '_Acc'][x]), range(1, len(team[player + '_Acc'])+1)))
    #team[player+'_MP'] = mc_temp * team[player+'_speed']
    mp_temp = mc_temp * team[player+'_speed']
    test_mp = mp_temp.rolling(7500, min_periods=1).apply(lambda x: np.nansum(x))  # use change point detection here
    plt.plot(test_mp)
    plt.title('Metabolic Power Output [5 min Rolling Window]')
    signal = np.array(test_mp[7500:len(test_mp)]).reshape((len(test_mp[7500:len(test_mp)]), 1))
    algo = rpt.Pelt(model="l2", min_size=7500).fit(signal)
    result = algo.predict(pen=np.log(len(signal))*1*np.std(signal)**2)  # potentially a pacing strategy, or moments in the game that are slower

    algo = rpt.Binseg(model="l2").fit(signal)  # potentially finding the spot where a substitution should happen
    result = algo.predict(n_bkps=1)  # big_seg
    rpt.show.display(signal, result, figsize=(10, 6))
    plt.title('Metabolic Power Output [5 min Rolling Window]')



#SPI and Measure the minute after
home_spi_list = []

for player in home_players:
    print(player)
    test_spi = tracking_home['Home_'+player+'_speed'].rolling(1500,min_periods=1).apply(lambda x : np.nansum(x)) / 25.
    xcoords = sp.signal.find_peaks(test_spi, distance=1500)
    spi_values = list(map(lambda x: test_spi[x], xcoords[0]))
    spi_values_index = np.argsort(spi_values)[-3:]
Example #17
def detect_anomalies( kernel_distance_seq, policy ):
    # Unpack policy
    policy_name = policy["name"]
    policy_params = policy["params"]

    # Do a truly naive anomaly detection policy where we just define the slice 
    # containing the max kernel distance as anomalous and all others as not
    # anomalous. This is not really "anomaly detection" in any meaningful sense
    # But it suffices for testing the basic workflow
    if policy_name == "naive_max":
        max_dist_slice_idx = 0
        max_dist = 0
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            distances = get_flat_distances( distance_mat )
            slice_max = max( distances)
            if slice_max > max_dist:
                max_dist = slice_max
                max_dist_slice_idx = slice_idx
        return [ max_dist_slice_idx ]

    # Detect anomalies based on whether the median kernel distance increases
    # from slice to slice or not
    elif policy_name == "increasing_median":
        threshold = policy_params["threshold"]
        flagged_slice_indices = []
        prev_median_distance = 0
        curr_median_distance = 0
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            distances = get_flat_distances( distance_mat )
            curr_median_distance = np.median( distances )
            #if curr_median_distance > prev_median_distance:
            if curr_median_distance - prev_median_distance > threshold:
                flagged_slice_indices.append( slice_idx )
            prev_median_distance = curr_median_distance
        return flagged_slice_indices
    
    elif policy_name == "kolmogorov_smirnov":
        flagged_slice_indices = []
        prev_distribution = None
        next_distribution = None
        for slice_idx in range(1, len(kernel_distance_seq) - 1):
            prev_dist = flatten_distance_matrix(kernel_distance_seq[ slice_idx - 1 ])
            curr_dist = flatten_distance_matrix(kernel_distance_seq[ slice_idx  ])
            next_dist = flatten_distance_matrix(kernel_distance_seq[ slice_idx + 1 ])
            ks2_stat_prev, p_val_prev = ks_2samp( prev_dist, curr_dist )
            ks2_stat_next, p_val_next = ks_2samp( next_dist, curr_dist )
            thresh = 0.0001
            if p_val_prev < thresh and p_val_next < thresh:
                flagged_slice_indices.append( slice_idx )
        return flagged_slice_indices

    # Flag slices if the median kernel distance exceeds a user-supplied 
    # threshold
    elif policy_name == "median_exceeds_threshold":
        threshold = policy_params[ "threshold" ]
        flagged_slice_indices = []
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            distances = get_flat_distances( distance_mat )
            median_distance = np.median( distances )
            if median_distance > threshold:
                flagged_slice_indices.append( slice_idx )
        return flagged_slice_indices
        
    # Randomly choose slices. This isn't really an anomaly detection policy, but
    # we use it to check whether the distribution of callstacks from a random
    # sample of slices looks different than the distribution of callstacks from
    # the flagged slices
    elif policy_name == "random":
        n_samples = policy_params["n_samples"]
        n_slices = len(kernel_distance_seq)
        n_generated = 0
        flagged_slice_indices = set()
        while n_generated < n_samples:
            # generate uniform random number between 0 and n_slices-1
            rand_slice_idx = np.random.randint( 0, n_slices, size=1 )[0]
            if rand_slice_idx not in flagged_slice_indices:
                flagged_slice_indices.add( rand_slice_idx )
                n_generated += 1
        return list( flagged_slice_indices )

    elif policy_name == "all":
        n_slices = len(kernel_distance_seq)
        return list( range( n_slices ) )
    

    elif policy_name == "ruptures_binary_segmentation":
        # Unpack policy
        model = policy_params[ "model" ]
        #width = policy_params[ "width" ]
        n_change_points = policy_params[ "n_change_points" ]
        penalty = policy_params[ "penalty" ]
        epsilon = policy_params[ "epsilon" ]

        # Get list of distance distributions
        distance_distribution_seq = []
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            distances = get_flat_distances( distance_mat )
            distance_distribution_seq.append( distances )

        # Get some properties about the distances needed by Ruptures
        n_distributions = len( distance_distribution_seq )
        dim = len( distances )
        all_distances = []
        for d in distance_distribution_seq:
            all_distances += d
        sigma = np.std( all_distances )

        # Make into ndarray for ruptures
        signal = np.array( [ np.array(d) for d in distance_distribution_seq ] )

        # Set up model
        algo = rpt.Binseg( model=model ).fit( signal )

        # Find change-points
        if n_change_points == "unknown":
            if penalty == True and epsilon == False:
                penalty_value = np.log( n_distributions ) * dim * sigma**2 
                change_points = algo.predict( pen=penalty_value )
            elif penalty == False and epsilon == True:
                threshold = 3 * n_distributions * sigma**2
                change_points = algo.predict( epsilon=threshold )
            else:
                raise ValueError("Invalid policy for window-based change-point detection: {}".format(policy_params))
        else:
            change_points = algo.predict( n_bkps=n_change_points )
        
        flagged_slice_indices = [ cp-1 for cp in change_points ]
        return flagged_slice_indices


    elif policy_name == "ruptures_window_based":
        # Unpack policy
        model = policy_params[ "model" ]
        width = policy_params[ "width" ]
        n_change_points = policy_params[ "n_change_points" ]
        penalty = policy_params[ "penalty" ]
        epsilon = policy_params[ "epsilon" ]

        # Get list of distance distributions
        distance_distribution_seq = []
        for slice_idx,distance_mat in enumerate( kernel_distance_seq ):
            distances = get_flat_distances( distance_mat )
            distance_distribution_seq.append( distances )

        # Get some properties about the distances needed by Ruptures
        n_distributions = len( distance_distribution_seq )
        dim = len( distances )
        all_distances = []
        for d in distance_distribution_seq:
            all_distances += d
        sigma = np.std( all_distances )

        # Make into ndarray for ruptures
        signal = np.array( [ np.array(d) for d in distance_distribution_seq ] )

        # Set up model
        algo = rpt.Window( width=width, model=model ).fit( signal )

        # Find change-points
        if n_change_points == "unknown":
            if penalty == True and epsilon == False:
                penalty_value = np.log( n_distributions ) * dim * sigma**2 
                change_points = algo.predict( pen=penalty_value )
            elif penalty == False and epsilon == True:
                threshold = 3 * n_distributions * sigma**2
                change_points = algo.predict( epsilon=threshold )
            else:
                raise ValueError("Invalid policy for window-based change-point detection: {}".format(policy_params))
        else:
            change_points = algo.predict( n_bkps=n_change_points )
        
        flagged_slice_indices = [ cp-1 for cp in change_points ]
        return flagged_slice_indices
    


    else:
        raise NotImplementedError("Anomaly detection policy: {} is not implemented".format(policy_name))
Example #18
def make_neighborhood_rank_divergence_plot(rank_df, adj_df):
    rank_df.sort_values('rank', inplace=True, ascending=True)

    divergences = np.zeros(len(rank_df.index))
    for i, (county, rank) in enumerate(zip(rank_df['County'],
                                           rank_df['rank'])):
        neighbors = adj_df.loc[adj_df.source == county, 'destination']

        if len(neighbors) == 0:
            neighbors = adj_df.loc[adj_df.destination == county, 'source']

        rank_ind = rank_df.County.isin(neighbors).values
        neighbor_ranks = rank_df.loc[rank_ind, 'rank']
        divergence = np.abs(rank - neighbor_ranks).mean()
        divergences[i] = divergence

        if np.isnan(divergence):
            print(county)
            print(neighbors)
            print(neighbor_ranks)

    rank_df['rank_div'] = divergences

    # Change point detection
    signal = rank_df['rank_div'].rolling(100).mean().dropna().values
    # model = {'l1', 'l2', 'rbf', 'linear', 'normal', 'ar'}
    pelt_bkps = rpt.Pelt(model='rbf').fit(signal).predict(pen=100)
    window_bkps = rpt.Window(width=1000,
                             model='l2').fit(signal).predict(n_bkps=1)
    bin_bkps = rpt.Binseg(model='l2').fit(signal).predict(n_bkps=1)
    ensemble_bkp = np.mean(
        [*pelt_bkps[:-1], *window_bkps[:-1], *bin_bkps[:-1]])

    print('Identified Breakpoints:'
          f'\n\tPelt Breakpoints:    {pelt_bkps[:-1]}'
          f'\n\tWindow Breakpoints:  {window_bkps[:-1]}'
          f'\n\tBinary Breakpoints:  {bin_bkps[:-1]}'
          f'\n\tEnsemble Breakpoint: {ensemble_bkp}')

    plt.scatter(
        rank_df['rank'].values,
        rank_df['rank_div'].values,
        facecolor='None',
        edgecolor=sns.xkcd_rgb['denim blue'],
        linewidth=2,
        label='Data',
    )
    plt.plot(
        rank_df['rank'].values,
        rank_df['rank_div'].rolling(100).mean(),
        color='darkorange',
        label='Rolling Mean',
    )

    y_min, y_max = divergences.min(), divergences.max()
    y_range = y_max - y_min
    plt.plot([ensemble_bkp, ensemble_bkp],
             [y_min - 0.1 * y_range, y_max + 0.1 * y_range],
             'k--',
             label='Estimated Breakpoint')
    plt.legend()
    plt.title('Mean Neighborhood Rank Divergence')
    plt.xlabel('Quality of Life Rank (Lower is better)')
    plt.ylabel('Rank Divergence')
    plt.tight_layout()
    ymin, ymax = plt.gca().get_ylim()
    figsize = plt.gcf().get_size_inches()
    plt.savefig('../output/neighborhood_rank_divergence.png', dpi=600)
    plt.close('all')

    # Visualize change points
    bkps = []
    rpt.display(
        signal,
        bkps,
        pelt_bkps,
        figsize=figsize,
    )
    plt.ylim(ymin, ymax)
    plt.gca().get_lines()[0].set_color('darkorange')
    plt.title('Pelt Change Point Detection')
    plt.xlabel('Quality of Life Rank')
    plt.ylabel('Local Rank Divergence')
    plt.tight_layout()
    plt.savefig('../output/rank_div_change_point_pelt.png', dpi=600)
    plt.close('all')

    rpt.show.display(
        signal,
        bkps,
        window_bkps,
        figsize=figsize,
    )
    plt.ylim(ymin, ymax)
    plt.gca().get_lines()[0].set_color('darkorange')
    plt.title('Window Change Point Detection')
    plt.xlabel('Quality of Life Rank')
    plt.ylabel('Local Rank Divergence')
    plt.tight_layout()
    plt.savefig('../output/rank_div_change_point_window.png', dpi=600)
    plt.close('all')

    rpt.show.display(
        signal,
        bkps,
        bin_bkps,
        figsize=figsize,
    )
    plt.ylim(ymin, ymax)
    plt.gca().get_lines()[0].set_color('darkorange')
    plt.title('Binary Change Point Detection')
    plt.xlabel('Quality of Life Rank')
    plt.ylabel('Local Rank Divergence')
    plt.tight_layout()
    plt.savefig('../output/rank_div_change_point_binary.png', dpi=600)
    plt.close('all')
Example #19
signal = numpy.array(ll)

# alignlen = 5000
# mean = numpy.mean(signal)
# std = numpy.std(signal)

# change point detection
model = "l1"  # "l1", "rbf", "linear", "normal", "ar"

# search_method = 'dynamic programming'
# my_bkps = rpt.Dynp(model=model, min_size=100).fit_predict(signal,n_bkps=5)

# search_method = 'Window-based change point detection'
# my_bkps = rpt.Window(model=model, width= 5).fit_predict(signal,pen=1000)

# search_method = 'Exact segmentation: Pelt'
# my_bkps = rpt.Pelt(model = model, min_size=5).fit_predict(signal,pen=10)

# search_method = 'Bottom-up segmentation'
# my_bkps = rpt.BottomUp(model = model).fit_predict(signal,pen=5)

search_method = 'Binary segmentation'
my_bkps = rpt.Binseg(model=model).fit_predict(signal, pen=30)

print(my_bkps)

# show results
rpt.show.display(signal, my_bkps, figsize=(15, 7))
plt.title(search_method)
plt.show()
Example #20
    value = day.iloc[:, j].mean()
    if value > 0:
        arr.append(value)

#Changepoint detection with the Pelt search method
signal = np.array(arr)
algo = rpt.Pelt(model="rbf").fit(signal)
result = algo.predict(pen=10)
rpt.display(signal, result)
plt.title('Change Point Detection: Pelt Search Method')
plt.show()

#Changepoint detection with the Binary Segmentation search method
model = "l2"
algo = rpt.Binseg(model=model).fit(signal)
my_bkps = algo.predict(n_bkps=10)
# show results
rpt.show.display(signal, my_bkps)
plt.title('Change Point Detection: Binary Segmentation Search Method')
plt.show()

#Changepoint detection with window-based search method
model = "l2"
algo = rpt.Window(width=40, model=model).fit(signal)
my_bkps = algo.predict(n_bkps=10)
rpt.show.display(signal, my_bkps)
plt.title('Change Point Detection: Window-Based Search Method')
plt.show()

#Changepoint detection with dynamic programming search method
Example #21
def find_changepoints_for_time_series(series,
                                      modeltype="binary",
                                      number_breakpoints=10,
                                      plot_flag=True,
                                      plot_with_dates=False,
                                      show_time_flag=False):

    #RUPTURES PACKAGE
    #points=np.array(series)
    points = series.values
    title = ""

    t0 = time.time()
    if modeltype == "binary":
        title = "Change Point Detection: Binary Segmentation Search Method"
        model = "l2"
        changepoint_model = rpt.Binseg(model=model).fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    if modeltype == "pelt":
        title = "Change Point Detection: Pelt Search Method"
        model = "rbf"
        changepoint_model = rpt.Pelt(model=model).fit(points)
        result = changepoint_model.predict(pen=10)
    if modeltype == "window":
        title = "Change Point Detection: Window-Based Search Method"
        model = "l2"
        changepoint_model = rpt.Window(width=40, model=model).fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    if modeltype == "Dynamic":
        title = "Change Point Detection: Dynamic Programming Search Method"
        model = "l1"
        changepoint_model = rpt.Dynp(model=model, min_size=3,
                                     jump=5).fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    if modeltype == "online":
        # CHANGEFINDER PACKAGE
        title = "Simulates the working of finding changepoints in online fashion"
        cf = changefinder.ChangeFinder()
        scores = [cf.update(p) for p in points]
        result = (-np.array(scores)).argsort()[:number_breakpoints]
        result = sorted(list(result))
        if series.shape[0] not in result:
            result.append(series.shape[0])

    if show_time_flag:
        elapsed_time = time.time() - t0
        print("[exp msg] elapsed time for process: " +
              str(time.strftime("%H:%M:%S", time.gmtime(elapsed_time))))

    if plot_flag:
        if not plot_with_dates:
            rpt.display(points, result, figsize=(18, 6))
            plt.title(title)
            plt.show()
        else:
            series.plot(figsize=(18, 6))
            plt.title(title)
            for i in range(len(result) - 1):
                if i % 2 == 0:
                    current_color = 'xkcd:salmon'
                else:
                    current_color = 'xkcd:sky blue'
                #plt.fill_between(series.index[result[i]:result[i+1]], series.max(), color=current_color, alpha=0.3)
                plt.fill_between(series.index[result[i]:result[i + 1]],
                                 y1=series.max() * 1.1,
                                 y2=series.min() * 0.9,
                                 color=current_color,
                                 alpha=0.3)
            plt.show()

    return (result)
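A usage sketch for find_changepoints_for_time_series, assuming the function above and its module-level imports (time, ruptures); only the 'binary' branch is exercised, so changefinder is not needed:

import numpy as np
import pandas as pd

rng = np.random.default_rng(4)
values = np.r_[rng.normal(0, 1, 200), rng.normal(5, 1, 200)]
series = pd.Series(values, index=pd.date_range('2020-01-01', periods=400, freq='D'))

bkps = find_changepoints_for_time_series(series,
                                         modeltype='binary',
                                         number_breakpoints=1,
                                         plot_flag=False)
print(bkps)  # e.g. [200, 400]; the last entry is len(series)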
Example #22
  def get_decomp_plus_cp(self,signal, dates, decomp_algo='STL', cp_algo='bayes', config=None):
    '''
    task function

    description: applies decomposition, and gets the change points
    '''
    #formatting the np.array to dataframe for trend extraction
    signal = pd.DataFrame({'signal': signal})
    signal.index = dates

    
    #trend extraction
    if config:
      if 'decomp_algo' in config:
        decomp_algo = config['decomp_algo']
    if decomp_algo == 'STL':
      signal_trend = self.extract_climate_trend(signal, 'STL')
      signal = np.array(signal_trend['signal'])

    if decomp_algo is None:
      signal = np.array(signal)

    #change point detection

    #bayesian change point detection
    if cp_algo == 'bayes':
      #defaults: log-normal distribution and a log-odds threshold of 0
      distribution = 'log_normal'
      log_odds = 0
      #override the defaults if a config is provided
      if config:
        if 'distribution' in config:
          distribution = config['distribution']
        if 'log_odds_threshold' in config:
          log_odds = config['log_odds_threshold']
      detector = cpDetector([signal], distribution=distribution, log_odds_threshold=log_odds)
      detector.detect_cp()
      
      
      #gets the breakpoints via idx from the detector
      predicted_breaks = detector.change_points['traj_0']['ts'].values
      predicted_breaks = np.append(predicted_breaks, len(signal))

    if self.pen == 'aic':
        pen = self.aic_penalty(signal)
    elif self.pen == 'bic':
        pen = self.bic_penalty(signal)

    if cp_algo == 'pelt':
      model = 'rbf'
      #pen= 10
      if config:
        if 'model' in config:
          model = config['model']
        if 'pen' in config:
          pen = config['pen']
      algo = rpt.Pelt(model=model).fit(signal)
      #gets the breakpoints via idx from the detector
      predicted_breaks = algo.predict(pen=pen) #may need to change the 10


    if cp_algo == 'binseg':
      algo = rpt.Binseg(model='rbf').fit(signal)
      predicted_breaks = algo.predict(pen=pen)

    if cp_algo == 'window':
      width = 10
      model = 'rbf'
      std = 0.045
      #cost = rpt.costs.CostRank().fit(signal)
      if config:
        if 'width' in config:
          width = config['width']
        if 'model' in config:
          model = config['model']
        if 'std' in config:
          std = config['std']
      n_bkps = 3*len(signal)*std**2  # epsilon-style threshold; note it is not used below (predict() uses pen)

      algo = rpt.Window(width=width).fit(signal)
      predicted_breaks = algo.predict(pen=pen)

    return predicted_breaks
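The aic_penalty and bic_penalty methods referenced above are not shown. A hedged sketch of what they might look like, written as standalone helpers and using the usual information-criterion rules of thumb for the ruptures pen argument (an assumption, not the original implementation):

import numpy as np

def bic_penalty(signal):
    # Assumed BIC-style rule of thumb: log(n) * sigma**2, the same form as the
    # penalties used in several of the examples above.
    signal = np.asarray(signal, dtype=float)
    return np.log(len(signal)) * signal.var()

def aic_penalty(signal):
    # Assumed AIC-style rule of thumb: a constant 2 * sigma**2 per change point.
    signal = np.asarray(signal, dtype=float)
    return 2.0 * signal.var()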