Exemplo n.º 1
0
 def test_pelt_exponential_big(self):
     """Check PELT with the exponential cost on two seeded 100-sample halves.

     The first half is drawn with scale 1 and the second with scale 2.1;
     the seed is fixed so the expected changepoints are reproducible.
     """
     np.random.seed(1)
     # Draw order matters for reproducibility: scale 1 first, then scale 2.1.
     first_half = np.random.exponential(1, 100)
     second_half = np.random.exponential(2.1, 100)
     samples = np.concatenate((first_half, second_half))

     changepoints = pelt(exponential(samples), len(samples))
     self.assertEqual(changepoints, [0, 101])
def detect_changepoints(points, min_time, data_processor=acc_difference):
    """Return changepoint indexes whose following sub-segment lasts long enough.

    PELT (normal-mean cost) is run on the series extracted from the points.
    A detected changepoint is kept only when the time between it and the
    next changepoint exceeds ``min_time``. The first and last point indexes
    are always part of the result.

    Args:
        points (:obj:`list` of :obj:`Point`): Points to segment. Each point
            must provide ``time_difference(other)``.
        min_time (float): Minimum duration a sub-segment, bounded by two
            changepoints, must exceed for its start to be kept.
        data_processor (function): Extracts from ``points`` the data fed to
            the changepoint algorithm. Defaults to `acc_difference`.
    Returns:
        :obj:`list` of int: Sorted, de-duplicated indexes of changepoints
    """
    series = data_processor(points)
    # NOTE(review): the standard deviation is passed as the second argument
    # of `normal_mean`; confirm whether a variance is expected there.
    boundaries = pelt(normal_mean(series, np.std(series)), len(series))
    final_index = len(points) - 1
    boundaries.append(final_index)

    kept = {
        begin
        for begin, finish in pairwise(boundaries)
        if points[finish].time_difference(points[begin]) > min_time
    }
    # The first and the last point are always reported.
    kept.update((0, final_index))
    return sorted(kept)
Exemplo n.º 3
0
def detect_changepoints(points, min_time, data_processor=acc_difference):
    """Find changepoints that open a sub-segment longer than ``min_time``.

    The data returned by ``data_processor(points)`` is segmented with PELT
    using a normal-mean cost. Each detected changepoint is compared with the
    next one (the last point index closes the final pair) and is kept when
    the elapsed time between the two points is greater than ``min_time``.

    Args:
        points (:obj:`list` of :obj:`Point`): Points to segment. Each point
            must provide ``time_difference(other)``.
        min_time (float): Minimum duration a sub-segment, bounded by two
            changepoints, must exceed for its start to be kept.
        data_processor (function): Extracts from ``points`` the data fed to
            the changepoint algorithm. Defaults to `acc_difference`.
    Returns:
        :obj:`list` of int: Sorted, de-duplicated indexes of changepoints,
            always including ``0`` and ``len(points) - 1``
    """
    series = data_processor(points)
    # NOTE(review): np.std (not a variance) is what gets handed to
    # `normal_mean` here; confirm this is intended.
    candidates = pelt(normal_mean(series, np.std(series)), len(series))
    final_index = len(points) - 1
    candidates.append(final_index)

    # Seed the selection with the two indexes that are always reported.
    selected = [0, final_index]
    for left, right in pairwise(candidates):
        elapsed = points[right].time_difference(points[left])
        if elapsed > min_time:
            selected.append(left)

    return sorted(set(selected))
Exemplo n.º 4
0
 def test_poisson_r(self):
     """Check PELT with the Poisson cost against a fixed expected result.

     The series is a hard-coded list of non-negative integer counts and the
     penalty passed to ``pelt`` is ``2 * log(n)``.

     NOTE(review): the ``_r`` suffix suggests the expected changepoints were
     produced with an R implementation; confirm the origin of the fixture.
     """
     # Fixed fixture: do not reformat or edit values, the expected
     # changepoints below depend on every element.
     data = [
         4, 4, 3, 8, 4, 3, 9, 6, 5, 4, 6, 5, 3, 4, 6, 0, 3, 5, 4, 4, 7, 7,
         5, 2, 6, 4, 8, 6, 3, 3, 2, 1, 1, 5, 9, 7, 3, 3, 6, 4, 6, 2, 4, 3,
         6, 3, 10, 6, 9, 3, 8, 2, 5, 8, 5, 4, 4, 3, 1, 5, 9, 5, 4, 2, 7, 4,
         0, 1, 2, 3, 6, 4, 6, 2, 8, 6, 2, 5, 6, 4, 6, 6, 2, 5, 5, 7, 11, 7,
         6, 9, 7, 9, 4, 4, 7, 5, 8, 5, 8, 9, 12, 6, 1, 5, 6, 6, 3, 7, 7, 8,
         10, 4, 5, 8, 2, 7, 8, 7, 10, 9, 7, 4, 3, 8, 6, 7, 4, 7, 4, 11, 12,
         6, 10, 5, 8, 6, 8, 5, 4, 8, 7, 5, 9, 7, 8, 6, 9, 8, 5, 6, 13, 4, 8,
         5, 11, 4, 8, 5, 5, 8, 7, 10, 8, 8, 4, 5, 4, 4, 11, 8, 5, 10, 4, 4,
         8, 9, 5, 5, 12, 1, 4, 8, 4, 6, 6, 9, 3, 6, 7, 8, 3, 3, 6, 7, 5, 7,
         5, 4, 10, 5, 5, 6, 7, 4, 3, 3, 3, 6, 9, 7, 9, 6, 6, 7, 5, 6, 6, 7,
         7, 6, 12, 10, 6, 5, 10, 10, 9, 8, 19, 24, 16, 25, 15, 24, 16, 21,
         18, 16, 21, 9, 11, 18, 22, 16, 21, 18, 11, 19, 20, 15, 17, 25, 16,
         19, 22, 20, 17, 25, 27, 12, 20, 23, 20, 22, 19, 24, 21, 19, 19, 20,
         18, 21, 14, 16, 15, 18, 24, 19, 17, 16, 21, 20, 22, 14, 21, 21, 16,
         16, 12, 15, 22, 21, 18, 10, 21, 16, 22, 14, 22, 16, 19, 20, 26, 21,
         20, 23, 6, 21, 17, 18, 24, 18, 18, 14, 17, 16, 16, 19, 16, 19, 15,
         18, 19, 22, 23, 22, 19, 16, 21, 14, 24, 22, 19, 21, 16, 16, 15, 21,
         23, 17, 15, 21, 16, 20, 13, 17, 16, 20, 9, 21, 17, 23, 15, 20, 11,
         20, 15, 20, 19, 18, 5, 6, 9, 4, 8, 6, 5, 8, 4, 5, 6, 7, 7, 8, 4, 3,
         7, 3, 2, 8, 5, 7, 4, 12, 7, 8, 7, 1, 6, 8, 5, 9, 9, 9, 6, 6, 6, 4,
         9, 4, 7, 2, 5, 7, 11, 4, 4, 9, 6, 4, 10, 11, 11, 6, 12, 5, 5, 6, 5,
         3, 3, 6, 13, 3, 4, 4, 7, 6, 8, 5, 9, 6, 7, 11, 2, 6, 9, 5, 3, 3, 3,
         6, 1, 2, 4, 1, 5, 1, 3, 6, 3, 2, 4, 2, 3, 3, 3, 0, 1, 7, 6, 1, 3,
         2, 3, 1, 7, 1, 2, 4, 4, 3, 4, 5, 2, 3, 2, 5, 1, 0, 4, 5, 0, 4, 4,
         3, 3, 2, 2, 2, 1, 4, 1, 4, 5, 2, 2, 6, 3, 2
     ]
     cost = poisson(data)
     # Third argument is the penalty; here 2 * log(number of observations).
     result = pelt(cost, len(data), 2 * np.log(len(data)))
     self.assertEqual(result, [0, 85, 228, 360, 438])
    def calculate_peak(self, file_index=-1, is_horizontal=True):
        """Locate the peak position of one stored profile and record it.

        The profile is read from
        ``self.roi[label]['profiles'][str(file_index)]['profile']`` where
        ``label`` is ``'horizontal'`` or ``'vertical'``. The resulting
        position, truncated to ``int``, is written to
        ``self.peak[label][file_index]``.

        Two algorithms are available, selected by ``self.peak_algorithm``:

        * ``'change_point'``: run PELT with the ``normal_var`` cost and take
          the mean of the detected changepoint indexes, skipping the first
          two (or only the first one when at most two were found).
        * anything else ("sliding average"): for every pixel except the last
          five, ask ``MeanRangeCalculation`` for its ``delta_square`` value
          and take the index of the largest one.

        Args:
            file_index (int): Key of the profile to analyse (converted to
                ``str`` for the profile lookup, used as-is for the result).
            is_horizontal (bool): Work on the horizontal profile when true,
                on the vertical one otherwise.

        Raises:
            ValueError: In the sliding-average branch when the profile has
                five pixels or fewer (there is nothing to take ``max`` of).
        """
        label = 'horizontal' if is_horizontal else 'vertical'

        _profile = self.roi[label]['profiles'][str(file_index)]
        yaxis = _profile['profile']

        if self.peak_algorithm == 'change_point':
            # The profile mean is the fixed mean handed to the
            # changing-variance cost function.
            profile_mean = np.mean(yaxis)
            result = pelt(normal_var(yaxis, profile_mean), len(yaxis))
            # Previously the value was stored in `peak`, leaving `peak_value`
            # unbound and making this branch fail on the final assignment.
            if len(result) > 2:
                peak_value = np.mean(result[2:])
            else:
                peak_value = np.mean(result[1:])

        else:  # sliding average
            _o_range = MeanRangeCalculation(data=yaxis)
            nbr_pixels = len(yaxis)
            delta_array = []
            for _pixel in np.arange(0, nbr_pixels - 5):
                _o_range.calculate_left_right_mean(pixel=_pixel)
                _o_range.calculate_delta_mean_square()
                delta_array.append(_o_range.delta_square)

            # delta_array already holds exactly nbr_pixels - 5 entries.
            peak_value = delta_array.index(max(delta_array))

        # `np.int` was only an alias of the builtin and no longer exists in
        # current NumPy releases.
        self.peak[label][file_index] = int(peak_value)
Exemplo n.º 6
0
    def test_pelt_normal_var_small(self):
        """Check the changing-variance cost on a ten-sample series.

        The cost is built with a fixed mean of 0 and PELT is expected to
        report changepoints ``[0, 5]``.
        """
        samples = np.array([
            -1.82348457, -0.13819782, 1.25618544, -0.54487136, -2.24769311,
            9.82204284, -1.0181088, 3.93764179, -8.73177678, 5.99949843,
        ])

        cost = normal_var(samples, 0)
        changepoints = pelt(cost, len(samples))
        self.assertEqual(changepoints, [0, 5])
Exemplo n.º 7
0
    def test_pelt_normal_mean_small(self):
        """Check the changing-mean cost on a ten-sample series.

        The first five samples sit near 0 and the last five near 10; with
        0.1 as the second argument of ``normal_mean``, PELT is expected to
        report changepoints ``[0, 5]``.
        """
        samples = [
            0.16853651, 0.0261112, -0.0655322, 0.11575204, 0.11388594,
            10.001775, 9.92765733, 10.01303474, 9.97938986, 10.05994745,
        ]

        cost = normal_mean(samples, 0.1)
        changepoints = pelt(cost, len(samples))
        self.assertEqual(changepoints, [0, 5])
Exemplo n.º 8
0
    def test_pelt_normal_var_big(self):
        """Check the changing-variance cost on two seeded 100-sample halves.

        Both halves share mean 0.0. The values 1.0 and 10.0 are passed as
        the second argument of ``np.random.normal``, which NumPy interprets
        as the scale (standard deviation) of the draw.
        """
        half_length = 100
        centre = 0.0

        np.random.seed(19348)
        # Draw order is part of the fixture: narrow half first, wide second.
        narrow_half = np.random.normal(centre, 1.0, half_length)
        wide_half = np.random.normal(centre, 10.0, half_length)
        samples = np.concatenate((narrow_half, wide_half))

        changepoints = pelt(normal_var(samples, centre), len(samples))
        self.assertEqual(changepoints, [0, 100, 198])
Exemplo n.º 9
0
    def test_pelt_normal_mean_big(self):
        """Check the changing-mean cost on two seeded 100-sample halves.

        The halves are drawn around 0.0 and 10.0. The value 0.1 is used both
        as the scale of the draws and as the second argument of
        ``normal_mean``. The only expected changepoints are the series start
        and the boundary between the halves.
        """
        half_length = 100
        spread = 0.1

        np.random.seed(19348)
        # Draw order is part of the fixture: low half first, high second.
        low_half = np.random.normal(0.0, spread, half_length)
        high_half = np.random.normal(10.0, spread, half_length)
        samples = np.concatenate((low_half, high_half))

        changepoints = pelt(normal_mean(samples, spread), len(samples))
        self.assertEqual(changepoints, [0, half_length])
# First derivative of `sp_dset` (defined above this excerpt), evaluated at
# every entry of `frames`.
sp_der = sp_dset.derivative(n=1)

dset_der = sp_der(frames)
#---------------------------------------------------------------------------------------

# Disabled alternative: changepoint detection through
# offcd.offline_changepoint_detection on the derivative.
#Q, P, Pcp = offcd.offline_changepoint_detection(dset_der, partial(offcd.const_prior,
#                l=(len(dset_der)+1)), offcd.gaussian_obs_log_likelihood, truncate=-50)

#---------------------------------------------------------------------------------------

# NOTE(review): within this excerpt `dset_var` is only referenced by
# commented-out code; check for other readers before removing it.
dset_var = np.var(dset_der)
#Q, P, Pcp = offcd.offline_changepoint_detection(dset_var, \
#     partial(offcd.const_prior, l=(len(dset_var)+1)), \
#     offcd.gaussian_obs_log_likelihood, truncate=-40)
# Active detector: PELT with the normal-mean cost; `var_user` is defined
# outside this excerpt.
changepts = pelt(
    normal_mean(dset_der, var_user),
    len(dset_der))  #var_scale*dset_var #var_scale_testdata*len(dset_der)
#changepts = pelt(normal_meanvar(dset_der),len(dset_der))
# Index of the last sample of the derivative.
N = len(dset_der) - 1

# Make sure the changepoint list starts at the first sample ...
if 0 not in changepts:
    changepts.insert(0, 0)
#if len(dset_der) not in changepts:
#    changepts.append(len(dset_der))
# ... and ends at the last one, so consecutive entries delimit every segment.
if N not in changepts:
    changepts.append(N)

#print(changepts)

#---------------------------------------------------------------------------------------
def pelt_(data):
    """Run PELT on ``data`` with a normal-mean cost and return its result.

    The cost is built from ``data`` and ``np.std(data)``, and the series
    length is passed as the number of observations.

    NOTE(review): other call sites name the second argument of
    ``normal_mean`` ``var``; here the standard deviation is supplied.
    Confirm this is intended.
    """
    spread = np.std(data)
    cost = normal_mean(data, spread)
    return pelt(cost, len(data))
Exemplo n.º 12
0
def bout_analysis(dset,
                  frames,
                  var_user=analysisParams['var_user'],
                  var_user_flag=False,
                  mad_thresh=analysisParams['mad_thresh']):
    """Detect bouts in a signal from changepoints of its smoothed derivative.

    Processing steps:

    1. Denoise ``dset`` with ``wavelet_denoise`` and a median filter.
    2. Fit an interpolating spline over ``frames`` and evaluate its first
       derivative at every frame.
    3. Segment the derivative with PELT (normal-mean cost).
    4. Take the median derivative of every segment and express it as a
       deviation from the overall median, in units of the median absolute
       deviation (MAD).
    5. Mark segments whose deviation is below ``mad_thresh`` as bout
       segments, and pair each rising edge (bout start) with the following
       falling edge (bout end).
    6. Keep the bouts whose duration and volume change exceed the
       ``min_bout_duration`` / ``min_bout_volume`` settings.

    Wavelet, filter and minimum-size settings are read from the module-level
    ``analysisParams`` mapping.

    Args:
        dset: Raw signal, one value per frame.
        frames: Frame positions used as the spline abscissa and to evaluate
            the derivative.
        var_user: Second argument handed to ``normal_mean``. Ignored unless
            ``var_user_flag`` is true. The default is read from
            ``analysisParams`` once, when the function is defined.
        var_user_flag: When false (the default), ``var_user`` is replaced by
            ``IQR ** 2 / 2`` of the derivative.
        mad_thresh: Threshold, in MAD units, below which a segment counts as
            part of a bout. The default is read from ``analysisParams`` once,
            when the function is defined.

    Returns:
        tuple: ``(dset_denoised_med, bouts, volumes)`` where
        ``dset_denoised_med`` is the denoised, median-filtered signal,
        ``bouts`` is a 2-row array (start indexes, end indexes) of the kept
        bouts, and ``volumes`` is the filtered signal at each start minus the
        filtered signal at the matching end.
    """
    wlevel = analysisParams['wlevel']
    wtype = analysisParams['wtype']
    medfilt_window = analysisParams['medfilt_window']
    min_bout_duration = analysisParams['min_bout_duration']
    min_bout_volume = analysisParams['min_bout_volume']

    # --- Smoothing and differentiation ---------------------------------
    dset_denoised = wavelet_denoise(dset, wtype, wlevel)
    dset_denoised_med = signal.medfilt(dset_denoised, medfilt_window)

    sp_dset = interpolate.InterpolatedUnivariateSpline(
        frames, np.squeeze(dset_denoised_med))
    dset_der = sp_dset.derivative(n=1)(frames)

    # Equality test kept on purpose: it preserves the behavior for every
    # value callers may already pass (e.g. None keeps `var_user`).
    if var_user_flag == False:  # noqa: E712
        iq_range = np.percentile(dset_der, 75) - np.percentile(dset_der, 25)
        var_user = np.power(iq_range, 2.0) / 2.0

    # --- Changepoint detection ------------------------------------------
    changepts = pelt(normal_mean(dset_der, var_user), len(dset_der))
    last_index = len(dset_der) - 1

    # Guarantee that consecutive changepoints delimit the whole series.
    if 0 not in changepts:
        changepts.insert(0, 0)
    if last_index not in changepts:
        changepts.append(last_index)

    # --- Per-segment slope and bout labelling ---------------------------
    # Each segment includes its closing changepoint, hence the `+ 1`.
    piecewise_fits = np.array(
        [np.median(dset_der[seg_start:seg_end + 1])
         for seg_start, seg_end in zip(changepts[:-1], changepts[1:])],
        dtype=float)

    median_slope = np.median(dset_der)
    # NOTE(review): a zero MAD (e.g. constant derivative) makes the division
    # below yield inf/nan; no special handling is done for that case.
    mad_slope = np.median(np.abs(median_slope - dset_der))

    piecewise_fits_dev = (piecewise_fits - median_slope) / mad_slope
    bout_ind = (piecewise_fits_dev < mad_thresh).astype(int)
    bout_ind_diff = np.diff(bout_ind)

    # +1 turns "edge between segment i and i+1" into the index of the
    # changepoint that opens segment i+1.
    bouts_start_ind = np.where(bout_ind_diff == 1)[0] + 1
    bouts_end_ind = np.where(bout_ind_diff == -1)[0] + 1

    # If the series begins inside a bout, the first end edge has no matching
    # start. Pairing by position would then shift every pair by one and give
    # non-positive durations, so that unmatched end is dropped first.
    if (bouts_start_ind.size and bouts_end_ind.size
            and bouts_end_ind[0] < bouts_start_ind[0]):
        bouts_end_ind = bouts_end_ind[1:]

    # A trailing start without an end (bout still running at the last
    # sample) is discarded by truncating to the common length.
    n_bouts = min(len(bouts_start_ind), len(bouts_end_ind))
    bouts_start_ind = bouts_start_ind[:n_bouts]
    bouts_end_ind = bouts_end_ind[:n_bouts]

    # --- Bout extents, volumes and filtering ----------------------------
    changepts_array = np.asarray(changepts)
    bouts_start = changepts_array[bouts_start_ind]
    bouts_end = changepts_array[bouts_end_ind]

    bouts = np.vstack((bouts_start, bouts_end))
    volumes = dset_denoised_med[bouts_start] - dset_denoised_med[bouts_end]

    bout_durations = bouts_end - bouts_start
    good_ind = ((bout_durations > min_bout_duration)
                & (volumes > min_bout_volume))

    return (dset_denoised_med, bouts[:, good_ind], volumes[good_ind])
Exemplo n.º 13
0
    def _pelt_change_point_detection(self,
                                     df=None,
                                     metric=None,
                                     min_ts_length=None,
                                     max_ts_length=None):
        """
        Detect significant change points with PELT and pick the training window accordingly.

        PELT (changing-variance cost, fixed to the series mean) proposes candidate change points. Each candidate
        is scored by ``self._shift_intensity`` and only those scoring above ``self.change_point_threshold`` are
        kept. A change point at index 0 is ignored.

        :param pandas.dataframe df: A pandas dataframe containing the time series
        :param str metric: The column of the dataframe that contains the time series
        :param int min_ts_length: Specifying the minimum required length of the time series for training
        :param int max_ts_length: Specifying the maximum required length of the time series for training.
        A longer window is replaced by the last ``max_ts_length`` rows of ``df``.
        :return: A tuple ``(copy_df, change_point_list)``. ``change_point_list`` holds the string form of the
        index labels of the significant change points, or None when there are none. ``copy_df`` is the training
        window: the rows from the last significant change point onwards when ``self.data_shift_truncate`` is
        set and such a change point exists, otherwise ``df``; in both cases capped to ``max_ts_length`` rows.
        ``copy_df`` is None when the window is shorter than ``min_ts_length``.

        Example (abridged): for a daily dataframe with columns ``raw`` and ``interpolated`` spanning
        2016-01-02 to 2018-10-18 (1021 rows), the call may return the dataframe together with
        ``['2016-12-26 00:00:00', '2018-09-10 00:00:00']``.
        """
        import numpy as np
        import pandas as pd
        from changepy import pelt
        from changepy.costs import normal_var

        def _window_without_change_points():
            # Training window used whenever no significant change point is available.
            if df.shape[0] < min_ts_length:
                return None
            if df.shape[0] < max_ts_length:
                return df
            return df.iloc[-max_ts_length:]

        threshold = self.change_point_threshold

        series_df = pd.DataFrame(df[metric])
        values = series_df[metric].values

        # Candidate change points: shifts in data variability found by PELT
        candidates = pelt(normal_var(values, np.mean(values)), len(values))

        # A change point on the very first data point carries no information
        if candidates and candidates[0] == 0:
            candidates.remove(0)

        if not len(candidates) > 0:
            return _window_without_change_points(), None

        # Score every candidate by the divergence between the data on its left and on its right
        divergence = self._shift_intensity(change_points=candidates,
                                           df=series_df,
                                           metric=metric)
        scored = pd.DataFrame({
            'c_point': candidates,
            'entropy': divergence
        })

        # Keep only the candidates whose divergence exceeds the configured lower bound
        is_significant = scored['entropy'] > threshold
        significant = scored[is_significant]['c_point'].values

        if not len(significant) > 0:
            return _window_without_change_points(), None

        change_rows = series_df.iloc[significant]
        change_point_list = [str(label) for label in change_rows.index]
        last_change = change_rows.index[-1]

        if self.data_shift_truncate:
            copy_df = df.loc[last_change:df.last_valid_index()]
        else:
            copy_df = df

        if copy_df.shape[0] < min_ts_length:
            # Too few data points after the change point to train on
            return None, change_point_list
        if copy_df.shape[0] > max_ts_length:
            # More data than required for training: keep only the most recent rows
            copy_df = df.iloc[-max_ts_length:]

        return copy_df, change_point_list
Exemplo n.º 14
0
 def test_pelt_exponential_big(self):
     """Run PELT with the exponential cost on 200 seeded exponential draws.

     100 draws with scale 1 are followed by 100 draws with scale 2.1; the
     fixed seed makes the expected changepoints deterministic.
     """
     np.random.seed(1)
     # Keep the draw order (scale 1, then scale 2.1): it defines the fixture.
     halves = (
         np.random.exponential(1, 100),
         np.random.exponential(2.1, 100),
     )
     series = np.hstack(halves)

     detected = pelt(exponential(series), len(series))
     self.assertEqual(detected, [0, 101])
Exemplo n.º 15
0
 def test_poisson_r(self):
     """Check PELT with the Poisson cost against a fixed expected result.

     The series is a hard-coded list of non-negative integer counts and the
     penalty passed to ``pelt`` is ``2 * log(n)``.

     NOTE(review): the ``_r`` suffix suggests the expected changepoints were
     produced with an R implementation; confirm the origin of the fixture.
     """
     # Fixed fixture: the expected changepoints below depend on every value.
     data = [4,4,3,8,4,3,9,6,5,4,6,5,3,4,6,0,3,5,4,4,7,7,5,2,6,4,8,6,3,3,2,1,1,5,9,7,3,3,6,4,6,2,4,3,6,3,10,6,9,3,8,2,5,8,5,4,4,3,1,5,9,5,4,2,7,4,0,1,2,3,6,4,6,2,8,6,2,5,6,4,6,6,2,5,5,7,11,7,6,9,7,9,4,4,7,5,8,5,8,9,12,6,1,5,6,6,3,7,7,8,10,4,5,8,2,7,8,7,10,9,7,4,3,8,6,7,4,7,4,11,12,6,10,5,8,6,8,5,4,8,7,5,9,7,8,6,9,8,5,6,13,4,8,5,11,4,8,5,5,8,7,10,8,8,4,5,4,4,11,8,5,10,4,4,8,9,5,5,12,1,4,8,4,6,6,9,3,6,7,8,3,3,6,7,5,7,5,4,10,5,5,6,7,4,3,3,3,6,9,7,9,6,6,7,5,6,6,7,7,6,12,10,6,5,10,10,9,8,19,24,16,25,15,24,16,21,18,16,21,9,11,18,22,16,21,18,11,19,20,15,17,25,16,19,22,20,17,25,27,12,20,23,20,22,19,24,21,19,19,20,18,21,14,16,15,18,24,19,17,16,21,20,22,14,21,21,16,16,12,15,22,21,18,10,21,16,22,14,22,16,19,20,26,21,20,23,6,21,17,18,24,18,18,14,17,16,16,19,16,19,15,18,19,22,23,22,19,16,21,14,24,22,19,21,16,16,15,21,23,17,15,21,16,20,13,17,16,20,9,21,17,23,15,20,11,20,15,20,19,18,5,6,9,4,8,6,5,8,4,5,6,7,7,8,4,3,7,3,2,8,5,7,4,12,7,8,7,1,6,8,5,9,9,9,6,6,6,4,9,4,7,2,5,7,11,4,4,9,6,4,10,11,11,6,12,5,5,6,5,3,3,6,13,3,4,4,7,6,8,5,9,6,7,11,2,6,9,5,3,3,3,6,1,2,4,1,5,1,3,6,3,2,4,2,3,3,3,0,1,7,6,1,3,2,3,1,7,1,2,4,4,3,4,5,2,3,2,5,1,0,4,5,0,4,4,3,3,2,2,2,1,4,1,4,5,2,2,6,3,2]
     cost = poisson(data)
     # Third argument is the penalty; here 2 * log(number of observations).
     result = pelt(cost, len(data), 2*np.log(len(data)))
     self.assertEqual(result, [0, 85, 228, 360, 438])