def test_pelt_exponential_big(self):
    np.random.seed(1)
    data = np.hstack((np.random.exponential(1, 100),
                      np.random.exponential(2.1, 100)))
    cost = exponential(data)
    result = pelt(cost, len(data))
    self.assertEqual(result, [0, 101])
def detect_changepoints(points, min_time, data_processor=acc_difference):
    """ Detects changepoints on points that have at least a specific duration

    Args:
        points (:obj:`list` of :obj:`Point`)
        min_time (float): Minimum duration that a sub-segment, bounded by
            two changepoints, must have
        data_processor (function): Function to extract data to feed to the
            changepoint algorithm. Defaults to `acc_difference`
    Returns:
        :obj:`list` of int: Indexes of changepoints
    """
    data = data_processor(points)
    changepoints = pelt(normal_mean(data, np.std(data)), len(data))
    changepoints.append(len(points) - 1)

    result = []
    for start, end in pairwise(changepoints):
        time_diff = points[end].time_difference(points[start])
        if time_diff > min_time:
            result.append(start)

    # adds the first point
    result.append(0)
    # adds the last changepoint detected
    result.append(len(points) - 1)
    return sorted(list(set(result)))
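# A minimal usage sketch for detect_changepoints. `Point` and
# `acc_difference` below are hypothetical stand-ins (not the originals from
# the source package) with just enough interface for the function to run.
import numpy as np


class Point:
    def __init__(self, acc, time):
        self.acc = acc
        self.time = time

    def time_difference(self, other):
        # time gap, in seconds, between this point and another
        return abs(self.time - other.time)


def acc_difference(points):
    # consecutive acceleration deltas, padded so the output length
    # matches len(points)
    accs = np.array([p.acc for p in points])
    return np.abs(np.diff(accs, prepend=accs[0]))


# points = [Point(acc, t) for t, acc in enumerate(accelerations)]
# detect_changepoints(points, min_time=60)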
def test_poisson_r(self):
    data = [
        4, 4, 3, 8, 4, 3, 9, 6, 5, 4, 6, 5, 3, 4, 6, 0, 3, 5, 4, 4,
        7, 7, 5, 2, 6, 4, 8, 6, 3, 3, 2, 1, 1, 5, 9, 7, 3, 3, 6, 4,
        6, 2, 4, 3, 6, 3, 10, 6, 9, 3, 8, 2, 5, 8, 5, 4, 4, 3, 1, 5,
        9, 5, 4, 2, 7, 4, 0, 1, 2, 3, 6, 4, 6, 2, 8, 6, 2, 5, 6, 4,
        6, 6, 2, 5, 5, 7, 11, 7, 6, 9, 7, 9, 4, 4, 7, 5, 8, 5, 8, 9,
        12, 6, 1, 5, 6, 6, 3, 7, 7, 8, 10, 4, 5, 8, 2, 7, 8, 7, 10, 9,
        7, 4, 3, 8, 6, 7, 4, 7, 4, 11, 12, 6, 10, 5, 8, 6, 8, 5, 4, 8,
        7, 5, 9, 7, 8, 6, 9, 8, 5, 6, 13, 4, 8, 5, 11, 4, 8, 5, 5, 8,
        7, 10, 8, 8, 4, 5, 4, 4, 11, 8, 5, 10, 4, 4, 8, 9, 5, 5, 12, 1,
        4, 8, 4, 6, 6, 9, 3, 6, 7, 8, 3, 3, 6, 7, 5, 7, 5, 4, 10, 5,
        5, 6, 7, 4, 3, 3, 3, 6, 9, 7, 9, 6, 6, 7, 5, 6, 6, 7, 7, 6,
        12, 10, 6, 5, 10, 10, 9, 8, 19, 24, 16, 25, 15, 24, 16, 21, 18, 16, 21, 9,
        11, 18, 22, 16, 21, 18, 11, 19, 20, 15, 17, 25, 16, 19, 22, 20, 17, 25, 27, 12,
        20, 23, 20, 22, 19, 24, 21, 19, 19, 20, 18, 21, 14, 16, 15, 18, 24, 19, 17, 16,
        21, 20, 22, 14, 21, 21, 16, 16, 12, 15, 22, 21, 18, 10, 21, 16, 22, 14, 22, 16,
        19, 20, 26, 21, 20, 23, 6, 21, 17, 18, 24, 18, 18, 14, 17, 16, 16, 19, 16, 19,
        15, 18, 19, 22, 23, 22, 19, 16, 21, 14, 24, 22, 19, 21, 16, 16, 15, 21, 23, 17,
        15, 21, 16, 20, 13, 17, 16, 20, 9, 21, 17, 23, 15, 20, 11, 20, 15, 20, 19, 18,
        5, 6, 9, 4, 8, 6, 5, 8, 4, 5, 6, 7, 7, 8, 4, 3, 7, 3, 2, 8,
        5, 7, 4, 12, 7, 8, 7, 1, 6, 8, 5, 9, 9, 9, 6, 6, 6, 4, 9, 4,
        7, 2, 5, 7, 11, 4, 4, 9, 6, 4, 10, 11, 11, 6, 12, 5, 5, 6, 5, 3,
        3, 6, 13, 3, 4, 4, 7, 6, 8, 5, 9, 6, 7, 11, 2, 6, 9, 5, 3, 3,
        3, 6, 1, 2, 4, 1, 5, 1, 3, 6, 3, 2, 4, 2, 3, 3, 3, 0, 1, 7,
        6, 1, 3, 2, 3, 1, 7, 1, 2, 4, 4, 3, 4, 5, 2, 3, 2, 5, 1, 0,
        4, 5, 0, 4, 4, 3, 3, 2, 2, 2, 1, 4, 1, 4, 5, 2, 2, 6, 3, 2
    ]
    cost = poisson(data)
    result = pelt(cost, len(data), 2 * np.log(len(data)))
    self.assertEqual(result, [0, 85, 228, 360, 438])
def calculate_peak(self, file_index=-1, is_horizontal=True):
    if is_horizontal:
        label = 'horizontal'
    else:
        label = 'vertical'

    _profile = self.roi[label]['profiles'][str(file_index)]
    yaxis = _profile['profile']

    if self.peak_algorithm == 'change_point':
        var = np.mean(yaxis)
        result = pelt(normal_var(yaxis, var), len(yaxis))
        # use the mean of the detected changepoint indexes as the peak
        # (the original assigned this to `peak`, leaving `peak_value`
        # undefined in this branch; renamed so the final assignment works)
        if len(result) > 2:
            peak_value = np.mean(result[2:])
        else:
            peak_value = np.mean(result[1:])
    else:  # sliding average
        _o_range = MeanRangeCalculation(data=yaxis)
        nbr_pixels = len(yaxis)
        delta_array = []
        for _pixel in np.arange(0, nbr_pixels - 5):
            _o_range.calculate_left_right_mean(pixel=_pixel)
            _o_range.calculate_delta_mean_square()
            delta_array.append(_o_range.delta_square)
        peak_value = delta_array.index(max(delta_array[0:nbr_pixels - 5]))

    # np.int was removed in NumPy 1.24; the builtin int is equivalent here
    self.peak[label][file_index] = int(peak_value)
def test_pelt_normal_var_small(self):
    """ Test normal changing variance, with smaller dataset """
    data = [
        -1.82348457, -0.13819782, 1.25618544, -0.54487136, -2.24769311,
        9.82204284, -1.0181088, 3.93764179, -8.73177678, 5.99949843
    ]
    result = pelt(normal_var(np.array(data), 0), len(data))
    self.assertEqual(result, [0, 5])
def test_pelt_normal_mean_small(self):
    """ Test normal changing mean, with smaller dataset """
    var = 0.1
    data = [
        0.16853651, 0.0261112, -0.0655322, 0.11575204, 0.11388594,
        10.001775, 9.92765733, 10.01303474, 9.97938986, 10.05994745
    ]
    result = pelt(normal_mean(data, var), len(data))
    self.assertEqual(result, [0, 5])
def test_pelt_normal_var_big(self):
    """ Test normal changing variance, with bigger dataset """
    size = 100
    mean = 0.0
    var_a = 1.0
    var_b = 10.0

    np.random.seed(19348)
    data_a = np.random.normal(mean, var_a, size)
    data_b = np.random.normal(mean, var_b, size)
    data = np.append(data_a, data_b)

    result = pelt(normal_var(data, mean), len(data))
    self.assertEqual(result, [0, 100, 198])
def test_pelt_normal_mean_big(self):
    """ Test normal changing mean, with bigger dataset """
    size = 100
    mean_a = 0.0
    mean_b = 10.0
    var = 0.1

    np.random.seed(19348)
    data_a = np.random.normal(mean_a, var, size)
    data_b = np.random.normal(mean_b, var, size)
    data = np.append(data_a, data_b)

    result = pelt(normal_mean(data, var), len(data))
    self.assertEqual(result, [0, size])
sp_der = sp_dset.derivative(n=1)
dset_der = sp_der(frames)

#---------------------------------------------------------------------------
# Alternative (disabled): Bayesian offline changepoint detection
# Q, P, Pcp = offcd.offline_changepoint_detection(
#     dset_der, partial(offcd.const_prior, l=(len(dset_der) + 1)),
#     offcd.gaussian_obs_log_likelihood, truncate=-50)
#---------------------------------------------------------------------------

dset_var = np.var(dset_der)
# Q, P, Pcp = offcd.offline_changepoint_detection(
#     dset_var, partial(offcd.const_prior, l=(len(dset_var) + 1)),
#     offcd.gaussian_obs_log_likelihood, truncate=-40)

changepts = pelt(normal_mean(dset_der, var_user), len(dset_der))
# changepts = pelt(normal_meanvar(dset_der), len(dset_der))

# ensure the first and last indexes are included as changepoints
N = len(dset_der) - 1
if 0 not in changepts:
    changepts.insert(0, 0)
if N not in changepts:
    changepts.append(N)
#---------------------------------------------------------------------------
def pelt_(data):
    # NOTE: changepy's normal_mean cost takes a variance; the original
    # passes np.std(data) (a standard deviation), kept here as-is
    return pelt(normal_mean(data, np.std(data)), len(data))
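# A minimal usage sketch for pelt_ (the synthetic series and seed are
# assumptions, not from the original source).
import numpy as np
from changepy import pelt
from changepy.costs import normal_mean

np.random.seed(7)
series = np.concatenate((np.random.normal(0.0, 0.1, 50),
                         np.random.normal(5.0, 0.1, 50)))
print(pelt_(series))  # segment start indexes, expected close to [0, 50]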
def bout_analysis(dset, frames, var_user=analysisParams['var_user'],
                  var_user_flag=False,
                  mad_thresh=analysisParams['mad_thresh']):

    wlevel = analysisParams['wlevel']
    wtype = analysisParams['wtype']
    medfilt_window = analysisParams['medfilt_window']
    min_bout_duration = analysisParams['min_bout_duration']
    min_bout_volume = analysisParams['min_bout_volume']

    #-----------------------------------------------------------------------
    # denoise and median-filter the signal, then take the derivative of an
    # interpolating spline
    dset_denoised = wavelet_denoise(dset, wtype, wlevel)
    dset_denoised_med = signal.medfilt(dset_denoised, medfilt_window)
    sp_dset = interpolate.InterpolatedUnivariateSpline(
        frames, np.squeeze(dset_denoised_med))
    sp_der = sp_dset.derivative(n=1)
    dset_der = sp_der(frames)

    # default variance estimate from the interquartile range
    if not var_user_flag:
        iq_range = np.percentile(dset_der, 75) - np.percentile(dset_der, 25)
        var_user = np.power(iq_range, 2.0) / 2.0

    #-----------------------------------------------------------------------
    # changepoint detection on the derivative; force the first and last
    # indexes to be changepoints
    changepts = pelt(normal_mean(dset_der, var_user), len(dset_der))

    N = len(dset_der) - 1
    if 0 not in changepts:
        changepts.insert(0, 0)
    if N not in changepts:
        changepts.append(N)

    #-----------------------------------------------------------------------
    # piecewise-constant (median) fit of the derivative between changepoints
    piecewise_fits = np.empty(len(changepts) - 1)
    piecewise_fit_dist = np.empty_like(dset_der)

    for i in range(0, len(changepts) - 1):
        ipt1 = changepts[i]
        ipt2 = changepts[i + 1] + 1
        fit_temp = np.median(dset_der[ipt1:ipt2])
        piecewise_fits[i] = fit_temp
        piecewise_fit_dist[ipt1:ipt2] = fit_temp * np.ones_like(
            dset_der[ipt1:ipt2])

    # MAD-normalized deviation of each segment's slope; segments whose
    # deviation falls below the threshold are flagged as bouts
    mad_slope = np.median(np.abs(np.median(dset_der) - dset_der))
    piecewise_fits_dev = (piecewise_fits - np.median(dset_der)) / mad_slope

    bout_ind = (piecewise_fits_dev < mad_thresh)  # ~z score of 1
    bout_ind = bout_ind.astype(int)
    bout_ind_diff = np.diff(bout_ind)

    bouts_start_ind = np.where(bout_ind_diff == 1)[0] + 1
    bouts_end_ind = np.where(bout_ind_diff == -1)[0] + 1

    # drop an unmatched start or end so the two index lists pair up
    if len(bouts_start_ind) != len(bouts_end_ind):
        minLength = np.min([len(bouts_start_ind), len(bouts_end_ind)])
        bouts_start_ind = bouts_start_ind[0:minLength]
        bouts_end_ind = bouts_end_ind[0:minLength]

    changepts_array = np.asarray(changepts)
    bouts_start = changepts_array[bouts_start_ind]
    bouts_end = changepts_array[bouts_end_ind]
    bouts = np.vstack((bouts_start, bouts_end))
    volumes = dset_denoised_med[bouts_start] - dset_denoised_med[bouts_end]

    # keep only bouts that are long enough and large enough
    bout_durations = bouts_end - bouts_start
    good_ind = (bout_durations > min_bout_duration) & \
               (volumes > min_bout_volume)
    bouts = bouts[:, good_ind]
    volumes = volumes[good_ind]

    return (dset_denoised_med, bouts, volumes)
def _pelt_change_point_detection(self, df=None, metric=None,
                                 min_ts_length=None, max_ts_length=None):
    """
    This function computes the significant change points based on PELT and
    the Kullback-Leibler divergence method.

    :param pandas.DataFrame df: A pandas dataframe containing the time series
    :param str metric: The column in the dataframe that contains the time series
    :param int min_ts_length: The minimum required length of the time series
        for training
    :param int max_ts_length: The maximum required length of the time series
        for training. The training time series is truncated to the minimum
        of max_ts_length and the length of the input time series.
    :return: A pandas dataframe containing the time series after the last
        changepoint, and the list of detected change points

    >>> df
                      raw  interpolated
    2016-01-02  1753421.0     14.377080
    2016-01-03  1879108.0     14.446308
    2016-01-04  1462725.0     14.195812
    2016-01-05  1525162.0     14.237612
    2016-01-06  1424264.0     14.169166
    ...               ...           ...
    2018-10-14  2185230.0     14.597232
    2018-10-15  1825539.0     14.417386
    2018-10-16  1776778.0     14.390313
    2018-10-17  1792899.0     14.399345
    2018-10-18  1738657.0     14.368624

    >>> copy_df, change_point_list
    (                  raw  interpolated
    2016-01-02  1753421.0     14.377080
    2016-01-03  1879108.0     14.446308
    2016-01-04  1462725.0     14.195812
    2016-01-05  1525162.0     14.237612
    2016-01-06  1424264.0     14.169166
    ...               ...           ...
    2018-10-14  2185230.0     14.597232
    2018-10-15  1825539.0     14.417386
    2018-10-16  1776778.0     14.390313
    2018-10-17  1792899.0     14.399345
    2018-10-18  1738657.0     14.368624
    [1021 rows x 2 columns], ['2016-12-26 00:00:00', '2018-09-10 00:00:00'])
    """
    import numpy as np
    import pandas as pd
    from changepy import pelt
    from changepy.costs import normal_var

    change_point_threshold = self.change_point_threshold

    df_copy = pd.DataFrame(df[metric])
    counts = df_copy[metric].values
    mean = np.mean(counts)

    # Performing changepoint detection on the data variability shift
    # through PELT
    cdate = pelt(normal_var(counts, mean), len(counts))

    # If PELT detects the first datapoint to be a change point, ignore it
    if cdate and cdate[0] == 0:
        cdate.remove(0)

    if len(cdate) > 0:
        # Finding the magnitude of divergence around every change point
        # (detected by PELT) by comparing the distributions of the data
        # points on the left and the right side of the change point
        entrp = self._shift_intensity(change_points=cdate, df=df_copy,
                                      metric=metric)
        df_change_points = pd.DataFrame({'c_point': cdate, 'entropy': entrp})

        # Narrowing down to the change points which satisfy a required
        # lower bound of divergence
        df_change_points = df_change_points[
            df_change_points['entropy'] > change_point_threshold]
        cdate = df_change_points['c_point'].values

        # Set the start date of the time series based on min_ts_length,
        # max_ts_length, and the change points
        if len(cdate) > 0:
            df_subset = df_copy.iloc[cdate]
            change_point_list = [str(i) for i in df_subset.index]
            index = df_subset.index[-1]
            copy_df = (df.loc[index:df.last_valid_index()]
                       if self.data_shift_truncate else df)
            if copy_df.shape[0] < min_ts_length:
                # Return None if the time series after the change point
                # contains fewer data points than the minimum required
                return None, change_point_list
            elif copy_df.shape[0] > max_ts_length:
                # Truncate the time series after the change point if it
                # contains more data than required for training
                copy_df = df.iloc[-max_ts_length:]
            # otherwise keep the time series after the change point as-is
        else:
            change_point_list = None
            if df.shape[0] < min_ts_length:
                return None, change_point_list
            copy_df = df if df.shape[0] < max_ts_length \
                else df.iloc[-max_ts_length:]
    else:
        change_point_list = None
        if df.shape[0] < min_ts_length:
            return None, change_point_list
        copy_df = df if df.shape[0] < max_ts_length \
            else df.iloc[-max_ts_length:]

    return copy_df, change_point_list
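# A standalone sketch of the core detection step above, on synthetic data
# (the series and seed are assumptions, not from the original source).
import numpy as np
from changepy import pelt
from changepy.costs import normal_var

np.random.seed(0)
counts = np.concatenate((np.random.normal(10.0, 1.0, 200),
                         np.random.normal(10.0, 6.0, 200)))
# PELT with a normal_var cost flags shifts in variability around the mean
cdate = pelt(normal_var(counts, np.mean(counts)), len(counts))
# cdate starts with 0; any remaining indexes should fall near 200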