def get_CP_dict(feature_dict, vid_list):
    """Compute the CP-baseline changepoint labels for whole episodes.

    Args:
        feature_dict: dictionary mapping a video name to its i3d features.
        vid_list: list of video names to process.

    Returns:
        Dictionary keyed by video name. Each value is an array with one entry
        per feature row: 1 where PELT reports a changepoint, 0 elsewhere.
        Videos with fewer than two feature rows get an all-zero array.
    """
    cost_model = 'l2'
    penalty = 80
    stride = 2

    labels_by_video = {}
    for name in vid_list:
        feats = feature_dict[name]
        n_rows = len(feats)
        if n_rows < 2:
            # Nothing to segment: report "no changepoint" for every row.
            labels_by_video[name] = np.zeros(n_rows)
            continue
        detector = rpt.Pelt(model=cost_model, jump=stride).fit(feats)
        breakpoints = detector.predict(pen=penalty)
        # Only breakpoints that are valid row indices can be marked.
        flags = [1 if row in breakpoints else 0 for row in range(n_rows)]
        labels_by_video[name] = np.asarray(flags)
    return labels_by_video
def changepoint_detection_singlecell(df,
                                     cellid,
                                     penalty=12,
                                     fontsize=14,
                                     figsize=(10, 4),
                                     create_plot=False):
    """Detect change points in one cell's radiance series with PELT (rbf).

    Args:
        df: frame with a ``date_c`` column (sort key / x axis) and a
            ``rad_corr`` column (the analysed signal).
        cellid: identifier used as the plot title.
        penalty: penalty passed to ``predict``.
        fontsize: font size for every text element of the plot.
        figsize: matplotlib figure size.
        create_plot: when truthy, show a scatter plot of the signal with a
            dashed, dated vertical line at every change point.

    Returns:
        The ``date_c`` values located at the detected change points.
    """
    ordered = df.sort_values(by=["date_c"])
    signal = ordered.rad_corr.values
    dates = ordered.date_c.values

    detector = rpt.Pelt(model="rbf").fit(signal)
    # The last value returned by predict() is excluded as it is irrelevant.
    change_idx = detector.predict(pen=penalty)[:-1]

    if create_plot:
        fig, ax = plt.subplots(figsize=figsize, tight_layout=True)
        ax.scatter(dates, signal, s=6)
        ax.vlines(dates[change_idx], signal.min(), signal.max(),
                  linestyles="dashed")
        for idx in change_idx:
            label = pd.to_datetime(dates[idx]).strftime("%d-%b-%Y")
            ax.text(dates[idx], signal.max() - 0.5, label, color="red",
                    fontsize=fontsize)
        ax.set_title(str(cellid), fontsize=fontsize)
        ax.set_xlabel("Date", fontsize=fontsize)
        ax.set_ylabel("Nadir Normalized Radiance", fontsize=fontsize)
        ax.tick_params(axis='both', which='major', labelsize=fontsize)
        plt.show()

    return dates[change_idx]
def _clean_and_compute_changes(self):
    """Background-subtract the light curves and locate their change points.

    For every (light curve, polynomial) pair the polynomial is integrated
    over each time bin and subtracted from the counts. The cleaned curves
    are stacked column-wise, passed through ``angle_mapping`` and segmented
    with PELT using a penalty of ``2 * log(len(angles))``. The breakpoints,
    shifted down by one, are stored in ``self._all_change_points``.
    """
    cleaned = []

    for curve, background in zip(self._light_curves, self._polys):
        n_bins = len(curve.counts)
        bkg_counts = np.empty(n_bins)
        # NOTE(review): bkg_errs is filled but never read afterwards; the
        # integral_error calls are kept so behaviour is unchanged.
        bkg_errs = np.empty(n_bins)

        bin_edges = zip(curve.time_bins[:-1], curve.time_bins[1:])
        for k, (start, stop) in enumerate(bin_edges):
            bkg_counts[k] = background.integral(start, stop)
            bkg_errs[k] = background.integral_error(start, stop)

        cleaned.append(curve.counts - bkg_counts)

    # One column per light curve, one row per time bin.
    stacked = np.vstack(cleaned).T
    angles = angle_mapping(stacked)

    penalty = 2 * np.log(len(angles))
    detector = rpt.Pelt().fit(angles)
    breakpoints = detector.predict(pen=penalty)

    self._all_change_points = np.array(breakpoints) - 1
def pelt(data):
    """Detect change points in the closing prices with PELT.

    Args:
        data: EURUSD asset values; the ``Close`` column is the signal.

    Returns:
        Tuple ``(changes, feature)``. ``changes`` is an integer column vector
        (shape ``(k, 1)``) holding every breakpoint except the last one;
        ``feature`` is whatever ``boolean_change_point`` returns for the
        signal and those changes (per the original author: the dates and the
        numeric changes).
    """
    closes = np.array(data.Close)
    n_obs = len(closes)  # number of observations
    spread = closes.std()  # standard deviation of the data
    penalty = np.log(n_obs) * spread**2  # penalty used by the model

    breakpoints = rpt.Pelt().fit(closes).predict(pen=penalty)

    # The last breakpoint is removed (the original author marks it as not
    # correct); the rest is kept as a one-column integer array.
    changes = np.asarray(breakpoints[:-1]).reshape(-1, 1).astype(int)

    feature = boolean_change_point(closes, changes)
    return changes, feature
def extract_CP(args, features_dict):
    """Calculate and save changepoints, one .pkl label file per episode/video.

    Videos whose label file already exists for the current penalty are
    skipped. The label array has one entry per feature row: 1 where PELT
    reports a changepoint, 0 elsewhere.

    Args:
        args: parser arguments; reads ``test_data``, ``merge_model`` and
            ``merge_jump``.
        features_dict: dictionary of i3d features to calculate changepoints.
    """
    pen = 80
    for vid in tqdm(features_dict.keys()):
        features = features_dict[vid]
        out_dir = f"data/pseudo_labels/CP/{args.test_data}/{vid.split('.')[0]}"
        out_file = f"{out_dir}/pen_{pen}.pkl"

        # Skip episodes already processed with this setting (pen, ...).
        if os.path.exists(out_file):
            continue

        algo = rpt.Pelt(model=args.merge_model,
                        jump=args.merge_jump).fit(features)
        # A set gives O(1) membership tests while building the label vector.
        res = set(algo.predict(pen=pen))
        CP = [1 if ix in res else 0 for ix in range(len(features))]

        os.makedirs(out_dir, exist_ok=True)
        # Context manager guarantees the file is flushed and closed.
        with open(out_file, "wb") as handle:
            pickle.dump(np.asarray(CP), handle)
def segment_array(data: np.ndarray) -> List[np.ndarray]:
    """Split ``data`` into consecutive segments at PELT breakpoints.

    :param data: array to segment.
    :return: list of consecutive slices of ``data``; an empty list for
        ``None``/empty input or when segmentation raises (the error is
        logged); ``[np.array([1])]`` for a single-element input.
    """
    if data is None or len(data) == 0:
        return []
    if len(data) == 1:
        return [np.array([1])]

    try:
        boundaries = rpt.Pelt().fit(data).predict(pen=1)
        # Each segment runs from the previous boundary (0 for the first)
        # up to, but not including, the next boundary.
        starts = [0] + list(boundaries[:-1])
        pieces: List[np.ndarray] = [
            data[left:right] for left, right in zip(starts, boundaries)
        ]
        return pieces
    except Exception as e:
        logging.error(str(e))
        return []
def needs_refinement_pelt(self, signals):
    """Decide whether enough signals contain changepoints to need refinement.

    Signals shorter than 100 samples are ignored. Every other signal is
    normalised with ``self.norm_signal`` and segmented with PELT at penalty
    20; it counts when at least one changepoint other than index 0 and
    ``len(signal)`` is found.

    Args:
        signals: sequence of signals.

    Returns:
        True when more than 30 % of all ``signals`` (including the skipped
        short ones) have a changepoint; False for an empty ``signals``.
    """
    # Without signals there is nothing to refine (and no ratio to compute).
    if len(signals) == 0:
        return False

    import ruptures

    count = 0
    for signal in signals:
        if len(signal) < 100:
            continue

        algo = ruptures.Pelt(model=self.model,
                             jump=len(signal) // 100,
                             min_size=self.min_dist)
        algo = algo.fit(self.norm_signal(signal))

        # Empirically, most sub-state detection results use a penalty in the
        # range 30 to 60. If there are no changepoints with a penalty of 20,
        # there are also no changepoints with any penalty > 20, so
        # changepoint detection can safely be skipped altogether.
        changepoints = algo.predict(pen=20)

        # Drop the boundary markers; only interior changepoints count.
        if changepoints and changepoints[-1] == len(signal):
            changepoints.pop()
        if changepoints and changepoints[0] == 0:
            changepoints.pop(0)

        if changepoints:
            count += 1

    refinement_ratio = count / len(signals)
    return refinement_ratio > 0.3
def actvTrnsAutoDetect(Data,
                       Interval,
                       Time='YYYY/MM/DD_HH:MM:SS',
                       Model="mahalanobis",
                       Penalty=50):
    """Detect transitions in ``Data`` and return a time window around each.

    Not recommended for use: per the original author the current detection
    algorithm is not very robust.

    Args:
        Data: indexed signal; ``Data.index`` supplies the breakpoint times.
        Interval: total window width in minutes.
        Time: not read by this function.
        Model: ruptures cost model name.
        Penalty: penalty passed to ``predict``.

    Returns:
        List of ``[start, end]`` pairs, each centred on a breakpoint time and
        ``Interval`` minutes wide. Every breakpoint is included; the
        original's first/last exclusion is disabled.
    """
    import ruptures

    detector = ruptures.Pelt(model=Model).fit(Data)
    breakpoints = detector.predict(pen=Penalty)

    ruptures.display(Data, breakpoints)
    plt.show()

    windows = []
    for bkp in breakpoints:
        # Breakpoints are 1-past-the-end positions, hence the -1.
        stamp = Data.index[bkp - 1]
        print("Time is: {}".format(stamp))
        half_width = pandas.Timedelta(minutes=Interval / 2)
        windows.append([stamp - half_width, stamp + half_width])
    return windows
def ada_preprocessing(timeseries, delay_correction=0, transition_size=5):
    """Run the complete preprocessing used before forecasting.

    The series is copied and passed through ``preprocess_timeseries`` (which,
    per the original comment, cuts out the first 10 observations).
    Breakpoints are detected with PELT (rbf kernel, ``min_size=2``,
    ``jump=1``, penalty 12) on all feature columns except ``t``; the last
    breakpoint is discarded.

    Args:
        timeseries: input time series.
        delay_correction: by how much found breakpoints are moved backwards.
        transition_size: size of the window around a breakpoint in which the
            transition feature equals 1.

    Returns:
        Frame with the columns ``t``, ``t-1`` .. ``t-5``, ``concept``,
        ``transition``, ``steps_since_bkp`` and ``steps_to_bkp``.
    """
    feature_columns = [
        "t", 'pacf1', 'pacf2', 'pacf3', 'acf1', 'acf2', 'acf3', 'acf4',
        'acf5', 'var', 'kurt', 'skew', 'osc', 'mi_lag1', 'mi_lag2', 'mi_lag3'
    ]
    output_columns = [
        "t", "t-1", "t-2", "t-3", "t-4", "t-5", "concept", "transition",
        "steps_since_bkp", "steps_to_bkp"
    ]

    frame = preprocess_timeseries(timeseries.copy())
    signal = frame.loc[:, feature_columns].to_numpy()

    # Column 0 is the raw value "t"; only the derived features are segmented.
    detector = rpt.Pelt(model="rbf", min_size=2, jump=1).fit(signal[:, 1:])
    bkps = detector.predict(pen=12)[:-1]

    frame = frame.reset_index(drop=True)
    frame = transform_bkps_to_features(bkps, frame, delay_correction,
                                       transition_size)
    frame = steps_to_from_bkps(frame, bkps, delay_correction)
    return frame.loc[:, output_columns]
def get_change_points(log):
    """Count events per day and detect change points in the daily counts.

    Every calendar day between the first and last timestamp gets an entry
    (zero when no event happened). The daily counts are segmented with PELT
    using the module-level ``MODEL`` and ``PENALTY``; the result is plotted,
    saved as ``change_points.png`` and shown.

    Args:
        log: event log understood by ``pm4py.get_attribute_values``.

    Returns:
        Tuple ``(event_counts, detect_result)``: the ``'%Y-%m-%d'`` -> count
        dictionary and the breakpoint list returned by ``predict``.
    """
    attr_datetime = pm4py.get_attribute_values(log, 'time:timestamp')
    start_date = min(attr_datetime).date()
    end_date = max(attr_datetime).date()
    print("Start date: ", start_date, "\nEnd date: ", end_date)

    one_day = datetime.timedelta(days=1)
    n_days = (end_date - start_date).days + 1
    event_counts = {(start_date + k * one_day).strftime('%Y-%m-%d'): 0
                    for k in range(n_days)}

    # pm4py reports {attribute value: number of occurrences}; add the
    # occurrence count so events sharing a timestamp are all counted.
    # Any other iterable of timestamps is counted one by one.
    if isinstance(attr_datetime, dict):
        occurrences = attr_datetime.items()
    else:
        occurrences = ((stamp, 1) for stamp in attr_datetime)
    for stamp, n_events in occurrences:
        event_counts[stamp.date().strftime('%Y-%m-%d')] += n_events

    dates = np.array(list(event_counts.values()))

    # detection
    algo = rpt.Pelt(model=MODEL).fit(dates)
    detect_result = algo.predict(pen=PENALTY)

    # display
    rpt.display(dates, detect_result, detect_result)
    plt.savefig('change_points.png')
    plt.show()
    print('Change point plot is saved as "change_points.png"')

    return event_counts, detect_result
def grafik_baseline():
    """Handle the baseline form: filter the uploaded signal and fit a baseline.

    On a POST with ``flink`` in the session, the file type and the selected
    baseline method are read from the session. For ``sp2``/``sp28`` files the
    raw file is downloaded, channel 1 is despiked, smoothed (moving average,
    then Savitzky-Golay) and segmented with PELT before the selected baseline
    routine is called. The request is then redirected to ``content``.
    """
    if request.method == 'POST':
        if 'flink' in session:
            flink = session['flink']
            ftype = session['ftype']

            # baseline
            baseline = session['baseline']
            pilih_baseline = int(baseline)

            if ftype in ('sp2', 'sp28'):
                # Read the raw file and convert it to a list of lists; the
                # context manager closes the handle even if parsing fails.
                with urllib.request.urlopen(flink) as a_file:
                    list_of_lists = [line.strip().split() for line in a_file]

                # Turn the list of lists into a dataframe.
                data = pd.DataFrame(list_of_lists)
                data.columns = [
                    "time", "ch1", "ch2", "subsidiary", "difference"
                ]

                # Convert the dataframe to arrays (row 0: time, row 1: ch1).
                # np.asarray(..., dtype=float) replaces np.asfarray, which
                # was removed in NumPy 2.0.
                data_numpy = data.to_numpy().transpose()
                data_y = np.asarray(data_numpy[1], dtype=float)
                data_x = np.asarray(data_numpy[0], dtype=float)

                # remove outliers
                dspk_y = despike(data_y, 50)
                # moving average filter
                ma_y = ma(dspk_y, 9)
                # savitzky-golay filter
                svg_y = savgol_filter(ma_y, window_length=41, polyorder=2)

                # change points detection
                chgpts_y = rpt.Pelt(model='l2').fit(svg_y)
                result_y = chgpts_y.predict(pen=5000)

                if pilih_baseline == 1:
                    base_y = baseline_poly_manual(data_x, svg_y)
                elif pilih_baseline == 2:
                    base_y = baseline_prediction(data_x, svg_y, result_y)

            elif ftype in ('csv', 'xls', 'xlsx'):
                # NOTE(review): nothing loads data for these file types, so
                # data_x / svg_y / result_y are unbound here and either call
                # raises. Left unchanged pending the intended loader.
                if pilih_baseline == 1:
                    base_y = baseline_poly_manual(data_x, svg_y)
                elif pilih_baseline == 2:
                    base_y = baseline_prediction(data_x, svg_y, result_y)

        # NOTE(review): the nesting level of this return was lost in the
        # source formatting; confirm it belongs at the POST level.
        return redirect(url_for('content'))
def get_change_point_dates(df, label, from_date, to_date):
    """Return the index labels at which ``df[label]`` changes regime.

    The signal is taken from the ``df`` argument (the original read it from a
    module-level ``timeseries`` while taking the timeline from ``df``).

    Args:
        df: frame holding the series.
        label: column to analyse.
        from_date: first index label of the analysed window (inclusive).
        to_date: last index label of the analysed window (inclusive).

    Returns:
        Index values at position ``breakpoint - 1`` for every breakpoint
        PELT (rbf, penalty 10) reports, including the closing one.
    """
    window = df.loc[from_date:to_date]
    signal = window[label].values
    timeline = window.index

    algo = rpt.Pelt(model="rbf").fit(signal)
    result = algo.predict(pen=10)

    # Breakpoints are 1-past-the-end positions, hence the -1.
    return np.take(timeline, [x - 1 for x in result])
def get_changepoints(arr):
    """Find and return the changepoints of a wind speed time series array."""
    # 260 was determined to be the smallest penalty where the most
    # changepoints detected across all stations was 2.
    penalty = 260

    detector = rpt.Pelt(jump=1)
    detector.fit(arr)
    return detector.predict(pen=penalty)
def run(self):
    """Run the optimisation and find the change dates.

    The records are reshaped to log10(Susceptible) indexed by Recovered,
    re-indexed to serial numbers, interpolated and sub-sampled; PELT (rbf)
    is run on the result and the breakpoints are mapped back to dates, which
    are then filtered by ``self.min_size``. The formatted dates are stored in
    ``self._change_dates``.

    Returns:
        self
    """
    # Convert the dataset, index: Recovered, column: log10(Susceptible)
    records = self.sr_df.copy()
    records[self.S] = np.log10(records[self.S].astype(np.float64))
    by_recovered = records.pivot_table(
        index=self.R, values=self.S, aggfunc="last")

    # Convert index to serial numbers
    serial = pd.DataFrame(np.arange(1, by_recovered.index.max() + 1, 1))
    serial.index += 1
    merged = pd.merge(by_recovered, serial,
                      left_index=True, right_index=True, how="outer")
    curve = merged.reset_index(drop=True).iloc[:, 0]
    curve = curve.interpolate(limit_direction="both")

    # Sampling to reduce run-time of Ruptures
    picks = np.linspace(
        0, curve.index.max(), len(self.sr_df), dtype=np.int64)
    curve = curve[picks]

    # Detection with Ruptures
    detector = rpt.Pelt(model="rbf", jump=1, min_size=self.min_size)
    breakpoints = detector.fit_predict(curve.values, pen=0.5)

    # Convert index values to Susceptible values
    renumbered = curve.reset_index(drop=True)
    renumbered.index += 1
    at_breaks = renumbered[breakpoints].reset_index()

    # Convert Susceptible values to dates
    matched = pd.merge_asof(
        at_breaks.sort_values(self.S),
        records.reset_index().sort_values(self.S),
        on=self.S, direction="nearest")
    candidates = matched[self.DATE].sort_values()[:-1]

    # Only use dates when the previous phase has more than {min_size + 1} days
    min_gap = timedelta(days=self.min_size)
    first_day = self.date_obj(self.dates[0])
    last_day = self.date_obj(self.dates[-1])
    kept = [first_day]
    for candidate in candidates:
        if kept[-1] + min_gap < candidate:
            kept.append(candidate)

    # The last change date must be under the last date of records
    # {- min_size} days
    if kept[-1] >= last_day - min_gap:
        kept = kept[:-1]

    # Set change points (the first entry is the start date, not a change)
    self._change_dates = [
        day.strftime(self.DATE_FORMAT) for day in kept[1:]
    ]
    return self
def analysis_linear(penalization,
                    iterations,
                    data_creation_function,
                    size_concepts,
                    obs_amount_beyond_window,
                    true_bkps_per_series=3):
    """Evaluate PELT with the linear cost model on freshly simulated series.

    Each iteration creates a series, adds the lags ``t-1`` .. ``t-5``, runs
    ``mutual_info(10, ...)``, drops the first 10 rows, detects breakpoints
    and scores them with ``bkps_stats``.

    Args:
        penalization: penalty passed to ``predict``.
        iterations: number of simulated series.
        data_creation_function: zero-argument callable returning one series.
        size_concepts: forwarded to ``bkps_stats``.
        obs_amount_beyond_window: forwarded to ``bkps_stats``.
        true_bkps_per_series: number of true breakpoints in every simulated
            series, used as the recall denominator (previously a fixed 3).

    Returns:
        ``[precision, recall, average_delay]``; a value is 0 when its
        denominator is 0 (including recall when ``iterations`` is 0).
    """
    identified_bkps_total = 0
    miss_detected_bkps_total = 0
    delays_score_total = 0

    for i in range(iterations):
        print(i)
        data = pd.DataFrame({"t": data_creation_function()})
        for lag, column in enumerate(("t-1", "t-2", "t-3", "t-4", "t-5"),
                                     start=1):
            data[column] = data["t"].shift(lag)
        data = mutual_info(10, data)
        data = data[10:]

        signal = data.loc[:, ["t", 't-1', 't-2', 't-3', 't-4',
                              't-5']].to_numpy()
        algo = rpt.Pelt(model="linear", min_size=2, jump=1).fit(signal)
        bkps = algo.predict(pen=penalization)

        # result: (identified, not detected, miss detected, list of delays)
        result = bkps_stats(bkps, signal, size_concepts,
                            obs_amount_beyond_window)
        identified_bkps_total += result[0]
        miss_detected_bkps_total += result[2]
        delays_score_total += sum(result[3])

    detections = identified_bkps_total + miss_detected_bkps_total
    precision = identified_bkps_total / detections if detections != 0 else 0

    expected = iterations * true_bkps_per_series
    recall = identified_bkps_total / expected if expected != 0 else 0

    if identified_bkps_total != 0:
        average_delay = delays_score_total / identified_bkps_total
    else:
        average_delay = 0

    return [precision, recall, average_delay]
def data(self, source):
    """Load ``source`` and run changepoint detection on it.

    The flattened signal is stored in ``self._signal`` and the breakpoints
    found with ``self._model`` / ``self._penalty`` in ``self._bkpts``.

    :param source: timeseries array
    :return: self
    """
    flattened = np.array(source).flatten()
    self._signal = flattened

    detector = rpt.Pelt(model=self._model)
    detector.fit(self._signal)
    self._bkpts = detector.predict(pen=self._penalty)
    return self
def stability_analysis_long_term(penalization,
                                 iterations,
                                 data_creation_function,
                                 size_concepts,
                                 windowsize_preprocessing=10):
    """Measure how stable the first two breakpoints stay as data grows.

    For every simulated series, breakpoints are re-detected on growing
    prefixes of the data. Only breakpoints below
    ``2.5 * size_concepts - windowsize_preprocessing`` are considered. If any
    prefix does not yield exactly two of them, the whole series is
    discarded; otherwise the standard deviations of the first and of the
    second breakpoint position are appended to the result.

    Returns:
        Flat list of standard deviations, two per retained series.
    """
    feature_columns = [
        "t", 'pacf1', 'pacf2', 'pacf3', 'acf1', 'acf2', 'acf3', 'acf4',
        'acf5', 'var', 'kurt', 'skew', 'osc', 'mi_lag1', 'mi_lag2', 'mi_lag3'
    ]

    standard_deviations = []
    for run in range(iterations):
        print("iteration: ", run)
        data = data_creation_function()

        first_positions = []
        second_positions = []
        first_obs = int(size_concepts * 2 + (size_concepts / 2)) + 50 - 1
        upper_limit = ((size_concepts * 2 + size_concepts / 2) -
                       windowsize_preprocessing)

        for n_obs in range(first_obs, size_concepts * 4):
            print("observation: ", n_obs)
            # cuts out the first "windowsize_preprocessing" observations
            prefix = preprocess_timeseries(data[0:n_obs],
                                           windowsize_preprocessing)
            signal = prefix.loc[:, feature_columns].to_numpy()

            detector = rpt.Pelt(model="rbf", min_size=2,
                                jump=1).fit(signal[:, 1:])
            bkps = detector.predict(pen=penalization)[:-1]

            early = [bkp for bkp in bkps if bkp < upper_limit]
            if len(early) != 2:
                # Unexpected number of breakpoints: discard this series.
                break
            first_positions.append(early[0])
            second_positions.append(early[1])
        else:
            # Reached only when no prefix caused the series to be discarded.
            standard_deviations.append(stdev(first_positions))
            standard_deviations.append(stdev(second_positions))

    return standard_deviations
def estimate_power(activity, rider_setup, pen=5, temp=None):
    """Estimate the power output for a ride.

    The acceleration (gradient of the 3-second rolling mean speed) is
    segmented with PELT; for each segment a constant power is estimated from
    the steady-state model plus the kinetic-energy change, clipped at zero.
    Results are written to the ``acceleration`` and ``pwr`` columns of
    ``activity``.

    Args:
        activity (Activity): target activity
        rider_setup (RiderSetup): target rider setup
        pen (int, optional): regularization penalty. Defaults to 5.
        temp (float, optional): temperature in °C. Falls back to 20 when not
            given; overridden per segment when the activity has a ``temp``
            column.
    """
    activity.resample(pd.Timedelta(seconds=1))

    smoothed_speed = activity["speed"].rolling(pd.Timedelta(seconds=3)).mean()
    values = np.gradient(np.array(smoothed_speed, dtype=np.float64))
    values[np.isnan(values)] = 0.0
    activity["acceleration"] = values

    if temp is None:
        temp = 20

    detector = rpt.Pelt(model="rbf").fit(values)
    # Scale the penalty with the sample count and the signal variance.
    pen = pen * np.log(len(values)) * np.std(values)**2
    indices = detector.predict(pen=pen)
    print("n = {0}".format(len(indices)))

    key_est = "pwr"
    activity[key_est] = 0

    # Consecutive breakpoint pairs, leaving out the pair that ends at the
    # final breakpoint.
    for start, stop in zip(indices[:-2], indices[1:-1]):
        altitude = activity["alt"][start]
        dist = activity["dist"][stop] - activity["dist"][start]
        delta_t = (activity.index[stop] -
                   activity.index[start]).total_seconds()
        grade = 100 * (activity["alt"][stop] - activity["alt"][start]) / (
            dist + 1)
        acceleration_power = 0.5 * rider_setup.total_mass * (
            activity["speed"][stop]**2 -
            activity["speed"][start]**2) / delta_t

        if "temp" in activity.keys():
            temp = activity["temp"][start]
        headwind = (activity["headwind"][start]
                    if "headwind" in activity.keys() else 0.0)

        speed = activity["speed"][start:stop].mean()
        steady_power = power_from_speed(rider_setup,
                                        grade,
                                        speed,
                                        altitude=altitude,
                                        temp=temp,
                                        headwind=headwind)

        pwr = steady_power + acceleration_power
        if pwr < 0:
            pwr = 0
        activity.loc[activity.index[start:stop], key_est] = pwr
def analysis_linear_final(penalization,
                          iterations,
                          dataset,
                          size_concepts,
                          obs_amount_beyond_window,
                          windowsize_preprocessing=10):
    """Score PELT (linear cost model) on the stored final-detection data.

    For ``i`` in ``range(iterations)`` the pickled series
    ``data_final_detection/<dataset>_<i>.data`` is loaded, extended with the
    lags ``t-1`` .. ``t-5``, passed through ``mutual_info(10, ...)``, cut by
    its first 10 rows and segmented; ``bkps_stats`` scores the breakpoints.

    Returns:
        ``[identified, not detected, miss detected, summed delays]``, each
        accumulated over all iterations.
    """
    lag_columns = ("t-1", "t-2", "t-3", "t-4", "t-5")
    totals = [0, 0, 0, 0]

    for i in range(iterations):
        print(i)
        path = "data_final_detection/" + dataset + "_" + str(i) + ".data"
        with open(path, 'rb') as filehandle:
            raw = pickle.load(filehandle)

        frame = pd.DataFrame({"t": raw})
        for lag, column in enumerate(lag_columns, start=1):
            frame[column] = frame["t"].shift(lag)
        frame = mutual_info(10, frame)
        frame = frame[10:]

        signal = frame.loc[:, ["t", 't-1', 't-2', 't-3', 't-4',
                               't-5']].to_numpy()
        detector = rpt.Pelt(model="linear", min_size=2, jump=1).fit(signal)
        bkps = detector.predict(pen=penalization)

        stats = bkps_stats(bkps, signal, size_concepts,
                           obs_amount_beyond_window)
        totals[0] += stats[0]
        totals[1] += stats[1]
        totals[2] += stats[2]
        totals[3] += sum(stats[3])  # stats[3] is the list of delays

    return totals
def analysis_rbf_final(penalization,
                       iterations,
                       dataset,
                       size_concepts,
                       obs_amount_beyond_window,
                       windowsize_preprocessing=10):
    """Score PELT (rbf cost model) on the stored final-detection data.

    For ``i`` in ``range(iterations)`` the pickled series
    ``data_final_detection/<dataset>_<i>.data`` is loaded and preprocessed
    (cutting the first ``windowsize_preprocessing`` observations); all
    feature columns except ``t`` are segmented and ``bkps_stats`` scores the
    breakpoints.

    Returns:
        ``[identified, not detected, miss detected, summed delays]``, each
        accumulated over all iterations.
    """
    feature_columns = [
        "t", 'pacf1', 'pacf2', 'pacf3', 'acf1', 'acf2', 'acf3', 'acf4',
        'acf5', 'var', 'kurt', 'skew', 'osc', 'mi_lag1', 'mi_lag2', 'mi_lag3'
    ]
    totals = [0, 0, 0, 0]

    for i in range(iterations):
        print(i)
        path = "data_final_detection/" + dataset + "_" + str(i) + ".data"
        with open(path, 'rb') as filehandle:
            raw = pickle.load(filehandle)

        frame = preprocess_timeseries(raw, windowsize_preprocessing)
        signal = frame.loc[:, feature_columns].to_numpy()

        # Column 0 is the raw value "t"; only derived features are segmented.
        detector = rpt.Pelt(model="rbf", min_size=2,
                            jump=1).fit(signal[:, 1:])
        bkps = detector.predict(pen=penalization)

        stats = bkps_stats(bkps,
                           signal,
                           size_concepts,
                           obs_amount_beyond_window,
                           windowsize_preproc=windowsize_preprocessing)
        totals[0] += stats[0]
        totals[1] += stats[1]
        totals[2] += stats[2]
        totals[3] += sum(stats[3])  # stats[3] is the list of delays

    return totals
def plot_change_points(df, label, from_date, to_date):
    """Plot ``df[label]`` with a dashed vertical line at every breakpoint.

    The signal is taken from the ``df`` argument (the original read it from a
    module-level ``timeseries`` while taking the timeline from ``df``).

    Args:
        df: frame holding the series.
        label: column to plot and analyse.
        from_date: first index label of the window (inclusive).
        to_date: last index label of the window (inclusive).
    """
    window = df.loc[from_date:to_date]
    signal = window[label].values
    timeline = window.index

    algo = rpt.Pelt(model="rbf").fit(signal)
    result = algo.predict(pen=10)

    plt.plot(timeline, signal, 'b-')
    # Breakpoints are 1-past-the-end positions, hence the -1.
    for xc in np.take(timeline, [x - 1 for x in result]):
        plt.axvline(x=xc, color='black', linestyle='--')
    plt.show()
def analysis_rbf(penalization,
                 iterations,
                 data_creation_function,
                 size_concepts,
                 obs_amount_beyond_window,
                 windowsize_preprocessing=10,
                 true_bkps_per_series=3):
    """Evaluate PELT with the rbf cost model on freshly simulated series.

    Each iteration creates a series, preprocesses it (cutting the first
    ``windowsize_preprocessing`` observations), detects breakpoints on all
    feature columns except ``t`` and scores them with ``bkps_stats``.

    Args:
        penalization: penalty passed to ``predict``.
        iterations: number of simulated series.
        data_creation_function: zero-argument callable returning one series.
        size_concepts: forwarded to ``bkps_stats``.
        obs_amount_beyond_window: forwarded to ``bkps_stats``.
        windowsize_preprocessing: window size used by the preprocessing.
        true_bkps_per_series: number of true breakpoints in every simulated
            series, used as the recall denominator (previously a fixed 3).

    Returns:
        ``[precision, recall, average_delay]``; a value is 0 when its
        denominator is 0 (including recall when ``iterations`` is 0).
    """
    feature_columns = [
        "t", 'pacf1', 'pacf2', 'pacf3', 'acf1', 'acf2', 'acf3', 'acf4',
        'acf5', 'var', 'kurt', 'skew', 'osc', 'mi_lag1', 'mi_lag2', 'mi_lag3'
    ]
    identified_bkps_total = 0
    miss_detected_bkps_total = 0
    delays_score_total = 0

    for i in range(iterations):
        print(i)
        data = preprocess_timeseries(data_creation_function(),
                                     windowsize_preprocessing)
        signal = data.loc[:, feature_columns].to_numpy()

        # Column 0 is the raw value "t"; only derived features are segmented.
        algo = rpt.Pelt(model="rbf", min_size=2, jump=1).fit(signal[:, 1:])
        bkps = algo.predict(pen=penalization)

        # result: (identified, not detected, miss detected, list of delays)
        result = bkps_stats(bkps,
                            signal,
                            size_concepts,
                            obs_amount_beyond_window,
                            windowsize_preproc=windowsize_preprocessing)
        identified_bkps_total += result[0]
        miss_detected_bkps_total += result[2]
        delays_score_total += sum(result[3])

    detections = identified_bkps_total + miss_detected_bkps_total
    precision = identified_bkps_total / detections if detections != 0 else 0

    expected = iterations * true_bkps_per_series
    recall = identified_bkps_total / expected if expected != 0 else 0

    if identified_bkps_total != 0:
        average_delay = delays_score_total / identified_bkps_total
    else:
        average_delay = 0

    return [precision, recall, average_delay]
def find_regimes(data, first=19):
    """Find the break points of the ``theta`` signal of every flight.

    Args:
        data: mapping from flight to an object whose ``['theta']`` entry has
            a ``.values`` array.
        first: number of leading samples to skip before segmentation.

    Returns:
        Dictionary mapping each flight to ``[0]`` followed by the PELT (l2,
        penalty 5) breakpoints of the truncated signal.
    """
    regimes = {}
    for flight in data.keys():
        theta = data[flight]['theta'].values[first:]
        detector = rpt.Pelt(model="l2")
        detector.fit(theta)
        # Prepend 0 so the list also carries the start of the first regime.
        regimes[flight] = [0] + detector.predict(pen=5)
    return regimes
def f(x, y):
    """Detect breakpoints in ``y`` and return them with their edge pairs.

    Returns:
        Tuple ``(breakpoint_index, breakpoints)``: the PELT (rbf) result and,
        for every breakpoint except the last, the two positions ``b - 1``
        and ``b``, flattened in order.
    """
    # NOTE(review): the configured penalty is read but predict() below uses
    # a fixed 10 — confirm which one is intended.
    pen = float(display['pen'])

    detector = rpt.Pelt(model="rbf").fit(y)
    breakpoint_index = detector.predict(pen=10)

    breakpoints = [
        edge for b in breakpoint_index[:-1] for edge in (b - 1, b)
    ]
    return breakpoint_index, breakpoints
def get_ball_velocity(json_file_path):
    """Find breakpoints in the ball's speed for one tracking file.

    Used to test a methodology for programmatically getting hike and dead
    ball time marks: in theory the ball is stationary at the hike and may
    reach another stopped point at, or shortly after, the dead ball whistle.

    Args:
        json_file_path: path handed to ``ngs.extract_player_tracking_data``.

    Returns:
        Tuple ``(breakpoints, breakpoint_times, last_time)``: the PELT (rbf,
        penalty 10) breakpoints of the ball speed without the final one, the
        ``time`` values at those positions, and the last ``time`` value.
    """
    df_all_tracking, times = ngs.extract_player_tracking_data(json_file_path)

    ball = df_all_tracking[df_all_tracking['player_id'] ==
                           'ball'].sort_values(by='time')
    ball['time_val'] = ball['time'].apply(
        lambda stamp: convert_time_to_seconds(stamp))

    positions = list(zip(np.array(ball['x']), np.array(ball['y'])))
    timesteps = np.array(ball['time_val'])
    velocities = get_velocity_list(positions, timesteps)

    # Attach each velocity to its row through the frame index.
    velocity_by_row = dict(zip(list(ball.index), velocities))
    ball['velocity'] = ball.index.to_series().map(velocity_by_row)
    ball['speed'] = ball['velocity'].apply(
        lambda v: (v[0]**2 + v[1]**2)**0.5)

    # Segment the speed signal with the ruptures library.
    speed_signal = np.array(ball['speed']).reshape(-1, 1)
    detector = rpt.Pelt(model="rbf").fit(speed_signal)
    interior = detector.predict(pen=10)[:-1]

    time_marks = list(ball['time'])
    return interior, [time_marks[k] for k in interior], time_marks[-1]
def rpt_changepoint(f, window=25):
    """Print the ruptures changepoints of ``dtrend_diff`` for several penalties.

    NOT recommended: there is no guidance for choosing the penalty (pen).

    Args:
        f: frame with the columns ``dtrend_diff`` (used to detect changes in
            trend) and ``period``.
        window: replaced by the first ``period`` value when no period is
            null; not used afterwards.

    Returns:
        None. One line ``"<penalty> <breakpoints>"`` is printed per penalty.
    """
    ts = f['dtrend_diff'].values

    if f['period'].isnull().sum() == 0:
        window = f.loc[f.index[0], 'period']

    detector = rpt.Pelt(model="rbf").fit(ts)
    for penalty in (0, 5, 10, 20, 50, 100):
        breakpoints = detector.predict(pen=penalty)
        print(f"{penalty} {breakpoints}")
    return None
def rpt_pelt(series, pen=3):
    """Apply the PELT algorithm with the provided penalty.

    Args:
        series: (reduced) time series, retrieved when applying
            dimensionality reduction.
        pen: penalty value for classifying change points.

    Returns:
        List of change points, without the final breakpoint.
    """
    detector = rpt.Pelt(model="rbf", min_size=1, jump=1)
    detector.fit(series)
    breakpoints = detector.predict(pen=pen)
    return breakpoints[:-1]
def merge_PL_CP(args, features_dict, PL_dict):
    """Calculate the "merge" fusion strategy and save new pseudo-labels.

    Changepoints are detected for the whole episode (or loaded from the
    cached .pkl of a previous run with the same penalty), dilated with
    ``dilate_boundaries`` and united with the pseudo-labels. One .pkl label
    file is saved per episode/video.

    Args:
        args: parser arguments; reads ``test_data``, ``merge_model``,
            ``merge_jump``, ``merge_pen``, ``i3d_training`` and ``seed``.
        features_dict: dictionary of i3d features to calculate changepoints.
        PL_dict: pre-extracted pseudo-labels as start point.
    """
    for vid in features_dict.keys():
        features = features_dict[vid]
        PL = PL_dict[vid]

        cp_dir = f"data/pseudo_labels/CP/{args.test_data}/{vid.split('.')[0]}"
        cp_file = f"{cp_dir}/pen_{args.merge_pen}.pkl"

        # Reuse the CP labels when this episode was already processed with
        # the given setting (pen, ...).
        if os.path.exists(cp_file):
            # NOTE: pickle must only be used on trusted cache files.
            with open(cp_file, "rb") as handle:
                cp_labels = pickle.load(handle)
        else:
            algo = rpt.Pelt(model=args.merge_model,
                            jump=args.merge_jump).fit(features)
            # A set gives O(1) membership tests while building the labels.
            res = set(algo.predict(pen=args.merge_pen))
            cp_labels = np.asarray(
                [1 if ix in res else 0 for ix in range(len(features))])
            os.makedirs(cp_dir, exist_ok=True)
            with open(cp_file, "wb") as handle:
                pickle.dump(cp_labels, handle)

        CP = np.asarray(dilate_boundaries([list(cp_labels)])[0])
        # Union of the dilated changepoints and the pseudo-labels.
        merges = CP | PL

        save_path = f"data/pseudo_labels/PL_CP_merge/{args.test_data}/{args.i3d_training}/merge_{args.merge_pen}/seed_{args.seed}/{vid.split('.')[0]}"
        Path(save_path).mkdir(parents=True, exist_ok=True)
        with open(f"{save_path}/preds.pkl", "wb") as handle:
            pickle.dump(merges, handle)
def f(x, y):
    """Fit a piecewise-linear spline to ``(x, y)`` with knots at breakpoints.

    Breakpoints of ``y`` are detected with PELT (rbf). For every breakpoint
    except the last, the values ``b - 1`` and ``b`` are passed as interior
    knots of a degree-1 least-squares spline.

    Returns:
        The spline evaluated at ``x``.
    """
    # NOTE(review): the configured penalty is read but predict() below uses
    # a fixed 4 — confirm which one is intended.
    pen = float(display['pen'])

    detector = rpt.Pelt(model="rbf").fit(y)
    breakpoint_index = detector.predict(pen=4)

    knots = [edge for b in breakpoint_index[:-1] for edge in (b - 1, b)]

    degree = 1
    tck = interpolate.splrep(x, y, s=0, t=knots, task=-1, k=degree)
    return interpolate.splev(x, tck, der=0)
def FindChangePoints(BitScores, penalty, minimumSize, Display=False):
    """Detect change points in the bit score map and average each section.

    Uses the ruptures module (PELT). After detection, the boundary list is
    normalised so it starts at 0 and ends at ``len(BitScores) - 1``; the
    value ``len(BitScores)`` is removed because it is not a valid index.

    Args:
        BitScores: sequence of bit scores (one per MSA position).
        penalty: penalty passed to ``predict``.
        minimumSize: minimum segment size for PELT.
        Display: when truthy, plot the change points on the bit score map.

    Returns:
        Tuple ``(result, averages)``: the sorted list of change points and
        the average bit score between each pair of consecutive change points.
    """
    n_scores = len(BitScores)
    algo = rpt.Pelt(min_size=minimumSize).fit(np.array(BitScores))
    result = algo.predict(pen=penalty)

    # Make sure the last index in the MSA is included as a change point.
    if n_scores - 1 not in result:
        result.append(n_scores - 1)
    # The length of the MSA itself would cause an indexing error if used.
    if n_scores in result:
        result.remove(n_scores)
    # Make sure the starting index is in the results.
    if 0 not in result:
        result.append(0)
    result = sorted(result)

    # Average bit score of every section between consecutive change points.
    # NOTE(review): the final section ends at len(BitScores) - 1 exclusive,
    # so the very last score is not part of any average — confirm intended.
    averages = [
        sum(BitScores[start:stop]) / (stop - start)
        for start, stop in zip(result, result[1:])
    ]

    # Optionally illustrate the change points on the bit score map.
    if Display:
        rpt.display(np.array(BitScores), result)
        plt.show()

    return result, averages