def scoreCalculate(r, smooth, order=None, data=None):
    """Run ChangeFinder over a series and return exp-scaled anomaly scores.

    Args:
        r: forgetting factor for the SDAR models.
        smooth: smoothing window size.
        order: AR model order; falls back to the module-level ``lag``
            when None (backward compatible with the original behavior).
        data: input series; falls back to the module-level ``tmp``
            when None (backward compatible with the original behavior).

    Returns:
        list of ``math.exp(score)`` values, one per input point.
    """
    # Generalization: the original read the globals ``lag`` and ``tmp``
    # directly; they remain the defaults so existing callers are unaffected.
    if order is None:
        order = lag
    if data is None:
        data = tmp
    cf = changefinder.ChangeFinder(r=r, order=order, smooth=smooth)
    return [math.exp(cf.update(value)) for value in data]
def get_score(self, x, model):
    """Update the score for the model based on most recent data, flag if
    its percentile passes self.cf_threshold.

    Returns:
        (score_percentile, flag) where flag is 1 when the percentile is at
        or above self.cf_threshold, else 0.
    """
    # Lazily create a ChangeFinder instance for models we have not seen yet.
    if model not in self.models:
        self.models[model] = changefinder.ChangeFinder(
            r=self.cf_r, order=self.cf_order, smooth=self.cf_smooth)
    # If this update step fails, fall back to the last known score (0 if none).
    try:
        score = self.models[model].update(x)
        self.scores_latest[model] = score
    except Exception:
        score = self.scores_latest.get(model, 0)
    if np.isnan(score):
        score = 0
    # Maintain a bounded window of recent scores for percentile calculation.
    samples = self.scores_samples.get(model, [])
    samples.append(score)
    self.scores_samples[model] = samples[-self.n_score_samples:]
    # Express the raw score as a percentile of the recent sample window.
    score = percentileofscore(self.scores_samples[model], score)
    flag = 1 if score >= self.cf_threshold else 0
    return score, flag
def change_find(signal, r, order, smooth, window, head_width, tail_width, task, survive=False):
    """Run ChangeFinder on a signal and return (score, score_padded, signal_padded).

    Args:
        signal: 1-D array of signal values.
        r, order, smooth: ChangeFinder parameters.
        window: number of head/tail samples used by the survival heuristic.
        head_width, tail_width: padding widths applied by ``randpad``.
        task: detection task name; for 'pupariation'/'death' (falling-edge
            detection) the signal is reversed before scoring and the results
            are reversed back afterwards.
        survive: when True, first test whether the individual survived and
            skip ChangeFinder entirely if so.

    Raises:
        Exception: if ChangeFinder fails 10 times in a row.
    """
    # If every sample is identical there is nothing to detect: return zeros.
    if np.all(signal[0] == signal):
        return np.zeros(len(signal)), \
            np.zeros(head_width + len(signal) + tail_width), \
            np.zeros(head_width + len(signal) + tail_width)
    # When survivors are possible, compare the mean/std of the head and tail
    # of the signal; if the heuristic says "survived", skip ChangeFinder.
    if survive:
        start = signal[:window]
        end = signal[-window:]
        if -2.8 * (start.mean() - end.mean()) + 1.0 > (start.std() - end.std()):
            return np.zeros(len(signal)), \
                np.zeros(head_width + len(signal) + tail_width), \
                np.zeros(head_width + len(signal) + tail_width)
    # ChangeFinder handles falling edges poorly, so reverse the signal for
    # pupariation/death (falling-edge) tasks.
    if task in ('pupariation', 'death'):
        signal = signal[::-1]
    signal_padded = randpad(signal, head_width, tail_width)
    # A too-small r occasionally raises "ValueError: math domain error";
    # retry up to 10 times.
    for _ in range(10):
        try:
            cf = changefinder.ChangeFinder(r, order=order, smooth=smooth)
            score_padded = np.array([cf.update(s) for s in signal_padded])
            score = score_padded[head_width + tail_width:]
            # Reverse the scores/signal back for reversed tasks.
            # BUGFIX: this branch previously tested the undefined name
            # ``event``; the resulting NameError was swallowed by a bare
            # ``except`` so these tasks always failed after 10 retries.
            if task in ('pupariation', 'death'):
                score = score[::-1]
                score_padded = score_padded[::-1]
                signal_padded = signal_padded[::-1]
            return score, score_padded, signal_padded
        except ValueError:
            pass  # retry only on the documented math-domain error
    raise Exception('ChangeFinder failed 10 times (r may be too small)')
def __calcScore(self, inputList, score, diff):
    """Score inputList with ChangeFinder, appending scores to `score` and
    score deltas to `diff` (both caller-supplied output lists)."""
    detector = changefinder.ChangeFinder(r=self.r, order=self.order, smooth=self.smooth)
    # Warm the detector up on the recorded noise samples first.
    for sample in self.noise:
        detector.update(sample)
    # One anomaly score per input value.
    for value in inputList:
        score.append(detector.update(value))
    # First difference of the score series, anchored at the first score
    # (so the first diff entry is always 0).
    previous = score[0]
    for current in score:
        diff.append(current - previous)
        previous = current
def fft(f, dt, fc): fq = np.linspace(0, 1.0/dt, N) # 周波数軸の作成 linspace(開始,終了,分割数) F = np.fft.fft(f) # 高速フーリエ変換(FFT) F_abs = np.abs(F) # FFT結果(複素数)を絶対値に変換 F_abs_amp = F_abs / N * 2 # 交流成分はデータ数で割って2倍 F_abs_amp[0] = F_abs_amp[0] / 2 #2倍不要 F2 = np.copy(F) # FFT結果コピー F2[(fq > fc)] = 0 # カットオフを超える周波数のデータをゼロにする F2_abs = np.abs(F2) # FFT結果(複素数)を絶対値に変換 F2_abs_amp = F2_abs / N * 2 # 交流成分はデータ数で割って2倍 F2_abs_amp[0] = F2_abs_amp[0] / 2 #2倍不要 """ # グラフ表示(FFT解析結果) plt.xlabel('freqency(Hz)', fontsize=14) plt.ylabel('amplitude', fontsize=14) plt.plot(fq, F_abs_amp) plt.show() # グラフ表示(IFFT復元結果) F_ifft = np.fft.ifft(F) # 逆フーリエ変換(IFFT) F_ifft_real = F_ifft.real # 実数部 plt.plot(x, F_ifft_real, c="g") # IFFT(逆変換) plt.show() """ # グラフ表示 (FFT解析結果 (ノイズ除去後) ) plt.xlabel('freqency(Hz)', fontsize=14) plt.ylabel('amplitude', fontsize=14) plt.plot(fq, F2_abs_amp, c='r') plt.show() # グラフ表示(IFFT復元結果) F2_ifft = np.fft.ifft(F2) # 逆フーリエ変換 (IFFT) F2_ifft_real = F2_ifft.real # 実数部 plt.plot(x, F2_ifft_real, c="g") # IFFT (逆変換) plt.show() ret = [] cf = changefinder.ChangeFinder(r=0.01, order=1, smooth=6) for v in F2_ifft_real: score = cf.update(v) ret.append(score) fig = plt.figure() ax = fig.add_subplot(111) ax.plot(ret) ax2 = ax.twinx() ax2.plot(F2_ifft_real,'r') plt.show()
def adetection(counts, users, starttime, tohours):
    """Score per-user Windows logon-event counts with ChangeFinder.

    Returns:
        (count_all_array, result_array, cfdetect): per-user hourly counts
        (total plus one row per event id), per-user score series, and the
        maximum score per user.
    """
    # One plane of the count matrix per tracked event id.
    event_planes = {4624: 0, 4625: 1, 4768: 2, 4769: 3, 4776: 4}
    count_array = np.zeros((5, len(users), tohours + 1))
    count_all_array = []
    result_array = []
    cfdetect = {}
    # Bucket each record into (event-id plane, user row, hour column).
    for _, event in counts.iterrows():
        elapsed = datetime.datetime.strptime(event["dates"], "%Y-%m-%d %H:%M:%S") - starttime
        column = int(elapsed.total_seconds() / 3600)
        row = users.index(event["username"])
        plane = event_planes.get(event["eventid"])
        if plane is not None:
            count_array[plane, row, column] = event["count"]
    # Total counts per user/hour, and the all-user hourly average.
    count_sum = np.sum(count_array, axis=0)
    count_average = count_sum.mean(axis=0)
    for num, udata in enumerate(count_sum):
        cf = changefinder.ChangeFinder(r=0.04, order=1, smooth=5)
        # Warm the detector up on the all-user hourly average first.
        for avg in count_average:
            cf.update(avg)
        ret = [round(cf.update(value), 2) for value in udata]
        result_array.append(ret)
        cfdetect[users[num]] = max(ret)
        count_all_array.append(udata.tolist())
        # Append this user's per-event-id hourly counts.
        for plane in range(5):
            count_all_array.append([count_array[plane, num, i] for i in range(tohours + 1)])
    return count_all_array, result_array, cfdetect
def __init__(self, setting_path: str, audio_path: str):
    """Constructor.

    Args:
        setting_path (str): path to the settings (YAML) file
        audio_path (str): path to the audio file
    """
    with open(setting_path, 'r') as f:
        # FIX: yaml.load() without an explicit Loader is deprecated and can
        # execute arbitrary constructors on untrusted input; safe_load
        # parses a plain config file just as well.
        cfg = yaml.safe_load(f)
    self.cf = cf.ChangeFinder(**cfg['change_finder'])
    self.audio = Audio(cfg['audio'], audio_file_path=audio_path)
    # Rolling audio buffer consumed by the model.
    self.buffer = np.zeros(cfg['model']['buffer_audio_length'], dtype=np.float32)
    self.buf_num = int(cfg['model']['frame_buf_num'])
    self.spec_buf = []
    self.thr = float(cfg['model']['thr'])
def __calcScore(self, inputList, score, diff, useLearningData=False):
    """Score inputList with ChangeFinder, appending scores to `score` and
    score deltas to `diff`; warm up on learning data when requested."""
    detector = changefinder.ChangeFinder(r=self.r, order=self.order, smooth=self.smooth)
    # Choose the warm-up series (original `== False` comparison preserved).
    if useLearningData == False:
        warmup = self.noise
    else:
        warmup = self.learning
    for sample in warmup:
        detector.update(sample)
    # One anomaly score per input value.
    for value in inputList:
        score.append(detector.update(value))
    # First difference of the score series, anchored at the first score.
    previous = score[0]
    for current in score:
        diff.append(current - previous)
        previous = current
def __calcScore(self, inputList, score, diff, useLearningData): cf = changefinder.ChangeFinder(r=self.r, order=self.order, smooth=self.smooth) #とりあえずこの場合について for n in range(10): for p in self.noise[:, n]: cf.update(p) self.noiseAlert.append(0) for i in range(10): for j in inputList[:, i]: s = cf.update(j) score.append(s) self.totalAlert.append(0) prev = score[0] for s in score: d = s - prev diff.append(d) prev = s
def _new_cf(self):
    """Build a fresh ChangeFinder configured from this instance's settings."""
    detector = changefinder.ChangeFinder(
        r=self._cf_r,
        order=1,
        smooth=self._cf_smooth,
    )
    return detector
# NOTE(review): the original source had unbalanced triple-quote markers in
# this region; reconstructed assuming the CSV-loading and MidHipZ sections
# are commented out — confirm against the original file.
data = np.concatenate([np.random.normal(0.7, 0.05, 300),
                       np.random.normal(1.5, 0.05, 300),
                       np.random.normal(0.6, 0.05, 300),
                       np.random.normal(1.3, 0.05, 300)])
"""
csvpass = sys.argv[1] + "3DFiltered.csv"
df = pd.read_csv(csvpass)
"""
"""
data = df.MidHipZ.diff()
data = data.diff()
data = data.fillna(method='bfill')
"""
# NOTE(review): `df` is not defined in this chunk — presumably loaded
# elsewhere or by the commented-out read_csv above; verify.
data = df.RSholderZ
data2 = data.drop(data.index[[0]])  # drop the first row before scoring
print(data)
# Score the series with ChangeFinder.
cf = changefinder.ChangeFinder(r=0.2, order=1, smooth=5)
ret = []
for i in data2:
    score = cf.update(i)
    ret.append(score)
# Plot anomaly scores with the raw series on a twin axis.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ret)
ax2 = ax.twinx()
ax2.plot(data, 'r')
plt.show()
print(ret)
def daterange(start_date, end_date): for n in range(int((end_date - start_date).days)): yield start_date + timedelta(n) x = [] y = [] x_stockprice = [] y_stockprice = [] switcher = 0 accum_count = 0 accum_stockprice = 0.0 prev_stockprice = 0.0 cf = changefinder.ChangeFinder(r=0.25, order=2, smooth=7) changeRet = [] for single_date in daterange(start_date, end_date): isodate = single_date.isoformat() mentionQuery = {"timestamp": isodate, "symbol": symbol} count = 0 mentionInDb = mentionCollection.count_documents(mentionQuery) if mentionInDb == 1: mention = mentionCollection.find_one(mentionQuery) count = mention["count"] else: count = 0 if isodate in priceData[0]:
def run_evars_gpr(base_model: ModelsGaussianProcessRegression.GaussianProcessRegression, data: pd.DataFrame,
                  season_length: int, target_column: str, train_ind: int, comparison_partners: bool = False,
                  da: str = 'scaled', cpd: str = 'cf', scale_thr: float = 0.1, scale_seasons: int = 2,
                  scale_window: int = None, scale_window_factor: float = 0.1, scale_window_minimum: int = 2,
                  const_hazard: int = None, const_hazard_factor: int = 2, cf_r: float = 0.4, cf_order: int = 1,
                  cf_smooth: int = 4, cf_thr_perc: int = 90, append: str = 'no', max_samples: int = None,
                  max_samples_factor: int = 10, o_perc: float = 1.1, u_perc: float = 0.1, thr: float = 0.2,
                  under_samp: bool = False, rel_thr: float = 0.5, rel_coef: float = 1.5, verbose: bool = False):
    """
    Run EVARS-GPR algo
    :param base_model: base model fitted during offline phase
    :param data: data to use
    :param season_length: length of one season
    :param target_column: target column
    :param train_ind: index of last train sample
    :param comparison_partners: specify whether to include comparison partners in optimization loop
    :param da: data augmentation method
    :param cpd: change point detection method
    :param scale_thr: threshold for output scaling factor
    :param scale_seasons: number of seasons to consider for calculation of output scaling factor
    :param scale_window: number of samples prior to change point for calculation of output scaling factor
    :param scale_window_factor: scale window as a multiple of the season length
    :param scale_window_minimum: minimum of the scale window
    :param const_hazard: constant hazard value in case of bocpd
    :param const_hazard_factor: constant hazard value as a multiple of the season length
    :param cf_r: r value (forgetting factor) for changefinder
    :param cf_order: order of SDAR models for changefinder
    :param cf_smooth: smoothing constant for changefinder
    :param cf_thr_perc: percentile of offline anomaly scores to use for declaration of a change point
    :param append: specify whether to append original and scaled dataset for da or not
    :param max_samples: maximum samples to consider for data augmentation
    :param max_samples_factor: maximum samples to consider for data augmentation as a multiple of the season length
    :param o_perc: oversampling percentage for GN
    :param u_perc: undersampling percentage for GN
    :param thr: threshold for GN
    :param under_samp: specify whether to undersample for SMOGN
    :param rel_thr: relevance threshold for SMOGN
    :param rel_coef: relevance coefficient for SMOGN
    :param verbose: print debug info
    :return: list of detected change points, evars-gpr predictions, dictionary with predictions of comparison
        partners, number of refits
    """
    # Derive window/hazard/sample limits from the season length when not given.
    scale_window = max(scale_window_minimum, int(scale_window_factor * season_length)) \
        if scale_window is None else scale_window
    const_hazard = const_hazard_factor * season_length if const_hazard is None else const_hazard
    max_samples = max_samples_factor * season_length if max_samples is None else max_samples
    data = data.copy()
    data.reset_index(drop=True, inplace=True)
    train = data[:train_ind]
    # setup cpd: CPD operates on the seasonally-differenced target series.
    y_deseas = data[target_column].diff(season_length).dropna().values
    y_train_deseas = y_deseas[:train_ind-season_length]
    if cpd == 'bocd':
        mean = np.mean(y_train_deseas)
        std = np.std(y_train_deseas)
        train_std = (y_train_deseas - mean) / std
        bc = bocd.BayesianOnlineChangePointDetection(bocd.ConstantHazard(const_hazard),
                                                     bocd.StudentT(mu=0, kappa=1, alpha=1, beta=1))
        for i, d_bocd_train in enumerate(train_std):
            bc.update(d_bocd_train)
    elif cpd == 'cf':
        # Offline phase: score the training series to derive the CP threshold.
        scores = []
        cf = changefinder.ChangeFinder(r=cf_r, order=cf_order, smooth=cf_smooth)
        for i in y_train_deseas:
            scores.append(cf.update(i))
        cf_threshold = np.percentile(scores, cf_thr_perc)
        if verbose:
            print('CF_Scores_Train: threshold=' + str(cf_threshold) + ', mean=' + str(np.mean(scores))
                  + ', max=' + str(np.max(scores)) + ', 70perc=' + str(np.percentile(scores, 70))
                  + ', 80perc=' + str(np.percentile(scores, 80)) + ', 90perc=' + str(np.percentile(scores, 90))
                  + ', 95perc=' + str(np.percentile(scores, 95)))
    # online part
    test = data[train_ind:]
    y_train_deseas_manip = y_train_deseas.copy()
    rt_mle = np.empty(test[target_column].shape)
    predictions = None
    train_manip = train.copy()
    model = copy.deepcopy(base_model)
    # setup comparison partners
    if comparison_partners:
        model_cpd_retrain_full = copy.deepcopy(base_model)
        predictions_cpd_retrain_full = None
        model_cpd_moving_window_full = copy.deepcopy(base_model)
        predictions_cpd_moving_window_full = None
        predictions_cpd_scaled_full = None
    cp_detected = []
    output_scale_old = 1
    output_scale = 1
    n_refits = 0
    # iterate over whole test set
    # NOTE(review): DataFrame.append is deprecated in recent pandas versions;
    # this code presumably targets an older pandas — confirm before upgrading.
    for index in test.index:
        sample = test.loc[index]
        train_manip = train_manip.append(sample)
        # predict next target value
        prediction = model.predict(test=sample.to_frame().T, train=train_manip)
        if predictions is None:
            predictions = prediction.copy()
        else:
            predictions = predictions.append(prediction)
        # get predictions of comparison partners if specified
        if comparison_partners:
            prediction_cpd_retrain_full = model_cpd_retrain_full.predict(test=sample.to_frame().T,
                                                                         train=train_manip)
            prediction_cpd_moving_window_full = model_cpd_moving_window_full.predict(test=sample.to_frame().T,
                                                                                     train=train_manip)
            prediction_cpd_scaled_full = prediction.copy()
            prediction_cpd_scaled_full *= output_scale_old
            if predictions_cpd_retrain_full is None:
                predictions_cpd_retrain_full = prediction_cpd_retrain_full.copy()
                predictions_cpd_moving_window_full = prediction_cpd_moving_window_full.copy()
                predictions_cpd_scaled_full = prediction_cpd_scaled_full.copy()
            else:
                predictions_cpd_retrain_full = predictions_cpd_retrain_full.append(prediction_cpd_retrain_full)
                predictions_cpd_moving_window_full = \
                    predictions_cpd_moving_window_full.append(prediction_cpd_moving_window_full)
                predictions_cpd_scaled_full = predictions_cpd_scaled_full.append(prediction_cpd_scaled_full)
        # CPD on the seasonally-differenced value of the current sample
        change_point_detected = False
        y_deseas = sample[target_column] - data.loc[index-season_length][target_column]
        if cpd == 'bocd':
            d_bocd = (y_deseas - mean) / std
            bc.update(d_bocd)
            rt_mle_index = index-train_ind
            rt_mle[rt_mle_index] = bc.rt
            y_train_deseas_manip = np.append(y_train_deseas_manip, y_deseas)
            mean = np.mean(y_train_deseas_manip)
            std = np.std(y_train_deseas_manip)
            # a drop in the run-length MLE signals a change point
            if rt_mle_index > 0 and (rt_mle[rt_mle_index] - rt_mle[rt_mle_index-1] < 0):
                change_point_detected = True
                curr_ind = rt_mle_index
        elif cpd == 'cf':
            score = cf.update(y_deseas)
            scores.append(score)
            if score >= cf_threshold:
                if verbose:
                    print('Anomaly Score ' + str(score) + ' > ' + 'threshold ' + str(cf_threshold))
                change_point_detected = True
                curr_ind = index - train_ind
        # Trigger remaining EVARS-GPR procedures if a change point is detected
        if change_point_detected:
            if verbose:
                print('CP Detected ' + str(curr_ind + train.shape[0]))
            cp_detected.append(curr_ind)
            try:
                # Calculate output scaling factor: ratio of the mean just
                # before the change point to the mean at the same position in
                # the previous season(s).
                change_point_index = curr_ind + train.shape[0]
                mean_now = np.mean(data[change_point_index-scale_window+1:change_point_index+1][target_column])
                mean_prev_seas_1 = \
                    np.mean(data[change_point_index-season_length-scale_window+1:change_point_index-season_length+1]
                            [target_column])
                mean_prev_seas_2 = \
                    np.mean(data[change_point_index-2*season_length-scale_window+1:change_point_index-2*season_length+1]
                            [target_column])
                if scale_seasons == 1:
                    output_scale = mean_now / mean_prev_seas_1
                elif scale_seasons == 2:
                    output_scale = np.mean([mean_now / mean_prev_seas_1, mean_now / mean_prev_seas_2])
                if output_scale == 0:
                    raise Exception
                if verbose:
                    print('ScaleDiff=' + str(np.abs(output_scale - output_scale_old) / output_scale_old))
                # Check deviation to previous scale factor
                if np.abs(output_scale - output_scale_old) / output_scale_old > scale_thr:
                    n_refits += 1
                    if verbose:
                        print('try to retrain model: ' + str(change_point_index)
                              + ' , output_scale=' + str(output_scale))
                    if output_scale > 1:
                        focus = 'high'
                    else:
                        focus = 'low'
                    # augment data
                    train_samples = TrainHelper.get_augmented_data(data=data, target_column=target_column, da=da,
                                                                   change_point_index=curr_ind + train.shape[0],
                                                                   output_scale=output_scale, rel_coef=rel_coef,
                                                                   rel_thr=rel_thr, under_samp=under_samp,
                                                                   focus=focus, o_perc=o_perc, u_perc=u_perc,
                                                                   thr=thr, append=append, max_samples=max_samples)
                    # retrain current model
                    model = ModelsGaussianProcessRegression.GaussianProcessRegression(
                        target_column=base_model.target_column, seasonal_periods=base_model.seasonal_periods,
                        kernel=base_model.model.kernel_, alpha=base_model.model.alpha,
                        n_restarts_optimizer=base_model.model.n_restarts_optimizer,
                        standardize=base_model.standardize, normalize_y=base_model.model.normalize_y,
                        one_step_ahead=base_model.one_step_ahead)
                    model.train(train_samples, cross_val_call=False)
                    if comparison_partners:
                        train_data = data.copy()[:change_point_index+1]
                        # cpd Retrain
                        model_cpd_retrain_full = ModelsGaussianProcessRegression.GaussianProcessRegression(
                            target_column=base_model.target_column, seasonal_periods=base_model.seasonal_periods,
                            kernel=base_model.model.kernel_, alpha=base_model.model.alpha,
                            n_restarts_optimizer=base_model.model.n_restarts_optimizer,
                            standardize=base_model.standardize, normalize_y=base_model.model.normalize_y,
                            one_step_ahead=base_model.one_step_ahead)
                        model_cpd_retrain_full.train(train_data, cross_val_call=False)
                        # Moving Window
                        model_cpd_moving_window_full = ModelsGaussianProcessRegression.GaussianProcessRegression(
                            target_column=base_model.target_column, seasonal_periods=base_model.seasonal_periods,
                            kernel=base_model.model.kernel_, alpha=base_model.model.alpha,
                            n_restarts_optimizer=base_model.model.n_restarts_optimizer,
                            standardize=base_model.standardize, normalize_y=base_model.model.normalize_y,
                            one_step_ahead=base_model.one_step_ahead)
                        model_cpd_moving_window_full.train(train_data[-season_length:], cross_val_call=False)
                    # in case of a successful refit change output_scale_old
                    output_scale_old = output_scale
            except Exception as exc:
                print(exc)
    if comparison_partners:
        comparison_partners_dict = {'cpd_retrain_full': predictions_cpd_retrain_full,
                                    'cpd_cpd_moving_window_full': predictions_cpd_moving_window_full,
                                    'cpd_scaled_full': predictions_cpd_scaled_full
                                    }
    else:
        comparison_partners_dict = {}
    return cp_detected, predictions, comparison_partners_dict, n_refits
def execute(self, data, r, order, smooth):
    """Score `data` with ChangeFinder, normalize the scores, and plot."""
    detector = changefinder.ChangeFinder(r, order, smooth)
    raw_scores = [detector.update(value) for value in data]
    normalized = self.getNormalized(raw_scores)
    self.plot_result(data, r, order, smooth, normalized)
import matplotlib.pyplot as plt
import changefinder
import numpy as np

# Synthetic series with four level shifts for ChangeFinder to pick up.
# (Same four normal() calls in the same order as before, so the RNG
# stream and resulting data are unchanged.)
segments = [
    np.random.normal(0.7, 0.05, 300),
    np.random.normal(1.5, 0.05, 300),
    np.random.normal(0.6, 0.05, 300),
    np.random.normal(1.3, 0.05, 300),
]
data = np.concatenate(segments)

# Score every point online.
cf = changefinder.ChangeFinder(r=0.01, order=1, smooth=7)
ret = [cf.update(value) for value in data]
print(ret)

# Plot the scores with the raw series on a twin axis.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(ret)
ax2 = ax.twinx()
ax2.plot(data, 'r')
plt.show()
def find_changepoints_for_time_series(series, modeltype="binary", number_breakpoints=10, plot_flag=True,
                                      plot_with_dates=False, show_time_flag=False):
    """Detect change points in a time series.

    Args:
        series: pandas Series holding the signal (index used when plotting
            with dates).
        modeltype: "binary", "pelt", "window", "Dynamic" (ruptures offline
            methods) or "online" (changefinder).
        number_breakpoints: number of breakpoints requested (ignored by "pelt").
        plot_flag: plot the detected segments.
        plot_with_dates: plot against the series index instead of positions.
        show_time_flag: print the elapsed processing time.

    Returns:
        list of change-point indices; the series length is always included.

    Raises:
        ValueError: if modeltype is not one of the supported values.
    """
    # RUPTURES PACKAGE
    points = series.values
    title = ""
    t0 = time.time()
    # FIX: these branches were independent `if` statements; an unknown
    # modeltype fell through to an unbound `result` (NameError). Converted
    # to an elif chain with an explicit error.
    if modeltype == "binary":
        title = "Change Point Detection: Binary Segmentation Search Method"
        changepoint_model = rpt.Binseg(model="l2").fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    elif modeltype == "pelt":
        title = "Change Point Detection: Pelt Search Method"
        changepoint_model = rpt.Pelt(model="rbf").fit(points)
        result = changepoint_model.predict(pen=10)
    elif modeltype == "window":
        title = "Change Point Detection: Window-Based Search Method"
        changepoint_model = rpt.Window(width=40, model="l2").fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    elif modeltype == "Dynamic":
        title = "Change Point Detection: Dynamic Programming Search Method"
        changepoint_model = rpt.Dynp(model="l1", min_size=3, jump=5).fit(points)
        result = changepoint_model.predict(n_bkps=number_breakpoints)
    elif modeltype == "online":
        # CHANGEFINDER PACKAGE
        title = "Simulates the working of finding changepoints in online fashion"
        cf = changefinder.ChangeFinder()
        scores = [cf.update(p) for p in points]
        # take the indices of the highest anomaly scores as change points
        result = (-np.array(scores)).argsort()[:number_breakpoints]
        result = sorted(list(result))
    else:
        raise ValueError("unknown modeltype: " + repr(modeltype))
    if series.shape[0] not in result:
        result.append(series.shape[0])
    if show_time_flag:
        elapsed_time = time.time() - t0
        print("[exp msg] elapsed time for process: " + str(time.strftime("%H:%M:%S", time.gmtime(elapsed_time))))
    if plot_flag:
        if not plot_with_dates:
            rpt.display(points, result, figsize=(18, 6))
            plt.title(title)
            plt.show()
        else:
            series.plot(figsize=(18, 6))
            plt.title(title)
            # shade alternating segments between consecutive change points
            for i in range(len(result) - 1):
                if i % 2 == 0:
                    current_color = 'xkcd:salmon'
                else:
                    current_color = 'xkcd:sky blue'
                plt.fill_between(series.index[result[i]:result[i + 1]], y1=series.max() * 1.1,
                                 y2=series.min() * 0.9, color=current_color, alpha=0.3)
            plt.show()
    return result
# NOTE(review): this fragment continues a script; `algo`, `points`, `rpt`,
# `plt`, `np` and `changefinder` are defined/imported earlier, outside this
# chunk — confirm against the full file.
my_bkps = algo.predict(n_bkps=10)
rpt.show.display(points, my_bkps, figsize=(10, 6))
plt.title('Change Point Detection: Window-Based Search Method')
plt.show()

# Changepoint detection with dynamic programming search method
model = "l1"
algo = rpt.Dynp(model=model, min_size=3, jump=5).fit(points)
my_bkps = algo.predict(n_bkps=10)
rpt.show.display(points, my_bkps, figsize=(10, 6))
plt.title('Change Point Detection: Dynamic Programming Search Method')
plt.show()

# Create a synthetic data set to test against: two level shifts.
points = np.concatenate([
    np.random.rand(100) + 5,
    np.random.rand(100) + 10,
    np.random.rand(100) + 5
])

# CHANGEFINDER PACKAGE: raw data on top, anomaly scores below.
f, (ax1, ax2) = plt.subplots(2, 1)
f.subplots_adjust(hspace=0.4)
ax1.plot(points)
ax1.set_title("data point")

# Initiate changefinder function
cf = changefinder.ChangeFinder()
scores = [cf.update(p) for p in points]
ax2.plot(scores)
ax2.set_title("anomaly score")
plt.show()