def predict(data, threshold):
    '''
    Label ``data`` from an equally weighted combination of detector scores.

    :param data: input passed straight through to ``GetScore``
    :param threshold: forwarded to ``GetScore`` and then used as the cut-off
        handed to ``util.score2label_threshold``
    :return: labels produced by ``util.score2label_threshold``
    '''
    from BaseDetectors.getScore import GetScore

    # NOTE(review): ``seasonal`` is neither a parameter nor a local of this
    # function; it has to be resolvable from an enclosing/module scope.
    detector_scores = GetScore(data, threshold, seasonal)

    # Five equal weights (0.2 each) applied to the returned scores.
    equal_weights = [0.2] * 5
    combined = np.dot(equal_weights, detector_scores)
    return util.score2label_threshold(combined, threshold)
def predict(self, data):
    '''
    Train on ``data``, then score and label it with ``self.Z_Score``.

    :param data: series handed to ``self.train`` and ``self.Z_Score``
    :return: ``(normalized_score, label)`` where the score is the normalized
        absolute value of ``self.Z_Score(data)``
    '''
    self.train(data)
    deviation = self.Z_Score(data)
    normalized = util.normalize(abs(deviation))
    labels = util.score2label_threshold(
        score=normalized, percentage=self.threshold_percentage)
    return normalized, labels
def evaluate(name, isscore, value, y_true, threshold=0.99, txt=''):
    '''
    Compute precision/recall/F1 for a detector and format them as a CSV row.

    :param name: identifier written as the first CSV field
    :param isscore: True: ``value`` holds scores, False: ``value`` holds labels
    :param value: score or label
    :param y_true: ground-truth labels
    :param threshold: cut-off passed to ``score2label_threshold`` (scores only)
    :param txt: free text; last CSV field, or prefix of the all-zero message
    :return: ``(precision, recall, f1, roc, pr, precision_eval, recall_eval,
        f1_eval, labels, text)``; the eight metrics are all 0 when ``y_true``
        contains no positive entry
    '''
    # ROC / PR are only available when scores are supplied.
    roc, pr = 0, 0
    if isscore:
        score = np.copy(value)
        roc, pr = get_preformance(score, y_true)
        y_pre = score2label_threshold(score, threshold)
    else:
        y_pre = np.copy(value)

    # Without any positive ground truth the metrics are reported as zeros.
    if sum(y_true) == 0:
        return (0, 0, 0, 0, 0, 0, 0, 0, y_pre,
                txt + 'In this section, y_true is all 0!!\n')

    # Imported lazily so the early-return path above does not need sklearn.
    from sklearn.metrics import precision_recall_fscore_support

    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true=y_true, y_pred=y_pre, average="binary")

    # Second set of metrics on the labels adjusted by ``label_evaluation``.
    newresult = label_evaluation(y_true, y_pre)
    precision_eval, recall_eval, f1_eval, _ = precision_recall_fscore_support(
        y_true=y_true, y_pred=newresult, average="binary")

    fields = [name, precision, recall, f1, roc, pr,
              precision_eval, recall_eval, f1_eval]
    text = ','.join([str(field) for field in fields] + [txt]) + '\n'
    return (precision, recall, f1, roc, pr,
            precision_eval, recall_eval, f1_eval, newresult, text)
def predict(self, data, seasonal=None):
    '''
    Score and label ``data`` using ``self.seasonal_esd``.

    :param data: series passed to ``self.seasonal_esd``
    :param seasonal: forwarded as the ``seasonality`` argument
    :return: ``(abs(normalized_score), label, seasonal2)`` where ``seasonal2``
        is the second value returned by ``self.seasonal_esd``
    '''
    residual, fitted_seasonal = self.seasonal_esd(
        data, seasonality=seasonal, max_anomalies=self.max_feed_len)
    normalized = util.normalize(residual)
    # Labels are derived from the signed normalized values; only the returned
    # score is made absolute.
    labels = util.score2label_threshold(
        normalized, percentage=self.threshold_percentage)
    return abs(normalized), labels, fitted_seasonal
def withstress_query(self, ts, score_list):
    '''
    Combine the detector scores and decide on anomalies / label queries.

    :param ts: not used by this method
    :param score_list: per-detector scores, combined with ``self.weight``
    :return: ``(is_anomaly, need_labeled, 0)``; ``is_anomaly`` is the last
        ``self.batch`` labels, ``need_labeled`` comes from ``self.needlabeled``
    '''
    # Ensemble anomaly score: weighted combination of the detector scores.
    combined = np.dot(self.weight, np.copy(score_list))

    # Decide which points are anomalous and whether a label must be queried.
    labels = util.score2label_threshold(combined, self.threshold)
    return labels[-self.batch:], self.needlabeled(combined), 0
def predict(self, score, y_true, dropstress=False):
    '''
    Update the weights, combine the scores and label the result.

    :param score: per-detector scores
    :param y_true: labels passed to ``self.updata_weight``
    :param dropstress: when True, delegate labelling to ``self.DropSeasonal``
    :return: ``(y_pre, y_stress, combined_score, nostress_score)``
    '''
    self.updata_weight(score, y_true)
    combined = util.getsum_score(score, self.weight)

    if dropstress:
        y_pre, y_stress, nostress_score = self.DropSeasonal(combined)
        return y_pre, y_stress, combined, nostress_score

    # No stress handling: nothing is flagged as stress and the score is
    # returned unchanged (as a copy).
    y_pre = util.score2label_threshold(combined, self.prethreshold)
    return y_pre, np.zeros(len(y_pre)), combined, np.copy(combined)
def predict(self, data):
    '''
    Score ``data`` by its distance to a one-step exponential-smoothing forecast.

    ``self.types`` selects the recurrence: ``'linear'`` (level + trend),
    ``'additive'`` or ``'multiplicative'`` (level + trend + seasonal terms
    with season length ``self.m``).

    :param data: series to score
    :return: ``(normalized_score, label)``
    :raises ValueError: if ``self.types`` is not one of the three modes
    '''
    series = data
    mode = self.types

    if mode == 'linear':
        alpha, beta = self.alpha, self.beta
        level = [series[0]]
        trend = [series[1] - series[0]]
        forecast = [level[0] + trend[0]]
        for t in range(len(series)):
            observed = series[t]
            prev_level, prev_trend = level[t], trend[t]
            new_level = alpha * observed + (1 - alpha) * (prev_level + prev_trend)
            new_trend = beta * (new_level - prev_level) + (1 - beta) * prev_trend
            level.append(new_level)
            trend.append(new_trend)
            forecast.append(new_level + new_trend)
    else:
        alpha, beta, gamma = self.alpha, self.beta, self.gama
        m = self.m
        # Initial level: mean of the first season; initial trend: difference
        # between the sums of the first two seasons divided by m**2.
        level = [sum(series[0:m]) / float(m)]
        trend = [(sum(series[m:2 * m]) - sum(series[0:m])) / m**2]

        if mode == 'additive':
            season = [series[k] - level[0] for k in range(m)]
            forecast = [level[0] + trend[0] + season[0]]
            for t in range(len(series)):
                observed = series[t]
                prev_level, prev_trend = level[t], trend[t]
                new_level = (alpha * (observed - season[t])
                             + (1 - alpha) * (prev_level + prev_trend))
                new_trend = beta * (new_level - prev_level) + (1 - beta) * prev_trend
                level.append(new_level)
                trend.append(new_trend)
                season.append(gamma * (observed - prev_level - prev_trend)
                              + (1 - gamma) * season[t])
                forecast.append(new_level + new_trend + season[t + 1])
        elif mode == 'multiplicative':
            season = [series[k] / level[0] for k in range(m)]
            forecast = [(level[0] + trend[0]) * season[0]]
            for t in range(len(series)):
                observed = series[t]
                prev_level, prev_trend = level[t], trend[t]
                new_level = (alpha * (observed / season[t])
                             + (1 - alpha) * (prev_level + prev_trend))
                new_trend = beta * (new_level - prev_level) + (1 - beta) * prev_trend
                level.append(new_level)
                trend.append(new_trend)
                season.append(gamma * (observed / (prev_level + prev_trend))
                              + (1 - gamma) * season[t])
                forecast.append((new_level + new_trend) * season[t + 1])
        else:
            raise ValueError(
                "ERROR: unsupported type, expect linear, additive or multiplicative."
            )

    # The loops emit one forecast more than there are observations.
    forecast.pop()

    residual = abs(data - forecast)
    normalized = util.normalize(abs(residual))
    labels = util.score2label_threshold(
        score=normalized, percentage=self.threshold_percentage)
    return normalized, labels
def predict(self, data):
    '''
    Score ``data`` by its distance to an exponentially smoothed version of it.

    :param data: series to score
    :return: ``(normalized_score, label)``
    '''
    beta = self.beta
    n_points = len(data)

    # smoothed[t] = beta * smoothed[t - 1] + (1 - beta) * data[t],
    # seeded with the first observation.
    smoothed = np.zeros(n_points)
    smoothed[0] = data[0]
    for t in range(1, n_points):
        smoothed[t] = beta * smoothed[t - 1] + (1 - beta) * data[t]

    residual = abs(data - smoothed)
    normalized = util.normalize(abs(residual))
    labels = util.score2label_threshold(
        score=normalized, percentage=self.threshold_percentage)
    return normalized, labels
def update_weight(self):
    '''
    Re-estimate ``self.weight`` from the feedback collected so far.

    Feedback values: -1 marks an unlabeled point and is ignored; 2 is
    folded into 1 before the labels are used. Each detector's accuracy on
    the labeled points biases a random search: in every round 200 candidate
    weight vectors are drawn around the current weights, the one with the
    lowest ``self.loss`` (the current weights included) is kept, and the
    search stops once the per-label improvement of the best loss is at most
    0.001.

    Does nothing when no point has been labeled yet.

    :return: None; ``self.weight`` is updated in place
    '''
    from sklearn.metrics import accuracy_score

    # Fold value 2 into 1 and keep only the labeled positions.
    feedback = np.copy(self.feedback)
    feedback[feedback == 2] = 1
    labeled_idx = np.where(feedback != -1)[0]
    if len(labeled_idx) == 0:
        # No feedback to learn from (and the stop criterion below would
        # divide by zero), so leave the weights untouched.
        return

    # One row of scores per detector, restricted to the labeled points.
    score = [detector_score[labeled_idx] for detector_score in self.score]
    # Use the mapped copy so the labels are binary; taking them from
    # self.feedback would leave the value 2 in place.
    label = feedback[labeled_idx]

    # Bias w: accuracy of each detector on the labeled points.
    w = np.zeros(len(score))
    for i, singlescore in enumerate(score):
        pre_label = util.score2label_threshold(singlescore, self.threshold)
        w[i] = accuracy_score(label, pre_label)

    # Random search: 200 candidate weight vectors per round.
    loss = [10000]
    count = 1
    last_loss = 0
    while abs(last_loss - min(loss)) / len(label) > 0.001:
        last_loss = min(loss)
        weightlist = [self.weight]
        for _ in range(200):
            weight = np.zeros(len(w))
            for j in range(len(w)):
                # The perturbation range shrinks as the rounds progress.
                alpha = random.uniform(1, 1 + 1 / count)
                # Accurate detectors are more likely to be scaled up.
                if random.random() <= w[j]:
                    weight[j] = self.weight[j] * alpha
                else:
                    weight[j] = self.weight[j] / alpha
            weightlist.append(util.norm_to_1(weight))

        loss = [self.loss(score, label, weight) for weight in weightlist]
        self.weight = weightlist[loss.index(min(loss))]
        count += 1
    return
def predict(self, data):
    '''
    Score ``data`` by its distance to a trailing moving average.

    The prediction for position ``t >= win_size`` is the mean of the
    ``win_size`` preceding points; the first ``win_size`` positions are
    predicted by the data itself. When ``self.quired`` is set, points
    recorded there as anomalous are first replaced by the mean of the
    window preceding them, so they do not distort later averages.

    :param data: series to score
    :return: ``(normalized_score, label)``
    '''
    win_size = self.win_size
    evaldata = np.copy(data)

    # ``is not None``: an ``== None`` test is elementwise (and ambiguous in
    # an ``if``) when ``self.quired`` is a numpy array.
    if self.quired is not None:
        # NOTE(review): items are read as (index, label) pairs with label 1
        # meaning anomaly - confirm against the code that fills self.quired.
        for item in self.quired:
            if item[1] == 1 and item[0] != 0:  # anomaly
                start = int(max(item[0] - win_size, 0))
                evaldata[item[0]] = np.average(evaldata[start:item[0]])

    # With no feedback ``evaldata`` is an unmodified copy of ``data``, so one
    # code path serves both cases.
    res = np.array([
        np.average(evaldata[i:i + win_size])
        for i in range(len(data) - win_size)
    ])
    predict_value = np.concatenate((evaldata[:win_size], res))

    diff_value = abs(data - predict_value)
    diff_value_normalize = util.normalize(abs(diff_value))
    result = util.score2label_threshold(
        score=diff_value_normalize, percentage=self.threshold_percentage)
    return diff_value_normalize, result
def getstresstest(self, score, repeat=20, d=0.1, p=0.5):
    '''
    Flag anomalies that recur once per season as stress-test points.

    :param score: anomaly scores
    :param self.feedback: 1:outlier, 0:inlier, -1:unlabeled, 2:pressure test
    :param repeat: number of seasons over which an anomaly should recur
    :param d: tolerated drift of the recurrence around the nominal season;
        values below 1 are a fraction of ``self.seasonal``, otherwise absolute
    :param p: fraction of ``repeat`` that must be exceeded by the number of
        recurrences found
    :return: array with 1 at the points judged to be stress tests, else 0
    :raises ValueError: if the deviation is at least half a season
    '''
    season = self.seasonal
    deviation = d if d >= 1 else int(d * season)
    if deviation >= season / 2:
        raise ValueError("deviation >= self.seasonal/2.", deviation,
                         season / 2)

    working_score = np.copy(score)
    threshold = min(0.999, self.threshold - 2 / season)
    label = util.score2label_threshold(working_score, percentage=threshold)

    total = len(score)
    # Never look at more seasons than the series contains.
    repeat = min(repeat, total // season)
    half_span = repeat * season / 2

    def recurs_near(center):
        # True when any anomaly is labeled within ``deviation`` of ``center``.
        lo = max(0, center - deviation)
        hi = min(total - 1, center + deviation)
        return sum(label[lo:hi]) != 0

    # Candidate stress-test points.
    candidates = []
    for i in np.where(label == 1)[0]:
        # Split ``repeat`` into seasons to inspect before and after ``i``,
        # shifting the split near either end of the series.
        if i <= half_span:
            forward = i // season
            back = repeat - forward
        elif i >= total - half_span:
            back = (total - i) // season
            forward = repeat - back
        else:
            back = forward = repeat // 2

        # Count seasons after, then before, ``i`` where an anomaly recurs.
        hits = sum(1 for j in range(1, back) if recurs_near(i + season * j))
        hits += sum(1 for j in range(1, forward) if recurs_near(i - season * j))
        if hits > p * repeat:
            candidates.append(i)

    if len(candidates) == 0:
        return np.zeros(len(score))

    # Greedily move the best-scoring candidate into the confirmed set until
    # the best remaining score drops below the acceptance line.
    confirmed = []
    for _ in range(len(candidates)):
        line = (len(confirmed) / 2 + len(candidates)) * p
        stress_score = self.calculate_test_statistic(
            candidates, confirmed, deviation)
        if max(stress_score) < line:
            break
        best = int(np.argmax(stress_score))
        confirmed.append(candidates.pop(best))

    if len(confirmed) != 0:
        # Only touches the local copy; it has no effect on the return value.
        working_score[confirmed] = 0
    label_stress = np.zeros(len(score))
    label_stress[confirmed] = 1
    return label_stress
def DropSeasonal(self, score, repeat=20, d=0.1, p=0.5):
    '''
    Label ``score`` and remove anomalies that recur once per season.

    :param score: anomaly scores
    :param repeat: number of seasons over which an anomaly should recur
    :param d: tolerated drift of the recurrence around the nominal season;
        values below 1 are a fraction of ``self.seasonal``, otherwise absolute
    :param p: fraction of ``repeat`` that must be exceeded by the number of
        recurrences found
    :return: ``(label, y_stress, nostress_score)`` - the labels with the
        seasonal points cleared, a 0/1 array marking those points, and a copy
        of ``score`` with those points set to 0. The same three-value shape is
        returned when no seasonal point is found (``y_stress`` all zeros,
        score copy unchanged).
    :raises ValueError: if the deviation is at least half a season
    '''
    import math

    season = self.seasonal
    deviation = d if d >= 1 else int(d * season)
    if deviation >= season / 2:
        raise ValueError("deviation >= self.seasonal/2.", deviation,
                         season / 2)

    diff_value_normalize = np.copy(score)
    label = util.score2label_threshold(diff_value_normalize,
                                       percentage=self.truethreshold)
    anomaly_indices = np.where(label == 1)[0]

    total = len(score)
    # Never look at more seasons than the series contains.
    repeat = min(repeat, int(total / season))
    half_span = repeat * season / 2

    def recurs_near(center):
        # True when any anomaly is labeled within ``deviation`` of ``center``.
        start = max(0, center - deviation)
        end = min(total - 1, center + deviation)
        return sum(label[start:end]) != 0

    # Candidate stress-test points.
    stresstest = []
    for i in anomaly_indices:
        # Split ``repeat`` into seasons to inspect before and after ``i``,
        # shifting the split near either end of the series.
        if i <= half_span:
            forward = math.ceil(i / season)
            back = repeat - forward
        elif i >= total - half_span:
            back = math.ceil((total - i) / season)
            forward = repeat - back
        else:
            back = forward = math.ceil(repeat / 2)

        # Count seasons after, then before, ``i`` where an anomaly recurs.
        flag = sum(1 for j in range(1, back) if recurs_near(i + season * j))
        flag += sum(1 for j in range(1, forward) if recurs_near(i - season * j))
        if flag > p * repeat:
            stresstest.append(i)

    if len(stresstest) == 0:
        # Nothing seasonal: label with the prediction threshold and return
        # the same three-value shape as the main path so callers can unpack
        # the result uniformly.
        label = util.score2label_threshold(diff_value_normalize,
                                           percentage=self.prethreshold)
        return label, np.zeros(len(label)), diff_value_normalize

    # Greedily move the best-scoring candidate into the final set until the
    # best remaining score drops below the acceptance line.
    final_stress_index = []
    for _ in range(len(stresstest)):
        line = (len(final_stress_index) / 2 + len(stresstest)) * p
        stress_score = self.calculate_test_statistic(
            stresstest, final_stress_index, label, deviation)
        if max(stress_score) < line:
            break
        max_idx = int(np.argmax(stress_score))
        final_stress_index.append(stresstest.pop(max_idx))

    if len(final_stress_index) != 0:
        diff_value_normalize[final_stress_index] = 0
    y_stress = np.zeros(len(label))
    y_stress[final_stress_index] = 1
    label[final_stress_index] = 0
    return label, y_stress, diff_value_normalize
def score2label(self, y_scores):
    '''
    Convert scores to labels using ``self.threshold_percentage``.

    :param y_scores: scores handed to ``util.score2label_threshold``
    :return: whatever ``util.score2label_threshold`` returns for them
    '''
    return util.score2label_threshold(
        y_scores, percentage=self.threshold_percentage)