def test_compOutlier(self):
    """Test outlier detection."""
    # Pass case: data with exactly one injected anomaly
    pfile = "passFunTime.pickle"
    with open(pfile, 'rb') as handle:
        passData = pickle.load(handle)
    passOtl = outlier.Outlier("test.cfg")
    funId = 0
    passOtl.compOutlier(passData, funId)
    passFunOutl = np.array(passOtl.getOutlier())
    passFunOutlId = passData[passFunOutl == -1][:, 6]
    passNumOutl = len(passFunOutlId)
    self.assertEqual(passNumOutl, 1)

    # Fail case: data with no anomalies; nothing should be flagged
    pfile = "failFunTime.pickle"
    with open(pfile, 'rb') as handle:
        failData = pickle.load(handle)
    failOtl = outlier.Outlier("test.cfg")
    funId = 0
    failOtl.compOutlier(failData, funId)
    failFunOutl = np.array(failOtl.getOutlier())
    failFunOutlId = failData[failFunOutl == -1][:, 6]
    failNumOutl = len(failFunOutlId)
    self.assertEqual(failNumOutl, 0)
def generate_outlier(self, X_test, y_test, dist, percent):
    # Generate anomalous trajectories for each label and stack them in label order
    normalized, not_normalized = None, None
    for i in range(self.y.shape[1]):
        out = outlier.Outlier(i, X_test, y_test, self.s, self.size_trajectory)
        traj_anom_norm, traj_anom = out.get_noise_trajectory(dist, percent)
        if i == 0:
            not_normalized = traj_anom
            normalized = traj_anom_norm
        else:
            not_normalized = np.concatenate([not_normalized, traj_anom])
            normalized = np.concatenate([normalized, traj_anom_norm])
    return not_normalized, normalized
def outlier_test(wsize, factor, low, high):
    data = np.random.randint(low, high, 1000)
    out = outlier.Outlier(wsize, factor, max_drift=0.001)
    success_count = 0
    count = 0
    for d in data:
        out.add(d)
        if not out.rs.ready:
            continue
        # Compare the detector's verdict against a plain numpy
        # mean/std threshold over the current window
        a = np.array(list(out.rs))
        s = np.std(a)
        m = np.mean(a)
        probe = np.random.randint(low + high // 2, high * factor)
        np_outlier = probe >= (factor * s + m)
        if out.check(probe) == np_outlier:
            success_count += 1
        count += 1
    return success_count / count
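# A minimal driver sketch for outlier_test above (not from the source): the
# window size, threshold factor, and value range are illustrative assumptions.
if __name__ == "__main__":
    agreement = outlier_test(wsize=50, factor=3, low=0, high=100)
    print("agreement with numpy mean/std threshold: {:.2%}".format(agreement))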
def data_file():
    fname = tk.filedialog.askopenfilename(filetypes=[("JSON", ".json")],
                                          defaultextension='.json')
    # Note: `is not ''` compares identity, not equality; use != instead
    if fname != '':
        data_file.outlier = outlier.Outlier(fname)
        scr2.insert(tk.END, datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    + '\nOpened file ' + fname + '\n\n')
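# Hedged wiring sketch (assumed, not from the source): attaching data_file to a
# Tk button; `root` is the assumed Tk root window created elsewhere in the GUI.
open_btn = tk.Button(root, text="Open JSON file", command=data_file)
open_btn.pack()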
if parseMethod == "BP":
    # Determine event types
    eventTypeDict = prs.getEventType()
    numEventTypes = len(eventTypeDict)
    eventTypeList = [None] * numEventTypes
    assert (numEventTypes > 0), "No event types detected (Assertion)...\n"
    for i in range(0, numEventTypes):
        eventTypeList[i] = eventTypeDict[i]

    # Initialize event class
    evn = event.Event(sys.argv[1])
    evn.setEventType(eventTypeList)

    # Initialize outlier class
    otl = outlier.Outlier(sys.argv[1])

    # Initialize visualizer class
    maxDepth = int(config['Visualizer']['MaxFunDepth'])
    viz = visualizer.Visualizer(sys.argv[1])

    # Reset visualization server
    viz.sendReset()

    # In non-streaming mode, send the function map and event types
    # to the visualization server
    viz.sendEventType(eventTypeList, 0)
    funMap = prs.getFunMap()
    viz.sendFunMap(list(funMap.values()), 0)
else:
    # Initialize event object
    evn = event.Event(sys.argv[1])
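# Hedged note: sys.argv[1] is evidently a configuration path (compare the
# "test.cfg" argument in the unit test above), so a typical invocation of this
# driver would be something like:
#     python driver.py test.cfg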
def populate_initial_data(wsize):
    out = outlier.Outlier(wsize, 3)
    for _ in range(wsize):
        out.add(random.randint(0, 100))
    return out
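# Hedged usage sketch (assumed values): warm up a detector with
# populate_initial_data, then probe it with a value far outside the 0-100
# range seen so far; per the check() semantics exercised in outlier_test
# above, such a probe should be flagged.
detector = populate_initial_data(wsize=50)
print(detector.check(10000))  # expected: True for an extreme value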
"""train_df = pd.read_csv("../datasets/ecg_prep/train1.csv", sep=";", squeeze=True, index_col=0).sort_index() test_df = pd.read_csv("../datasets/ecg_prep/test1.csv", sep=";", squeeze=True, index_col=0).sort_index() outl_df = pd.read_csv("../datasets/ecg_prep/val1.csv", sep=";", squeeze=True, index_col=0).sort_index()""" train_df = pd.read_csv("../datasets/monthly_sunspots/train.csv", sep=";", squeeze=True) test_df = pd.read_csv("../datasets/monthly_sunspots/test.csv", sep=";", squeeze=True) outl_df = pd.read_csv("../datasets/monthly_sunspots/outl.csv", sep=";", squeeze=True) N_outl_ts = 10 for i in range(N_outl_ts): outl = outlier.Outlier(type="contextual", n_outliers=1, size=25) outl_data, outl_idxs = outl.generate(data=outl_df.values.copy(), finetuning=[1], constant=False) labels = np.zeros(len(outl_df)) plt.plot(outl_data, label="Original") for o in outl_idxs: plt.plot(o, outl_data[o], label="Outlier") labels[o] = True plt.legend(loc="upper right") name_ds = "data" + str(i) + "/" os.mkdir(PREFIX + name_ds) np.save(Path(PREFIX + name_ds + "train"), train_df.values) np.save(Path(PREFIX + name_ds + "test"), test_df.values)
#%%
X_tokens, key_set = get_uber_tokens(X)
X_train, y_train, X_val, y_val, X_test, y_test = get_splitted_data(X, y)
X_tokens_train, _ = get_uber_tokens(X_train)
X_tokens_val, _ = get_uber_tokens(X_val)
X_tokens_test, _ = get_uber_tokens(X_test)

#%%
# Generate anomalies from the test data, stacking them in label order, e.g. 0, 1, ...
for i in range(y.shape[1]):
    out = outlier.Outlier(i, X_test, y_test, s)
    traj_anom_norm, traj_anom = out.get_noise_trajectory(1., 0.3)
    if i == 0:
        traj_anom_test = traj_anom
    else:
        traj_anom_test = np.concatenate([traj_anom_test, traj_anom])

# Sanity check (displayed as interactive cell output): compare one anomalous
# trajectory against the corresponding original test trajectory
traj_anom_test[0:1, :, 1] == X_test[148:149, :, 1]

#%%
def get_all_tokens(old_list, new_list):
    set_old = set(old_list)
    set_new = set(new_list)
    diff = set_new.difference(set_old)
    old_list += diff
def test_no_outlier():
    data = np.random.random(100)
    out = outlier.Outlier(100, 5)
    for d in data:
        assert not out.add_and_check(d)
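# Hedged companion sketch (not from the source): the same API should flag a
# point far outside the uniform [0, 1) samples once the window is full.
def test_obvious_outlier_sketch():
    out = outlier.Outlier(100, 5)
    for d in np.random.random(100):
        out.add(d)
    assert out.check(1000.0)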