def luminol_artificial(other=False):
    """Run luminol's anomaly detector on the artificial data and print metrics.

    The train and test series are loaded with ``loader_generic``, wrapped in
    ``Dataset`` and concatenated (train first) before being handed to luminol.
    Only the scores that belong to the test part are kept; they are divided by
    their maximum when that maximum is non-zero and then evaluated with
    ``get_thres`` for beta = 1 and beta = 0.1. Results are printed, nothing is
    returned.

    Args:
        other: When truthy, ``artificial_test2.csv`` is used as the test set
            instead of ``artificial_test1.csv``.
    """
    test_path = "../data/artificial/artificial_test1.csv"
    if other:
        test_path = "../data/artificial/artificial_test2.csv"

    train_raw, _ = loader_generic("../data/artificial/artificial_train.csv")
    test_raw, labels = loader_generic(test_path)

    lag_size = 1500
    train_data = Dataset(train_raw, 1, 0, lag_size, 1, 45).data
    test_data = Dataset(test_raw, 1, 0, lag_size, 1, 45).data

    lumi_params = {
        "precision": 8,
        "lag_window_size": lag_size,
        "future_window_size": 1500,
        "chunk_size": 7,
    }

    # luminol takes the series as a dict; the position in the concatenated
    # train + test sequence is used as the key.
    ts = dict(enumerate(np.concatenate([train_data, test_data])))

    detector = luminol.anomaly_detector.AnomalyDetector(
        ts, algorithm_params=lumi_params)

    # Collect the score values in the order luminol yields them.
    all_scores = [value for _, value in detector.get_all_scores().iteritems()]

    # Drop the leading entries that correspond to the training data.
    scores = np.array(all_scores[len(train_data):])

    # Scale by the largest score; skipped when it is zero to avoid dividing
    # by zero.
    peak = scores.max()
    if peak != 0:
        scores /= peak

    # Same evaluation and report for each beta value.
    for beta, header in ((1, "beta 1"), (0.1, "beta 0.1")):
        thres, f1, fpr, rfpr, p, r, tot_pred, tot_labels, tot_cor, rpc, roc = get_thres(
            labels[:len(scores)], scores, beta)
        print(header)
        print(
            "thres %s |f1 %s |fpr %s |rfpr %s |p %s |r %s |tot_pred %s |tot_labels %s |tot_cor %s |rpc %s |roc %s"
            % (thres, f1, fpr, rfpr, p, r, tot_pred, tot_labels, tot_cor, rpc, roc))
def som_taxi():
    """Run the SOM anomaly detector on the NYC taxi data and print metrics.

    Train and test series are loaded with ``loader_generic`` and wrapped in
    ``Dataset`` objects. Every training item is fed to the detector first and
    its return value is ignored. Each test item is then fed in turn, and the
    value returned for it is spread over the ``wsize`` score positions that
    start at the item's index, keeping the largest value seen per position.
    The resulting scores are evaluated with ``get_thres`` for beta = 1 and
    beta = 0.1. Results are printed, nothing is returned.
    """
    train_raw, _ = loader_generic("../data/taxi/nyc_taxi_train.csv")
    test_raw, labels = loader_generic("../data/taxi/nyc_taxi_test.csv")

    wsize = 175
    train_set = Dataset(train_raw, 1, 0, wsize, 1, 5)
    test_set = Dataset(test_raw, 1, 0, wsize, 1, 5)

    detector = SomAnomalyDetector(8, 8, wsize, 5, 0.001, 1400, decay_factor=0.5)

    # Training pass: the returned scores are discarded (original note: the
    # train set is used just to get statistics into the model).
    for idx in range(len(train_set)):
        detector.add_data_point(train_set[idx])

    # Test pass: one score slot per element of the underlying test data.
    scores = np.zeros(len(test_set.data))
    for start in range(len(test_set)):
        window_score = detector.add_data_point(test_set[start])
        # Each position keeps the highest score of any window covering it.
        for pos in range(start, start + wsize):
            scores[pos] = max(scores[pos], window_score)

    # Same evaluation and report for each beta value.
    for beta, header in ((1, "beta 1"), (0.1, "beta 0.1")):
        thres, f1, fpr, rfpr, p, r, tot_pred, tot_labels, tot_cor, rpc, roc = get_thres(
            labels[:len(scores)], scores, beta)
        print(header)
        print(
            "thres %s |f1 %s |fpr %s |rfpr %s |p %s |r %s |tot_pred %s |tot_labels %s |tot_cor %s |rpc %s |roc %s"
            % (thres, f1, fpr, rfpr, p, r, tot_pred, tot_labels, tot_cor, rpc, roc))
def loda_artificial(other=False):
    """Run the Loda detector on the artificial data and print metrics.

    Train and test series are loaded with ``loader_generic`` and wrapped in
    ``Dataset`` objects. Every training item is fed to the detector first and
    its return value is ignored. Each test item is then fed in turn, and the
    value returned for it is spread over the ``wsize`` score positions that
    start at the item's index, keeping the largest value seen per position.
    The resulting scores are evaluated with ``get_thres`` for beta = 1 and
    beta = 0.1. Results are printed, nothing is returned.

    Args:
        other: When truthy, ``artificial_test2.csv`` is used as the test set
            instead of ``artificial_test1.csv``.
    """
    test_path = "../data/artificial/artificial_test1.csv"
    if other:
        test_path = "../data/artificial/artificial_test2.csv"

    train_raw, _ = loader_generic("../data/artificial/artificial_train.csv")
    test_raw, labels = loader_generic(test_path)

    wsize = 225
    train_set = Dataset(train_raw, 0, 0, wsize, 1, 1)
    test_set = Dataset(test_raw, 0, 0, wsize, 1, 1)

    detector = Loda(wsize, 1400)

    # Training pass: the returned scores are discarded (original note: the
    # train set is used just to get statistics into the model).
    for idx in range(len(train_set)):
        detector.add_data_point(train_set[idx])

    # Test pass: one score slot per element of the underlying test data.
    scores = np.zeros(len(test_set.data))
    for start in range(len(test_set)):
        window_score = detector.add_data_point(test_set[start])
        # Each position keeps the highest score of any window covering it.
        for pos in range(start, start + wsize):
            scores[pos] = max(scores[pos], window_score)

    # Same evaluation and report for each beta value.
    for beta, header in ((1, "beta 1"), (0.1, "beta 0.1")):
        thres, f1, fpr, rfpr, p, r, tot_pred, tot_labels, tot_cor, rpc, roc = get_thres(
            labels[:len(scores)], scores, beta)
        print(header)
        print(
            "thres %s |f1 %s |fpr %s |rfpr %s |p %s |r %s |tot_pred %s |tot_labels %s |tot_cor %s |rpc %s |roc %s"
            % (thres, f1, fpr, rfpr, p, r, tot_pred, tot_labels, tot_cor, rpc, roc))