def test_file_bohmer(file):
    """Train and evaluate the Bohmer model on the dataset rooted at *file*.

    Splits ``<file>_data.csv`` / ``<file>_labels.csv`` into train/test files,
    trains on the training log (label column removed), scores the test log and
    writes ROC / precision-recall plots for the resulting score file.
    """
    train_file = file + "_train.csv"
    test_file = file + "_test.csv"
    output_file = file + "_output_bohmer.csv"
    graph_prefix = "../Data/Nolle_Graphs/" + file.split("/")[-1]

    split_dataset(file + "_data.csv", file + "_labels.csv", train_file, test_file, 10000)

    train_data = LogFile(train_file, ",", 0, 1000000, None, "case_id", "name", convert=False)
    # The label must not be visible during training.
    train_data.remove_attributes(["label"])
    model = bohmer.train(train_data, 3, 4, 1)

    test_data = LogFile(test_file, ",", 0, 1000000, None, "case_id", "name",
                        convert=False, values=train_data.values)
    bohmer.test(test_data, output_file, model, "label", 0)

    plot.plot_single_roc_curve(output_file, file, save_file=graph_prefix + "_roc_bohmer.png")
    plot.plot_single_prec_recall_curve(output_file, file, save_file=graph_prefix + "_precrec_bohmer.png")
def score_continuous_net(model, test, label_attr, output_file=None, title=None):
    """Score *test* with *model*, write per-row results to CSV and plot curves.

    Each output row is ``Index,score,is_anomaly`` where ``is_anomaly`` is True
    when the row's *label_attr* differs from 0. Rows are written in ascending
    score order.

    Args:
        model: trained network exposing ``test_parallel``.
        test: test data accepted by ``model.test_parallel``.
        label_attr: attribute name holding the ground-truth label.
        output_file: CSV destination; defaults to ``../output.csv``.
        title: plot title forwarded to the plotting helpers (may be None).
    """
    import Utils.PlotResults as plot

    ranking = model.test_parallel(test)
    # Sort ascending by total score so the CSV goes from least to most anomalous.
    ranking.sort(key=lambda r: r[0].get_total_score())

    scores = []
    for r in ranking:
        # Hoisted: the original computed get_total_score() twice per entry and
        # accumulated a second list `y` that was never used.
        total_score = r[0].get_total_score()
        scores.append((getattr(r[1], "Index"), total_score, getattr(r[1], label_attr) != 0))
    print(len(scores))

    if output_file is None:
        output_file = "../output.csv"
    with open(output_file, "w") as fout:
        for s in scores:
            fout.write(",".join([str(i) for i in s]))
            fout.write("\n")

    plot.plot_single_roc_curve(output_file, title)
    plot.plot_single_prec_recall_curve(output_file, title)
def test_file_full(file):
    """Train and evaluate the EDBN model on the dataset rooted at *file*.

    Splits ``<file>_data.csv`` / ``<file>_labels.csv`` into train/test files,
    trains on the training log (label column removed), scores the test log and
    writes ROC / precision-recall plots for the resulting score file.
    """
    train_file = file + "_train.csv"
    test_file = file + "_test.csv"
    output_file = file + "_output_full.csv"
    graph_prefix = "../Data/Nolle_Graphs/" + file.split("/")[-1]

    split_dataset(file + "_data.csv", file + "_labels.csv", train_file, test_file, None)

    train_data = LogFile(train_file, ",", 0, 1000000, None, "case_id", "name")
    # The label must not be visible during training.
    train_data.remove_attributes(["label"])
    model = edbn.train(train_data)

    test_data = LogFile(test_file, ",", 0, 1000000, None, "case_id", "name",
                        values=train_data.values)
    edbn.test(test_data, output_file, model, "label", "0", train_data)

    plot.plot_single_roc_curve(output_file, file, save_file=graph_prefix + "_roc.png")
    plot.plot_single_prec_recall_curve(output_file, file, save_file=graph_prefix + "_precrec.png")
def stephenRun():
    """Train an EDBN on BPIC15 split 1 and plot the ROC curve of its scores.

    Assumes the train/test CSVs already exist; ``preProcessData("../Data/")``
    (left commented out by the original author) generates them once from
    BPIC15_x_sorted.csv with anomalies introduced.
    """
    # preProcessData("../Data/")

    train_file = "../Data/BPIC15_train_1.csv"
    test_file = "../Data/BPIC15_test_1.csv"

    # Training data: the ground-truth anomaly flag is stripped before training.
    train_data = LogFile(train_file, ",", header=0, rows=500000,
                         time_attr=None, trace_attr="Case")
    train_data.remove_attributes(["Anomaly"])

    model = edbn.train(train_data)

    # Score the test log with the training value mappings; results land in
    # ../Data/output.csv.
    test_data = LogFile(test_file, ",", header=0, rows=500000, time_attr=None,
                        trace_attr="Case", values=train_data.values)
    edbn.test(test_data, "../Data/output.csv", model, label="Anomaly", normal_val="0")

    plot.plot_single_roc_curve("../Data/output.csv")
def breast_discrete_exec(train_size=100):
    """Run the discrete EDBN anomaly-detection experiment on the breast dataset.

    Reads ``../Data/breast_data.csv`` and ``../Data/breast_labels.csv``,
    prefixes columns with "V", splits the data into a leading training slice
    and a trailing test slice, trains on normal rows only (VLabel == 0) and
    writes scores plus ROC / precision-recall plots.

    Args:
        train_size: number of leading rows used for training (default 100,
            the value the original hard-coded); the remaining rows form the
            test set.
    """
    data = "../Data/breast_data.csv"
    labels = "../Data/breast_labels.csv"
    log = pd.read_csv(data, header=None)
    labels = pd.read_csv(labels, header=None)
    log["Label"] = labels[0]
    # Prefix every column name with "V" (numeric headers become V0, V1, ...).
    log.columns = ["V" + str(c) for c in log.columns]
    log['ID'] = log.reset_index().index
    print(log)

    train = log[:train_size]
    test = log[train_size:]
    # Train only on normal rows, and drop the label so it cannot leak.
    train = train[train.VLabel == 0].drop(columns=["VLabel"])
    train.to_csv("../Data/breast_train.csv", index=False)
    test.to_csv("../Data/breast_test.csv", index=False)

    train_data = LogFile("../Data/breast_train.csv", ",", 0, 500000, None, "ID",
                         activity_attr="Activity")
    train_data.k = 0
    model = edbn.train(train_data)

    # NOTE(review): unlike the other experiments in this file, no
    # values=train_data.values is passed here — confirm the test log is
    # encoded consistently with the training log.
    test_data = LogFile("../Data/breast_test.csv", ",", 0, 500000, None, "ID",
                        activity_attr="Activity")
    test_data.k = 0
    print(test_data.data)
    edbn.test(test_data, "../Data/breast_discrete_output.csv", model, "VLabel", "0")

    plot.plot_single_roc_curve("../Data/breast_discrete_output.csv", "breast_discrete")
    plot.plot_single_prec_recall_curve("../Data/breast_discrete_output.csv", "breast_discrete")
# Fragment: wires a learned network structure into an EDBN, trains/tests it and
# plots the resulting scores. `net`, `relations`, `edbn`, `train` and `test`
# are defined outside the visible span — presumably a learned graph, an
# accumulator list, the EDBN model and the data splits; verify against the
# enclosing function.
for edge in net.edges():
    relations.append((edge[0], edge[1]))
for relation in relations:
    # if relation not in mappings:
    # Add a parent->child dependency in the EDBN for each learned edge.
    edbn.get_variable(relation[1]).add_parent(edbn.get_variable(relation[0]))
    print(relation[0], "->", relation[1])
edbn.train(train, single=True)
ranking = edbn.test(test)
# Sort ascending by total anomaly score.
ranking.sort(key=lambda l: l[0].get_total_score())
scores = []
y = []
for r in ranking:
    # Row: (instance index, score, is_anomaly) — anomalous when Class != 1.
    scores.append((getattr(r[1], "Index"), r[0].get_total_score(), getattr(r[1], "Class") != 1))
    y.append(r[0].get_total_score())
print(len(scores))
# Write one CSV line per scored instance.
with open("../output.csv", "w") as fout:
    for s in scores:
        fout.write(",".join([str(i) for i in s]))
        fout.write("\n")
plot.plot_single_roc_curve("../output.csv")
plot.plot_single_prec_recall_curve("../output.csv")
# Plot the sorted score curve itself.
plt.plot(list(range(len(y))), y)
plt.show()
def run_full():
    """Train and score EDBN models on the BPIC15 splits, then compare them.

    For each processed split, trains on the training log (ground-truth and
    post-hoc date columns removed), scores the matching test log into
    ``../Data/output2_<i>.csv`` and plots its individual curves; finally plots
    comparison ROC / precision-recall curves over output files 1-5.
    """
    # preProcessData("../Data/") generates the train/test files once from
    # BPIC15_x_sorted.csv with anomalies introduced; rerun if they are missing.
    for idx in range(1, 2):
        train_file = "../Data/bpic15_%i_train.csv" % (idx)
        test_file = "../Data/bpic15_%i_test.csv" % (idx)
        out_file = "../Data/output2_%i.csv" % (idx)

        train_data = LogFile(train_file, ",", 0, 500000,
                             time_attr="Complete_Timestamp",
                             trace_attr="Case_ID",
                             activity_attr="Activity")
        # Strip the ground-truth flag and the date columns that would leak
        # information into training.
        for attr in ("Anomaly", "planned", "dueDate", "dateFinished"):
            train_data.remove_attributes([attr])

        model = edbn.train(train_data)

        test_data = LogFile(test_file, ",", header=0, rows=500000,
                            time_attr="Complete_Timestamp",
                            trace_attr="Case_ID",
                            values=train_data.values)
        edbn.test(test_data, out_file, model,
                  label="Anomaly", normal_val="0", train_data=train_data)

        plot.plot_single_roc_curve(out_file, title="BPIC15_%i" % (idx))
        plot.plot_single_prec_recall_curve(out_file, title="BPIC15_%i" % (idx))

    # Cross-split comparison over output files 1-5.
    out_files = ["../Data/output2_%i.csv" % (idx) for idx in range(1, 6)]
    labels = ["MUNIS_%i" % (idx) for idx in range(1, 6)]
    plot.plot_compare_roc_curve(out_files, labels, "BPIC15 Comparison")
    plot.plot_compare_prec_recall_curve(out_files, labels, "BPIC15 Comparison")