def train_lin(data_folder, model_folder):
    """Train the Lin model from the full log and the training log.

    ``full_log.csv`` is loaded, extended with end events and converted to
    integers first. Its ``values`` attribute is then passed to the
    ``train_log.csv`` LogFile (presumably so both logs share one encoding --
    confirm against LogFile). Both logs and ``model_folder`` are finally
    handed to ``RelatedMethods.Lin.model.train``.

    Args:
        data_folder: Prefix concatenated directly with the CSV file names
            (no path separator is inserted).
        model_folder: Forwarded unchanged to the Lin ``train`` function.
    """
    from RelatedMethods.Lin.model import train as fit_lin

    # Constructor options that are identical for both logs.
    shared_options = dict(activity_attr="event", convert=False, k=0)

    full_log = LogFile(data_folder + "full_log.csv", ",", 0, None, None, "case",
                       **shared_options)
    full_log.add_end_events()
    full_log.convert2int()

    # Built only after the full log has been converted, because it is
    # constructed from the full log's `values`.
    training_log = LogFile(data_folder + "train_log.csv", ",", 0, None, None, "case",
                           values=full_log.values, **shared_options)
    training_log.add_end_events()
    training_log.convert2int()

    fit_lin(full_log, training_log, model_folder)
def run_experiment(data, prefix_size, add_end_event, split_method, split_cases, train_percentage):
    """Run one baseline experiment and append its report to ``Baseline/results.txt``.

    The log at ``data`` is loaded, optionally extended with end events,
    reduced to the ``case``/``event``/``role`` attributes, converted to
    integers and given a k-context before being split into train and test
    parts. The experiment settings, a timestamp and the accuracy returned by
    ``test`` are appended to the results file.

    Args:
        data: Path of the CSV log; also written verbatim into the report.
        prefix_size: Passed to LogFile as ``k``.
        add_end_event: When truthy, ``add_end_events()`` is called on the log.
        split_method: Passed as ``method`` to ``splitTrainTest``; must be a
            string because it is concatenated into the report.
        split_cases: Passed as ``case`` to ``splitTrainTest``.
        train_percentage: First argument of ``splitTrainTest``.
    """
    log = LogFile(data, ",", 0, None, None, "case", activity_attr="event",
                  convert=False, k=prefix_size)
    if add_end_event:
        log.add_end_events()
    log.keep_attributes(["case", "event", "role"])
    log.convert2int()
    log.create_k_context()
    train_part, test_part = log.splitTrainTest(train_percentage, case=split_cases,
                                               method=split_method)

    with open("Baseline/results.txt", "a") as fout:
        # Label/value pairs are written one by one, in this order.
        settings = (
            ("Data: ", data),
            ("\nPrefix Size: ", str(prefix_size)),
            ("\nEnd event: ", str(add_end_event)),
            ("\nSplit method: ", split_method),
            ("\nSplit cases: ", str(split_cases)),
            ("\nTrain percentage: ", str(train_percentage)),
            ("\nDate: ", time.strftime("%d.%m.%y-%H.%M", time.localtime())),
        )
        for label, value in settings:
            fout.write(label + value)
        fout.write("\n------------------------------------")

        # The results file stays open while the baseline is trained and scored.
        model = train(train_part, epochs=100, early_stop=10)
        baseline_acc = test(test_part, model)

        fout.write("\nBaseline: " + str(baseline_acc))
        fout.write("\n" + "====================================\n\n")
def train_edbn(data_folder, model_folder, k=None, next_event=True):
    """Train an eDBN model on ``train_log.csv`` and store it together with ``k``.

    When ``k`` is None, the candidates 1 to 5 are each scored on an internal
    ``splitTrainTest(80)`` split of the training log, and the candidate with
    the highest validation accuracy is selected (the first one wins ties).
    The final model is always trained on the complete training log. It is
    pickled to ``<model_folder>/model`` and the ``k`` that was used is
    written as text to ``<model_folder>/k``.

    Args:
        data_folder: Prefix concatenated directly with ``"train_log.csv"``.
        model_folder: Directory that receives the ``model`` and ``k`` files.
        k: Value passed to LogFile as ``k``; None triggers the search above.
        next_event: Chooses the validation scorer used during the search:
            ``predict_next_event`` when truthy, ``predict_suffix`` otherwise.
    """
    from EDBN.Execute import train as fit_edbn
    from Predictions.eDBN_Prediction import learn_duplicated_events, predict_next_event, predict_suffix

    log_path = data_folder + "train_log.csv"

    def load_log(context_size):
        # Every log in this function is read from the same file with the
        # same options; only `k` differs.
        return LogFile(log_path, ",", 0, None, None, "case", activity_attr="event",
                       convert=False, k=context_size)

    def encode(log):
        # Preparation applied to every log before it is used.
        log.add_end_events()
        log.convert2int()
        log.create_k_context()

    def fit(log):
        fitted = fit_edbn(log)
        # Train average number of duplicated events
        fitted.duplicate_events = learn_duplicated_events(log)
        return fitted

    if k is None:
        score = predict_next_event if next_event else predict_suffix
        best_k = None
        best_acc = None
        for candidate in range(1, 6):
            fit_part, validation_part = load_log(candidate).splitTrainTest(80)
            encode(fit_part)
            # The validation part takes over the values of the fitting part
            # before it is converted itself.
            validation_part.values = fit_part.values
            encode(validation_part)

            acc = score(fit(fit_part), validation_part)
            print("Testing k=", candidate, " | Validation acc:", acc)
            if best_k is None or best_acc < acc:
                best_k, best_acc = candidate, acc
        print("Best k value:", best_k, " | Validation acc of", best_acc)
        k = best_k

    # Final model: trained on the whole training log with the chosen k.
    train_log = load_log(k)
    encode(train_log)
    model = fit(train_log)

    with open(os.path.join(model_folder, "model"), "wb") as pickle_file:
        pickle.dump(model, pickle_file)
    with open(os.path.join(model_folder, "k"), "w") as outfile:
        outfile.write(str(k))
# NOTE(review): the leading `return acc` on the line below looks like the final
# statement of a definition whose beginning is not visible in this chunk; it is
# left exactly as found -- confirm which function it belongs to.
#
# The `if __name__ == "__main__":` part is a manual run of the pipeline:
#   1. select the dataset "../../Data/Taymouri_bpi_12_w.csv" (the Helpdesk path
#      is a commented-out alternative), with "case" as the case attribute and
#      "event" as the activity attribute;
#   2. load it into a LogFile with k=35, add end events, convert to integers
#      and create the k-context;
#   3. split with splitTrainTest(80, case=True, method="train-test");
#   4. turn both parts into model input via create_input(train_log, test_log, 5)
#      (meaning of the 5 is not visible here -- TODO confirm);
#   5. train on the training input and call test() on the test input; the value
#      returned by test() is not used.
return acc if __name__ == "__main__": # data = "../../Data/Helpdesk.csv" data = "../../Data/Taymouri_bpi_12_w.csv" case_attr = "case" act_attr = "event" logfile = LogFile(data, ",", 0, None, None, case_attr, activity_attr=act_attr, convert=False, k=35) logfile.add_end_events() logfile.convert2int() logfile.create_k_context() train_log, test_log = logfile.splitTrainTest(80, case=True, method="train-test") train_data, test_data = create_input(train_log, test_log, 5) model = train(train_data) test(test_data, model)
def run_experiment(data, prefix_size, add_end_event, split_method, split_cases, train_percentage, filename="results.txt"):
    """Run every prediction method on one dataset, each in its own process.

    The log ``DATA_FOLDER + data`` is loaded with ``completeTime`` as its
    time attribute, optionally extended with end events, reduced to the
    ``case``/``event``/``role`` attributes, converted to integers, given a
    k-context and split into train and test parts. A header describing the
    settings is appended to ``filename``. One ``Process`` per method is then
    started with ``(train_log, test_log, filename)`` as arguments. After all
    processes have been joined, a closing separator is appended to
    ``filename``.

    Args:
        data: File name relative to ``DATA_FOLDER``.
        prefix_size: Passed to LogFile as ``k``. When None, the size of the
            largest trace group is used, capped at 40.
        add_end_event: When truthy, ``add_end_events()`` is called on the log.
        split_method: Passed as ``method`` to ``splitTrainTest``; must be a
            string because it is concatenated into the header.
        split_cases: Passed as ``case`` to ``splitTrainTest``.
        train_percentage: First argument of ``splitTrainTest``.
        filename: Results file, opened in append mode.
    """
    data = DATA_FOLDER + data
    log = LogFile(data, ",", 0, None, "completeTime", "case", activity_attr="event",
                  convert=False, k=prefix_size)

    if prefix_size is None:
        longest_trace = max(log.data.groupby(log.trace).size())
        prefix_size = 40 if longest_trace > 40 else longest_trace
    log.k = prefix_size

    if add_end_event:
        log.add_end_events()
    # Alternative that also keeps the timestamp:
    # ["case", "event", "role", "completeTime"]
    log.keep_attributes(["case", "event", "role"])
    log.convert2int()
    log.create_k_context()
    train_log, test_log = log.splitTrainTest(train_percentage, case=split_cases,
                                             method=split_method)

    # The header is written and the file closed before any worker starts.
    with open(filename, "a") as fout:
        settings = (
            ("Data: ", data),
            ("\nPrefix Size: ", str(prefix_size)),
            ("\nEnd event: ", str(add_end_event)),
            ("\nSplit method: ", split_method),
            ("\nSplit cases: ", str(split_cases)),
            ("\nTrain percentage: ", str(train_percentage)),
            ("\nDate: ", time.strftime("%d.%m.%y-%H.%M", time.localtime())),
        )
        for label, value in settings:
            fout.write(label + value)
        fout.write("\n------------------------------------\n")

    # Process name -> entry point; all entry points get the same arguments.
    methods = (
        ("Tax", execute_tax),
        ("Taymouri", execute_taymouri),
        ("Camargo", execute_camargo),
        ("Lin", execute_lin),
        ("Di Mauro", execute_dimauro),
        ("Pasquadibisceglie", execute_pasquadibisceglie),
        ("EDBN", execute_edbn),
        ("Baseline", execute_baseline),
        # ("New Method", execute_new_method),
    )
    workers = [
        Process(target=entry_point, args=(train_log, test_log, filename), name=label)
        for label, entry_point in methods
    ]

    print("Starting Processes")
    for worker in workers:
        worker.start()
        print(worker.name, "started")

    print("All processes running")
    for worker in workers:
        worker.join()
        print(worker.name, "stopped")

    with open(filename, "a") as fout:
        fout.write("====================================\n\n")
    print("All processes stopped")