def load_xes(self):
    """Load the XES file at ``self.input_path`` into ``self._event_log``.

    The first log returned by the parser is used. A classifier named
    "activity" is built from the string form of the log's first declared
    classifier, every event is mapped to its class identity, and the
    distinct traces are stored as a set of tuples.

    Raises:
        IOError: if opening, parsing or classifying the log fails for any
            reason. The original exception is attached as ``__cause__``.
    """
    try:
        with open(self.input_path) as log_file:
            log = XUniversalParser().parse(log_file)[0]

            # String form of every classifier declared in the log.
            classifiers = [str(cl) for cl in log.get_classifiers()]
            classifier = XEventAttributeClassifier("activity", [classifiers[0]])

            # A set of tuples keeps exactly one copy of each distinct trace.
            self._event_log = {
                tuple(classifier.get_class_identity(event) for event in trace)
                for trace in log
            }
    except Exception as err:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate; chain the cause for debuggability.
        raise IOError('[ERROR]: Unable to import xes file') from err
def read_event_log_file(log_filepath):
    """Parse the file at *log_filepath* and return the first log in it.

    Raises:
        ValueError: if *log_filepath* is not an existing regular file.
    """
    if os.path.isfile(log_filepath):
        with open(log_filepath, 'r') as handle:
            parsed_logs = XUniversalParser.parse(handle)
            return parsed_logs[0]
    raise ValueError('{} is not a log file!'.format(log_filepath))
def xlog():
    """Parse the BPIC2012 test log, report its size and parse time, return it.

    The log is read from ``./tests/testdata/BPIC2012.xes.gz`` relative to the
    current working directory; the first log produced by the parser is
    returned.
    """
    log_location = os.path.join('.', 'tests', 'testdata', 'BPIC2012.xes.gz')
    began = time.time()
    with open(log_location, 'r') as handle:
        parsed = XUniversalParser.parse(handle)[0]

    # sys.getsizeof is shallow: it does not follow references into the log.
    print('BPIC2012 XLog size: {}b'.format(sys.getsizeof(parsed)))
    print('Took {} secs to parse BPIC2012'.format(time.time() - began))
    return parsed
def read_XES_log(path):
    """Parse the XES file at *path*, print the elapsed time, return its first log."""
    started_at = time()
    print("Parsing log")
    with open(path) as xes_handle:
        parsed_logs = XUniversalParser().parse(xes_handle)
        first_log = parsed_logs[0]  # only the first log in the file is used
    elapsed = time() - started_at
    print("Log parsed, took {} seconds..".format(elapsed))
    return first_log
def mapXes(self, lfile, header):
    """Parse the XES file at *lfile* and return the first log it contains.

    *header* is accepted but not used by this method.
    """
    with open(lfile) as xes_handle:
        parsed_logs = XUniversalParser().parse(xes_handle)
        first_log = parsed_logs[0]
    return first_log
ext_lifecycle = XExtensionParser().parse("http://www.xes-standard.org/lifecycle.xesext")
print("[info] XExtensionParser completed the extension parsing")

# Register every extension with the extension manager, in the same order
# as before: the three meta extensions first, then concept, time, lifecycle.
print("[info] XExtensionManager starts the registration of extensions")
for extension in (meta_general, meta_concept, meta_time,
                  ext_concept, ext_time, ext_lifecycle):
    XExtensionManager().register(extension)
print("[info] XExtensionManager completed the registration of extensions")

# With the extensions registered, parse the event log and keep its first log.
with open("./data/bpic2012.xes") as file:
    logs = XUniversalParser().parse(file)
log = logs[0]

# Find the position of the trace whose concept:name is '197611'.
# If no trace matches, drift_idx ends up equal to the number of traces.
drift_idx = 0
for position, trace in enumerate(log):
    case_name = trace.get_attributes()['concept:name'].get_value()
    if case_name == '197611':
        print('drift_idx = %d' % drift_idx)
        break
    drift_idx = position + 1

# Keep only the traces that come before the drift (the training set).
log = log[0:drift_idx]

# Per-case columns, each starting out empty.
log_dict = {
    'case_id': [],
    'case_execution_time': [],
    'case_execution_time_seconds': [],
    'num_of_events': [],
}
from opyenxes.classification.XEventAttributeClassifier import XEventAttributeClassifier
from opyenxes.data_in.XUniversalParser import XUniversalParser

file_path = "/home/bartlomiej/process-mining/process-mining-algorithms/event_logs/L1.xes"
output_directory = "./output/"

with open(file_path) as log_file:
    # Only the first log found in the file is used.
    log = XUniversalParser().parse(log_file)[0]

    # Events are identified by their "concept:name" attribute.
    classifier = XEventAttributeClassifier("concept:name", ["concept:name"])

    # One inner list per trace, holding the class identity of each event.
    log_list = [
        [classifier.get_class_identity(event) for event in trace]
        for trace in log
    ]
factory = XFactory()
print(os.listdir(datadir))
for dir in os.listdir(datadir):
    # Only sub-directories of datadir are processed.
    if not os.path.isdir(os.path.join(datadir, dir)):
        continue

    # Logs are read from <datadir>/<dir>/l5000 and the sampled logs are
    # written to <datadir>/<dir>/l1000.
    indir = os.path.join(datadir, dir, 'l5000')
    outdir = os.path.join(datadir, dir, 'l1000')
    os.makedirs(outdir)

    for xlog_filepath in os.listdir(indir):
        if '.xes.gz' not in xlog_filepath:
            continue

        print('Processing {}'.format(xlog_filepath))

        # Open the file from the directory it was listed from. The path
        # used previously omitted the 'l5000' component.
        with open(os.path.join(indir, xlog_filepath), 'r') as f:
            xlog = XUniversalParser().parse(f)[0]

        assert isinstance(xlog, XLog)

        # New log built from the source log's attributes, carrying over the
        # source log's first classifier.
        new_xlog = factory.create_log(xlog.get_attributes())
        # Draw nb_traces traces without replacement.
        traces = np.random.choice(xlog, nb_traces, replace=False)
        new_xlog.get_classifiers().append(xlog.get_classifiers()[0])

        for t in traces:
            new_xlog.append(t)

        with open(os.path.join(outdir, xlog_filepath), 'w') as f:
            XesXmlGZIPSerializer().serialize(new_xlog, f)
def readLogFile(file):
    """Read the log at path *file* with two different importers.

    Returns:
        A 2-tuple: the result of ``XUniversalParser().parse`` on the opened
        file, followed by the result of ``xes_importer.apply`` on the path.
    """
    with open(file) as xes_handle:
        parser_result = XUniversalParser().parse(xes_handle)
        importer_result = xes_importer.apply(file)
        return parser_result, importer_result
from opyenxes.model.XLog import XLog
from opyenxes.data_in.XUniversalParser import XUniversalParser
from opyenxes.classification.XEventAttributeClassifier import XEventAttributeClassifier
from opyenxes.out.XesXmlSerializer import XesXmlSerializer
from sklearn.cluster import KMeans
from opyenxes.factory.XFactory import XFactory

if __name__ == '__main__':
    path = "xes_file/log_to_cluster.xes"

    # Read the first log contained in the file.
    with open(path) as log_file:
        log = XUniversalParser().parse(log_file)[0]  # type: XLog

    # Reduce every trace to the class identities of its events, using the
    # "Activity" attribute as the classifier key.
    classifier = XEventAttributeClassifier("activity", ["Activity"])
    log_list = [
        [classifier.get_class_identity(event) for event in trace]
        for trace in log
    ]

    # Every distinct event identity seen in any trace, in a fixed list order.
    event_set = {event for trace in log_list for event in trace}
    all_event = list(event_set)

    # For each trace, count how often each distinct event occurs in it.
    log_vector = []
    for trace in log_list:
        trace_vector = []
        for event in all_event:
            trace_vector.append(trace.count(event))
return log filePath = sys.argv[1] epsilon = sys.argv[2] dbName = sys.argv[3] secure_token = sys.argv[4] #preprocess file #os.mkdir(secure_token) print("\n Starting privatize_df.py \n") outPath = filePath.replace(".xes","_%s.xes" % (epsilon)) log = xes_import_factory.apply(filePath) parser = XUniversalParser() if parser.can_parse(filePath): print("log can be parsed.") else: print("log can not be parsed.") log_file = open(filePath) log_list = parser.parse(log_file) #log = log_list[0] event_mapping = create_event_int_mapping(log) privatize_df(log_list, event_mapping, epsilon, outPath) #write to db print("Writing to DB")
return merged_log if __name__ == "__main__": normLogs = [] devLogs = [] normPathTemp = "out_xraynorm{}.xes" devPathTemp = "out_xraydev{}.xes" normLogCount = 7 devLogCount = 1 for i in range(1, normLogCount + 1): logPath = normPathTemp.format(i) with open(logPath) as log_file: normLogs.append(XUniversalParser().parse(log_file)[0]) for i in range(1, devLogCount + 1): logPath = devPathTemp.format(i) with open(logPath) as log_file: devLogs.append(XUniversalParser().parse(log_file)[0]) merged_log = merge_and_label(normLogs, devLogs) shuffle(merged_log) with open("merged_xray.xes", "w") as f: XesXmlSerializer().serialize(merged_log, f)
""" Anonymize a log. All value of "resource" attribute and "org:resource" attribute will be "UNKNOWN" now: """ from opyenxes.data_in.XUniversalParser import XUniversalParser from opyenxes.data_out.XesXmlSerializer import XesXmlSerializer path = "xes_file/general_example.xes" with open(path) as log_file: log = XUniversalParser().parse(log_file)[0] # Search in global trace attribute for attribute in log.get_global_trace_attributes(): if attribute.get_key() in ["Resource", "org:resource"]: attribute.set_value("UNKNOWN") # Search in global event attribute for attribute in log.get_global_event_attributes(): if attribute.get_key() in ["Resource", "org:resource"]: attribute.set_value("UNKNOWN") # Search in trace for trace in log: # Search in trace attribute for attribute in trace.get_attributes().values(): if attribute.get_key() in ["Resource", "org:resource"]: attribute.set_value("UNKNOWN") # Search in event for event in trace: # Search in event attribute