def __init__(self, path, parameters=None):
    """
    Initialize the iterable log_skeleton object.

    Parameters
    -------------
    path
        Path to the XES log_skeleton
    parameters
        Parameters of the importer, including:
        Parameters.ACCEPTANCE_CONDITION -> predicate over a trace;
        defaults to accepting everything (lambda x: True)
    """
    if parameters is None:
        parameters = {}
    # Path of the XES file; actual reading happens elsewhere in the class.
    self.path = path
    # Trace predicate — presumably used by the iterator to filter traces
    # while streaming (TODO confirm against the iteration methods).
    self.acceptance_condition = exec_utils.get_param_value(
        Parameters.ACCEPTANCE_CONDITION, parameters, lambda x: True)
    # Cache one date-parser instance so every timestamp reuses it.
    self.date_parser = dt_parser.get()
    # Initialize/clear iteration state (defined elsewhere in the class).
    self.reset()
def import_log(filename, parameters=None):
    """
    Import a log object from a XML file containing the traces, the events and
    the simple attributes of them.

    Parameters
    -----------
    filename
        XES file to parse
    parameters
        Parameters of the algorithm, including
            Parameters.TIMESTAMP_SORT -> Specify if we should sort log by timestamp
            Parameters.TIMESTAMP_KEY -> If sort is enabled, then sort the log by using this key
            Parameters.REVERSE_SORT -> Specify in which direction the log should be sorted
            Parameters.INSERT_TRACE_INDICES -> Specify if trace indexes should be added as event attribute for each event
            Parameters.MAX_TRACES -> Specify the maximum number of traces to import from the log (read in order in the XML file)
            Parameters.MAX_BYTES -> Maximum number of bytes to read
            Parameters.SKYP_BYTES -> Number of bytes to skip

    Returns
    -----------
    log
        The imported event log
    """
    if parameters is None:
        parameters = {}

    date_parser = dt_parser.get()
    timestamp_sort = param_util.fetch(Parameters.TIMESTAMP_SORT, parameters)
    timestamp_key = param_util.fetch(Parameters.TIMESTAMP_KEY, parameters)
    reverse_sort = param_util.fetch(Parameters.REVERSE_SORT, parameters)
    insert_trace_indexes = param_util.fetch(Parameters.INSERT_TRACE_INDICES, parameters)
    max_no_traces_to_import = param_util.fetch(Parameters.MAX_TRACES, parameters)
    skip_bytes = param_util.fetch(Parameters.SKYP_BYTES, parameters)
    max_bytes_to_read = param_util.fetch(Parameters.MAX_BYTES, parameters)

    # If the file exceeds the read budget, skip the initial part so that only
    # the trailing max_bytes_to_read bytes are parsed.
    file_size = os.stat(filename).st_size
    if file_size > max_bytes_to_read:
        skip_bytes = file_size - max_bytes_to_read

    log = EventLog()
    tracecount = 0
    trace = None
    event = None

    # BUGFIX: use a context manager so the file handle is released even when
    # parsing raises (the original `open`/`close` pair leaked it on error).
    with open(filename, "r") as f:
        f.seek(skip_bytes)
        for line in f:
            # Line-oriented parsing: assumes one XML tag per line with
            # double-quoted attributes, e.g.
            #   <string key="concept:name" value="A"/>
            # splits into 5 parts: [prefix, key, ' value=', value, suffix].
            # str.split always returns at least one element, so content[0]
            # is safe without a length check.
            content = line.split("\"")
            tag = content[0].split("<")[-1]
            if trace is not None:
                if event is not None:
                    if len(content) == 5:
                        # typed attribute inside an <event>
                        if tag.startswith("string"):
                            event[content[1]] = content[3]
                        elif tag.startswith("date"):
                            event[content[1]] = date_parser.apply(content[3])
                        elif tag.startswith("int"):
                            event[content[1]] = int(content[3])
                        elif tag.startswith("float"):
                            event[content[1]] = float(content[3])
                        else:
                            event[content[1]] = content[3]
                    elif tag.startswith("/event"):
                        trace.append(event)
                        event = None
                elif tag.startswith("event"):
                    event = Event()
                elif len(content) == 5:
                    # typed attribute at trace level
                    if tag.startswith("string"):
                        trace.attributes[content[1]] = content[3]
                    elif tag.startswith("date"):
                        trace.attributes[content[1]] = date_parser.apply(content[3])
                    elif tag.startswith("int"):
                        trace.attributes[content[1]] = int(content[3])
                    elif tag.startswith("float"):
                        trace.attributes[content[1]] = float(content[3])
                    else:
                        trace.attributes[content[1]] = content[3]
                elif tag.startswith("/trace"):
                    log.append(trace)
                    trace = None
                    tracecount += 1
                    # BUGFIX: was `>`, which imported one trace more than
                    # MAX_TRACES requested.
                    if tracecount >= max_no_traces_to_import:
                        break
            elif tag.startswith("trace"):
                trace = Trace()

    if timestamp_sort:
        log = sorting.sort_timestamp(log, timestamp_key=timestamp_key, reverse_sort=reverse_sort)
    if insert_trace_indexes:
        log.insert_trace_index_as_event_attribute()

    return log
def from_dict_to_event(event_dict):
    """
    Turn a plain dictionary into an Event object.

    If the dictionary carries a "time:timestamp" entry, that entry is parsed
    into a datetime in place (the input dictionary is mutated).

    Parameters
    ------------
    event_dict
        Dictionary of event attributes

    Returns
    ------------
    event
        Event built from the (possibly updated) dictionary
    """
    ts_key = "time:timestamp"
    if ts_key in event_dict:
        event_dict[ts_key] = dt_parser.get().apply(event_dict[ts_key])
    return Event(event_dict)
def import_from_context(context, num_traces, parameters=None):
    """
    Import a XES log from an iterparse context.

    Parameters
    --------------
    context
        Iterparse context (stream of (start/end, element) pairs)
    num_traces
        Number of traces of the XES log (used to size the progress bar)
    parameters
        Parameters of the algorithm, including MAX_TRACES, TIMESTAMP_SORT,
        TIMESTAMP_KEY, REVERSE_SORT and SHOW_PROGRESS_BAR

    Returns
    --------------
    log
        Event log
    """
    if parameters is None:
        parameters = {}

    max_no_traces_to_import = exec_utils.get_param_value(Parameters.MAX_TRACES, parameters, sys.maxsize)
    timestamp_sort = exec_utils.get_param_value(Parameters.TIMESTAMP_SORT, parameters, False)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)
    reverse_sort = exec_utils.get_param_value(Parameters.REVERSE_SORT, parameters, False)
    show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)

    date_parser = dt_parser.get()

    # tqdm is optional: only build a progress bar if the package is installed
    # and the caller did not disable it.
    progress = None
    if pkgutil.find_loader("tqdm") and show_progress_bar:
        from tqdm.auto import tqdm
        progress = tqdm(total=num_traces, desc="parsing log, completed traces :: ")

    log = None
    trace = None
    event = None

    # `tree` maps an open XML element to the container (dict-like) that its
    # child attributes must be written into; __parse_attribute extends it.
    tree = {}

    for tree_event, elem in context:
        if tree_event == _EVENT_START:  # starting to read
            # Container of the enclosing element, if that element is tracked.
            parent = tree[elem.getparent()] if elem.getparent() in tree else None
            if elem.tag.endswith(xes_constants.TAG_STRING):
                if parent is not None:
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY),
                                             elem.get(xes_constants.KEY_VALUE), tree)
                continue
            elif elem.tag.endswith(xes_constants.TAG_DATE):
                # Unparsable dates are logged and skipped, not fatal.
                try:
                    dt = date_parser.apply(elem.get(xes_constants.KEY_VALUE))
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), dt, tree)
                except TypeError:
                    logging.info("failed to parse date: " + str(elem.get(xes_constants.KEY_VALUE)))
                except ValueError:
                    logging.info("failed to parse date: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue
            elif elem.tag.endswith(xes_constants.TAG_EVENT):
                # Events must not nest.
                if event is not None:
                    raise SyntaxError('file contains <event> in another <event> tag')
                event = Event()
                tree[elem] = event
                continue
            elif elem.tag.endswith(xes_constants.TAG_TRACE):
                # Stop as soon as the requested number of traces is reached.
                if len(log) >= max_no_traces_to_import:
                    break
                if trace is not None:
                    raise SyntaxError('file contains <trace> in another <trace> tag')
                trace = Trace()
                # Attributes under <trace> go into the trace's attribute dict.
                tree[elem] = trace.attributes
                continue
            elif elem.tag.endswith(xes_constants.TAG_FLOAT):
                if parent is not None:
                    try:
                        val = float(elem.get(xes_constants.KEY_VALUE))
                        tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse float: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue
            elif elem.tag.endswith(xes_constants.TAG_INT):
                if parent is not None:
                    try:
                        val = int(elem.get(xes_constants.KEY_VALUE))
                        tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse int: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue
            elif elem.tag.endswith(xes_constants.TAG_BOOLEAN):
                if parent is not None:
                    try:
                        # Anything other than (case-insensitive) "true" is False.
                        val0 = elem.get(xes_constants.KEY_VALUE)
                        val = False
                        if str(val0).lower() == "true":
                            val = True
                        tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse boolean: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue
            elif elem.tag.endswith(xes_constants.TAG_LIST):
                if parent is not None:
                    # lists have no value, hence we put None as a value
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), None, tree)
                continue
            elif elem.tag.endswith(xes_constants.TAG_ID):
                # IDs are kept as their raw string value.
                if parent is not None:
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY),
                                             elem.get(xes_constants.KEY_VALUE), tree)
                continue
            elif elem.tag.endswith(xes_constants.TAG_EXTENSION):
                if log is None:
                    raise SyntaxError('extension found outside of <log> tag')
                # Record the extension only when all three fields are present.
                if elem.get(xes_constants.KEY_NAME) is not None and elem.get(
                        xes_constants.KEY_PREFIX) is not None and elem.get(xes_constants.KEY_URI) is not None:
                    log.extensions[elem.get(xes_constants.KEY_NAME)] = {
                        xes_constants.KEY_PREFIX: elem.get(xes_constants.KEY_PREFIX),
                        xes_constants.KEY_URI: elem.get(xes_constants.KEY_URI)}
                continue
            elif elem.tag.endswith(xes_constants.TAG_GLOBAL):
                if log is None:
                    raise SyntaxError('global found outside of <log> tag')
                # Global attributes are collected per scope (e.g. trace/event).
                if elem.get(xes_constants.KEY_SCOPE) is not None:
                    log.omni_present[elem.get(xes_constants.KEY_SCOPE)] = {}
                    tree[elem] = log.omni_present[elem.get(xes_constants.KEY_SCOPE)]
                continue
            elif elem.tag.endswith(xes_constants.TAG_CLASSIFIER):
                if log is None:
                    raise SyntaxError('classifier found outside of <log> tag')
                if elem.get(xes_constants.KEY_KEYS) is not None:
                    classifier_value = elem.get(xes_constants.KEY_KEYS)
                    # Keys containing spaces are quoted with single quotes;
                    # otherwise the list is whitespace-separated.
                    if "'" in classifier_value:
                        log.classifiers[elem.get(xes_constants.KEY_NAME)] = [x for x in classifier_value.split("'")
                                                                             if x.strip()]
                    else:
                        log.classifiers[elem.get(xes_constants.KEY_NAME)] = classifier_value.split()
                continue
            elif elem.tag.endswith(xes_constants.TAG_LOG):
                if log is not None:
                    raise SyntaxError('file contains > 1 <log> tags')
                log = EventLog()
                tree[elem] = log.attributes
                continue
        elif tree_event == _EVENT_END:
            # Closing an element: drop its container mapping and free the
            # parsed XML to keep memory bounded while streaming.
            if elem in tree:
                del tree[elem]
            elem.clear()
            if elem.getprevious() is not None:
                try:
                    # Delete already-processed siblings from the parent.
                    del elem.getparent()[0]
                except TypeError:
                    pass
            if elem.tag.endswith(xes_constants.TAG_EVENT):
                if trace is not None:
                    trace.append(event)
                    event = None
                continue
            elif elem.tag.endswith(xes_constants.TAG_TRACE):
                log.append(trace)
                if progress is not None:
                    progress.update()
                trace = None
                continue
            elif elem.tag.endswith(xes_constants.TAG_LOG):
                continue

    # gracefully close progress bar
    if progress is not None:
        progress.close()
    del context, progress

    if timestamp_sort:
        log = sorting.sort_timestamp(log, timestamp_key=timestamp_key, reverse_sort=reverse_sort)

    # sets the activity key as default classifier in the log's properties
    log.properties[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_constants.DEFAULT_NAME_KEY
    log.properties[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = xes_constants.DEFAULT_NAME_KEY
    # sets the default timestamp key
    log.properties[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = xes_constants.DEFAULT_TIMESTAMP_KEY
    # sets the default resource key
    log.properties[constants.PARAMETER_CONSTANT_RESOURCE_KEY] = xes_constants.DEFAULT_RESOURCE_KEY
    # sets the default transition key
    log.properties[constants.PARAMETER_CONSTANT_TRANSITION_KEY] = xes_constants.DEFAULT_TRANSITION_KEY
    # sets the default group key
    log.properties[constants.PARAMETER_CONSTANT_GROUP_KEY] = xes_constants.DEFAULT_GROUP_KEY

    return log
def import_log(filename, parameters=None):
    """
    Imports an XES file into a log object.

    Parameters
    ----------
    filename:
        Absolute filename
    parameters
        Parameters of the algorithm, including
            Parameters.TIMESTAMP_SORT -> Specify if we should sort log by timestamp
            Parameters.TIMESTAMP_KEY -> If sort is enabled, then sort the log by using this key
            Parameters.REVERSE_SORT -> Specify in which direction the log should be sorted
            Parameters.INSERT_TRACE_INDICES -> Specify if trace indexes should be added as event attribute for each event
            Parameters.MAX_TRACES -> Specify the maximum number of traces to import from the log (read in order in the XML file)

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        A log
    """
    parameters = dict() if parameters is None else parameters
    insert_trace_indexes = param_util.fetch(Parameters.INSERT_TRACE_INDICES, parameters)
    max_no_traces_to_import = param_util.fetch(Parameters.MAX_TRACES, parameters)
    date_parser = dt_parser.get()

    context = etree.iterparse(filename, events=[_EVENT_START, _EVENT_END])

    # NOTE: one could check whether the log declares a namespace before
    # looking for traces (likely more effort than it is worth); logs on the
    # standard namespace could be matched with
    # tag="{http://www.xes-standard.org}trace", but the {*} syntax below
    # matches a trace element in any namespace.

    # Count the number of traces with a first streaming pass, purely to size
    # the progress bar (the file is parsed twice as a consequence).
    no_trace = sum([1 for trace in etree.iterparse(filename, events=[_EVENT_START], tag="{*}trace")])

    # make tqdm facultative
    progress = None
    if pkgutil.find_loader("tqdm"):
        from tqdm.auto import tqdm
        progress = tqdm(total=no_trace, desc="parsing log, completed traces :: ")

    log = None
    trace = None
    event = None

    # `tree` maps an open XML element to the container (dict-like) that its
    # child attributes must be written into; __parse_attribute extends it.
    tree = {}

    for tree_event, elem in context:
        if tree_event == _EVENT_START:  # starting to read
            # Container of the enclosing element, if that element is tracked.
            parent = tree[elem.getparent()] if elem.getparent() in tree else None
            if elem.tag.endswith(xes_constants.TAG_STRING):
                if parent is not None:
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY),
                                             elem.get(xes_constants.KEY_VALUE), tree)
                continue
            elif elem.tag.endswith(xes_constants.TAG_DATE):
                # Unparsable dates are logged and skipped, not fatal.
                try:
                    dt = date_parser.apply(elem.get(xes_constants.KEY_VALUE))
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), dt, tree)
                except TypeError:
                    logging.info("failed to parse date: " + str(elem.get(xes_constants.KEY_VALUE)))
                except ValueError:
                    logging.info("failed to parse date: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue
            elif elem.tag.endswith(xes_constants.TAG_EVENT):
                # Events must not nest.
                if event is not None:
                    raise SyntaxError('file contains <event> in another <event> tag')
                event = Event()
                tree[elem] = event
                continue
            elif elem.tag.endswith(xes_constants.TAG_TRACE):
                # Stop as soon as the requested number of traces is reached.
                if len(log) >= max_no_traces_to_import:
                    break
                if trace is not None:
                    raise SyntaxError('file contains <trace> in another <trace> tag')
                trace = Trace()
                # Attributes under <trace> go into the trace's attribute dict.
                tree[elem] = trace.attributes
                continue
            elif elem.tag.endswith(xes_constants.TAG_FLOAT):
                if parent is not None:
                    try:
                        val = float(elem.get(xes_constants.KEY_VALUE))
                        tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse float: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue
            elif elem.tag.endswith(xes_constants.TAG_INT):
                if parent is not None:
                    try:
                        val = int(elem.get(xes_constants.KEY_VALUE))
                        tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse int: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue
            elif elem.tag.endswith(xes_constants.TAG_BOOLEAN):
                if parent is not None:
                    try:
                        # Anything other than (case-insensitive) "true" is False.
                        val0 = elem.get(xes_constants.KEY_VALUE)
                        val = False
                        if str(val0).lower() == "true":
                            val = True
                        tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse boolean: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue
            elif elem.tag.endswith(xes_constants.TAG_LIST):
                if parent is not None:
                    # lists have no value, hence we put None as a value
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), None, tree)
                continue
            elif elem.tag.endswith(xes_constants.TAG_ID):
                # IDs are kept as their raw string value.
                if parent is not None:
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY),
                                             elem.get(xes_constants.KEY_VALUE), tree)
                continue
            elif elem.tag.endswith(xes_constants.TAG_EXTENSION):
                if log is None:
                    raise SyntaxError('extension found outside of <log> tag')
                # Record the extension only when all three fields are present.
                if elem.get(xes_constants.KEY_NAME) is not None and elem.get(
                        xes_constants.KEY_PREFIX) is not None and elem.get(
                        xes_constants.KEY_URI) is not None:
                    log.extensions[elem.get(xes_constants.KEY_NAME)] = {
                        xes_constants.KEY_PREFIX: elem.get(xes_constants.KEY_PREFIX),
                        xes_constants.KEY_URI: elem.get(xes_constants.KEY_URI)
                    }
                continue
            elif elem.tag.endswith(xes_constants.TAG_GLOBAL):
                if log is None:
                    raise SyntaxError('global found outside of <log> tag')
                # Global attributes are collected per scope (e.g. trace/event).
                if elem.get(xes_constants.KEY_SCOPE) is not None:
                    log.omni_present[elem.get(xes_constants.KEY_SCOPE)] = {}
                    tree[elem] = log.omni_present[elem.get(xes_constants.KEY_SCOPE)]
                continue
            elif elem.tag.endswith(xes_constants.TAG_CLASSIFIER):
                if log is None:
                    raise SyntaxError('classifier found outside of <log> tag')
                if elem.get(xes_constants.KEY_KEYS) is not None:
                    classifier_value = elem.get(xes_constants.KEY_KEYS)
                    # Keys containing spaces are quoted with single quotes;
                    # otherwise the list is whitespace-separated.
                    if "'" in classifier_value:
                        log.classifiers[elem.get(xes_constants.KEY_NAME)] = [
                            x for x in classifier_value.split("'") if x.strip()
                        ]
                    else:
                        log.classifiers[elem.get(xes_constants.KEY_NAME)] = classifier_value.split()
                continue
            elif elem.tag.endswith(xes_constants.TAG_LOG):
                if log is not None:
                    raise SyntaxError('file contains > 1 <log> tags')
                log = EventLog()
                tree[elem] = log.attributes
                continue
        elif tree_event == _EVENT_END:
            # Closing an element: drop its container mapping and free the
            # parsed XML to keep memory bounded while streaming.
            if elem in tree:
                del tree[elem]
            elem.clear()
            if elem.getprevious() is not None:
                try:
                    # Delete already-processed siblings from the parent.
                    del elem.getparent()[0]
                except TypeError:
                    pass
            if elem.tag.endswith(xes_constants.TAG_EVENT):
                if trace is not None:
                    trace.append(event)
                    event = None
                continue
            elif elem.tag.endswith(xes_constants.TAG_TRACE):
                log.append(trace)
                # update progress bar as we have a completed trace
                if progress is not None:
                    progress.update()
                trace = None
                continue
            elif elem.tag.endswith(xes_constants.TAG_LOG):
                continue

    # gracefully close progress bar
    if progress is not None:
        progress.close()
    del context, progress

    if Parameters.TIMESTAMP_SORT in parameters and parameters[Parameters.TIMESTAMP_SORT]:
        log = sorting.sort_timestamp(log,
                                     timestamp_key=param_util.fetch(Parameters.TIMESTAMP_KEY, parameters),
                                     reverse_sort=param_util.fetch(Parameters.REVERSE_SORT, parameters))
    if insert_trace_indexes:
        # NOTE(review): the flag is named INSERT_TRACE_INDICES but this calls
        # insert_event_index_as_event_attribute — confirm which index the
        # helper actually inserts.
        log = index_attribute.insert_event_index_as_event_attribute(log)

    return log
def import_log_from_file_object(f, encoding, file_size=sys.maxsize, parameters=None):
    """
    Import a log object from a (XML) file object.

    The file object is NOT closed here; the caller owns it.

    Parameters
    -----------
    f
        file object (opened in binary mode; lines are decoded with `encoding`)
    encoding
        Encoding
    file_size
        Size of the file (measured on disk)
    parameters
        Parameters of the algorithm, including
            Parameters.TIMESTAMP_SORT -> Specify if we should sort log by timestamp
            Parameters.TIMESTAMP_KEY -> If sort is enabled, then sort the log by using this key
            Parameters.REVERSE_SORT -> Specify in which direction the log should be sorted
            Parameters.MAX_TRACES -> Specify the maximum number of traces to import from the log (read in order in the XML file)
            Parameters.MAX_BYTES -> Maximum number of bytes to read
            Parameters.SKYP_BYTES -> Number of bytes to skip
            Parameters.SET_ATTRIBUTES_TO_READ -> Names of the attributes that should be parsed. If not specified, then, all the attributes are parsed.

    Returns
    -----------
    log
        Log file
    """
    # Shared value-interning/cache dict passed to read_attribute_key_value.
    values_dict = {}
    date_parser = dt_parser.get()

    set_attributes_to_read = exec_utils.get_param_value(Parameters.SET_ATTRIBUTES_TO_READ, parameters, None)
    max_no_traces_to_import = exec_utils.get_param_value(Parameters.MAX_TRACES, parameters, sys.maxsize)
    timestamp_sort = exec_utils.get_param_value(Parameters.TIMESTAMP_SORT, parameters, False)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)
    reverse_sort = exec_utils.get_param_value(Parameters.REVERSE_SORT, parameters, False)
    # Default is False, which behaves as 0 in f.seek() — presumably intended
    # as "skip nothing"; confirm the declared default for SKIP_BYTES.
    skip_bytes = exec_utils.get_param_value(Parameters.SKIP_BYTES, parameters, False)
    max_bytes_to_read = exec_utils.get_param_value(Parameters.MAX_BYTES, parameters, sys.maxsize)

    # If the file exceeds the read budget, skip the initial part so that only
    # the trailing max_bytes_to_read bytes are parsed.
    if file_size > max_bytes_to_read:
        skip_bytes = file_size - max_bytes_to_read

    log = EventLog()
    tracecount = 0
    trace = None
    event = None

    f.seek(skip_bytes)
    for line in f:
        # Line-oriented parsing: one XML tag per line, double-quoted
        # attributes; a <type key="..." value="..."/> line splits into 5 parts.
        content = line.decode(encoding).split("\"")
        if len(content) > 0:
            tag = content[0].split("<")[-1]
            if trace is not None:
                if event is not None:
                    if len(content) == 5:
                        # Typed attribute inside an <event>; the helper returns
                        # (None, None) for attributes filtered out by
                        # set_attributes_to_read.
                        key, value = read_attribute_key_value(tag, content, date_parser, values_dict,
                                                              set_attributes_to_read)
                        if value is not None:
                            event[key] = value
                    elif tag.startswith("/event"):
                        trace.append(event)
                        event = None
                elif tag.startswith("event"):
                    event = Event()
                elif len(content) == 5:
                    # Typed attribute at trace level.
                    key, value = read_attribute_key_value(tag, content, date_parser, values_dict,
                                                          set_attributes_to_read)
                    if value is not None:
                        trace.attributes[key] = value
                elif tag.startswith("/trace"):
                    log.append(trace)
                    tracecount += 1
                    # NOTE(review): the check runs after appending with `>`,
                    # so up to MAX_TRACES + 1 traces are imported — confirm
                    # whether this off-by-one is intended.
                    if tracecount > max_no_traces_to_import:
                        break
                    trace = None
            elif tag.startswith("trace"):
                trace = Trace()

    if timestamp_sort:
        log = sorting.sort_timestamp(log, timestamp_key=timestamp_key, reverse_sort=reverse_sort)

    # sets the activity key as default classifier in the log's properties
    log.properties[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = xes_constants.DEFAULT_NAME_KEY
    log.properties[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = xes_constants.DEFAULT_NAME_KEY
    # sets the default timestamp key
    log.properties[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = xes_constants.DEFAULT_TIMESTAMP_KEY
    # sets the default resource key
    log.properties[constants.PARAMETER_CONSTANT_RESOURCE_KEY] = xes_constants.DEFAULT_RESOURCE_KEY
    # sets the default transition key
    log.properties[constants.PARAMETER_CONSTANT_TRANSITION_KEY] = xes_constants.DEFAULT_TRANSITION_KEY
    # sets the default group key
    log.properties[constants.PARAMETER_CONSTANT_GROUP_KEY] = xes_constants.DEFAULT_GROUP_KEY

    return log
def import_log(filename, parameters=None):
    """
    Imports an XES file into a log_skeleton object.

    Parameters
    ----------
    filename:
        Absolute filename
    parameters
        Parameters of the algorithm, including
            Parameters.TIMESTAMP_SORT -> Specify if we should sort log_skeleton by timestamp
            Parameters.TIMESTAMP_KEY -> If sort is enabled, then sort the log_skeleton by using this key
            Parameters.REVERSE_SORT -> Specify in which direction the log_skeleton should be sorted
            Parameters.MAX_TRACES -> Specify the maximum number of traces to import from the log_skeleton (read in order in the XML file)

    Returns
    -------
    log_skeleton : :class:`pm4py.log_skeleton.log_skeleton.EventLog`
        A log_skeleton
    """
    # Local import: lxml is only needed when this importer variant is used.
    from lxml import etree

    if parameters is None:
        parameters = {}

    max_no_traces_to_import = exec_utils.get_param_value(Parameters.MAX_TRACES, parameters, sys.maxsize)
    timestamp_sort = exec_utils.get_param_value(Parameters.TIMESTAMP_SORT, parameters, False)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)
    reverse_sort = exec_utils.get_param_value(Parameters.REVERSE_SORT, parameters, False)

    date_parser = dt_parser.get()

    # count number of traces and setup progress bar
    no_trace = count_traces(filename)

    context = etree.iterparse(filename, events=[_EVENT_START, _EVENT_END])

    # make tqdm facultative
    progress = None
    if pkgutil.find_loader("tqdm"):
        from tqdm.auto import tqdm
        progress = tqdm(total=no_trace, desc="parsing log_skeleton, completed traces :: ")

    log = None
    trace = None
    event = None

    # `tree` maps an open XML element to the container (dict-like) that its
    # child attributes must be written into; __parse_attribute extends it.
    tree = {}

    for tree_event, elem in context:
        if tree_event == _EVENT_START:  # starting to read
            # Container of the enclosing element, if that element is tracked.
            parent = tree[elem.getparent()] if elem.getparent() in tree else None
            if elem.tag.endswith(xes_constants.TAG_STRING):
                if parent is not None:
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY),
                                             elem.get(xes_constants.KEY_VALUE), tree)
                continue
            elif elem.tag.endswith(xes_constants.TAG_DATE):
                # Unparsable dates are logged and skipped, not fatal.
                try:
                    dt = date_parser.apply(elem.get(xes_constants.KEY_VALUE))
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), dt, tree)
                except TypeError:
                    logging.info("failed to parse date: " + str(elem.get(xes_constants.KEY_VALUE)))
                except ValueError:
                    logging.info("failed to parse date: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue
            elif elem.tag.endswith(xes_constants.TAG_EVENT):
                # Events must not nest.
                if event is not None:
                    raise SyntaxError('file contains <event> in another <event> tag')
                event = Event()
                tree[elem] = event
                continue
            elif elem.tag.endswith(xes_constants.TAG_TRACE):
                # Stop as soon as the requested number of traces is reached.
                if len(log) >= max_no_traces_to_import:
                    break
                if trace is not None:
                    raise SyntaxError('file contains <trace> in another <trace> tag')
                trace = Trace()
                # Attributes under <trace> go into the trace's attribute dict.
                tree[elem] = trace.attributes
                continue
            elif elem.tag.endswith(xes_constants.TAG_FLOAT):
                if parent is not None:
                    try:
                        val = float(elem.get(xes_constants.KEY_VALUE))
                        tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse float: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue
            elif elem.tag.endswith(xes_constants.TAG_INT):
                if parent is not None:
                    try:
                        val = int(elem.get(xes_constants.KEY_VALUE))
                        tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse int: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue
            elif elem.tag.endswith(xes_constants.TAG_BOOLEAN):
                if parent is not None:
                    try:
                        # Anything other than (case-insensitive) "true" is False.
                        val0 = elem.get(xes_constants.KEY_VALUE)
                        val = False
                        if str(val0).lower() == "true":
                            val = True
                        tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), val, tree)
                    except ValueError:
                        logging.info("failed to parse boolean: " + str(elem.get(xes_constants.KEY_VALUE)))
                continue
            elif elem.tag.endswith(xes_constants.TAG_LIST):
                if parent is not None:
                    # lists have no value, hence we put None as a value
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY), None, tree)
                continue
            elif elem.tag.endswith(xes_constants.TAG_ID):
                # IDs are kept as their raw string value.
                if parent is not None:
                    tree = __parse_attribute(elem, parent, elem.get(xes_constants.KEY_KEY),
                                             elem.get(xes_constants.KEY_VALUE), tree)
                continue
            elif elem.tag.endswith(xes_constants.TAG_EXTENSION):
                if log is None:
                    raise SyntaxError('extension found outside of <log_skeleton> tag')
                # Record the extension only when all three fields are present.
                if elem.get(xes_constants.KEY_NAME) is not None and elem.get(
                        xes_constants.KEY_PREFIX) is not None and elem.get(
                        xes_constants.KEY_URI) is not None:
                    log.extensions[elem.get(xes_constants.KEY_NAME)] = {
                        xes_constants.KEY_PREFIX: elem.get(xes_constants.KEY_PREFIX),
                        xes_constants.KEY_URI: elem.get(xes_constants.KEY_URI)
                    }
                continue
            elif elem.tag.endswith(xes_constants.TAG_GLOBAL):
                if log is None:
                    raise SyntaxError('global found outside of <log_skeleton> tag')
                # Global attributes are collected per scope (e.g. trace/event).
                if elem.get(xes_constants.KEY_SCOPE) is not None:
                    log.omni_present[elem.get(xes_constants.KEY_SCOPE)] = {}
                    tree[elem] = log.omni_present[elem.get(xes_constants.KEY_SCOPE)]
                continue
            elif elem.tag.endswith(xes_constants.TAG_CLASSIFIER):
                if log is None:
                    raise SyntaxError('classifier found outside of <log_skeleton> tag')
                if elem.get(xes_constants.KEY_KEYS) is not None:
                    classifier_value = elem.get(xes_constants.KEY_KEYS)
                    # Keys containing spaces are quoted with single quotes;
                    # otherwise the list is whitespace-separated.
                    if "'" in classifier_value:
                        log.classifiers[elem.get(xes_constants.KEY_NAME)] = [
                            x for x in classifier_value.split("'") if x.strip()
                        ]
                    else:
                        log.classifiers[elem.get(xes_constants.KEY_NAME)] = classifier_value.split()
                continue
            elif elem.tag.endswith(xes_constants.TAG_LOG):
                if log is not None:
                    raise SyntaxError('file contains > 1 <log_skeleton> tags')
                log = EventLog()
                tree[elem] = log.attributes
                continue
        elif tree_event == _EVENT_END:
            # Closing an element: drop its container mapping and free the
            # parsed XML to keep memory bounded while streaming.
            if elem in tree:
                del tree[elem]
            elem.clear()
            if elem.getprevious() is not None:
                try:
                    # Delete already-processed siblings from the parent.
                    del elem.getparent()[0]
                except TypeError:
                    pass
            if elem.tag.endswith(xes_constants.TAG_EVENT):
                if trace is not None:
                    trace.append(event)
                    event = None
                continue
            elif elem.tag.endswith(xes_constants.TAG_TRACE):
                log.append(trace)
                # update progress bar as we have a completed trace
                if progress is not None:
                    progress.update()
                trace = None
                continue
            elif elem.tag.endswith(xes_constants.TAG_LOG):
                continue

    # gracefully close progress bar
    if progress is not None:
        progress.close()
    del context, progress

    if timestamp_sort:
        log = sorting.sort_timestamp(log, timestamp_key=timestamp_key, reverse_sort=reverse_sort)

    return log