def split_loop_infrequent(cut, l, activity_key):
    """Split log `l` on a loop cut (infrequent/IMf variant).

    `cut[0]` is treated as the "do"-part of the loop; the remaining
    partitions are redo-parts. Each trace is scanned left to right and
    cut into maximal runs, each run being appended to the sub-log of the
    partition it belongs to.

    NOTE(review): several statements below look defective (flagged
    inline); this block is documented as-is, not repaired.
    """
    n = len(cut)
    new_logs = [log.EventLog() for i in range(0, n)]
    for trace in l:
        # s tracks the partition of the current run; every trace starts
        # in the do-part cut[0]
        s = cut[0]
        st = log.Trace()
        for act in trace:
            if act in s:
                # NOTE(review): membership is tested on the Event object
                # itself (not act[activity_key]), and Trace.insert is
                # called with a single argument — both look like defects;
                # confirm against the trace/partition element types.
                st.insert(act)
            else:
                # current run ended: locate the index j of partition s
                # and flush the accumulated sub-trace into L_j
                j = 0
                for j in range(0, len(cut)):
                    if cut[j] == s:
                        break
                new_logs[j].append(st)
                st = log.Trace()
                # switch s to the partition containing the new activity
                for partition in cut:
                    if act[activity_key] in partition:
                        s.append(partition)
                        # NOTE(review): this appends the partition into s
                        # (mutating a cut partition) instead of rebinding
                        # s = partition — verify intended behavior.
        # L_j <- L_j + [st] with sigma_j = s
        j = 0
        for j in range(0, len(cut)):
            if cut[j] == s:
                break
        new_logs[j].append(st)
        if s != cut[0]:
            # trace ended outside the do-part
            # NOTE(review): appends an EventLog (not a Trace) to the
            # do-part sub-log — presumably meant to be an empty trace;
            # confirm.
            new_logs[0].append(log.EventLog())
    return new_logs
def filter_log(f, log):
    """Filter an event log or event stream with the predicate ``f``.

    Parameters
    ----------
    f
        Filter criterion (may be a lambda); entries for which ``f``
        returns a truthy value are kept.
    log
        Event log; either an ``EventLog`` or ``EventStream`` object.

    Returns
    -------
    log
        A new, filtered log of the same type if the input type is
        supported; otherwise the original object (with a warning).
    """
    if isinstance(log, log_inst.EventLog):
        target = log_inst.EventLog
    elif isinstance(log, log_inst.EventStream):
        target = log_inst.EventStream
    else:
        warnings.warn(
            'input log object not of appropriate type, filter() not applied')
        return log
    kept = [entry for entry in log if f(entry)]
    return target(kept,
                  attributes=log.attributes,
                  classifiers=log.classifiers,
                  omni_present=log.omni_present,
                  extensions=log.extensions)
def transform_trace_log_to_event_log(
        log,
        include_case_attributes=True,
        case_attribute_prefix=log_util.CASE_ATTRIBUTE_PREFIX):
    """Flatten a trace log into an event log.

    Parameters
    ----------
    log: :class:`pm4py.log.log.TraceLog`
        A trace log
    include_case_attributes
        When True (default), every trace attribute is copied onto each of
        the trace's events, prefixed with ``case_attribute_prefix``.
    case_attribute_prefix
        Prefix for copied case attributes; default is 'case:'

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        An event log containing all events of all traces (events are the
        original objects, mutated in place when case attributes are added).
    """
    flattened = []
    for trace in log:
        # hoist the (possibly empty) attribute set once per trace
        case_items = trace.attributes.items() if include_case_attributes else ()
        for event in trace:
            for attr_name, attr_value in case_items:
                event[case_attribute_prefix + attr_name] = attr_value
            flattened.append(event)
    return log_instance.EventLog(flattened,
                                 attributes=log.attributes,
                                 classifiers=log.classifiers,
                                 omni_present=log.omni_present,
                                 extensions=log.extensions)
def sort_log(log, key, reverse=False):
    """Sort an event log or event stream by the given key.

    Parameters
    ----------
    log
        Event log object; either ``EventLog`` or ``EventStream``.
    key
        Sorting key (passed through to :func:`sorted`).
    reverse
        Whether the sort order should be reversed.

    Returns
    -------
    A new, sorted log of the same type if the input type is supported;
    otherwise the original object (with a warning).
    """
    # the EventLog check must come first so subclass instances are
    # reconstructed with the most specific supported type
    for target in (log_inst.EventLog, log_inst.EventStream):
        if isinstance(log, target):
            return target(sorted(log, key=key, reverse=reverse),
                          attributes=log.attributes,
                          classifiers=log.classifiers,
                          omni_present=log.omni_present,
                          extensions=log.extensions)
    warnings.warn(
        'input log object not of appropriate type, sorted() not applied')
    return log
def split_sequence(cut, l, activity_key):
    """Project log `l` onto the partitions of a sequence cut.

    For each cut partition ``c``, every trace contributes exactly one
    sub-trace: its first maximal run of consecutive events whose
    activity lies in ``c`` (or an empty sub-trace when the trace has no
    such event).

    Returns the list of resulting sub-logs, one per partition; returns
    None when ``cut`` is empty (the final ``if`` guard falls through).
    """
    new_logs = []
    for c in cut:  # for all cut-partitions
        lo = log.EventLog()
        for trace in l:  # for all traces in the log_skeleton
            not_in_c = True
            trace_new = log.Trace()
            for j in range(0, len(trace)):  # for every event in the current trace
                if trace[j][activity_key] in c:
                    not_in_c = False
                    # consume the maximal run of events belonging to c;
                    # j is advanced manually inside the while loop
                    while trace[j][activity_key] in c:
                        trace_new.append(
                            trace[j]
                        )  # we only add the events that match the cut partition
                        if j + 1 < len(trace):
                            j += 1
                        else:
                            # end of trace reached: step past the last
                            # index and stop scanning
                            j += 1
                            break
                    lo.append(trace_new)
                    # only the first matching run per trace is kept
                    break
            if not_in_c:
                # trace contains no event of c: keep an empty sub-trace
                lo.append(trace_new)
        new_logs.append(lo)
    if len(new_logs) > 0:
        return new_logs
def map_(func, log):
    '''Apply ``func`` to every element of the log.

    Domain and target of the function need to be of the same type
    (either trace or event); otherwise the map behaves unexpectedly.

    Parameters
    ----------
    func
        Mapping function applied to each trace/event.
    log
        ``EventLog`` or ``EventStream`` object.

    Returns
    -------
    A new log of the same type with ``func`` applied element-wise, or
    the original object (with a warning) for unsupported types.
    '''
    if isinstance(log, log_inst.EventLog):
        wrapper = log_inst.EventLog
    elif isinstance(log, log_inst.EventStream):
        wrapper = log_inst.EventStream
    else:
        warnings.warn(
            'input log_skeleton object not of appropriate type, map() not applied'
        )
        return log
    mapped = [func(element) for element in log]
    return wrapper(mapped,
                   attributes=log.attributes,
                   classifiers=log.classifiers,
                   omni_present=log.omni_present,
                   extensions=log.extensions)
def split_loop(cut, l, activity_key):
    """Project log `l` onto the partitions of a loop cut.

    Unlike the sequence split, one input trace may yield multiple
    sub-traces per partition: every maximal run of events belonging to
    partition ``c`` becomes its own sub-trace (loop repetitions).
    Partitions that collect no sub-trace at all are dropped from the
    result.
    """
    new_logs = []
    for c in cut:  # for cut partition
        lo = log.EventLog()
        for trace in l:  # for all traces
            j = 0
            # manual index so the inner run-scanning loop can advance it
            while j in range(0, len(trace)):  # for all events
                if trace[j][activity_key] in c:
                    trace_new = log.Trace()
                    # declared here and not above, so that we can generate multiple traces from one trace and
                    # cut (repetition)
                    # append those events that are contained in c:
                    while trace[j][activity_key] in c:
                        trace_new.append(trace[j])
                        if j + 1 < len(trace):
                            j += 1
                        else:
                            # end of trace: step past the last index so the
                            # outer while terminates, then stop the run
                            j += 1
                            break
                    lo.append(trace_new)
                else:
                    j += 1
        # keep only partitions that actually produced sub-traces
        if len(lo) != 0:
            new_logs.append(lo)
    return new_logs
def filter_(func, log):
    '''Filter the log with the predicate ``func``.

    Parameters
    ----------
    func
        Filter criterion; elements for which it returns a truthy value
        are kept.
    log
        ``EventLog`` or ``EventStream`` object.

    Returns
    -------
    A new, filtered log of the same type, or the original object (with
    a warning) for unsupported types.
    '''
    result_type = None
    if isinstance(log, log_inst.EventLog):
        result_type = log_inst.EventLog
    elif isinstance(log, log_inst.EventStream):
        result_type = log_inst.EventStream
    if result_type is None:
        warnings.warn(
            'input log_skeleton object not of appropriate type, filter() not applied'
        )
        return log
    return result_type([item for item in log if func(item)],
                       attributes=log.attributes,
                       classifiers=log.classifiers,
                       omni_present=log.omni_present,
                       extensions=log.extensions)
def act_once_per_trace(l, activities, activity_key):
    """Look for an activity that occurs exactly once in every trace.

    Parameters: `l` is the log, `activities` a dict mapping activity
    name -> total occurrence count in `l`, `activity_key` the event
    attribute holding the activity name.

    Returns a triple ``(found, new_log, small_log)``: when such an
    activity exists, ``new_log`` is `l` with that activity filtered out
    and ``small_log`` holds a single one-event trace with the chosen
    activity (to be attached as a leaf); otherwise
    ``(False, empty EventLog, None)``.
    """
    small_log = log.EventLog()
    small_trace = log.Trace()
    new_log = log.EventLog()
    number_of_traces = len(l)
    possible_activities = list()
    # transform dict of activities to list
    activities_dict = activities
    for key, value in activities_dict.items():
        # if activity appears as often as there are traces, add to list of possible activities:
        if value == number_of_traces:
            possible_activities.append(key)
    chosen_activity = None
    # find an activity that appears exactly once per trace and save it in chose_activity
    for act in possible_activities:
        fits_log = True
        for trace in l:
            fits_trace = False
            for element in trace:
                # enough to check if element occurs once per trace as number of occurrences equals the number of traces
                if act == element[activity_key]:
                    fits_trace = True
            if not fits_trace:
                fits_log = False
        if fits_log:
            chosen_activity = act
            break
    # save the chosen activity in a new trace, so that it can later be appended as leaf to our subtree
    for trace in l:
        if len(small_trace) > 0:
            # first occurrence already captured
            break
        for element in trace:
            if element[activity_key] == chosen_activity:
                small_trace.append(element)
                small_log.append(small_trace)
                break
    if chosen_activity is not None:
        new_log = filter_activity_from_log(l, chosen_activity, activity_key)
        logging_output = "activity once per trace: " + str(chosen_activity)
        logging.debug(logging_output)
        return True, new_log, small_log
    else:
        # NOTE(review): third element is None here (not small_log) — the
        # failure path deliberately returns the chosen_activity sentinel
        return False, new_log, chosen_activity
def apply_playout(net, initial_marking, no_traces=100, max_trace_length=100, case_id_key=xes_constants.DEFAULT_TRACEID_KEY, activity_key=xes_constants.DEFAULT_NAME_KEY, timestamp_key=xes_constants.DEFAULT_TIMESTAMP_KEY, final_marking=None): """ Do the playout of a Petrinet generating a log Parameters ---------- net Petri net to play-out initial_marking Initial marking of the Petri net no_traces Number of traces to generate max_trace_length Maximum number of events per trace (do break) case_id_key Trace attribute that is the case ID activity_key Event attribute that corresponds to the activity timestamp_key Event attribute that corresponds to the timestamp final_marking If provided, the final marking of the Petri net """ # assigns to each event an increased timestamp from 1970 curr_timestamp = 10000000 log = log_instance.EventLog() for i in range(no_traces): trace = log_instance.Trace() trace.attributes[case_id_key] = str(i) marking = copy(initial_marking) while len(trace) < max_trace_length: if not semantics.enabled_transitions( net, marking): # supports nets with possible deadlocks break all_enabled_trans = semantics.enabled_transitions(net, marking) if final_marking is not None and marking == final_marking: trans = choice(list(all_enabled_trans.union({None}))) else: trans = choice(list(all_enabled_trans)) if trans is None: break if trans.label is not None: event = log_instance.Event() event[activity_key] = trans.label event[timestamp_key] = datetime.datetime.fromtimestamp( curr_timestamp) trace.append(event) # increases by 1 second curr_timestamp += 1 marking = semantics.execute(trans, net, marking) log.append(trace) return log
def activity_concurrent(self, l, activities, activity_key, parameters=None):
    """Check whether some single activity is concurrent to the rest.

    For each activity, the log is filtered to remove that activity; if a
    cut can then be found on the filtered log, the activity is reported
    as concurrent.

    Returns ``(found, test_log, small_log, key)`` where ``test_log`` is
    the log without the found activity, ``small_log`` collects the
    removed events per trace (empty trace for traces not containing the
    activity), and ``key`` is the activity name (or the last tried one
    on failure).
    """
    # local import to avoid a circular dependency with subtree_plain
    from pm4py.algo.discovery.inductive.variants.im.data_structures import subtree_plain as subtree
    small_log = log.EventLog()
    test_log = log.EventLog()
    key = None
    activities_copy = copy(activities)
    empty_trace = log.Trace()
    # precomputed index: which traces contain which activity
    idx = index_containing(l, activities, activity_key)
    for key, value in activities_copy.items(
    ):  # iterate through activities (saved in key)
        test_log = filter_activity_use_idx(l, key, activity_key, idx)
        #test_log = filter_activity_from_log(l, key, activity_key)
        # unsure about this one:
        contains_empty_trace = False
        for trace in test_log:
            if len(trace) == 0:
                contains_empty_trace = True
        if contains_empty_trace:
            # removing this activity would empty a trace: skip it
            continue
        # more efficient deepcopy
        self_copy = deepcopy(self)
        cut = subtree.SubtreePlain.check_for_cut(
            self_copy, test_log, key, parameters=parameters
        )  # check if leaving out act, leads to finding cut
        if cut:
            # save act to small_trace, so that it can be appended as leaf later on
            for trace in l:
                small_trace = log.Trace()
                contains_activity = False
                for element in trace:
                    if element[activity_key] == key:
                        contains_activity = True
                        small_trace.append(element)
                        # NOTE(review): small_log grows once per matching
                        # event, not once per trace — confirm whether this
                        # append was meant to sit after the element loop
                        small_log.append(small_trace)
                if not contains_activity:
                    small_log.append(empty_trace)
            logging_output = "activity concurrent: " + str(key)
            logging.debug(logging_output)
            return True, test_log, small_log, key  # if so, return new log
    return False, test_log, small_log, key  # if, after iterating through all act's still no cut is found, return false
def __resolveTraceMatching(self, traceMatching, distributionOfAttributes, fillUp):
    """Build an event log from the query log using the trace matching.

    Each query trace whose case id appears in ``traceMatching`` (or every
    trace when ``fillUp`` is set) is resolved via ``__resolveTrace`` and
    collected into a fresh EventLog; timestamps are debug-checked along
    the way.
    """
    resolved_log = event_log.EventLog()
    for query_trace in self.__query_log:
        case_id = query_trace.attributes["concept:name"]
        if not (fillUp or case_id in traceMatching):
            continue
        matches = traceMatching.get(case_id, list())
        resolved = self.__resolveTrace(query_trace, matches,
                                       distributionOfAttributes)
        self.__debugTraceTimestamps(resolved)
        resolved_log.append(resolved)
    return resolved_log
def transform_event_stream_to_event_log(
        log,
        case_glue=pmutil.constants.CASE_ATTRIBUTE_GLUE,
        include_case_attributes=True,
        case_attribute_prefix=pmutil.constants.CASE_ATTRIBUTE_PREFIX,
        enable_deepcopy=False):
    """Group an event stream into an event log by case identifier.

    Parameters
    ----------
    log: :class:`pm4py.log.log.EventLog`
        An event stream
    case_glue
        Case identifier attribute; default is 'case:concept:name'
    include_case_attributes
        When True (default), attributes prefixed with
        ``case_attribute_prefix`` are moved from the events onto their
        trace (and removed from each event).
    case_attribute_prefix
        Prefix of case-level attributes; default is 'case:'
    enable_deepcopy
        Deep-copy the input first so input and output share no objects
        (events are otherwise shallow-copied individually).

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        An event log with one trace per distinct glue value.
    """
    if enable_deepcopy:
        log = deepcopy(log)
    cases = {}
    for source_event in log:
        event = copy(source_event)
        glue = event[case_glue]
        if glue not in cases:
            # first event of this case: derive the trace attributes
            trace_attr = {}
            if include_case_attributes:
                trace_attr = {
                    k.replace(case_attribute_prefix, ''): event[k]
                    for k in event.keys()
                    if k.startswith(case_attribute_prefix)
                }
            # guarantee the trace carries a case id
            trace_attr.setdefault(xes.DEFAULT_TRACEID_KEY, glue)
            cases[glue] = log_instance.Trace(attributes=trace_attr)
        if include_case_attributes:
            # strip case-level attributes from the event itself
            for k in list(event.keys()):
                if k.startswith(case_attribute_prefix):
                    del event[k]
        cases[glue].append(event)
    return log_instance.EventLog(cases.values(),
                                 attributes=log.attributes,
                                 classifiers=log.classifiers,
                                 omni_present=log.omni_present,
                                 extensions=log.extensions)
def sort_(func, log, reverse=False):
    """
    Sorts the log according to a given key function.

    Parameters
    ----------
    func
        sorting key, passed to ``sorted`` as ``key``
    log
        event log object; either EventLog or EventStream
    reverse
        indicates whether sorting should be reversed or not

    Returns
    -------
    sorted event log if object provided is correct; original log if not correct
    """
    if isinstance(log, log_inst.EventLog):
        return log_inst.EventLog(sorted(log, key=func, reverse=reverse),
                                 attributes=log.attributes,
                                 classifiers=log.classifiers,
                                 omni_present=log.omni_present,
                                 extensions=log.extensions)
    elif isinstance(log, log_inst.EventStream):
        return log_inst.EventStream(sorted(log, key=func, reverse=reverse),
                                    attributes=log.attributes,
                                    classifiers=log.classifiers,
                                    omni_present=log.omni_present,
                                    extensions=log.extensions)
    else:
        # fix: message previously said "map() not applied" (copy-paste from
        # map_); align wording with sort_log's warning
        warnings.warn('input log object not of appropriate type, sorted() not applied')
        return log
def split_parallel(cut, l, activity_key):
    """Project log `l` onto the partitions of a parallel cut.

    For every partition, each trace is reduced to the subsequence of its
    events whose activity lies in that partition (possibly empty), so
    every sub-log has exactly one sub-trace per input trace.
    """
    projected_logs = []
    for partition in cut:
        sublog = log.EventLog()
        for trace in l:
            projection = log.Trace()
            for event in trace:
                if event[activity_key] in partition:
                    projection.append(event)
            sublog.append(projection)
        projected_logs.append(sublog)
    return projected_logs
def apply_from_variants_list(var_list, petri_net, initial_marking, final_marking, parameters=None):
    """
    Apply the alignments from the specification of a list of variants in the log

    Parameters
    -------------
    var_list
        List of variants (for each item, the first entry is the variant itself, the second entry may be the number
        of cases)
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm (same as 'apply' method, plus 'variant_delimiter' that is , by default)

    Returns
    --------------
    dictio_alignments
        Dictionary that assigns to each variant its alignment
    """
    if parameters is None:
        parameters = {}
    # NOTE(review): the "parameters is None" test below is dead code — it was
    # just replaced with {} above, so only the key-membership test matters
    activity_key = DEFAULT_NAME_KEY if parameters is None or PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters else \
        parameters[
            pm4pyutil.constants.PARAMETER_CONSTANT_ACTIVITY_KEY]
    variant_delimiter = exec_utils.get_param_value(
        Parameters.PARAMETER_VARIANT_DELIMITER, parameters,
        pm4pyutil.constants.DEFAULT_VARIANT_SEP)
    log = log_implementation.EventLog()
    dictio_alignments = {}
    # build one synthetic trace per variant (a variant may already be a
    # sequence of activities, or a delimiter-separated string)
    for varitem in var_list:
        variant = varitem[0]
        trace = log_implementation.Trace()
        variant_split = variant.split(
            variant_delimiter) if type(variant) is str else variant
        for el in variant_split:
            trace.append(log_implementation.Event({activity_key: el}))
        log.append(trace)
    # NOTE(review): `parameters` is not forwarded to apply() here — confirm
    # whether that is intentional
    alignment = apply(log, petri_net, initial_marking, final_marking)
    # alignments come back in the same order the traces were appended
    for index, varitem in enumerate(var_list):
        variant = varitem[0]
        dictio_alignments[variant] = alignment[index]
    return dictio_alignments
def filter_activity_from_log(l, act, activity_key):
    """Return a copy of log `l` with every occurrence of `act` removed.

    Traces have no remove() operation, so each trace is rebuilt from the
    events whose activity differs from ``str(act)``; trace order and
    event order are preserved, and traces may become empty.
    """
    target = str(act)
    cleaned_log = log.EventLog()
    for trace in l:
        kept_events = log.Trace()
        for event in trace:
            if event[activity_key] != target:
                kept_events.append(event)
        cleaned_log.append(kept_events)
    return cleaned_log
def empty_trace_filtering(l, f):
    """Remove empty traces from `l` when any are present.

    `f` is the noise threshold: ``enough_traces`` reports whether the
    number of empty traces is at least ``f * len(l)``.

    Returns ``(empty_traces_present, enough_traces, log)``; when no
    empty trace exists the original log is returned unchanged as
    ``(False, False, l)``.
    """
    empty_traces_present, counter = __count_empty_traces(l)
    enough_traces = counter >= len(l) * f
    if not empty_traces_present:
        return False, False, l
    filtered = log.EventLog()
    for trace in l:
        if len(trace):
            filtered.append(trace)
    return empty_traces_present, enough_traces, filtered
def split_xor(cut, l, activity_key):
    """Project log `l` onto the partitions of an exclusive-choice cut.

    A trace is assigned to a partition only when *all* of its events
    belong to that partition; traces that straddle partitions are
    assigned to none.
    """
    sublogs = []
    for partition in cut:
        assigned = log.EventLog()
        for trace in l:
            if all(event[activity_key] in partition for event in trace):
                assigned.append(trace)
        sublogs.append(assigned)
    return sublogs  # one sub-log per cut partition
def to(trs):
    """Convert a sequence of raw traces into a pm4py EventLog.

    Each item is either a plain activity tuple, or a
    ``(name, activities)`` pair where the second element is a tuple; a
    non-empty name is used as the trace name, otherwise the positional
    index serves as the name.
    """
    converted = pm4py_log.EventLog()
    for position, item in enumerate(trs):
        if len(item) == 2 and type(item[1]) is tuple:
            # (name, activities) pair; fall back to the index when the
            # name is empty
            trace_name = item[0] if len(item[0]) > 0 else str(position)
            converted.append(to_trace(item[1], trace_name))
        else:
            converted.append(to_trace(item, str(position)))
    return converted
def empty_trace(l):
    """Detect and drop empty traces from log `l`.

    Returns ``(True, new_log)`` with a log stripped of empty traces when
    any exist, otherwise ``(False, l)`` with the original object.
    """
    if not any(len(trace) == 0 for trace in l):
        return False, l
    without_empties = log.EventLog()
    for trace in l:
        if len(trace) != 0:
            without_empties.append(trace)
    return True, without_empties
def filter_activity_use_idx(l, act, activity_key, idx):
    """Remove activity `act` from log `l` using a precomputed index.

    ``idx`` is assumed to map each activity to the ascending list of
    indices of the traces containing it (built by ``index_containing``)
    — TODO confirm ordering with the index builder. Only the traces
    listed in ``idx[act]`` are rebuilt without the activity; all other
    traces are appended to the result by reference (no copy).
    """
    act_str = str(act)
    i_act = idx[act]
    new_log = log.EventLog()
    # i walks the traces, j walks the sorted index list in lockstep
    i = 0
    j = 0
    while i < len(l):
        if j < len(i_act) and i == i_act[j]:
            # trace i contains the activity: rebuild it without act
            new_trace = log.Trace()
            for event in l[i]:
                if not event[activity_key] == act_str:
                    new_trace.append(event)
            new_log.append(new_trace)
            j = j + 1
        else:
            # trace untouched: share the original object
            new_log.append(l[i])
        i = i + 1
    return new_log
def generate_pm4py_log(trace_frequencies):
    """Materialize an EventLog from a variant-frequency mapping.

    ``trace_frequencies`` maps a delimiter-joined activity string to the
    number of traces to generate for it. Case ids are consecutive
    integers; every event gets the same fixed epoch timestamp, and
    activities containing the trace-end marker are skipped.
    """
    generated = event_log.EventLog()
    case_counter = 0
    for variant_string, frequency in trace_frequencies.items():
        activity_names = variant_string.split(EVENT_DELIMETER)
        for _ in range(frequency):
            trace = event_log.Trace()
            trace.attributes["concept:name"] = case_counter
            case_counter += 1
            for activity in activity_names:
                if TRACE_END not in activity:
                    event = event_log.Event()
                    event["concept:name"] = str(activity)
                    event["time:timestamp"] = datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=tzutc())
                    trace.append(event)
            generated.append(trace)
    return generated
def strict_tau_loop(l, start_activities, end_activities, activity_key):
    """Strict tau-loop fall-through: split traces at end->start borders.

    Each trace is repeatedly cut wherever an end-activity is directly
    followed by a start-activity; the resulting fragments are collected
    into a new log. Returns ``(True, new_log)`` if at least one trace was
    actually split (the new log has more traces than the input),
    otherwise ``(False, new_log)``.
    """
    new_log = log.EventLog()
    for trace in l:  # for all traces
        t1, t2, found_split = split_between_end_and_start(trace, start_activities, end_activities,
                                                          activity_key)  # look for split
        if found_split:
            new_log.append(t1)
            while found_split:  # if split is found
                # keep splitting the remainder t2 until no border is left;
                # each pass appends the next fragment (the final t1 is the
                # unsplittable tail)
                t1, t2, found_split = split_between_end_and_start(t2, start_activities, end_activities,
                                                                  activity_key)  # continue to split
                new_log.append(t1)
        else:
            new_log.append(trace)  # if there is nothing to split, append the whole trace
    if len(new_log) > len(l):
        logging.debug("strict_tau_loop")
        return True, new_log
    else:
        return False, new_log
def apply_playout(net, initial_marking, no_traces=100, max_trace_length=100): """ Do the playout of a Petrinet generating a log Parameters ---------- net Petri net to play-out initial_marking Initial marking of the Petri net no_traces Number of traces to generate max_trace_length Maximum number of events per trace (do break) """ # assigns to each event an increased timestamp from 1970 curr_timestamp = 10000000 log = log_instance.EventLog() for i in range(no_traces): trace = log_instance.Trace() trace.attributes["concept:name"] = str(i) marking = copy(initial_marking) for j in range(100000): if not semantics.enabled_transitions(net, marking): break all_enabled_trans = semantics.enabled_transitions(net, marking) all_enabled_trans = list(all_enabled_trans) shuffle(all_enabled_trans) trans = all_enabled_trans[0] if trans.label is not None: event = log_instance.Event() event["concept:name"] = trans.label event["time:timestamp"] = datetime.datetime.fromtimestamp( curr_timestamp) trace.append(event) # increases by 1 second curr_timestamp = curr_timestamp + 1 marking = semantics.execute(trans, net, marking) if len(trace) > max_trace_length: break if len(trace) > 0: log.append(trace) return log
def apply_from_variants_list(var_list, petri_net, initial_marking, final_marking, parameters=None):
    """
    Apply the alignments from the specification of a list of variants in the log

    Parameters
    -------------
    var_list
        List of variants (for each item, the first entry is the variant itself, the second entry may be the number
        of cases)
    petri_net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    parameters
        Parameters of the algorithm (same as 'apply' method, plus 'variant_delimiter' that is , by default)

    Returns
    --------------
    dictio_alignments
        Dictionary that assigns to each variant its alignment
    """
    parameters = {} if parameters is None else parameters
    # build one synthetic trace per variant, preserving order so the
    # alignment results can be matched back by position
    variant_log = log_implementation.EventLog()
    for varitem in var_list:
        variant_log.append(
            variants_util.variant_to_trace(varitem[0], parameters=parameters))
    alignment = apply(variant_log, petri_net, initial_marking, final_marking)
    return {
        varitem[0]: alignment[index]
        for index, varitem in enumerate(var_list)
    }
def split_sequence_infrequent(cut, l, activity_key):
    """Split log `l` on a sequence cut (infrequent/IMf variant).

    Per trace, a running split point is advanced once per cut partition
    (``find_split_point``); the segment between the previous and new
    split point is extracted and filtered onto the current partition,
    producing one sub-trace per trace in each partition's sub-log.
    """
    # write L_1,...,L_n like in second line of code on page 206
    n = len(cut)
    new_logs = [log.EventLog() for j in range(0, n)]
    # activities of all partitions already processed; these may be
    # skipped over when searching the next split point
    ignore = []
    # per-trace position where the previous partition's segment ended
    split_points_list = [0] * len(l)
    for i in range(0, n):
        # NOTE(review): split_point is never read afterwards — dead local
        split_point = 0
        # write our ignore list with all elements from past cut partitions
        if i != 0:
            for element in cut[i-1]:
                ignore.append(element)
        for j in range(len(l)):
            trace = l[j]
            new_split_point = find_split_point(trace, cut[i], split_points_list[j], ignore, activity_key)
            cutted_trace = cut_trace_between_two_points(trace, split_points_list[j], new_split_point)
            filtered_trace = filter_trace_on_cut_partition(cutted_trace, cut[i], activity_key)
            new_logs[i].append(filtered_trace)
            # remember where this partition's segment ended for trace j
            split_points_list[j] = new_split_point
    return new_logs
def split_xor_infrequent(cut, l, activity_key):
    """Split log `l` on an exclusive-choice cut, tolerating noise.

    Each trace is assigned to the partition covering the largest number
    of its events (first partition wins ties), then filtered so only
    events of that partition remain.

    TODO think of empty logs
    """
    # the empty L_1,...,L_n from the second code-line on page 205
    partition_count = len(cut)
    new_logs = [log.EventLog() for _ in range(partition_count)]
    for trace in l:
        # count, per partition, how many of the trace's events it covers
        coverage = []
        for partition in cut:
            hits = 0
            for event in trace:
                if event[activity_key] in partition:
                    hits += 1
            coverage.append(hits)
        # argmax with first-wins tie-breaking (strict > keeps the earliest)
        best = 0
        for candidate in range(1, partition_count):
            if coverage[candidate] > coverage[best]:
                best = candidate
        filtered_trace = filter_trace_on_cut_partition(trace, cut[best], activity_key)
        new_logs[best].append(filtered_trace)
    return new_logs
def transform_event_stream_to_event_log(log, case_glue=log_util.CASE_ATTRIBUTE_GLUE, includes_case_attributes=True,
                                        case_attribute_prefix=log_util.CASE_ATTRIBUTE_PREFIX):
    """
    Converts the event stream to a log, grouping events by case id

    Parameters
    ----------
    log: :class:`pm4py.log.log.EventStream`
        An event Log
    case_glue:
        Case identifier. Default is 'case:concept:name'
    includes_case_attributes:
        Default is True; when set, attributes prefixed with
        `case_attribute_prefix` are moved from the events to the trace
    case_attribute_prefix:
        Default is 'case:'

    Returns
    -------
    log : :class:`pm4py.log.log.EventLog`
        A log with one trace per distinct glue value

    NOTE(review): events are NOT copied — the input stream's event
    objects are mutated in place (case attributes deleted) and shared
    with the returned log; confirm callers expect this.
    """
    traces = {}
    for event in log:
        glue = event[case_glue]
        if glue not in traces:
            # first event of this case: lift case-level attributes onto
            # the new trace
            trace_attr = {}
            if includes_case_attributes:
                for k in event.keys():
                    if k.startswith(case_attribute_prefix):
                        trace_attr[k.replace(case_attribute_prefix, '')] = event[k]
            traces[glue] = log_instance.Trace(attributes=trace_attr)
        if includes_case_attributes:
            # strip case-level attributes from the event itself
            for k in list(event.keys()):
                if k.startswith(case_attribute_prefix):
                    del event[k]
        traces[glue].append(event)
    return log_instance.EventLog(traces.values(), attributes=log.attributes, classifiers=log.classifiers,
                                 omni_present=log.omni_present, extensions=log.extensions)
def tau_loop(l, start_activities, activity_key):
    """Tau-loop fall-through: split traces directly before start activities.

    Returns ``(True, new_log)`` when at least one split happened (the
    new log has more traces than the input), else ``(False, new_log)``.
    """
    # pretty much the same code as in strict_tau_loop, just that we split at a different point
    new_log = log.EventLog()
    for trace in l:
        t1, t2, found_split = split_before_start(trace, start_activities, activity_key)
        if found_split and len(t2) != 0:
            new_log.append(t1)
            while found_split:
                # keep a copy of the remainder: if the next split yields an
                # empty head, the remainder itself is appended instead
                t2_backup = copy(t2)
                t1, t2, found_split = split_before_start(t2, start_activities, activity_key)
                if len(t1) != 0:
                    new_log.append(t1)
                else:
                    new_log.append(t2_backup)
        else:
            # nothing to split: keep the whole trace
            new_log.append(trace)
    if len(new_log) > len(l):
        logging.debug("tau_loop")
        return True, new_log
    else:
        return False, new_log