def test_footprints_extensive(self):
    """Trace-extensive footprint conformance on the running example log."""
    event_log = pm4py.read_xes("input_data/running-example.xes")
    # per-trace footprints of the log
    log_footprints = footprints_discovery.apply(
        event_log, variant=footprints_discovery.Variants.TRACE_BY_TRACE)
    # model footprints from an inductively discovered tree (20% noise)
    process_tree = pm4py.discover_process_tree_inductive(event_log, noise_threshold=0.2)
    model_footprints = footprints_discovery.apply(process_tree)
    conformance_result = trace_extensive.apply(log_footprints, model_footprints)
    diagnostics = trace_extensive.get_diagnostics_dataframe(event_log, conformance_result)
def test_footprints_net(self):
    """Footprint conformance between the log and an Alpha-mined Petri net."""
    from pm4py.algo.discovery.alpha import algorithm as alpha_miner
    from pm4py.algo.discovery.footprints import algorithm as footprints_discovery
    from pm4py.algo.conformance.footprints import algorithm as footprints_conformance

    event_log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    net, im, fm = alpha_miner.apply(event_log)
    # footprints of the log (default variant) and of the net
    log_footprints = footprints_discovery.apply(event_log)
    net_footprints = footprints_discovery.apply(net, im)
    conf = footprints_conformance.apply(log_footprints, net_footprints)
def test_footprints_tree(self):
    """Footprint conformance between the log and an inductively mined tree."""
    from pm4py.algo.discovery.inductive import algorithm as inductive_miner
    from pm4py.algo.discovery.footprints import algorithm as footprints_discovery
    from pm4py.algo.conformance.footprints import algorithm as footprints_conformance

    event_log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    process_tree = inductive_miner.apply_tree(event_log)
    # footprints of the log (default variant) and of the tree
    log_footprints = footprints_discovery.apply(event_log)
    tree_footprints = footprints_discovery.apply(process_tree)
    conf = footprints_conformance.apply(log_footprints, tree_footprints)
def test_footprints_tree_df(self):
    """Footprint conformance between a dataframe log and an inductive tree."""
    from pm4py.algo.discovery.inductive import algorithm as inductive_miner
    from pm4py.algo.discovery.footprints import algorithm as footprints_discovery
    from pm4py.algo.conformance.footprints import algorithm as footprints_conformance

    dataframe = pd.read_csv(os.path.join("input_data", "running-example.csv"))
    dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe)
    # the tree is mined on the converted event log, the footprints on the dataframe
    event_log = converter.apply(dataframe)
    process_tree = inductive_miner.apply_tree(event_log)
    df_footprints = footprints_discovery.apply(dataframe)
    tree_footprints = footprints_discovery.apply(process_tree)
    conf = footprints_conformance.apply(df_footprints, tree_footprints)
def test_footprints_tree_df(self):
    """Footprint conformance between a CSV-imported dataframe and an inductive tree."""
    from pm4py.algo.discovery.inductive import algorithm as inductive_miner
    from pm4py.algo.discovery.footprints import algorithm as footprints_discovery
    from pm4py.algo.conformance.footprints import algorithm as footprints_conformance

    dataframe = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    # the tree is mined on the converted event log, the footprints on the dataframe
    event_log = converter.apply(dataframe)
    process_tree = inductive_miner.apply_tree(event_log)
    df_footprints = footprints_discovery.apply(dataframe)
    tree_footprints = footprints_discovery.apply(process_tree)
    conf = footprints_conformance.apply(df_footprints, tree_footprints)
def test_footprints_net(self):
    """Runs all four footprint-conformance combinations against an Alpha net."""
    from pm4py.algo.discovery.alpha import algorithm as alpha_miner
    from pm4py.algo.discovery.footprints import algorithm as footprints_discovery
    from pm4py.algo.conformance.footprints import algorithm as footprints_conformance

    event_log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    net, im, fm = alpha_miner.apply(event_log)
    # log footprints: pooled over the whole log, and per trace (default variant)
    fp_entire_log = footprints_discovery.apply(
        event_log, variant=footprints_discovery.Variants.ENTIRE_EVENT_LOG)
    fp_trace_trace = footprints_discovery.apply(event_log)
    fp_net = footprints_discovery.apply(net, im)
    # default and extensive conformance variants on both footprint flavors
    conf1 = footprints_conformance.apply(fp_entire_log, fp_net)
    conf2 = footprints_conformance.apply(fp_trace_trace, fp_net)
    conf3 = footprints_conformance.apply(
        fp_entire_log, fp_net, variant=footprints_conformance.Variants.LOG_EXTENSIVE)
    conf4 = footprints_conformance.apply(
        fp_trace_trace, fp_net, variant=footprints_conformance.Variants.TRACE_EXTENSIVE)
def test_footprints_tree(self):
    """Runs all four footprint-conformance combinations against an inductive tree."""
    from pm4py.algo.discovery.inductive import algorithm as inductive_miner
    from pm4py.algo.discovery.footprints import algorithm as footprints_discovery
    from pm4py.algo.conformance.footprints import algorithm as footprints_conformance

    event_log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    process_tree = inductive_miner.apply_tree(event_log)
    # log footprints: pooled over the whole log, and per trace (default variant)
    fp_entire_log = footprints_discovery.apply(
        event_log, variant=footprints_discovery.Variants.ENTIRE_EVENT_LOG)
    fp_trace_trace = footprints_discovery.apply(event_log)
    fp_tree = footprints_discovery.apply(process_tree)
    # default and extensive conformance variants on both footprint flavors
    conf1 = footprints_conformance.apply(fp_entire_log, fp_tree)
    conf2 = footprints_conformance.apply(fp_trace_trace, fp_tree)
    conf3 = footprints_conformance.apply(
        fp_entire_log, fp_tree, variant=footprints_conformance.Variants.LOG_EXTENSIVE)
    conf4 = footprints_conformance.apply(
        fp_trace_trace, fp_tree, variant=footprints_conformance.Variants.TRACE_EXTENSIVE)
def execute_script():
    """Replays the receipt log event-by-event through a streaming footprints
    conformance checker and prints the resulting diagnostics dataframe."""
    # import a XES event log
    event_log = pm4py.read_xes(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    # flatten the log into single events (no longer grouped into cases)
    stream = pm4py.convert_to_event_stream(event_log)
    # mine a process tree with the IMf algorithm (50% noise threshold)
    process_tree = pm4py.discover_tree_inductive(event_log, noise_threshold=0.5)
    # footprint matrix of the model
    model_footprints = fp_discovery.apply(process_tree)
    # the live stream dispatches incoming events to registered algorithms
    live_stream = LiveEventStream()
    checker = streaming_fp_conf.apply(model_footprints)
    live_stream.register(checker)
    live_stream.start()
    # feed every event of the original log into the live stream
    # (each one is routed to the conformance checker)
    for ev in stream:
        live_stream.append(ev)
    live_stream.stop()
    # fetch the diagnostics, then signal termination so the checker verifies,
    # for every case, that its last activity is a possible end activity
    # according to the model footprints
    diagnostics = checker.get()
    checker.terminate_all()
    print(diagnostics)
    print(diagnostics[diagnostics["is_fit"] == False])
def apply(tree: ProcessTree, parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Performs an extensive playout of the process tree.

    Parameters
    -------------
    tree
        Process tree
    parameters
        Possible parameters, including:
        - Parameters.MIN_TRACE_LENGTH => minimum length of a trace (default: 1)
        - Parameters.MAX_TRACE_LENGTH => maximum length of a trace (default: min_allowed_trace_length)
        - Parameters.MAX_LOOP_OCC => maximum number of occurrences for a loop (default: MAX_TRACE_LENGTH // 2)
        - Parameters.ACTIVITY_KEY => activity key
        - Parameters.MAX_LIMIT_NUM_TRACES => maximum number to the limit of traces; the playout shall stop
          when the number is reached (default: 100000)
        - Parameters.RETURN_SET_STRINGS => if True, return the raw set of traces instead of an EventLog

    Returns
    -------------
    log
        Event log (or the set of trace tuples when RETURN_SET_STRINGS is set)
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    # to save memory in the returned log, allocate each activity Event once;
    # the footprints module provides the set of activities of the tree
    fp_tree = fp_discovery.apply(tree, parameters=parameters)
    activities = {act: Event({activity_key: act}) for act in fp_tree["activities"]}

    min_allowed_trace_length = bottomup_discovery.get_min_trace_length(tree, parameters=parameters)
    min_trace_length = exec_utils.get_param_value(Parameters.MIN_TRACE_LENGTH, parameters, 1)
    max_trace_length = exec_utils.get_param_value(Parameters.MAX_TRACE_LENGTH, parameters, min_allowed_trace_length)
    max_loop_occ = exec_utils.get_param_value(Parameters.MAX_LOOP_OCC, parameters, int(max_trace_length / 2))
    max_limit_num_traces = exec_utils.get_param_value(Parameters.MAX_LIMIT_NUM_TRACES, parameters, 100000)
    return_set_strings = exec_utils.get_param_value(Parameters.RETURN_SET_STRINGS, parameters, False)

    bottomup = bottomup_discovery.get_bottomup_nodes(tree, parameters=parameters)
    min_rem_dict = bottomup_discovery.get_min_rem_dict(tree, parameters=parameters)
    max_rem_dict = bottomup_discovery.get_max_rem_dict(tree, parameters=parameters)

    playout_dictio = {}
    # bottom-up order guarantees every child is played out before its parent
    # (idiomatic iteration instead of the former index loop)
    for node in bottomup:
        get_playout(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ,
                    min_rem_dict, max_rem_dict, max_limit_num_traces)
    tree_playout_traces = playout_dictio[tree][TRACES]

    if return_set_strings:
        return tree_playout_traces

    log = EventLog()
    for trace_activities in tree_playout_traces:
        trace = Trace()
        for act in trace_activities:
            # reuse the pre-allocated Event for this activity
            trace.append(activities[act])
        log.append(trace)
    return log
def discover_footprints(*args: Union[EventLog, Tuple[PetriNet, Marking, Marking], ProcessTree]) -> Union[
    List[Dict[str, Any]], Dict[str, Any]]:
    """
    Discovers the footprints out of the provided event log / process model

    Parameters
    --------------
    args
        Event log / process model

    Returns
    --------------
    footprints
        Footprints of the event log (possibly a list, one per case) or of the
        process model, as computed by the footprints discovery algorithm
    """
    from pm4py.algo.discovery.footprints import algorithm as fp_discovery
    return fp_discovery.apply(*args)
def execute_script():
    """Discovers footprints from a log, a process tree and a Petri net,
    checks conformance, and visualizes the footprint comparison table."""
    # import a log
    event_log = importer.apply(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    # mine the model on an auto-filtered copy of the log
    filtered_log = variants_filter.apply_auto_filter(deepcopy(event_log))
    # discover a process tree using the inductive miner
    process_tree = inductive_miner.apply_tree(filtered_log)
    print(process_tree)
    # convert the process tree into a Petri net
    net, initial_marking, final_marking = converter.apply(process_tree)
    # footprints discovery: one set of footprints per case of the log
    log_footprints = footprints_discovery.apply(event_log)
    # footprints of the process tree
    tree_footprints = footprints_discovery.apply(process_tree)
    # footprints of the Petri net
    net_footprints = footprints_discovery.apply(net, initial_marking)
    print(len(tree_footprints["sequence"]), len(tree_footprints["parallel"]),
          len(net_footprints["sequence"]), len(net_footprints["parallel"]))
    print(tree_footprints["sequence"] == net_footprints["sequence"]
          and tree_footprints["parallel"] == net_footprints["parallel"])
    # footprints-based conformance checking
    conf = footprints_conformance.apply(log_footprints, net_footprints)
    for trace_deviations in conf:
        if trace_deviations:
            # print the first anomalous trace (containing deviations that are
            # contained in the trace but not allowed by the model)
            print(trace_deviations)
            break
    # footprints of the entire log (relations pooled across all cases
    # instead of case-by-case)
    entire_log_footprints = footprints_discovery.apply(
        event_log, variant=footprints_discovery.Variants.ENTIRE_EVENT_LOG)
    # visualize the footprint table
    gviz = fp_visualizer.apply(entire_log_footprints, net_footprints,
                               parameters={"format": "svg"})
    fp_visualizer.view(gviz)
def execute_script():
    """Analyzes footprint deviations of the receipt log against a tree mined
    on a filtered copy, relating each deviating path and each variant to
    throughput times, then visualizes the deviations on the tree."""
    log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    # median case duration over the whole log
    throughput_time = case_statistics.get_median_caseduration(log)
    variants, variants_times = variants_filter.get_variants_along_with_case_durations(
        log)
    dfg = dfg_discovery.apply(log)
    # model is mined on an auto-filtered copy; footprints are checked on the full log
    filtered_log = variants_filter.apply_auto_filter(deepcopy(log))
    # filtered_log = log
    tree = inductive_miner.apply_tree(filtered_log)
    fp_log = fp_discovery.apply(log, variant=fp_discovery.Variants.ENTIRE_EVENT_LOG)
    fp_model = fp_discovery.apply(tree)
    # conf is a set of directly-follows paths allowed by the log but not the model
    conf = fp_conformance.apply(fp_log, fp_model)
    # deviating paths sorted by DFG frequency (ties broken by activity names)
    conf_occ = sorted([(x, dfg[x]) for x in conf], key=lambda y: (y[1], y[0][0], y[0][1]),
                      reverse=True)
    print(
        "source activity\t\ttarget activity\t\toccurrences\t\tthroughput time log\t\tthroughput time traces with path"
    )
    # top-10 deviating paths: compare overall median duration with the median
    # duration of only the traces containing the path
    for i in range(min(10, len(conf_occ))):
        path = conf_occ[i][0]
        occ = conf_occ[i][1]
        red_log = paths_filter.apply(log, [path])
        red_throughput_time = case_statistics.get_median_caseduration(red_log)
        print("%s\t\t%s\t\t%d\t\t%s\t\t%s" % (path[0], path[1], occ,
                                              human_readable_stat(throughput_time),
                                              human_readable_stat(red_throughput_time)))
    # variants sorted by number of cases
    variants_length = sorted([(x, len(variants[x])) for x in variants.keys()],
                             key=lambda y: (y[1], y[0]), reverse=True)
    print(
        "\nvariant\t\toccurrences\t\tthroughput time log\t\tthroughput time traces with path"
    )
    for i in range(min(10, len(variants_length))):
        var = variants_length[i][0]
        # truncate the variant label for display
        vark = str(var)
        if len(vark) > 10:
            vark = vark[:10]
        occ = variants_length[i][1]
        fp_log_var = fp_discovery.apply(
            variants[var], variant=fp_discovery.Variants.ENTIRE_EVENT_LOG)
        conf_var = fp_conformance.apply(fp_log_var, fp_model)
        # a variant fits when its footprints raise no deviations
        is_fit = str(len(conf_var) == 0)
        var_throughput = case_statistics.get_median_caseduration(variants[var])
        # NOTE(review): this row prints 5 values (including is_fit and the raw,
        # non-human-readable overall throughput_time) under a 4-column header --
        # looks unintentional, confirm the intended columns before changing
        print("%s\t\t%d\t\t%s\t\t%s\t\t%s" % (vark, occ, is_fit, throughput_time,
                                              human_readable_stat(var_throughput)))
    # print(conf_occ)
    # color the tree nodes according to the detected deviations
    conf_colors = tree_visualization.apply(tree, conf)
    if True:
        gviz = pt_visualizer.apply(
            tree,
            parameters={
                "format": "svg",
                pt_visualizer.Variants.WO_DECORATION.value.Parameters.COLOR_MAP: conf_colors,
                pt_visualizer.Variants.WO_DECORATION.value.Parameters.ENABLE_DEEPCOPY: False
            })
        pt_visualizer.view(gviz)
# NOTE(review): interior fragment of a larger benchmarking function -- the
# enclosing def and the body of the trailing `if ENABLE_ALIGNMENTS:` are
# outside this view; logName, t1, parameters, log, tree and the timing dicts
# are defined earlier in that function.
# record elapsed time of the preceding (alpha) token replay
t2 = time.time()
times_tokenreplay_alpha[logName] = t2 - t1
t1 = time.time()
# token-based fitness of the inductive model; keep only the percentage of fitting traces
fitness_token_imdf[logName] = \
    fitness_evaluator.apply(log, inductive_model, inductive_im, inductive_fm,
                            parameters=parameters,
                            variant=fitness_evaluator.Variants.TOKEN_BASED)[
        'perc_fit_traces']
print(
    str(time.time()) + " fitness_token_inductive for " + logName + " succeeded! " + str(
        fitness_token_imdf[logName]))
t2 = time.time()
times_tokenreplay_imdf[logName] = t2 - t1
t1 = time.time()
# footprints-based fitness: trace-extensive conformance of log vs tree footprints
fp_log = footprints_discovery.apply(log, parameters=parameters)
fp_tree = footprints_discovery.apply(tree, parameters=parameters)
conf = footprints_conformance.apply(
    fp_log, fp_tree, variant=footprints_conformance.Variants.TRACE_EXTENSIVE,
    parameters=parameters)
# fitness_fp = float(len([x for x in conf if len(x) == 0])) / float(len(conf)) * 100.0 if conf else 0.0
# percentage of traces whose footprints fit the model (0.0 for an empty result)
fitness_fp = float(len([x for x in conf if x["is_footprints_fit"]])) / float(
    len(conf)) * 100.0 if conf else 0.0
t2 = time.time()
fitness_footprints_imdf[logName] = fitness_fp
times_footprints_imdf[logName] = t2 - t1
if ENABLE_ALIGNMENTS:
def extract_deterministic_standardization_feature(df, log):
    """
    Enriches ``df`` with determinism and standardization features derived from
    the directly-follows relations of ``log``.

    For every activity (lowercased, underscores replaced by spaces) the
    following columns are joined onto ``df`` by its ``activity`` column:
    - deterministic_following_activity: True if exactly one activity follows it
    - deterministic_preceding_activity: True if exactly one activity precedes it
    - following_activities_standardization: number of distinct followers
    - preceding_activities_standardization: number of distinct predecessors

    Parameters
    ----------
    df
        Dataframe with an ``activity`` column to enrich
    log
        Event log from which the footprints (directly-follows pairs) are mined

    Returns
    -------
    pd.DataFrame
        ``df`` with the four feature columns added (missing activities filled
        with False / 0)
    """
    from collections import defaultdict

    parameters = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: c.ACTIVITY_ATTRIBUTE_NAME
    }
    fp_log = footprints_discovery.apply(
        log, variant=footprints_discovery.Variants.ENTIRE_EVENT_LOG,
        parameters=parameters)
    # directly-follows pairs (source, target) from the pm4py footprints
    directly_follows = fp_log['sequence']

    # activity -> list of following / preceding activities (normalized names)
    following = defaultdict(list)
    preceding = defaultdict(list)
    for pair in directly_follows:
        source = pair[0].lower().replace('_', ' ')
        target = pair[1].lower().replace('_', ' ')
        following[source].append(target)
        preceding[target].append(source)

    # an activity is "deterministic" when exactly one neighbor exists;
    # "standardization" is the count of distinct recorded neighbors
    df_dataframe = pd.DataFrame(
        [[act, len(succ) == 1] for act, succ in following.items()],
        columns=['activity', 'deterministic_following_activity'])
    sf_dataframe = pd.DataFrame(
        [[act, len(succ)] for act, succ in following.items()],
        columns=['activity', 'following_activities_standardization'])
    dp_dataframe = pd.DataFrame(
        [[act, len(pred) == 1] for act, pred in preceding.items()],
        columns=['activity', 'deterministic_preceding_activity'])
    sp_dataframe = pd.DataFrame(
        [[act, len(pred)] for act, pred in preceding.items()],
        columns=['activity', 'preceding_activities_standardization'])

    # join the feature frames onto the input dataframe by activity name
    result_df = df.join(df_dataframe.set_index('activity'), on='activity')
    result_df = result_df.join(dp_dataframe.set_index('activity'), on='activity')
    result_df = result_df.join(sf_dataframe.set_index('activity'), on='activity')
    result_df = result_df.join(sp_dataframe.set_index('activity'), on='activity')

    # assign instead of inplace-fillna on a column view (deprecated chained
    # assignment in modern pandas); activities without relations get False / 0
    result_df['deterministic_following_activity'] = \
        result_df['deterministic_following_activity'].fillna(False)
    result_df['deterministic_preceding_activity'] = \
        result_df['deterministic_preceding_activity'].fillna(False)
    result_df['following_activities_standardization'] = \
        result_df['following_activities_standardization'].fillna(0)
    result_df['preceding_activities_standardization'] = \
        result_df['preceding_activities_standardization'].fillna(0)
    return result_df