def execute_script():
    """Plays out a simplified DFG of the receipt log and inspects trace probabilities."""
    log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes"))
    act_count = pm4py.get_attribute_values(log, "concept:name")
    dfg, sa, ea = pm4py.discover_dfg(log)
    # simplify the DFG: keep the given fraction of activities, then of paths
    keep_fraction = 0.5
    dfg, sa, ea, act_count = dfg_filtering.filter_dfg_on_activities_percentage(
        dfg, sa, ea, act_count, keep_fraction)
    dfg, sa, ea, act_count = dfg_filtering.filter_dfg_on_paths_percentage(
        dfg, sa, ea, act_count, keep_fraction)
    # play the simplified DFG out into a simulated log
    simulated_log = dfg_playout.apply(dfg, sa, ea)
    print(simulated_log)
    print(len(simulated_log))
    print(sum(t.attributes["probability"] for t in simulated_log))
    # show both DFGs (filtered original vs. re-discovered from the playout)
    # to illustrate that they are identical
    pm4py.view_dfg(dfg, sa, ea, log=log, format="svg")
    new_dfg, new_sa, new_ea = pm4py.discover_dfg(simulated_log)
    pm4py.view_dfg(new_dfg, new_sa, new_ea, log=simulated_log, format="svg")
    # for the first simulated trace only, compare the stored probability with
    # the probability recomputed from the DFG
    for trace in simulated_log:
        print([event["concept:name"] for event in trace])
        print(trace.attributes["probability"],
              dfg_playout.get_trace_probability(trace, dfg, sa, ea))
        break
    # on the unfiltered DFG, accumulate the probability of every log variant
    dfg, sa, ea = pm4py.discover_dfg(log)
    variants = pm4py.get_variants(log)
    sum_prob_log_variants = sum(
        (dfg_playout.get_trace_probability(variants[var][0], dfg, sa, ea)
         for var in variants), 0.0)
    print("percentage of behavior allowed from DFG that is in the log (from 0.0 to 1.0): ",
          sum_prob_log_variants)
def execute_script():
    """Discovers one model per formalism and prints the size of each serialized visualization."""
    log = pm4py.read_xes("../tests/input_data/running-example.xes")
    # discover the models
    dfg, sa, ea = pm4py.discover_dfg(log)
    tree = pm4py.discover_process_tree_inductive(log)
    heu_net = pm4py.discover_heuristics_net(log)
    net, im, fm = pm4py.discover_petri_net_alpha(log)
    bpmn = pm4py.convert_to_bpmn(tree)
    ts = ts_discovery.apply(log)
    x_cases, y_cases = case_statistics.get_kde_caseduration(log)
    # build one graphviz object per model
    gviz_dfg = dfg_visualizer.apply(dfg)
    gviz_tree = tree_visualizer.apply(tree)
    gviz_heu = hn_visualizer.apply(heu_net)
    gviz_net = pn_visualizer.apply(net, im, fm)
    gviz_bpmn = bpmn_visualizer.apply(bpmn)
    gviz_ts = ts_visualizer.apply(ts)
    gviz_cases = graphs_visualizer.apply(
        x_cases, y_cases, variant=graphs_visualizer.Variants.CASES,
        parameters={graphs_visualizer.Variants.CASES.value.Parameters.FORMAT: "svg"})
    # print the length of each serialized form (dot source where available,
    # plus the generic serialization)
    print("1", len(dfg_visualizer.serialize_dot(gviz_dfg)))
    print("1", len(dfg_visualizer.serialize(gviz_dfg)))
    print("2", len(tree_visualizer.serialize_dot(gviz_tree)))
    print("2", len(tree_visualizer.serialize(gviz_tree)))
    print("3", len(hn_visualizer.serialize(gviz_heu)))
    print("4", len(pn_visualizer.serialize_dot(gviz_net)))
    print("4", len(pn_visualizer.serialize(gviz_net)))
    print("5", len(bpmn_visualizer.serialize_dot(gviz_bpmn)))
    print("5", len(bpmn_visualizer.serialize(gviz_bpmn)))
    print("6", len(ts_visualizer.serialize_dot(gviz_ts)))
    print("6", len(ts_visualizer.serialize(gviz_ts)))
    print("7", len(graphs_visualizer.serialize(gviz_cases)))
def test_filter_act_percentage(self):
    """Smoke test: activity-percentage filtering of a discovered DFG runs."""
    from pm4py.algo.filtering.dfg import dfg_filtering
    event_log = pm4py.read_xes("input_data/running-example.xes")
    graph, start_acts, end_acts = pm4py.discover_dfg(event_log)
    activity_counts = pm4py.get_attribute_values(event_log, "concept:name")
    dfg_filtering.filter_dfg_on_activities_percentage(
        graph, start_acts, end_acts, activity_counts, 0.1)
def test_dfg_align(self):
    """Smoke test: DFG-based alignments on a pre-filtered DFG run without errors."""
    import pm4py
    from pm4py.objects.dfg.filtering import dfg_filtering
    from pm4py.objects.dfg.utils import dfg_alignment
    event_log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"))
    graph, start_acts, end_acts = pm4py.discover_dfg(event_log)
    activity_counts = pm4py.get_attribute_values(event_log, "concept:name")
    # simplify before aligning: keep 50% of activities, then 50% of paths
    graph, start_acts, end_acts, activity_counts = \
        dfg_filtering.filter_dfg_on_activities_percentage(
            graph, start_acts, end_acts, activity_counts, 0.5)
    graph, start_acts, end_acts, activity_counts = \
        dfg_filtering.filter_dfg_on_paths_percentage(
            graph, start_acts, end_acts, activity_counts, 0.5)
    dfg_alignment.apply(event_log, graph, start_acts, end_acts)
def execute_script():
    """Simulates the receipt log using its frequency DFG plus performance information."""
    log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes"))
    frequency_dfg, sa, ea = pm4py.discover_dfg(log)
    performance_dfg, sa, ea = pm4py.discover_performance_dfg(log)
    # the PERFORMANCE variant additionally receives the performance DFG as a
    # parameter (presumably driving the simulated timestamps — see dfg_simulator)
    simulated_log = dfg_simulator.apply(
        frequency_dfg, sa, ea,
        variant=dfg_simulator.Variants.PERFORMANCE,
        parameters={"performance_dfg": performance_dfg})
    print(simulated_log)
def execute_script():
    """Merges two interleaved event logs through a case-relations table and views the DFG."""
    base_dir = os.path.join("..", "tests", "input_data", "interleavings")

    def _read_formatted(csv_name):
        # read a CSV and add the mandatory process-mining columns
        return pm4py.format_dataframe(pd.read_csv(os.path.join(base_dir, csv_name)))

    dataframe1 = _read_formatted("receipt_even.csv")
    dataframe2 = _read_formatted("receipt_odd.csv")
    # NOTE(review): presumably maps cases of the first log onto cases of the
    # second — confirm against case_relations_merging
    case_relations = pd.read_csv(os.path.join(base_dir, "case_relations.csv"))
    merged = case_relations_merging.apply(dataframe1, dataframe2, case_relations)
    dfg, sa, ea = pm4py.discover_dfg(merged)
    pm4py.view_dfg(dfg, sa, ea, format="svg")
def execute_script():
    """Benchmarks DFG-based alignments against Petri-net alignments on the receipt log.

    Prints, for each approach: the wall-clock time, the total number of
    visited states, and the total alignment cost expressed in log-move units.
    """
    log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes"))
    print("number of cases", len(log))
    print("number of events", sum(len(x) for x in log))
    print("number of variants", len(pm4py.get_variants(log)))
    ac = get.get_attribute_values(log, "concept:name")
    dfg, sa, ea = pm4py.discover_dfg(log)
    # simplify the DFG: keep 50% of the activities, then 50% of the paths
    perc = 0.5
    dfg, sa, ea, ac = dfg_filtering.filter_dfg_on_activities_percentage(
        dfg, sa, ea, ac, perc)
    dfg, sa, ea, ac = dfg_filtering.filter_dfg_on_paths_percentage(
        dfg, sa, ea, ac, perc)
    # time the DFG-based alignment
    start_dfg = time.time()
    aligned_traces = dfg_alignment.apply(log, dfg, sa, ea)
    end_dfg = time.time()
    net, im, fm = pm4py.convert_to_petri_net(dfg, sa, ea)
    # report any trace whose alignment cost disagrees with its internal cost
    # (fixed: removed a dead `pass` statement that followed the print)
    for trace in aligned_traces:
        if trace["cost"] != trace["internal_cost"]:
            print(trace)
    print(end_dfg - start_dfg)
    print(sum(x["visited_states"] for x in aligned_traces))
    print(sum(x["cost"] // align_utils.STD_MODEL_LOG_MOVE_COST
              for x in aligned_traces))
    gviz = visualizer.apply(dfg, activities_count=ac, parameters={
        "start_activities": sa,
        "end_activities": ea,
        "format": "svg"
    })
    visualizer.view(gviz)
    # time the Petri-net alignment (Dijkstra, less-memory variant) for comparison
    start_petri = time.time()
    aligned_traces2 = petri_alignments.apply(
        log, net, im, fm,
        variant=petri_alignments.Variants.VERSION_DIJKSTRA_LESS_MEMORY)
    end_petri = time.time()
    print(end_petri - start_petri)
    print(sum(x["visited_states"] for x in aligned_traces2))
    print(sum(x["cost"] // align_utils.STD_MODEL_LOG_MOVE_COST
              for x in aligned_traces2))
def get_process_svg():
    """HTTP endpoint: returns the SVG of the process model for the requested log."""
    parameters = __process_parameters(request.args.get("parameters"))
    log = __prepare_event_log(parameters)
    if "ext_type" in parameters:
        ext_type = parameters["ext_type"]
    else:
        ext_type = "document_flow_log"
    log_type = __get_log_type_from_ext_type(ext_type)
    if log_type == 0:
        # multi-perspective model discovered through pm4pymdl
        log.type = "succint"
        from pm4pymdl.algo.mvp.gen_framework import algorithm as discovery
        from pm4pymdl.visualization.mvp.gen_framework import visualizer as vis_factory
        model = discovery.apply(log, model_type_variant="model3",
                                node_freq_variant="type31",
                                edge_freq_variant="type11")
        gviz = vis_factory.apply(model, parameters={"format": "svg"})
    elif log_type == 1 or log_type == 2:
        import pandas as pd
        if type(log) is pd.DataFrame:
            # dataframe input: use the dedicated pandas-based extraction routines
            from pm4py.objects.dfg.retrieval.pandas import get_dfg_graph
            dfg = get_dfg_graph(log)
            from pm4py.statistics.start_activities.pandas import get as pd_sa_get
            from pm4py.statistics.end_activities.pandas import get as pd_ea_get
            sa = pd_sa_get.get_start_activities(log)
            ea = pd_ea_get.get_end_activities(log)
        else:
            dfg, sa, ea = pm4py.discover_dfg(log)
        act_count = pm4py.get_attribute_values(log, "concept:name")
        # keep only 20% of the paths, but retain every activity
        dfg, sa, ea, act_count = dfg_filtering.filter_dfg_on_paths_percentage(
            dfg, sa, ea, act_count, 0.2, keep_all_activities=True)
        gviz = pm4py.visualization.dfg.visualizer.apply(
            dfg, activities_count=act_count,
            parameters={"format": "svg",
                        "start_activities": sa,
                        "end_activities": ea})
    return pm4py.visualization.dfg.visualizer.serialize(gviz).decode("utf-8")
def execute_script():
    """Filters the receipt-log DFG and views it with explicit visualizer parameters."""
    log = pm4py.read_xes("../tests/input_data/receipt.xes")
    dfg, sa, ea = pm4py.discover_dfg(log)
    act_count = pm4py.get_attribute_values(log, "concept:name")
    # retain 30% of the activities, then 30% of the paths
    dfg, sa, ea, act_count = pm4py.objects.dfg.filtering.dfg_filtering.filter_dfg_on_activities_percentage(
        dfg, sa, ea, act_count, 0.3)
    dfg, sa, ea, act_count = pm4py.objects.dfg.filtering.dfg_filtering.filter_dfg_on_paths_percentage(
        dfg, sa, ea, act_count, 0.3)
    # render the filtered DFG as SVG
    freq_params = dfg_visualizer.Variants.FREQUENCY.value.Parameters
    gviz = dfg_visualizer.apply(
        dfg, activities_count=act_count,
        parameters={freq_params.START_ACTIVITIES: sa,
                    freq_params.END_ACTIVITIES: ea,
                    freq_params.FORMAT: "svg"})
    dfg_visualizer.view(gviz)
def test_dfg(self):
    """Smoke test: DFG discovery on the running example works."""
    event_log = pm4py.read_xes("input_data/running-example.xes")
    graph, start_acts, end_acts = pm4py.discover_dfg(event_log)
def execute_script():
    """End-to-end tour of the pm4py simplified interface.

    Reads the running example as XES and CSV, discovers models with several
    algorithms, round-trips them through their on-disk formats, visualizes
    them, checks conformance, and exercises the statistics and filtering
    functions on both the event-log and the dataframe representation.
    """
    ENABLE_VISUALIZATION = True

    # --- ingestion and conversions ---------------------------------------
    # reads a XES into an event log
    log1 = pm4py.read_xes("../tests/input_data/running-example.xes")
    # reads a CSV into a dataframe and adds the mandatory process-mining columns
    df = pd.read_csv("../tests/input_data/running-example.csv")
    df = pm4py.format_dataframe(df, case_id="case:concept:name",
                                activity_key="concept:name",
                                timestamp_key="time:timestamp")
    # converts the dataframe to an event log
    log2 = pm4py.convert_to_event_log(df)
    # converts the log read from XES into a stream and dataframe respectively
    stream1 = pm4py.convert_to_event_stream(log1)
    df2 = pm4py.convert_to_dataframe(log1)
    # writes log1 back out as XES
    pm4py.write_xes(log1, "ru1.xes")

    # --- discovery --------------------------------------------------------
    dfg, dfg_sa, dfg_ea = pm4py.discover_dfg(log1)
    petri_alpha, im_alpha, fm_alpha = pm4py.discover_petri_net_alpha(log1)
    petri_inductive, im_inductive, fm_inductive = pm4py.discover_petri_net_inductive(log1)
    petri_heuristics, im_heuristics, fm_heuristics = pm4py.discover_petri_net_heuristics(log1)
    tree_inductive = pm4py.discover_tree_inductive(log1)
    heu_net = pm4py.discover_heuristics_net(log1)

    # --- round-trip through the on-disk formats ---------------------------
    pm4py.write_dfg(dfg, dfg_sa, dfg_ea, "ru_dfg.dfg")
    pm4py.write_petri_net(petri_alpha, im_alpha, fm_alpha, "ru_alpha.pnml")
    pm4py.write_petri_net(petri_inductive, im_inductive, fm_inductive, "ru_inductive.pnml")
    pm4py.write_petri_net(petri_heuristics, im_heuristics, fm_heuristics, "ru_heuristics.pnml")
    pm4py.write_process_tree(tree_inductive, "ru_inductive.ptml")
    dfg, dfg_sa, dfg_ea = pm4py.read_dfg("ru_dfg.dfg")
    petri_alpha, im_alpha, fm_alpha = pm4py.read_petri_net("ru_alpha.pnml")
    petri_inductive, im_inductive, fm_inductive = pm4py.read_petri_net("ru_inductive.pnml")
    petri_heuristics, im_heuristics, fm_heuristics = pm4py.read_petri_net("ru_heuristics.pnml")
    tree_inductive = pm4py.read_process_tree("ru_inductive.ptml")

    # --- visualization ----------------------------------------------------
    pm4py.save_vis_petri_net(petri_alpha, im_alpha, fm_alpha, "ru_alpha.png")
    pm4py.save_vis_petri_net(petri_inductive, im_inductive, fm_inductive, "ru_inductive.png")
    pm4py.save_vis_petri_net(petri_heuristics, im_heuristics, fm_heuristics, "ru_heuristics.png")
    pm4py.save_vis_process_tree(tree_inductive, "ru_inductive_tree.png")
    pm4py.save_vis_heuristics_net(heu_net, "ru_heunet.png")
    pm4py.save_vis_dfg(dfg, dfg_sa, dfg_ea, "ru_dfg.png")
    if ENABLE_VISUALIZATION:
        pm4py.view_petri_net(petri_alpha, im_alpha, fm_alpha, format="svg")
        pm4py.view_petri_net(petri_inductive, im_inductive, fm_inductive, format="svg")
        pm4py.view_petri_net(petri_heuristics, im_heuristics, fm_heuristics, format="svg")
        pm4py.view_process_tree(tree_inductive, format="svg")
        pm4py.view_heuristics_net(heu_net, format="svg")
        pm4py.view_dfg(dfg, dfg_sa, dfg_ea, format="svg")

    # --- conformance checking ---------------------------------------------
    aligned_traces = pm4py.conformance_alignments(log1, petri_inductive, im_inductive, fm_inductive)
    replayed_traces = pm4py.conformance_tbr(log1, petri_inductive, im_inductive, fm_inductive)
    fitness_tbr = pm4py.evaluate_fitness_tbr(log1, petri_inductive, im_inductive, fm_inductive)
    print("fitness_tbr", fitness_tbr)
    fitness_align = pm4py.evaluate_fitness_alignments(log1, petri_inductive, im_inductive, fm_inductive)
    print("fitness_align", fitness_align)
    precision_tbr = pm4py.evaluate_precision_tbr(log1, petri_inductive, im_inductive, fm_inductive)
    print("precision_tbr", precision_tbr)
    precision_align = pm4py.evaluate_precision_alignments(log1, petri_inductive, im_inductive, fm_inductive)
    print("precision_align", precision_align)

    # --- statistics on log and dataframe ----------------------------------
    print("log start activities = ", pm4py.get_start_activities(log2))
    print("df start activities = ", pm4py.get_start_activities(df2))
    print("log end activities = ", pm4py.get_end_activities(log2))
    print("df end activities = ", pm4py.get_end_activities(df2))
    print("log attributes = ", pm4py.get_attributes(log2))
    print("df attributes = ", pm4py.get_attributes(df2))
    print("log org:resource values = ", pm4py.get_attribute_values(log2, "org:resource"))
    print("df org:resource values = ", pm4py.get_attribute_values(df2, "org:resource"))

    # --- filtering --------------------------------------------------------
    print("start_activities len(filt_log) = ",
          len(pm4py.filter_start_activities(log2, ["register request"])))
    print("start_activities len(filt_df) = ",
          len(pm4py.filter_start_activities(df2, ["register request"])))
    print("end_activities len(filt_log) = ",
          len(pm4py.filter_end_activities(log2, ["pay compensation"])))
    print("end_activities len(filt_df) = ",
          len(pm4py.filter_end_activities(df2, ["pay compensation"])))
    print("attributes org:resource len(filt_log) (cases) cases = ",
          len(pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"], level="case")))
    print("attributes org:resource len(filt_log) (cases) events = ",
          len(pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"], level="event")))
    print("attributes org:resource len(filt_df) (events) cases = ",
          len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], level="case")))
    print("attributes org:resource len(filt_df) (events) events = ",
          len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], level="event")))
    print("attributes org:resource len(filt_df) (events) events notpositive = ",
          len(pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"], level="event",
                                            retain=False)))
    print("variants log = ", pm4py.get_variants(log2))
    print("variants df = ", pm4py.get_variants(df2))
    variant_of_interest = [[
        "register request", "examine thoroughly", "check ticket", "decide",
        "reject request"
    ]]
    print("variants filter log = ",
          len(pm4py.filter_variants(log2, variant_of_interest)))
    print("variants filter df = ",
          len(pm4py.filter_variants(df2, variant_of_interest)))
    print("variants filter percentage = ",
          len(pm4py.filter_variants_percentage(log2, threshold=0.8)))
    print("paths filter log len = ",
          len(pm4py.filter_directly_follows_relation(
              log2, [("register request", "examine casually")])))
    print("paths filter dataframe len = ",
          len(pm4py.filter_directly_follows_relation(
              df2, [("register request", "examine casually")])))
    time_window = ("2011-01-01 00:00:00", "2011-02-01 00:00:00")
    print("timeframe filter log events len = ",
          len(pm4py.filter_time_range(log2, *time_window, mode="events")))
    print("timeframe filter log traces_contained len = ",
          len(pm4py.filter_time_range(log2, *time_window, mode="traces_contained")))
    print("timeframe filter log traces_intersecting len = ",
          len(pm4py.filter_time_range(log2, *time_window, mode="traces_intersecting")))
    print("timeframe filter df events len = ",
          len(pm4py.filter_time_range(df2, *time_window, mode="events")))
    print("timeframe filter df traces_contained len = ",
          len(pm4py.filter_time_range(df2, *time_window, mode="traces_contained")))
    print("timeframe filter df traces_intersecting len = ",
          len(pm4py.filter_time_range(df2, *time_window, mode="traces_intersecting")))

    # remove the temporary files
    for tmp_file in ("ru1.xes", "ru_dfg.dfg", "ru_alpha.pnml", "ru_inductive.pnml",
                     "ru_heuristics.pnml", "ru_inductive.ptml", "ru_alpha.png",
                     "ru_inductive.png", "ru_heuristics.png", "ru_inductive_tree.png",
                     "ru_heunet.png", "ru_dfg.png"):
        os.remove(tmp_file)
def test_dfg_playout(self):
    """Smoke test: DFG playout via the algo.simulation package runs."""
    import pm4py
    from pm4py.algo.simulation.playout.dfg import algorithm as dfg_playout
    event_log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"))
    graph, start_acts, end_acts = pm4py.discover_dfg(event_log)
    dfg_playout.apply(graph, start_acts, end_acts)
def test_filter_paths_percentage(self):
    """Smoke test: path-percentage filtering of a discovered DFG runs."""
    event_log = pm4py.read_xes("input_data/running-example.xes")
    graph, start_acts, end_acts = pm4py.discover_dfg(event_log)
    activity_counts = pm4py.get_attribute_values(event_log, "concept:name")
    pm4py.objects.dfg.filtering.dfg_filtering.filter_dfg_on_paths_percentage(
        graph, start_acts, end_acts, activity_counts, 0.3)
def test_dfg_playout(self):
    """Smoke test: DFG playout via the objects.dfg.utils module runs."""
    import pm4py
    from pm4py.objects.dfg.utils import dfg_playout
    event_log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"))
    graph, start_acts, end_acts = pm4py.discover_dfg(event_log)
    dfg_playout.apply(graph, start_acts, end_acts)