Example #1
import os

import pm4py
# module paths as used in Examples #4 and #14 on this page; they may differ across pm4py versions
from pm4py.objects.dfg.filtering import dfg_filtering
from pm4py.objects.dfg.utils import dfg_playout


def execute_script():
    log = pm4py.read_xes(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    activities = pm4py.get_attribute_values(log, "concept:name")
    dfg, sa, ea = pm4py.discover_dfg(log)
    # filters the DFG to make a simpler one
    perc = 0.5
    dfg, sa, ea, activities = dfg_filtering.filter_dfg_on_activities_percentage(
        dfg, sa, ea, activities, perc)
    dfg, sa, ea, activities = dfg_filtering.filter_dfg_on_paths_percentage(
        dfg, sa, ea, activities, perc)
    # creates the simulated log
    simulated_log = dfg_playout.apply(dfg, sa, ea)
    print(simulated_log)
    print(len(simulated_log))
    print(sum(x.attributes["probability"] for x in simulated_log))
    # view the two DFGs to show that they are identical
    pm4py.view_dfg(dfg, sa, ea, log=log, format="svg")
    new_dfg, new_sa, new_ea = pm4py.discover_dfg(simulated_log)
    pm4py.view_dfg(new_dfg, new_sa, new_ea, log=simulated_log, format="svg")
    for trace in simulated_log:
        print(list(x["concept:name"] for x in trace))
        print(trace.attributes["probability"],
              dfg_playout.get_trace_probability(trace, dfg, sa, ea))
        break
    dfg, sa, ea = pm4py.discover_dfg(log)
    variants = pm4py.get_variants(log)
    sum_prob_log_variants = 0.0
    for var in variants:
        sum_prob_log_variants += dfg_playout.get_trace_probability(
            variants[var][0], dfg, sa, ea)
    print(
        "percentage of behavior allowed from DFG that is in the log (from 0.0 to 1.0): ",
        sum_prob_log_variants)
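The pm4py.view_dfg calls above open an interactive viewer; when running headless, the visualization can be written to a file instead. A minimal, self-contained sketch using pm4py.save_vis_dfg as it appears in Example #11 (the output file name is illustrative):

import os

import pm4py

# discover the DFG of the receipt log and write its visualization to disk
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes"))
dfg, sa, ea = pm4py.discover_dfg(log)
pm4py.save_vis_dfg(dfg, sa, ea, "receipt_dfg.png")  # illustrative output path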
Example #2
import pm4py
# NOTE: the visualizer/statistics module paths below are assumptions for pm4py 2.x
# and may differ slightly across versions (e.g. petri_net vs. petrinet)
from pm4py.algo.discovery.transition_system import algorithm as ts_discovery
from pm4py.statistics.traces.generic.log import case_statistics
from pm4py.visualization.dfg import visualizer as dfg_visualizer
from pm4py.visualization.process_tree import visualizer as tree_visualizer
from pm4py.visualization.heuristics_net import visualizer as hn_visualizer
from pm4py.visualization.petri_net import visualizer as pn_visualizer
from pm4py.visualization.bpmn import visualizer as bpmn_visualizer
from pm4py.visualization.transition_system import visualizer as ts_visualizer
from pm4py.visualization.graphs import visualizer as graphs_visualizer


def execute_script():
    log = pm4py.read_xes("../tests/input_data/running-example.xes")
    dfg, sa, ea = pm4py.discover_dfg(log)
    tree = pm4py.discover_process_tree_inductive(log)
    heu_net = pm4py.discover_heuristics_net(log)
    net, im, fm = pm4py.discover_petri_net_alpha(log)
    bpmn = pm4py.convert_to_bpmn(tree)
    ts = ts_discovery.apply(log)
    x_cases, y_cases = case_statistics.get_kde_caseduration(log)

    gviz1 = dfg_visualizer.apply(dfg)
    gviz2 = tree_visualizer.apply(tree)
    gviz3 = hn_visualizer.apply(heu_net)
    gviz4 = pn_visualizer.apply(net, im, fm)
    gviz5 = bpmn_visualizer.apply(bpmn)
    gviz6 = ts_visualizer.apply(ts)
    gviz7 = graphs_visualizer.apply(x_cases, y_cases, variant=graphs_visualizer.Variants.CASES,
                                          parameters={graphs_visualizer.Variants.CASES.value.Parameters.FORMAT: "svg"})

    print("1", len(dfg_visualizer.serialize_dot(gviz1)))
    print("1", len(dfg_visualizer.serialize(gviz1)))
    print("2", len(tree_visualizer.serialize_dot(gviz2)))
    print("2", len(tree_visualizer.serialize(gviz2)))
    print("3", len(hn_visualizer.serialize(gviz3)))
    print("4", len(pn_visualizer.serialize_dot(gviz4)))
    print("4", len(pn_visualizer.serialize(gviz4)))
    print("5", len(bpmn_visualizer.serialize_dot(gviz5)))
    print("5", len(bpmn_visualizer.serialize(gviz5)))
    print("6", len(ts_visualizer.serialize_dot(gviz6)))
    print("6", len(ts_visualizer.serialize(gviz6)))
    print("7", len(graphs_visualizer.serialize(gviz7)))
Example #3
def test_filter_act_percentage(self):
    import pm4py
    from pm4py.algo.filtering.dfg import dfg_filtering
    log = pm4py.read_xes("input_data/running-example.xes")
    dfg, sa, ea = pm4py.discover_dfg(log)
    act_count = pm4py.get_attribute_values(log, "concept:name")
    dfg_filtering.filter_dfg_on_activities_percentage(
        dfg, sa, ea, act_count, 0.1)
Example #4
def test_dfg_align(self):
    import os
    import pm4py
    from pm4py.objects.dfg.filtering import dfg_filtering
    from pm4py.objects.dfg.utils import dfg_alignment
    log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"))
    dfg, sa, ea = pm4py.discover_dfg(log)
    act_count = pm4py.get_attribute_values(log, "concept:name")
    dfg, sa, ea, act_count = dfg_filtering.filter_dfg_on_activities_percentage(dfg, sa, ea, act_count, 0.5)
    dfg, sa, ea, act_count = dfg_filtering.filter_dfg_on_paths_percentage(dfg, sa, ea, act_count, 0.5)
    aligned_traces = dfg_alignment.apply(log, dfg, sa, ea)
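A minimal, self-contained sketch of inspecting what dfg_alignment.apply returns, reusing the import paths from the test above and the result keys ("cost", "visited_states") that Example #7 reads:

import os

import pm4py
from pm4py.objects.dfg.utils import dfg_alignment

log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"))
dfg, sa, ea = pm4py.discover_dfg(log)
aligned_traces = dfg_alignment.apply(log, dfg, sa, ea)
for aligned_trace in aligned_traces:
    # "cost" and "visited_states" are the keys accessed in Example #7
    print(aligned_trace["cost"], aligned_trace["visited_states"])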
Example #5
import os

import pm4py
# the performance playout variant lives in the DFG playout module used in Example #12;
# the alias and module path here are assumptions and may differ across pm4py versions
from pm4py.algo.simulation.playout.dfg import algorithm as dfg_simulator


def execute_script():
    log = pm4py.read_xes(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    frequency_dfg, sa, ea = pm4py.discover_dfg(log)
    performance_dfg, sa, ea = pm4py.discover_performance_dfg(log)
    simulated_log = dfg_simulator.apply(
        frequency_dfg,
        sa,
        ea,
        variant=dfg_simulator.Variants.PERFORMANCE,
        parameters={"performance_dfg": performance_dfg})
    print(simulated_log)
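For comparison, the frequency-based playout used in Examples #12 and #14 needs no performance DFG. A minimal sketch, assuming the module path shown in Example #12:

import os

import pm4py
from pm4py.algo.simulation.playout.dfg import algorithm as dfg_playout

log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes"))
dfg, sa, ea = pm4py.discover_dfg(log)
simulated_log = dfg_playout.apply(dfg, sa, ea)
print(len(simulated_log))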
Example #6
import os

import pandas as pd

import pm4py
# assumed import path for pm4py's case-relations merging module; verify against your pm4py version
from pm4py.algo.merging.case_relations import algorithm as case_relations_merging


def execute_script():
    dataframe1 = pd.read_csv(
        os.path.join("..", "tests", "input_data", "interleavings",
                     "receipt_even.csv"))
    dataframe1 = pm4py.format_dataframe(dataframe1)
    dataframe2 = pd.read_csv(
        os.path.join("..", "tests", "input_data", "interleavings",
                     "receipt_odd.csv"))
    dataframe2 = pm4py.format_dataframe(dataframe2)
    case_relations = pd.read_csv(
        os.path.join("..", "tests", "input_data", "interleavings",
                     "case_relations.csv"))
    merged = case_relations_merging.apply(dataframe1, dataframe2,
                                          case_relations)
    dfg, sa, ea = pm4py.discover_dfg(merged)
    pm4py.view_dfg(dfg, sa, ea, format="svg")
Example #7
import os
import time

import pm4py
# NOTE: the import paths below are assumptions for pm4py 2.x and may differ across versions
from pm4py.statistics.attributes.log import get
from pm4py.objects.dfg.filtering import dfg_filtering
from pm4py.objects.dfg.utils import dfg_alignment
from pm4py.objects.petri_net.utils import align_utils
from pm4py.visualization.dfg import visualizer
from pm4py.algo.conformance.alignments.petri_net import algorithm as petri_alignments


def execute_script():
    log = pm4py.read_xes(
        os.path.join("..", "tests", "input_data", "receipt.xes"))
    print("number of cases", len(log))
    print("number of events", sum(len(x) for x in log))
    print("number of variants", len(pm4py.get_variants(log)))
    ac = get.get_attribute_values(log, "concept:name")
    dfg, sa, ea = pm4py.discover_dfg(log)
    perc = 0.5
    dfg, sa, ea, ac = dfg_filtering.filter_dfg_on_activities_percentage(
        dfg, sa, ea, ac, perc)
    dfg, sa, ea, ac = dfg_filtering.filter_dfg_on_paths_percentage(
        dfg, sa, ea, ac, perc)
    aa = time.time()
    aligned_traces = dfg_alignment.apply(log, dfg, sa, ea)
    bb = time.time()
    net, im, fm = pm4py.convert_to_petri_net(dfg, sa, ea)
    for trace in aligned_traces:
        if trace["cost"] != trace["internal_cost"]:
            print(trace)
    print(bb - aa)
    print(sum(x["visited_states"] for x in aligned_traces))
    print(
        sum(x["cost"] // align_utils.STD_MODEL_LOG_MOVE_COST
            for x in aligned_traces))
    gviz = visualizer.apply(dfg,
                            activities_count=ac,
                            parameters={
                                "start_activities": sa,
                                "end_activities": ea,
                                "format": "svg"
                            })
    visualizer.view(gviz)
    cc = time.time()
    aligned_traces2 = petri_alignments.apply(
        log,
        net,
        im,
        fm,
        variant=petri_alignments.Variants.VERSION_DIJKSTRA_LESS_MEMORY)
    dd = time.time()
    print(dd - cc)
    print(sum(x["visited_states"] for x in aligned_traces2))
    print(
        sum(x["cost"] // align_utils.STD_MODEL_LOG_MOVE_COST
            for x in aligned_traces2))
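Beyond alignment costs and visited states, fitness can also be computed directly on a discovered Petri net with the simplified helpers from Example #11; a short sketch (the helper names follow Example #11 and may be renamed in newer pm4py releases):

import os

import pm4py

log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes"))
net, im, fm = pm4py.discover_petri_net_inductive(log)
# alignment-based fitness, as in Example #11
print(pm4py.evaluate_fitness_alignments(log, net, im, fm))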
Example #8
# module-level imports (pm4py, dfg_filtering, the web framework's `request` object) and the
# private helpers called below (__process_parameters, __prepare_event_log,
# __get_log_type_from_ext_type) are defined elsewhere in the service module and are not shown here
def get_process_svg():
    parameters = request.args.get("parameters")
    parameters = __process_parameters(parameters)

    log = __prepare_event_log(parameters)
    ext_type = parameters["ext_type"] if "ext_type" in parameters else "document_flow_log"
    log_type = __get_log_type_from_ext_type(ext_type)

    if log_type == 0:
        log.type = "succint"
        from pm4pymdl.algo.mvp.gen_framework import algorithm as discovery
        from pm4pymdl.visualization.mvp.gen_framework import visualizer as vis_factory
        model = discovery.apply(log,
                                model_type_variant="model3",
                                node_freq_variant="type31",
                                edge_freq_variant="type11")
        gviz = vis_factory.apply(model, parameters={"format": "svg"})
    elif log_type == 1 or log_type == 2:
        import pandas as pd
        if type(log) is pd.DataFrame:
            from pm4py.objects.dfg.retrieval.pandas import get_dfg_graph
            dfg = get_dfg_graph(log)
            from pm4py.statistics.start_activities.pandas import get as pd_sa_get
            from pm4py.statistics.end_activities.pandas import get as pd_ea_get
            sa = pd_sa_get.get_start_activities(log)
            ea = pd_ea_get.get_end_activities(log)
        else:
            dfg, sa, ea = pm4py.discover_dfg(log)
        act_count = pm4py.get_attribute_values(log, "concept:name")
        dfg, sa, ea, act_count = dfg_filtering.filter_dfg_on_paths_percentage(
            dfg, sa, ea, act_count, 0.2, keep_all_activities=True)
        gviz = pm4py.visualization.dfg.visualizer.apply(
            dfg,
            activities_count=act_count,
            parameters={
                "format": "svg",
                "start_activities": sa,
                "end_activities": ea
            })

    ser = pm4py.visualization.dfg.visualizer.serialize(gviz).decode("utf-8")

    return ser
Example #9
import pm4py
# import the submodule explicitly so the fully qualified calls below resolve
import pm4py.objects.dfg.filtering.dfg_filtering
from pm4py.visualization.dfg import visualizer as dfg_visualizer


def execute_script():
    log = pm4py.read_xes("../tests/input_data/receipt.xes")
    dfg, sa, ea = pm4py.discover_dfg(log)
    act_count = pm4py.get_attribute_values(log, "concept:name")
    # keep the specified percentage of activities
    dfg, sa, ea, act_count = pm4py.objects.dfg.filtering.dfg_filtering.filter_dfg_on_activities_percentage(
        dfg, sa, ea, act_count, 0.3)
    # keep the specified percentage of paths
    dfg, sa, ea, act_count = pm4py.objects.dfg.filtering.dfg_filtering.filter_dfg_on_paths_percentage(
        dfg, sa, ea, act_count, 0.3)
    # view the DFG
    gviz = dfg_visualizer.apply(
        dfg,
        activities_count=act_count,
        parameters={
            dfg_visualizer.Variants.FREQUENCY.value.Parameters.START_ACTIVITIES:
            sa,
            dfg_visualizer.Variants.FREQUENCY.value.Parameters.END_ACTIVITIES:
            ea,
            dfg_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "svg"
        })
    dfg_visualizer.view(gviz)
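The same kind of rendering is also available through the simplified interface used in Examples #6 and #11, without configuring the visualizer parameters by hand; a short sketch (shown here without the filtering step):

import pm4py

log = pm4py.read_xes("../tests/input_data/receipt.xes")
dfg, sa, ea = pm4py.discover_dfg(log)
# one-call rendering, as used in Examples #6 and #11
pm4py.view_dfg(dfg, sa, ea, format="svg")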
Example #10
def test_dfg(self):
    import pm4py
    log = pm4py.read_xes("input_data/running-example.xes")
    dfg, sa, ea = pm4py.discover_dfg(log)
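A sketch of a hypothetical follow-up test (not part of the original suite) that cross-checks the discovered start and end activities against the simplified helpers shown in Example #11:

def test_dfg_start_end_activities(self):
    # hypothetical additional test; get_start_activities / get_end_activities
    # are the helpers used in Example #11
    import pm4py
    log = pm4py.read_xes("input_data/running-example.xes")
    dfg, sa, ea = pm4py.discover_dfg(log)
    assert set(sa) == set(pm4py.get_start_activities(log))
    assert set(ea) == set(pm4py.get_end_activities(log))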
Example #11
import os

import pandas as pd

import pm4py


def execute_script():
    ENABLE_VISUALIZATION = True

    # reads a XES into an event log
    log1 = pm4py.read_xes("../tests/input_data/running-example.xes")

    # reads a CSV into a dataframe
    df = pd.read_csv("../tests/input_data/running-example.csv")
    # formats the dataframe with the mandatory columns for process mining purposes
    df = pm4py.format_dataframe(df,
                                case_id="case:concept:name",
                                activity_key="concept:name",
                                timestamp_key="time:timestamp")
    # converts the dataframe to an event log
    log2 = pm4py.convert_to_event_log(df)

    # converts the log read from XES into a stream and dataframe respectively
    stream1 = pm4py.convert_to_event_stream(log1)
    df2 = pm4py.convert_to_dataframe(log1)

    # writes log1 to a XES file
    pm4py.write_xes(log1, "ru1.xes")

    dfg, dfg_sa, dfg_ea = pm4py.discover_dfg(log1)
    petri_alpha, im_alpha, fm_alpha = pm4py.discover_petri_net_alpha(log1)
    petri_inductive, im_inductive, fm_inductive = pm4py.discover_petri_net_inductive(
        log1)
    petri_heuristics, im_heuristics, fm_heuristics = pm4py.discover_petri_net_heuristics(
        log1)
    tree_inductive = pm4py.discover_tree_inductive(log1)
    heu_net = pm4py.discover_heuristics_net(log1)

    pm4py.write_dfg(dfg, dfg_sa, dfg_ea, "ru_dfg.dfg")
    pm4py.write_petri_net(petri_alpha, im_alpha, fm_alpha, "ru_alpha.pnml")
    pm4py.write_petri_net(petri_inductive, im_inductive, fm_inductive,
                          "ru_inductive.pnml")
    pm4py.write_petri_net(petri_heuristics, im_heuristics, fm_heuristics,
                          "ru_heuristics.pnml")
    pm4py.write_process_tree(tree_inductive, "ru_inductive.ptml")

    dfg, dfg_sa, dfg_ea = pm4py.read_dfg("ru_dfg.dfg")
    petri_alpha, im_alpha, fm_alpha = pm4py.read_petri_net("ru_alpha.pnml")
    petri_inductive, im_inductive, fm_inductive = pm4py.read_petri_net(
        "ru_inductive.pnml")
    petri_heuristics, im_heuristics, fm_heuristics = pm4py.read_petri_net(
        "ru_heuristics.pnml")
    tree_inductive = pm4py.read_process_tree("ru_inductive.ptml")

    pm4py.save_vis_petri_net(petri_alpha, im_alpha, fm_alpha, "ru_alpha.png")
    pm4py.save_vis_petri_net(petri_inductive, im_inductive, fm_inductive,
                             "ru_inductive.png")
    pm4py.save_vis_petri_net(petri_heuristics, im_heuristics, fm_heuristics,
                             "ru_heuristics.png")
    pm4py.save_vis_process_tree(tree_inductive, "ru_inductive_tree.png")
    pm4py.save_vis_heuristics_net(heu_net, "ru_heunet.png")
    pm4py.save_vis_dfg(dfg, dfg_sa, dfg_ea, "ru_dfg.png")

    if ENABLE_VISUALIZATION:
        pm4py.view_petri_net(petri_alpha, im_alpha, fm_alpha, format="svg")
        pm4py.view_petri_net(petri_inductive,
                             im_inductive,
                             fm_inductive,
                             format="svg")
        pm4py.view_petri_net(petri_heuristics,
                             im_heuristics,
                             fm_heuristics,
                             format="svg")
        pm4py.view_process_tree(tree_inductive, format="svg")
        pm4py.view_heuristics_net(heu_net, format="svg")
        pm4py.view_dfg(dfg, dfg_sa, dfg_ea, format="svg")

    aligned_traces = pm4py.conformance_alignments(log1, petri_inductive,
                                                  im_inductive, fm_inductive)
    replayed_traces = pm4py.conformance_tbr(log1, petri_inductive,
                                            im_inductive, fm_inductive)

    fitness_tbr = pm4py.evaluate_fitness_tbr(log1, petri_inductive,
                                             im_inductive, fm_inductive)
    print("fitness_tbr", fitness_tbr)
    fitness_align = pm4py.evaluate_fitness_alignments(log1, petri_inductive,
                                                      im_inductive,
                                                      fm_inductive)
    print("fitness_align", fitness_align)
    precision_tbr = pm4py.evaluate_precision_tbr(log1, petri_inductive,
                                                 im_inductive, fm_inductive)
    print("precision_tbr", precision_tbr)
    precision_align = pm4py.evaluate_precision_alignments(
        log1, petri_inductive, im_inductive, fm_inductive)
    print("precision_align", precision_align)

    print("log start activities = ", pm4py.get_start_activities(log2))
    print("df start activities = ", pm4py.get_start_activities(df2))
    print("log end activities = ", pm4py.get_end_activities(log2))
    print("df end activities = ", pm4py.get_end_activities(df2))
    print("log attributes = ", pm4py.get_attributes(log2))
    print("df attributes = ", pm4py.get_attributes(df2))
    print("log org:resource values = ",
          pm4py.get_attribute_values(log2, "org:resource"))
    print("df org:resource values = ",
          pm4py.get_attribute_values(df2, "org:resource"))

    print("start_activities len(filt_log) = ",
          len(pm4py.filter_start_activities(log2, ["register request"])))
    print("start_activities len(filt_df) = ",
          len(pm4py.filter_start_activities(df2, ["register request"])))
    print("end_activities len(filt_log) = ",
          len(pm4py.filter_end_activities(log2, ["pay compensation"])))
    print("end_activities len(filt_df) = ",
          len(pm4py.filter_end_activities(df2, ["pay compensation"])))
    print(
        "attributes org:resource len(filt_log) (cases) cases = ",
        len(
            pm4py.filter_attribute_values(log2,
                                          "org:resource", ["Ellen"],
                                          level="case")))
    print(
        "attributes org:resource len(filt_log) (cases)  events = ",
        len(
            pm4py.filter_attribute_values(log2,
                                          "org:resource", ["Ellen"],
                                          level="event")))
    print(
        "attributes org:resource len(filt_df) (events) cases = ",
        len(
            pm4py.filter_attribute_values(df2,
                                          "org:resource", ["Ellen"],
                                          level="case")))
    print(
        "attributes org:resource len(filt_df) (events) events = ",
        len(
            pm4py.filter_attribute_values(df2,
                                          "org:resource", ["Ellen"],
                                          level="event")))
    print(
        "attributes org:resource len(filt_df) (events) events notpositive = ",
        len(
            pm4py.filter_attribute_values(df2,
                                          "org:resource", ["Ellen"],
                                          level="event",
                                          retain=False)))

    print("variants log = ", pm4py.get_variants(log2))
    print("variants df = ", pm4py.get_variants(df2))
    print(
        "variants filter log = ",
        len(
            pm4py.filter_variants(log2, [[
                "register request", "examine thoroughly", "check ticket",
                "decide", "reject request"
            ]])))
    print(
        "variants filter df = ",
        len(
            pm4py.filter_variants(df2, [[
                "register request", "examine thoroughly", "check ticket",
                "decide", "reject request"
            ]])))
    print("variants filter percentage = ",
          len(pm4py.filter_variants_percentage(log2, threshold=0.8)))

    print(
        "paths filter log len = ",
        len(
            pm4py.filter_directly_follows_relation(
                log2, [("register request", "examine casually")])))
    print(
        "paths filter dataframe len = ",
        len(
            pm4py.filter_directly_follows_relation(
                df2, [("register request", "examine casually")])))

    print(
        "timeframe filter log events len = ",
        len(
            pm4py.filter_time_range(log2,
                                    "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="events")))
    print(
        "timeframe filter log traces_contained len = ",
        len(
            pm4py.filter_time_range(log2,
                                    "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="traces_contained")))
    print(
        "timeframe filter log traces_intersecting len = ",
        len(
            pm4py.filter_time_range(log2,
                                    "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="traces_intersecting")))
    print(
        "timeframe filter df events len = ",
        len(
            pm4py.filter_time_range(df2,
                                    "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="events")))
    print(
        "timeframe filter df traces_contained len = ",
        len(
            pm4py.filter_time_range(df2,
                                    "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="traces_contained")))
    print(
        "timeframe filter df traces_intersecting len = ",
        len(
            pm4py.filter_time_range(df2,
                                    "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="traces_intersecting")))

    # remove the temporary files
    os.remove("ru1.xes")
    os.remove("ru_dfg.dfg")
    os.remove("ru_alpha.pnml")
    os.remove("ru_inductive.pnml")
    os.remove("ru_heuristics.pnml")
    os.remove("ru_inductive.ptml")
    os.remove("ru_alpha.png")
    os.remove("ru_inductive.png")
    os.remove("ru_heuristics.png")
    os.remove("ru_inductive_tree.png")
    os.remove("ru_heunet.png")
    os.remove("ru_dfg.png")
Example #12
def test_dfg_playout(self):
    import os
    import pm4py
    from pm4py.algo.simulation.playout.dfg import algorithm as dfg_playout
    log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"))
    dfg, sa, ea = pm4py.discover_dfg(log)
    dfg_playout.apply(dfg, sa, ea)
Example #13
def test_filter_paths_percentage(self):
    import pm4py
    # import the submodule explicitly so the fully qualified call below resolves
    import pm4py.objects.dfg.filtering.dfg_filtering
    log = pm4py.read_xes("input_data/running-example.xes")
    dfg, sa, ea = pm4py.discover_dfg(log)
    act_count = pm4py.get_attribute_values(log, "concept:name")
    pm4py.objects.dfg.filtering.dfg_filtering.filter_dfg_on_paths_percentage(
        dfg, sa, ea, act_count, 0.3)
Example #14
def test_dfg_playout(self):
    import os
    import pm4py
    from pm4py.objects.dfg.utils import dfg_playout
    log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"))
    dfg, sa, ea = pm4py.discover_dfg(log)
    dfg_playout.apply(dfg, sa, ea)
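The traces produced by the playout carry a "probability" attribute, as read in Example #1; a short, self-contained sketch that prints the total probability mass of the generated traces:

import os

import pm4py
from pm4py.objects.dfg.utils import dfg_playout

log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"))
dfg, sa, ea = pm4py.discover_dfg(log)
simulated_log = dfg_playout.apply(dfg, sa, ea)
# "probability" is the trace attribute summed in Example #1
print(sum(trace.attributes["probability"] for trace in simulated_log))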