Пример #1
0
 def test_concurrent_activities_xes(self):
     """
     Smoke test for the concurrent-activities statistic on an XES log.

     Imports ``input_data/interval_event_log.xes`` and runs the statistic
     with the start timestamp key set to ``"start_timestamp"``. Nothing is
     asserted on the result; the test only needs the call to complete.
     """
     from pm4py.statistics.concurrent_activities.log import get as conc_act_stat

     xes_path = os.path.join("input_data", "interval_event_log.xes")
     interval_log = xes_importer.apply(xes_path)
     stat_parameters = {
         conc_act_stat.Parameters.START_TIMESTAMP_KEY: "start_timestamp",
     }
     conc_act = conc_act_stat.apply(interval_log, parameters=stat_parameters)
Пример #2
0
 def test_efg_xes(self):
     """
     Smoke test for the eventually-follows statistic on an XES log.

     The interval event log is read from ``input_data`` and handed to the
     log-based implementation, with ``"start_timestamp"`` configured as the
     start timestamp attribute. The returned value is not checked.
     """
     from pm4py.statistics.eventually_follows.log import get as efg_stat

     interval_log = xes_importer.apply(
         os.path.join("input_data", "interval_event_log.xes"))
     start_key_option = efg_stat.Parameters.START_TIMESTAMP_KEY
     efg = efg_stat.apply(
         interval_log, parameters={start_key_option: "start_timestamp"})
Пример #3
0
 def test_sojourn_time_xes(self):
     """
     Smoke test for the sojourn-time statistic on an XES log.

     Loads ``input_data/interval_event_log.xes`` and computes the statistic
     using ``"start_timestamp"`` as the start timestamp attribute. The
     outcome is only computed, not asserted on.
     """
     from pm4py.statistics.sojourn_time.log import get as soj_stat

     source_file = os.path.join("input_data", "interval_event_log.xes")
     options = {soj_stat.Parameters.START_TIMESTAMP_KEY: "start_timestamp"}
     soj_time = soj_stat.apply(xes_importer.apply(source_file),
                               parameters=options)
Пример #4
0
def apply(dfg, log=None, parameters=None, activities_count=None, soj_time=None):
    """
    Build the frequency visualization of a directly-follows graph

    Parameters
    -----------------
    dfg
        Directly-follows graph
    log
        (if provided) Event log, used to compute whichever of activities_count / soj_time
        was not passed in
    parameters
        Options read here: Parameters.ACTIVITY_KEY, Parameters.FORMAT (default "png"),
        Parameters.MAX_NO_EDGES_IN_DIAGRAM (default 75), Parameters.START_ACTIVITIES and
        Parameters.END_ACTIVITIES (both default to an empty list)
    activities_count
        (if provided) Per-activity count; without it and without a log, every activity
        of the DFG gets a count of 1
    soj_time
        (if provided) Per-activity sojourn time; without it and without a log, every
        activity of the DFG gets 0

    Returns
    -----------------
    gviz
        Result of graphviz_visualization, called with measure="frequency"
    """
    if parameters is None:
        parameters = {}

    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    fmt = exec_utils.get_param_value(Parameters.FORMAT, parameters, "png")
    edge_limit = exec_utils.get_param_value(Parameters.MAX_NO_EDGES_IN_DIAGRAM, parameters, 75)
    starts = exec_utils.get_param_value(Parameters.START_ACTIVITIES, parameters, [])
    ends = exec_utils.get_param_value(Parameters.END_ACTIVITIES, parameters, [])
    dfg_activities = dfg_utils.get_activities_from_dfg(dfg)

    log_available = log is not None

    # node statistics: explicit argument wins, then the log, then a constant placeholder
    if activities_count is None:
        activities_count = (
            attr_get.get_attribute_values(log, act_key, parameters=parameters)
            if log_available
            else dict.fromkeys(dfg_activities, 1)
        )

    if soj_time is None:
        soj_time = (
            soj_time_get.apply(log, parameters=parameters)
            if log_available
            else dict.fromkeys(dfg_activities, 0)
        )

    return graphviz_visualization(activities_count, dfg, image_format=fmt, measure="frequency",
                                  max_no_of_edges_in_diagram=edge_limit,
                                  start_activities=starts, end_activities=ends, soj_time=soj_time)
Пример #5
0
def execute_script():
    """
    Demo: compute interval-log statistics and show DFG / Petri net pictures.

    Steps, in order:
    1. import ``../tests/input_data/interval_event_log.xes``;
    2. compute start and end activities and store them in the parameters;
    3. compute and print sojourn time, concurrent activities and the
       eventually-follows graph;
    4. discover the frequency and performance DFGs and view both;
    5. convert the frequency DFG to a Petri net and view it.
    """
    # to try a different log, point this at e.g. "reviewing.xes" in the same folder
    log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "interval_event_log.xes"))

    parameters = {
        constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY: "start_timestamp",
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp",
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        "strict": False,
        "format": "svg",
    }
    # both values are computed before either one is written into the dictionary
    parameters.update(
        start_activities=sa_get.get_start_activities(log, parameters=parameters),
        end_activities=ea_get.get_end_activities(log, parameters=parameters),
    )

    # each statistic is computed, then printed under its label
    for label, stat_module in (("soj_time", soj_time_get),
                               ("conc_act", conc_act_get),
                               ("efg", efg_get)):
        stat_value = stat_module.apply(log, parameters=parameters)
        print(label)
        print(stat_value)

    frequency_dfg = dfg_algorithm.apply(
        log, parameters=parameters, variant=dfg_algorithm.Variants.FREQUENCY)
    performance_dfg = dfg_algorithm.apply(
        log, parameters=parameters, variant=dfg_algorithm.Variants.PERFORMANCE)

    frequency_gviz = dfg_vis_fact.apply(
        frequency_dfg, log=log, variant=dfg_vis_fact.Variants.FREQUENCY,
        parameters=parameters)
    dfg_vis_fact.view(frequency_gviz)

    performance_gviz = dfg_vis_fact.apply(
        performance_dfg, log=log, variant=dfg_vis_fact.Variants.PERFORMANCE,
        parameters=parameters)
    dfg_vis_fact.view(performance_gviz)

    petri_net, initial_marking, final_marking = dfg_conv.apply(frequency_dfg)
    pn_vis.view(
        pn_vis.apply(petri_net, initial_marking, final_marking,
                     parameters=parameters))
Пример #6
0
 def test_efg_pandas(self):
     """
     Smoke test for the eventually-follows statistic on a pandas dataframe.

     Reads ``input_data/interval_event_log.csv``, passes the dataframe
     through ``dataframe_utils.convert_timestamp_columns_in_df`` and runs the
     pandas implementation with ``"start_timestamp"`` as the start timestamp
     column. The result is not asserted on.
     """
     import pm4py  # NOTE(review): the bound name is not referenced below
     import pandas as pd
     from pm4py.objects.log.util import dataframe_utils
     from pm4py.statistics.eventually_follows.pandas import get as efg_stat

     csv_path = os.path.join("input_data", "interval_event_log.csv")
     interval_df = dataframe_utils.convert_timestamp_columns_in_df(
         pd.read_csv(csv_path))
     options = {efg_stat.Parameters.START_TIMESTAMP_KEY: "start_timestamp"}
     efg = efg_stat.apply(interval_df, parameters=options)
Пример #7
0
def discover_abstraction_log(
    log: EventLog,
    parameters: Optional[Dict[Any, Any]] = None
) -> Tuple[Any, Any, Any, Any, Any, Any, Any]:
    """
    Computes, from an event log, the abstraction consumed by the Heuristics Miner ++ algorithm

    Parameters
    --------------
    log
        Event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.START_TIMESTAMP_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.CASE_ID_KEY

    Returns
    --------------
    start_activities
        Start activities
    end_activities
        End activities
    activities_occurrences
        Occurrences of each value of the activity attribute
    dfg
        Directly-follows graph (obtained from the eventually-follows
        computation with KEEP_FIRST_FOLLOWING enabled)
    performance_dfg
        (Performance) Directly-follows graph
    sojourn_time
        Sojourn time for each activity
    concurrent_activities
        Concurrent activities
    """
    if parameters is None:
        parameters = {}

    act_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)

    starts = log_sa.get_start_activities(log, parameters=parameters)
    ends = log_ea.get_end_activities(log, parameters=parameters)
    occurrences = log_attributes.get_attribute_values(
        log, act_key, parameters=parameters)

    # the extra flag goes into a copy, so the dictionary used by the
    # remaining calls (and owned by the caller) is left as it was
    first_following_params = copy(parameters)
    first_following_params[efg_get.Parameters.KEEP_FIRST_FOLLOWING] = True
    dfg = efg_get.apply(log, parameters=first_following_params)

    perf_dfg = dfg_alg.apply(
        log, variant=dfg_alg.Variants.PERFORMANCE, parameters=parameters)
    sojourn = soj_get.apply(log, parameters=parameters)
    concurrent = conc_act_get.apply(log, parameters=parameters)

    return starts, ends, occurrences, dfg, perf_dfg, sojourn, concurrent
Пример #8
0
def apply(dfg: Dict[Tuple[str, str], int],
          log: EventLog = None,
          parameters: Optional[Dict[Any, Any]] = None,
          activities_count: Dict[str, int] = None,
          soj_time: Dict[str, float] = None) -> Digraph:
    """
    Build the Graphviz representation of a frequency directly-follows graph

    Parameters
    -----------------
    dfg
        Frequency Directly-follows graph
    log
        (if provided) Event log, used to compute the statistics that were not passed in
    parameters
        Variant-specific parameters. Read here: Parameters.ACTIVITY_KEY,
        Parameters.FORMAT (default "png"), Parameters.MAX_NO_EDGES_IN_DIAGRAM
        (default 100000), Parameters.START_ACTIVITIES and Parameters.END_ACTIVITIES
        (default empty dict), Parameters.FONT_SIZE (default 12, passed on as a string),
        Parameters.BGCOLOR (default "transparent"), Parameters.STAT_LOCALE
        (default empty dict)
    activities_count
        (if provided) Dictionary associating to each activity the number of occurrences in the log.
    soj_time
        (if provided) Dictionary associating to each activity the average sojourn time

    Returns
    -----------------
    gviz
        Graphviz digraph
    """
    if parameters is None:
        parameters = {}

    def _read(option, default):
        # single lookup point for the variant parameters
        return exec_utils.get_param_value(option, parameters, default)

    act_key = _read(Parameters.ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)
    fmt = _read(Parameters.FORMAT, "png")
    edge_cap = _read(Parameters.MAX_NO_EDGES_IN_DIAGRAM, 100000)
    starts = _read(Parameters.START_ACTIVITIES, {})
    ends = _read(Parameters.END_ACTIVITIES, {})
    label_size = str(_read(Parameters.FONT_SIZE, 12))
    dfg_nodes = dfg_utils.get_activities_from_dfg(dfg)
    background = _read(Parameters.BGCOLOR, "transparent")
    locale_cfg = _read(Parameters.STAT_LOCALE, None)
    if locale_cfg is None:
        locale_cfg = {}

    if activities_count is None:
        if log is None:
            # Without a log, an activity's frequency is bounded from below by the
            # total weight of the arcs entering it; when start activities come as
            # a dictionary of frequencies, those are added on top.
            activities_count = Counter(dict.fromkeys(dfg_nodes, 0))
            for arc, weight in dfg.items():
                activities_count[arc[1]] += weight
            if isinstance(starts, dict):
                for act, weight in starts.items():
                    activities_count[act] += weight
        else:
            activities_count = attr_get.get_attribute_values(
                log, act_key, parameters=parameters)

    if soj_time is None:
        if log is None:
            soj_time = dict.fromkeys(dfg_nodes, 0)
        else:
            soj_time = soj_time_get.apply(log, parameters=parameters)

    return graphviz_visualization(
        activities_count,
        dfg,
        image_format=fmt,
        measure="frequency",
        max_no_of_edges_in_diagram=edge_cap,
        start_activities=starts,
        end_activities=ends,
        soj_time=soj_time,
        font_size=label_size,
        bgcolor=background,
        stat_locale=locale_cfg)