Пример #1
0
def discover_abstraction_dataframe(df: pd.DataFrame, parameters: Optional[Dict[Any, Any]] = None) -> Tuple[
    Any, Any, Any, Any, Any, Any, Any]:
    """
    Discovers an abstraction from a dataframe that is useful for the Heuristics Miner ++ algorithm

    Parameters
    --------------
    df
        Dataframe
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.START_TIMESTAMP_KEY
        - Parameters.TIMESTAMP_KEY
        - Parameters.CASE_ID_KEY

    Returns
    --------------
    start_activities
        Start activities
    end_activities
        End activities
    activities_occurrences
        Activities along with their number of occurrences
    dfg
        Directly-follows graph
    performance_dfg
        (Performance) Directly-follows graph
    sojourn_time
        Sojourn time for each activity
    concurrent_activities
        Concurrent activities
    """
    if parameters is None:
        parameters = {}
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
                                                     None)
    if start_timestamp_key is None:
        start_timestamp_key = xes.DEFAULT_START_TIMESTAMP_KEY
        parameters = copy(parameters)
        parameters[Parameters.START_TIMESTAMP_KEY] = start_timestamp_key
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)
    case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
    start_activities = pd_sa.get_start_activities(df, parameters=parameters)
    end_activities = pd_ea.get_end_activities(df, parameters=parameters)
    activities_occurrences = pd_attributes.get_attribute_values(df, activity_key, parameters=parameters)
    efg_parameters = copy(parameters)
    efg_parameters[pd_efg.Parameters.KEEP_FIRST_FOLLOWING] = True
    dfg = pd_efg.apply(df, parameters=efg_parameters)
    performance_dfg = df_statistics.get_dfg_graph(df, case_id_glue=case_id_glue,
                                                  activity_key=activity_key, timestamp_key=timestamp_key,
                                                  start_timestamp_key=start_timestamp_key, measure="performance")
    sojourn_time = pd_soj_time.apply(df, parameters=parameters)
    concurrent_activities = pd_conc_act.apply(df, parameters=parameters)
    return (
        start_activities, end_activities, activities_occurrences, dfg, performance_dfg, sojourn_time,
        concurrent_activities)
Пример #2
0
def execute_script():
    log_path = os.path.join("..", "tests", "input_data",
                            "interval_event_log.csv")
    dataframe = pm4py.read_csv(log_path)
    log_path = os.path.join("..", "tests", "input_data", "reviewing.xes")
    log = pm4py.read_xes(log_path)
    dataframe = pm4py.convert_to_dataframe(log)
    parameters = {}
    #parameters[constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = "start_timestamp"
    parameters[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = "time:timestamp"
    parameters[constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = "concept:name"
    parameters[constants.PARAMETER_CONSTANT_CASEID_KEY] = "case:concept:name"
    parameters["strict"] = True
    parameters["format"] = "svg"
    start_activities = sa_get.get_start_activities(dataframe,
                                                   parameters=parameters)
    end_activities = ea_get.get_end_activities(dataframe,
                                               parameters=parameters)
    att_count = att_get.get_attribute_values(dataframe,
                                             "concept:name",
                                             parameters=parameters)
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities
    soj_time = soj_time_get.apply(dataframe, parameters=parameters)
    print("soj_time")
    print(soj_time)
    conc_act = conc_act_get.apply(dataframe, parameters=parameters)
    print("conc_act")
    print(conc_act)
    efg = efg_get.apply(dataframe, parameters=parameters)
    print("efg")
    print(efg)
    dfg_freq, dfg_perf = df_statistics.get_dfg_graph(
        dataframe, measure="both", start_timestamp_key="start_timestamp")
    dfg_gv_freq = dfg_vis_fact.apply(dfg_freq,
                                     activities_count=att_count,
                                     variant=dfg_vis_fact.Variants.FREQUENCY,
                                     soj_time=soj_time,
                                     parameters=parameters)
    dfg_vis_fact.view(dfg_gv_freq)
    dfg_gv_perf = dfg_vis_fact.apply(dfg_perf,
                                     activities_count=att_count,
                                     variant=dfg_vis_fact.Variants.PERFORMANCE,
                                     soj_time=soj_time,
                                     parameters=parameters)
    dfg_vis_fact.view(dfg_gv_perf)
    net, im, fm = dfg_conv.apply(dfg_freq)
    gviz = pn_vis.apply(net, im, fm, parameters=parameters)
    pn_vis.view(gviz)