def __init__(self, start_activities, end_activities, dfg, activity_key="concept:name"):
    """
    Stores the inputs and derives the causal and parallel relations from the DFG

    Parameters
    --------------
    start_activities
        Start activities (stored as given)
    end_activities
        End activities (stored as given)
    dfg
        Directly-follows graph (stored as given)
    activity_key
        Activity key (stored as given)
    """
    self.__activity_key = activity_key
    self.__start_activities = start_activities
    self.__end_activities = end_activities
    self.__dfg = dfg

    # NOTE(review): self.dfg is presumably a property exposing the DFG stored
    # just above; it is defined outside this block - confirm.
    causal_scores = causal_algorithm.apply(self.dfg, variant=CAUSAL_ALPHA)
    positive_scores = {}
    for relation, score in causal_scores.items():
        # only relations with a strictly positive causal value are kept
        if score > 0:
            positive_scores[relation] = score
    # a keys view (not a set) is stored, as callers may depend on that type
    self.__causal_relations = positive_scores.keys()

    # a pair is parallel when the reversed pair is also part of the DFG
    mutual_pairs = set()
    for (src, tgt) in self.dfg:
        if (tgt, src) in self.dfg:
            mutual_pairs.add((src, tgt))
    self.__parallel = mutual_pairs
Exemplo n.º 2
0
def apply(df, parameters=None):
    """
    Discovers a footprint object from a dataframe
    (the footprints of the dataframe are returned)

    Parameters
    --------------
    df
        Dataframe
    parameters
        Parameters of the algorithm:
            - Parameters.ACTIVITY_KEY => activity column
              (default: xes_constants.DEFAULT_NAME_KEY)
            - Parameters.CASE_ID_KEY => case identifier column
              (default: constants.CASE_CONCEPT_NAME)
            - Parameters.START_TIMESTAMP_KEY => start timestamp column
              (default: None, i.e. not used here)
            - Parameters.TIMESTAMP_KEY => timestamp column
              (default: xes_constants.DEFAULT_TIMESTAMP_KEY)
            - Parameters.SORT_REQUIRED => whether the dataframe is sorted
              in this function (default: DEFAULT_SORT_REQUIRED)
            - Parameters.INDEX_KEY => name of the index column inserted
              before sorting (default: DEFAULT_INDEX_KEY)

    Returns
    --------------
    footprints_obj
        Footprints object: a dictionary keyed by the values of Outputs
        (DFG, SEQUENCE, PARALLEL, ACTIVITIES, START_ACTIVITIES,
        END_ACTIVITIES, MIN_TRACE_LENGTH)
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    caseid_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
    # The start timestamp has its own parameter; reading TIMESTAMP_KEY here
    # made it silently alias the completion timestamp.
    # NOTE(review): assumes the Parameters enum of this module declares
    # START_TIMESTAMP_KEY - confirm.
    start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters, None)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)
    sort_required = exec_utils.get_param_value(Parameters.SORT_REQUIRED, parameters, DEFAULT_SORT_REQUIRED)
    index_key = exec_utils.get_param_value(Parameters.INDEX_KEY, parameters, DEFAULT_INDEX_KEY)

    # Keep only the columns used below. The start timestamp column must
    # survive the projection, otherwise sorting on it raises a KeyError.
    columns = [caseid_key, activity_key, timestamp_key]
    if start_timestamp_key is not None and start_timestamp_key not in columns:
        columns.append(start_timestamp_key)
    df = df[columns]

    if sort_required:
        # the inserted index column is the last sort key (tie-breaker)
        df = pandas_utils.insert_index(df, index_key)
        if start_timestamp_key is not None:
            df = df.sort_values([caseid_key, start_timestamp_key, timestamp_key, index_key])
        else:
            df = df.sort_values([caseid_key, timestamp_key, index_key])

    grouped_df = df.groupby(caseid_key)
    dfg = df_statistics.get_dfg_graph(df, measure="frequency", activity_key=activity_key, case_id_glue=caseid_key,
                                      timestamp_key=timestamp_key, sort_caseid_required=False,
                                      sort_timestamp_along_case_id=False, start_timestamp_key=start_timestamp_key)
    activities = set(df[activity_key].unique())
    start_activities = set(grouped_df.first()[activity_key].unique())
    end_activities = set(grouped_df.last()[activity_key].unique())

    # a pair is parallel when the reversed pair is also part of the DFG
    parallel = {(x, y) for (x, y) in dfg if (y, x) in dfg}
    sequence = set(causal_discovery.apply(dfg, causal_discovery.Variants.CAUSAL_ALPHA))

    # With no cases the minimum of the sizes is NaN and int() would raise;
    # report 0 instead, as the event log variant does for an empty log.
    trace_sizes = grouped_df.size()
    min_trace_length = int(trace_sizes.min()) if len(trace_sizes) > 0 else 0

    ret = {}
    ret[Outputs.DFG.value] = dfg
    ret[Outputs.SEQUENCE.value] = sequence
    ret[Outputs.PARALLEL.value] = parallel
    ret[Outputs.ACTIVITIES.value] = activities
    ret[Outputs.START_ACTIVITIES.value] = start_activities
    ret[Outputs.END_ACTIVITIES.value] = end_activities
    ret[Outputs.MIN_TRACE_LENGTH.value] = min_trace_length

    return ret
Exemplo n.º 3
0
def apply(log, parameters=None):
    """
    Computes the footprints of an event log

    Parameters
    --------------
    log
        Log (converted to an event log before processing)
    parameters
        Parameters of the algorithm:
            - Parameters.ACTIVITY_KEY

    Returns
    --------------
    footprints_obj
        Footprints object (dictionary keyed by the values of Outputs)
    """
    parameters = {} if parameters is None else parameters

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log, variant=converter.TO_EVENT_LOG, parameters=parameters)

    dfg = dfg_discovery.apply(log, parameters=parameters)

    # a pair is parallel when the reversed pair is also part of the DFG
    parallel = set()
    for (src, tgt) in dfg:
        if (tgt, src) in dfg:
            parallel.add((src, tgt))

    causal = causal_discovery.apply(dfg, causal_discovery.Variants.CAUSAL_ALPHA)
    sequence = set(causal)

    start_activities = set(get_start_activities.get_start_activities(log, parameters=parameters))
    end_activities = set(get_end_activities.get_end_activities(log, parameters=parameters))
    activities = {event[activity_key] for trace in log for event in trace}

    # an empty log has no shortest trace: its minimum length is reported as 0
    if len(log) > 0:
        min_trace_length = min(len(trace) for trace in log)
    else:
        min_trace_length = 0

    footprints = {}
    footprints[Outputs.DFG.value] = dfg
    footprints[Outputs.SEQUENCE.value] = sequence
    footprints[Outputs.PARALLEL.value] = parallel
    footprints[Outputs.START_ACTIVITIES.value] = start_activities
    footprints[Outputs.END_ACTIVITIES.value] = end_activities
    footprints[Outputs.ACTIVITIES.value] = activities
    footprints[Outputs.MIN_TRACE_LENGTH.value] = min_trace_length
    return footprints
Exemplo n.º 4
0
    def get_distr_log_footprints(self, parameters=None):
        """
        Builds a footprints dictionary from the composite object

        Parameters
        --------------
        parameters
            Parameters forwarded to calculate_composite_object

        Returns
        --------------
        footprints_obj
            Dictionary with the keys "dfg", "sequence", "parallel",
            "start_activities" and "end_activities"
        """
        composite = self.calculate_composite_object(parameters=parameters)
        freq_dfg = composite["frequency_dfg"]

        # a pair is parallel when the reversed pair is also part of the DFG
        mutual_pairs = set()
        for (src, tgt) in freq_dfg:
            if (tgt, src) in freq_dfg:
                mutual_pairs.add((src, tgt))

        causal = set(causal_discovery.apply(freq_dfg, causal_discovery.Variants.CAUSAL_ALPHA))

        return {
            "dfg": freq_dfg,
            "sequence": causal,
            "parallel": mutual_pairs,
            "start_activities": set(composite["start_activities"]),
            "end_activities": set(composite["end_activities"]),
        }