def __init__(self, start_activities, end_activities, dfg, activity_key="concept:name"):
    """
    Stores the provided abstraction and derives the causal and
    parallel relations from the directly-follows graph

    Parameters
    --------------
    start_activities
        Start activities
    end_activities
        End activities
    dfg
        Directly-follows graph (iterated as (source, target) pairs)
    activity_key
        Activity key (default: "concept:name")
    """
    self.__activity_key = activity_key
    self.__start_activities = start_activities
    self.__end_activities = end_activities
    self.__dfg = dfg

    # keep only the relations with a strictly positive causal value;
    # the attribute is the keys view of the filtered dictionary
    causal_values = causal_algorithm.apply(self.dfg, variant=CAUSAL_ALPHA)
    positive_relations = {}
    for relation, value in causal_values.items():
        if value > 0:
            positive_relations[relation] = value
    self.__causal_relations = positive_relations.keys()

    # two activities are parallel when both directions are in the graph
    parallel_pairs = set()
    for source, target in self.dfg:
        if (target, source) in self.dfg:
            parallel_pairs.add((source, target))
    self.__parallel = parallel_pairs
def apply(df, parameters=None):
    """
    Discovers a footprint object from a dataframe
    (the footprints of the dataframe are returned)

    Parameters
    --------------
    df
        Dataframe
    parameters
        Parameters of the algorithm:
            - Parameters.ACTIVITY_KEY (default: xes_constants.DEFAULT_NAME_KEY)
            - Parameters.CASE_ID_KEY (default: constants.CASE_CONCEPT_NAME)
            - Parameters.TIMESTAMP_KEY (default: xes_constants.DEFAULT_TIMESTAMP_KEY)
            - Parameters.SORT_REQUIRED (default: DEFAULT_SORT_REQUIRED)
            - Parameters.INDEX_KEY (default: DEFAULT_INDEX_KEY)

    Returns
    --------------
    footprints_obj
        Footprints object (dictionary keyed by the values of Outputs);
        the minimum trace length is 0 when the dataframe contains no cases
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    caseid_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
    # NOTE(review): the start timestamp is read under Parameters.TIMESTAMP_KEY,
    # so whenever it is set it equals timestamp_key. This looks like a
    # copy-paste slip; confirm whether a dedicated start-timestamp parameter
    # was intended (the column projection below would then need that column).
    start_timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, None)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)
    sort_required = exec_utils.get_param_value(Parameters.SORT_REQUIRED, parameters, DEFAULT_SORT_REQUIRED)
    index_key = exec_utils.get_param_value(Parameters.INDEX_KEY, parameters, DEFAULT_INDEX_KEY)

    # keep only the columns needed for the computation
    df = df[[caseid_key, activity_key, timestamp_key]]
    if sort_required:
        # the inserted index column is the last sorting criterion
        df = pandas_utils.insert_index(df, index_key)
        if start_timestamp_key is not None:
            df = df.sort_values([caseid_key, start_timestamp_key, timestamp_key, index_key])
        else:
            df = df.sort_values([caseid_key, timestamp_key, index_key])

    grouped_df = df.groupby(caseid_key)

    # sorting has already been handled above, so it is disabled here
    dfg = df_statistics.get_dfg_graph(df, measure="frequency", activity_key=activity_key, case_id_glue=caseid_key,
                                      timestamp_key=timestamp_key, sort_caseid_required=False,
                                      sort_timestamp_along_case_id=False, start_timestamp_key=start_timestamp_key)

    activities = set(df[activity_key].unique())
    start_activities = set(grouped_df.first()[activity_key].unique())
    end_activities = set(grouped_df.last()[activity_key].unique())

    # pairs that directly follow each other in both directions
    parallel = {(x, y) for (x, y) in dfg if (y, x) in dfg}
    sequence = set(causal_discovery.apply(dfg, causal_discovery.Variants.CAUSAL_ALPHA))

    # with no cases, min() of the empty size series is NaN and int(NaN)
    # raises; report 0 instead, as the event log variant does
    trace_lengths = grouped_df.size()
    min_trace_length = int(trace_lengths.min()) if len(trace_lengths) > 0 else 0

    ret = {}
    ret[Outputs.DFG.value] = dfg
    ret[Outputs.SEQUENCE.value] = sequence
    ret[Outputs.PARALLEL.value] = parallel
    ret[Outputs.ACTIVITIES.value] = activities
    ret[Outputs.START_ACTIVITIES.value] = start_activities
    ret[Outputs.END_ACTIVITIES.value] = end_activities
    ret[Outputs.MIN_TRACE_LENGTH.value] = min_trace_length

    return ret
def apply(log, parameters=None):
    """
    Computes the footprints of an event log

    Parameters
    --------------
    log
        Log (converted to an event log before the computation)
    parameters
        Parameters of the algorithm:
            - Parameters.ACTIVITY_KEY

    Returns
    --------------
    footprints_obj
        Footprints object (dictionary keyed by the values of Outputs)
    """
    parameters = {} if parameters is None else parameters

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    log = converter.apply(log, variant=converter.TO_EVENT_LOG, parameters=parameters)
    dfg = dfg_discovery.apply(log, parameters=parameters)

    # pairs that directly follow each other in both directions
    parallel = set()
    for source, target in dfg:
        if (target, source) in dfg:
            parallel.add((source, target))

    causal = causal_discovery.apply(dfg, causal_discovery.Variants.CAUSAL_ALPHA)
    sequence = set(causal)

    start_activities = set(get_start_activities.get_start_activities(log, parameters=parameters))
    end_activities = set(get_end_activities.get_end_activities(log, parameters=parameters))

    # every activity occurring in any event of any trace
    activities = set()
    for trace in log:
        for event in trace:
            activities.add(event[activity_key])

    # an empty log has no traces, so its minimum trace length is 0
    if len(log) > 0:
        min_trace_length = min(len(trace) for trace in log)
    else:
        min_trace_length = 0

    footprints = {}
    footprints[Outputs.DFG.value] = dfg
    footprints[Outputs.SEQUENCE.value] = sequence
    footprints[Outputs.PARALLEL.value] = parallel
    footprints[Outputs.START_ACTIVITIES.value] = start_activities
    footprints[Outputs.END_ACTIVITIES.value] = end_activities
    footprints[Outputs.ACTIVITIES.value] = activities
    footprints[Outputs.MIN_TRACE_LENGTH.value] = min_trace_length
    return footprints
def get_distr_log_footprints(self, parameters=None):
    """
    Builds a footprints dictionary from the composite object
    returned by calculate_composite_object

    Parameters
    --------------
    parameters
        Parameters forwarded to calculate_composite_object

    Returns
    --------------
    footprints_obj
        Dictionary with the keys "dfg", "sequence", "parallel",
        "start_activities" and "end_activities"
    """
    composite = self.calculate_composite_object(parameters=parameters)
    frequency_dfg = composite["frequency_dfg"]

    # pairs that directly follow each other in both directions
    parallel_pairs = set()
    for source, target in frequency_dfg:
        if (target, source) in frequency_dfg:
            parallel_pairs.add((source, target))

    causal = causal_discovery.apply(frequency_dfg, causal_discovery.Variants.CAUSAL_ALPHA)

    return {
        "dfg": frequency_dfg,
        "sequence": set(causal),
        "parallel": parallel_pairs,
        "start_activities": set(composite["start_activities"]),
        "end_activities": set(composite["end_activities"]),
    }