def test_filtering_attributes_traces(self):
    # assign to an instance attribute to avoid static-method warnings:
    # by construction of the unittest package, test methods must be instance methods
    self.dummy_variable = "dummy_value"
    input_log = os.path.join(INPUT_DATA_DIR, "running-example.xes")
    log = xes_importer.import_log(input_log)
    log1 = attributes_filter.apply(log, ["reject request"], parameters={"positive": True})
    log2 = attributes_filter.apply(log, ["reject request"], parameters={"positive": False})
    del log1
    del log2
def get_log_traces_to_activities(log, activities, parameters=None):
    """
    Get the sublogs leading to each of the specified activities

    Parameters
    -------------
    log
        Trace log object
    activities
        List of activities in the log
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_ACTIVITY_KEY -> activity
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> timestamp

    Returns
    -------------
    list_logs
        List of event logs, each leading to the first occurrence of an activity
    considered_activities
        All activities that have effectively been inserted into the list of logs
        (for some of them, the resulting log may be empty)
    """
    if parameters is None:
        parameters = {}

    activity_key = parameters[
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY] if constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY
    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    list_logs = []
    considered_activities = []
    for act in activities:
        other_acts = [ac for ac in activities if not ac == act]
        parameters_filt1 = deepcopy(parameters)
        parameters_filt2 = deepcopy(parameters)
        parameters_filt1["positive"] = True
        parameters_filt2["positive"] = False

        # keep only traces containing the target activity ...
        filtered_log = attributes_filter.apply(log, [act], parameters=parameters_filt1)
        logging.info("get_log_traces_to_activities activities=" + str(activities) + " act=" + str(
            act) + " 0 len(filtered_log)=" + str(len(filtered_log)))
        # ... then discard traces containing any of the other target activities
        filtered_log = attributes_filter.apply(filtered_log, other_acts, parameters=parameters_filt2)
        logging.info("get_log_traces_to_activities activities=" + str(activities) + " act=" + str(
            act) + " 1 len(filtered_log)=" + str(len(filtered_log)))
        # cut each remaining trace at the first occurrence of the activity
        filtered_log, act_durations = get_log_traces_until_activity(filtered_log, act, parameters=parameters)
        logging.info("get_log_traces_to_activities activities=" + str(activities) + " act=" + str(
            act) + " 2 len(filtered_log)=" + str(len(filtered_log)))

        if filtered_log:
            list_logs.append(filtered_log)
            considered_activities.append(act)

    return list_logs, considered_activities
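# Usage sketch for get_log_traces_to_activities; the file name and activity
# labels below are placeholders, not taken from the original source.
from pm4py.objects.log.importer.xes import importer as xes_importer

log = xes_importer.apply("running-example.xes")
list_logs, considered = get_log_traces_to_activities(log, ["examine casually", "pay compensation"])
for act, sublog in zip(considered, list_logs):
    print(act, len(sublog))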
def test_35(self):
    from pm4py.util import constants
    from pm4py.algo.filtering.pandas.attributes import attributes_filter
    df = self.load_receipt_df()
    df_traces_pos = attributes_filter.apply(df, ["Resource10"],
                                            parameters={attributes_filter.Parameters.CASE_ID_KEY: "case:concept:name",
                                                        attributes_filter.Parameters.ATTRIBUTE_KEY: "org:resource",
                                                        attributes_filter.Parameters.POSITIVE: True})
    df_traces_neg = attributes_filter.apply(df, ["Resource10"],
                                            parameters={attributes_filter.Parameters.CASE_ID_KEY: "case:concept:name",
                                                        attributes_filter.Parameters.ATTRIBUTE_KEY: "org:resource",
                                                        attributes_filter.Parameters.POSITIVE: False})
def apply(dataframe, filter, parameters=None):
    """
    Apply an attributes filter to the current dataframe

    Parameters
    ------------
    dataframe
        Dataframe of the event log
    filter
        Filter to apply: filter[1][0] is the attribute key,
        filter[1][1] the list of values to exclude
    parameters
        Parameters of the algorithm

    Returns
    ------------
    dataframe
        Filtered dataframe
    """
    if parameters is None:
        parameters = {}
    parameters[constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = filter[1][0]
    parameters["positive"] = False
    return attributes_filter.apply(dataframe, filter[1][1], parameters=parameters)
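# Sketch of the "filter" structure the function above expects, inferred from
# the indexing (filter[1][0] = attribute key, filter[1][1] = values to drop);
# the tuple shape and values shown here are assumptions, not a documented format.
example_filter = ("attributes_filter", ("org:resource", ["Resource10"]))
# filtered_df = apply(dataframe, example_filter)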
def diagnose_from_trans_fitness(log, trans_fitness, parameters=None):
    """
    Provide some conformance diagnostics related to transitions that are executed in an unfit manner

    Parameters
    -------------
    log
        Trace log
    trans_fitness
        For each transition, keeps track of unfit executions
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute of the event containing the timestamp

    Returns
    -------------
    diagnostics
        For each problematic transition, diagnostics about case duration
    """
    if parameters is None:
        parameters = {}
    timestamp_key = parameters[
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    diagnostics = {}

    parameters_filtering = deepcopy(parameters)
    parameters_filtering["positive"] = True

    for trans in trans_fitness:
        if len(trans_fitness[trans]["underfed_traces"]) > 0:
            # keep only the traces containing the activity of the transition
            filtered_log_act = attributes_filter.apply(log, [trans.label], parameters=parameters_filtering)
            fit_cases = []
            underfed_cases = []
            for trace in log:
                if trace in trans_fitness[trans]["underfed_traces"]:
                    underfed_cases.append(trace)
                elif trace in filtered_log_act:
                    fit_cases.append(trace)
            if fit_cases and underfed_cases:
                n_fit = len(fit_cases)
                n_underfed = len(underfed_cases)
                fit_median_time = get_median_case_duration(fit_cases, timestamp_key=timestamp_key)
                underfed_median_time = get_median_case_duration(underfed_cases, timestamp_key=timestamp_key)
                relative_throughput = underfed_median_time / fit_median_time if fit_median_time > 0 else 0
                diagn_dict = {"n_fit": n_fit, "n_underfed": n_underfed, "fit_median_time": fit_median_time,
                              "underfed_median_time": underfed_median_time,
                              "relative_throughput": relative_throughput}
                diagnostics[trans] = diagn_dict

    return diagnostics
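# Reading the diagnostics (sketch): trans_fitness would come from a
# token-based replay with fitness tracking; the loop below assumes only the
# dictionary structure produced by the function above.
# for trans, diagn in diagnose_from_trans_fitness(log, trans_fitness).items():
#     print(trans.label, diagn["n_underfed"], "unfit cases, relative throughput:",
#           diagn["relative_throughput"])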
def filter_log_by_caseid(log, values):
    """
    Filters log by case ID.

    :param log: log to be filtered
    :param values: case ID values that should be kept
    :return: filtered log
    """
    parameters = {constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "caseid"}
    return attributes_filter.apply(log, values, parameters=parameters)
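# Usage sketch, assuming events carry a "caseid" attribute as the function
# above expects (the identifiers are placeholders):
# filtered = filter_log_by_caseid(log, ["case_001", "case_002"])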
def filterfile(sourceFile, outputFile, patternText, inclusive):
    log = importer.apply(sourceFile)
    activities = attributes_filter.get_attribute_values(log, CONCEPT_NAME)
    filteredLog = attributes_filter.apply(log, [patternText],
                                          parameters={attributes_filter.Parameters.ATTRIBUTE_KEY: CONCEPT_NAME,
                                                      attributes_filter.Parameters.POSITIVE: inclusive})
    xes_exporter.apply(filteredLog, outputFile)
def filter_open_cases(log):
    log_selected = attributes_filter.apply(log, ["Payment Handled"],
                                           parameters={
                                               xes_constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: constants.concept_key,
                                               "positive": True
                                           })
    util.print_filtered_cases_count(len(log), len(log_selected))
    return log_selected
def filter_cases(log, starts, ends, parameters):
    for classifier_attributes in [starts, ends]:
        log = attributes_filter.apply(log, classifier_attributes,
                                      parameters={
                                          constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY:
                                              parameters[performance_constants.EVENT_CLASSIFIER],
                                          'positive': True
                                      })
    return log
def average_case_duration(log: EventLog, t1: Union[datetime, str], t2: Union[datetime, str], r: str,
                          parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> float:
    """
    The average duration of cases completed during a given time slot in which a given resource was involved.

    Metric RBI 4.4 in Pika, Anastasiia, et al.
    "Mining resource profiles from event logs."
    ACM Transactions on Management Information Systems (TMIS) 8.1 (2017): 1-30.

    Parameters
    -----------------
    log
        Event log
    t1
        Left interval
    t2
        Right interval
    r
        Resource

    Returns
    ----------------
    metric
        Value of the metric
    """
    if parameters is None:
        parameters = {}

    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes_constants.DEFAULT_RESOURCE_KEY)

    from pm4py.algo.filtering.log.attributes import attributes_filter
    parameters_filter = {attributes_filter.Parameters.ATTRIBUTE_KEY: resource_key}
    log = attributes_filter.apply(log, [r], parameters=parameters_filter)

    from pm4py.algo.filtering.log.timestamp import timestamp_filter
    log = timestamp_filter.filter_traces_intersecting(log, t1, t2, parameters=parameters)

    from pm4py.statistics.traces.generic.log import case_statistics
    cd = case_statistics.get_cases_description(log, parameters=parameters).values()

    return mean(x["caseDuration"] for x in cd)
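# Usage sketch for the RBI 4.4 metric; the file name, interval bounds and
# resource are placeholders (the signature accepts datetimes or strings):
# log = xes_importer.apply("receipt.xes")
# print(average_case_duration(log, "2011-01-01 00:00:00", "2011-06-01 00:00:00", "Resource10"))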
def social_position(log: EventLog, t1_0: Union[datetime, str], t2_0: Union[datetime, str], r: str,
                    parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> float:
    """
    The fraction of resources involved in the same cases with a given resource during a given time slot
    with respect to the total number of resources active during the time slot.

    Metric RBI 5.2 in Pika, Anastasiia, et al.
    "Mining resource profiles from event logs."
    ACM Transactions on Management Information Systems (TMIS) 8.1 (2017): 1-30.

    Parameters
    -----------------
    log
        Event log
    t1_0
        Left interval
    t2_0
        Right interval
    r
        Resource

    Returns
    ----------------
    metric
        Value of the metric
    """
    if parameters is None:
        parameters = {}

    resource_key = exec_utils.get_param_value(Parameters.RESOURCE_KEY, parameters, xes_constants.DEFAULT_RESOURCE_KEY)

    from pm4py.algo.filtering.log.timestamp import timestamp_filter
    log = timestamp_filter.apply_events(log, t1_0, t2_0, parameters=parameters)

    from pm4py.algo.filtering.log.attributes import attributes_filter
    parameters_filter = {attributes_filter.Parameters.ATTRIBUTE_KEY: resource_key}
    filtered_log = attributes_filter.apply(log, [r], parameters=parameters_filter)

    q1 = float(len(filtered_log))
    q2 = float(len(log))

    return q1 / q2 if q2 > 0 else 0.0
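# Usage sketch for the RBI 5.2 metric, mirroring the call above (all values
# are placeholders):
# print(social_position(log, "2011-01-01 00:00:00", "2011-06-01 00:00:00", "Resource10"))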
def filter_abnormal_cases(log, criteria):
    tofilter_log = copy.deepcopy(log)
    for key, values in criteria.items():
        for value in values:
            tofilter_log = attributes_filter.apply(tofilter_log, [value],
                                                   parameters={
                                                       xes_constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: key,
                                                       "positive": True
                                                   })
    tofilter_cases = [case.attributes[constants.concept_key] for case in tofilter_log]
def apply(log, parameters=None, classic_output=False):
    """
    Gets a simple model out of a log

    Parameters
    -------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            maximum_number_activities -> Maximum number of activities to keep
            discovery_algorithm -> Discovery algorithm to use (alpha, inductive)
            desidered_output -> Desired output of the algorithm (default: Petri)
            include_filtered_log -> Include the filtered log in the output
            include_dfg_frequency -> Include the DFG of frequencies in the output
            include_dfg_performance -> Include the DFG of performance in the output
            include_filtered_dfg_frequency -> Include the filtered DFG of frequencies in the output
            include_filtered_dfg_performance -> Include the filtered DFG of performance in the output
    classic_output
        Determines whether the output directly contains the objects (e.g. net, initial_marking,
        final_marking) or a more detailed dictionary
    """
    if parameters is None:
        parameters = {}

    returned_dictionary = {}

    net = None
    initial_marking = None
    final_marking = None
    bpmn_graph = None
    dfg_frequency = None
    dfg_performance = None
    filtered_dfg_frequency = None
    filtered_dfg_performance = None

    maximum_number_activities = parameters[
        "maximum_number_activities"] if "maximum_number_activities" in parameters else 20
    discovery_algorithm = parameters["discovery_algorithm"] if "discovery_algorithm" in parameters else "alpha"
    desidered_output = parameters["desidered_output"] if "desidered_output" in parameters else "petri"
    include_filtered_log = parameters["include_filtered_log"] if "include_filtered_log" in parameters else True
    include_dfg_frequency = parameters["include_dfg_frequency"] if "include_dfg_frequency" in parameters else True
    include_dfg_performance = parameters[
        "include_dfg_performance"] if "include_dfg_performance" in parameters else False
    include_filtered_dfg_frequency = parameters[
        "include_filtered_dfg_frequency"] if "include_filtered_dfg_frequency" in parameters else True
    include_filtered_dfg_performance = parameters[
        "include_filtered_dfg_performance"] if "include_filtered_dfg_performance" in parameters else False

    if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters:
        activity_key = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]
    else:
        log, activity_key = insert_classifier.search_act_class_attr(log)
        if activity_key is None:
            activity_key = DEFAULT_NAME_KEY
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key

    if PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]

    # keep only the most frequent activities
    activities_count_dictio = attributes_filter.get_attribute_values(log, activity_key)
    activities_count_list = []
    for activity in activities_count_dictio:
        activities_count_list.append([activity, activities_count_dictio[activity]])
    activities_count_list = sorted(activities_count_list, key=lambda x: x[1], reverse=True)
    activities_count_list = activities_count_list[:min(len(activities_count_list), maximum_number_activities)]
    activities_keep_list = [x[0] for x in activities_count_list]
    log = attributes_filter.apply(log, activities_keep_list, parameters=parameters)

    filtered_log = None
    if "alpha" in discovery_algorithm:
        # parameters_sa = deepcopy(parameters)
        # parameters_sa["decreasingFactor"] = 1.0
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = filter_topvariants_soundmodel.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = auto_filter.apply_auto_filter(filtered_log, parameters=parameters)

    if include_dfg_frequency or "dfg_mining" in discovery_algorithm:
        dfg_frequency = dfg_factory.apply(log, parameters=parameters, variant="frequency")
    if include_dfg_performance:
        dfg_performance = dfg_factory.apply(log, parameters=parameters, variant="performance")
    if include_filtered_dfg_frequency:
        filtered_dfg_frequency = dfg_factory.apply(filtered_log, parameters=parameters, variant="frequency")
    if include_filtered_dfg_performance:
        filtered_dfg_performance = dfg_factory.apply(filtered_log, parameters=parameters, variant="performance")

    if "alpha" in discovery_algorithm:
        net, initial_marking, final_marking = alpha_miner.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        start_activities = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
        end_activities = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)
        parameters_conv = {"start_activities": start_activities, "end_activities": end_activities}
        net, initial_marking, final_marking = dfg_conv_factory.apply(dfg_frequency, parameters=parameters_conv)

    if filtered_log is not None and include_filtered_log:
        returned_dictionary["filtered_log"] = filtered_log
    if net is not None and desidered_output == "petri":
        returned_dictionary["net"] = net
    if initial_marking is not None and desidered_output == "petri":
        returned_dictionary["initial_marking"] = initial_marking
    if final_marking is not None and desidered_output == "petri":
        returned_dictionary["final_marking"] = final_marking
    if bpmn_graph is not None and desidered_output == "bpmn":
        returned_dictionary["bpmn_graph"] = bpmn_graph
    if dfg_frequency is not None and include_dfg_frequency:
        returned_dictionary["dfg_frequency"] = dfg_frequency
    if dfg_performance is not None and include_dfg_performance:
        returned_dictionary["dfg_performance"] = dfg_performance
    if filtered_dfg_frequency is not None and include_filtered_dfg_frequency:
        returned_dictionary["filtered_dfg_frequency"] = filtered_dfg_frequency
    if filtered_dfg_performance is not None and include_filtered_dfg_performance:
        returned_dictionary["filtered_dfg_performance"] = filtered_dfg_performance

    if classic_output:
        if net is not None and desidered_output == "petri":
            return net, initial_marking, final_marking

    return returned_dictionary
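# Usage sketch for the simple-model extractor above (parameter values are
# illustrative):
# result = apply(log, parameters={"maximum_number_activities": 10, "discovery_algorithm": "alpha"})
# net, im, fm = result["net"], result["initial_marking"], result["final_marking"]
# or, equivalently, with the classic output:
# net, im, fm = apply(log, classic_output=True)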
def diagnose_from_notexisting_activities(log, notexisting_activities_in_model, parameters=None):
    """
    Perform root cause analysis related to activities that are not present in the model

    Parameters
    -------------
    log
        Trace log object
    notexisting_activities_in_model
        Not existing activities in the model
    parameters
        Possible parameters of the algorithm, including:
            string_attributes -> List of string event attributes to consider in building the decision tree
            numeric_attributes -> List of numeric event attributes to consider in building the decision tree

    Returns
    -----------
    diagnostics
        For each problematic transition:
            - a decision tree comparing fit and unfit executions
            - feature names
            - classes
    """
    if parameters is None:
        parameters = {}
    diagnostics = {}

    string_attributes = parameters["string_attributes"] if "string_attributes" in parameters else []
    numeric_attributes = parameters["numeric_attributes"] if "numeric_attributes" in parameters else []
    enable_multiplier = parameters["enable_multiplier"] if "enable_multiplier" in parameters else False

    parameters_filtering = deepcopy(parameters)
    parameters_filtering["positive"] = False
    values = list(notexisting_activities_in_model.keys())
    filtered_log = attributes_filter.apply(log, values, parameters=parameters_filtering)

    for act in notexisting_activities_in_model:
        fit_cases_repr = []
        containing_cases_repr = []
        for trace in log:
            if trace in notexisting_activities_in_model[act]:
                containing_cases_repr.append(notexisting_activities_in_model[act][trace])
            elif trace in filtered_log:
                fit_cases_repr.append(dict(trace[-1]))

        if fit_cases_repr and containing_cases_repr:
            data, feature_names = form_representation_from_dictio_couple(
                fit_cases_repr, containing_cases_repr, string_attributes, numeric_attributes,
                enable_multiplier=enable_multiplier)
            target = []
            classes = []

            if enable_multiplier:
                multiplier_first = int(max(float(len(containing_cases_repr)) / float(len(fit_cases_repr)), 1))
                multiplier_second = int(max(float(len(fit_cases_repr)) / float(len(containing_cases_repr)), 1))
            else:
                multiplier_first = 1
                multiplier_second = 1

            for j in range(multiplier_first):
                for i in range(len(fit_cases_repr)):
                    target.append(0)
            classes.append("fit")

            for j in range(multiplier_second):
                for i in range(len(containing_cases_repr)):
                    target.append(1)
            classes.append("containing")

            target = np.asarray(target)
            clf = tree.DecisionTreeClassifier(max_depth=7)
            clf.fit(data, target)

            diagn_dict = {"clf": clf, "data": data, "feature_names": feature_names, "target": target,
                          "classes": classes}
            diagnostics[act] = diagn_dict

    return diagnostics
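# Visualizing one of the resulting trees (sketch; assumes pm4py's decision
# tree visualizer, whose apply() takes the classifier, feature names and classes):
# from pm4py.visualization.decisiontree import visualizer as dt_vis
# diagn = diagnostics[some_activity]
# gviz = dt_vis.apply(diagn["clf"], diagn["feature_names"], diagn["classes"])
# dt_vis.view(gviz)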
variants_count_filtered_log1 = case_statistics.get_variant_statistics(filtered_log1)
print(variants_count_filtered_log1)

# ---
from pm4py.algo.filtering.log.attributes import attributes_filter

activities = attributes_filter.get_attribute_values(log, "concept:name")
resources = attributes_filter.get_attribute_values(log, "org:resource")
activities
resources

from pm4py.util import constants

# traces containing the resource
tracefilter_log_pos = attributes_filter.apply(log, ["Resource10"],
                                              parameters={
                                                  constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "org:resource",
                                                  "positive": True
                                              })
# traces not containing the resource
tracefilter_log_neg = attributes_filter.apply(log, ["Resource10"],
                                              parameters={
                                                  constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "org:resource",
                                                  "positive": False
                                              })
# keep only the events (rather than whole traces) with the resource
eventsfilter_log = attributes_filter.apply_events(log, ["Resource10"],
                                                  parameters={
                                                      constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "org:resource",
                                                      "positive": True
                                                  })
def diagnose_from_notexisting_activities(log, notexisting_activities_in_model, parameters=None):
    """
    Provide some conformance diagnostics related to activities that are not present in the model

    Parameters
    -------------
    log
        Trace log
    notexisting_activities_in_model
        Not existing activities in the model
    parameters
        Possible parameters of the algorithm, including:
            PARAMETER_CONSTANT_TIMESTAMP_KEY -> attribute of the event containing the timestamp

    Returns
    -------------
    diagnostics
        For each problematic activity, diagnostics about case duration
    """
    if parameters is None:
        parameters = {}
    timestamp_key = parameters[
        constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] if constants.PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else xes.DEFAULT_TIMESTAMP_KEY
    diagnostics = {}

    parameters_filtering = deepcopy(parameters)
    parameters_filtering["positive"] = False
    values = list(notexisting_activities_in_model.keys())
    filtered_log = attributes_filter.apply(log, values, parameters=parameters_filtering)

    for act in notexisting_activities_in_model:
        fit_cases = []
        containing_cases = []
        for trace in log:
            if trace in notexisting_activities_in_model[act]:
                containing_cases.append(trace)
            elif trace in filtered_log:
                fit_cases.append(trace)
        if containing_cases and fit_cases:
            n_containing = len(containing_cases)
            n_fit = len(fit_cases)
            fit_median_time = get_median_case_duration(fit_cases, timestamp_key=timestamp_key)
            containing_median_time = get_median_case_duration(containing_cases, timestamp_key=timestamp_key)
            relative_throughput = containing_median_time / fit_median_time if fit_median_time > 0 else 0
            diagn_dict = {"n_containing": n_containing, "n_fit": n_fit, "fit_median_time": fit_median_time,
                          "containing_median_time": containing_median_time,
                          "relative_throughput": relative_throughput}
            diagnostics[act] = diagn_dict

    return diagnostics
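# Reading the duration diagnostics (sketch; notexisting_activities_in_model
# would come from token-based replay against the model):
# for act, diagn in diagnose_from_notexisting_activities(log, notexisting_activities_in_model).items():
#     print(act, diagn["n_containing"], "containing cases, relative throughput:",
#           diagn["relative_throughput"])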
from grm import preprocessing, GRM
from grm.util import get_activities
from pm4py.algo.filtering.log.attributes import attributes_filter
from pm4py.util import constants
from pm4py.objects.log.util import sampling

model_path = '../best_models/sp2020/2020-05-06-05-40_best_model.pickle'
logfile = "sp2020.csv"
name_of_case_id = "CASE_ID"
name_of_activity = "ACTIVITY"
name_of_timestamp = "TIMESTAMP"
name_of_label = "REPAIR_IN_TIME_5D"

log = preprocessing.import_data("data", logfile, separator=";", quote='"', case_id=name_of_case_id,
                                activity=name_of_activity, time_stamp=name_of_timestamp, target=name_of_label)
activities = get_activities(log)
grm_model = GRM.GRM(log, activities, restore_file=model_path)

# keep only the cases whose label attribute equals 0
log = attributes_filter.apply(log, [0],
                              parameters={constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY: "label", "positive": True})
log = sampling.sample(log, n=5000)
grm_model.visualize_dfg(save_file=True, log=log, file_name="sp2020_", variant="all")
if sb_1_trib:
    list_ojs, list_classes, list_ojs_cod, list_classes_cod = filtra_tribunal(sb_1_trib)
    sb_1_classes = st.selectbox("Classe", list_classes, 2)
    sb_1_OJ = st.selectbox("Órgão Julgador 1", list_ojs, 0)
    sb_2_OJ = st.selectbox("Órgão Julgador 2", list_ojs, 1)
    rd_metrica = st.radio("Métrica", ('Frequência', 'Tempo'))

tracefilter_log_pos = log
if sb_1_trib:
    tracefilter_log_pos = attributes_filter.apply(tracefilter_log_pos, sb_1_trib,
                                                  parameters={
                                                      # attributes_filter.Parameters.CASE_ID_KEY: 'case:concept:name',
                                                      attributes_filter.Parameters.ATTRIBUTE_KEY: "org:siglaTribunal",
                                                      attributes_filter.Parameters.POSITIVE: True
                                                  })
    print("trib", sb_1_trib, len(tracefilter_log_pos))

if sb_1_classes:
    tracefilter_log_pos = attributes_filter.apply(tracefilter_log_pos, sb_1_classes,
                                                  parameters={
                                                      # attributes_filter.Parameters.CASE_ID_KEY: 'case:concept:name',
                                                      attributes_filter.Parameters.ATTRIBUTE_KEY: "org:Classe",
                                                      attributes_filter.Parameters.POSITIVE: True
                                                  })
    print("classes", sb_1_classes, len(tracefilter_log_pos))

if sb_1_OJ:
events_2017 = 0
for trace in fil_log_17:
    events_2017 += len(trace)
print("2017 events", events_2017)

# activities
activities = attributes_filter.get_attribute_values(fil_log_17, "concept:name")
print("2017 activities", len(activities))

# class distribution
labels = attributes_filter.get_attribute_values(fil_log_17, "Accepted")
print(labels)

tracefilter_log_pos = attributes_filter.apply(fil_log_17, [True],
                                              parameters={
                                                  attributes_filter.PARAMETER_CONSTANT_ATTRIBUTE_KEY: name_of_label,
                                                  "positive": True
                                              })
tracefilter_log_neg = attributes_filter.apply(fil_log_17, [True],
                                              parameters={
                                                  attributes_filter.PARAMETER_CONSTANT_ATTRIBUTE_KEY: name_of_label,
                                                  "positive": False
                                              })
pos = len(tracefilter_log_pos)
neg = len(tracefilter_log_neg)
print("2017 pos", pos, ", part: ", pos / (pos + neg))
print("2017 neg", neg, ", part: ", neg / (pos + neg))

# sp2020 ___________________________________________________________________________________________________________
log_file = "coffeemachine_service_repair.csv"
def create_process_models(output_case_traces_cluster, path_data_sources, dir_runtime_files, dir_dfg_cluster_files,
                          filename_dfg_cluster, rel_proportion_dfg_threshold, logging_level):
    """
    Creates directly-follows graphs out of an event log.

    :param output_case_traces_cluster: traces that are visualised
    :param path_data_sources: path of sources and outputs
    :param dir_runtime_files: folder containing files read and written during runtime
    :param dir_dfg_cluster_files: folder containing dfg png files
    :param filename_dfg_cluster: filename of dfg file (per cluster)
    :param rel_proportion_dfg_threshold: threshold for filtering out sensors in dfg relative to max occurrences
        of a sensor
    :param logging_level: level of logging
    :return: None
    """
    # keep only needed columns
    output_case_traces_cluster = output_case_traces_cluster.reindex(
        columns={'Case', 'LC_Activity', 'Timestamp', 'Cluster'})
    output_case_traces_cluster = output_case_traces_cluster.rename(columns={
        'Case': 'case:concept:name',
        'LC_Activity': 'concept:name',
        'Timestamp': 'time:timestamp'
    })

    # create directory for dfg pngs
    os.mkdir(path_data_sources + dir_runtime_files + dir_dfg_cluster_files)

    # create dfg for each cluster
    clusters = output_case_traces_cluster.Cluster.unique()
    for cluster in clusters:
        log = output_case_traces_cluster.loc[output_case_traces_cluster.Cluster == cluster]
        log = log.astype(str)
        # convert pandas data frame to pm4py event log for further processing
        log = log_converter.apply(log)

        # keep only activities with more than a certain number of occurrences
        activities = attributes_get.get_attribute_values(log, 'concept:name')
        # determine that number relative to the max number of occurrences of a sensor in a cluster
        # (the result is the threshold at which an activity/activity strand is kept)
        min_number_of_occurrences = round((max(activities.values()) * rel_proportion_dfg_threshold), 0)
        activities = {x: y for x, y in activities.items() if y >= min_number_of_occurrences}
        log = attributes_filter.apply(log, activities)

        # create dfg out of event log
        dfg = dfg_discovery.apply(log)

        # define start and end activities
        start_activities = sa_get.get_start_activities(log)
        end_activities = ea_get.get_end_activities(log)

        # create png of dfg (if the graph does not show a graph, it is possible
        # that the sensors did not trigger often)
        gviz = dfg_visualization.apply(dfg=dfg, log=log, variant=dfg_visualization.Variants.FREQUENCY,
                                       parameters={'start_activities': start_activities,
                                                   'end_activities': end_activities})
        dfg_visualization.save(gviz, path_data_sources + dir_runtime_files + dir_dfg_cluster_files +
                               (filename_dfg_cluster.format(cluster=str(cluster))))

    # logger
    logger = logging.getLogger(inspect.stack()[0][3])
    logger.setLevel(logging_level)
    logger.info("Saved directly follows graphs into '../%s'.",
                path_data_sources + dir_runtime_files + dir_dfg_cluster_files)
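# Usage sketch (all argument values are placeholders):
# create_process_models(df_clustered, "data/", "run_01/", "dfg/",
#                       "dfg_cluster_{cluster}.png", 0.05, logging.INFO)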