def execute_script():
    """Run a transient CTMC analysis on the performance DFG of the running-example log."""
    log_path = os.path.join("..", "tests", "input_data", "running-example.xes")
    event_log = xes_importer.apply(log_path)
    perf_dfg = dfg_miner.apply(event_log, variant=dfg_miner.DFG_PERFORMANCE)
    reach_graph, tang_reach_graph, stochastic_map, q_matrix = \
        ctmc.get_tangible_reachability_and_q_matrix_from_dfg_performance(perf_dfg)
    # pick the source state of the tangible reachability graph
    source_state = [s for s in tang_reach_graph.states if s.name == "source1"][0]
    # distribution over the states of the system 86400.0 seconds (1 day)
    # after starting from the source
    transient_result = ctmc.transient_analysis_from_tangible_q_matrix_and_single_state(
        tang_reach_graph, q_matrix, source_state, 86400.0)
    print(transient_result)
def test_alpha_miner_log(self):
    """Smoke-test Alpha Miner (classic and plus) on a log and on a discovered DFG."""
    from pm4py.algo.discovery.alpha import factory as alpha_miner
    from pm4py.algo.discovery.dfg import factory as dfg_discovery
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    # discovery directly from the event log, with both Alpha variants
    net1, im1, fm1 = alpha_miner.apply(log, variant=alpha_miner.ALPHA_VERSION_CLASSIC)
    net2, im2, fm2 = alpha_miner.apply(log, variant=alpha_miner.ALPHA_VERSION_PLUS)
    # discovery from a precomputed directly-follows graph
    dfg = dfg_discovery.apply(log)
    net3, im3, fm3 = alpha_miner.apply_dfg(dfg, variant=alpha_miner.ALPHA_VERSION_CLASSIC)
def apply(log, activity, parameters=None):
    """
    Gets the time passed to each succeeding activity

    Parameters
    -------------
    log
        Log
    activity
        Activity that we are considering
    parameters
        Possible parameters of the algorithm

    Returns
    -------------
    dictio
        Dictionary containing a 'post' key with the list of aggregates times
        from the given activity to each succeeding activity, and a
        'post_avg_perf' key with the frequency-weighted average of those times
    """
    parameters = parameters if parameters is not None else {}
    dfg_frequency = dfg_factory.apply(log, variant="frequency", parameters=parameters)
    dfg_performance = dfg_factory.apply(log, variant="performance", parameters=parameters)
    post = []
    weighted_perf_total = 0.0
    frequency_total = 0.0
    # collect every edge leaving `activity`, together with its average
    # performance and its frequency
    for (source, target), perf in dfg_performance.items():
        if source != activity:
            continue
        freq = dfg_frequency[(source, target)]
        post.append([target, float(perf), int(freq)])
        weighted_perf_total += float(perf) * float(freq)
        frequency_total += float(freq)
    # frequency-weighted mean time to the succeeding activities (0.0 if none)
    avg_post = weighted_perf_total / frequency_total if frequency_total > 0 else 0.0
    return {"post": post, "post_avg_perf": avg_post}
def highLevelDFG(self):
    """Create the high-level DFG image of the entire process.

    Returns False if the DFG could not be created; returns None on success.
    """
    try:
        df, log, parameters = modules.eventAbstraction.aggregateData(
            self.dataframe, remove_duplicates=False)
        dfg = dfg_factory.apply(log, variant="frequency", parameters=parameters)
        image_params = self._createImageParameters(log=log, high_level=True)
        rendering = dfg_vis_factory.apply(dfg, log=log, variant="frequency",
                                          parameters=image_params)
        self._create_image(rendering, "DFG_model")
    except Exception as e:
        print(f"[PROCESS MINING] Could not create DFG: {e}")
        return False
def apply_heu(log, parameters=None):
    """
    Discovers an Heuristics Net using Heuristics Miner

    Parameters
    ------------
    log
        Event log
    parameters
        Possible parameters of the algorithm, including: activity_key, case_id_glue,
        timestamp_key, dependency_thresh, and_measure_thresh, min_act_count,
        min_dfg_occurrences, dfg_pre_cleaning_noise_thresh, loops_length_two_thresh

    Returns
    ------------
    heu
        Heuristics Net
    """
    parameters = {} if parameters is None else parameters
    activity_key = parameters.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    # building blocks for the heuristics computation: start/end activities,
    # activity occurrences, the plain DFG, a window-2 DFG and frequency triples
    start_activities = log_sa_filter.get_start_activities(log, parameters=parameters)
    end_activities = log_ea_filter.get_end_activities(log, parameters=parameters)
    activities_occurrences = log_attributes.get_attribute_values(
        log, activity_key, parameters=parameters)
    activities = list(activities_occurrences.keys())
    dfg = dfg_factory.apply(log, parameters=parameters)
    # window-2 DFG: directly-follows with one activity in between
    parameters_w2 = deepcopy(parameters)
    parameters_w2["window"] = 2
    dfg_window_2 = dfg_factory.apply(log, parameters=parameters_w2)
    freq_triples = dfg_factory.apply(log, parameters=parameters, variant="freq_triples")
    return apply_heu_dfg(dfg, activities=activities,
                         activities_occurrences=activities_occurrences,
                         start_activities=start_activities,
                         end_activities=end_activities,
                         dfg_window_2=dfg_window_2,
                         freq_triples=freq_triples,
                         parameters=parameters)
def Hueristics(file, location="/mnt/c/Users/harim/Downloads/dfg.png"):
    """Discover a frequency-annotated DFG from an XES log and save it as an image.

    Parameters
    ----------
    file
        Path to the XES event log to import.
    location
        Destination path for the rendered DFG image. Defaults to the
        previously hard-coded path, so existing callers are unaffected;
        new callers can choose where the image goes.

    Returns
    -------
    location
        The path the image was written to.
    """
    from pm4py.objects.log.importer.xes import factory as xes_importer
    from pm4py.algo.discovery.dfg import factory as dfg_factory
    from pm4py.visualization.dfg import factory as dfg_vis_factory
    log = xes_importer.import_log(file)
    dfg = dfg_factory.apply(log)
    gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency")
    dfg_vis_factory.save(gviz, location)
    return location
def visualize_dfg(log, filename):
    """
    Visualizes an event log as a DFG

    :param log: event log that will be visualized
    :param filename: filename for the created DFG
    """
    graph = dfg_factory.apply(log)
    render_params = {"format": "svg"}
    rendering = dfg_vis_factory.apply(graph, log=log,
                                      parameters=render_params,
                                      variant='frequency')
    dfg_vis_factory.save(rendering, filename)
def test_dfdoc1(self):
    """Smoke-test the simple DFG graphviz visualization on the running example."""
    # to avoid static method warnings in tests,
    # that by construction of the unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"
    from pm4py.objects.log.importer.xes import factory as xes_importer
    from pm4py.algo.discovery.dfg import factory as dfg_factory
    from pm4py.algo.filtering.log.attributes import attributes_filter
    from pm4py.visualization.dfg.versions import simple_visualize as dfg_visualize
    log = xes_importer.import_log(os.path.join("input_data", "running-example.xes"))
    dfg = dfg_factory.apply(log)
    activities_count = attributes_filter.get_attribute_values(log, "concept:name")
    gviz = dfg_visualize.graphviz_visualization(activities_count, dfg)
    del gviz
def generate_process_model(log):
    '''
    Description: generate a graphical process model in .svg format using the
        pm4py library
    Used: generate process model under provided log
    Input: log file
    Output: Display process model and return the discovered DFG
    '''
    process_dfg = dfg_factory.apply(log)
    # decorate the DFG with the frequency of activities
    rendering = dfg_vis_factory.apply(process_dfg, log=log, variant="frequency")
    dfg_vis_factory.view(rendering)
    return process_dfg
def highLevelPetriNet(self):
    """Create a high-level Petri net of the entire process and export it as PNML.

    Returns False if the net could not be created; returns None on success.
    """
    try:
        df, log, parameters = modules.eventAbstraction.aggregateData(
            self.dataframe, remove_duplicates=False)
        dfg = dfg_factory.apply(log, variant="frequency", parameters=parameters)
        image_params = self._createImageParameters(log=log, high_level=True)
        # convert the DFG into a Petri net and serialize it to PNML
        net, im, fm = dfg_conv_factory.apply(dfg, parameters=image_params)
        pnml_path = os.path.join(self.discovery_path,
                                 f'{self.filename}_petri_net.pnml')
        pnml_factory.apply(net, im, pnml_path, final_marking=fm)
        # gviz = pn_vis_factory.apply(net, im, fm, parameters=image_params)
        # self._create_image(gviz, "petri_net")
    except Exception as e:
        print(f"[PROCESS MINING] Could not create Petri Net: {e}")
        return False
def _createDFG(self, log=None, parameters=None, high_level=False):
    """Build a frequency DFG from either the given log or the instance log.

    :param log: low-level event log (defaults to self._log)
    :param parameters: optional parameters forwarded to the DFG discovery
    :param high_level: if True, aggregate the instance dataframe into a
        high-level log first (overriding `log` and `parameters`)
    :return: (dfg, log) — the discovered DFG and the log it was mined from
    """
    if high_level:
        df, log, parameters = modules.eventAbstraction.aggregateData(
            self.dataframe, remove_duplicates=False)
    else:
        parameters = {} if parameters is None else parameters
        log = self._log if log is None else log
    dfg = dfg_factory.apply(log, variant="frequency", parameters=parameters)
    return dfg, log
def execute_script():
    """Import a CSV log and discover three models: a DFG, a heuristics net and a Petri net."""
    # import csv & create log
    dataframe = csv_import_adapter.import_dataframe_from_path(
        datasourceMockdata(), sep=";")
    dataframe = dataframe.rename(columns={
        'coID': 'case:concept:name',
        'Activity': 'concept:name'
    })
    log = conversion_factory.apply(dataframe)
    parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"}
    # option 1: Directly-Follows Graph, represent frequency or performance
    variant = 'frequency'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    dfg_rendering = dfg_vis_factory.apply(dfg, log=log, variant=variant,
                                          parameters=parameters)
    dfg_vis_factory.view(dfg_rendering)
    # option 2: Heuristics Miner, acts on the Directly-Follows Graph,
    # find common structures, output: Heuristic Net (.svg)
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.00
        })
    heu_rendering = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(heu_rendering)
    # option 3: Petri Net based on Heuristic Miner (.png)
    net, im, fm = heuristics_miner.apply(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.00
        })
    petri_rendering = petri_vis.apply(
        net, im, fm,
        parameters={petri_vis.Variants.WO_DECORATION.value.Parameters.FORMAT: "png"})
    petri_vis.view(petri_rendering)
def get_paths(self, attribute_key, parameters=None):
    """
    Gets the paths from the log

    Parameters
    -------------
    attribute_key
        Attribute key used as the activity attribute for the DFG discovery
    parameters
        Possible parameters of the algorithm.
        NOTE(review): currently accepted but NOT forwarded to the discovery
        call below — confirm whether callers expect them to take effect.

    Returns
    -------------
    paths
        List of paths (the directly-follows relation over self.log, keyed by
        pairs of attribute values)
    """
    # mine the directly-follows graph of self.log, treating `attribute_key`
    # as the activity attribute
    dfg = dfg_factory.apply(self.log, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: attribute_key})
    return dfg
def read_xes(data_dir, dataset, aggregate_type, mode="pruning"):
    """Read an event log (CSV for the known datasets, XES otherwise) and build its DFG.

    Parameters
    ----------
    data_dir
        Directory containing the log file.
    dataset
        Base name of the log file (without extension). "BPIC14" and
        "Unrineweginfectie" are read from CSV with column remapping;
        anything else is read as XES.
    aggregate_type
        AggregateType value; FREQ yields a frequency DFG, anything else a
        time-aggregated DFG via get_dfg_time.
    mode
        Unused here; kept for interface compatibility.

    Returns
    -------
    dfg
        The discovered directly-follows graph.
    """
    prune_parameter_freq = 350
    prune_parameter_time = -1  # keep all
    # read the log; the two known CSV datasets need their columns mapped onto
    # the standard pm4py case/activity/timestamp attribute names
    # BUGFIX: was `dataset in "BPIC14"`, a substring test that also matched
    # e.g. "BPI" or "C1" — an equality comparison is what was intended
    if dataset == "BPIC14":
        data = csv_import_adapter.import_dataframe_from_path(
            os.path.join(data_dir, dataset + ".csv"), sep=";")
        data['case:concept:name'] = data['Incident ID']
        data['time:timestamp'] = data['DateStamp']
        data['concept:name'] = data['IncidentActivity_Type']
        log = conversion_factory.apply(data)
    elif dataset == "Unrineweginfectie":
        data = csv_import_adapter.import_dataframe_from_path(
            os.path.join(data_dir, dataset + ".csv"), sep=",")
        data['case:concept:name'] = data['Patientnummer']
        data['time:timestamp'] = data['Starttijd']
        # NOTE: 'Aciviteit' is the actual column name in the source CSV
        data['concept:name'] = data['Aciviteit']
        log = conversion_factory.apply(data)
    else:
        log = xes_import_factory.apply(os.path.join(data_dir, dataset + ".xes"))
        data = get_dataframe_from_event_stream(log)
    # frequency DFG via pm4py, otherwise a time-aggregated DFG
    if aggregate_type == AggregateType.FREQ:
        dfg = dfg_factory.apply(log, variant="frequency")
    else:
        dfg = get_dfg_time(data, aggregate_type, dataset)
    # Getting Start and End activities
    # NOTE(review): the results are computed but never used or returned —
    # kept for parity with the original; confirm whether they can be dropped
    log_start = start_activities_filter.get_start_activities(log)
    log_end = end_activities_filter.get_end_activities(log)
    return dfg
def execute_script(variant="frequency"):
    """Decorate an Inductive-Miner Petri net of the receipt log with DFG statistics.

    :param variant: "frequency" (decorations summed) or "performance"
        (decorations averaged)
    """
    # read the log using the nonstandard importer (faster)
    log_path = os.path.join("..", "tests", "input_data", "receipt.xes")
    log = xes_importer.import_log(log_path, variant="nonstandard")
    # apply Inductive Miner on the log
    net, initial_marking, final_marking = inductive_miner.apply(log)
    # shortest paths in the net are needed to project DFG edges onto it
    spaths = get_shortest_paths(net)
    # aggregation measure: frequencies are summed, performance values averaged
    aggregation_measure = "sum" if variant == "frequency" else "mean"
    # the DFG and activity occurrence counts drive the decoration
    dfg = dfg_factory.apply(log, variant=variant)
    activities_count = attributes_filter.get_attribute_values(log, "concept:name")
    # statistics on the Petri net, computed with the greedy algorithm
    aggregated_statistics = get_decorations_from_dfg_spaths_acticount(
        net, dfg, spaths, activities_count,
        variant=variant, aggregation_measure=aggregation_measure)
    rendering = pn_vis_factory.apply(net, initial_marking, final_marking,
                                     variant=variant,
                                     aggregated_statistics=aggregated_statistics,
                                     parameters={"format": "svg"})
    pn_vis_factory.view(rendering)
def execute_script():
    """Run two Montecarlo simulations of a DFG-derived Petri net:
    one with the case arrival rate inferred from the log, one with a fixed rate."""
    log = xes_importer.apply(
        os.path.join("..", "tests", "input_data", "running-example.xes"))
    freq_dfg = dfg_miner.apply(log, variant="frequency")
    net, im, fm = dfg_conv_factory.apply(freq_dfg)
    # settings shared by both simulations (each lasts at most 5 seconds)
    common_params = {
        "token_replay_variant": Variants.BACKWARDS,
        "enable_diagnostics": False,
        "max_thread_exec_time": 5
    }
    # simulation 1: arrival rate inferred from the log
    log, res = montecarlo_simulation.apply(log, net, im, fm,
                                           parameters=dict(common_params))
    print(
        "\n(Montecarlo - Petri net) case arrival ratio inferred from the log")
    print(res["median_cases_ex_time"])
    print(res["total_cases_time"])
    # simulation 2: arrival rate specified by the user
    log, res = montecarlo_simulation.apply(
        log, net, im, fm,
        parameters={**common_params, "case_arrival_ratio": 60})
    print(
        "\n(Montecarlo - Petri net) case arrival ratio specified by the user")
    print(res["median_cases_ex_time"])
    print(res["total_cases_time"])
tel_tree_num = 0 log_tree_num = 0 sum_tree_num = 0 print(tree) tree_avg_tel = 0 tree_avg_log = 0 tree_avg_sum = 0 for sam in range(1, 11): print(sam) path = os.path.join("input_data", "df_complete_logs", "%d_1000_%d.xes" % (tree, sam)) log = xes_importer.apply(path) tel = xes_importer.apply(path) xes_utils.set_enabled(tel) dfg_100 = dfg_factory.apply(log) start_act = set(get_start_activities(log).keys()) end_act = set(get_end_activities(log).keys()) result_norm = [] result_tel = [] num = len(dfg_100.keys()) score_tel = 0 score_log = 0 score_sum = 0 su_tel = 0 su_log = 0 su_sum = 0 for k in range(10): found_tel = False
def visualize_dfg(self, log, save_file=False, file_name="dfg", variant="relevance"):
    """
    Visualises the event log as direct follower graph (DFG).

    :param log: event log as a list of traces [list].
    :param save_file: boolean flag indicating to save the DFG or not [bool].
    :param file_name: name of the file [str].
    :param variant: dfg version to be produced: "frequency", "time", "relevance" or "all" [str]
    :return: file_names [list].
    """
    parameters = {"format": "svg"}
    file_names = []
    # per-label relevance information; each entry carries the case ids
    # ('traces') and per-activity scores ('scores') for that label
    relevance_scores = self.aggregate_relevance_scores(log)
    if variant == "relevance" or variant == "all":
        for label, items in relevance_scores.items():
            # restrict the log to the cases predicted with this label
            data = filter_log_by_caseid(log, items['traces'])
            dfg = dfg_factory.apply(data)
            # decorate the activities with relevance scores instead of counts
            gviz = dfg_vis_factory.apply(dfg,
                                         activities_count=items['scores'],
                                         parameters=parameters)
            if len(items['traces']) == 1:
                title = "Prediction: " + str(
                    label) + ", Case ID: " + items['traces'][0]
            else:
                title = "No of Service Orders: " + str(len(
                    log)) + ", Filter: Repair not on time (Label = " + str(
                        label) + ")"
            # inject a graph title directly into the dot source
            gviz.body.append('\t// title')
            gviz.body.append('\tfontsize = 50;')
            gviz.body.append('\tlabelloc = "t";')
            gviz.body.append('\tlabel = "' + title + '";')
            print("rel_sc: ", items['scores'])
            if save_file:
                filen = file_name + "_rel_" + str(label) + ".svg"
                dfg_vis_factory.save(gviz, filen)
                print("Saved DFG image to: " + filen)
                file_names.append(filen)
    if variant == "frequency" or variant == "all":
        for label, items in relevance_scores.items():
            data = filter_log_by_caseid(log, items['traces'])
            dfg = dfg_factory.apply(data)
            # NOTE(review): the activity counts are taken from the full `log`,
            # while the DFG is mined from the filtered `data` — confirm this
            # mismatch is intentional
            activities_cnt = attributes_filter.get_attribute_values(
                log, attribute_key="concept:name")
            gviz = dfg_vis_factory.apply(dfg,
                                         activities_count=activities_cnt,
                                         parameters=parameters)
            if len(items['traces']) == 1:
                title = "Prediction: " + str(
                    label) + ", Case ID: " + items['traces'][0]
            else:
                title = "No of Service Orders: " + str(len(
                    log)) + ", Filter: Repair not on time (Label = " + str(
                        label) + ")"
            # inject a graph title directly into the dot source
            gviz.body.append('\t// title')
            gviz.body.append('\tfontsize = 50;')
            gviz.body.append('\tlabelloc = "t";')
            gviz.body.append('\tlabel = "' + title + '";')
            if save_file:
                filen = file_name + "_freq_" + str(label) + ".svg"
                dfg_vis_factory.save(gviz, filen)
                print("Saved DFG image to: " + filen)
                file_names.append(filen)
    if variant == "time" or variant == "all":
        for label, items in relevance_scores.items():
            data = filter_log_by_caseid(log, items['traces'])
            dfg = dfg_factory.apply(data)
            # performance view: mean time between directly-following activities
            # (note: rebinds `parameters` for the remaining iterations)
            parameters = {"format": "svg", "AGGREGATION_MEASURE": "mean"}
            gviz = dfg_vis_factory.apply(dfg, variant="performance",
                                         parameters=parameters)
            if len(items['traces']) == 1:
                title = "Prediction: " + str(
                    label) + ", Case ID: " + items['traces'][0]
            else:
                title = "No of Service Orders: " + str(len(
                    log)) + ", Filter: Repair not on time (Label = " + str(
                        label) + ")"
            # inject a graph title directly into the dot source
            gviz.body.append('\t// title')
            gviz.body.append('\tfontsize = 50;')
            gviz.body.append('\tlabelloc = "t";')
            gviz.body.append('\tlabel = "' + title + '";')
            if save_file:
                filen = file_name + "_time_" + str(label) + ".svg"
                dfg_vis_factory.save(gviz, filen)
                print("Saved DFG image to: " + filen)
                file_names.append(filen)
    return file_names
log) inductive_petri, inductive_initial_marking, inductive_final_marking = inductive_miner.apply( log) precision_alpha = precision_factory.apply(log, alpha_petri, alpha_initial_marking, alpha_final_marking) precision_inductive = precision_factory.apply(log, inductive_petri, inductive_initial_marking, inductive_final_marking) print("precision_alpha=", precision_alpha) print("precision_inductive=", precision_inductive) from pm4py.algo.discovery.dfg import factory as dfg_factory dfg = dfg_factory.apply(log) #----------------------------- from collections import Counter dfg1 = Counter({ ('scroll', 'blur'): 1, ('selection', 'blur'): 1, ('click-0', 'scroll'): 1, ('focus', 'selection'): 1, ('click-0', 'blur'): 1, ('blur', 'focus'): 1, ('scroll', 'click-0'): 1, ('focus', 'blur'): 1, ('scroll', 'selection'): 1, ('focus', 'scroll'): 1, ('load', 'click-0'): 1, ('load', 'scroll'): 1,
alpha_avg = [] alpha_num = [] for tree in range(1, 11): tel_tree_num = 0 log_tree_num = 0 print(tree) tree_avg_tel = 0 tree_avg_log = 0 for sam in range(1, 11): print(sam) path = os.path.join("input_data", "df_complete_logs", "%d_1000_%d.xes" % (tree, sam)) log = xes_importer.apply(path) dfg_org = dfg_factory.apply(log) start_act = set(get_start_activities(log).keys()) end_act = set(get_end_activities(log).keys()) alpha_path = os.path.join("input_data", "df_complete_logs", "df_complete_alpha", "%d_alpha_%d.xes" % (tree, sam)) alpha_log = xes_importer.apply(alpha_path) num = len(dfg_org.keys()) score_tel = 0 su_tel = 0 for k in range(10): for n in range(1, 1000): sampled_log = sampling.sample_log(alpha_log, no_traces=n) dfg_log = dfg_factory.apply(sampled_log)
def create_graphs(without_error, log, approach):
    """
    Creates visualizations of the given log: Directly-Follows Graphs
    (frequency and performance) and a Heuristic Net (svg shown on screen,
    plus plain-ext, dot and xdot exports).

    :param without_error: True if the log contains no errors; only affects
        the output file names ("no_error" vs "with_error")
    :param log: event log to visualise
    :param approach: approach name, embedded in the output file names
    """
    path = "common_path"
    naming_error = "no_error" if without_error else "with_error"
    parameters = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        "format": "svg"
    }

    def _names(vis_type, extension):
        # helper: build the bare file name and its full path inside `path`
        file = f"{vis_type}_{approach}_{naming_error}.{extension}"
        return file, f"{path}/{file}"

    # create dfg frequency and dfg performance (previously two copy-pasted
    # sections; now one loop over the two variants)
    for variant in ('frequency', 'performance'):
        file, filename = _names(f"dfg_{variant}", "svg")
        dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
        gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant,
                                     parameters=parameters)
        dfg_vis_factory.view(gviz)
        dfg_vis_factory.save(gviz, filename)
        log_info.info("DFG %s has been stored in '%s' in file '%s'",
                      variant, path, file)

    # create heuristic net (svg, shown on screen)
    vis_type = "heuristicnet"
    file, filename = _names(vis_type, "svg")
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.60
        })
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(gviz)
    hn_vis.save(gviz, filename)
    log_info.info("Heuristic Net has been stored in '%s' in file '%s'",
                  path, file)

    # export the heuristic net in the remaining graphviz text formats
    # (previously three copy-pasted sections; rendered log messages identical)
    for fmt in ("plain-ext", "dot", "xdot"):
        file, filename = _names(vis_type, fmt)
        gviz = hn_vis.apply(
            heu_net,
            parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: fmt})
        hn_vis.save(gviz, filename)
        log_info.info("Heuristic Net as .%s has been stored in '%s' "
                      "in file '%s'", fmt, path, file)
def run_dfg_miner(log, variant="frequency"):
    """Discover the directly-follows graph of `log`.

    :param log: event log
    :param variant: DFG variant to mine ("frequency" or "performance").
        BUGFIX: previously this argument was accepted but never forwarded,
        so the function always produced the default (frequency) DFG; the
        default value keeps existing callers' behavior unchanged.
    :return: the discovered DFG
    """
    dfg = dfg_factory.apply(log, variant=variant)
    #gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant)
    #dfg_vis_factory.view(gviz)
    return dfg
def directly_follows_graphs_perf(log_file):
    """Render the performance-annotated DFG of `log_file` and save it under static/."""
    graph = dfg_factory.apply(log_file)
    rendering = dfg_vis_factory.apply(graph, log=log_file, variant="performance")
    pn_vis_factory.save(rendering, "static/dag_performance.png")
    return "success!"
def show(model, tel, file_name, parameters):
    '''
    Show model and its quality measures

    :param model: model type ('ts' transition system, 'sbr' state based region,
        anything else: DFG miner, or 'alpha' for the alpha miner)
    :param tel: input log
    :param file_name: img file name to show model
    :param parameters: parameters for the transition system (afreq, sfreq)
    :return: (output_file_path, result, max_thresh) — path of the saved image,
        a result dictionary, and the maximum thresholds (None outside ts/sbr)
    '''
    # a log whose first event is a tel_event is treated as a "tel" log
    tel_flag = False
    if isinstance(tel[0][0], tel_event):
        tel_flag = True
    if model in ['ts', 'sbr']:
        # output path encodes the model type and the threshold/window settings
        if tel_flag:
            output_file_path = os.path.join(
                "static", "images",
                file_name[:file_name.find('.')] + '_' + model + '_' +
                str(parameters['afreq_thresh']) + '_' +
                str(parameters['sfreq_thresh']) + ".png")
        else:
            output_file_path = os.path.join(
                "static", "images",
                "2" + "_" + file_name[:file_name.find('.')] + '_' + model + '_' +
                str(parameters[PARAM_KEY_DIRECTION]) + '_' +
                str(parameters[PARAM_KEY_WINDOW]) + "_" +
                str(parameters[PARAM_KEY_VIEW]) + ".png")
        auto = utils.discover_annotated_automaton(tel, parameters=parameters)
        # collect maximum arc/state frequencies (only meaningful for tel logs)
        max_thresh = {}
        max_afreq = 0
        max_sfreq = 0
        if tel_flag:
            for trans in auto.transitions:
                max_afreq = max(max_afreq, trans.afreq)
            for state in auto.states:
                max_sfreq = max(max_sfreq, state.sfreq)
            max_thresh['afreq'] = max_afreq
            max_thresh['sfreq'] = max_sfreq
        if model == 'ts':
            # transition system: save the automaton itself and report its size
            result = {}
            gviz = vis_factory.apply(auto)
            vis_factory.save(gviz, output_file_path)
            result['num of transitions'] = len(auto.transitions)
            result['num of states'] = len(auto.states)
        else:
            # state-based regions: synthesize a Petri net and evaluate it
            net, im, fm = sb.petri_net_synthesis(auto)
            gviz = petri_vis_factory.apply(net, im, fm)
            petri_vis_factory.save(gviz, output_file_path)
            result = evaluation(net, im, fm, tel)
    else:
        if tel_flag:
            output_file_path = os.path.join(
                "static", "images",
                file_name[:file_name.find('.')] + '_' + model + '_' + ".png")
        else:
            output_file_path = os.path.join(
                "static", "images",
                "2" + file_name[:file_name.find('.')] + '_' + model + '_' + ".png")
        if model == 'alpha':
            # alpha miner; plain Event logs go through trans_alpha instead
            if isinstance(tel[0][0], Event):
                net, im, fm = trans_alpha(tel)
            else:
                net, im, fm = alpha_miner.apply(tel)
            gviz = petri_vis_factory.apply(net, im, fm)
            petri_vis_factory.save(gviz, output_file_path)
            result = evaluation(net, im, fm, tel)
        else:
            # DFG miner: the result is the DFG sorted by descending edge count
            dfg = dfg_factory.apply(tel)
            if tel_flag:
                dfg_tel = inductive_revise.get_dfg_graph_trans(tel)
                #dfg = dfg_tel + dfg
                dfg = dfg_tel
            gviz = dfg_vis_factory.apply(dfg, log=tel)
            dfg_vis_factory.save(gviz, output_file_path)
            result = dict(
                sorted(dfg.items(), key=operator.itemgetter(1), reverse=True))
        # no thresholds outside the ts/sbr branch (needed by the return below
        # for both the alpha and the DFG case)
        max_thresh = None
    return output_file_path, result, max_thresh
def apply(log, parameters=None, classic_output=False):
    """
    Gets a simple model out of a log

    Parameters
    -------------
    log
        Trace log
    parameters
        Parameters of the algorithm, including:
            maximum_number_activities -> Maximum number of activities to keep
            discovery_algorithm -> Discovery algorithm to use (alpha, inductive)
            desidered_output -> Desidered output of the algorithm (default: Petri)
            include_filtered_log -> Include the filtered log in the output
            include_dfg_frequency -> Include the DFG of frequencies in the output
            include_dfg_performance -> Include the DFG of performance in the output
            include_filtered_dfg_frequency -> Include the filtered DFG of frequencies in the output
            include_filtered_dfg_performance -> Include the filtered DFG of performance in the output
    classic_output
        Determine if the output shall contains directly the objects (e.g. net, initial_marking, final_marking)
        or can return a more detailed dictionary
    """
    if parameters is None:
        parameters = {}
    returned_dictionary = {}
    net = None
    initial_marking = None
    final_marking = None
    bpmn_graph = None
    dfg_frequency = None
    dfg_performance = None
    filtered_dfg_frequency = None
    filtered_dfg_performance = None
    # read the options, falling back to the documented defaults
    maximum_number_activities = parameters[
        "maximum_number_activities"] if "maximum_number_activities" in parameters else 20
    discovery_algorithm = parameters["discovery_algorithm"] if "discovery_algorithm" in parameters else "alpha"
    desidered_output = parameters["desidered_output"] if "desidered_output" in parameters else "petri"
    include_filtered_log = parameters["include_filtered_log"] if "include_filtered_log" in parameters else True
    include_dfg_frequency = parameters["include_dfg_frequency"] if "include_dfg_frequency" in parameters else True
    include_dfg_performance = parameters[
        "include_dfg_performance"] if "include_dfg_performance" in parameters else False
    include_filtered_dfg_frequency = parameters[
        "include_filtered_dfg_frequency"] if "include_filtered_dfg_frequency" in parameters else True
    include_filtered_dfg_performance = parameters[
        "include_filtered_dfg_performance"] if "include_filtered_dfg_performance" in parameters else False
    # resolve the activity attribute: either explicitly given, or searched
    # among the log classifiers (mutates `parameters` so downstream calls
    # see the resolved key)
    if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters:
        activity_key = parameters[
            PARAMETER_CONSTANT_ATTRIBUTE_KEY] if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters else DEFAULT_NAME_KEY
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key
    else:
        log, activity_key = insert_classifier.search_act_class_attr(log)
        if activity_key is None:
            activity_key = DEFAULT_NAME_KEY
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = activity_key
    if PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY]
    # keep only the `maximum_number_activities` most frequent activities
    activities_count_dictio = attributes_filter.get_attribute_values(log, activity_key)
    activities_count_list = []
    for activity in activities_count_dictio:
        activities_count_list.append([activity, activities_count_dictio[activity]])
    activities_count_list = sorted(activities_count_list, key=lambda x: x[1], reverse=True)
    activities_count_list = activities_count_list[:min(len(activities_count_list), maximum_number_activities)]
    activities_keep_list = [x[0] for x in activities_count_list]
    log = attributes_filter.apply(log, activities_keep_list, parameters=parameters)
    # auto-filter the log according to the chosen discovery algorithm
    filtered_log = None
    if "alpha" in discovery_algorithm:
        # parameters_sa = deepcopy(parameters)
        # parameters_sa["decreasingFactor"] = 1.0
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = filter_topvariants_soundmodel.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        filtered_log = start_activities_filter.apply_auto_filter(log, parameters=parameters)
        filtered_log = end_activities_filter.apply_auto_filter(filtered_log, parameters=parameters)
        filtered_log = auto_filter.apply_auto_filter(filtered_log, parameters=parameters)
    # dfg_frequency is also required by the dfg_mining conversion below,
    # hence the extra condition
    if include_dfg_frequency or "dfg_mining" in discovery_algorithm:
        dfg_frequency = dfg_factory.apply(log, parameters=parameters, variant="frequency")
    if include_dfg_performance:
        dfg_performance = dfg_factory.apply(log, parameters=parameters, variant="performance")
    if include_filtered_dfg_frequency:
        filtered_dfg_frequency = dfg_factory.apply(filtered_log, parameters=parameters, variant="frequency")
    if include_filtered_dfg_performance:
        filtered_dfg_performance = dfg_factory.apply(filtered_log, parameters=parameters, variant="performance")
    # discover the Petri net with the chosen algorithm
    if "alpha" in discovery_algorithm:
        net, initial_marking, final_marking = alpha_miner.apply(filtered_log, parameters=parameters)
    elif "dfg_mining" in discovery_algorithm:
        start_activities = start_activities_filter.get_start_activities(filtered_log, parameters=parameters)
        end_activities = end_activities_filter.get_end_activities(filtered_log, parameters=parameters)
        parameters_conv = {}
        parameters_conv["start_activities"] = start_activities
        parameters_conv["end_activities"] = end_activities
        net, initial_marking, final_marking = dfg_conv_factory.apply(dfg_frequency, parameters=parameters_conv)
    # assemble the requested outputs into the returned dictionary
    if filtered_log is not None and include_filtered_log:
        returned_dictionary["filtered_log"] = filtered_log
    if net is not None and desidered_output == "petri":
        returned_dictionary["net"] = net
    if initial_marking is not None and desidered_output == "petri":
        returned_dictionary["initial_marking"] = initial_marking
    if final_marking is not None and desidered_output == "petri":
        returned_dictionary["final_marking"] = final_marking
    if bpmn_graph is not None and desidered_output == "bpmn":
        returned_dictionary["bpmn_graph"] = bpmn_graph
    if dfg_frequency is not None and include_dfg_frequency:
        returned_dictionary["dfg_frequency"] = dfg_frequency
    if dfg_performance is not None and include_dfg_performance:
        returned_dictionary["dfg_performance"] = dfg_performance
    if filtered_dfg_frequency is not None and include_filtered_dfg_frequency:
        returned_dictionary["filtered_dfg_frequency"] = filtered_dfg_frequency
    if filtered_dfg_performance is not None and include_filtered_dfg_performance:
        returned_dictionary["filtered_dfg_performance"] = filtered_dfg_performance
    # classic output: just the net triple (only when a Petri net was requested
    # and discovered); otherwise the detailed dictionary
    if classic_output:
        if net is not None and desidered_output == "petri":
            return net, initial_marking, final_marking
    return returned_dictionary
def get_decorated_net(net, initial_marking, final_marking, log, parameters=None, variant="frequency"):
    """
    Get a decorated net according to the specified variant (decorate Petri net based on DFG)

    Parameters
    ------------
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    log
        Log to use to decorate the Petri net
    parameters
        Algorithm parameters (an "aggregationMeasure" entry overrides the
        default aggregation)
    variant
        Specify if the decoration should take into account the frequency
        or the performance

    Returns
    ------------
    gviz
        GraphViz object
    """
    if parameters is None:
        parameters = {}
    # frequencies are summed, performance values averaged, unless the caller
    # explicitly overrides the aggregation measure
    if "aggregationMeasure" in parameters:
        aggregation_measure = parameters["aggregationMeasure"]
    elif "frequency" in variant:
        aggregation_measure = "sum"
    else:
        aggregation_measure = "mean"
    activity_key = parameters.get(PARAMETER_CONSTANT_ACTIVITY_KEY,
                                  xes.DEFAULT_NAME_KEY)
    # DFG, shortest paths and activity counts drive the decoration
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    spaths = get_shortest_paths(net)
    activities_count = attributes_filter.get_attribute_values(
        log, activity_key, parameters=parameters)
    decorations = get_decorations_from_dfg_spaths_acticount(
        net, dfg, spaths, activities_count,
        variant=variant, aggregation_measure=aggregation_measure)
    return visualize.apply(net, initial_marking, final_marking,
                           parameters=parameters, decorations=decorations)
def directly_follows_graphs_freq(log_file):
    """
    Mine the frequency DFG of the given log, render it, and save the
    picture under static/dag_frequency.png.

    Parameters
    ------------
    log_file
        Event log to mine

    Returns
    ------------
    message
        The string "success!" once the image has been written
    """
    frequency_dfg = dfg_factory.apply(log_file)
    rendering = dfg_vis_factory.apply(frequency_dfg, log=log_file, variant="frequency")
    pn_vis_factory.save(rendering, "static/dag_frequency.png")
    return "success!"
def apply(log, parameters=None):
    """
    Gets the Petri net through Inductive Miner, decorated by frequency metric

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm (optional keys: "decreasingFactor",
        the activity-key constant)

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    (plus further positional items: activities, start/end activities,
    base64 of the gviz source, graph structure, algorithm tags and the
    activity key — callers rely on this exact order)
    """
    if parameters is None:
        parameters = {}

    # noise-cleaning factor; falls back to the project-wide default
    decreasingFactor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else constants.DEFAULT_DEC_FACTOR

    activity_key = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    # reduce the depth of the search done by token-based replay
    # NOTE: these are module-level globals, so every later caller of
    # token_replay in the process is affected, not just this call
    token_replay.MAX_REC_DEPTH = 1
    token_replay.MAX_IT_FINAL1 = 1
    token_replay.MAX_IT_FINAL2 = 1
    token_replay.MAX_REC_DEPTH_HIDTRANSENABL = 1

    # cap the number of distinct activities, then auto-filter the log
    log = attributes_filter.filter_log_on_max_no_activities(
        log, max_no_activities=constants.MAX_NO_ACTIVITIES, parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    # mine the DFG and remove low-frequency arcs before discovery
    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)

    net, im, fm = inductive_miner.apply_dfg(dfg,
                                            parameters=parameters,
                                            activities=activities,
                                            start_activities=start_activities,
                                            end_activities=end_activities)
    # NOTE: mutates the caller-supplied parameters dict
    parameters["format"] = "svg"
    gviz = pn_vis_factory.apply(net,
                                im,
                                fm,
                                log=filtered_log,
                                variant="frequency",
                                parameters=parameters)
    svg = get_base64_from_gviz(gviz)
    # base64 of the raw gviz source (bytes), distinct from the SVG base64
    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))
    ret_graph = get_graph.get_graph_from_petri(net, im, fm)
    return svg, export_petri_as_string(
        net, im, fm
    ), ".pnml", "xes", activities, start_activities, end_activities, gviz_base64, ret_graph, "inductive", "freq", None, "", activity_key
def apply(log, parameters=None):
    """
    Gets the frequency DFG

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm (optional keys: "decreasingFactor",
        the activity-key constant)

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model
    format
        Format of the model
    (plus further positional items: activities, start/end activities,
    base64 of the gviz source, graph structure, algorithm tags and the
    activity key — callers rely on this exact order)
    """
    if parameters is None:
        parameters = {}

    # noise-cleaning factor; falls back to the project-wide default
    decreasingFactor = parameters[
        "decreasingFactor"] if "decreasingFactor" in parameters else constants.DEFAULT_DEC_FACTOR

    activity_key = parameters[
        pm4_constants.
        PARAMETER_CONSTANT_ACTIVITY_KEY] if pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY in parameters else xes.DEFAULT_NAME_KEY

    # cap the number of distinct activities, then auto-filter the log
    log = attributes_filter.filter_log_on_max_no_activities(
        log, max_no_activities=constants.MAX_NO_ACTIVITIES, parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    activities_count = attributes_filter.get_attribute_values(
        filtered_log, activity_key)
    activities = list(activities_count.keys())
    start_activities = list(
        start_activities_filter.get_start_activities(
            filtered_log, parameters=parameters).keys())
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log,
                                                 parameters=parameters).keys())

    # mine the DFG and remove low-frequency arcs before visualization
    dfg = dfg_factory.apply(filtered_log, parameters=parameters)
    dfg = clean_dfg_based_on_noise_thresh(
        dfg,
        activities,
        decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER,
        parameters=parameters)

    # NOTE: mutates the caller-supplied parameters dict (three keys)
    parameters["format"] = "svg"
    parameters["start_activities"] = start_activities
    parameters["end_activities"] = end_activities
    gviz = dfg_vis_factory.apply(dfg,
                                 log=filtered_log,
                                 variant="frequency",
                                 parameters=parameters)
    # base64 of the raw gviz source (bytes), distinct from the SVG base64
    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))
    ret_graph = get_graph.get_graph_from_dfg(dfg, start_activities,
                                             end_activities)
    # convert the DFG to a Petri net only for the exported PNML string
    net, im, fm = dfg_conv_factory.apply(dfg,
                                         parameters={
                                             "start_activities":
                                             start_activities,
                                             "end_activities": end_activities
                                         })
    return get_base64_from_gviz(gviz), export_petri_as_string(
        net, im, fm
    ), ".pnml", "xes", activities, start_activities, end_activities, gviz_base64, ret_graph, "dfg", "freq", None, "", activity_key
def apply(log, parameters=None):
    """
    Gets the process tree using Inductive Miner Directly-Follows

    Parameters
    ------------
    log
        Log
    parameters
        Parameters of the algorithm (optional keys: "decreasingFactor",
        the activity-key constant)

    Returns
    ------------
    base64
        Base64 of an SVG representing the model
    model
        Text representation of the model (None for trees)
    format
        Format of the model
    (plus further positional items: activities, start/end activities,
    base64 of the gviz source, graph structure, algorithm tags and the
    activity key — callers rely on this exact order)
    """
    if parameters is None:
        parameters = {}

    # noise-cleaning factor and event-name attribute, with project defaults
    decreasingFactor = parameters.get("decreasingFactor", constants.DEFAULT_DEC_FACTOR)
    activity_key = parameters.get(pm4_constants.PARAMETER_CONSTANT_ACTIVITY_KEY, xes.DEFAULT_NAME_KEY)

    # cap the number of distinct activities, then auto-filter the log
    log = attributes_filter.filter_log_on_max_no_activities(
        log, max_no_activities=constants.MAX_NO_ACTIVITIES, parameters=parameters)
    filtered_log = auto_filter.apply_auto_filter(log, parameters=parameters)

    occurrence_counts = attributes_filter.get_attribute_values(filtered_log, activity_key)
    activities = list(occurrence_counts)
    start_activities = list(
        start_activities_filter.get_start_activities(filtered_log, parameters=parameters))
    end_activities = list(
        end_activities_filter.get_end_activities(filtered_log, parameters=parameters))

    # mine the DFG and remove low-frequency arcs before discovery
    follows_graph = dfg_factory.apply(filtered_log, parameters=parameters)
    noise_threshold = decreasingFactor * constants.DEFAULT_DFG_CLEAN_MULTIPLIER
    follows_graph = clean_dfg_based_on_noise_thresh(
        follows_graph, activities, noise_threshold, parameters=parameters)

    tree = inductive_miner.apply_tree_dfg(follows_graph,
                                          parameters=parameters,
                                          activities=activities,
                                          start_activities=start_activities,
                                          end_activities=end_activities)

    # NOTE: mutates the caller-supplied parameters dict
    parameters["format"] = "svg"
    gviz = pt_vis_factory.apply(tree, parameters=parameters)
    # base64 of the raw gviz source (bytes), distinct from the SVG base64
    gviz_base64 = base64.b64encode(str(gviz).encode('utf-8'))

    return (get_base64_from_gviz(gviz), None, "", "xes", activities,
            start_activities, end_activities, gviz_base64, [], "tree", "freq",
            None, "", activity_key)