def execute_script():
    log_input_directory = "xesinput"
    all_logs_names = os.listdir(log_input_directory)
    all_logs_names = [log for log in all_logs_names if ".xe" in log]

    for logName in all_logs_names:
        # logPath = os.path.join("..", "tests", "inputData", logName)
        log_path = log_input_directory + "\\" + logName
        log = xes_importer.import_log(log_path)
        print("\n\n")
        print("log loaded")
        print("Number of traces - ", len(log))
        event_log = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
        exp_log_name = "xescert_exportlogs" + "\\" + "exp_" + logName
        print("exporting log", exp_log_name)
        xes_exporter.export_log(log, exp_log_name)
        print("exported log", exp_log_name)

        log, classifier_attr_key = insert_classifier.search_act_class_attr(log)

        classifiers = list(log.classifiers.keys())
        if classifier_attr_key is None and classifiers:
            try:
                print(classifiers)
                log, classifier_attr_key = insert_classifier.insert_activity_classifier_attribute(
                    log, classifiers[0])
                print(classifier_attr_key)
            except Exception:
                print("exception in handling classifier")

        if classifier_attr_key is None:
            classifier_attr_key = "concept:name"

        if len(event_log) > 0 and classifier_attr_key in event_log[0]:
            parameters = {constants.PARAMETER_CONSTANT_ACTIVITY_KEY: classifier_attr_key}
            dfg = dfg_factory.apply(log, parameters=parameters)
            gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency", parameters=parameters)
            # dfg_vis_factory.view(gviz)
            dfg_vis_factory.save(gviz, "xescert_images\\" + logName.replace("xes", "png"))

        print("Reimporting log file just exported - ", exp_log_name)
        log = xes_importer.import_log(exp_log_name)
        print("log loaded", exp_log_name)
        print("Number of traces - ", len(log))
        event_log = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
        print("Number of events - ", len(event_log))
        print("Classifiers ", log.classifiers)
def bot_dfg(chat_id):
    log = get_current_log(chat_id)
    dfg = dfg_factory.apply(log)
    gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency")
    new_file, filename = tempfile.mkstemp(suffix=".png")  # dotted suffix so the temp file gets a proper extension
    os.close(new_file)  # close the descriptor returned by mkstemp; save() writes to the path itself
    dfg_vis_factory.save(gviz, filename)
    return filename
def draw_DFG(dfg):
    parameters = {"format": "svg"}
    gviz = dfg_vis_factory.apply(dfg, variant="performance", parameters=parameters)
    # log = xes_importer.import_log(r"")
    # dfg = dfg_factory.apply(log)
    # gviz = dfg_vis_factory.apply(dfg, log=log, variant="performance", parameters=parameters)
    dfg_vis_factory.save(gviz, "dfg.svg")
def visualize_dfg(log, filename):
    """
    Visualizes an event log as a DFG

    :param log: event log that will be visualized
    :param filename: filename for the created DFG
    """
    dfg = dfg_factory.apply(log)
    parameters = {"format": "svg"}
    gviz = dfg_vis_factory.apply(dfg, log=log, parameters=parameters, variant='frequency')
    dfg_vis_factory.save(gviz, filename)
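# Usage sketch for visualize_dfg above (illustrative only, not part of the original
# snippet): the legacy xes_importer factory import mirrors the other examples in
# this collection, and "running-example.xes" is a hypothetical input path.
from pm4py.objects.log.importer.xes import factory as xes_importer

example_log = xes_importer.import_log("running-example.xes")
visualize_dfg(example_log, "running-example_dfg.svg")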
def Hueristics(file):
    # import os
    from pm4py.objects.log.importer.xes import factory as xes_importer
    log = xes_importer.import_log(file)
    from pm4py.algo.discovery.dfg import factory as dfg_factory
    dfg = dfg_factory.apply(log)
    from pm4py.visualization.dfg import factory as dfg_vis_factory
    gviz = dfg_vis_factory.apply(dfg, log=log, variant="frequency")
    location = "/mnt/c/Users/harim/Downloads/dfg.png"
    dfg_vis_factory.save(gviz, location)
    return location
def export_process_model(dfgModel, log, filename):
    '''
    Description: export the graphical process model in .svg format using pm4py library functions
    Used: generate and export the process model under the provided file name
    Input: dfgModel, log file, file name
    Output: N/A
    '''
    # dfg = dfg_factory.apply(log, variant="performance")
    parameters = {"format": "svg"}
    gviz = dfg_vis_factory.apply(dfgModel, log=log, variant="frequency", parameters=parameters)
    dfg_vis_factory.save(gviz, filename)
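# Usage sketch for export_process_model above (illustrative only): the DFG is
# discovered with the same dfg_factory used in the other snippets, and the
# input/output file names are hypothetical.
example_log = xes_importer.import_log("invoices.xes")
dfg_model = dfg_factory.apply(example_log)  # frequency DFG (default variant)
export_process_model(dfg_model, example_log, "invoices_dfg.svg")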
def view_model(dfg, dir=""): #rounding for better plot for key in dfg.keys(): dfg[key] = round(dfg[key]) parameters = {"format": "svg"} gviz = dfg_vis_factory.apply(dfg, variant="frequency", parameters=parameters) # gviz = dfg_vis_factory.apply(dfg, variant="performance", parameters=parameters)# for time dfg_vis_factory.save(gviz, dir + ".svg") drawing = svg2rlg(dir + ".svg") renderPDF.drawToFile(drawing, dir + ".pdf") return # log = xes_import_factory.apply(r"C:\Gamal Elkoumy\PhD\OneDrive - Tartu Ülikool\Data\Data XES\Sepsis Cases - Event Log.xes") # data=get_dataframe_from_event_stream(log) # dfg_freq = dfg_factory.apply(log,variant="frequency") # # view_model(dfg_freq,r"C:\Gamal Elkoumy\PhD\OneDrive - Tartu Ülikool\Differential Privacy\source code\experiment_figures\temp")
def _create_image(self, gviz, img_name, verbose=False):
    """
    Create an image file of the generated diagram (DFG, BPMN, Petri net)

    :param gviz: diagram (graphviz object) generated by pm4py
    :param img_name: name of the image to be saved
    :param verbose: display log messages while generating images
    """
    try:
        img_path = os.path.join(self.discovery_path, f'{self.filename}_{img_name}.pdf')
        if "alpha_miner" in img_name:
            vis_factory.save(gviz, img_path)
        elif "heuristic_miner" in img_name:
            hn_vis_factory.save(gviz, img_path)
        elif "petri_net" in img_name:
            pn_vis_factory.save(gviz, img_path)
        elif "DFG" in img_name:
            dfg_vis_factory.save(gviz, img_path)
        # elif "BPMN" in img_name:
        #     bpmn_vis_factory.save(gviz, img_path)
        if verbose:
            self.status_queue.put(f"[PROCESS MINING] Generated {img_name} in {img_path}")
    except PermissionError as e:
        print(f"[PROCESS MINING] Could not save image because of permission error: {e}")
        print("Trying to save image on desktop")
        img_path = os.path.join(utils.utils.DESKTOP, f'{self.filename}_{img_name}.pdf')
        if "alpha_miner" in img_name:
            vis_factory.save(gviz, img_path)
        elif "heuristic_miner" in img_name:
            hn_vis_factory.save(gviz, img_path)
        elif "petri_net" in img_name:
            pn_vis_factory.save(gviz, img_path)
        elif "DFG" in img_name:
            dfg_vis_factory.save(gviz, img_path)
        # elif "BPMN" in img_name:
        #     bpmn_vis_factory.save(gviz, img_path)
    except Exception as e:
        print(f"[PROCESS MINING] Could not save image: {e}")
def show(model, tel, file_name, parameters):
    '''
    Show a model and its quality measures

    :param model: model type (transition system, state based region, DFG miner, alpha miner)
    :param tel: input log
    :param file_name: image file name used to render the model
    :param parameters: parameters for the transition system (afreq, sfreq)
    :return:
    '''
    tel_flag = False
    if isinstance(tel[0][0], tel_event):
        tel_flag = True

    if model in ['ts', 'sbr']:
        if tel_flag:
            output_file_path = os.path.join(
                "static", "images",
                file_name[:file_name.find('.')] + '_' + model + '_'
                + str(parameters['afreq_thresh']) + '_' + str(parameters['sfreq_thresh']) + ".png")
        else:
            output_file_path = os.path.join(
                "static", "images",
                "2" + "_" + file_name[:file_name.find('.')] + '_' + model + '_'
                + str(parameters[PARAM_KEY_DIRECTION]) + '_' + str(parameters[PARAM_KEY_WINDOW])
                + "_" + str(parameters[PARAM_KEY_VIEW]) + ".png")
        auto = utils.discover_annotated_automaton(tel, parameters=parameters)
        max_thresh = {}
        max_afreq = 0
        max_sfreq = 0
        if tel_flag:
            for trans in auto.transitions:
                max_afreq = max(max_afreq, trans.afreq)
            for state in auto.states:
                max_sfreq = max(max_sfreq, state.sfreq)
            max_thresh['afreq'] = max_afreq
            max_thresh['sfreq'] = max_sfreq
        if model == 'ts':
            result = {}
            gviz = vis_factory.apply(auto)
            vis_factory.save(gviz, output_file_path)
            result['num of transitions'] = len(auto.transitions)
            result['num of states'] = len(auto.states)
        else:
            net, im, fm = sb.petri_net_synthesis(auto)
            gviz = petri_vis_factory.apply(net, im, fm)
            petri_vis_factory.save(gviz, output_file_path)
            result = evaluation(net, im, fm, tel)
    else:
        if tel_flag:
            output_file_path = os.path.join(
                "static", "images",
                file_name[:file_name.find('.')] + '_' + model + '_' + ".png")
        else:
            output_file_path = os.path.join(
                "static", "images",
                "2" + file_name[:file_name.find('.')] + '_' + model + '_' + ".png")
        if model == 'alpha':
            if isinstance(tel[0][0], Event):
                net, im, fm = trans_alpha(tel)
            else:
                net, im, fm = alpha_miner.apply(tel)
            gviz = petri_vis_factory.apply(net, im, fm)
            petri_vis_factory.save(gviz, output_file_path)
            result = evaluation(net, im, fm, tel)
        else:
            dfg = dfg_factory.apply(tel)
            if tel_flag:
                dfg_tel = inductive_revise.get_dfg_graph_trans(tel)
                # dfg = dfg_tel + dfg
                dfg = dfg_tel
            gviz = dfg_vis_factory.apply(dfg, log=tel)
            dfg_vis_factory.save(gviz, output_file_path)
            result = dict(sorted(dfg.items(), key=operator.itemgetter(1), reverse=True))
        max_thresh = None

    return output_file_path, result, max_thresh
def visualize_dfg(self, log, save_file=False, file_name="dfg", variant="relevance"):
    """
    Visualises the event log as a directly-follows graph (DFG).

    :param log: event log as a list of traces [list].
    :param save_file: boolean flag indicating whether to save the DFG [bool].
    :param file_name: name of the file [str].
    :param variant: DFG variant to be produced: "frequency", "time", "relevance" or "all" [str].
    :return: file_names [list].
    """
    parameters = {"format": "svg"}
    file_names = []
    relevance_scores = self.aggregate_relevance_scores(log)

    if variant == "relevance" or variant == "all":
        for label, items in relevance_scores.items():
            data = filter_log_by_caseid(log, items['traces'])
            dfg = dfg_factory.apply(data)
            gviz = dfg_vis_factory.apply(dfg, activities_count=items['scores'], parameters=parameters)
            if len(items['traces']) == 1:
                title = "Prediction: " + str(label) + ", Case ID: " + items['traces'][0]
            else:
                title = ("No of Service Orders: " + str(len(log)) +
                         ", Filter: Repair not on time (Label = " + str(label) + ")")
            gviz.body.append('\t// title')
            gviz.body.append('\tfontsize = 50;')
            gviz.body.append('\tlabelloc = "t";')
            gviz.body.append('\tlabel = "' + title + '";')
            print("rel_sc: ", items['scores'])
            if save_file:
                filen = file_name + "_rel_" + str(label) + ".svg"
                dfg_vis_factory.save(gviz, filen)
                print("Saved DFG image to: " + filen)
                file_names.append(filen)

    if variant == "frequency" or variant == "all":
        for label, items in relevance_scores.items():
            data = filter_log_by_caseid(log, items['traces'])
            dfg = dfg_factory.apply(data)
            activities_cnt = attributes_filter.get_attribute_values(log, attribute_key="concept:name")
            gviz = dfg_vis_factory.apply(dfg, activities_count=activities_cnt, parameters=parameters)
            if len(items['traces']) == 1:
                title = "Prediction: " + str(label) + ", Case ID: " + items['traces'][0]
            else:
                title = ("No of Service Orders: " + str(len(log)) +
                         ", Filter: Repair not on time (Label = " + str(label) + ")")
            gviz.body.append('\t// title')
            gviz.body.append('\tfontsize = 50;')
            gviz.body.append('\tlabelloc = "t";')
            gviz.body.append('\tlabel = "' + title + '";')
            if save_file:
                filen = file_name + "_freq_" + str(label) + ".svg"
                dfg_vis_factory.save(gviz, filen)
                print("Saved DFG image to: " + filen)
                file_names.append(filen)

    if variant == "time" or variant == "all":
        for label, items in relevance_scores.items():
            data = filter_log_by_caseid(log, items['traces'])
            dfg = dfg_factory.apply(data)
            parameters = {"format": "svg", "AGGREGATION_MEASURE": "mean"}
            gviz = dfg_vis_factory.apply(dfg, variant="performance", parameters=parameters)
            if len(items['traces']) == 1:
                title = "Prediction: " + str(label) + ", Case ID: " + items['traces'][0]
            else:
                title = ("No of Service Orders: " + str(len(log)) +
                         ", Filter: Repair not on time (Label = " + str(label) + ")")
            gviz.body.append('\t// title')
            gviz.body.append('\tfontsize = 50;')
            gviz.body.append('\tlabelloc = "t";')
            gviz.body.append('\tlabel = "' + title + '";')
            if save_file:
                filen = file_name + "_time_" + str(label) + ".svg"
                dfg_vis_factory.save(gviz, filen)
                print("Saved DFG image to: " + filen)
                file_names.append(filen)

    return file_names
import os

from pm4py.visualization.dfg import factory as dfg_vis_factory

from pm4pyspark.importer.csv import spark_df_imp as importer
from pm4pyspark.algo.discovery.dfg import factory as dfg_factory


parameters = {"format": "svg"}

event_stream_ex = importer.import_event_stream(
    os.path.join(INPUT_DATA_DIR, "running-example.csv"), parameters={"header": True})
log_ex = importer.transform_event_stream_to_event_log(event_stream_ex)
df_ex = importer.import_sparkdf_from_path(
    os.path.join(INPUT_DATA_DIR, "running-example.csv"), header=True, inferSchema=True)

dfg_freq = dfg_factory.apply(df_ex)
gviz_freq = dfg_vis_factory.apply(dfg_freq, log=log_ex, parameters=parameters, variant="frequency")
dfg_vis_factory.save(gviz_freq, os.path.join(OUTPUT_DATA_DIR, "running-example_freq.svg"))

dfg_perf = dfg_factory.apply(df_ex, variant="performance")
gviz_perf = dfg_vis_factory.apply(dfg_perf, log=log_ex, parameters=parameters, variant="performance")
dfg_vis_factory.save(gviz_perf, os.path.join(OUTPUT_DATA_DIR, "running-example_perf.svg"))
def create_graphs(without_error, log, approach):
    """ creates visualization: Directly-Follows-Graph and Heuristic Net """
    # create dfg frequency
    path = "common_path"
    vis_type = "dfg_frequency"
    naming_error = "with_error"
    if without_error:
        naming_error = "no_error"
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg"
    parameters = {
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        "format": "svg"
    }
    variant = 'frequency'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant, parameters=parameters)
    dfg_vis_factory.view(gviz)
    dfg_vis_factory.save(gviz, filename)
    log_info.info("DFG frequency has been stored in '%s' in file '%s'", path, file)

    # create dfg performance
    vis_type = "dfg_performance"
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg"
    variant = 'performance'
    dfg = dfg_factory.apply(log, variant=variant, parameters=parameters)
    gviz = dfg_vis_factory.apply(dfg, log=log, variant=variant, parameters=parameters)
    dfg_vis_factory.view(gviz)
    dfg_vis_factory.save(gviz, filename)
    log_info.info("DFG performance has been stored in '%s' in file '%s'", path, file)

    # create heuristic net
    vis_type = "heuristicnet"
    file = f"{vis_type}_{approach}_{naming_error}.svg"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.svg"
    heu_net = heuristics_miner.apply_heu(
        log,
        parameters={
            heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.60
        })
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "svg"})
    hn_vis.view(gviz)
    hn_vis.save(gviz, filename)
    log_info.info("Heuristic Net has been stored in '%s' in file '%s'", path, file)

    # save heuristic net in plain-ext format
    file = f"{vis_type}_{approach}_{naming_error}.plain-ext"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.plain-ext"
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "plain-ext"})
    hn_vis.save(gviz, filename)
    log_info.info("Heuristic Net as .plain-ext has been stored in '%s' in file '%s'", path, file)

    # save heuristic net in dot format
    file = f"{vis_type}_{approach}_{naming_error}.dot"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.dot"
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "dot"})
    hn_vis.save(gviz, filename)
    log_info.info("Heuristic Net as .dot has been stored in '%s' in file '%s'", path, file)

    # save heuristic net in xdot format
    file = f"{vis_type}_{approach}_{naming_error}.xdot"
    filename = f"{path}/{vis_type}_{approach}_{naming_error}.xdot"
    gviz = hn_vis.apply(
        heu_net,
        parameters={hn_vis.Variants.PYDOTPLUS.value.Parameters.FORMAT: "xdot"})
    hn_vis.save(gviz, filename)
    log_info.info("Heuristic Net as .xdot has been stored in '%s' in file '%s'", path, file)
def makeDFG_connector(ConnectorBasicStructure, frequency_threshold, dfg_path, **keyword_param):
    unique_activities = ConnectorBasicStructure['activity'].unique()
    unique_next_activities = ConnectorBasicStructure['prev_activity'].unique()
    activitySet = set(unique_activities) | set(unique_next_activities)
    activityList = list(activitySet)
    activityList.sort()
    activityList.remove(':Start:')

    # edges
    groupedbyactivityPairs = ConnectorBasicStructure.groupby(
        ['prev_activity', 'activity']).size().reset_index(name='counts')

    # just to return as matrix
    ActActMatrix = np.zeros([len(activityList), len(activityList)])
    for prev_activity, activity in zip(ConnectorBasicStructure['prev_activity'],
                                       ConnectorBasicStructure['activity']):
        if prev_activity == ":Start:":
            continue
        ActActMatrix[activityList.index(prev_activity)][activityList.index(activity)] += 1

    edges_dict = {}
    sumFrequency = groupedbyactivityPairs['counts'].sum() - groupedbyactivityPairs.loc[
        groupedbyactivityPairs['prev_activity'] == ":Start:", 'counts'][0]
    # edges_list = []
    for index, row in groupedbyactivityPairs.iterrows():
        if row['prev_activity'] == ":Start:":
            continue
        # edge_dict = {}
        edge_list = []
        if keyword_param['encryption']:
            edge_list.append(Utilities.AES_ECB_Encrypt(
                row['prev_activity'].encode('utf-8')[0:5], keyword_param['key']))
            edge_list.append(Utilities.AES_ECB_Encrypt(
                row['activity'].encode('utf-8')[0:5], keyword_param['key']))
        else:
            edge_list.append(row['prev_activity'])
            edge_list.append(row['activity'])
        edge_tuple = tuple(edge_list)
        if row['counts'] / sumFrequency >= frequency_threshold:
            edges_dict[edge_tuple] = row['counts']
        # edges_list.append(edge_dict)
        # edges_dict.append(edge_dict)

    # nodes
    activity_frequencyDF = ConnectorBasicStructure.groupby(['activity']).size().reset_index(name='counts')
    prev_activity_frequencyDF = ConnectorBasicStructure.groupby(['prev_activity']).size().reset_index(name='counts')
    prev_activity_frequencyDF = prev_activity_frequencyDF.rename(columns={'prev_activity': 'activity'})
    final_activity_fequency = pd.concat([activity_frequencyDF, prev_activity_frequencyDF]).drop_duplicates(
        subset='activity', keep="first").reset_index(drop=True)

    nodes = final_activity_fequency.set_index('activity').T.to_dict('records')
    nodes[0].pop(':Start:')

    # Making encrypted nodes
    nodes_new = {}
    for key, value in nodes[0].items():
        nodes_new[Utilities.AES_ECB_Encrypt(key.encode('utf-8'), keyword_param['key'])[0:5]] = value

    if keyword_param['encryption']:
        gviz = dfg_vis_factory.apply(edges_dict, activities_count=nodes_new, parameters={"format": "svg"})
    else:
        gviz = dfg_vis_factory.apply(edges_dict, activities_count=nodes[0], parameters={"format": "svg"})

    if keyword_param['visualization']:
        dfg_vis_factory.view(gviz)

    dfg_vis_factory.save(gviz, dfg_path)

    return ActActMatrix, activityList
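# Minimal sketch of the pattern makeDFG_connector relies on (illustrative values
# only): dfg_vis_factory.apply also accepts a plain {(source, target): count} dict
# together with an activities_count mapping, so a DFG can be rendered without first
# discovering it from an event log.
edges = {("register request", "check ticket"): 6, ("check ticket", "decide"): 6}
activities = {"register request": 6, "check ticket": 6, "decide": 6}
gviz = dfg_vis_factory.apply(edges, activities_count=activities, parameters={"format": "svg"})
dfg_vis_factory.save(gviz, "manual_dfg.svg")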
########################################
#### mean of number of days between each process #######
from pm4py.algo.discovery.dfg import factory as dfg_factory
dfg = dfg_factory.apply(conv_log, variant="performance")
parameters = {"format": "png"}
from pm4py.visualization.dfg import factory as dfg_vis_factory
gviz = dfg_vis_factory.apply(dfg, log=conv_log, variant="performance", parameters=parameters)
dfg_vis_factory.save(gviz, "performance_invoice.png")
dfg_vis_factory.view(gviz)

##################################
#### mean of frequency between each process #######
from pm4py.algo.discovery.dfg import factory as dfg_factory
from pm4py.visualization.dfg import factory as dfg_vis_factory
dfg = dfg_factory.apply(conv_log)
parameters = {"format": "png"}
gviz1 = dfg_vis_factory.apply(dfg, variant="frequency", log=conv_log,
                              parameters=parameters)  # closing argument assumed; the original snippet is truncated here