def f1_score(xes_file, dfg1, dfg2):
    """Replay-based fitness of two DFGs against the same event log.

    Each directly-follows graph is first turned into a Petri net by the
    inductive miner, then token-replay fitness of the log is computed on
    each net.

    NOTE(review): despite the name, this returns replay fitness values,
    not an F1 score. The unused ``f1_score_1``/``f1_score_2`` locals of the
    original implementation were removed.

    :param xes_file: event log (whatever replay_factory.apply accepts)
    :param dfg1: first directly-follows graph
    :param dfg2: second directly-follows graph
    :return: tuple (fitness of dfg1's net, fitness of dfg2's net)
    """
    # Convert both DFGs into Petri nets with the inductive miner.
    net1, initial_marking1, final_marking1 = inductive_miner.apply(dfg1)
    net2, initial_marking2, final_marking2 = inductive_miner.apply(dfg2)
    # Token replay of the log on each discovered net.
    fitness_1 = replay_factory.apply(xes_file, net1, initial_marking1, final_marking1)
    fitness_2 = replay_factory.apply(xes_file, net2, initial_marking2, final_marking2)
    return fitness_1, fitness_2
def test_docmeasures11(self):
    """Smoke-test fitness, precision, generalization, simplicity and the
    combined evaluation on the receipt log (legacy pm4py API)."""
    from pm4py.log.importer import xes as xes_importer
    from pm4py.algo.alpha import factory as alpha_miner
    from pm4py.algo.inductive import factory as inductive_miner
    from pm4py.evaluation.replay_fitness import factory as replay_factory
    from pm4py.evaluation.precision import factory as precision_factory
    from pm4py.evaluation.generalization import factory as generalization_factory
    from pm4py.evaluation.simplicity import factory as simplicity_factory
    from pm4py.evaluation import factory as evaluation_factory

    log = xes_importer.import_from_file_xes('inputData\\receipt.xes')

    # Discover one model per miner.
    alpha_net, alpha_im, alpha_fm = alpha_miner.apply(log)
    ind_net, ind_im, ind_fm = inductive_miner.apply(log)

    # Replay-based fitness for both models (results intentionally unused).
    fitness_alpha = replay_factory.apply(log, alpha_net, alpha_im, alpha_fm)
    fitness_inductive = replay_factory.apply(log, ind_net, ind_im, ind_fm)

    # Precision.
    precision_alpha = precision_factory.apply(log, alpha_net, alpha_im, alpha_fm)
    precision_inductive = precision_factory.apply(log, ind_net, ind_im, ind_fm)

    # Generalization.
    generalization_alpha = generalization_factory.apply(log, alpha_net, alpha_im, alpha_fm)
    generalization_inductive = generalization_factory.apply(log, ind_net, ind_im, ind_fm)

    # Simplicity only needs the net itself, not the log or markings.
    simplicity_alpha = simplicity_factory.apply(alpha_net)
    simplicity_inductive = simplicity_factory.apply(ind_net)

    # Combined evaluation (all four dimensions in one call).
    alpha_evaluation_result = evaluation_factory.apply(log, alpha_net, alpha_im, alpha_fm)
    inductive_evaluation_result = evaluation_factory.apply(log, ind_net, ind_im, ind_fm)
def test_docmeasures11(self):
    """Smoke-test fitness, precision, generalization, simplicity and the
    combined evaluation on the receipt log."""
    # Touching an instance attribute avoids static-method warnings that
    # unittest-style test methods would otherwise produce.
    self.dummy_variable = "dummy_value"
    from pm4py.objects.log.importer.xes import factory as xes_importer
    from pm4py.algo.discovery.alpha import factory as alpha_miner
    from pm4py.algo.discovery.inductive import factory as inductive_miner
    from pm4py.evaluation.replay_fitness import factory as replay_factory
    from pm4py.evaluation.precision import factory as precision_factory
    from pm4py.evaluation.generalization import factory as generalization_factory
    from pm4py.evaluation.simplicity import factory as simplicity_factory
    from pm4py.evaluation import factory as evaluation_factory

    log = xes_importer.import_log(os.path.join("input_data", "receipt.xes"))

    alpha_net, alpha_im, alpha_fm = alpha_miner.apply(log)
    ind_net, ind_im, ind_fm = inductive_miner.apply(log)

    # Fitness; results are discarded explicitly (smoke test only).
    fitness_alpha = replay_factory.apply(log, alpha_net, alpha_im, alpha_fm)
    fitness_inductive = replay_factory.apply(log, ind_net, ind_im, ind_fm)
    del fitness_alpha, fitness_inductive

    # Precision.
    precision_alpha = precision_factory.apply(log, alpha_net, alpha_im, alpha_fm)
    precision_inductive = precision_factory.apply(log, ind_net, ind_im, ind_fm)
    del precision_alpha, precision_inductive

    # Generalization.
    generalization_alpha = generalization_factory.apply(log, alpha_net, alpha_im, alpha_fm)
    generalization_inductive = generalization_factory.apply(log, ind_net, ind_im, ind_fm)
    del generalization_alpha, generalization_inductive

    # Simplicity depends only on the net structure.
    simplicity_alpha = simplicity_factory.apply(alpha_net)
    simplicity_inductive = simplicity_factory.apply(ind_net)
    del simplicity_alpha, simplicity_inductive

    # Combined evaluation over all four dimensions.
    alpha_evaluation_result = evaluation_factory.apply(log, alpha_net, alpha_im, alpha_fm)
    inductive_evaluation_result = evaluation_factory.apply(log, ind_net, ind_im, ind_fm)
    del alpha_evaluation_result, inductive_evaluation_result
def evaluation(net, im, fm, log):
    """Compute fitness, precision, generalization and simplicity for a
    Petri net with respect to the log it was discovered from.

    Alignment-based fitness is preferred; when the alignment computation
    raises a TypeError, the token-replay fitness already contained in the
    combined evaluation result is used instead and a ``token`` flag is set.

    :param net: petri net
    :param im: initial marking of petri net
    :param fm: final marking of petri net
    :param log: event log that made petri net
    :return: evaluation dict with a flattened ``fitness`` entry (and a
        ``token`` flag when the token-replay fallback was taken)
    """
    result = evaluation_factory.apply(log, net, im, fm)
    # TODO: expose the alignment-vs-token checking option to the caller.
    try:
        # Prefer alignment-based fitness when the net supports it.
        fitness = replay_factory.apply(log, net, im, fm, variant="alignments")
        result['fitness'] = fitness['percFitTraces']
    except TypeError:
        # Alignments unavailable: fall back to the token-replay value and
        # record which method produced the fitness figure.
        result['fitness'] = result['fitness']['perc_fit_traces']
        result['token'] = True
    return result
def test_tokenreplay(self):
    """Run token replay (both variants) on the running example, then the
    token-based fitness, precision and generalization measures."""
    log = xes_importer.apply(
        os.path.join("input_data", "running-example.xes"))
    from pm4py.algo.discovery.alpha import factory as alpha_miner
    net, im, fm = alpha_miner.apply(log)
    from pm4py.algo.conformance.tokenreplay import factory as token_replay
    # Exercise both replay variants; only the last result is kept for the
    # evaluate() call below, mirroring the original flow.
    for replay_variant in ("token_replay", "backwards"):
        replayed_traces = token_replay.apply(log, net, im, fm,
                                             variant=replay_variant)
    from pm4py.evaluation.replay_fitness import factory as rp_fitness_evaluator
    fitness = rp_fitness_evaluator.apply(
        log, net, im, fm, variant=rp_fitness_evaluator.TOKEN_BASED)
    evaluation = rp_fitness_evaluator.evaluate(
        replayed_traces, variant=rp_fitness_evaluator.TOKEN_BASED)
    from pm4py.evaluation.precision import factory as precision_evaluator
    precision = precision_evaluator.apply(
        log, net, im, fm, variant=precision_evaluator.ETCONFORMANCE_TOKEN)
    from pm4py.evaluation.generalization import factory as generalization_evaluation
    generalization = generalization_evaluation.apply(
        log, net, im, fm,
        variant=generalization_evaluation.GENERALIZATION_TOKEN)
def test_evaluation_pm1(self):
    """Smoke-test the four quality dimensions on a DFG-only model of the
    running example (results are not asserted)."""
    log = xes_importer.import_from_file_xes(
        os.path.join(INPUT_DATA_DIR, "running-example.xes"))
    # Discover a model directly from the directly-follows graph.
    model_net, model_im, model_fm = dfg_only.apply(log, None)
    # One measure per quality dimension.
    fitness = fitness_factory.apply(log, model_net, model_im, model_fm)
    precision = precision_factory.apply(log, model_net, model_im, model_fm)
    generalization = generalization_factory.apply(log, model_net, model_im,
                                                  model_fm)
    simplicity = simplicity_factory.apply(model_net)
def test_heu_log(self):
    """Discover a heuristics-miner model from the running example and run
    every conformance/evaluation factory over it."""
    event_log = xes_importer.apply(
        os.path.join("input_data", "running-example.xes"))
    net, im, fm = heuristics_miner.apply(event_log)
    # Conformance checking: token replay and alignments.
    aligned_traces_tr = tr_factory.apply(event_log, net, im, fm)
    aligned_traces_alignments = align_factory.apply(event_log, net, im, fm)
    # Quality dimensions (combined and individual).
    evaluation = eval_factory.apply(event_log, net, im, fm)
    fitness = rp_fit_factory.apply(event_log, net, im, fm)
    precision = precision_factory.apply(event_log, net, im, fm)
    generalization = generalization_factory.apply(event_log, net, im, fm)
    simplicity = simplicity_factory.apply(net)
def test_inductiveminer_stream(self):
    """Discover an inductive-miner model from a CSV event stream and run
    every conformance/evaluation factory over it."""
    event_stream = csv_importer.apply(
        os.path.join("input_data", "running-example.csv"))
    net, im, fm = inductive_miner.apply(event_stream)
    # Conformance checking: token replay and alignments.
    aligned_traces_tr = tr_factory.apply(event_stream, net, im, fm)
    aligned_traces_alignments = align_factory.apply(event_stream, net, im, fm)
    # Quality dimensions (combined and individual).
    evaluation = eval_factory.apply(event_stream, net, im, fm)
    fitness = rp_fit_factory.apply(event_stream, net, im, fm)
    precision = precision_factory.apply(event_stream, net, im, fm)
    generalization = generalization_factory.apply(event_stream, net, im, fm)
    simplicity = simplicity_factory.apply(net)
def test_inductiveminer_df(self):
    """Discover an inductive-miner model from a Pandas dataframe and run
    every conformance/evaluation factory over it."""
    df_log = csv_import_adapter.import_dataframe_from_path(
        os.path.join("input_data", "running-example.csv"))
    net, im, fm = inductive_miner.apply(df_log)
    # Conformance checking: token replay and alignments.
    aligned_traces_tr = tr_factory.apply(df_log, net, im, fm)
    aligned_traces_alignments = align_factory.apply(df_log, net, im, fm)
    # Quality dimensions (combined and individual).
    evaluation = eval_factory.apply(df_log, net, im, fm)
    fitness = rp_fit_factory.apply(df_log, net, im, fm)
    precision = precision_factory.apply(df_log, net, im, fm)
    generalization = generalization_factory.apply(df_log, net, im, fm)
    simplicity = simplicity_factory.apply(net)
def test_evaluation_pm1(self):
    """Evaluate an inductive-miner model of the running example on all
    four quality dimensions; results are discarded (smoke test)."""
    # Assigning an instance attribute avoids static-method warnings that
    # the unittest framework would otherwise raise for this method.
    self.dummy_variable = "dummy_value"
    event_log = xes_importer.import_log(
        os.path.join(INPUT_DATA_DIR, "running-example.xes"))
    net, marking, final_marking = inductive_miner.apply(event_log)
    fitness = fitness_factory.apply(event_log, net, marking, final_marking)
    precision = precision_factory.apply(event_log, net, marking, final_marking)
    generalization = generalization_factory.apply(event_log, net, marking,
                                                  final_marking)
    simplicity = simplicity_factory.apply(net)
    # Explicitly discard all computed measures.
    del fitness, precision, generalization, simplicity
def test_alignment(self):
    """Compute alignments with both search strategies, then the
    alignment-based fitness and precision measures."""
    log = xes_importer.apply(
        os.path.join("input_data", "running-example.xes"))
    from pm4py.algo.discovery.alpha import factory as alpha_miner
    net, im, fm = alpha_miner.apply(log)
    from pm4py.algo.conformance.alignments import factory as alignments
    # Exercise both alignment versions; only the last result feeds the
    # evaluate() call below, mirroring the original flow.
    for version in (alignments.VERSION_STATE_EQUATION_A_STAR,
                    alignments.VERSION_DIJKSTRA_NO_HEURISTICS):
        aligned_traces = alignments.apply(log, net, im, fm, version=version)
    from pm4py.evaluation.replay_fitness import factory as rp_fitness_evaluator
    fitness = rp_fitness_evaluator.apply(
        log, net, im, fm, variant=rp_fitness_evaluator.ALIGNMENT_BASED)
    evaluation = rp_fitness_evaluator.evaluate(
        aligned_traces, variant=rp_fitness_evaluator.ALIGNMENT_BASED)
    from pm4py.evaluation.precision import factory as precision_evaluator
    precision = precision_evaluator.apply(
        log, net, im, fm, variant=precision_evaluator.ALIGN_ETCONFORMANCE)
vis_save( heuristics_vis, os.path.join(pngFolder, logNamePrefix + "_heuristics.png")) inductive_vis = petri_vis_factory.apply(inductive_model, inductive_im, inductive_fm, log=log, parameters=parameters, variant="frequency") vis_save(inductive_vis, os.path.join(pngFolder, logNamePrefix + "_inductive.png")) t1 = time.time() fitness_token_alpha[logName] = \ fitness_factory.apply(log, alpha_model, alpha_initial_marking, alpha_final_marking, parameters=parameters, variant="token_replay")[ 'perc_fit_traces'] t2 = time.time() times_tokenreplay_alpha[logName] = t2 - t1 t1 = time.time() fitness_token_imdf[logName] = \ fitness_factory.apply(log, inductive_model, inductive_im, inductive_fm, parameters=parameters, variant="token_replay")[ 'perc_fit_traces'] t2 = time.time() times_tokenreplay_imdf[logName] = t2 - t1 if ENABLE_ALIGNMENTS: t1 = time.time() fitness_align_imdf[logName] = \
def _alignment_measures(log, net, initial_marking, final_marking):
    """Best-effort alignment-based fitness and precision for one net.

    Returns (align_fitness, align_precision); failures yield the same
    'N/A' placeholders the original inline code produced.
    """
    try:
        align_fitness = replay_factory.apply(log, net, initial_marking,
                                             final_marking,
                                             variant="alignments")
    except Exception:  # was a bare except; narrowed to Exception
        align_fitness = {"averageFitness": "N/A"}
    try:
        align_precision = precision_factory.apply(
            log, net, initial_marking, final_marking,
            variant="align_etconformance")
    except Exception:  # was a bare except; narrowed to Exception
        align_precision = "N/A"
    return align_fitness, align_precision


def main(system, miner):
    """Evaluate every stored Petri net for *system*/*miner* against the
    training log and report the two best models.

    Two "best" nets are tracked while scanning the candidate files:
      * ``best*``     -- highest ratio of fitting traces, with
                         generalization as the tie-breaker;
      * ``gen_best*`` -- highest generalization overall.

    :param system: system name; selects the training log and the folder of
        candidate Petri net files
    :param miner: miner name; candidate file names must contain it
    :raises ValueError: if no candidate Petri net file matches
    """
    # Load the training log (DATA_PATH, when set, overrides WORK_PATH).
    if DATA_PATH is None:
        log = xes_importer.import_log(
            os.path.join(WORK_PATH, "data", "variants",
                         str(system) + "_train.xes"))
    else:
        log = xes_importer.import_log(
            os.path.join(DATA_PATH, "variants", str(system) + "_train.xes"))

    bestmodel = None
    bestfit = None
    bestPrec = None
    bestGen = 0
    bestfittraces = 0
    gen_bestmodel = None
    gen_bestfit = None
    gen_bestPrec = None
    gen_bestGen = 0

    # "candidate_files" (was "dir", which shadowed the builtin).
    if DATA_PATH is None:
        candidate_files = os.listdir(
            os.path.join(WORK_PATH, "data", "pns", str(system)))
    else:
        candidate_files = os.listdir(
            os.path.join(DATA_PATH, "pns", str(system)))

    for file in candidate_files:
        if system in file and miner in file:
            if DATA_PATH is None:
                path = os.path.join(WORK_PATH, "data", "pns", str(system),
                                    file)
            else:
                path = os.path.join(DATA_PATH, "pns", str(system), file)
            print("Checking conformance of file:", path)
            net, initial_marking, final_marking = pnml_importer.import_net(
                path)
            fitness = replay_factory.apply(log, net, initial_marking,
                                           final_marking)
            precision = precision_factory.apply(log, net, initial_marking,
                                                final_marking)
            generalization = generalization_factory.apply(
                log, net, initial_marking, final_marking)
            if fitness['perc_fit_traces'] > bestfittraces:
                bestfittraces = fitness['perc_fit_traces']
                bestmodel = path
                bestfit = fitness
                bestPrec = precision
                bestGen = generalization
            elif generalization > bestGen and fitness[
                    'perc_fit_traces'] == bestfittraces:
                # Same fitting-trace ratio: prefer higher generalization.
                bestmodel = path
                bestfit = fitness
                bestPrec = precision
                bestGen = generalization
            if generalization > gen_bestGen:
                gen_bestmodel = path
                gen_bestfit = fitness
                gen_bestPrec = precision
                gen_bestGen = generalization

    # Fail with a clear message instead of crashing on import_net(None).
    if bestmodel is None or gen_bestmodel is None:
        raise ValueError(
            "no Petri net file matching system=%r and miner=%r was found"
            % (system, miner))

    # ---- Report: net with the highest generalization. ----
    net, initial_marking, final_marking = pnml_importer.import_net(
        gen_bestmodel)
    align_fitness, align_precision = _alignment_measures(
        log, net, initial_marking, final_marking)
    print("")
    print("")
    # BUGFIX: this banner previously duplicated the "fitting traces" banner
    # below even though this section reports the highest-generalization net.
    print(
        "*********** Petri net w/ highest generalization *************** "
    )
    print("Petri net file:", gen_bestmodel)
    print("Token-based Fitness=", gen_bestfit['average_trace_fitness'])
    print("Token-based Precision=", gen_bestPrec)
    print("Alignment-based Fitness=", align_fitness['averageFitness'])
    print("Alignment-based Precision=", align_precision)
    print("Generalization=", gen_bestGen)

    # ---- Report: net with the highest ratio of fitting traces. ----
    net, initial_marking, final_marking = pnml_importer.import_net(bestmodel)
    align_fitness, align_precision = _alignment_measures(
        log, net, initial_marking, final_marking)
    print("")
    print(
        "*********** Petri net w/ highest ratio of fitting traces and high generalization *************** "
    )
    print("Petri net file:", bestmodel)
    print("Token-based Fitness=", bestfit['average_trace_fitness'])
    print("Token-based Precision=", bestPrec)
    print("Alignment-based Fitness=", align_fitness['averageFitness'])
    print("Alignment-based Precision=", align_precision)
    print("Generalization=", bestGen)
def apply(df, parameters=None):
    """
    Returns a Pandas dataframe from which a sound workflow net could be extracted
    taking into account a discovery algorithm returning models only with visible
    transitions

    Variants are considered greedily, most frequent first: a variant is kept
    only if the alpha miner, applied to the DFG of all kept variants, yields a
    sound workflow net that perfectly fits a representative trace set.

    Parameters
    ------------
    df
        Pandas dataframe
    parameters
        Possible parameters of the algorithm, including:
            max_no_variants -> Maximum number of variants to consider to return a Petri net

    Returns
    ------------
    filtered_df
        Filtered dataframe
    """
    if parameters is None:
        parameters = {}
    # Fill in the conventional column-key parameters when absent.
    if PARAMETER_CONSTANT_CASEID_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_CASEID_KEY] = CASE_CONCEPT_NAME
    if PARAMETER_CONSTANT_ACTIVITY_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_ACTIVITY_KEY] = DEFAULT_NAME_KEY
    if PARAMETER_CONSTANT_TIMESTAMP_KEY not in parameters:
        parameters[PARAMETER_CONSTANT_TIMESTAMP_KEY] = DEFAULT_TIMESTAMP_KEY
    if PARAMETER_CONSTANT_ATTRIBUTE_KEY not in parameters:
        # Attribute key defaults to whatever the activity key resolved to.
        parameters[PARAMETER_CONSTANT_ATTRIBUTE_KEY] = parameters[
            PARAMETER_CONSTANT_ACTIVITY_KEY]
    caseid_glue = parameters[PARAMETER_CONSTANT_CASEID_KEY]
    activity_key = parameters[PARAMETER_CONSTANT_ACTIVITY_KEY]
    timest_key = parameters[PARAMETER_CONSTANT_TIMESTAMP_KEY]
    max_no_variants = parameters[
        "max_no_variants"] if "max_no_variants" in parameters else 20
    # Pre-compute the variants dataframe so downstream calls can reuse it.
    variants_df = case_statistics.get_variants_df(df, parameters=parameters)
    parameters["variants_df"] = variants_df
    variant_stats = case_statistics.get_variant_statistics(
        df, parameters=parameters)
    # [variant, case count] pairs, sorted by count (then variant) descending,
    # so the most frequent variants are tried first.
    all_variants_list = []
    for var in variant_stats:
        all_variants_list.append([var["variant"], var[caseid_glue]])
    all_variants_list = sorted(all_variants_list, key=lambda x: (x[1], x[0]),
                               reverse=True)
    considered_variants = []
    considered_traces = []
    i = 0
    while i < min(len(all_variants_list), max_no_variants):
        variant = all_variants_list[i][0]
        # Tentatively admit the variant; it is removed again below if the
        # resulting model is unsound or does not fit.
        considered_variants.append(variant)
        filtered_df = variants_filter.apply(df, considered_variants,
                                            parameters=parameters)
        dfg_frequency = dfg_util.get_dfg_graph(filtered_df,
                                               measure="frequency",
                                               perf_aggregation_key="median",
                                               case_id_glue=caseid_glue,
                                               activity_key=activity_key,
                                               timestamp_key=timest_key)
        net, initial_marking, final_marking = alpha_miner.apply_dfg(
            dfg_frequency, parameters=parameters)
        is_sound = check_soundness.check_petri_wfnet_and_soundness(net)
        if not is_sound:
            # Unsound net: drop the variant just added.
            del considered_variants[-1]
        else:
            # Take one representative trace of this variant (the first case).
            traces_of_this_variant = variants_filter.apply(
                df, [variant], parameters=parameters).groupby(caseid_glue)
            traces_of_this_variant_keys = list(
                traces_of_this_variant.groups.keys())
            trace_of_this_variant = traces_of_this_variant.get_group(
                traces_of_this_variant_keys[0])
            this_trace = transform.transform_event_log_to_trace_log(
                pandas_df_imp.convert_dataframe_to_event_log(
                    trace_of_this_variant), case_glue=caseid_glue)[0]
            if not activity_key == DEFAULT_NAME_KEY:
                # Copy the activity attribute into the default name key so
                # replay sees the expected labels.
                for j in range(len(this_trace)):
                    this_trace[j][DEFAULT_NAME_KEY] = this_trace[j][
                        activity_key]
            considered_traces.append(this_trace)
            filtered_log = TraceLog(considered_traces)
            try:
                # Alignments are computed only to verify they succeed.
                alignments = alignment_factory.apply(filtered_log, net,
                                                     initial_marking,
                                                     final_marking)
                del alignments
                fitness = replay_fitness_factory.apply(filtered_log, net,
                                                       initial_marking,
                                                       final_marking,
                                                       parameters=parameters)
                if fitness["log_fitness"] < 0.99999:
                    # Not (practically) perfectly fitting: roll back the
                    # variant and its representative trace together.
                    del considered_variants[-1]
                    del considered_traces[-1]
            except TypeError:
                # Alignment/replay failed outright: roll back both.
                del considered_variants[-1]
                del considered_traces[-1]
        i = i + 1
    return variants_filter.apply(df, considered_variants,
                                 parameters=parameters)
#----------------- from pm4py.objects.log.importer.csv import factory as csv_importer excellentLog1A = csv_importer.import_event_stream('Excellent1A_fixed.csv') from pm4py.objects.conversion.log import factory as conversion_factory log1 = conversion_factory.apply(excellentLog1A) from pm4py.visualization.dfg import factory as dfg_vis_factory gviz = dfg_vis_factory.apply(dfg1, log=log1, variant="frequency") dfg_vis_factory.view(gviz) from pm4py.objects.conversion.dfg import factory as dfg_mining_factory net, im, fm = dfg_mining_factory.apply(dfg1) from pm4py.visualization.petrinet import factory as pn_vis_factory gviz = pn_vis_factory.apply(net, im, fm) pn_vis_factory.view(gviz) from pm4py.evaluation.replay_fitness import factory as replay_factory fitness_alpha = replay_factory.apply(log1, net, im, fm) from pm4py.algo.conformance.alignments import factory as align_factory alignments = align_factory.apply(log1, net, im, fm) print(alignments) #excellentLog1A = excellentLog1A.sort_values(by=['org:resource','case','time:timestamp'])
variant="frequency") vis_save(alpha_vis, os.path.join(pngFolder, logNamePrefix + "_alpha.png")) inductive_vis = petri_vis_factory.apply(inductive_model, inductive_initial_marking, inductive_final_marking, log=log, parameters=parameters, variant="frequency") vis_save(inductive_vis, os.path.join(pngFolder, logNamePrefix + "_inductive.png")) t1 = time.time() fitness_token_alpha[logName] = fitness_factory.apply( log, alpha_model, alpha_initial_marking, alpha_final_marking, parameters=parameters)['percFitTraces'] t2 = time.time() times_tokenreplay_alpha[logName] = t2 - t1 t1 = time.time() fitness_token_imdf[logName] = fitness_factory.apply( log, inductive_model, inductive_initial_marking, inductive_final_marking, parameters=parameters)['percFitTraces'] t2 = time.time() times_tokenreplay_imdf[logName] = t2 - t1
def apply(log, parameters=None):
    """
    Returns a log from which a sound workflow net could be extracted taking
    into account a discovery algorithm returning models only with visible
    transitions

    Variants are admitted greedily, most frequent first: each variant is
    kept only if the chosen discovery algorithm, applied to all kept
    variants, yields a sound workflow net that perfectly fits them.

    Parameters
    ------------
    log
        Trace log
    parameters
        Possible parameters of the algorithm, including:
            discovery_algorithm -> Discovery algorithm to consider, possible choices: alphaclassic
            max_no_variants -> Maximum number of variants to consider to return a Petri net

    Returns
    ------------
    filtered_log
        Filtered trace log
    """
    if parameters is None:
        parameters = {}
    discovery_algorithm = parameters[
        "discovery_algorithm"] if "discovery_algorithm" in parameters else "alphaclassic"
    max_no_variants = parameters[
        "max_no_variants"] if "max_no_variants" in parameters else 20
    # Map of variant -> list of traces having that variant.
    all_variants_dictio = variants_filter.get_variants(log,
                                                       parameters=parameters)
    # [variant, trace count] pairs, sorted by count (then variant)
    # descending, so the most frequent variants are tried first.
    all_variants_list = []
    for var in all_variants_dictio:
        all_variants_list.append([var, len(all_variants_dictio[var])])
    all_variants_list = sorted(all_variants_list, key=lambda x: (x[1], x[0]),
                               reverse=True)
    considered_variants = []
    considered_traces = []
    i = 0
    while i < min(len(all_variants_list), max_no_variants):
        variant = all_variants_list[i][0]
        # Tentatively admit the variant and one representative trace; both
        # are rolled back below if the model is unsound or does not fit.
        considered_variants.append(variant)
        considered_traces.append(all_variants_dictio[variant][0])
        filtered_log = TraceLog(considered_traces)
        net = None
        initial_marking = None
        final_marking = None
        if discovery_algorithm == "alphaclassic" or discovery_algorithm == "alpha":
            net, initial_marking, final_marking = alpha_miner.apply(
                filtered_log, parameters=parameters)
        # NOTE(review): with any other discovery_algorithm, net stays None
        # and the soundness check below receives None -- confirm intended.
        is_sound = check_soundness.check_petri_wfnet_and_soundness(net)
        if not is_sound:
            del considered_variants[-1]
            del considered_traces[-1]
        else:
            try:
                # Alignments are computed only to verify they succeed.
                alignments = alignment_factory.apply(filtered_log, net,
                                                     initial_marking,
                                                     final_marking)
                del alignments
                fitness = replay_fitness_factory.apply(filtered_log, net,
                                                       initial_marking,
                                                       final_marking,
                                                       parameters=parameters)
                if fitness["log_fitness"] < 0.99999:
                    # Not (practically) perfectly fitting: roll back.
                    del considered_variants[-1]
                    del considered_traces[-1]
            except TypeError:
                # Alignment/replay failed outright: roll back.
                del considered_variants[-1]
                del considered_traces[-1]
        i = i + 1
    sound_log = TraceLog()
    if considered_variants:
        sound_log = variants_filter.apply(log, considered_variants,
                                          parameters=parameters)
    return sound_log
# Evaluate a synthesized Petri net for each parameter setting and write one
# CSV row per setting.
for j in log_param_list:
    print(j)
    auto_log = utils.discover_annotated_automaton(log, j)
    net_log, im_log, fm_log = sb.petri_net_synthesis(auto_log)
    print("# of trans: ", len(auto_log.transitions))
    print("# of states: ", len(auto_log.states))
    ev = evaluation_factory.apply(log, net_log, im_log, fm_log)
    # Flatten the nested fitness dict down to its average value.
    ev['fitness'] = ev['fitness']['averageFitness']
    # BUGFIX: ev['fitness'] is already the flattened number here; indexing
    # it again with ['averageFitness'] raised a TypeError.
    print("fitness: ", round(ev['fitness'], 2))
    print("precision: ", round(ev['precision'], 2))
    print("simplicity: ", round(ev['simplicity'], 2))
    print("generalization: ", round(ev['generalization'], 2))
    soundness_log = check_soundness.check_petri_wfnet_and_soundness(
        net_log)
    if soundness_log:
        fitness = replay_factory.apply(log, net_log, im_log, fm_log)
        ev['align_fitness'] = fitness['averageFitness']
    else:
        # Unsound nets cannot be aligned; use -1 as a sentinel.
        ev['align_fitness'] = -1
    # BUGFIX: csv.writer.writerow() accepts a single row; the f-score and
    # metrics-average values were previously passed as extra positional
    # arguments (TypeError). They are now columns of the row.
    wr.writerow([
        j,
        len(auto_log.transitions),
        len(auto_log.states),
        soundness_log,
        round(ev['fitness'], 2),
        round(ev['align_fitness'], 2),
        round(ev['precision'], 2),
        round(ev['simplicity'], 2),
        round(ev['generalization'], 2),
        round(ev['fscore'], 2),
        round(ev['metricsAverageWeight'], 2),
    ])
    print(" ")