def visualization(log, C, petrinet=True, heu_net=False):
    """Visualize the process model mined from the sub-log of variants C.

    Parameters
    ----------
    log : event log object
    C : collection of variants passed to the variants filter
    petrinet : bool
        If True, mine a Petri net with the heuristics miner and display it.
    heu_net : bool
        If True, mine a heuristics net and display it.
    """
    if petrinet:
        # net, im, fm = inductive_miner.apply(variants_filter.apply(log, C))
        net, im, fm = heuristics_miner.apply(variants_filter.apply(log, C))
        gviz = pn_visualizer.apply(net, im, fm)
        pn_visualizer.view(gviz)
    if heu_net:
        # BUG FIX: heuristics nets come from heuristics_miner.apply_heu
        # (the inductive miner has no apply_heu; see the heuristics-miner
        # usage elsewhere in this project). Also use a fresh local instead
        # of clobbering the boolean `heu_net` parameter.
        mined_heu_net = heuristics_miner.apply_heu(variants_filter.apply(log, C))
        gviz = hn_vis_factory.apply(mined_heu_net)
        hn_vis_factory.view(gviz)
def test_filtering_variants(self):
    """Filter the running-example log on one variant, negatively and positively."""
    # to avoid static method warnings in tests,
    # that by construction of the unittest package have to be expressed in such way
    self.dummy_variable = "dummy_value"
    input_log = os.path.join(INPUT_DATA_DIR, "running-example.xes")
    log = xes_importer.import_log(input_log)
    considered_variant = (
        "register request,examine casually,check ticket,decide,reinitiate request"
        ",examine thoroughly,check ticket,decide,pay compensation"
    )
    negative_result = variants_module.apply(log, [considered_variant],
                                            parameters={"positive": False})
    positive_result = variants_module.apply(log, [considered_variant],
                                            parameters={"positive": True})
    del negative_result
    del positive_result
def W_creater(log, R, w, output=False):
    """Collect the most frequent variants of log|R until their combined
    trace count exceeds the fraction w of the filtered log's size.

    Returns the list W of selected variants (most frequent first).
    """
    W = []
    log = variants_filter.apply(log, R)
    target_size = len(log) * w  # it determines the size of W
    stats = case_statistics.get_variant_statistics(log)
    stats.sort(key=lambda entry: entry['count'], reverse=True)
    if output:
        print(
            "=" * 100,
            "\nW creater called with w : {} and target size {}\n".format(
                w, target_size))
    covered = 0
    for entry in stats:
        covered += entry['count']
        W.append(entry['variant'])
        if output:
            print(
                "\t\t{}___added with size {} // {} out of {} // total size : {}"
                .format(entry['variant'][:60], entry['count'], covered,
                        target_size, len(log)))
        if covered > target_size:
            break
    if output:
        print("W creater END with its size: {}".format(len(W)))
        print("=" * 100)
    return W
def read_xes(filename, p=1, n_DPI=False):
    '''
    read event log in xes format
    input filename, percentage
    output log object, variants_count
    filename = filename in xes format
    p = percentage of traces % to exploit from the log
    '''
    log = xes_importer.apply(filename)
    if p < 1:
        log = variants_filter.filter_log_variants_percentage(log, percentage=p)
    stats = case_statistics.get_variant_statistics(log)
    VARIANT = [entry['variant'] for entry in stats]
    if n_DPI:
        # restrict to the first n_DPI variants and refilter the log accordingly
        VARIANT = VARIANT[:n_DPI]
        log = variants_filter.apply(log, VARIANT)
    print('=' * 100, '\n=READ THE XES FILE\n'
          'length of log', len(log),
          '\nlength of event', sum(len(trace) for trace in log),
          '\nnumber of variants : {}'.format(len(VARIANT)))
    return log, VARIANT
def test_27(self):
    """Variant filtering on a dataframe with explicit case/activity columns."""
    from pm4py.algo.filtering.pandas.variants import variants_filter
    df = self.load_running_example_df()
    variants = ["register request,examine thoroughly,check ticket,decide,reject request"]
    column_params = {
        variants_filter.Parameters.CASE_ID_KEY: "case:concept:name",
        variants_filter.Parameters.ACTIVITY_KEY: "concept:name",
    }
    filtered_df1 = variants_filter.apply(df, variants, parameters=column_params)
def fit_check(log: list, C: list) -> float:
    """Token-based replay fitness of the model mined from the variants in C.

    Parameters
    ----------
    log : event log object
    C : list of variants to keep

    Returns
    -------
    float
        The 'log_fitness' value of the token-based replay evaluation.
    """
    # get the log containing variants in C
    # (idiom fix: `[c for c in C]` was a redundant identity comprehension)
    log = variants_filter.apply(log, list(C))
    net, im, fm = heuristics_miner.apply(log)
    # net, im, fm = inductive_miner.apply(log)
    fit = replay_fitness_evaluator.apply(
        log, net, im, fm,
        variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
    return fit['log_fitness']
def filter_variants(self, filter_level):
    """Keep only variants whose share of traces reaches
    (1 / number_of_variants) * filter_level; rewrites self.log in place."""
    stats = case_statistics.get_variant_statistics(self.log)
    stats = sorted(stats, key=lambda entry: entry['count'], reverse=True)
    total_traces = len(self.log)
    total_variants = len(stats)
    filter_threshold = (1 / total_variants) * filter_level
    desired_variants = [
        entry['variant']
        for entry in stats
        if entry['count'] / total_traces >= filter_threshold
    ]
    self.log = variants_filter.apply(self.log, desired_variants)
def look_ahead(log: list, C, R, output=False):
    """Move every variant in R that replays perfectly on the model of C into C.

    Mines a model from the sub-log of variants C, then replays one trace of
    each remaining variant r in R; perfectly fitting variants migrate from R
    to C. Both C and R are mutated in place and also returned.
    """
    if output:
        print("\n * Look_ahead()")
    C_log = variants_filter.apply(log, C)
    net, im, fm = heuristics_miner.apply(C_log)
    # net, im, fm = inductive_miner.apply(C_log)
    # BUG FIX: iterate over a snapshot of R — the original removed elements
    # from R while iterating it, which silently skips the variant that
    # follows each removal.
    for i, r in enumerate(list(R)):
        if i % 10 == 0:
            print("\t = {} dpi(s) checked".format(i))
        r_log = [variants_filter.apply(log, [r])[0]]
        fit = replay_fitness_evaluator.apply(
            r_log, net, im, fm,
            variant=replay_fitness_evaluator.Variants.TOKEN_BASED)
        # BUG FIX: the evaluator returns a dict (see fit_check, which reads
        # fit['log_fitness']); comparing the dict itself to 1 was never true,
        # so no variant was ever migrated.
        if fit['log_fitness'] == 1:
            print("fitness:", fit)
            if output:
                print("\tFound a perfect fitness - {}".format(r))
            R.remove(r)
            C.append(r)
    return C, R
def visualization_total(log, VARIANT, CS, freq_check=False):
    """Visualize the model of the full variant set, then of each cluster in CS,
    optionally printing the token-replay fitness of every model."""
    print("visualization of VARIANT")
    if freq_check:
        fitness = fit_check(log, VARIANT)
        print("#variants:{} / #traces:{} / fitness{}".format(
            len(VARIANT), len(log), fitness))
    visualization(log, VARIANT, True, False)
    print("visualization of each cluster in CS")
    for cluster in CS:
        if freq_check:
            cluster_log = variants_filter.apply(log, cluster)
            fitness = fit_check(cluster_log, cluster)
            print("#variants:{} / #traces:{} / fitness{}".format(
                len(cluster), len(cluster_log), fitness))
        visualization(log, cluster, True, False)
def get_case_statistics(self, parameters=None):
    """
    Gets the statistics on cases

    Parameters
    -------------
    parameters
        Possible parameters of the algorithm

    Returns
    -------------
    list_cases
        List of cases
    """
    if parameters is None:
        parameters = {}
    # NOTE(review): the caller-supplied dict is mutated in place below —
    # confirm no caller relies on `parameters` being left untouched.
    parameters[
        constants.PARAMETER_CONSTANT_ACTIVITY_KEY] = self.activity_key
    # The attribute key is set to the activity key as well — presumably
    # intentional so attribute-based statistics use activities; verify.
    parameters[
        constants.PARAMETER_CONSTANT_ATTRIBUTE_KEY] = self.activity_key
    #parameters["max_ret_cases"] = ws_constants.MAX_NO_CASES_TO_RETURN
    # Default sorting: by index 0, descending, unless the caller overrides.
    parameters["sort_by_index"] = parameters[
        "sort_by_index"] if "sort_by_index" in parameters else 0
    parameters["sort_ascending"] = parameters[
        "sort_ascending"] if "sort_ascending" in parameters else False
    parameters["variants"] = self.variants
    if "variant" in parameters:
        # A specific variant was requested: restrict the case list to it.
        var_to_filter = parameters["variant"]
        # TODO: TECHNICAL DEBT
        # quick turnaround for bug
        # NOTE(review): presumably restores '+' lifecycle separators that
        # arrived space-separated from the client — confirm against callers.
        var_to_filter = var_to_filter.replace(" start", "+start")
        var_to_filter = var_to_filter.replace(" START", "+START")
        var_to_filter = var_to_filter.replace(" complete", "+complete")
        var_to_filter = var_to_filter.replace(" COMPLETE", "+COMPLETE")
        filtered_log = variants_filter.apply(self.log, [var_to_filter],
                                             parameters=parameters)
        return [
            casestats.include_key_in_value_list(
                case_statistics.get_cases_description(
                    filtered_log, parameters=parameters))
        ] + [self.get_log_summary_dictio()]
    else:
        # No variant requested: statistics over the whole log.
        return [
            casestats.include_key_in_value_list(
                case_statistics.get_cases_description(
                    self.log, parameters=parameters))
        ] + [self.get_log_summary_dictio()]
def apply(log, filter, parameters=None):
    """
    Apply a filter to the current log (variants filter)

    Parameters
    ------------
    log
        Log object
    filter
        Filter to apply
    parameters
        Parameters of the algorithm

    Returns
    ------------
    log
        Log object
    """
    parameters = {} if parameters is None else parameters
    # filter[1] presumably holds the admitted variants — the delegate does
    # the actual work.
    return variants_filter.apply(log, filter[1], parameters=parameters)
def quality_measure(log, CS):
    """Fitness, precision, generalization and simplicity over the clusters
    in CS, each metric weighted by the cluster's number of traces."""
    scores = []
    for cluster in CS:
        cluster_log = variants_filter.apply(log, cluster)
        scores.append(evaluation_w_hm(cluster_log))
    DATA = np.array(scores)
    # print(DATA)
    # Column 0 holds the trace count of each cluster and acts as the weight.
    metrics = []
    for col in range(1, DATA.shape[1]):
        weighted = sum(DATA[:, 0] * DATA[:, col]) / sum(DATA[:, 0])
        metrics.append(weighted)
    # print(
    #     "fitness:{}, prec:{}, gen:{}, simp:{}, weighted by # traces".
    #     format(metrics[0], metrics[1], metrics[2], metrics[3])
    # )
    return metrics
from pm4py.objects.log.importer.xes import factory as xes_import_factory
from pm4py.objects.log.exporter.xes import factory as xes_exporter
from pm4py.statistics.traces.log import case_statistics
from pm4py.algo.filtering.log.variants import variants_filter

# Baseline k-anonymity: drop every variant observed fewer than k times,
# then export the pruned log.
K = [20]
for k in K:
    event_log = "Sepsis Cases - Event Log.xes"
    log = xes_import_factory.apply(event_log)
    var_with_count = case_statistics.get_variant_statistics(log)
    variants_count = sorted(var_with_count,
                            key=lambda entry: entry['count'],
                            reverse=True)
    to_filter = []
    count = 0
    for record in variants_count:
        if record["count"] < k:
            to_filter.append([record["variant"]])
        else:
            count += record["count"]
    for delete in to_filter:
        log = variants_filter.apply(log, delete,
                                    parameters={"positive": False})
    xes_exporter.export_log(
        log, "baseline" + "_" + str(k) + "-" + "Annonymity" + ".xes")
    print("baseline" + "_" + str(k) + "-" + "Annonymity" + ".xes"
          + " has been exported!")
def test_28(self):
    """Negative variant filtering on the running-example event log."""
    from pm4py.algo.filtering.log.variants import variants_filter
    log = self.load_running_example_xes()
    variants = ["register request,examine thoroughly,check ticket,decide,reject request"]
    drop_params = {variants_filter.Parameters.POSITIVE: False}
    filtered_log2 = variants_filter.apply(log, variants, parameters=drop_params)
print(end_activities_filter.get_end_activities(log_af_ea))

# traces / variant exploration
from pm4py.algo.filtering.log.variants import variants_filter
variants = variants_filter.get_variants(log)
variants  # notebook-style inspection; no effect when run as a plain script

from pm4py.statistics.traces.log import case_statistics
variants_count = case_statistics.get_variant_statistics(log)
variants_count = sorted(variants_count,
                        key=lambda entry: entry['count'],
                        reverse=True)
print(variants_count)
print(len(variants_count))

# keep only the most common variant
filtered_log1 = variants_filter.apply(log, [
    "Confirmation of receipt,T02 Check confirmation of receipt,T04 Determine confirmation of receipt,T05 Print and send confirmation of receipt,T06 Determine necessity of stop advice,T10 Determine necessity to stop indication"
])
filtered_log1
variants_count_filtered_log1 = case_statistics.get_variant_statistics(
    filtered_log1)
print(variants_count_filtered_log1)

# --- attribute exploration
from pm4py.algo.filtering.log.attributes import attributes_filter
activities = attributes_filter.get_attribute_values(log, "concept:name")
resources = attributes_filter.get_attribute_values(log, "org:resource")
activities
resources

# not containing any resource
from pm4py.util import constants
def test_26(self):
    """Positive (default) variant filtering on the running-example log."""
    from pm4py.algo.filtering.log.variants import variants_filter
    log = self.load_running_example_xes()
    variants = ["register request,examine thoroughly,check ticket,decide,reject request"]
    filtered_log1 = variants_filter.apply(log, variants)
def apply_filter(req):
    """Django view: applies up to five chained filters (time, variants,
    performance, activities, additional attribute) read from the JSON request
    body to a fixed XES log, renders and exports the kept (l1) and discarded
    (l2) logs, and returns per-filter timing plus an optional distance metric.

    NOTE(review): filter1 carries raw timestamp pairs; filters 2-5 carry
    fractional [x, y] ranges over the sorted population (see the
    floor(x*n)/ceil(y*n) index computations below).
    """
    sessions[req.session["id"]] = datetime.now()
    # Every filter is assumed active until its interval list turns out empty.
    filters = {
        "time": True,
        "variants": True,
        "performance": True,
        "activities": True,
        "attribute": True
    }
    req.session.set_expiry(7200)
    #print(str(req.body))
    o = json.loads(req.body)
    print(str(o))
    # --- parse the five interval lists from the request body -------------
    custom_time_range = []
    for pair in o["filter1"]:
        #custom_time_range.append((dateutil.parser.parse(pair[0]),dateutil.parser.parse(pair[1])))
        custom_time_range.append((pair[0], pair[1]))
    if o["filter1"] == []:
        filters["time"] = False
    #print(o["filter1"][0])
    #print(custom_time_range[0][0])
    #print(custom_time_range)
    custom_path_range = []
    for pair in o["filter2"]:
        custom_path_range.append((float(pair[0]), float(pair[1])))
    if o["filter2"] == []:
        filters["variants"] = False
        #custom_path_range = [(0,1)] #filter2
    custom_performance_range = []
    for pair in o["filter3"]:
        custom_performance_range.append((float(pair[0]), float(pair[1])))
    if o["filter3"] == []:
        filters["performance"] = False
    custom_activitiy_range = []
    for pair in o["filter4"]:
        custom_activitiy_range.append((float(pair[0]), float(pair[1])))
    if o["filter4"] == []:
        filters["activities"] = False
        #custom_activitiy_range = [(0,1)] #filter3
    custom_attribute_range = []
    for pair in o["filter5"]:
        custom_attribute_range.append((float(pair[0]), float(pair[1])))
    if o["filter5"] == [] or o["filter5attribute"] == "Empty":
        filters["attribute"] = False
    additional_attribute = o["filter5attribute"]
    selected_viz = o["visualization"]
    calc_lev = o["distance"]
    # NOTE(review): the per-session upload is commented out; a fixed demo log
    # is loaded instead.
    #input_file = os.path.join("webapp","static", req.session["id"] + "_l0.xes")
    input_file = os.path.join("webapp", "static", "sepsis.xes")
    input_log = xes_importer.apply(input_file)
    # Collects, per filter, the traces/events that were removed ("l2" log).
    not_filtered_logs = {}
    flatten = lambda l: [item for sublist in l for item in sublist]
    time_timestamp_started = datetime.now()
    # --- 1) timestamp filter ---------------------------------------------
    if filters["time"]:
        #TODO check overlapping for filter
        custom_time_range = sorted(custom_time_range, reverse=False)
        for i in range(0, len(custom_time_range) - 1):
            if (custom_time_range[i][1] > custom_time_range[i + 1][0]):
                response = HttpResponse(json.dumps({'error': "Wrong intervals for time filter"}))
                response.status_code = 200
                return response
                #raise ValueError("Overlapping time ranges")
        logs = []
        for (x, y) in custom_time_range:
            logs.append(timestamp_filter.filter_traces_contained(input_log, x, y))
        #log = timestamp_filter.filter_traces_contained(input_log, custom_time_range[0][0], custom_time_range[0][1])
        log = pm4py.objects.log.log.EventLog()
        for timeslice in logs:
            for trace in timeslice:
                log.append(trace)
        print(len(input_log))
        print(len(log))
        #l2
        not_filtered_logs["timestamp_filter"] = pm4py.objects.log.log.EventLog()
        for trace in input_log:
            if trace not in log:
                not_filtered_logs["timestamp_filter"].append(trace)
        print(len(not_filtered_logs["timestamp_filter"]))
    else:
        log = input_log
    time_variants_started = datetime.now()  # where should I start?
    # --- 2) variants filter (fractional ranges over variants sorted by
    #        ascending frequency) ----------------------------------------
    if filters["variants"]:
        variants = variants_filter.get_variants(log)
        variants_count = case_statistics.get_variant_statistics(log)
        variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False)
        custom_path_range = sorted(custom_path_range, reverse=False)
        # check overlapping
        for i in range(0, len(custom_path_range) - 1):
            if (custom_path_range[i][1] > custom_path_range[i + 1][0]):
                response = HttpResponse(json.dumps({'error': "Wrong intervals for variants filter"}))
                response.status_code = 200
                return response
                #raise ValueError("Overlapping variants ranges")
        nr_variants = len(variants_count)
        # NOTE(review): no-op expression — the product is discarded; looks
        # like a leftover from an earlier index computation.
        custom_path_range * nr_variants
        idx = [(math.floor(x * nr_variants), math.ceil(y * nr_variants)) for (x, y) in custom_path_range]
        variants_subset = [variants_count[x:y + 1] for (x, y) in idx]
        variants_subset = flatten(variants_subset)
        filtered_variants = {k: v for k, v in variants.items() if k in [x["variant"] for x in variants_subset]}
        #l2
        not_filtered_variants = {k: v for k, v in variants.items() if k not in [x["variant"] for x in variants_subset]}
        filtered_log = variants_filter.apply(log, filtered_variants)
        #l2
        not_filtered_logs["variant_filter"] = variants_filter.apply(log, not_filtered_variants)
    else:
        filtered_log = log
    time_variants_finished = datetime.now()  # note: incl log2 generation
    # --- 3) performance (case throughput time) filter --------------------
    if filters["performance"]:
        custom_performance_range = sorted(custom_performance_range, reverse=False)
        # check overlapping
        for i in range(0, len(custom_performance_range) - 1):
            if (custom_performance_range[i][1] > custom_performance_range[i + 1][0]):
                response = HttpResponse(json.dumps({'error': "Wrong intervals for performance filter"}))
                response.status_code = 200
                return response
                #raise ValueError("Overlapping performance ranges")
        #all_case_durations = case_statistics.get_all_casedurations(log, parameters={case_statistics.Parameters.TIMESTAMP_KEY: "time:timestamp"})
        #case_filter.filter_case_performance(log, 86400, 864000)
        # Throughput = last event timestamp minus first, stored as a trace
        # attribute so the membership test below can reuse it.
        performances = []
        for i in range(len(filtered_log)):
            filtered_log[i].attributes["throughput"] = (max([event["time:timestamp"]for event in filtered_log[i]]) - min([event["time:timestamp"] for event in filtered_log[i]])).total_seconds()
            performances.append(filtered_log[i].attributes["throughput"])
        nr_cases = len(filtered_log)
        performances = sorted(performances, reverse=False)
        idx = [(math.floor(x * nr_cases), math.ceil(y * nr_cases)) for (x, y) in custom_performance_range]
        perf_subset = [performances[x:y + 1] for (x, y) in idx]
        perf_subset = flatten(perf_subset)
        performance_log = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] in perf_subset])
        #l2
        not_filtered_logs["performance_filter"] = pm4py.objects.log.log.EventLog([trace for trace in filtered_log if trace.attributes["throughput"] not in perf_subset])
        #print(str(len(not_filtered_logs["performance_filter"])))
    else:
        performance_log = filtered_log
    time_performance_finished = datetime.now()
    # --- 4) activities filter: rank activities by total frequency, keep the
    #        selected fractional slices, then prune variants and events ----
    if filters["activities"]:
        variants = variants_filter.get_variants(performance_log)
        variants_count = case_statistics.get_variant_statistics(performance_log)
        variants_count = sorted(variants_count, key=lambda x: x['count'], reverse=False)
        # Aggregate per-activity frequency from the variant strings.
        activities = dict()
        for variant in variants_count:
            for activity in variant["variant"].split(","):
                if (activity not in activities.keys()):
                    activities[activity] = variant["count"]
                else:
                    activities[activity] += variant["count"]
        sorted_activities = {k: v for k, v in sorted(activities.items(), key=lambda item: item[1])}
        activities_sorted_list = list(sorted_activities)
        custom_activitiy_range = sorted(custom_activitiy_range, reverse=False)
        # check overlapping
        for i in range(0, len(custom_activitiy_range) - 1):
            if (custom_activitiy_range[i][1] > custom_activitiy_range[i + 1][0]):
                response = HttpResponse(json.dumps({'error': "Wrong intervals for activities filter"}))
                response.status_code = 200
                return response
                #raise ValueError("Overlapping activities ranges")
        nr_activities = len(activities_sorted_list)
        idx = [(math.floor(x * nr_activities), math.ceil(y * nr_activities)) for (x, y) in custom_activitiy_range]
        activities_to_keep = [activities_sorted_list[x:y + 1] for (x, y) in idx]
        activities_to_keep = flatten(activities_to_keep)
        # Keep every variant containing at least one kept activity.
        variants_idx = []
        for i in range(len(variants_count)):
            for activity in activities_to_keep:
                if (activity in variants_count[i]["variant"].split(",") and (i not in variants_idx)):
                    variants_idx.append(i)
        variants_subset = [variants_count[i] for i in variants_idx]
        filtered_variants = {k: v for k, v in variants.items() if k in [x["variant"] for x in variants_subset]}
        #l2
        not_filtered_variants = {k: v for k, v in variants.items() if k not in [x["variant"] for x in variants_subset]}
        filtered_log = variants_filter.apply(performance_log, filtered_variants)
        #l2
        not_filtered_logs["activities_filter"] = variants_filter.apply(performance_log, not_filtered_variants)
        # Event-level pass: drop individual events whose activity was not kept.
        new_log = pm4py.objects.log.log.EventLog()
        #not_filtered_logs["activities_filter_traces"] = pm4py.objects.log.log.EventLog()
        for trace in filtered_log:
            new_trace = pm4py.objects.log.log.Trace()
            not_new_trace = pm4py.objects.log.log.Trace()
            for event in trace:
                if (event['concept:name'] in activities_to_keep):
                    new_trace.append(event)
                else:
                    not_new_trace.append(event)
            if (len(new_trace) > 0):
                new_log.append(new_trace)
            if (len(not_new_trace) > 0):
                not_filtered_logs["activities_filter"].append(not_new_trace)
    else:
        new_log = performance_log
    time_activities_finished = datetime.now()
    # --- 5) additional attribute filter: numeric attributes are matched
    #        against the raw ranges; string attributes are ranked by
    #        frequency and sliced fractionally ---------------------------
    if filters["attribute"]:
        custom_attribute_range = sorted(custom_attribute_range, reverse=False)
        # check overlapping
        for i in range(0, len(custom_attribute_range) - 1):
            if (custom_attribute_range[i][1] > custom_attribute_range[i + 1][0]):
                response = HttpResponse(json.dumps({'error': "Wrong intervals for additional attribute filter"}))
                response.status_code = 200
                return response
        newest_log = pm4py.objects.log.log.EventLog()
        not_filtered_logs["additional_filter"] = pm4py.objects.log.log.EventLog()
        traces_with_attr = []
        not_traces_with_attr = []
        for trace in new_log:
            if additional_attribute in trace.attributes.keys():
                traces_with_attr.append(trace)
            else:
                not_traces_with_attr.append(trace)
        #check if trace attribute
        if len(traces_with_attr) > 0:
            #check if numeric
            # NOTE(review): the first trace's type decides the branch for all
            # traces — mixed-type attributes would be misclassified.
            if type(traces_with_attr[0].attributes[additional_attribute]) in [int, float]:
                for trace in traces_with_attr:
                    if any([trace.attributes[additional_attribute] >= x and trace.attributes[additional_attribute] <= y for (x, y) in custom_attribute_range]):
                        newest_log.append(trace)
                    else:
                        not_filtered_logs["additional_filter"].append(trace)
                for trace in not_traces_with_attr:
                    not_filtered_logs["additional_filter"].append(trace)
            else:
                #string
                attribute_frequencies = dict()
                for trace in traces_with_attr:
                    if trace.attributes[additional_attribute] not in attribute_frequencies.keys():
                        attribute_frequencies[trace.attributes[additional_attribute]] = 0
                    attribute_frequencies[trace.attributes[additional_attribute]] += 1
                sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])}
                frequencies_sorted_list = list(sorted_frequencies)
                nr_values = len(frequencies_sorted_list)
                idx = [(math.floor(x * nr_values), math.ceil(y * nr_values)) for (x, y) in custom_attribute_range]
                values_to_keep = [frequencies_sorted_list[x:y + 1] for (x, y) in idx]
                values_to_keep = flatten(values_to_keep)
                for trace in traces_with_attr:
                    if trace.attributes[additional_attribute] in values_to_keep:
                        newest_log.append(trace)
                    else:
                        not_filtered_logs["additional_filter"].append(trace)
                for trace in not_traces_with_attr:
                    not_filtered_logs["additional_filter"].append(trace)
        else:
            #event attribute
            # NOTE(review): indexing [0] raises IndexError when no event
            # carries the attribute at all — confirm the client guarantees it.
            if [type(event[additional_attribute]) for trace in new_log for event in trace if additional_attribute in event.keys()][0] in [int, float]:
                for trace in new_log:
                    new_trace = pm4py.objects.log.log.Trace()
                    not_new_trace = pm4py.objects.log.log.Trace()
                    for event in trace:
                        if (additional_attribute in event.keys() and any([event[additional_attribute] >= x and event[additional_attribute] <= y for (x, y) in custom_attribute_range])):
                            new_trace.append(event)
                        else:
                            not_new_trace.append(event)
                    if (len(new_trace) > 0):
                        newest_log.append(new_trace)
                    if (len(not_new_trace) > 0):
                        not_filtered_logs["additional_filter"].append(not_new_trace)
            else:
                #string
                attribute_frequencies = dict()
                for trace in new_log:
                    for event in trace:
                        if additional_attribute in event.keys():
                            if event[additional_attribute] not in attribute_frequencies.keys():
                                attribute_frequencies[event[additional_attribute]] = 0
                            attribute_frequencies[event[additional_attribute]] += 1
                sorted_frequencies = {k: v for k, v in sorted(attribute_frequencies.items(), key=lambda item: item[1])}
                frequencies_sorted_list = list(sorted_frequencies)
                nr_values = len(frequencies_sorted_list)
                idx = [(math.floor(x * nr_values), math.ceil(y * nr_values)) for (x, y) in custom_attribute_range]
                values_to_keep = [frequencies_sorted_list[x:y + 1] for (x, y) in idx]
                values_to_keep = flatten(values_to_keep)
                for trace in new_log:
                    new_trace = pm4py.objects.log.log.Trace()
                    not_new_trace = pm4py.objects.log.log.Trace()
                    for event in trace:
                        if (additional_attribute in event.keys() and event[additional_attribute] in values_to_keep):
                            new_trace.append(event)
                        else:
                            not_new_trace.append(event)
                    if (len(new_trace) > 0):
                        newest_log.append(new_trace)
                    if (len(not_new_trace) > 0):
                        not_filtered_logs["additional_filter"].append(not_new_trace)
    else:
        newest_log = new_log
    time_attribute_finished = datetime.now()
    # --- render & export the kept log (l1) -------------------------------
    if (selected_viz == "dfgf"):
        dfg = dfg_discovery.apply(newest_log)
        gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.FREQUENCY)
        dfg_visualization.save(gviz, os.path.join("webapp", "static", req.session["id"] + "_l1.png"))
    elif (selected_viz == "dfgp"):
        dfg = dfg_discovery.apply(newest_log)
        gviz = dfg_visualization.apply(dfg, log=newest_log, variant=dfg_visualization.Variants.PERFORMANCE)
        dfg_visualization.save(gviz, os.path.join("webapp", "static", req.session["id"] + "_l1.png"))
    else:
        heu_net = heuristics_miner.apply_heu(newest_log, parameters={"dependency_thresh": 0.99})
        gviz = hn_vis_factory.apply(heu_net)
        hn_vis_factory.save(gviz, os.path.join("webapp", "static", req.session["id"] + "_l1.png"))
    xes_exporter.apply(newest_log, os.path.join("webapp", "static", req.session["id"] + "_l1.xes"))
    #l2
    # --- merge everything that was filtered out, render & export (l2) ----
    not_filtered_log = pm4py.objects.log.log.EventLog()
    for part in not_filtered_logs.keys():
        for trace in not_filtered_logs[part]:
            not_filtered_log.append(trace)
    if (selected_viz == "dfgf"):
        dfg = dfg_discovery.apply(not_filtered_log)
        gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.FREQUENCY)
        dfg_visualization.save(gviz, os.path.join("webapp", "static", req.session["id"] + "_l2.png"))
    elif (selected_viz == "dfgp"):
        dfg = dfg_discovery.apply(not_filtered_log)
        gviz = dfg_visualization.apply(dfg, log=not_filtered_log, variant=dfg_visualization.Variants.PERFORMANCE)
        dfg_visualization.save(gviz, os.path.join("webapp", "static", req.session["id"] + "_l2.png"))
    else:
        heu_net = heuristics_miner.apply_heu(not_filtered_log, parameters={"dependency_thresh": 0.99})
        gviz = hn_vis_factory.apply(heu_net)
        hn_vis_factory.save(gviz, os.path.join("webapp", "static", req.session["id"] + "_l2.png"))
    xes_exporter.apply(not_filtered_log, os.path.join("webapp", "static", req.session["id"] + "_l2.xes"))
    # --- optional pairwise distance between kept and discarded traces -----
    # NOTE(review): despite the printed label, lev_dist is called on lists of
    # activity-name hashes — confirm it implements Levenshtein over sequences.
    if (calc_lev):
        lev_new = [0] * len(newest_log)
        for i in range(len(newest_log)):
            lev_new[i] = [hash(event['concept:name']) for event in newest_log[i]]
        lev_not = [0] * len(not_filtered_log)
        for i in range(len(not_filtered_log)):
            lev_not[i] = [hash(event['concept:name']) for event in not_filtered_log[i]]
        distances = []
        for i in range(len(lev_new)):
            for j in range(len(lev_not)):
                distances.append(lev_dist(lev_new[i], lev_not[j]))
        lev_d = sum(distances) / len(distances)
        print("Levenshtein's distance: " + str(lev_d))
    else:
        lev_d = "null"
    used_paths = 0
    for lower, higher in custom_path_range:
        used_paths += round((higher - lower) * 100)
    print(f"Using {used_paths}% of paths. {100-used_paths}% of paths are discarded.")
    print("Timestamp filter: {} seconds. \nVariants filter: {} seconds. \nPerformance filter: {} seconds. \nActivities filter: {} seconds. \nAttribute filter: {} seconds.".format((time_variants_started - time_timestamp_started).total_seconds(), (time_variants_finished - time_variants_started).total_seconds(), (time_performance_finished - time_variants_finished).total_seconds(), (time_activities_finished - time_performance_finished).total_seconds(), (time_attribute_finished - time_activities_finished).total_seconds()))
    response = HttpResponse(json.dumps({'time': (time_variants_started - time_timestamp_started).total_seconds(), 'variants': (time_variants_finished - time_variants_started).total_seconds(), 'performance': (time_performance_finished - time_variants_finished).total_seconds(), 'activities': (time_activities_finished - time_performance_finished).total_seconds(), 'attribute': (time_attribute_finished - time_activities_finished).total_seconds(), 'traces': [len(newest_log), len(not_filtered_log)], 'distance': lev_d}))
    response.status_code = 200
    return response
import pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.filtering.log.variants import variants_filter
from pm4py.algo.enhancement.sna import algorithm as sna
from pm4py.visualization.sna import visualizer as sna_visualizer

# Load the financial log and discard the immediately-declined variant.
log = xes_importer.apply(
    r'C:\Users\HAXY8W\Desktop\Process Mining\Audit Process Mining\financial_log.xes.gz'
)
filtered_log1 = variants_filter.apply(
    log,
    {'A_SUBMITTED,A_PARTLYSUBMITTED,A_DECLINED'},
    parameters={variants_filter.Parameters.POSITIVE: False})

# part of the analysis was to eliminate unwanted actions, the code below does just that.
# it is not needed to run the network however if you want to filter these activities out: uncomment it and then change
# the name in network algo sections from filtered_log1 to filtered_log2
# event_log1 = pm4py.convert_to_event_stream(filtered_log1)
# filtered_log2 = pm4py.filtering.filter_event_attribute_values(filtered_log1, 'concept:name',
#                                                               {"O_SELECTED", "O_CREATED", "O_ACCEPTED",
#                                                                "A_REGISTERED", "A_ACTIVATED",
#                                                                "O_CANCELLED", "O_DECLINED", 'A_PARTLYSUBMITTED'},
#                                                               level='event', retain=False)

# Network algorithm sections
# Handover of Work
hw_values = sna.apply(filtered_log1, variant=sna.Variants.HANDOVER_LOG)
gviz_hw_py = sna_visualizer.apply(hw_values,
                                  variant=sna_visualizer.Variants.PYVIS)
sna_visualizer.view(gviz_hw_py, variant=sna_visualizer.Variants.PYVIS)
def apply(log, parameters=None):
    """
    Returns a log from which a sound workflow net could be extracted taking into account
    a discovery algorithm returning models only with visible transitions

    Parameters
    ------------
    log
        Trace log
    parameters
        Possible parameters of the algorithm, including:
            discovery_algorithm -> Discovery algorithm to consider, possible choices: alphaclassic
            max_no_variants -> Maximum number of variants to consider to return a Petri net

    Returns
    ------------
    filtered_log
        Filtered log
    """
    # local import — presumably avoids a circular dependency at module load;
    # confirm before hoisting to the top of the file.
    from pm4py.evaluation.replay_fitness import factory as replay_fitness_factory
    if parameters is None:
        parameters = {}
    discovery_algorithm = parameters["discovery_algorithm"] if "discovery_algorithm" in parameters else "alphaclassic"
    max_no_variants = parameters["max_no_variants"] if "max_no_variants" in parameters else 20
    # variant string -> list of traces having that variant
    all_variants_dictio = variants_filter.get_variants(log, parameters=parameters)
    all_variants_list = []
    for var in all_variants_dictio:
        all_variants_list.append([var, len(all_variants_dictio[var])])
    # most frequent variants first; ties broken by the variant string
    all_variants_list = sorted(all_variants_list, key=lambda x: (x[1], x[0]), reverse=True)
    considered_variants = []
    considered_traces = []
    i = 0
    # Greedily grow the variant set, backing out any variant whose addition
    # breaks soundness or (near-)perfect replay fitness of the mined net.
    while i < min(len(all_variants_list), max_no_variants):
        variant = all_variants_list[i][0]
        considered_variants.append(variant)
        # one representative trace per variant is enough for discovery
        considered_traces.append(all_variants_dictio[variant][0])
        filtered_log = EventLog(considered_traces)
        net = None
        initial_marking = None
        final_marking = None
        if discovery_algorithm == "alphaclassic" or discovery_algorithm == "alpha":
            net, initial_marking, final_marking = alpha_miner.apply(filtered_log, parameters=parameters)
        is_sound = check_soundness.check_petri_wfnet_and_soundness(net)
        if not is_sound:
            # this variant broke soundness: back it out
            del considered_variants[-1]
            del considered_traces[-1]
        else:
            try:
                fitness = replay_fitness_factory.apply(filtered_log, net, initial_marking, final_marking,
                                                       parameters=parameters)
                if fitness["log_fitness"] < 0.99999:
                    # replay no longer (almost) perfect: back the variant out
                    del considered_variants[-1]
                    del considered_traces[-1]
            except TypeError:
                # replay itself failed on this model: back the variant out
                del considered_variants[-1]
                del considered_traces[-1]
        i = i + 1
    sound_log = EventLog()
    if considered_variants:
        # re-filter the ORIGINAL log so all traces of the kept variants return
        sound_log = variants_filter.apply(log, considered_variants, parameters=parameters)
    return sound_log