def execute_script():
    """Merge two partial logs via their case relations and view the DFG."""
    folder = os.path.join("..", "tests", "input_data", "interleavings")
    # The two halves of the receipt log, plus the table relating their cases.
    even_df = pd.read_csv(os.path.join(folder, "receipt_even.csv"))
    even_df = pm4py.format_dataframe(even_df)
    odd_df = pd.read_csv(os.path.join(folder, "receipt_odd.csv"))
    odd_df = pm4py.format_dataframe(odd_df)
    relations = pd.read_csv(os.path.join(folder, "case_relations.csv"))
    # Merge the two logs into one and show the directly-follows graph.
    merged = case_relations_merging.apply(even_df, odd_df, relations)
    dfg, sa, ea = pm4py.discover_dfg(merged)
    pm4py.view_dfg(dfg, sa, ea, format="svg")
def execute_script():
    """Show the performance spectrum of three activities, from XES and CSV."""
    # The same three activities are inspected for both input formats.
    activities = ["Confirmation of receipt",
                  "T04 Determine confirmation of receipt",
                  "T10 Determine necessity to stop indication"]
    # Spectrum computed on an event log read from XES ...
    log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "receipt.xes"))
    pm4py.view_performance_spectrum(log, activities, format="svg")
    # ... and on the equivalent dataframe read from CSV.
    df = pd.read_csv(os.path.join("..", "tests", "input_data", "receipt.csv"))
    df = pm4py.format_dataframe(df)
    pm4py.view_performance_spectrum(df, activities, format="svg")
def test_csv(self):
    """Round-trip a CSV log through the pm4py object model and XES export."""
    frame = pd.read_csv("input_data/running-example.csv")
    frame = pm4py.format_dataframe(frame,
                                   case_id="case:concept:name",
                                   activity_key="concept:name",
                                   timestamp_key="time:timestamp")
    # Convert dataframe -> event log -> stream / dataframe.
    log2 = pm4py.convert_to_event_log(frame)
    stream1 = pm4py.convert_to_event_stream(log2)
    df2 = pm4py.convert_to_dataframe(log2)
    # Export to XES, then remove the temporary file.
    pm4py.write_xes(log2, "test_output_data/log.xes")
    os.remove("test_output_data/log.xes")
def test_statistics_df(self):
    """Smoke-test the basic log statistics on a dataframe-backed log."""
    table = pd.read_csv("input_data/running-example.csv")
    table = pm4py.format_dataframe(table,
                                   case_id="case:concept:name",
                                   activity_key="concept:name",
                                   timestamp_key="time:timestamp")
    # Return values are intentionally discarded: only crashes would fail.
    pm4py.get_start_activities(table)
    pm4py.get_end_activities(table)
    pm4py.get_attributes(table)
    pm4py.get_attribute_values(table, "org:resource")
    pm4py.get_variants(table)
def execute_script():
    """Mine the interleavings between two related logs and visualize them."""
    folder = os.path.join("..", "tests", "input_data", "interleavings")
    # Two halves of the receipt log plus the table linking their cases.
    even_log = pd.read_csv(os.path.join(folder, "receipt_even.csv"))
    even_log = pm4py.format_dataframe(even_log)
    odd_log = pd.read_csv(os.path.join(folder, "receipt_odd.csv"))
    odd_log = pm4py.format_dataframe(odd_log)
    relations = pd.read_csv(os.path.join(folder, "case_relations.csv"))
    interleavings = interleavings_miner.apply(even_log, odd_log, relations)
    print(interleavings)
    # Frequency and direction of the interleavings.
    print(interleavings[["@@source_activity", "@@target_activity",
                         "@@direction"]].value_counts())
    # Performance (mean timestamp difference) of the interleavings.
    print(interleavings.groupby(
        ["@@source_activity", "@@target_activity",
         "@@direction"])["@@timestamp_diff"].agg("mean"))
    # Visualize the interleavings, first annotated by frequency ...
    gviz_freq = interleavings_visualizer.apply(
        even_log, odd_log, interleavings,
        parameters={"annotation": "frequency", "format": "svg"})
    interleavings_visualizer.view(gviz_freq)
    # ... then by performance, aggregated with the median.
    gviz_perf = interleavings_visualizer.apply(
        even_log, odd_log, interleavings,
        parameters={"annotation": "performance",
                    "aggregation_measure": "median",
                    "format": "svg"})
    interleavings_visualizer.view(gviz_perf)
def execute_script():
    """Print the resource-based indicators (RBIs) of the running example."""
    frame = pd.read_csv(
        os.path.join("..", "tests", "input_data", "running-example.csv"))
    frame = pm4py.format_dataframe(frame)
    # Almost every metric is evaluated on the same time slot [t1, t2).
    t1 = "2010-12-30 00:00:00"
    t2 = "2011-01-25 00:00:00"
    # RBI 1.1: number of distinct activities done by a resource in [t1, t2).
    print(algorithm.distinct_activities(frame, t1, t2, "Sara"))
    # RBI 1.3: fraction of completions of "decide" by Sara with respect to
    # all of her activity completions during [t1, t2).
    print(algorithm.activity_frequency(frame, t1, t2, "Sara", "decide"))
    # RBI 2.1: number of activity instances completed by Sara in the slot.
    print(algorithm.activity_completions(frame, t1, t2, "Sara"))
    # RBI 2.2: number of cases completed in the slot involving Pete.
    print(algorithm.case_completions(frame, t1, t2, "Pete"))
    # RBI 2.3: fraction of completed cases involving Pete with respect to
    # all cases completed during the slot.
    print(algorithm.fraction_case_completions(frame, t1, t2, "Pete"))
    # RBI 2.4: average number of activities started by Mike but not yet
    # completed at a moment in time (note the shorter interval here).
    print(algorithm.average_workload(frame, t1, "2011-01-15 00:00:00", "Mike"))
    # RBI 3.1: fraction of Mike's active time spent in more than one activity.
    print(algorithm.multitasking(frame, t1, t2, "Mike"))
    # RBI 4.3: average duration of "examine thoroughly" instances completed
    # by Sue during the slot.
    print(algorithm.average_duration_activity(frame, t1, t2, "Sue",
                                              "examine thoroughly"))
    # RBI 4.4: average duration of cases completed in the slot involving Sue.
    print(algorithm.average_case_duration(frame, t1, t2, "Sue"))
    # RBI 5.1: number of cases completed in the slot involving both
    # Mike and Pete.
    print(algorithm.interaction_two_resources(frame, t1, t2, "Mike", "Pete"))
    # RBI 5.2: fraction of resources sharing cases with Sue with respect to
    # all resources active during the slot.
    print(algorithm.social_position(frame, t1, t2, "Sue"))
def execute_script():
    """Print the trace-position summaries of two receipt-log activities."""
    receipt_df = pd.read_csv(
        os.path.join("..", "tests", "input_data", "receipt.csv"))
    receipt_df = pm4py.format_dataframe(receipt_df)
    # Summarize at which positions inside their traces these activities occur.
    for act in ("Confirmation of receipt",
                "T02 Check confirmation of receipt"):
        print(pm4py.get_activity_position_summary(receipt_df, act))
def execute_script():
    """Run heuristics++ discovery on an interval log (CSV and XES forms)."""
    frame = pd.read_csv("../tests/input_data/interval_event_log.csv")
    frame = pm4py.format_dataframe(frame)
    log = pm4py.read_xes("../tests/input_data/interval_event_log.xes")
    # Performance-decorated heuristics nets from the log and the dataframe.
    heu_net = plusplus.apply_heu(
        log, parameters={"heu_net_decoration": "performance"})
    heu_net_2 = plusplus.apply_heu_pandas(
        frame, parameters={"heu_net_decoration": "performance"})
    for net in (heu_net, heu_net_2):
        gviz = visualizer.apply(net, parameters={"format": "svg"})
        visualizer.view(gviz)
    # Petri nets discovered by heuristics++.
    # NOTE(review): both calls discover from the same XES log; the second was
    # perhaps meant to use the dataframe variant — confirm with the authors.
    net1, im1, fm1 = plusplus.apply(log)
    net2, im2, fm2 = plusplus.apply(log)
    for net, im, fm in ((net1, im1, fm1), (net2, im2, fm2)):
        gviz = pn_visualizer.apply(net, im, fm, parameters={"format": "svg"})
        pn_visualizer.view(gviz)
def exportSubP(self, y_pred, center, name, encoding, alg):
    """Export every cluster of sessions as a separate XES sub-log.

    Parameters
    ----------
    y_pred : cluster label assigned to each session (parallel to
        ``self.sessions``).
    center : cluster centers, indexed by cluster label (the caller,
        ``convertLog``, passes its ``centers`` array here).
    name : base name of the output files.
    encoding, alg : components of the output directory path.
    """
    path = os.pardir + '/outputLog/' + encoding + '/' + alg
    try:
        os.makedirs(path, exist_ok=True)
    except OSError:
        print("Creation of the directory %s failed" % path)
    else:
        print("Successfully created the directory %s " % path)
    # Bucket the exported events of every session by its cluster label.
    frames = [[] for i in range(max(y_pred) + 1)]
    for i, s in enumerate(self.sessions):
        frames[y_pred[i]].extend(s.export(self.attrNames, i))
    for i in range(max(y_pred) + 1):
        # The highest-weighted dimension of the cluster center names the
        # sub-process appended to the file name.
        # BUG FIX: the original referenced the undefined name `centers`;
        # the parameter is called `center`.
        ind = list(
            np.flip(np.argsort(center[i][:len(self.distinct)])[-1:]))
        subP = concatName(self.distinct, ind)
        newFile = name + str(i) + subP + '.xes'
        log = pd.concat(frames[i], ignore_index=True)
        log = pm.format_dataframe(log,
                                  case_id='case',
                                  activity_key='concept:name',
                                  timestamp_key='time:timestamp')
        log = pm.convert_to_event_log(log)
        pm.write_xes(log, os.path.join(path, newFile))
    print("Sessions exported")
def convertLog(self, centers, y_pred, name, encoding, alg, datapath, exportSes=False):
    """Abstract the clustered sessions into one event log and write XES files.

    Each session is abstracted with the center of its assigned cluster; the
    resulting log is written whole and as a chronological 70/30 train/test
    split. When ``exportSes`` is True, per-cluster sub-logs are exported too.

    Parameters
    ----------
    centers : cluster centers, indexed by cluster label.
    y_pred : cluster label per session (parallel to ``self.sessions``).
    name : base name of the output files.
    encoding, alg : forwarded to ``exportSubP`` for the sub-log directory.
    datapath : directory receiving the XES files.
    exportSes : also export per-cluster sub-logs (default False).
    """
    start = time.time()
    # Abstract every session with the center of its assigned cluster.
    frames = []
    for i, s in enumerate(self.sessions):
        frames.append(s.convertSession(centers[y_pred[i]], y_pred[i],
                                       self.distinct, self.attrNames))
    # PERF FIX: concatenate and format once, after the loop — the original
    # re-ran pd.concat/format_dataframe on every iteration (quadratic work)
    # even though only the final result is used.
    log = pd.concat(frames, ignore_index=True)
    log = pm.format_dataframe(log,
                              case_id='case',
                              activity_key='concept:name',
                              timestamp_key='time:timestamp')
    # Chronological 70/30 split of the events.
    num = math.ceil(len(log) * 0.7)
    log1 = log[:num]
    log2 = log[num:]
    log = pm.convert_to_event_log(log)
    log1 = pm.convert_to_event_log(log1)
    log2 = pm.convert_to_event_log(log2)
    pm.write_xes(log1, os.path.join(datapath, name + "train.xes"))
    pm.write_xes(log2, os.path.join(datapath, name + "test.xes"))
    pm.write_xes(log, os.path.join(datapath, name + ".xes"))
    if exportSes:
        self.exportSubP(y_pred, centers, name, encoding, alg)
    print("Convertion Time:", time.time() - start)
""" import pandas as pd import pm4py ''' def import_csv(file_path): event_log = pandas.read_csv(file_path, sep=';') event_log = pm4py.format_dataframe(event_log, case_id='case_id', activity_key='activity', timestamp_key='timestamp') start_activities = pm4py.get_start_activities(event_log) end_activities = pm4py.get_end_activities(event_log) print("Start activities: {}\nEnd activities: {}".format(start_activities, end_activities)) ''' file_path = r'file' log = pm4py.format_dataframe(pd.read_csv(file_path, sep=';'), case_id='case_id', activity_key='activity', timestamp_key='timestamp') #log = log[log['@@index']< 40] process_tree = pm4py.discover_tree_inductive(log) bpmn_model = pm4py.convert_to_bpmn(process_tree) pm4py.view_bpmn(bpmn_model) from pm4py.algo.discovery.inductive import algorithm as inductive_miner from pm4py.visualization.process_tree import visualizer as pt_visualizer tree = inductive_miner.apply_tree(log) gviz = pt_visualizer.apply(tree) pt_visualizer.view(gviz)
def gerar_log_eventos(ramo_justica, codtribunal, atuacao, cluster, grau,
                      codorgaoj, codnatureza, codclasse, dtinicio, dtfim,
                      baixado=None, sensibility='60'):
    """Build (and cache) the pm4py event log of case movements.

    The filters select rows of the per-branch fact table; each process
    (npu) becomes a case and each movement an event. Results are cached
    per filter combination and evicted after 15 minutes. When
    ``sensibility`` is not None, only the most frequent variants
    (``sensibility`` percent of cases) are kept.
    """
    eventLog = None
    cacheKey = "{0}-{1}-{2}-{3}-{4}-{5}-{6}-{7}-{8}-{9}-{10}".format(
        ramo_justica, codtribunal, atuacao, cluster, grau, codorgaoj,
        codnatureza, codclasse, dtinicio, dtfim, baixado)
    cachedEventLog = eventLogCache.get(cacheKey)
    if cachedEventLog is not None:
        eventLog = cachedEventLog
    else:
        conn = psycopg2.connect(host=db_host, port=db_port, database=db_name,
                                user=db_user, password=db_pass)
        try:
            # Table names are derived from the controlled ramos_justica map,
            # never from raw user input.
            sufixo_ramo = ramos_justica.get(ramo_justica, 'default')
            tabela_fato = "inovacnj.fat_movimento_" + sufixo_ramo
            qry = "SELECT "
            qry += "  fat.npu as npu, "
            qry += "  CASE "
            qry += "    WHEN f.descricao IS NULL THEN fat.mov_cod || ' - ' || mov.descricao "
            qry += "    ELSE f.descricao || ': ' || fat.mov_cod || ' - ' || mov.descricao "
            qry += "  END AS atividade, "
            qry += "  fat.mov_dtmov as mov_dtmov "
            qry += "FROM " + tabela_fato + " fat "
            qry += "INNER JOIN inovacnj.acervo_processo_" + sufixo_ramo + " ap ON ap.npu = fat.npu "
            qry += "INNER JOIN inovacnj.orgao_julgador oj ON oj.cod::varchar = fat.oj_cod "
            qry += "INNER JOIN inovacnj.clusteroj_orgjulg cojoj ON cojoj.cod_orgao_julg = oj.cod "
            qry += "INNER JOIN inovacnj.movimentocnj mov ON mov.cod = fat.mov_cod "
            qry += "INNER JOIN inovacnj.natureza_classe nc ON nc.cod_classe = fat.codclasse "
            qry += "INNER JOIN inovacnj.natureza nat ON nat.cod = nc.cod_natureza "
            qry += "LEFT JOIN inovacnj.fase_movimento fm ON fm.cod_movimento = fat.mov_cod "
            qry += "LEFT JOIN inovacnj.fase f ON f.cod = fm.cod_fase "
            qry += "WHERE (1=1) "
            # SECURITY FIX: filter values are bound as query parameters
            # instead of being concatenated into the SQL text — the original
            # was vulnerable to SQL injection.
            params = []
            if baixado is not None:
                qry += "AND ap.baixado = %s "
                params.append(baixado)
            if codtribunal is not None:
                qry += "AND fat.codtribunal = %s "
                params.append(codtribunal)
            if atuacao is not None:
                qry += "AND oj.atuacao_vara = %s "
                params.append(atuacao)
            if cluster is not None:
                qry += "AND cojoj.cod_cluster = %s "
                params.append(cluster)
            if codorgaoj is not None:
                qry += "AND fat.oj_cod = %s "
                params.append(codorgaoj)
            if grau is not None:
                qry += "AND fat.grau = %s "
                params.append(grau)
            if codnatureza is not None:
                qry += "AND nat.cod = %s "
                params.append(codnatureza)
            if codclasse is not None:
                qry += "AND fat.codclasse = %s "
                params.append(codclasse)
            if dtinicio is not None and dtfim is not None:
                qry += "AND fat.mov_dtmov BETWEEN to_timestamp(%s, 'yyyy-MM-dd') AND to_timestamp(%s, 'yyyy-MM-dd') "
                params.extend([dtinicio, dtfim])
            qry += "ORDER BY fat.npu, fat.mov_dtmov ASC "
            df_logeventos_pd = pd.read_sql_query(qry, conn, params=params)
        finally:
            # FIX: the original never closed the connection (resource leak).
            conn.close()
        if df_logeventos_pd.empty == False:
            df_event_log = pm4py.format_dataframe(df_logeventos_pd,
                                                  case_id='npu',
                                                  activity_key='atividade',
                                                  timestamp_key='mov_dtmov')
            eventLog = pm4py.convert_to_event_log(df_event_log)
            eventLogCache[cacheKey] = eventLog
            # Evict this entry from the cache after 15 minutes.
            timer3.apply_after(1000 * 60 * 15, clear_eventlog_cache,
                               args=([cacheKey]), priority=0)
    if eventLog is not None:
        if sensibility is not None:
            eventLog = pm4py.filter_variants_percentage(
                eventLog, percentage=float(sensibility) / 100)
    return eventLog
def test_serialization_dataframe(self):
    """A dataframe log must survive a serialize/deserialize round trip."""
    frame = pd.read_csv("input_data/running-example.csv")
    frame = pm4py.format_dataframe(frame)
    payload = pm4py.serialize(frame)
    restored = pm4py.deserialize(payload)
def test_new_statistics_df(self):
    """Smoke-test the newer dataframe statistics entry points."""
    frame = pd.read_csv("input_data/running-example.csv")
    frame = pm4py.format_dataframe(frame)
    # Return values are intentionally discarded: only crashes would fail.
    pm4py.get_trace_attribute_values(frame, "case:creator")
    pm4py.discover_eventually_follows_graph(frame)
    pm4py.get_case_arrival_average(frame)
def execute_script():
    """End-to-end tour of the pm4py simplified interface.

    Reads the running example (XES and CSV), discovers DFG / Petri nets /
    process tree / heuristics net, round-trips every model through its file
    format, optionally visualizes everything, runs conformance checking and
    the log statistics/filters, and finally deletes all temporary files.
    """
    ENABLE_VISUALIZATION = True

    # reads a XES into an event log
    log1 = pm4py.read_xes("../tests/input_data/running-example.xes")
    # reads a CSV into a dataframe
    df = pd.read_csv("../tests/input_data/running-example.csv")
    # formats the dataframe with the mandatory columns for process mining purposes
    df = pm4py.format_dataframe(df,
                                case_id="case:concept:name",
                                activity_key="concept:name",
                                timestamp_key="time:timestamp")
    # converts the dataframe to an event log
    log2 = pm4py.convert_to_event_log(df)
    # converts the log read from XES into a stream and dataframe respectively
    stream1 = pm4py.convert_to_event_stream(log1)
    df2 = pm4py.convert_to_dataframe(log1)
    # writes the log1 to a XES file
    pm4py.write_xes(log1, "ru1.xes")

    # Discover models with every available miner.
    dfg, dfg_sa, dfg_ea = pm4py.discover_dfg(log1)
    petri_alpha, im_alpha, fm_alpha = pm4py.discover_petri_net_alpha(log1)
    petri_inductive, im_inductive, fm_inductive = pm4py.discover_petri_net_inductive(log1)
    petri_heuristics, im_heuristics, fm_heuristics = pm4py.discover_petri_net_heuristics(log1)
    tree_inductive = pm4py.discover_tree_inductive(log1)
    heu_net = pm4py.discover_heuristics_net(log1)

    # Serialize every discovered model to its file format ...
    pm4py.write_dfg(dfg, dfg_sa, dfg_ea, "ru_dfg.dfg")
    pm4py.write_petri_net(petri_alpha, im_alpha, fm_alpha, "ru_alpha.pnml")
    pm4py.write_petri_net(petri_inductive, im_inductive, fm_inductive,
                          "ru_inductive.pnml")
    pm4py.write_petri_net(petri_heuristics, im_heuristics, fm_heuristics,
                          "ru_heuristics.pnml")
    pm4py.write_process_tree(tree_inductive, "ru_inductive.ptml")

    # ... and read each one back (round-trip check of the I/O functions).
    dfg, dfg_sa, dfg_ea = pm4py.read_dfg("ru_dfg.dfg")
    petri_alpha, im_alpha, fm_alpha = pm4py.read_petri_net("ru_alpha.pnml")
    petri_inductive, im_inductive, fm_inductive = pm4py.read_petri_net(
        "ru_inductive.pnml")
    petri_heuristics, im_heuristics, fm_heuristics = pm4py.read_petri_net(
        "ru_heuristics.pnml")
    tree_inductive = pm4py.read_process_tree("ru_inductive.ptml")

    # Save a picture of every model.
    pm4py.save_vis_petri_net(petri_alpha, im_alpha, fm_alpha, "ru_alpha.png")
    pm4py.save_vis_petri_net(petri_inductive, im_inductive, fm_inductive,
                             "ru_inductive.png")
    pm4py.save_vis_petri_net(petri_heuristics, im_heuristics, fm_heuristics,
                             "ru_heuristics.png")
    pm4py.save_vis_process_tree(tree_inductive, "ru_inductive_tree.png")
    pm4py.save_vis_heuristics_net(heu_net, "ru_heunet.png")
    pm4py.save_vis_dfg(dfg, dfg_sa, dfg_ea, "ru_dfg.png")

    # Interactive visualization of the same models (guarded by the flag).
    if ENABLE_VISUALIZATION:
        pm4py.view_petri_net(petri_alpha, im_alpha, fm_alpha, format="svg")
        pm4py.view_petri_net(petri_inductive, im_inductive, fm_inductive,
                             format="svg")
        pm4py.view_petri_net(petri_heuristics, im_heuristics, fm_heuristics,
                             format="svg")
        pm4py.view_process_tree(tree_inductive, format="svg")
        pm4py.view_heuristics_net(heu_net, format="svg")
        pm4py.view_dfg(dfg, dfg_sa, dfg_ea, format="svg")

    # Conformance checking of log1 against the inductive-miner Petri net.
    aligned_traces = pm4py.conformance_alignments(log1, petri_inductive,
                                                  im_inductive, fm_inductive)
    replayed_traces = pm4py.conformance_tbr(log1, petri_inductive,
                                            im_inductive, fm_inductive)
    fitness_tbr = pm4py.evaluate_fitness_tbr(log1, petri_inductive,
                                             im_inductive, fm_inductive)
    print("fitness_tbr", fitness_tbr)
    fitness_align = pm4py.evaluate_fitness_alignments(log1, petri_inductive,
                                                      im_inductive,
                                                      fm_inductive)
    print("fitness_align", fitness_align)
    precision_tbr = pm4py.evaluate_precision_tbr(log1, petri_inductive,
                                                 im_inductive, fm_inductive)
    print("precision_tbr", precision_tbr)
    precision_align = pm4py.evaluate_precision_alignments(
        log1, petri_inductive, im_inductive, fm_inductive)
    print("precision_align", precision_align)

    # Statistics: the log object and the dataframe must agree.
    print("log start activities = ", pm4py.get_start_activities(log2))
    print("df start activities = ", pm4py.get_start_activities(df2))
    print("log end activities = ", pm4py.get_end_activities(log2))
    print("df end activities = ", pm4py.get_end_activities(df2))
    print("log attributes = ", pm4py.get_attributes(log2))
    print("df attributes = ", pm4py.get_attributes(df2))
    print("log org:resource values = ",
          pm4py.get_attribute_values(log2, "org:resource"))
    print("df org:resource values = ",
          pm4py.get_attribute_values(df2, "org:resource"))
    # Filters: start/end activities, attribute values, variants, paths.
    print("start_activities len(filt_log) = ",
          len(pm4py.filter_start_activities(log2, ["register request"])))
    print("start_activities len(filt_df) = ",
          len(pm4py.filter_start_activities(df2, ["register request"])))
    print("end_activities len(filt_log) = ",
          len(pm4py.filter_end_activities(log2, ["pay compensation"])))
    print("end_activities len(filt_df) = ",
          len(pm4py.filter_end_activities(df2, ["pay compensation"])))
    print(
        "attributes org:resource len(filt_log) (cases) cases = ",
        len(
            pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"],
                                          level="case")))
    print(
        "attributes org:resource len(filt_log) (cases) events = ",
        len(
            pm4py.filter_attribute_values(log2, "org:resource", ["Ellen"],
                                          level="event")))
    print(
        "attributes org:resource len(filt_df) (events) cases = ",
        len(
            pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"],
                                          level="case")))
    print(
        "attributes org:resource len(filt_df) (events) events = ",
        len(
            pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"],
                                          level="event")))
    print(
        "attributes org:resource len(filt_df) (events) events notpositive = ",
        len(
            pm4py.filter_attribute_values(df2, "org:resource", ["Ellen"],
                                          level="event", retain=False)))
    print("variants log = ", pm4py.get_variants(log2))
    print("variants df = ", pm4py.get_variants(df2))
    print(
        "variants filter log = ",
        len(
            pm4py.filter_variants(log2, [[
                "register request", "examine thoroughly", "check ticket",
                "decide", "reject request"
            ]])))
    print(
        "variants filter df = ",
        len(
            pm4py.filter_variants(df2, [[
                "register request", "examine thoroughly", "check ticket",
                "decide", "reject request"
            ]])))
    print("variants filter percentage = ",
          len(pm4py.filter_variants_percentage(log2, threshold=0.8)))
    print(
        "paths filter log len = ",
        len(
            pm4py.filter_directly_follows_relation(
                log2, [("register request", "examine casually")])))
    print(
        "paths filter dataframe len = ",
        len(
            pm4py.filter_directly_follows_relation(
                df2, [("register request", "examine casually")])))
    # Time-range filters, in every mode, on both representations.
    print(
        "timeframe filter log events len = ",
        len(
            pm4py.filter_time_range(log2, "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00", mode="events")))
    print(
        "timeframe filter log traces_contained len = ",
        len(
            pm4py.filter_time_range(log2, "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="traces_contained")))
    print(
        "timeframe filter log traces_intersecting len = ",
        len(
            pm4py.filter_time_range(log2, "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="traces_intersecting")))
    print(
        "timeframe filter df events len = ",
        len(
            pm4py.filter_time_range(df2, "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00", mode="events")))
    print(
        "timeframe filter df traces_contained len = ",
        len(
            pm4py.filter_time_range(df2, "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="traces_contained")))
    print(
        "timeframe filter df traces_intersecting len = ",
        len(
            pm4py.filter_time_range(df2, "2011-01-01 00:00:00",
                                    "2011-02-01 00:00:00",
                                    mode="traces_intersecting")))

    # remove the temporary files
    os.remove("ru1.xes")
    os.remove("ru_dfg.dfg")
    os.remove("ru_alpha.pnml")
    os.remove("ru_inductive.pnml")
    os.remove("ru_heuristics.pnml")
    os.remove("ru_inductive.ptml")
    os.remove("ru_alpha.png")
    os.remove("ru_inductive.png")
    os.remove("ru_heuristics.png")
    os.remove("ru_inductive_tree.png")
    os.remove("ru_heunet.png")
    os.remove("ru_dfg.png")
elif contador == 1: linha = linha.rstrip('\n') variavel2 = linha elif contador == 2: linha = linha.rstrip('\n') variavel3 = linha contador += 1 entrada.close() pd.set_option('max_columns', None) logCsv = pd.read_csv('Production_Data.csv', sep=',') logCsv = dataframe_utils.convert_timestamp_columns_in_df(logCsv) logCsv = pm4py.format_dataframe(logCsv, case_id=f'{variavel1}', activity_key=f'{variavel2}', timestamp_key=f'{variavel3}') logCsv = log_converter.apply(logCsv) from pm4py.algo.discovery.heuristics import algorithm as heuristics_miner net, im, fm = heuristics_miner.apply( logCsv, parameters={ heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99 }) from pm4py.visualization.petri_net import visualizer as pn_visualizer gviz = pn_visualizer.apply(net, im, fm)
def log_import(file_path, file_format, log_information):
    """Import an event log with PM4Py and normalize it for processing.

    Parameters
    ----------
    file_path : str
        Path of the log file to load.
    file_format : str
        Either "csv" or "xes".
    log_information : dict
        Column mapping with keys "log_type", "case_id", "concept_name",
        "timestamp" (plus "lifecycle" or "start_timestamp"/"end_timestamp",
        depending on the log type).

    Returns
    -------
    tuple
        (log, activities): the loaded log (formatted dataframe for CSV, a
        PM4Py event log for XES) and the set of all activity names.
    """
    activities = set()
    if file_format == "csv":
        # TODO Apply further file integrity check
        log = pd.read_csv(file_path)
        # Transform the Timestamp to Datetime
        if log_information["log_type"] == "noninterval":
            log[log_information["timestamp"]] = pd.to_datetime(
                log[log_information["timestamp"]], utc=True)
            log = format_dataframe(
                log,
                case_id=log_information["case_id"],
                activity_key=log_information["concept_name"],
                timestamp_key=log_information["timestamp"],
            )
        # Transform the Timestamp to Datetime, and rename the
        # transition:lifecycle column to the XES default.
        elif log_information["log_type"] == "lifecycle":
            # Convert the Timestamps to Datetime
            log[log_information["timestamp"]] = pd.to_datetime(
                log[log_information["timestamp"]], utc=True)
            log = format_dataframe(
                log,
                case_id=log_information["case_id"],
                activity_key=log_information["concept_name"],
                timestamp_key=log_information["timestamp"],
            )
            # Rename the Columns to the XES defaults
            log = log.rename(
                {log_information["lifecycle"]: xes.DEFAULT_TRANSITION_KEY},
                axis=1)
        # Interval logs: both start and end timestamps are converted; the
        # end timestamp is used as the main XES timestamp.
        elif log_information["log_type"] == "timestamp":
            # Convert the Timestamps to Datetime
            log[log_information["end_timestamp"]] = pd.to_datetime(
                log[log_information["end_timestamp"]], utc=True)
            log[log_information["start_timestamp"]] = pd.to_datetime(
                log[log_information["start_timestamp"]], utc=True)
            log = format_dataframe(
                log,
                case_id=log_information["case_id"],
                activity_key=log_information["concept_name"],
                timestamp_key=log_information["end_timestamp"],
            )
            # Rename the Columns to the XES defaults
            log = log.rename(
                {
                    log_information["start_timestamp"]:
                    xes.DEFAULT_START_TIMESTAMP_KEY
                },
                axis=1,
            )
        # NOTE(review): placed at the csv-branch level so every CSV variant
        # collects its activities — confirm against the original indentation.
        activities = set(log[xes.DEFAULT_NAME_KEY].unique())
    # Simply load the log using XES
    elif file_format == "xes":
        log = xes_importer.apply(file_path,
                                 parameters={"show_progress_bar": False})
        for trace in log:
            for event in trace:
                activities.add(event[log_information["concept_name"]])
    else:
        # TODO Throw some Warning / Show a warning Message in the Console
        # NOTE(review): `log` is unbound on this path, so the return below
        # would raise NameError — confirm intended handling.
        print("Invalid Filepath")
    return log, activities
# Load the input log named by `filename`: XES files are imported
# (timestamp-sorted) via the pm4py XES importer; CSV files are read with
# pandas (';' separator), formatted with the standard columns, and converted
# to an event log object.
try:
    if filename[-4:] == '.xes':
        variant = xes_importer.Variants.ITERPARSE
        # Sort the events by timestamp while parsing.
        parameters = {variant.value.Parameters.TIMESTAMP_SORT: True}
        log = xes_importer.apply(os.path.join(
            os.path.dirname(os.path.abspath(__file__)), filename),
                                 variant=variant,
                                 parameters=parameters)
        # Record where the log came from.
        log.attributes['origin'] = 'xes'
    elif filename[-4:] == '.csv':
        log = pandas.read_csv(os.path.join(
            os.path.dirname(os.path.abspath(__file__)), filename),
                              sep=';')
        log = format_dataframe(log,
                               case_id='case_id',
                               activity_key='activity',
                               timestamp_key='timestamp',
                               timest_format='%Y-%m-%d %H:%M:%S%z')
        log = log_converter.apply(log)
except FileNotFoundError:
    print(f'No files with name "(unknown)" found in folder.')
    exit()
except CustomException:
    print('Please only give the name of a file formatted in .xes or .csv')
    exit()
"""
This section is for testing different abilities of pm4py and will be cleaned up later
"""
# filtered_log = filtered_log_events = timestamp_filter.apply_events(log, "2010-12-30 00:00:00", "2011-01-01 23:59:59")
# parameters = {inductive_miner.Variants.IM.value.Parameters.ACTIVITY_KEY: 'concept:name'}
# tree = inductive_miner.apply_tree(filtered_log, variant=inductive_miner.Variants.IM)
# net, im, fm = pt_converter.apply(tree, variant=pt_converter.Variants.TO_PETRI_NET)
# print(tree)
import pandas as pd import pm4py import os os.environ['PATH'] += os.pathsep + '/usr/local/bin' ##1 # Preparing the log event_log = pd.read_csv('running-exampleNew.csv', sep=';') event_log = pm4py.format_dataframe(event_log, case_id='case_id', activity_key='activity', timestamp_key='timestamp') start_activities = pm4py.get_start_activities(event_log) end_activities = pm4py.get_end_activities(event_log) print("Start activities: {}\nEnd activities: {}".format( start_activities, end_activities)) # Convert from CSV to XES pm4py.write_xes(event_log, 'running-example-exported.xes') # Algorithm alpha log = pm4py.read_xes('running-example-exported.xes') net, initial_marking, final_marking = pm4py.algo.discovery.alpha.algorithm.apply( log) pm4py.view_petri_net(net, initial_marking, final_marking) ##2 # Preparing the log --- Resource event_log = pd.read_csv('running-exampleNew.csv', sep=';') event_log = pm4py.format_dataframe(event_log, case_id='case_id',