def find_all_process_most_frequent(varas_group):
    """Count how often each macro-step trace occurs across the given court units (varas).

    Relies on df_log, macrosteps and find_all_macro_trace being defined in the
    enclosing scope.
    """
    macro_trace_processes = {}
    for vara in varas_group:
        df_vara = df_log[df_log['case: orgao'] == vara]
        p = PreProcess(df=df_vara)
        p.select_desired_columns()
        p.filter_outlier_timestamp()
        p.filter_outlier_movements(lower=0.05, upper=0.95)
        p.filter_outlier_trace_time(lower=0.05, upper=0.95)
        l = Log(df_log=p.df_log.sort_values('time:timestamp'))
        all_macro_trace = find_all_macro_trace(l.log, macrosteps)
        for tran in all_macro_trace:
            if tran not in macro_trace_processes:
                macro_trace_processes[tran] = 0
            macro_trace_processes[tran] += 1
    # sort by ascending frequency
    macro_trace_processes = {k: v for k, v in
                             sorted(macro_trace_processes.items(),
                                    key=lambda item: item[1])}
    return macro_trace_processes
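# Hedged usage sketch (not part of the original notebook): assuming varas_group,
# df_log, macrosteps and find_all_macro_trace are already defined in the session,
# the counter above could be inspected like this. The names freq_by_trace and
# top_traces are illustrative only.
freq_by_trace = find_all_process_most_frequent(varas_group)
# the dict is sorted by ascending count, so the most frequent traces come last
top_traces = list(freq_by_trace.items())[-5:]
for trace, count in top_traces:
    print(count, trace)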
from log.Log import Log
from log.PreProcess import PreProcess
from process.MacroSteps import MacroSteps

file_path = '/home/vercosa/Documentos/bases_desafio_cnj/'+\
            'versao6/resultado_1_0_null.csv'
# file_path = '/home/vercosa/Documentos/bases_desafio_cnj/'+\
#             'versao5/version_5.csv'
movement_path = '/home/vercosa/Insync/doutorado/hackaton_cnj/' + \
                'projeto_git/desafio_cnj/data/interim/df_movimentos.csv'

pp = PreProcess(file_location=file_path)
# pp.select_desired_columns()
pp.filter_outlier_timestamp()
pp.map_movements(movement_path)

# filter first movement to "Distribuição"
df_log = pp.df_log
df_log

# first movement and earliest timestamp per case
df_first = df_log.groupby('case:concept:name', as_index=False).\
    agg({'time:timestamp': 'min', 'concept:name': 'first'})
# df_first.groupby('concept:name', as_index=False).count().\
#     sort_values(by='case:concept:name', ascending=False)
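# Hedged sketch (assumption, not part of the original notebook): one way to act
# on the "filter first movement" comment above, keeping only the cases whose
# first recorded movement is "Distribuição", using the df_first aggregation
# already computed. The names cases_distribuicao and df_log_distribuicao are
# illustrative only.
cases_distribuicao = df_first.loc[
    df_first['concept:name'] == 'Distribuição', 'case:concept:name']
df_log_distribuicao = df_log[df_log['case:concept:name'].isin(cases_distribuicao)]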
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.statistics.traces.log import case_statistics
from pm4py.algo.discovery.dfg import parameters
from pm4py.visualization.dfg import visualizer as dfg_visualization

from log.Log import Log
from log.Log import INTENSE_FILTERING
from discovery.DFG import DFG
import visualization.Visualizer as Visualizer
from log.PreProcess import PreProcess

file_path = '/home/vercosa/Documentos/bases_desafio_cnj/'+\
            'log_vara_2.csv'

p = PreProcess(file_location=file_path)
p.select_desired_columns()
p.filter_outlier_timestamp()
p.filter_outlier_movements(lower=0.01, upper=0.99)
p.filter_outlier_trace_time(lower=0.01, upper=0.99)

l = Log(df_log=p.df_log.sort_values('time:timestamp'))
# l.filter_variants(1.1)

dfg = DFG(l.log,
          parameters={parameters.Parameters.AGGREGATION_MEASURE: 'mean'},
          variant=dfg_discovery.Variants.FREQUENCY)
dfg.filter_activities(number_act=10)
dfg.filter_edges(percentage=0.3)
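# Hedged visualization sketch (assumption, not taken from the original cell):
# rendering the filtered DFG with the pm4py visualizer imported above. It
# assumes the project's DFG wrapper exposes the underlying frequency graph as
# dfg.dfg (as suggested by its use elsewhere in this notebook); the gviz
# variable name is illustrative.
gviz = dfg_visualization.apply(dfg.dfg, log=l.log,
                               variant=dfg_visualization.Variants.FREQUENCY)
dfg_visualization.view(gviz)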
                       engine='python')
df_group = df_group[df_group['process_count'] > 50]
varas_group = df_group['case: orgao'].tolist()

dfg_dict = {}
act_dict = {}
act_dict_temp = {}
similar_group = {}
similar_group_ref = {}

# discover and filter one DFG per court unit (vara)
for vara in varas_group:
    df_temp = df_log[df_log['case: orgao'] == vara]
    p = PreProcess(df=df_temp)
    p.select_desired_columns()
    p.filter_outlier_timestamp()
    p.filter_outlier_movements(lower=0.01, upper=0.99)
    p.filter_outlier_trace_time(lower=0.01, upper=0.99)
    l = Log(df_log=p.df_log.sort_values('time:timestamp'))
    # l.filter_variants(1)
    dfg = DFG(l.log)
    dfg.filter_activities(number_act=10)
    dfg.filter_edges(percentage=0.3)
    dfg_dict[vara] = list(dfg.dfg)

# collect the set of activities appearing in each vara's DFG edges
for vara in dfg_dict.keys():
    act_dict[vara] = (list(set([item for sublist in dfg_dict[vara]
                                for item in sublist])),
                'projeto_git/desafio_cnj/data/interim/df_movimentos.csv'

vara = '2ª VARA CÍVEL DE SANTANA'
macrosteps = [
    'Distribuição',
    'Conclusão',
    'Despacho',
    'Decisão',
    'Julgamento',
    'Trânsito em julgado',
    'Baixa/Arquivamento',
]

pp = PreProcess(file_location=file_path)
pp.select_desired_columns()
pp.filter_outlier_timestamp()
pp.map_movements(movement_path)
df_log = pp.df_log

df_vara = df_log[df_log['case: orgao'] == vara]
pp_vara = PreProcess(df=df_vara)
pp_vara.filter_outlier_movements(lower=0.05, upper=0.95)
pp_vara.filter_outlier_trace_time(lower=0.05, upper=0.95)

log = Log(df_log=pp_vara.df_log.sort_values('time:timestamp'))
median_case_duration = case_statistics.\
    get_median_caseduration(log.log, parameters={