Пример #1
0
def find_all_process_most_frequent(varas_group):
    """Count how often each macro trace is produced across a group of courts.

    For every court ("vara") in ``varas_group`` the rows of the module-level
    ``df_log`` whose ``'case: orgao'`` column equals that court are selected,
    run through ``PreProcess`` (column selection, then the timestamp,
    movement and trace-time outlier filters with ``lower=0.05`` and
    ``upper=0.95``), wrapped in a ``Log`` sorted by ``'time:timestamp'`` and
    handed to ``find_all_macro_trace`` together with the module-level
    ``macrosteps``.

    Args:
        varas_group: Iterable of court names to look up in ``df_log``.

    Returns:
        dict: Maps each item yielded by ``find_all_macro_trace`` to the
        number of times it was yielded over all courts, ordered by
        ascending count. Empty when ``varas_group`` is empty.
    """
    macro_trace_processes = {}

    for vara in varas_group:
        df_vara = df_log[df_log['case: orgao'] == vara]

        pre = PreProcess(df=df_vara)
        pre.select_desired_columns()
        pre.filter_outlier_timestamp()
        pre.filter_outlier_movements(lower=0.05, upper=0.95)
        pre.filter_outlier_trace_time(lower=0.05, upper=0.95)

        vara_log = Log(df_log=pre.df_log.sort_values('time:timestamp'))
        all_macro_trace = find_all_macro_trace(vara_log.log, macrosteps)

        for tran in all_macro_trace:
            macro_trace_processes[tran] = \
                macro_trace_processes.get(tran, 0) + 1

    # The sorted mapping used to be left as a bare expression, so callers
    # always received None; return it instead. sorted() is stable, so traces
    # with equal counts keep their first-seen order.
    return dict(sorted(macro_trace_processes.items(),
                       key=lambda item: item[1]))
from log.Log import Log
from log.PreProcess import PreProcess
from process.MacroSteps import MacroSteps


# CSV handed to PreProcess below.
file_path = (
    '/home/vercosa/Documentos/bases_desafio_cnj/'
    'versao6/resultado_1_0_null.csv'
)

# Alternative input, kept disabled:
# file_path = (
#     '/home/vercosa/Documentos/bases_desafio_cnj/'
#     'versao5/version_5.csv'
# )

# CSV passed to PreProcess.map_movements.
movement_path = (
    '/home/vercosa/Insync/doutorado/hackaton_cnj/'
    'projeto_git/desafio_cnj/data/interim/df_movimentos.csv'
)

pp = PreProcess(file_location=file_path)
# pp.select_desired_columns()  # left disabled in this run
pp.filter_outlier_timestamp()
pp.map_movements(movement_path)

df_log = pp.df_log

# Original note: filter first movement to "Distribuição".
# One row per case with the minimum timestamp and the 'first' activity name.
# NOTE(review): 'first' takes the first row of each group in df_log's current
# order, which is the earliest movement only if df_log is already sorted by
# time - confirm.
df_first = (
    df_log
    .groupby('case:concept:name', as_index=False)
    .agg({'time:timestamp': 'min', 'concept:name': 'first'})
)

# Disabled exploration: number of cases per first activity, largest first.
# df_first.groupby('concept:name', as_index=False).count().\
#     sort_values(by='case:concept:name', ascending=False)
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.statistics.traces.log import case_statistics
from pm4py.algo.discovery.dfg import parameters
from pm4py.visualization.dfg import visualizer as dfg_visualization

from log.Log import Log
from log.Log import INTENSE_FILTERING
from discovery.DFG import DFG
import visualization.Visualizer as Visualizer
from log.PreProcess import PreProcess

# CSV handed to PreProcess below.
file_path = (
    '/home/vercosa/Documentos/bases_desafio_cnj/'
    'log_vara_2.csv'
)

# Load the log and apply the outlier filters with lower=0.01 / upper=0.99.
p = PreProcess(file_location=file_path)
p.select_desired_columns()
p.filter_outlier_timestamp()
p.filter_outlier_movements(lower=0.01, upper=0.99)
p.filter_outlier_trace_time(lower=0.01, upper=0.99)

# Build the Log from the filtered frame ordered by timestamp.
l = Log(
    df_log=p.df_log.sort_values('time:timestamp'),
)
# l.filter_variants(1.1)  # variant filtering left disabled

# Frequency DFG over the log, with the aggregation measure set to 'mean'.
dfg = DFG(
    l.log,
    parameters={parameters.Parameters.AGGREGATION_MEASURE: 'mean'},
    variant=dfg_discovery.Variants.FREQUENCY,
)

# Reduce the graph with the project's activity and edge filters.
dfg.filter_activities(number_act=10)
dfg.filter_edges(percentage=0.3)
Пример #4
0
                     engine='python')

# Keep only the df_group rows whose 'process_count' is above 50, then collect
# their court names.
df_group = df_group[df_group['process_count'] > 50]
varas_group = df_group['case: orgao'].tolist()

# Court name -> list built from that court's filtered DFG.
dfg_dict = {}

# Containers initialised here and filled further down the script.
act_dict, act_dict_temp = {}, {}
similar_group, similar_group_ref = {}, {}


for vara in varas_group:
    # Rows of the full log that belong to this court.
    df_temp = df_log[df_log['case: orgao'] == vara]

    # Same cleaning steps for every court, with lower=0.01 / upper=0.99.
    p = PreProcess(df=df_temp)
    p.select_desired_columns()
    p.filter_outlier_timestamp()
    p.filter_outlier_movements(lower=0.01, upper=0.99)
    p.filter_outlier_trace_time(lower=0.01, upper=0.99)

    l = Log(
        df_log=p.df_log.sort_values('time:timestamp'),
    )
    # l.filter_variants(1)  # variant filtering left disabled

    dfg = DFG(l.log)
    dfg.filter_activities(number_act=10)
    dfg.filter_edges(percentage=0.3)

    # Store whatever iterating dfg.dfg yields for this court.
    dfg_dict[vara] = list(dfg.dfg)


for vara in dfg_dict.keys():
    act_dict[vara] = (list(set([item for sublist in dfg_dict[vara] \
                                    for item in sublist])),
                'projeto_git/desafio_cnj/data/interim/df_movimentos.csv'

# Court whose cases are analysed below.
# NOTE(review): the name contains a replacement character followed by '?',
# presumably a mis-decoded accented letter. It is left untouched because it
# must match the values stored in the 'case: orgao' column - confirm against
# the data.
vara = '2ª VARA C�?VEL DE SANTANA'

# Macro step names, in the order they are listed here.
macrosteps = [
    'Distribuição',
    'Conclusão',
    'Despacho',
    'Decisão',
    'Julgamento',
    'Trânsito em julgado',
    'Baixa/Arquivamento',
]

# Load the full log, drop timestamp outliers and map the movements.
pp = PreProcess(file_location=file_path)
pp.select_desired_columns()
pp.filter_outlier_timestamp()
pp.map_movements(movement_path)

# Restrict the log to the selected court.
df_log = pp.df_log
df_vara = df_log[df_log['case: orgao'] == vara]

# Court-level outlier filtering with lower=0.05 / upper=0.95.
pp_vara = PreProcess(df=df_vara)
pp_vara.filter_outlier_movements(lower=0.05, upper=0.95)
pp_vara.filter_outlier_trace_time(lower=0.05, upper=0.95)

# Build the Log from the filtered frame ordered by timestamp.
log = Log(
    df_log=pp_vara.df_log.sort_values('time:timestamp'),
)

median_case_duration = case_statistics.\
  get_median_caseduration(log.log, parameters={