benchmark_result = []
for dataset, setting in benchmark_settings.items():
    print('\n=== Evaluation on %s ===' % dataset)
    indir = os.path.join(input_dir, os.path.dirname(setting['log_file']))
    log_file = os.path.basename(setting['log_file'])
    parser = LenMa.LogParser(log_format=setting['log_format'], indir=indir,
                             outdir=output_dir, rex=setting['regex'],
                             threshold=setting['threshold'])
    parser.parse(log_file)
    accuracy_PA, accuracy_exact_string_matching, edit_distance_result, \
        edit_distance_result_median = evaluator.evaluate(
            groundtruth=os.path.join(indir, log_file + '_structured.csv'),
            parsedresult=os.path.join(output_dir, log_file + '_structured.csv'))
    benchmark_result.append([dataset, accuracy_PA, accuracy_exact_string_matching,
                             edit_distance_result, edit_distance_result_median])

print('\n=== Overall evaluation results ===')
df_result = pd.DataFrame(benchmark_result,
                         columns=['Dataset', 'Accuracy_PA', 'Accuracy_ExactMatching',
                                  'Edit_distance', 'Edit_distance_median'])
df_result.set_index('Dataset', inplace=True)
print(df_result)
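# For reference, a minimal sketch of one benchmark_settings entry consumed by
# the loop above, showing the four keys it reads ('log_file', 'log_format',
# 'regex', 'threshold'). The 'HDFS' name, paths, format string, regexes and
# threshold value are illustrative placeholders, not the repo's actual
# configuration.
benchmark_settings_example = {
    'HDFS': {
        'log_file': 'HDFS/HDFS_2k.log',
        'log_format': '<Date> <Time> <Pid> <Level> <Component>: <Content>',
        'regex': [r'blk_-?\d+', r'(\d+\.){3}\d+(:\d+)?'],
        'threshold': 0.9,
    },
}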
        # (closing entries of the benchmark_settings dict)
        'maxChildNum': 4,
        'mergeThreshold': 0.002,
        'formatLookupThreshold': 0.3,
        'superFormatThreshold': 0.85,
    },
}

benchmark_result = []
for dataset, setting in benchmark_settings.items():  # iteritems() is Python 2 only
    print('\n=== Evaluation on %s ===' % dataset)
    indir = os.path.join(input_dir, os.path.dirname(setting['log_file']))
    log_file = os.path.basename(setting['log_file'])
    parser = SHISO.LogParser(log_format=setting['log_format'], indir=indir,
                             outdir=output_dir, rex=setting['regex'],
                             maxChildNum=setting['maxChildNum'],
                             mergeThreshold=setting['mergeThreshold'],
                             formatLookupThreshold=setting['formatLookupThreshold'],
                             superFormatThreshold=setting['superFormatThreshold'])
    parser.parse(log_file)
    F1_measure, accuracy = evaluator.evaluate(
        groundtruth=os.path.join(indir, log_file + '_structured.csv'),
        parsedresult=os.path.join(output_dir, log_file + '_structured.csv'))
    benchmark_result.append([dataset, F1_measure, accuracy])

print('\n=== Overall evaluation results ===')
df_result = pd.DataFrame(benchmark_result, columns=['Dataset', 'F1_measure', 'Accuracy'])
df_result.set_index('Dataset', inplace=True)
print(df_result)
df_result.T.to_csv('SHISO_benchmark_result.csv')
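# For reference, a complete benchmark_settings entry for SHISO pairs the
# dataset paths with the four tuning parameters read in the loop above; the
# 'HDFS' entry below is an illustrative placeholder, not the repo's actual
# configuration.
shiso_setting_example = {
    'HDFS': {
        'log_file': 'HDFS/HDFS_2k.log',
        'log_format': '<Date> <Time> <Pid> <Level> <Component>: <Content>',
        'regex': [r'blk_-?\d+'],
        'maxChildNum': 4,
        'mergeThreshold': 0.002,
        'formatLookupThreshold': 0.3,
        'superFormatThreshold': 0.85,
    },
}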
import time

from logparser import evaluator
from logparser import ADC
# from logparser.ADC import ADC_Token as ADC
from benchmark.ADC_benchmark import CONFIG_DICT
from logparser.utils.dataset import *
from dataEngineering.token_selection import get_token_list

dataset = DATASET.Android
# ADC.set_TOKEN_LIST(get_token_list(dataset))

parser = ADC.LogParser(
    # in_path='/home/zhixin/Desktop/Android.log',
    dataset=dataset,
    rex=CONFIG_DICT[dataset].rex,
    st=CONFIG_DICT[dataset].st,
    pre=CONFIG_DICT[dataset].pre)

# Wall-clock timing around the parse call; parser.parse() also reports its
# own elapsed time along with the output path.
start = time.perf_counter()
time_elapsed, out_path = parser.parse()
end = time.perf_counter()

# Compare the parsed output against the structured ground truth.
F1_measure, accuracy = evaluator.evaluate(
    groundtruth=log_path_structured(dataset),
    parsedresult=out_path)

print(F1_measure, accuracy, time_elapsed.total_seconds())
print(end - start)
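# A sketch of the shape CONFIG_DICT presumably has, given the .rex/.st/.pre
# attribute accesses above: a mapping from DATASET members to a small config
# object. ADCConfig and all values below are hypothetical, for illustration
# only; the real definitions live in benchmark.ADC_benchmark.
from collections import namedtuple

ADCConfig = namedtuple('ADCConfig', ['rex', 'st', 'pre'])

CONFIG_DICT_EXAMPLE = {
    DATASET.Android: ADCConfig(
        rex=[r'(/[\w-]+)+', r'\d+\.\d+\.\d+\.\d+'],  # masking regexes
        st=0.5,   # similarity threshold
        pre=1,    # preprocessing switch
    ),
}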