Exemplo n.º 1
0
#!/usr/bin/env python
"""Run the LenMa log parser on the bundled HDFS sample log."""
import sys

sys.path.append('../')  # make the sibling logparser package importable
from logparser import LenMa

input_dir = '../logs/HDFS/'  # Directory holding the raw log file
output_dir = 'Lenma_result/'  # Directory the structured results are written to
log_file = 'HDFS_2k.log'  # Name of the log file to parse
log_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>'  # HDFS log format
threshold = 0.9  # LenMa similarity threshold (default: 0.9)
regex = []  # Optional preprocessing regexes applied before matching (default: [])

parser = LenMa.LogParser(
    input_dir, output_dir, log_format, threshold=threshold, rex=regex)
parser.parse(log_file)
Exemplo n.º 2
0
        'log_format':
        '<Month>  <Date> <Time> <User> <Component>\[<PID>\]( \(<Address>\))?: <Content>',
        'regex': [r'([\w-]+\.){2,}[\w-]+'],
        'threshold': 0.86
    },
}

# Accumulates one [dataset, PA, exact-match, edit-dist, edit-dist-median] row
# per dataset. NOTE(review): "bechmark_result" is a typo for "benchmark_result";
# left unchanged here because the truncated code below also references it.
bechmark_result = []
for dataset, setting in benchmark_settings.items():
    print('\n=== Evaluation on %s ===' % dataset)
    # setting['log_file'] presumably holds a relative path such as
    # 'HDFS/HDFS_2k.log' — split into dataset subdirectory and bare file name.
    indir = os.path.join(input_dir, os.path.dirname(setting['log_file']))
    log_file = os.path.basename(setting['log_file'])

    # Configure LenMa with the per-dataset log format, regexes, and threshold.
    parser = LenMa.LogParser(log_format=setting['log_format'],
                             indir=indir,
                             outdir=output_dir,
                             rex=setting['regex'],
                             threshold=setting['threshold'])
    parser.parse(log_file)

    # Score the parser's '<log>_structured.csv' output against the ground
    # truth CSV shipped next to the input log.
    accuracy_PA, accuracy_exact_string_matching, edit_distance_result, edit_distance_result_median = evaluator.evaluate(
        groundtruth=os.path.join(indir, log_file + '_structured.csv'),
        parsedresult=os.path.join(output_dir, log_file + '_structured.csv'))
    bechmark_result.append([
        dataset, accuracy_PA, accuracy_exact_string_matching,
        edit_distance_result, edit_distance_result_median
    ])

print('\n=== Overall evaluation results ===')
df_result = pd.DataFrame(bechmark_result,
                         columns=[
Exemplo n.º 3
0
import collections
import os
import pandas as pd
from benchmark.Lenma_benchmark import benchmark_settings
from logparser import LenMa
from File_Info import get_file_size

# Time LenMa parsing over progressively sized slices of a single dataset.
dataset = 'BGL'

output_dir = 'LenMa_result/'  # The output directory of parsing results
setting = benchmark_settings[dataset]
log_file = os.path.basename(setting['log_file'])
input_dir = os.path.join('../logs/', os.path.dirname(setting['log_file']))

size_to_file = get_file_size(dataset)

timings = collections.defaultdict(list)
for size, filename in size_to_file.items():
    # Build a new parser for every run so no state carries over between runs.
    parser = LenMa.LogParser(
        input_dir,
        output_dir,
        log_format=setting['log_format'],
        threshold=setting['threshold'],
        rex=setting['regex'],
    )
    elapsed = parser.parse(filename)
    timings['size'].append(size)
    timings['time'].append(elapsed.total_seconds())
    print(timings['time'])
    # Re-write the CSV after each run so partial results survive interruption.
    pd.DataFrame(timings).to_csv('./LenMa_%s.csv' % dataset)