Exemplo n.º 1
0
#!/usr/bin/env python
import sys
sys.path.append('../')
from logparser import LogMine

# --- Input / output locations ---
input_dir = '../logs/HDFS/'  # directory holding the input log file
output_dir = 'LogMine_result/'  # directory for the parsing results
log_file = 'HDFS_2k.log'  # name of the log file to parse

# --- Log layout ---
log_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>'  # HDFS log format

# --- LogMine settings ---
regex = []  # optional preprocessing regular expressions (default: [])
k = 1  # message distance weight (default: 1)
max_dist = 0.001  # max distance between a cluster representative and any message in its cluster
levels = 2  # number of levels in the pattern hierarchy

# The optional settings are collected in one mapping and passed by keyword;
# the directories and the log format stay positional.
tuning = dict(rex=regex, levels=levels, max_dist=max_dist, k=k)

parser = LogMine.LogParser(input_dir, output_dir, log_format, **tuning)
parser.parse(log_file)
Exemplo n.º 2
0
        'max_dist': 0.004,
        'k': 1,
        'levels': 2
    },
}

# One [dataset, F1_measure, accuracy] row is appended per benchmarked dataset.
benchmark_result = []
# dict.items() replaces the Python 2-only dict.iteritems(), so this loop runs
# under both Python 2 and Python 3.
for dataset, setting in benchmark_settings.items():
    print('\n=== Evaluation on %s ===' % dataset)
    # setting['log_file'] is split into its directory part (joined onto
    # input_dir) and its bare file name (handed to the parser).
    indir = os.path.join(input_dir, os.path.dirname(setting['log_file']))
    log_file = os.path.basename(setting['log_file'])

    parser = LogMine.LogParser(log_format=setting['log_format'],
                               indir=indir,
                               outdir=output_dir,
                               rex=setting['regex'],
                               max_dist=setting['max_dist'],
                               k=setting['k'],
                               levels=setting['levels'])
    parser.parse(log_file)

    # Compare '<log_file>_structured.csv' in the input directory (ground
    # truth) with the file of the same name in output_dir (parsed result).
    F1_measure, accuracy = evaluator.evaluate(
        groundtruth=os.path.join(indir, log_file + '_structured.csv'),
        parsedresult=os.path.join(output_dir, log_file + '_structured.csv'))
    benchmark_result.append([dataset, F1_measure, accuracy])

# Summarize every dataset's scores in a single table indexed by dataset name.
print('\n=== Overall evaluation results ===')
df_result = pd.DataFrame(benchmark_result,
                         columns=['Dataset', 'F1_measure', 'Accuracy'])
df_result.set_index('Dataset', inplace=True)
print(df_result)