Example #1
import os

import numpy as np

from logparser import IPLoM

folder_name = 'Tuning_CT'  # Name the parameter being tuned
input_dir = os.path.dirname(os.path.dirname(os.path.abspath(
    __file__))) + '/logs/Sample_logs/'  # The input directory of the log file
output_dir = 'IPLoM_result/financial_transaction_results/Tuning_results/' + folder_name  # The output directory of parsing results
log_file = 'fin-transaction_log_anonimized.log'
# log_file = 'HDFS_2k.log'  # The input log file name
# log_format = '<Date> <Time> <Pid> <Level> <Component>: <Content>'  # HDFS log format
# log_format = '<Date> <Time> <Level> <Router> <Pid>: <Month> <Day> <UTCTime>: <Component>: <Content>'  # Cisco router log format
log_format = r'<Date> <Time> <Level> <Module> \[<StatusAndPayThread>\] - <Content>'

for CT in np.arange(0.3, 0.45, 0.01):  # Sweep the cluster goodness threshold
    maxEventLen = 200  # The maximum number of tokens in a log message (default: 200)
    # CT = 0.35  # The cluster goodness threshold (default: 0.35); here supplied by the sweep
    lowerBound = 0.25  # The lower bound distance (default: 0.25)
    upperBound = 0.9  # The upper bound distance (default: 0.9)
    regex = []  # Regular expression list for optional preprocessing (default: [])
    step2Support = 0  # Partition support threshold for step 2 (default: 0)

    parser = IPLoM.LogParser(log_format=log_format,
                             indir=input_dir,
                             outdir=output_dir,
                             maxEventLen=maxEventLen,
                             step2Support=step2Support,
                             CT=round(CT, 2),
                             lowerBound=lowerBound,
                             upperBound=upperBound,
                             rex=regex)
    parser.parse(log_file)
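Because every iteration writes into the same output_dir, each run overwrites the last, so choosing the best CT requires scoring inside the sweep. A minimal sketch, assuming a hand-labeled ground-truth file '<log_file>_structured.csv' exists next to the input log and that the evaluator module is importable from logparser (Example #3 below uses the same evaluate() call):

# Hypothetical scoring sweep; the ground-truth CSV is an assumption.
from logparser import evaluator

best_ct, best_f1 = None, -1.0
for CT in np.arange(0.3, 0.45, 0.01):
    parser = IPLoM.LogParser(log_format=log_format, indir=input_dir,
                             outdir=output_dir, CT=round(CT, 2),
                             lowerBound=lowerBound, upperBound=upperBound,
                             rex=regex)
    parser.parse(log_file)
    F1_measure, accuracy = evaluator.evaluate(
        groundtruth=os.path.join(input_dir, log_file + '_structured.csv'),
        parsedresult=os.path.join(output_dir, log_file + '_structured.csv'))
    if F1_measure > best_f1:
        best_ct, best_f1 = round(CT, 2), F1_measure
print('Best CT: %s (F1 = %.4f)' % (best_ct, best_f1))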
Example #2

import argparse

from logparser import IPLoM

arg_parser = argparse.ArgumentParser()
arg_parser.add_argument(
    '-dir',
    default='/Users/haraldott/Development/thesis/detector/data/openstack/utah/raw/sorted_per_request/',
    type=str)
arg_parser.add_argument('-file', default='18k_spr', type=str)
arg_parser.add_argument('-logtype', type=str, default='OpenStack')
args = arg_parser.parse_args()

input_dir = args.dir  # The input directory of the log file
log_file = args.file  # The input log file name

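# `settings` is not defined in this snippet; it presumably maps each log
# type to its parsing parameters. A hypothetical stand-in with the shape
# the lookups below expect (keys and values illustrative, not authoritative):
settings = {
    'OpenStack': {
        'log_format': r'<Logrecord> <Date> <Time> <Pid> <Level> <Component> \[<ADDR>\] <Content>',
        'regex': [r'((\d+\.){3}\d+,?)+', r'/.+?\s'],
        'lowerBound': 0.25,
        'CT': 0.9,
    },
}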
output_dir = 'IPLoM_result/'  # The output directory of parsing results
try:
    log_format = settings[args.logtype]["log_format"]
    regex = settings[args.logtype]["regex"]
    lower_bound = settings[args.logtype]["lowerBound"]
    CT = settings[args.logtype]["CT"]
except KeyError:  # A missing log type raises KeyError, not ValueError
    print("no settings defined for log type '%s'" % args.logtype)
    raise

parser = IPLoM.LogParser(log_format=log_format,
                         indir=input_dir,
                         outdir=output_dir,
                         rex=regex,
                         CT=CT,
                         lowerBound=lower_bound)
parser.parse(log_file)
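Since parse_args also accepts an explicit argument list, the script's wiring can be exercised in-process without the shell; a minimal sketch with illustrative paths:

# Hypothetical in-process invocation; the explicit list bypasses sys.argv.
args = arg_parser.parse_args(['-dir', './logs/openstack/',
                              '-file', '18k_spr',
                              '-logtype', 'OpenStack'])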
Example #3
        'log_format': r'<Month>  <Date> <Time> <User> <Component>\[<PID>\]( \(<Address>\))?: <Content>',
        'CT': 0.3,
        'lowerBound': 0.25,
        'regex': [r'([\w-]+\.){2,}[\w-]+']
    }
}

benchmark_result = []
for dataset, setting in benchmark_settings.items():
    print('\n=== Evaluation on %s ===' % dataset)
    indir = os.path.join(input_dir, os.path.dirname(setting['log_file']))
    log_file = os.path.basename(setting['log_file'])

    parser = IPLoM.LogParser(log_format=setting['log_format'],
                             indir=indir,
                             outdir=output_dir,
                             CT=setting['CT'],
                             lowerBound=setting['lowerBound'],
                             rex=setting['regex'])
    parser.parse(log_file)

    F1_measure, accuracy = evaluator.evaluate(
        groundtruth=os.path.join(indir, log_file + '_structured.csv'),
        parsedresult=os.path.join(output_dir, log_file + '_structured.csv'))
    benchmark_result.append([dataset, F1_measure, accuracy])

print('\n=== Overall evaluation results ===')
df_result = pd.DataFrame(benchmark_result,
                         columns=['Dataset', 'F1_measure', 'Accuracy'])
df_result.set_index('Dataset', inplace=True)
print(df_result)
df_result.T.to_csv('IPLoM_benchmark_result.csv')
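Because the frame is transposed before saving, datasets land as columns in the CSV; reading it back needs the inverse transpose. A minimal round-trip sketch:

# Hypothetical round-trip: undo the transpose applied at save time.
df_back = pd.read_csv('IPLoM_benchmark_result.csv', index_col=0).T
print(df_back)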
Example #4
import collections
import os

import pandas as pd

from benchmark.IPLoM_benchmark import benchmark_settings
from File_Info import get_file_size
from logparser import IPLoM

dataset = 'BGL'
output_dir = 'IPLoM_result/'
one_setting = benchmark_settings[dataset]
log_file = os.path.basename(one_setting['log_file'])
input_dir = os.path.join('../logs/', os.path.dirname(one_setting['log_file']))

file_size = get_file_size(dataset)  # Maps each sampled size to its log file name

results = collections.defaultdict(list)
for size, file in file_size.items():
    # Re-instantiate the parser for each input size so runs stay independent
    parser = IPLoM.LogParser(log_format=one_setting['log_format'],
                             indir=input_dir,
                             outdir=output_dir,
                             CT=one_setting['CT'],
                             lowerBound=one_setting['lowerBound'],
                             rex=one_setting['regex'],
                             keep_para=False)
    time_elapsed = parser.parse(file)
    results['size'].append(size)
    results['time'].append(time_elapsed.total_seconds())
    print(results['time'])
    # Persist after every run so partial results survive an interrupted sweep
    pd.DataFrame(results).to_csv('./IPLoM_%s.csv' % dataset)
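Once the sweep has written its CSV, the size/time pairs can be inspected directly. A minimal sketch, assuming the loop above completed and produced IPLoM_BGL.csv:

# Hypothetical post-processing of the timing sweep written above.
import pandas as pd

df_times = pd.read_csv('./IPLoM_BGL.csv', index_col=0)
print(df_times.sort_values('time'))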