def DictionaryUpload(logFileName, doubleDictionaryList, triDictionaryList):
    """Add pair and triple token counts from the first half of a log file.

    The file is scanned once to count its lines, then the first
    ``lineCount // 2`` lines are tokenized with ``tokenGenerator`` (defined
    elsewhere in this module).  For each of those lines every run of two
    consecutive tokens is joined with ``','`` and counted in
    ``doubleDictionaryList``; every run of three is counted the same way in
    ``triDictionaryList``.  Lines with too few tokens for a pair/triple simply
    contribute nothing.

    Args:
        logFileName: Path of the log file to sample.
        doubleDictionaryList: Dict mapping ``'tok1,tok2'`` to a count.
            Updated in place.
        triDictionaryList: Dict mapping ``'tok1,tok2,tok3'`` to a count.
            Updated in place.

    Returns:
        The tuple ``(triDictionaryList, doubleDictionaryList)``.  Note that
        the order is the reverse of the parameter order.
    """
    from itertools import islice

    with open(logFileName, 'r') as logFile:
        lineCount = sum(1 for _ in logFile)
        halfCount = lineCount // 2
        logFile.seek(0)

        # readlines(n) interprets n as a size hint in characters, not as a
        # number of lines, so the first half is taken explicitly with islice.
        for line in islice(logFile, halfCount):
            tokens = tokenGenerator(line)
            tokenCount = len(tokens)

            # range() is empty when there are fewer than three tokens, so
            # short lines can no longer index past the end of the list.
            for index in range(tokenCount - 2):
                tripleTmp = (tokens[index] + ',' + tokens[index + 1] + ','
                             + tokens[index + 2])
                triDictionaryList[tripleTmp] = (
                    triDictionaryList.get(tripleTmp, 0) + 1)

            for index in range(tokenCount - 1):
                doubleTmp = tokens[index] + ',' + tokens[index + 1]
                doubleDictionaryList[doubleTmp] = (
                    doubleDictionaryList.get(doubleTmp, 0) + 1)

    return triDictionaryList, doubleDictionaryList
def dictionaryTripleSetUp(logFileName, DictionaryFileName):
    """Build a token-triple frequency dictionary from a log file.

    Every line of ``logFileName`` is tokenized with ``tokenGenerator``
    (defined elsewhere in this module).  Each run of three consecutive tokens
    is joined with ``','`` and counted.  The result is written to
    ``DictionaryFileName`` as one ``tok1,tok2,tok3,count`` line per triple.
    The dictionary is seeded with the entry ``'dictionary,DHT,triple': -1``,
    which is written out like any other entry.

    Args:
        logFileName: Path of the log file to read.
        DictionaryFileName: Path of the dictionary file to create or
            overwrite.

    Returns:
        None.
    """
    dictionaryList = {'dictionary,DHT,triple': -1}

    # Both files are managed by ``with`` so they are flushed and closed even
    # if tokenization raises part-way through.
    with open(logFileName, 'r') as logFile, \
            open(DictionaryFileName, 'w+') as dictionaryFile:
        for line in logFile:
            tokens = tokenGenerator(line)
            # Empty range for lines with fewer than three tokens.
            for index in range(len(tokens) - 2):
                tripleTmp = (tokens[index] + ',' + tokens[index + 1] + ','
                             + tokens[index + 2])
                dictionaryList[tripleTmp] = dictionaryList.get(tripleTmp, 0) + 1

        for key, count in dictionaryList.items():
            dictionaryFile.write(key + ',' + str(count))
            dictionaryFile.write('\n')
def tokenMatchDouble(inputFile, outputFile, tokenDicFile, threshold):
    """Convert each log line to an event string using a token-pair dictionary.

    The dictionary file is expected to hold ``tok1,tok2,count`` lines; each is
    loaded as ``tokenDictionary['tok1,tok2'] = count``.  Every line of
    ``inputFile`` is tokenized with ``tokenGenerator`` and passed to
    ``doubleTokenCompare`` together with the dictionary and ``threshold``
    (both helpers are defined elsewhere in this module).  In the returned
    event string a run of digits at the very start is replaced by ``'#'`` and
    every run of digits preceded by a space is replaced by ``' #'``.  The
    result is written to ``outputFile``, one event per line.

    Args:
        inputFile: Path of the log file to read.
        outputFile: Path of the event file to create or overwrite.
        tokenDicFile: Path of the token-pair dictionary file.
        threshold: Passed through unchanged to ``doubleTokenCompare``.

    Returns:
        None.

    Raises:
        OSError: If any of the three files cannot be opened.
        IndexError, ValueError: If a non-blank dictionary line does not have
            the form ``tok1,tok2,<int>``.
    """
    tokenDictionary = {'dictionary': -1}

    # ``with`` closes whichever files were opened successfully, and lets the
    # real error surface if one of the opens fails.
    with open(inputFile, 'r') as inFile, \
            open(outputFile, 'w') as outFile, \
            open(tokenDicFile, 'r') as tokenFile:
        for tokenLine in tokenFile:
            tokenLine = tokenLine.replace('\n', '')
            if not tokenLine.strip():
                # Tolerate blank lines (e.g. a trailing newline) in the file.
                continue
            tmp = tokenLine.split(',')
            keyTmp = tmp[0] + ',' + tmp[1]
            tokenDictionary[keyTmp] = int(tmp[2])

        for logLine in inFile:
            logTokens = tokenGenerator(logLine)
            logEvent = doubleTokenCompare(logTokens, tokenDictionary, threshold)
            # Mask numeric fields so lines differing only in numbers collapse
            # to the same event text.
            logEvent = re.sub(r'^[0-9]+', '#', logEvent)
            logEvent = re.sub(r'[ ][0-9]+', ' #', logEvent)
            outFile.write(logEvent)
            outFile.write('\n')
def TokenMatchTriple(inputAddress, outputAddress, triThreshold, doubleThreshold):
    """Convert every log file under a directory tree to an event file.

    For each file found by ``os.walk(inputAddress)``:

    1. ``DictionaryUpload`` adds that file's pair and triple counts to the
       running dictionaries, which are shared across all files in the walk.
    2. Every line is tokenized with ``tokenGenerator`` and passed to
       ``tripleTokenCompare`` with both dictionaries and both thresholds.
    3. In the returned event string a run of digits at the very start is
       replaced by ``'#'`` and every run of digits preceded by a space is
       replaced by ``' #'``.
    4. The event is written to ``outputAddress + file_name + "event.txt"``
       and both the raw line and the event are echoed to stdout.

    ``DictionaryUpload``, ``tokenGenerator`` and ``tripleTokenCompare`` are
    defined elsewhere in this module.

    Args:
        inputAddress: Root directory to walk for input log files.
        outputAddress: String prefix for output paths.  It is concatenated
            directly with the file name, so include a trailing separator if a
            directory is intended.
        triThreshold: Threshold passed with the triple dictionary.
        doubleThreshold: Threshold passed with the pair dictionary.

    Returns:
        None.
    """
    doubleDictionaryList = {'dictionary,DHT': -1}
    triDictionaryList = {'dictionary,DHT,triple': -1}

    for path, _dirNames, file_list in os.walk(inputAddress):
        for file_name in file_list:
            sourceFile = os.path.join(path, file_name)
            # Close both handles per file; a large tree would otherwise run
            # out of file descriptors.
            with open(sourceFile, 'r') as inFile, \
                    open(outputAddress + file_name + "event.txt", 'w') as outFile:
                triDictionaryList, doubleDictionaryList = DictionaryUpload(
                    sourceFile, doubleDictionaryList, triDictionaryList)

                for logLine in inFile:
                    logTokens = tokenGenerator(logLine)
                    # NOTE(review): assumes tripleTokenCompare takes
                    # (tokens, triDict, triThreshold, doubleDict,
                    # doubleThreshold) -- confirm against its definition.
                    logEvent = tripleTokenCompare(
                        logTokens, triDictionaryList, triThreshold,
                        doubleDictionaryList, doubleThreshold)
                    logEvent = re.sub(r'^[0-9]+', '#', logEvent)
                    logEvent = re.sub(r'[ ][0-9]+', ' #', logEvent)
                    outFile.write(logEvent)
                    print(logLine + '\n')
                    print(logEvent + '\n')
                    outFile.write('\n')
def tokenMatchSingle(inputLogFile, outputEventFile, tokenDicFile, threshold):
    """Convert each log line to an event string using a single-token dictionary.

    The dictionary file is expected to hold ``token,count`` lines; each is
    loaded as ``tokenDictionary[token] = count``.  Every line of
    ``inputLogFile`` is tokenized with ``tokenGenerator`` and passed to
    ``singleTokenCompare`` together with the dictionary and ``threshold``
    (both helpers are defined elsewhere in this module).  In the returned
    event string a run of digits at the very start is replaced by ``'#'`` and
    every run of digits preceded by a space is replaced by ``' #'``.  The
    result is written to ``outputEventFile``, one event per line.

    Args:
        inputLogFile: Path of the log file to read.
        outputEventFile: Path of the event file to create or overwrite.
        tokenDicFile: Path of the single-token dictionary file.
        threshold: Passed through unchanged to ``singleTokenCompare``.

    Returns:
        None.

    Raises:
        OSError: If any of the three files cannot be opened.
        IndexError, ValueError: If a non-blank dictionary line does not have
            the form ``token,<int>``.
    """
    tokenDictionary = {'dictionaryDHT': -1}

    # Open the input log file, the output event file and the token
    # dictionary.  ``with`` closes whichever were opened successfully, and
    # lets the real error surface if one of the opens fails.
    with open(inputLogFile, 'r') as inFile, \
            open(outputEventFile, 'w') as outFile, \
            open(tokenDicFile, 'r') as tokenFile:
        for tokenLine in tokenFile:
            if not tokenLine.strip():
                # Tolerate blank lines (e.g. a trailing newline) in the file.
                continue
            tmp = tokenLine.split(',')
            # int() ignores the trailing newline left on the count field.
            tokenDictionary[tmp[0]] = int(tmp[1])

        for logLine in inFile:
            logTokens = tokenGenerator(logLine)
            logEvent = singleTokenCompare(logTokens, tokenDictionary, threshold)
            # Mask numeric fields so lines differing only in numbers collapse
            # to the same event text.
            logEvent = re.sub(r'^[0-9]+', '#', logEvent)
            logEvent = re.sub(r'[ ][0-9]+', ' #', logEvent)
            outFile.write(logEvent)
            outFile.write('\n')
def dictionarySingleSetUp(logFileName, DictionaryFileName):
    """Build a single-token frequency dictionary from a log file.

    Every line of ``logFileName`` is tokenized with ``tokenGenerator``
    (defined elsewhere in this module) and each token is counted.  The result
    is written to ``DictionaryFileName`` as one ``token,count`` line per
    token.  The dictionary is seeded with the entry ``'dictionaryDHT': -1``,
    which is written out like any other entry.

    Args:
        logFileName: Path of the log file to read.
        DictionaryFileName: Path of the dictionary file to create or
            overwrite.

    Returns:
        None.
    """
    dictionaryList = {'dictionaryDHT': -1}

    # Both files are managed by ``with`` so they are flushed and closed even
    # if tokenization raises part-way through.
    with open(logFileName, 'r') as logFile, \
            open(DictionaryFileName, 'w+') as dictionaryFile:
        for line in logFile:
            for token in tokenGenerator(line):
                dictionaryList[token] = dictionaryList.get(token, 0) + 1

        for key, count in dictionaryList.items():
            dictionaryFile.write(key + ',' + str(count))
            dictionaryFile.write('\n')