def test_fix_all_star_template(self):
    """A six-star template must no longer be all-star after fix_all_star_template."""
    # Two buckets of messages, keyed by their number of words.
    three_word_messages = [
        Message(['message', 'sent', 'A1']),
        Message(['message', 'sent', 'A2']),
    ]
    six_word_messages = [
        Message(['message', 'sent', 'A2', 'from', ':', 'B1']),
        Message(['message', 'sent', 'A2', 'from', ':', 'B2']),
    ]
    messages = {3: three_word_messages, 6: six_word_messages}

    # A template made only of wildcards, marked as matching both 6-word lines.
    all_star = Template(['*'] * 6)
    all_star.matched_lines = [0, 1]
    chromosome = Chromosome({6: [all_star]})

    # Code under test: fix the template stored at position 0 under key 6.
    fix_all_star_template(chromosome, 6, 0, messages)

    repaired = chromosome.templates[6][0]
    self.assertFalse(is_all_star_template(repaired))
def test_no_variable_parts(self):
    """check_variable_parts keeps both stars when the starred words differ per message."""
    # Three 5-word messages whose 3rd and 5th words all differ.
    word_rows = [
        ['read', 'text', 'file1', 'from', 'ABC1'],
        ['read', 'text', 'file2', 'from', 'ABC2'],
        ['read', 'text', 'file3', 'from', 'ABC3'],
    ]
    messages = {5: [Message(words) for words in word_rows]}

    # Chromosome holding a single template that matches every message.
    candidate = Template(['read', 'text', '*', 'from', '*'])
    candidate.matched_lines = [0, 1, 2]
    chromosome = Chromosome({5: [candidate]})

    # Code under test.
    check_variable_parts(chromosome, messages)

    rendered = chromosome.templates[5][0].to_string()
    self.assertEqual(rendered, "[ read text * from * ]")
def load_log_message(self, message: Message):
    """
    Store a log message in the bucket that corresponds to its length.

    The value of message.get_length() is used as the key into
    self.messages; the bucket (a list) is created on first use.

    :param message: an object of type Message
    :return: None, self.messages is updated in place
    """
    self.messages.setdefault(message.get_length(), []).append(message)
def test_compute_matched_lines(self):
    """compute_matched_lines must record exactly the lines matched by the template.

    The template has two words, and only the message at index 1 of the
    2-word bucket starts with 'context', so the expected result is [1].
    """
    messages_dict = {
        1: [Message(['configuration']),
            Message(['Data']),
            Message(['Object'])],
        2: [Message(['configuration', 'file']),
            Message(['context', 'folder']),
            Message(['Close', 'file'])],
        4: [Message(['AAA', 'Found', 'context', 'configuration'])],
    }
    template = Template(['context', '*'])

    compute_matched_lines(messages_dict, template)

    # assertTrue(value, [1]) would use [1] as the failure message and only
    # check truthiness; assertEqual really compares the matched indices.
    self.assertEqual(template.matched_lines, [1])
def test_check_variable_parts_2templates(self):
    """check_variable_parts is applied to the templates of both length buckets.

    In each bucket the second word is always 'text', and the expected
    renderings show that star replaced by 'text' while the other stars stay.
    """
    # Messages grouped by their number of words.
    five_word_rows = [
        ['read', 'text', 'file1', 'from', 'ABC1'],
        ['read', 'text', 'file2', 'from', 'ABC3'],
        ['read', 'text', 'file2', 'from', 'ABC4'],
    ]
    seven_word_rows = [
        ['read', 'text', 'file', 'ABC', 'from', 'DB', '98765'],
        ['read', 'text', 'file', 'DSE', 'from', 'DB', '7654'],
    ]
    messages = {
        5: [Message(words) for words in five_word_rows],
        7: [Message(words) for words in seven_word_rows],
    }

    # One template per bucket, each marked as matching all of its messages.
    short_template = Template(['read', '*', '*', 'from', '*'])
    short_template.matched_lines = [0, 1, 2]
    long_template = Template(['read', '*', 'file', '*', 'from', 'DB', '*'])
    long_template.matched_lines = [0, 1]
    chromosome = Chromosome({5: [short_template], 7: [long_template]})

    # Code under test.
    check_variable_parts(chromosome, messages)

    expected_rendering = {
        5: "[ read text * from * ]",
        7: "[ read text file * from DB * ]",
    }
    for length, rendered in expected_rendering.items():
        self.assertEqual(chromosome.templates[length][0].to_string(),
                         rendered)
def test_no_match2(self):
    """Same length, but the third word differs ('EEE' vs 'CCC'): no match expected."""
    log_line = Message(['AAA', 'BBB', 'EEE', 'DDD:', 'xyz.txt'])
    pattern = Template(['AAA', 'BBB', 'CCC', 'DDD:', '*'])

    self.assertFalse(match(log_line, pattern))
def test_no_match1(self):
    """A 5-word message against an unrelated 3-word template: no match expected."""
    log_line = Message(['AAA', 'BBB', 'CCC', 'DDD:', 'xyz.txt'])
    pattern = Template(['read', 'config', 'File'])

    self.assertFalse(match(log_line, pattern))
def adapt_log_message(log_message: str, regex=None):
    """
    Turn a raw log line into a Message, masking special values as #spec#.

    The steps run in this order:

    1. every built-in pattern (time, date, IP address, file path, memory
       address, long hex string, MAC address, standalone number) is
       replaced with " #spec# ";
    2. spaces are inserted around the characters < > = : , ; ' ( ) { } [ ];
    3. every pattern supplied in regex is replaced with " #spec# ";
    4. a leading bracketed prefix made of capitals, spaces and stars gets
       its spaces removed again, and "-- >" is re-joined into "-->";
    5. the line is split on whitespace into a Message, and every word
       matching the xml pattern is replaced by '#spec#'.

    :param log_message: the raw log line
    :param regex: optional iterable of additional patterns to mask;
        treated as empty when None
    :return: a Message built from the resulting list of words
    """
    # Built-in patterns, applied in this exact order.  They must run before
    # the punctuation step below, which would split times and addresses by
    # putting spaces around ':'.
    builtin_patterns = (
        # time (12:34:56 or 12:54)
        r'(^|\s+)(\d){1,2}:(\d){1,2}(|:(\d){2,4})(\s+|$)',
        # date (21-03-2005 or 03/04/12)
        r'(^|\s)(\d{1,2}(-|/)\d{1,2}(-|/)\d{2,4})(\s|$)',
        # ip address with optional leading '/' and optional ':port'
        r'(/|)([0-9]+\.){3}[0-9]+(:[0-9]+|)',
        # plain dotted ip address
        r'(|^)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(|$)',
        # file path, optionally preceded by word characters
        r'(|\w+)(/[\d+\w+\-_\.\#\$]*[/\.][\d+\w+\-_\.\#\$/*]*)+',
        # memory address
        r'0x([a-zA-Z]|[0-9])+',
        # upper-case hexadecimal word of at least 9 characters
        r'(^|\s)([0-9A-F]){9,}(\s|$)',
        # lower-case hexadecimal word of at least 8 characters
        r'(^|\s)([0-9a-f]){8,}(\s|$)',
        # mac address
        r'([0-9A-F]{2}[:-]){5,}([0-9A-F]{2})',
        # standalone number
        r'(^| )\d+( |$)',
    )
    # re.sub leaves the text untouched when nothing matches, so no separate
    # findall pre-check is needed.
    for pattern in builtin_patterns:
        log_message = re.sub(pattern, " #spec# ", log_message)

    # add space around the punctuation characters < > = : , ; ' ( ) { } [ ]
    log_message = re.sub(r"([<>=:,;'\(\)\{\}\[\]])", r' \1 ', log_message)

    # regex is empty if not given as parameter
    if regex is None:
        regex = []
    for user_pattern in regex:
        log_message = re.sub(user_pattern, " #spec# ", log_message)

    # let's identify the prefixes that we don't have to change: the
    # punctuation step above put spaces inside them, remove those again
    prefix_regex = r"^[ ]*\[[A-Z *]+\]"
    prefix_found = re.search(prefix_regex, log_message)
    if prefix_found:
        prefix = prefix_found.group(0)
        log_message = log_message.replace(prefix, prefix.replace(' ', ''))

    # the punctuation step also broke the arrow "-->" apart
    log_message = log_message.replace("-- >", "-->")

    message = Message(log_message.split())
    for index in range(message.get_length()):
        # xml files
        # NOTE(review): the dot is unescaped, so any character followed by
        # "xml" matches (e.g. "axml"); kept as is to preserve tokenisation.
        if re.search(".xml", message.words[index]):
            message.words[index] = '#spec#'
    return message