def test3append_values_float(self):
        """This unittest checks the append_values method with raw_match_object being a float value."""
        event_type_detector = EventTypeDetector(
            self.aminer_config, [self.stream_printer_event_handler])
        # initialize all values.
        t = time.time()
        log_atom = LogAtom(
            b'22.2', ParserMatch(MatchElement('path', '22.2', 22.2, None)), t,
            self.__class__.__name__)
        event_type_detector.receive_atom(log_atom)

        event_type_detector.values = [[[]]]
        event_type_detector.append_values(log_atom, 0)
        self.assertEqual(event_type_detector.values, [[[22.2]]])

        log_atom = LogAtom(b'22',
                           ParserMatch(MatchElement('path', '22', 22, None)),
                           t, self.__class__.__name__)
        event_type_detector.values = [[[]]]
        event_type_detector.append_values(log_atom, 0)
        self.assertEqual(event_type_detector.values, [[[22]]])

        log_atom = LogAtom(
            b'22.2', ParserMatch(MatchElement('path', '22', b'22', None)), t,
            self.__class__.__name__)
        event_type_detector.values = [[[]]]
        event_type_detector.append_values(log_atom, 0)
        self.assertEqual(event_type_detector.values, [[[22]]])
    def test4append_values_bytestring(self):
        """
        This unittest checks the append_values method with raw_match_object being a bytestring.
        This should trigger a ValueError and append the match_string.
        """
        event_type_detector = EventTypeDetector(
            self.aminer_config, [self.stream_printer_event_handler])
        # initialize all values.
        t = time.time()
        log_atom = LogAtom(
            b'This is a string',
            ParserMatch(
                MatchElement('path', 'This is a string', b'This is a string',
                             None)), t, self.__class__.__name__)
        event_type_detector.receive_atom(log_atom)

        event_type_detector.values = [[[]]]
        event_type_detector.append_values(log_atom, 0)
        self.assertEqual(event_type_detector.values, [[['This is a string']]])

        log_atom = LogAtom(
            b'24.05.',
            ParserMatch(MatchElement('path', '24.05.', b'24.05.', None)), t,
            self.__class__.__name__)
        event_type_detector.values = [[[]]]
        event_type_detector.append_values(log_atom, 0)
        self.assertEqual(event_type_detector.values, [[['24.05.']]])
 def run_time_correlation_detector(self, number_of_rules):
     results = [None] * self.iterations
     avg = 0
     z = 0
     while z < self.iterations:
         time_correlation_detector = TimeCorrelationDetector(
             self.aminer_config,
             2,
             number_of_rules,
             0, [self.stream_printer_event_handler],
             record_count_before_event=self.waiting_time * 9000)
         t = time.time()
         seconds = time.time()
         i = 0
         while int(time.time() - seconds) < self.waiting_time:
             decimal_integer_value_me = DecimalIntegerValueModelElement(
                 'd', DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                 DecimalIntegerValueModelElement.PAD_TYPE_NONE)
             match_context = MatchContext(str(i % 100).encode())
             match_element = decimal_integer_value_me.get_match_element(
                 'integer', match_context)
             log_atom = LogAtom(match_element.match_string,
                                ParserMatch(match_element), t,
                                time_correlation_detector)
             time_correlation_detector.receive_atom(log_atom)
             i = i + 1
         results[z] = i
         z = z + 1
         avg = avg + i
     avg = avg / self.iterations
     type(self).result = self.result + self.result_string % (
         time_correlation_detector.__class__.__name__, avg, results,
         'testCount=%d.' % number_of_rules)
 def test2receive_event_with_same_event_data_attributes(self):
     """In this test case an attribute of AnalysisComponent is overwritten and an JsonError attribute is expected."""
     json_converter_handler = JsonConverterHandler(
         [self.stream_printer_event_handler], self.analysis_context)
     log_atom = LogAtom(self.fixed_dme.fixed_data,
                        ParserMatch(self.match_element), self.t, self)
     self.analysis_context.register_component(self, self.description)
     event_data = {
         'AnalysisComponent': {
             'AffectedParserPaths': ['test/path/1', 'test/path/2'],
             'Message': 'An other event happened too!'
         }
     }
     json_converter_handler.receive_event(self.test_detector,
                                          self.event_message,
                                          self.sorted_log_lines, event_data,
                                          log_atom, self)
     self.assertEqual(
         self.output_stream.getvalue(), self.expected_string %
         (datetime.fromtimestamp(self.t).strftime("%Y-%m-%d %H:%M:%S"),
          self.event_message, self.__class__.__name__, self.description,
          self.__class__.__name__, self.description, self.event_message,
          self.persistence_id, round(float("%.2f" % self.t), 2),
          ',\n  "JsonError": "AnalysisComponent attribute \'Message\' is already in use and can not be overwritten!\\n"'
          ))
 def run_new_match_path_detector(self, number_of_pathes):
     results = [None] * self.iterations
     avg = 0
     z = 0
     while z < self.iterations:
         new_match_path_detector = NewMatchPathDetector(
             self.aminer_config, [self.stream_printer_event_handler],
             'Default', True)
         t = round(time.time(), 3)
         seconds = time.time()
         i = 0
         while int(time.time() - seconds) < self.waiting_time:
             decimal_integer_value_me = DecimalIntegerValueModelElement(
                 'd' + str(i % number_of_pathes),
                 DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                 DecimalIntegerValueModelElement.PAD_TYPE_NONE)
             match_context = MatchContext(str(i).encode())
             match_element = decimal_integer_value_me.get_match_element(
                 'integer', match_context)
             log_atom = LogAtom(match_element.match_string,
                                ParserMatch(match_element), t,
                                new_match_path_detector)
             new_match_path_detector.receive_atom(log_atom)
             i = i + 1
         results[z] = i
         z = z + 1
         avg = avg + i
     avg = avg / self.iterations
     type(self).result = self.result + self.result_string % (
         new_match_path_detector.__class__.__name__, avg, results,
         self.different_pathes % number_of_pathes)
示例#6
0
    def test1receive_serialized_data(self):
        """This unittest tests the receive_event method with serialized data from the JsonConverterHandler."""
        json_converter_handler = JsonConverterHandler(
            [self.stream_printer_event_handler], self.analysis_context)
        log_atom = LogAtom(self.fixed_dme.fixed_data,
                           ParserMatch(self.match_element), self.t, self)
        self.analysis_context.register_component(self, self.description)
        event_data = {
            'AnalysisComponent': {
                'AffectedParserPaths': ['test/path/1', 'test/path/2']
            }
        }
        json_converter_handler.receive_event(self.test_detector,
                                             self.event_message,
                                             self.sorted_log_lines, event_data,
                                             log_atom, self)
        output = self.output_stream.getvalue()
        kafka_event_handler = KafkaEventHandler(
            self.analysis_context, self.kafka_topic, {
                'bootstrap_servers': ['localhost:9092'],
                'api_version': (2, 0, 1)
            })
        self.assertTrue(
            kafka_event_handler.receive_event(self.test_detector,
                                              self.event_message,
                                              self.sorted_log_lines, output,
                                              log_atom, self))

        self.assertEqual(
            self.consumer.__next__().value, self.expected_string %
            (datetime.fromtimestamp(self.t).strftime("%Y-%m-%d %H:%M:%S"),
             self.event_message, self.__class__.__name__, self.description,
             self.__class__.__name__, self.description, self.event_message,
             self.persistence_id, round(self.t, 2), ""))
 def whitelist_event_in_component(self,
                                  analysis_context,
                                  component_name,
                                  event_data,
                                  whitelisting_data=None):
     component = analysis_context.get_component_by_name(component_name)
     if component is None:
         self.REMOTE_CONTROL_RESPONSE += "FAILURE: component '%s' does not exist!" % component
         return
     if component.__class__.__name__ not in [
             "EnhancedNewMatchPathValueComboDetector",
             "MissingMatchPathValueDetector", "NewMatchPathDetector",
             "NewMatchPathValueComboDetector"
     ]:
         self.REMOTE_CONTROL_RESPONSE += "FAILURE: component class '%s' does not support whitelisting! Only the following classes " \
                                         "support whitelisting: EnhancedNewMatchPathValueComboDetector, MissingMatchPathValueDetector," \
                                         " NewMatchPathDetector and NewMatchPathValueComboDetector." % component.__class__.__name__
         return
     try:
         if component.__class__.__name__ == "MissingMatchPathValueDetector":
             self.REMOTE_CONTROL_RESPONSE += component.whitelist_event(
                 "Analysis.%s" % component.__class__.__name__,
                 [component.__class__.__name__], event_data,
                 whitelisting_data)
         else:
             self.REMOTE_CONTROL_RESPONSE += component.whitelist_event(
                 "Analysis.%s" % component.__class__.__name__,
                 [component.__class__.__name__],
                 [LogAtom("", None, 1666.0, None), event_data],
                 whitelisting_data)
     # skipcq: PYL-W0703
     except Exception as e:
         self.REMOTE_CONTROL_RESPONSE += "Exception: " + repr(e)
 def test2receive_atoms_with_defined_path_list(self):
     """
     In this test case multiple log_atoms are received with default values of the EventTypeDetector.
     path_list is set to a static list of paths and variable_key_list should not be used.
     """
     event_type_detector = EventTypeDetector(
         self.aminer_config, [self.stream_printer_event_handler],
         path_list=['parser/type/path/nametype'])
     results = [
         True, False, True, False, True, False, True, True, False, False,
         True, True, False, True, False, True, False, True, False, False,
         True
     ]
     log_atoms = []
     for line in self.log_lines:
         t = time.time()
         log_atoms.append(
             LogAtom(
                 line,
                 ParserMatch(
                     self.parsing_model.get_match_element(
                         'parser', MatchContext(line))), t,
                 self.__class__.__name__))
     for i, log_atom in enumerate(log_atoms):
         old_vals = (event_type_detector.num_events,
                     event_type_detector.num_eventlines,
                     event_type_detector.total_records,
                     event_type_detector.longest_path)
         self.assertEqual(event_type_detector.receive_atom(log_atom),
                          not results[i], i)
         if results[i]:
             self.assertEqual(old_vals, (event_type_detector.num_events,
                                         event_type_detector.num_eventlines,
                                         event_type_detector.total_records,
                                         event_type_detector.longest_path))
示例#9
0
 def test1log_atom_not_in_path_list(self):
     """This unittest checks if no action happens, when no path in the match_dictionary matches a target_path."""
     parser_count = ParserCount(self.aminer_config, ['fixed/seq', 'fixed/seq/m1', 'fixed/seq/m2'], [self.stream_printer_event_handler])
     t = time.time()
     log_atom = LogAtom(self.fixed_dme_m3.fixed_data, ParserMatch(self.match_element_m3), t, parser_count)
     old_count_dict = dict(parser_count.count_dict)
     parser_count.receive_atom(log_atom)
     self.assertEqual(parser_count.count_dict, old_count_dict)
示例#10
0
 def test6receive_atom_without_target_paths(self):
     """This unittest tests the receive_atom method with multiple paths matching without having target_paths specified."""
     parser_count = ParserCount(self.aminer_config, None, [self.stream_printer_event_handler])
     t = time.time()
     log_atom = LogAtom(self.match_context_seq.match_data, ParserMatch(self.match_element_seq), t, parser_count)
     old_count_dict = dict(parser_count.count_dict)
     old_count_dict['fixed/seq'] = {current_processed_lines_str: 1, total_processed_lines_str: 1}
     parser_count.receive_atom(log_atom)
     self.assertEqual(parser_count.count_dict, old_count_dict)
示例#11
0
 def test2log_atom_matches_single_path(self):
     """This unittest tests the receive_atom method with a single path matching."""
     parser_count = ParserCount(self.aminer_config, ['fixed/seq', 'fixed/seq/m1', 'fixed/seq/m2', 'fixed/m3'],
                                [self.stream_printer_event_handler])
     t = time.time()
     log_atom = LogAtom(self.fixed_dme_m3.fixed_data, ParserMatch(self.match_element_m3), t, parser_count)
     old_count_dict = dict(parser_count.count_dict)
     old_count_dict['fixed/m3'][current_processed_lines_str] = 1
     old_count_dict['fixed/m3'][total_processed_lines_str] = 1
     parser_count.receive_atom(log_atom)
     self.assertEqual(parser_count.count_dict, old_count_dict)
示例#12
0
 def test4do_timer(self):
     """This unittest checks if the do_timer method works properly."""
     parser_count = ParserCount(self.aminer_config, ['fixed/m3'], [self.stream_printer_event_handler], 600)
     t = time.time()
     self.assertEqual(int(parser_count.do_timer(t + 100)), 600)
     self.assertEqual(self.output_stream.getvalue(), "")
     log_atom = LogAtom(self.match_context_seq.match_data, ParserMatch(self.match_element_seq), t, parser_count)
     parser_count.receive_atom(log_atom)
     self.assertEqual(int(parser_count.do_timer(t + 100)), 500)
     self.assertEqual(self.output_stream.getvalue(), "")
     self.assertEqual(parser_count.do_timer(t + 601), 600)
     self.assertNotEqual(self.output_stream.getvalue(), "")
     self.reset_output_stream()
 def run_match_value_stream_writer(self, number_of_pathes):
     results = [None] * self.iterations
     avg = 0
     z = 0
     while z < self.iterations:
         i = 0
         path_list = []
         parsing_model = []
         while i < number_of_pathes / 2:
             path_list.append('match/integer/d' + str(i % number_of_pathes))
             path_list.append('match/integer/s' + str(i % number_of_pathes))
             parsing_model.append(
                 DecimalIntegerValueModelElement(
                     'd' + str(i % number_of_pathes),
                     DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                     DecimalIntegerValueModelElement.PAD_TYPE_NONE))
             parsing_model.append(
                 FixedDataModelElement('s' + str(i % number_of_pathes),
                                       b' Euro '))
             i = i + 1
         sequence_model_element = SequenceModelElement(
             'integer', parsing_model)
         match_value_stream_writer = MatchValueStreamWriter(
             self.output_stream, path_list, b';', b'-')
         t = time.time()
         seconds = time.time()
         i = 0
         while int(time.time() - seconds) < self.waiting_time:
             data = b''
             p = process_time()
             for j in range(
                     1,
                     int(number_of_pathes / 2) + number_of_pathes % 2 + 1):
                 data = data + str(j).encode() + b' Euro '
             seconds = seconds + process_time() - p
             match_context = MatchContext(data)
             match_element = sequence_model_element.get_match_element(
                 'match', match_context)
             log_atom = LogAtom(match_element.match_object,
                                ParserMatch(match_element), t,
                                match_value_stream_writer)
             match_value_stream_writer.receive_atom(log_atom)
             i = i + 1
         results[z] = i
         z = z + 1
         avg = avg + i
     avg = avg / self.iterations
     type(self).result = self.result + self.result_string % (
         match_value_stream_writer.__class__.__name__, avg, results,
         self.different_pathes % number_of_pathes)
 def test5check_value_reduction(self):
     """This unittest checks the functionality of reducing the values when the maxNumVals threshold is reached."""
     event_type_detector = EventTypeDetector(
         self.aminer_config, [self.stream_printer_event_handler])
     t = time.time()
     val_list = [[[]]]
     for i in range(1, event_type_detector.max_num_vals + 1, 1):
         log_atom = LogAtom(
             str(i).encode(),
             ParserMatch(MatchElement('path', str(i), i, None)), t,
             self.__class__.__name__)
         val_list[0][0].append(float(i))
         self.assertTrue(event_type_detector.receive_atom(log_atom))
         self.assertEqual(event_type_detector.values, val_list)
     i += 1
     log_atom = LogAtom(
         str(i).encode(), ParserMatch(MatchElement('path', str(i), i,
                                                   None)), t,
         self.__class__.__name__)
     val_list[0][0].append(float(i))
     self.assertTrue(event_type_detector.receive_atom(log_atom))
     self.assertEqual(
         event_type_detector.values,
         [[val_list[0][0][-event_type_detector.min_num_vals:]]])
 def run_atom_filters_match_value_filter(self, number_of_pathes):
     results = [None] * self.iterations
     avg = 0
     z = 0
     while z < self.iterations:
         new_match_path_detector = NewMatchPathDetector(
             self.aminer_config, [self.stream_printer_event_handler],
             'Default', True)
         subhandler_filter = SubhandlerFilter([],
                                              stop_when_handled_flag=True)
         i = 0
         dictionary = {}
         while i < 1000000:
             dictionary[i] = new_match_path_detector
             i = i + 1
         i = 0
         while i < number_of_pathes:
             match_value_filter = MatchValueFilter(
                 self.integerd + str(i % number_of_pathes), dictionary,
                 None)
             subhandler_filter.add_handler(match_value_filter,
                                           stop_when_handled_flag=True)
             i = i + 1
         t = round(time.time(), 3)
         seconds = time.time()
         i = 0
         while int(time.time() - seconds) < self.waiting_time:
             decimal_integer_value_me = DecimalIntegerValueModelElement(
                 'd' + str(i % number_of_pathes),
                 DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                 DecimalIntegerValueModelElement.PAD_TYPE_NONE)
             match_context = MatchContext(str(i).encode())
             match_element = decimal_integer_value_me.get_match_element(
                 'integer', match_context)
             log_atom = LogAtom(match_element.match_string,
                                ParserMatch(match_element), t,
                                match_value_filter)
             subhandler_filter.receive_atom(log_atom)
             i = i + 1
         results[z] = i
         z = z + 1
         avg = avg + i
     avg = avg / self.iterations
     type(self).result = self.result + self.result_string % (
         subhandler_filter.__class__.__name__, avg, results,
         '%d different %ss with a dictionary of %ss.' %
         (number_of_pathes, match_value_filter.__class__.__name__,
          new_match_path_detector.__class__.__name__))
示例#16
0
 def test3log_atom_matches_multiple_paths(self):
     """This unittest tests the receive_atom method with multiple paths matching."""
     parser_count = ParserCount(
         self.aminer_config,
         ['fixed/seq', 'fixed/seq/m1', 'fixed/seq/m2', 'fixed/m3'],
         [self.stream_printer_event_handler])
     t = time.time()
     log_atom = LogAtom(self.match_context_seq.match_data,
                        ParserMatch(self.match_element_seq), t,
                        parser_count)
     old_count_dict = dict(parser_count.count_dict)
     old_count_dict['fixed/seq'] = 1
     old_count_dict['fixed/seq/m1'] = 1
     old_count_dict['fixed/seq/m2'] = 1
     parser_count.receive_atom(log_atom)
     self.assertEqual(parser_count.count_dict, old_count_dict)
 def run_whitelist_violation_detector(self, number_of_pathes,
                                      modulo_factor):
     results = [None] * self.iterations
     avg = 0
     z = 0
     while z < self.iterations:
         i = 0
         rules = []
         while i < number_of_pathes:
             rules.append(
                 PathExistsMatchRule(
                     self.integerd + str(i % number_of_pathes), None))
             i = i + 1
         whitelist_violation_detector = WhitelistViolationDetector(
             self.aminer_config, rules, [self.stream_printer_event_handler])
         t = time.time()
         seconds = time.time()
         i = 0
         while int(time.time() - seconds) < self.waiting_time:
             p = process_time()
             r = random.randint(1, 100)
             if r >= modulo_factor:
                 r = 2
             else:
                 r = 1
             seconds = seconds + process_time() - p
             decimal_integer_value_me = DecimalIntegerValueModelElement(
                 'd' + str(i % (number_of_pathes * r)),
                 DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                 DecimalIntegerValueModelElement.PAD_TYPE_NONE)
             match_context = MatchContext(str(i % 100).encode())
             match_element = decimal_integer_value_me.get_match_element(
                 'integer', match_context)
             log_atom = LogAtom(match_element.match_string,
                                ParserMatch(match_element), t,
                                whitelist_violation_detector)
             whitelist_violation_detector.receive_atom(log_atom)
             i = i + 1
         results[z] = i
         z = z + 1
         avg = avg + i
     avg = avg / self.iterations
     type(self).result = self.result + self.result_string % (
         whitelist_violation_detector.__class__.__name__, avg, results,
         '%d different PathExistsMatchRules and a moduloFactor of %d.' %
         (number_of_pathes, modulo_factor))
 def run_missing_match_path_value_detector(self, number_of_pathes):
     results = [None] * self.iterations
     avg = 0
     z = 0
     while z < self.iterations:
         i = 0
         path_list = []
         while i < number_of_pathes:
             path_list.append(self.integerd + str(i % number_of_pathes))
             i = i + 1
         missing_match_path_list_value_detector = MissingMatchPathListValueDetector(
             self.aminer_config, path_list,
             [self.stream_printer_event_handler], 'Default', True, 3600,
             86400)
         seconds = time.time()
         t = seconds
         i = 0
         while int(time.time() - seconds) < self.waiting_time:
             decimal_integer_value_me = DecimalIntegerValueModelElement(
                 'd' + str(i % number_of_pathes),
                 DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                 DecimalIntegerValueModelElement.PAD_TYPE_NONE)
             match_context = MatchContext(str(1).encode())
             match_element = decimal_integer_value_me.get_match_element(
                 'integer', match_context)
             p = process_time()
             r = random.randint(0, 100)
             seconds = seconds + process_time() - p
             delta = int(r / 100)
             t = t + 4000 * delta
             log_atom = LogAtom(match_element.match_object,
                                ParserMatch(match_element), t,
                                missing_match_path_list_value_detector)
             missing_match_path_list_value_detector.receive_atom(log_atom)
             i = i + 1
         results[z] = i
         z = z + 1
         avg = avg + i
     avg = avg / self.iterations
     type(self).result = self.result + self.result_string % (
         missing_match_path_list_value_detector.__class__.__name__, avg,
         results, self.different_pathes % number_of_pathes)
 def test1receive_atoms_with_default_values(self):
     """
     In this test case multiple log_atoms are received with default values of the EventTypeDetector.
     path_list is empty and all paths are learned dynamically in variable_key_list.
     """
     event_type_detector = EventTypeDetector(
         self.aminer_config, [self.stream_printer_event_handler])
     log_atoms = []
     for line in self.log_lines:
         t = time.time()
         log_atoms.append(
             LogAtom(
                 line,
                 ParserMatch(
                     self.parsing_model.get_match_element(
                         'parser', MatchContext(line))), t,
                 self.__class__.__name__))
     for i, log_atom in enumerate(log_atoms):
         self.assertTrue(event_type_detector.receive_atom(log_atom))
         self.assertEqual(event_type_detector.total_records, i + 1)
示例#20
0
 def test2receive_non_serialized_data(self):
     """This unittest tests the receive_event method with not serialized data"""
     log_atom = LogAtom(self.fixed_dme.fixed_data,
                        ParserMatch(self.match_element), self.t, self)
     self.analysis_context.register_component(self, self.description)
     event_data = {
         'AnalysisComponent': {
             'AffectedParserPaths': ['test/path/1', 'test/path/2']
         }
     }
     kafka_event_handler = KafkaEventHandler(
         self.analysis_context, self.kafka_topic, {
             'bootstrap_servers': ['localhost:9092'],
             'api_version': (2, 0, 1)
         })
     self.assertFalse(
         kafka_event_handler.receive_event(self.test_detector,
                                           self.event_message,
                                           self.sorted_log_lines,
                                           event_data, log_atom, self))
     self.assertRaises(StopIteration, self.consumer.__next__)
 def test1receive_expected_event(self):
     """In this test case a normal Event happens and the json output should be sent to a StreamPrinterEventHandler."""
     json_converter_handler = JsonConverterHandler(
         [self.stream_printer_event_handler], self.analysis_context)
     log_atom = LogAtom(self.fixed_dme.fixed_data,
                        ParserMatch(self.match_element), self.t, self)
     self.analysis_context.register_component(self, self.description)
     event_data = {
         'AnalysisComponent': {
             'AffectedParserPaths': ['test/path/1', 'test/path/2']
         }
     }
     json_converter_handler.receive_event(self.test_detector,
                                          self.event_message,
                                          self.sorted_log_lines, event_data,
                                          log_atom, self)
     self.assertEqual(
         self.output_stream.getvalue(), self.expected_string %
         (datetime.fromtimestamp(self.t).strftime("%Y-%m-%d %H:%M:%S"),
          self.event_message, self.__class__.__name__, self.description,
          self.__class__.__name__, self.description, self.event_message,
          self.persistence_id, round(self.t, 2), ""))
 def test6persist_and_load_data(self):
     """This unittest checks the functionality of the persistence by persisting and reloading values."""
     event_type_detector = EventTypeDetector(
         self.aminer_config, [self.stream_printer_event_handler])
     t = time.time()
     log_atom = LogAtom(
         b'22.2', ParserMatch(MatchElement('path', '22.2', 22.2, None)), t,
         self.__class__.__name__)
     event_type_detector.receive_atom(log_atom)
     event_type_detector.do_persist()
     event_type_detector_loaded = EventTypeDetector(
         self.aminer_config, [self.stream_printer_event_handler])
     self.assertEqual(event_type_detector.variable_key_list,
                      event_type_detector_loaded.variable_key_list)
     self.assertEqual(event_type_detector.values,
                      event_type_detector_loaded.values)
     self.assertEqual(event_type_detector.longest_path,
                      event_type_detector_loaded.longest_path)
     self.assertEqual(event_type_detector.check_variables,
                      event_type_detector_loaded.check_variables)
     self.assertEqual(event_type_detector.num_eventlines,
                      event_type_detector_loaded.num_eventlines)
 def run_timestamps_unsorted_detector(self, reset_factor):
     results = [None] * self.iterations
     avg = 0
     z = 0
     while z < self.iterations:
         timestamps_unsorted_detector = TimestampsUnsortedDetector(
             self.aminer_config, [self.stream_printer_event_handler])
         seconds = time.time()
         s = seconds
         i = 0
         mini = 100
         while int(time.time() - seconds) < self.waiting_time:
             decimal_integer_value_me = DecimalIntegerValueModelElement(
                 'd', DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                 DecimalIntegerValueModelElement.PAD_TYPE_NONE)
             p = process_time()
             r = random.randint(1, 100)
             seconds = seconds + process_time() - p
             match_context = MatchContext(str(i).encode())
             match_element = decimal_integer_value_me.get_match_element(
                 'integer', match_context)
             log_atom = LogAtom(match_element.match_string,
                                ParserMatch(match_element),
                                s + min(r, mini),
                                timestamps_unsorted_detector)
             timestamps_unsorted_detector.receive_atom(log_atom)
             if mini > r:
                 mini = r
             else:
                 mini = mini + reset_factor
             i = i + 1
         results[z] = i
         z = z + 1
         avg = avg + i
     avg = avg / self.iterations
     type(self).result = self.result + self.result_string % (
         timestamps_unsorted_detector.__class__.__name__, avg, results,
         'a resetFactor of %f.' % reset_factor)
示例#24
0
    def test3receive_match_in_time_without_auto_include_flag(self):
        """This test case checks if log_atoms are accepted as expected with the auto_include_flag=False."""
        description = 'test3newMatchIdValueComboDetectorTest'
        output_stream_empty_results = [True, False, True, False, True, False, True, True, False, True, False, True, True, True, False,
                                       False, False, True, False, True, False]
        id_dict_current_results = [
            {1: {'parser/type/syscall/syscall': 1}}, {}, {2: {'parser/type/syscall/syscall': 2}}, {},
            {3: {'parser/type/syscall/syscall': 3}}, {}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 4: {'parser/type/syscall/syscall': 1}}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 5: {'parser/type/path/name': 'two'}}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 6: {'parser/type/syscall/syscall': 4}},
            {100: {'parser/type/syscall/syscall': 1}, 6: {'parser/type/syscall/syscall': 4}, 7: {'parser/type/path/name': 'five'}},
            {100: {'parser/type/syscall/syscall': 1}, 6: {'parser/type/syscall/syscall': 4}, 7: {'parser/type/path/name': 'five'},
                8: {'parser/type/syscall/syscall': 6}},
            {100: {'parser/type/syscall/syscall': 1}, 7: {'parser/type/path/name': 'five'}, 8: {'parser/type/syscall/syscall': 6}},
            {100: {'parser/type/syscall/syscall': 1}, 8: {'parser/type/syscall/syscall': 6}}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 9: {'parser/type/syscall/syscall': 2}}, {100: {'parser/type/syscall/syscall': 1}},
            {100: {'parser/type/syscall/syscall': 1}, 10: {'parser/type/path/name': 'one'}}, {100: {'parser/type/syscall/syscall': 1}}]
        id_dict_old_results = [{}] * 21
        min_allowed_time_diff = 0.1
        log_atoms = []
        for line in self.log_lines:
            t = time.time()
            log_atoms.append(
                LogAtom(line, ParserMatch(self.parsing_model.get_match_element('parser', MatchContext(line))), t, self.__class__.__name__))
        new_match_id_value_combo_detector = NewMatchIdValueComboDetector(self.aminer_config, [
            'parser/type/path/name', 'parser/type/syscall/syscall'], [self.stream_printer_event_handler],
            id_path_list=['parser/type/path/id', 'parser/type/syscall/id'], min_allowed_time_diff=min_allowed_time_diff,
            auto_include_flag=False, allow_missing_values_flag=True, persistence_id='audit_type_path', output_log_line=False)
        self.analysis_context.register_component(new_match_id_value_combo_detector, description)

        for i, log_atom in enumerate(log_atoms):
            self.assertTrue(new_match_id_value_combo_detector.receive_atom(log_atom))
            self.assertEqual(self.output_stream.getvalue() == "", output_stream_empty_results[i], log_atom.raw_data)
            self.assertEqual(new_match_id_value_combo_detector.id_dict_current, id_dict_current_results[i])
            self.assertEqual(new_match_id_value_combo_detector.id_dict_old, id_dict_old_results[i])
            self.assertEqual(new_match_id_value_combo_detector.known_values, [])
            self.reset_output_stream()
    def run_histogram_analysis(self, number_of_pathes, amplifier):
        results = [None] * self.iterations
        avg = 0
        z = 0
        while z < self.iterations:
            modulo_time_bin_definition = ModuloTimeBinDefinition(
                86400, 86400 / number_of_pathes, 0, 1, number_of_pathes, False)
            histogram_data = HistogramData('match/crontab',
                                           modulo_time_bin_definition)
            histogram_analysis = HistogramAnalysis(
                self.aminer_config,
                [(histogram_data.property_path, modulo_time_bin_definition)],
                amplifier * self.waiting_time,
                [self.stream_printer_event_handler], False, 'Default')

            i = 0
            seconds = time.time()
            t = seconds
            while int(time.time() - seconds) < self.waiting_time:
                p = process_time()
                rand = random.randint(0, 100000)
                seconds = seconds + process_time() - p
                match_element = MatchElement('match/crontab', t + rand,
                                             t + rand, [])
                log_atom = LogAtom(histogram_data.bin_data,
                                   ParserMatch(match_element), t + i,
                                   histogram_analysis)
                histogram_analysis.receive_atom(log_atom)
                i = i + 1
            results[z] = i
            z = z + 1
            avg = avg + i
        avg = avg / self.iterations
        type(self).result = self.result + self.result_string % (
            histogram_analysis.__class__.__name__, avg, results,
            '%d bin(s) and output after %d elements.' %
            (number_of_pathes, amplifier * self.waiting_time))
    def run_timestamp_correction_filters(self, number_of_pathes):
        results = [None] * self.iterations
        avg = 0
        z = 0
        while z < self.iterations:
            new_match_path_detector = NewMatchPathDetector(
                self.aminer_config, [self.stream_printer_event_handler],
                'Default', True)
            simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust(
                [new_match_path_detector])

            seconds = time.time()
            i = 0
            while int(time.time() - seconds) < self.waiting_time:
                decimal_integer_value_me = DecimalIntegerValueModelElement(
                    'd' + str(i % number_of_pathes),
                    DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                    DecimalIntegerValueModelElement.PAD_TYPE_NONE)
                p = process_time()
                r = random.randint(1, 1000000)
                seconds = seconds + process_time() - p
                match_context = MatchContext(str(i).encode())
                match_element = decimal_integer_value_me.get_match_element(
                    'integer', match_context)
                log_atom = LogAtom(match_element.match_string,
                                   ParserMatch(match_element), seconds - r,
                                   simple_monotonic_timestamp_adjust)
                simple_monotonic_timestamp_adjust.receive_atom(log_atom)
                i = i + 1
            results[z] = i
            z = z + 1
            avg = avg + i
        avg = avg / self.iterations
        type(self).result = self.result + self.result_string % (
            simple_monotonic_timestamp_adjust.__class__.__name__, avg, results,
            'a %s and %d different path(es).' %
            (new_match_path_detector.__class__.__name__, number_of_pathes))
    def run_time_correlation_violation_detector(self, chance):
        results = [None] * self.iterations
        avg = 0
        z = 0
        while z < self.iterations:
            correlation_rule = CorrelationRule('Correlation',
                                               0,
                                               chance,
                                               max_artefacts_a_for_single_b=1,
                                               artefact_match_parameters=[
                                                   ('/integer/d0',
                                                    '/integer/d1')
                                               ])
            a_class_selector = EventClassSelector('Selector1',
                                                  [correlation_rule], None)
            b_class_selector = EventClassSelector('Selector2', None,
                                                  [correlation_rule])
            rules = [
                Rules.PathExistsMatchRule('/integer/d0', a_class_selector),
                Rules.PathExistsMatchRule('/integer/d1', b_class_selector)
            ]

            time_correlation_violation_detector = TimeCorrelationViolationDetector(
                self.analysis_context.aminer_config, rules,
                [self.stream_printer_event_handler])
            seconds = time.time()
            s = seconds
            i = 0
            decimal_integer_value_me = DecimalIntegerValueModelElement(
                'd0', DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                DecimalIntegerValueModelElement.PAD_TYPE_NONE)
            while int(time.time() - seconds) < self.waiting_time:
                integer = '/integer'
                p = process_time()
                r = random.randint(1, 100)
                seconds = seconds + process_time() - p
                decimal_integer_value_me1 = DecimalIntegerValueModelElement(
                    'd1', DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                    DecimalIntegerValueModelElement.PAD_TYPE_NONE)
                match_context = MatchContext(str(i).encode())
                match_element = decimal_integer_value_me.get_match_element(
                    integer, match_context)
                log_atom = LogAtom(match_element.match_string,
                                   ParserMatch(match_element), s,
                                   time_correlation_violation_detector)
                time_correlation_violation_detector.receive_atom(log_atom)

                match_context = MatchContext(str(i).encode())
                match_element = decimal_integer_value_me1.get_match_element(
                    integer, match_context)
                log_atom = LogAtom(match_element.match_string,
                                   ParserMatch(match_element), s + r / 100,
                                   time_correlation_violation_detector)
                time_correlation_violation_detector.receive_atom(log_atom)
                s = s + r / 100

                if r / 100 >= chance:
                    p = process_time()
                    match_context = MatchContext(str(i).encode())
                    match_element = decimal_integer_value_me.get_match_element(
                        integer, match_context)
                    log_atom = LogAtom(match_element.match_string,
                                       ParserMatch(match_element), s,
                                       time_correlation_violation_detector)
                    time_correlation_violation_detector.receive_atom(log_atom)
                    seconds = seconds + process_time() - p
                time_correlation_violation_detector.do_timer(s)
                i = i + 1
            results[z] = i
            z = z + 1
            avg = avg + i
        avg = avg / self.iterations
        type(self).result = self.result + self.result_string % (
            time_correlation_violation_detector.__class__.__name__, avg,
            results, '%d%% chance of not finding an element' %
            ((1 - chance) * 100))
示例#28
0
    def test6receive_atom(self):
        """
        This unittest tests if atoms are sorted to the right distribution and if the update steps also work properly.
        Therefore the assumption that after 200 values the VTD with the default parameters can change to the right distribution.
        """
        # load data
        with open('unit/data/vtd_data/uni_data_test6', 'rb') as f:
            uni_data_list = pickle.load(f)  # skipcq: BAN-B301
        with open('unit/data/vtd_data/nor_data_test6', 'rb') as f:
            nor_data_list = pickle.load(f)  # skipcq: BAN-B301
        with open('unit/data/vtd_data/beta1_data_test6', 'rb') as f:
            beta1_data_list = pickle.load(f)  # skipcq: BAN-B301

        uni_data_list = uni_data_list * 10
        nor_data_list = nor_data_list * 10
        beta1_data_list = beta1_data_list * 10
        vtd_arguments = [(100, 50), (110, 55), (90, 45), (80, 40), (70, 35)]

        for init, update in vtd_arguments:
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.8,
                                       sim_thres=0.3,
                                       num_pause_others=0)
            t = time.time()
            stat_data = b'True'
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            # initialize data
            for i in range(init):
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['stat', [stat_data.decode()], True], result,
                             (init, update, result))

            # static -> static
            for i in range(update):
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['stat', [stat_data.decode()], True], result,
                             (init, update, result))

            # static -> uni
            for uni_data in uni_data_list[:init]:
                log_atom = LogAtom(
                    uni_data,
                    ParserMatch(MatchElement('', uni_data, str(uni_data),
                                             None)), t,
                    self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            pos_distr = vtd.alternative_distribution_types[0][0]
            self.assertTrue(
                result[0] == 'uni'
                or 'uni' in [distr[0] for distr in pos_distr],
                (init, update, result))

            # uni -> others
            for i in range(update):
                stat_data = bytes(str((i % 75) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['others', 0], result, (init, update, result))

            # others -> d
            for i in range(update):
                stat_data = bytes(str((i % 10) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual('d', result[0], (init, update, result))

            # reset all
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.3,
                                       sim_thres=0.5,
                                       num_pause_others=0,
                                       num_d_bt=30)

            # initialize with d
            for i in range(init):
                stat_data = bytes(str((i % 10) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual('d', result[0], (init, update, result))

            # discrete to others with new values
            for uni_data in uni_data_list[:init]:
                log_atom = LogAtom(
                    uni_data,
                    ParserMatch(MatchElement('', uni_data, str(uni_data),
                                             None)), t,
                    self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['others', 0], result, (init, update, result))

            # reset all
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.3,
                                       sim_thres=0.5,
                                       num_pause_others=0,
                                       num_d_bt=20)

            # initialize with d
            for i in range(init):
                stat_data = bytes(str((i % 10) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual('d', result[0], (init, update, result))

            # discrete to others without new values, low num_d_bt
            for i in range(update):
                stat_data = bytes(str((i % 3) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['others', 0], result, (init, update, result))

            # reset all
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.3,
                                       sim_thres=0.5,
                                       num_pause_others=0,
                                       num_d_bt=100)

            # initialize with d
            for i in range(init):
                stat_data = bytes(str((i % 10) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual('d', result[0], (init, update, result))

            # discrete to others without new values, high num_d_bt
            for i in range(update):
                stat_data = bytes(str((i % 3) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertNotEqual(['others', 0], result, (init, update, result))

            # reset all
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.3,
                                       sim_thres=0.3,
                                       num_pause_others=0)
            t = time.time()
            stat_data = b'True'
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            # initialize data
            for i in range(init):
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['stat', [stat_data.decode()], True], result,
                             (init, update, result))

            # static -> asc
            for i in range(init):
                stat_data = bytes(str(i * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['asc', 'float'], result, (init, update, result))

            # asc -> desc
            for i in range(init, 0, -1):
                stat_data = bytes(str(i * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['desc', 'float'], result, (init, update, result))

            # reset all
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.3,
                                       sim_thres=0.3,
                                       num_pause_others=0)
            t = time.time()
            stat_data = b'True'
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            # initialize data
            for i in range(init):
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['stat', [stat_data.decode()], True], result,
                             (init, update, result))

            # static -> nor
            for nor_data in nor_data_list[:init]:
                log_atom = LogAtom(
                    nor_data,
                    ParserMatch(MatchElement('', nor_data, str(nor_data),
                                             None)), t,
                    self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            pos_distr = vtd.alternative_distribution_types[0][0]
            self.assertTrue(
                result[0] == 'nor'
                or 'nor' in [distr[0] for distr in pos_distr],
                (init, update, result))

            # nor -> beta1
            for beta1_data in beta1_data_list[:init]:
                log_atom = LogAtom(
                    beta1_data,
                    ParserMatch(
                        MatchElement('', beta1_data, str(beta1_data), None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            pos_distr = vtd.alternative_distribution_types[0][0]
            self.assertTrue(
                (result[0] == 'beta' and result[-1] == 1) or 'beta1'
                in [distr[0] + str(distr[-1])
                    for distr in pos_distr], (init, update, result))

            # reset all
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.3,
                                       sim_thres=0.3,
                                       num_pause_others=0)
            t = time.time()
            stat_data = b'True'
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            # initialize data
            for i in range(init):
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['stat', [stat_data.decode()], True], result,
                             (init, update, result))

            # static -> unq
            vtd.test_ks_int = False
            unq_data_list = [bytes(str(i), 'utf-8') for i in range(init)]
            random.shuffle(unq_data_list)
            for unq_data in unq_data_list:
                log_atom = LogAtom(
                    unq_data,
                    ParserMatch(MatchElement('', unq_data, unq_data, None)), t,
                    self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual('unq', result[0], (init, update, result))
示例#29
0
    def test4detect_var_type(self):
        """This unittest tests possible scenarios of the detect_var_type method."""
        num_init = 100
        etd = EventTypeDetector(self.aminer_config,
                                [self.stream_printer_event_handler])
        vtd = VariableTypeDetector(self.aminer_config,
                                   [self.stream_printer_event_handler],
                                   etd,
                                   num_init=num_init)
        t = time.time()
        # test the 'static' path of detect_var_type
        stat_data = b'5.3.0-55-generic'
        log_atom = LogAtom(
            stat_data,
            ParserMatch(MatchElement('', stat_data.decode(), stat_data, None)),
            t, self.__class__.__name__)
        # check what happens if less than numMinAppearance values are available
        for i in range(num_init):
            self.assertTrue(etd.receive_atom(log_atom))
        result = vtd.detect_var_type(0, 0)
        self.assertEqual(['stat', [stat_data.decode()], False], result)

        # reset etd and vtd for clear results.
        etd = EventTypeDetector(self.aminer_config,
                                [self.stream_printer_event_handler])
        vtd = VariableTypeDetector(self.aminer_config,
                                   [self.stream_printer_event_handler],
                                   etd,
                                   num_init=num_init)

        # test ascending with float values
        for i in range(num_init):
            stat_data = bytes(str(i * 0.1), 'utf-8')
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            self.assertTrue(etd.receive_atom(log_atom))
        result = vtd.detect_var_type(0, 0)
        self.assertEqual(['asc', 'float'], result)

        # reset etd and vtd for clear results.
        etd = EventTypeDetector(self.aminer_config,
                                [self.stream_printer_event_handler])
        vtd = VariableTypeDetector(self.aminer_config,
                                   [self.stream_printer_event_handler],
                                   etd,
                                   num_init=num_init)

        # test ascending with integer values
        for i in range(num_init):
            stat_data = bytes(str(i), 'utf-8')
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            self.assertTrue(etd.receive_atom(log_atom))
        result = vtd.detect_var_type(0, 0)
        self.assertEqual(['asc', 'int'], result)

        # reset etd and vtd for clear results.
        etd = EventTypeDetector(self.aminer_config,
                                [self.stream_printer_event_handler])
        vtd = VariableTypeDetector(self.aminer_config,
                                   [self.stream_printer_event_handler],
                                   etd,
                                   num_init=num_init)

        # test descending with float values
        for i in range(num_init, 0, -1):
            stat_data = bytes(str(i * 0.1), 'utf-8')
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            self.assertTrue(etd.receive_atom(log_atom))
        result = vtd.detect_var_type(0, 0)
        self.assertEqual(['desc', 'float'], result)

        # reset etd and vtd for clear results.
        etd = EventTypeDetector(self.aminer_config,
                                [self.stream_printer_event_handler])
        vtd = VariableTypeDetector(self.aminer_config,
                                   [self.stream_printer_event_handler],
                                   etd,
                                   num_init=num_init)

        # test descending with integer values
        for i in range(num_init, 0, -1):
            stat_data = bytes(str(i), 'utf-8')
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            self.assertTrue(etd.receive_atom(log_atom))
        result = vtd.detect_var_type(0, 0)
        self.assertEqual(['desc', 'int'], result)

        # reset etd and vtd for clear results.
        etd = EventTypeDetector(self.aminer_config,
                                [self.stream_printer_event_handler])
        vtd = VariableTypeDetector(self.aminer_config,
                                   [self.stream_printer_event_handler],
                                   etd,
                                   num_init=num_init,
                                   div_thres=0.3,
                                   test_ks_int=True)

        # test 'num_init' and 'div_thres'
        # prevent results from becoming asc or desc
        stat_data = bytes(str(99), 'utf-8')
        log_atom = LogAtom(
            stat_data,
            ParserMatch(MatchElement('', stat_data.decode(), stat_data, None)),
            t, self.__class__.__name__)
        etd.receive_atom(log_atom)
        values = [float(stat_data)]
        for i in range(99):
            stat_data = bytes(str(i), 'utf-8')
            values.append(float(stat_data))
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            self.assertTrue(etd.receive_atom(log_atom))
        result = vtd.detect_var_type(0, 0)
        # this means that the uniformal distribution must be detected.
        self.assertNotEqual(
            result[0] == 'uni' or 'uni' in [distr[0] for distr in result[-1]],
            result)

        # test 'divThres' option for the continuous distribution
        vtd = VariableTypeDetector(self.aminer_config,
                                   [self.stream_printer_event_handler],
                                   etd,
                                   num_init=num_init,
                                   div_thres=1.0,
                                   test_ks_int=True)
        result = vtd.detect_var_type(0, 0)
        self.assertEqual(['unq', values], result)

        # test 'testInt' option for the continuous distribution
        vtd = VariableTypeDetector(self.aminer_config,
                                   [self.stream_printer_event_handler],
                                   etd,
                                   num_init=num_init,
                                   div_thres=0.3,
                                   test_ks_int=False)
        result = vtd.detect_var_type(0, 0)
        self.assertEqual(['unq', values], result)

        # test 'simThres' option to result in 'others'
        vtd = VariableTypeDetector(self.aminer_config,
                                   [self.stream_printer_event_handler],
                                   etd,
                                   num_init=num_init,
                                   div_thres=0.5,
                                   test_ks_int=False,
                                   sim_thres=0.5)
        values = []
        for i in range(100):
            stat_data = bytes(str((i % 50) * 0.1), 'utf-8')
            values.append(float(stat_data))
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            self.assertTrue(etd.receive_atom(log_atom))
        result = vtd.detect_var_type(0, 0)
        # at least (1 - 'simThresh') * 'numMinAppearance' and maximal 'numMinAppearance' * 'divThres' - 1 unique values must exist.
        self.assertEqual(['others', 0], result)

        # test discrete result
        vtd = VariableTypeDetector(self.aminer_config,
                                   [self.stream_printer_event_handler],
                                   etd,
                                   num_init=num_init,
                                   div_thres=0.5,
                                   test_ks_int=False,
                                   sim_thres=0.3)
        values = []
        for i in range(num_init):
            stat_data = bytes(str((i % 50) * 0.1), 'utf-8')
            values.append(float(stat_data))
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            self.assertTrue(etd.receive_atom(log_atom))
        result = vtd.detect_var_type(0, 0)
        values_set = list(set(values))
        values_app = [0 for _ in range(len(values_set))]
        for value in values:
            values_app[values_set.index(value)] += 1
        values_app = [x / len(values) for x in values_app]
        self.assertEqual(['d', values_set, values_app, len(values)], result)
    def run_new_match_id_value_combo_detector(self, min_allowed_time_diff):
        log_lines = [
            b'type=SYSCALL msg=audit(1580367384.000:1): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367385.000:1): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 '
            b'rdev=00:00 nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367386.000:2): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367387.000:2): item=0 name="two" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
            b'nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367388.000:3): arch=c000003e syscall=3 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367389.000:3): item=0 name="three" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00'
            b' nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367388.500:100): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=SYSCALL msg=audit(1580367390.000:4): arch=c000003e syscall=1 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367391.000:4): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
            b'nametype=NORMAL',
            b'type=PATH msg=audit(1580367392.000:5): item=0 name="two" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
            b'nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367393.000:5): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=SYSCALL msg=audit(1580367394.000:6): arch=c000003e syscall=4 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367395.000:7): item=0 name="five" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
            b'nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367396.000:8): arch=c000003e syscall=6 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367397.000:6): item=0 name="four" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
            b'nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367398.000:7): arch=c000003e syscall=5 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367399.000:8): item=0 name="six" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 rdev=00:00 '
            b'nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367400.000:9): arch=c000003e syscall=2 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 a3=4f '
            b'items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 tty=(none) '
            b'ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)',
            b'type=PATH msg=audit(1580367401.000:9): item=0 name="three" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 '
            b'rdev=00:00 nametype=NORMAL',
            b'type=PATH msg=audit(1580367402.000:10): item=0 name="one" inode=790106 dev=fe:01 mode=0100666 ouid=1000 ogid=1000 '
            b'rdev=00:00 nametype=NORMAL',
            b'type=SYSCALL msg=audit(1580367403.000:10): arch=c000003e syscall=3 success=yes exit=21 a0=7ffda5863060 a1=0 a2=1b6 '
            b'a3=4f items=1 ppid=22913 pid=13187 auid=4294967295 uid=33 gid=33 euid=33 suid=33 fsuid=33 egid=33 sgid=33 fsgid=33 '
            b'tty=(none) ses=4294967295 comm="apache2" exe="/usr/sbin/apache2" key=(null)'
        ]
        parsing_model = FirstMatchModelElement('type', [
            SequenceModelElement('path', [
                FixedDataModelElement('type', b'type=PATH '),
                FixedDataModelElement('msg_audit', b'msg=audit('),
                DelimitedDataModelElement('msg', b':'),
                FixedDataModelElement('placeholder', b':'),
                DecimalIntegerValueModelElement('id'),
                FixedDataModelElement('item_string', b'): item='),
                DecimalIntegerValueModelElement('item'),
                FixedDataModelElement('name_string', b' name="'),
                DelimitedDataModelElement('name', b'"'),
                FixedDataModelElement('inode_string', b'" inode='),
                DecimalIntegerValueModelElement('inode'),
                FixedDataModelElement('dev_string', b' dev='),
                DelimitedDataModelElement('dev', b' '),
                FixedDataModelElement('mode_string', b' mode='),
                DecimalIntegerValueModelElement('mode'),
                FixedDataModelElement('ouid_string', b' ouid='),
                DecimalIntegerValueModelElement('ouid'),
                FixedDataModelElement('ogid_string', b' ogid='),
                DecimalIntegerValueModelElement('ogid'),
                FixedDataModelElement('rdev_string', b' rdev='),
                DelimitedDataModelElement('rdev', b' '),
                FixedDataModelElement('nametype_string', b' nametype='),
                FixedWordlistDataModelElement('nametype',
                                              [b'NORMAL', b'ERROR'])
            ]),
            SequenceModelElement('syscall', [
                FixedDataModelElement('type', b'type=SYSCALL '),
                FixedDataModelElement('msg_audit', b'msg=audit('),
                DelimitedDataModelElement('msg', b':'),
                FixedDataModelElement('placeholder', b':'),
                DecimalIntegerValueModelElement('id'),
                FixedDataModelElement('arch_string', b'): arch='),
                DelimitedDataModelElement('arch', b' '),
                FixedDataModelElement('syscall_string', b' syscall='),
                DecimalIntegerValueModelElement('syscall'),
                FixedDataModelElement('success_string', b' success='),
                FixedWordlistDataModelElement('success', [b'yes', b'no']),
                FixedDataModelElement('exit_string', b' exit='),
                DecimalIntegerValueModelElement('exit'),
                AnyByteDataModelElement('remainding_data')
            ])
        ])

        results = [None] * self.iterations
        avg = 0
        z = 0
        while z < self.iterations:
            i = 0
            new_match_id_value_combo_detector = NewMatchIdValueComboDetector(
                self.aminer_config,
                ['parser/type/path/name', 'parser/type/syscall/syscall'],
                [self.stream_printer_event_handler],
                id_path_list=['parser/type/path/id', 'parser/type/syscall/id'],
                min_allowed_time_diff=min_allowed_time_diff,
                auto_include_flag=False,
                allow_missing_values_flag=True,
                persistence_id='audit_type_path',
                output_log_line=False)
            t = time.time()
            seconds = time.time()
            i = 0
            while int(time.time() - seconds) < self.waiting_time:
                p = process_time()
                r = random.randint(0, len(log_lines) - 1)
                seconds = seconds + process_time() - p

                # this code just creates some data to be able to compare with other analysis components.
                decimal_integer_value_me = DecimalIntegerValueModelElement(
                    'd', DecimalIntegerValueModelElement.SIGN_TYPE_NONE,
                    DecimalIntegerValueModelElement.PAD_TYPE_NONE)
                match_context = MatchContext(str(i % 100).encode())
                _match_element = decimal_integer_value_me.get_match_element(
                    'integer', match_context)
                ########################################################################################

                line = log_lines[r]
                log_atom = LogAtom(
                    line,
                    ParserMatch(
                        parsing_model.get_match_element(
                            'parser', MatchContext(line))), t,
                    self.__class__.__name__)
                new_match_id_value_combo_detector.receive_atom(log_atom)
                i = i + 1
            results[z] = i
            z = z + 1
            avg = avg + i
        avg = avg / self.iterations
        type(self).result = self.result + self.result_string % (
            new_match_id_value_combo_detector.__class__.__name__, avg, results,
            '%.2f seconds min_allowed_time_diff.' % min_allowed_time_diff)