def test_constraints_or_without_constraints(self):
    """
    With an empty constraints list, constraints_or is expected to return one
    OR-linked cause per clues combination, none of them carrying constraints.
    """
    line_source = LineSource('localhost', 'node_0.log')
    effect = Clue(('Pineapple', 44), 'Pineapple, 44 times', 3000, line_source)
    first_parser_clues = [
        Clue(('Banana', 2), 'Banana, 2 times', 1050, line_source),
        Clue(('Milk', 1), 'Milk, 2 times', 1100, line_source),
    ]
    second_parser_clues = [
        Clue(('Chocolate', 2), 'Chocolate, 2 times', 100, line_source),
    ]
    clues_lists = [(first_parser_clues, 1), (second_parser_clues, 1)]
    constraints = []

    causes = Verifier.constraints_or(clues_lists, effect, constraints, ConstraintManager())

    assert len(causes) == 2
    for cause in causes:
        assert cause.constraints_linkage == InvestigationResult.OR
    # Expected lines are rebuilt from fresh Clue objects, not the input ones.
    assert causes[0].lines == [
        FrontInput.from_clue(clue) for clue in (
            Clue(('Banana', 2), 'Banana, 2 times', 1050, line_source),
            Clue(('Chocolate', 2), 'Chocolate, 2 times', 100, line_source),
        )
    ]
    assert causes[1].lines == [
        FrontInput.from_clue(clue) for clue in (
            Clue(('Milk', 1), 'Milk, 2 times', 1100, line_source),
            Clue(('Chocolate', 2), 'Chocolate, 2 times', 100, line_source),
        )
    ]
    assert not any(cause.constraints for cause in causes)
def test_constraints_and_basic(self):
    """
    Two 'identical' constraints spanning the effect and both parsers: exactly
    one AND-linked cause (the two 'Pear' lines) is expected.
    """

    def identical(clues_groups):
        # Every constraint in this test differs only by its clues groups.
        return {'clues_groups': clues_groups, 'name': 'identical', 'params': {}}

    line_source = LineSource('localhost', 'node_0.log')
    effect = Clue(('Pear', 2), 'Pear, 2 times', 3000, line_source)
    first_parser_clues = [
        Clue(('Milk', 3), 'Milk, 3 times', 50, line_source),
        Clue(('Chocolate', 2), 'Chocolate, 2 times', 100, line_source),
        Clue(('Pear', 2), 'Pear, 2 times', 150, line_source),
    ]
    second_parser_clues = [
        Clue(('Pear', 2), 'Pear, 2 times', 1050, line_source),
        Clue(('Milk', 1), 'Milk, 1 times', 1100, line_source),
    ]
    clues_lists = [(first_parser_clues, 1), (second_parser_clues, 1)]
    constraints = [
        identical([[0, 1], [1, 1], [2, 1]]),
        identical([[0, 2], [1, 2], [2, 2]]),
    ]

    causes = Verifier.constraints_and(clues_lists, effect, constraints, ConstraintManager())

    assert len(causes) == 1
    for cause in causes:
        assert cause.constraints_linkage == InvestigationResult.AND
    assert causes[0].lines == [
        FrontInput.from_clue(clue) for clue in (
            Clue(('Pear', 2), 'Pear, 2 times', 150, line_source),
            Clue(('Pear', 2), 'Pear, 2 times', 1050, line_source),
        )
    ]
def test_constraints_and_basic(self):
    """
    Checks constraints_and on two clue lists where only the 'Pear' clues
    satisfy both 'identical' constraints together with the effect.
    """
    line_source = LineSource('localhost', 'node_0.log')
    effect = Clue(('Pear', 2), 'Pear, 2 times', 3000, line_source)

    clues_lists = []
    clues_lists.append((
        [
            Clue(('Milk', 3), 'Milk, 3 times', 50, line_source),
            Clue(('Chocolate', 2), 'Chocolate, 2 times', 100, line_source),
            Clue(('Pear', 2), 'Pear, 2 times', 150, line_source),
        ],
        1,
    ))
    clues_lists.append((
        [
            Clue(('Pear', 2), 'Pear, 2 times', 1050, line_source),
            Clue(('Milk', 1), 'Milk, 1 times', 1100, line_source),
        ],
        1,
    ))

    first_groups_identical = {
        'clues_groups': [[0, 1], [1, 1], [2, 1]],
        'name': 'identical',
        'params': {},
    }
    second_groups_identical = {
        'clues_groups': [[0, 2], [1, 2], [2, 2]],
        'name': 'identical',
        'params': {},
    }
    constraints = [first_groups_identical, second_groups_identical]

    causes = Verifier.constraints_and(clues_lists, effect, constraints, ConstraintManager())

    assert len(causes) == 1
    linkages = [cause.constraints_linkage for cause in causes]
    assert all(linkage == InvestigationResult.AND for linkage in linkages)
    expected_lines = [
        FrontInput.from_clue(Clue(('Pear', 2), 'Pear, 2 times', 150, line_source)),
        FrontInput.from_clue(Clue(('Pear', 2), 'Pear, 2 times', 1050, line_source)),
    ]
    assert causes[0].lines == expected_lines
def test_constraints_check_same_cause_parser_as_effect(self):
    """
    The same parser (self.cause_a) acts as both the only cause and the effect
    of the rule; a single result holding the '42 carrots' cause is expected.
    """
    identical_constraint = {
        'clues_groups': [[0, 1], [1, 1]],
        'name': 'identical',
        'params': {},
    }
    rule = Rule([self.cause_a], self.cause_a, [identical_constraint], Rule.LINKAGE_AND)
    effect_clues_dict = {
        'cause_a': Clue((42,), '42 carrots', 1420, self.line_source),
    }
    # Same shape as the clues dict collected in SearchManager.
    clues = {
        'cause_a': [
            Clue((40,), '40 carrots', 400, self.line_source),
            Clue((42,), '42 carrots', 420, self.line_source),
            Clue((44,), '44 carrots', 440, self.line_source),
        ],
        'dummy': [
            Clue((98,), '98 foo bar', 980, self.line_source),
            Clue((99,), '99 foo bar', 990, self.line_source),
        ],
    }

    results = rule.constraints_check(clues, effect_clues_dict)

    assert len(results) == 1
    matching_cause = Clue((42,), '42 carrots', 420, self.line_source)
    assert results[0].lines == [FrontInput.from_clue(matching_cause)]
def test_constraints_or_two_time_constraints(self):
    """
    Two 'time_delta' constraints on the same groups (max_delta 8.0 and 3.0):
    constraints_or is expected to return a single OR-linked cause that carries
    only the first constraint.
    """

    def time_delta(max_delta):
        # Both constraints share the clues groups and differ only in max_delta.
        return {
            'clues_groups': [[1, 2], [0, 2]],
            'name': 'time_delta',
            'params': {
                'max_delta': max_delta
            }
        }

    line_source = LineSource('localhost', 'node_0.log')
    effect = Clue(
        ('Foo occurred', datetime(2000, 6, 14, second=15)),
        'Foo occurred, 2000 06 14 00:00:15',
        500,
        line_source,
    )
    bar_clues = [
        Clue(
            ('Bar occurred', datetime(2000, 6, 14, second=10)),
            'Bar occurred, 2000 06 14 00:00:10',
            250,
            line_source,
        ),
    ]
    clues_lists = [(bar_clues, 1)]
    constraints = [time_delta(8.0), time_delta(3.0)]

    causes = Verifier.constraints_or(clues_lists, effect, constraints, ConstraintManager())

    assert len(causes) == 1
    for cause in causes:
        assert cause.constraints_linkage == InvestigationResult.OR
    expected_clue = Clue(
        ('Bar occurred', datetime(2000, 6, 14, second=10)),
        'Bar occurred, 2000 06 14 00:00:10',
        250,
        line_source,
    )
    assert causes[0].lines == [FrontInput.from_clue(expected_clue)]
    assert len(causes[0].constraints) == 1
    assert causes[0].constraints[0] == constraints[0]
def test_constraints_check_same_cause_parser_as_effect(self):
    """
    Runs Rule.constraints_check for a rule whose cause parser and effect parser
    are the same object (self.cause_a) and expects exactly one matching line.
    """
    line_source = self.line_source
    constraints = [
        {
            'clues_groups': [[0, 1], [1, 1]],
            'name': 'identical',
            'params': {}
        }
    ]
    rule = Rule([self.cause_a], self.cause_a, constraints, Rule.LINKAGE_AND)

    effect_clues_dict = {'cause_a': Clue((42, ), '42 carrots', 1420, line_source)}
    # it's dictionary of the same type as clues dict collected in SearchManager
    clues = {
        'cause_a': [
            Clue((40, ), '40 carrots', 400, line_source),
            Clue((42, ), '42 carrots', 420, line_source),
            Clue((44, ), '44 carrots', 440, line_source),
        ],
        'dummy': [
            Clue((98, ), '98 foo bar', 980, line_source),
            Clue((99, ), '99 foo bar', 990, line_source),
        ],
    }

    results = rule.constraints_check(clues, effect_clues_dict)

    assert len(results) == 1
    expected_lines = [
        FrontInput.from_clue(clue)
        for clue in (Clue((42, ), '42 carrots', 420, line_source), )
    ]
    assert results[0].lines == expected_lines
def test_temporary_file_assign_to_logtype(self, test_name):
    """
    Runs LogReader.get_causes with a temporary assignment of node log files to
    the default log type and compares the outcome with the expected results
    loaded from the test's YAML file.
    """
    (input_path, original_log_file, path, result_log_file,
     results_yaml_file) = self._prepare_files_path(test_name)
    effect_line_offset, line_content = self._gather_effect_line_data(
        input_path, original_log_file
    )
    log_reader = LogReader(self._prepare_config(path))
    starting_file = os.path.join(path, self._get_starting_file_name(input_path))
    effect_line = FrontInput(
        effect_line_offset, line_content, LineSource('localhost', starting_file)
    )

    node_sources = [
        LineSource('localhost', os.path.join(path, log_name))
        for log_name in ('node_1.log', 'node_2.log', 'node_3.log')
    ]
    # Number of leading node sources assigned per test; any other test gets one.
    sources_per_test = {
        "010_multiple_files": 2,
        "011_different_entry": 3,
    }
    assigned_count = sources_per_test.get(test_name, 1)
    temp_assign = {AbstractConfig.DEFAULT_LOG_TYPE: node_sources[:assigned_count]}

    results = log_reader.get_causes(effect_line, temp_assign)
    expected_results = self._investigation_results_from_yaml(
        results_yaml_file, result_log_file
    )
    self._check_results(results, expected_results)
def test_constraints_or_two_time_constraints(self):
    """
    Verifies constraints_or with two 'time_delta' constraints over the same
    clues groups; the returned cause is expected to list only the first one.
    """
    line_source = LineSource('localhost', 'node_0.log')
    foo_time = datetime(2000, 6, 14, second=15)
    effect = Clue(
        ('Foo occurred', foo_time), 'Foo occurred, 2000 06 14 00:00:15', 500, line_source
    )
    clues_lists = [
        (
            [
                Clue(
                    ('Bar occurred', datetime(2000, 6, 14, second=10)),
                    'Bar occurred, 2000 06 14 00:00:10', 250, line_source
                ),
            ],
            1,
        ),
    ]
    wide_delta = {
        'clues_groups': [[1, 2], [0, 2]],
        'name': 'time_delta',
        'params': {'max_delta': 8.0},
    }
    narrow_delta = {
        'clues_groups': [[1, 2], [0, 2]],
        'name': 'time_delta',
        'params': {'max_delta': 3.0},
    }
    constraints = [wide_delta, narrow_delta]

    causes = Verifier.constraints_or(clues_lists, effect, constraints, ConstraintManager())

    assert len(causes) == 1
    linkages = [cause.constraints_linkage for cause in causes]
    assert all(linkage == InvestigationResult.OR for linkage in linkages)
    assert causes[0].lines == [
        FrontInput.from_clue(clue) for clue in (
            Clue(
                ('Bar occurred', datetime(2000, 6, 14, second=10)),
                'Bar occurred, 2000 06 14 00:00:10', 250, line_source
            ),
        )
    ]
    assert len(causes[0].constraints) == 1
    assert causes[0].constraints[0] == constraints[0]
def test_constraints_and_verification_failed_when_or_succeeded(self):
    """
    Same input checked with both linkages: constraints_and is expected to find
    no cause, while constraints_or is expected to return one cause carrying
    only the second constraint.
    """

    def identical(clues_groups):
        # Builds an 'identical' constraint over the given clues groups.
        return {'clues_groups': clues_groups, 'name': 'identical', 'params': {}}

    line_source = LineSource('localhost', 'node_0.log')
    effect = Clue(('Banana', 44), 'Banana, 44 times', 3000, line_source)
    first_parser_clues = [
        Clue(('Milk', 3), 'Milk, 3 times', 50, line_source),
        Clue(('Chocolate', 4), 'Chocolate, 4 times', 100, line_source),
        Clue(('Pear', 2), 'Pear, 2 times', 150, line_source),  # <- should be found (parser 1)
    ]
    second_parser_clues = [
        Clue(('Pineapple', 2), 'Pineapple, 2 times', 1050, line_source),  # <- should be found (parser 2)
        Clue(('Milk', 1), 'Milk, 1 times', 1100, line_source),
    ]
    clues_lists = [(first_parser_clues, 1), (second_parser_clues, 1)]
    constraints = [
        identical([[0, 1], [1, 1], [2, 1]]),
        identical([[1, 2], [2, 2]]),
    ]

    # testing 'and'
    and_causes = Verifier.constraints_and(clues_lists, effect, constraints, ConstraintManager())
    assert not and_causes

    # testing 'or'
    or_causes = Verifier.constraints_or(clues_lists, effect, constraints, ConstraintManager())
    assert len(or_causes) == 1
    for cause in or_causes:
        assert cause.constraints_linkage == InvestigationResult.OR
    assert or_causes[0].lines == [
        FrontInput.from_clue(clue) for clue in (
            Clue(('Pear', 2), 'Pear, 2 times', 150, line_source),
            Clue(('Pineapple', 2), 'Pineapple, 2 times', 1050, line_source),
        )
    ]
    assert or_causes[0].constraints == [identical([[1, 2], [2, 2]])]
def test_constraints_and_verification_failed_when_or_succeeded(self):
    """
    Feeds identical clues and constraints to constraints_and and
    constraints_or; 'and' must yield nothing, 'or' must yield a single cause
    built from the 'Pear' and 'Pineapple' lines.
    """
    line_source = LineSource('localhost', 'node_0.log')
    effect = Clue(('Banana', 44), 'Banana, 44 times', 3000, line_source)

    clues_lists = [
        (
            [
                Clue(('Milk', 3), 'Milk, 3 times', 50, line_source),
                Clue(('Chocolate', 4), 'Chocolate, 4 times', 100, line_source),
                # should be found (parser 1)
                Clue(('Pear', 2), 'Pear, 2 times', 150, line_source),
            ],
            1,
        ),
        (
            [
                # should be found (parser 2)
                Clue(('Pineapple', 2), 'Pineapple, 2 times', 1050, line_source),
                Clue(('Milk', 1), 'Milk, 1 times', 1100, line_source),
            ],
            1,
        ),
    ]
    all_groups_identical = {
        'clues_groups': [[0, 1], [1, 1], [2, 1]],
        'name': 'identical',
        'params': {},
    }
    causes_identical = {
        'clues_groups': [[1, 2], [2, 2]],
        'name': 'identical',
        'params': {},
    }
    constraints = [all_groups_identical, causes_identical]

    # testing 'and'
    causes = Verifier.constraints_and(clues_lists, effect, constraints, ConstraintManager())
    assert not causes

    # testing 'or'
    causes = Verifier.constraints_or(clues_lists, effect, constraints, ConstraintManager())
    assert len(causes) == 1
    linkages = [cause.constraints_linkage for cause in causes]
    assert all(linkage == InvestigationResult.OR for linkage in linkages)
    expected_lines = [
        FrontInput.from_clue(Clue(('Pear', 2), 'Pear, 2 times', 150, line_source)),
        FrontInput.from_clue(Clue(('Pineapple', 2), 'Pineapple, 2 times', 1050, line_source)),
    ]
    assert causes[0].lines == expected_lines
    expected_constraints = [
        {
            'clues_groups': [[1, 2], [2, 2]],
            'name': 'identical',
            'params': {},
        },
    ]
    assert causes[0].constraints == expected_constraints
def tests_whylog_factory(self):
    """
    Builds the objects returned by whylog_factory, teaches one line, and
    checks that asking the log reader for its causes raises NoLogTypeError.
    """
    log_reader, teacher_generator, config = whylog_factory()
    teacher = teacher_generator()
    line_source = LineSource('host', 'path')
    front_input = FrontInput(1, 'line content', line_source)
    teacher.add_line(0, front_input, True)
    with self.assertRaises(NoLogTypeError):
        log_reader.get_causes(front_input)
    # The returned log type is not inspected; the call itself must not raise.
    config.get_log_type(front_input.line_source)
def test_update_by_pattern(self):
    """
    The regex stored for a line must differ from a hand-written pattern before
    update_by_pattern is called and be equal to it afterwards.
    """
    assistant = RegexAssistant()
    line_id = 1
    content = "Hello, Whylog guy!"
    unlikely_regex = r'^Hello, (Whylog (team|guy)!)$'

    assistant.add_line(line_id, FrontInput(0, content, 0))
    regex_before = assistant.regex_matches[line_id].regex
    assert not regex_before == unlikely_regex

    assistant.update_by_pattern(line_id, unlikely_regex)
    regex_after = assistant.regex_matches[line_id].regex
    assert regex_after == unlikely_regex
def _create_investigation_result(cls, clues_combination, constraints, linkage):
    """
    Converts every Clue of the combination into a FrontInput and wraps the
    resulting lines, together with the given constraints and linkage, in an
    InvestigationResult.
    """
    lines = []
    for clue in clues_combination:
        lines.append(FrontInput.from_clue(clue))
    return InvestigationResult(lines, constraints, linkage)
def setUp(self):
    """
    Prepares a Teacher backed by the 'empty_config_files' YAML config, stores
    sample effect and cause lines on the test case, and adds the sample rule.
    """
    config_dir = os.path.join(*path_test_files + ['empty_config_files'])
    parsers_path, rules_path, log_types_path = ConfigPathFactory.get_path_to_config_files(
        config_dir, False
    )
    # _clean_test_files is called only after test_files has been set.
    self.test_files = [parsers_path, rules_path, log_types_path]
    self._clean_test_files()

    self.teacher = Teacher(
        YamlConfig(parsers_path, rules_path, log_types_path), RegexAssistant()
    )

    self.effect_id = 0
    self.effect_front_input = FrontInput(
        offset=42,
        line_content=r'2015-12-03 12:11:00 Error occurred in reading test',
        line_source=LineSource('sample_host', 'sample_path'),
    )

    self.cause1_id = 1
    self.cause1_front_input = FrontInput(
        offset=30,
        line_content=r'2015-12-03 12:10:55 Data is missing on comp21',
        line_source=LineSource('sample_host1', 'sample_path1'),
    )

    self.cause2_id = 2
    self.cause2_front_input = FrontInput(
        offset=21,
        line_content=r'2015-12-03 12:10:50 Data migration to comp21 failed in test 123',
        line_source=LineSource('sample_host2', 'sample_path2'),
    )

    self.identical_groups = [(line_id, 2) for line_id in (self.cause1_id, self.cause2_id)]
    # NOTE(review): both date groups point at the effect line - confirm this is intended.
    self.date_groups = [(self.effect_id, 1), (self.effect_id, 1)]

    self._add_rule()
def _create_investigation_result(cls, clues_combination, constraints, linkage):
    """
    Returns an InvestigationResult for the given clues combination, holding
    lines (FrontInput objects made from the clues) instead of the Clues
    themselves, plus the constraints and linkage passed in.
    """
    front_inputs = list(map(FrontInput.from_clue, clues_combination))
    return InvestigationResult(front_inputs, constraints, linkage)
def test_constraints_check_basic(self):
    """
    One 'identical' constraint links the effect with both cause parsers; the
    only expected result consists of the '42 carrots' and '42 broccoli' lines.
    """
    line_source = self.line_source
    identical_constraint = {
        'clues_groups': [[0, 1], [1, 1], [2, 1]],
        'name': 'identical',
        'params': {},
    }
    rule = Rule(
        [self.cause_a, self.cause_b], self.effect, [identical_constraint], Rule.LINKAGE_AND
    )
    effect_clues_dict = {'effect': Clue((42, ), '42 dinners', 1420, line_source)}
    # Same shape as the clues dict collected in SearchManager.
    clues = {
        'cause_a': [
            Clue((40, ), '40 carrots', 400, line_source),
            Clue((42, ), '42 carrots', 420, line_source),
            Clue((44, ), '44 carrots', 440, line_source),
        ],
        'cause_b': [
            Clue((32, ), '32 broccoli', 100, line_source),
            Clue((42, ), '42 broccoli', 120, line_source),
            Clue((52, ), '52 broccoli', 140, line_source),
        ],
        'dummy': [
            Clue((42, ), '42 foo bar', 980, line_source),
            Clue((84, ), '84 foo bar', 990, line_source),
        ],
    }

    results = rule.constraints_check(clues, effect_clues_dict)

    assert len(results) == 1
    assert results[0].lines == [
        FrontInput.from_clue(clue) for clue in (
            Clue((42, ), '42 carrots', 420, line_source),
            Clue((42, ), '42 broccoli', 120, line_source),
        )
    ]
def _investigation_results_from_yaml(self, yaml_file, real_log_file):
    """
    Loads expected investigation results from a YAML file.

    Each entry of the YAML document must provide 'causes' (a list of line
    contents), 'constraints' and 'linkage'. Every cause line is turned into a
    FrontInput whose offset is looked up in real_log_file by the line content.

    :param yaml_file: path to the YAML file describing the expected results
    :param real_log_file: path to the log file the cause lines come from
    :return: list of InvestigationResult objects, in YAML order
    """
    # The file is opened in a 'with' block so the handle is always closed
    # (the previous yaml.load(open(...)) form leaked it).
    # NOTE(review): yaml.load without an explicit Loader can build arbitrary
    # objects and is rejected by newer PyYAML versions - consider safe_load.
    with open(yaml_file) as yaml_stream:
        file_content = yaml.load(yaml_stream)
    results = []
    for result in file_content:
        causes = [
            FrontInput(
                self._deduce_line_offset_by_unique_content(real_log_file, line_str),
                line_str,
                LineSource("localhost", real_log_file)
            ) for line_str in result['causes']
        ]  # yapf: disable
        results.append(InvestigationResult(causes, result['constraints'], result['linkage']))
    return results
def test_guess_pattern_matches(self):
    """
    Every pattern guessed by RegexAssistant for a sample line is passed to
    verify_regex_match together with that line; at least one guess is required.
    """
    line_id = 1
    line = r'2015-12-03 or [10/Oct/1999 21:15:05 +0500] "GET /index.html HTTP/1.0" 200 1043'
    assistant = RegexAssistant()
    assistant.add_line(line_id, FrontInput(0, line, 0))

    pattern_matches = assistant.guess_pattern_matches(line_id)
    assert pattern_matches

    # All patterns are collected first and only then verified, one by one.
    guessed_regexes = []
    for pattern_match in pattern_matches.values():
        guessed_regexes.append(pattern_match.pattern)
    for guessed_regex in guessed_regexes:
        self.verify_regex_match(guessed_regex, line)
def test_constraints_check_basic(self):
    """
    Runs Rule.constraints_check for a two-cause rule with a single 'identical'
    constraint and expects one result made of the two lines whose value is 42.
    """
    constraints = [
        {
            'clues_groups': [[0, 1], [1, 1], [2, 1]],
            'name': 'identical',
            'params': {}
        }
    ]
    causes_parsers = [self.cause_a, self.cause_b]
    rule = Rule(causes_parsers, self.effect, constraints, Rule.LINKAGE_AND)

    effect_clues_dict = {
        'effect': Clue((42,), '42 dinners', 1420, self.line_source),
    }
    # it's dictionary of the same type as clues dict collected in SearchManager
    clues = {}
    clues['cause_a'] = [
        Clue((40,), '40 carrots', 400, self.line_source),
        Clue((42,), '42 carrots', 420, self.line_source),
        Clue((44,), '44 carrots', 440, self.line_source),
    ]
    clues['cause_b'] = [
        Clue((32,), '32 broccoli', 100, self.line_source),
        Clue((42,), '42 broccoli', 120, self.line_source),
        Clue((52,), '52 broccoli', 140, self.line_source),
    ]
    clues['dummy'] = [
        Clue((42,), '42 foo bar', 980, self.line_source),
        Clue((84,), '84 foo bar', 990, self.line_source),
    ]

    results = rule.constraints_check(clues, effect_clues_dict)

    assert len(results) == 1
    expected_lines = [
        FrontInput.from_clue(Clue((42,), '42 carrots', 420, self.line_source)),
        FrontInput.from_clue(Clue((42,), '42 broccoli', 120, self.line_source)),
    ]
    assert results[0].lines == expected_lines
def test_creatig_effect_clues(self):
    """
    _create_effect_clues must return a single 'lostdata' clue whose parameters,
    offset and content correspond to the effect params and the front input.
    """
    # TODO: also assert on the line source once FrontInput carries a LineSource (or similar)
    offset = 42
    front_input = FrontInput(offset, self.lost_data_line, None)
    raw_params = ("2015-12-03 12:11:00", "alfa21", "567.02", "101")
    effect_params = {'lostdata': raw_params}

    clues = self.simple_config._create_effect_clues(effect_params, front_input)

    assert len(clues) == 1
    clue = clues['lostdata']
    # The date string is expected back as a datetime; the rest stay strings.
    expected_parameters = (datetime(2015, 12, 3, 12, 11), 'alfa21', '567.02', '101')
    assert clue.regex_parameters == expected_parameters
    assert clue.line_offset == offset
    assert clue.line_prefix_content == self.lost_data_line
def test_constraints_when_one_unmatched(self):
    """
    The first parser contributes no clues at all: constraints_or is expected
    to return one cause with the 'Banana' line and the second constraint,
    whereas constraints_and is expected to return nothing.
    """

    def identical(clues_groups):
        # Builds an 'identical' constraint over the given clues groups.
        return {'clues_groups': clues_groups, 'name': 'identical', 'params': {}}

    line_source = LineSource('localhost', 'node_0.log')
    effect = Clue(('Banana', 2), 'Banana, 2 times', 3000, line_source)
    unmatched_parser_clues = []
    matched_parser_clues = [
        Clue(('Banana', 2), 'Banana, 2 times', 1050, line_source),
        Clue(('Milk', 1), 'Milk, 1 times', 1100, line_source),
    ]
    clues_lists = [(unmatched_parser_clues, 1), (matched_parser_clues, 1)]
    constraints = [
        identical([[0, 1], [1, 1], [2, 1]]),
        identical([[0, 2], [2, 2]]),
    ]

    # testing 'or'
    or_causes = Verifier.constraints_or(clues_lists, effect, constraints, ConstraintManager())
    assert len(or_causes) == 1
    for cause in or_causes:
        assert cause.constraints_linkage == InvestigationResult.OR
    assert or_causes[0].lines == [
        FrontInput.from_clue(clue)
        for clue in (Clue(('Banana', 2), 'Banana, 2 times', 1050, line_source), )
    ]
    assert or_causes[0].constraints == [identical([[0, 2], [2, 2]])]

    # testing 'and'
    and_causes = Verifier.constraints_and(clues_lists, effect, constraints, ConstraintManager())
    assert not and_causes
def test_one(self, test_name):
    """
    Runs LogReader.get_causes for the effect line of the given test case and
    compares the outcome with the expected results loaded from its YAML file.
    """
    (input_path, original_log_file, path, result_log_file,
     results_yaml_file) = self._prepare_files_path(test_name)
    effect_line_offset, line_content = self._gather_effect_line_data(
        input_path, original_log_file
    )

    # preparing Whylog structures, normally prepared by Front
    config_paths = ConfigPathFactory.get_path_to_config_files(path)
    log_reader = LogReader(YamlConfig(*config_paths))
    starting_file = os.path.join(path, self._get_starting_file_name(input_path))
    effect_line = FrontInput(
        effect_line_offset, line_content, LineSource('localhost', starting_file)
    )

    results = log_reader.get_causes(effect_line)
    expected_results = self._investigation_results_from_yaml(
        results_yaml_file, result_log_file
    )
    self._check_results(results, expected_results)
def test_constraints_when_one_unmatched(self):
    """
    Checks both linkages when one of the two clue lists is empty: 'or' must
    produce a single cause, 'and' must produce none.
    """
    line_source = LineSource('localhost', 'node_0.log')
    effect = Clue(('Banana', 2), 'Banana, 2 times', 3000, line_source)

    clues_lists = [
        ([], 1),
        (
            [
                Clue(('Banana', 2), 'Banana, 2 times', 1050, line_source),
                Clue(('Milk', 1), 'Milk, 1 times', 1100, line_source),
            ],
            1,
        ),
    ]
    all_groups_identical = {
        'clues_groups': [[0, 1], [1, 1], [2, 1]],
        'name': 'identical',
        'params': {},
    }
    effect_and_second_identical = {
        'clues_groups': [[0, 2], [2, 2]],
        'name': 'identical',
        'params': {},
    }
    constraints = [all_groups_identical, effect_and_second_identical]

    # testing 'or'
    causes = Verifier.constraints_or(clues_lists, effect, constraints, ConstraintManager())
    assert len(causes) == 1
    linkages = [cause.constraints_linkage for cause in causes]
    assert all(linkage == InvestigationResult.OR for linkage in linkages)
    expected_lines = [
        FrontInput.from_clue(Clue(('Banana', 2), 'Banana, 2 times', 1050, line_source)),
    ]
    assert causes[0].lines == expected_lines
    expected_constraints = [
        {
            'clues_groups': [[0, 2], [2, 2]],
            'name': 'identical',
            'params': {},
        },
    ]
    assert causes[0].constraints == expected_constraints

    # testing 'and'
    causes = Verifier.constraints_and(clues_lists, effect, constraints, ConstraintManager())
    assert not causes