def mocked_investigation_plan():
    """Build a hand-crafted InvestigationPlan fixture.

    The plan contains a single rule linking one 'cause' parser to one
    'effect' parser over a 1-second `max_delta` time constraint, plus a
    pre-built effect clue anchored at offset 40 in node_1.log.

    Returns:
        InvestigationPlan with one rule, one (step, log_type) pair and
        the effect clues dict keyed by parser name.
    """
    # Raw strings avoid the invalid '\d' escape-sequence warning (Python 3.6+);
    # the resulting pattern text is unchanged.
    super_parser = RegexSuperParser(r'^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d).*', [1], {1: 'date'})
    matcher = WildCardFilenameMatcher('localhost', 'node_1.log', 'default', super_parser)
    default_log_type = LogType('default', [matcher])
    cause = RegexParser(
        'cause', '2015-12-03 12:08:08 root cause',
        r'^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d) root cause$', [1], 'default', {1: 'date'}
    )
    effect = RegexParser(
        'effect', '2015-12-03 12:08:09 visible effect',
        r'^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d) visible effect$', [1], 'default', {1: 'date'}
    )
    concatenated = ConcatenatedRegexParser([cause])
    effect_time = datetime(2015, 12, 3, 12, 8, 9)
    # Search window: one second before the effect up to the effect itself.
    search_range = {
        'default': {
            'date': {
                'left_bound': datetime(2015, 12, 3, 12, 8, 8),
                'right_bound': effect_time
            }
        }
    }
    default_investigation_step = InvestigationStep(concatenated, search_range)
    rule = Rule(
        [cause], effect, [
            {
                'clues_groups': [[1, 1], [0, 1]],
                'name': 'time',
                'params': {'max_delta': 1}
            }
        ], Rule.LINKAGE_AND
    )  # yapf: disable
    line_source = LineSource('localhost', 'node_1.log')
    effect_clues = {'effect': Clue((effect_time,), 'visible effect', 40, line_source)}
    return InvestigationPlan([rule], [(default_investigation_step, default_log_type)], effect_clues)
def test_log_type_rename(self):
    """Renaming a log type updates in-memory state and the persisted config.

    Verifies the rename both on the live config object and on a config
    freshly reloaded from disk, then removes the temporary whylog dir.
    """
    whylog_dir = SettingsFactorySelector._attach_whylog_dir(os.getcwd())
    config = SettingsFactorySelector.get_settings()['config']
    # Raw string avoids the invalid '\d' escape-sequence warning.
    super_parser = RegexSuperParser(r'^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d).*', [1], {1: 'date'})
    matcher = WildCardFilenameMatcher('localhost', '/temp/*.log', 'default', super_parser)
    log_type = LogType('default', [matcher])
    config.add_log_type(log_type)
    config.add_rule(self.user_intent)
    parsers_name = ['connectionerror', 'datamigration', 'lostdata']
    assert 'default' in config._log_types.keys()
    assert 'default' in config._parsers_grouped_by_log_type.keys()
    assert sorted(config._parsers.keys()) == parsers_name
    assert sorted(parser.name for parser in config._parsers_grouped_by_log_type['default']) == \
        parsers_name
    assert sorted(log_type.name for log_type in config.get_all_log_types()) == ['default']
    config.rename_log_type('default', 'test_log_type')
    self._check_log_type_renaming(config, parsers_name)
    # Reload from disk to confirm the rename was persisted, not just in memory.
    config = SettingsFactorySelector.get_settings()['config']
    self._check_log_type_renaming(config, parsers_name)
    shutil.rmtree(whylog_dir)
def test_parsed_distinct_files(self):
    """Overlapping matchers must not produce duplicate files to parse.

    node_1.log matches both the literal and the wildcard matcher; the
    log type should still report each (host, path, parser) entry once.
    """
    path = os.path.join(*path_test_files)
    suffix_1 = 'node_1.log'
    suffix_2 = 'node_[12].log'
    # Raw string avoids the invalid '\d' escape-sequence warning.
    super_parser = RegexSuperParser(r'^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d).*', [1], {1: 'date'})
    matcher_1 = WildCardFilenameMatcher(
        'localhost', os.path.join(path, suffix_1), 'test_log_type', super_parser
    )
    matcher_2 = WildCardFilenameMatcher(
        'localhost', os.path.join(path, suffix_2), 'test_log_type', super_parser
    )
    log_type = LogType('test_log_type', [matcher_1, matcher_2])
    assert sorted(log_type.files_to_parse()) == [
        ('localhost', os.path.join(path, 'node_1.log'), super_parser),
        ('localhost', os.path.join(path, 'node_2.log'), super_parser)
    ]
def test_parsed_distinct_files(self):
    """Overlapping matchers must not produce duplicate files to parse.

    node_1.log matches both the literal and the wildcard matcher; the
    log type should still report each (host, path, parser) entry once.
    """
    path = os.path.join(*path_test_files)
    suffix_1 = 'node_1.log'
    suffix_2 = 'node_[12].log'
    # Raw string avoids the invalid '\d' escape-sequence warning.
    super_parser = RegexSuperParser(
        r'^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d).*', [1], {1: 'date'})
    matcher_1 = WildCardFilenameMatcher('localhost', os.path.join(path, suffix_1),
                                        'test_log_type', super_parser)
    matcher_2 = WildCardFilenameMatcher('localhost', os.path.join(path, suffix_2),
                                        'test_log_type', super_parser)
    log_type = LogType('test_log_type', [matcher_1, matcher_2])
    assert sorted(log_type.files_to_parse()) == [
        ('localhost', os.path.join(path, 'node_1.log'), super_parser),
        ('localhost', os.path.join(path, 'node_2.log'), super_parser)
    ]
def test_setting_log_type(self):
    """Setting a log type on the effect parser must be reflected on the parser."""
    # TODO: setting simple RegexSuperParser
    sample_filename_matcher = WildCardFilenameMatcher(
        'localhost', 'sample_path', 'default', None
    )
    new_log_type = LogType('localhost', [sample_filename_matcher])
    self.teacher.set_log_type(self.effect_id, new_log_type)
    parser = self.teacher.get_rule().parsers[self.effect_id]
    # Compare names: the parser stores the log type's *name* string, so
    # comparing the LogType object itself would always fail.
    assert new_log_type.name == parser.log_type_name
def test_not_set_log_type(self):
    """set_log_type must clear the NotSetLogTypeProblem on the effect parser."""
    # Before assigning a log type, the problem is reported.
    assert self._check_if_parser_has_problem(self.effect_id, NotSetLogTypeProblem())
    matcher = WildCardFilenameMatcher('localhost', 'sample_path', 'default', None)
    assigned_log_type = LogType('localhost', [matcher])
    self.teacher.set_log_type(self.effect_id, assigned_log_type)
    # After assignment, the problem must be gone.
    assert not self._check_if_parser_has_problem(self.effect_id, NotSetLogTypeProblem())
def _load_log_types(self):
    """Read matcher definitions from config and group them into LogType objects.

    Raises:
        UnsupportedFilenameMatcher: when a definition names a matcher class
            with no registered factory.
    """
    factory_registry = {
        'WildCardFilenameMatcher': WildCardFilenameMatcherFactory
    }
    grouped = defaultdict(list)
    for dao in self._load_file_with_config(self._log_type_path):
        class_name = dao['matcher_class_name']
        factory = factory_registry.get(class_name)
        if factory is None:
            raise UnsupportedFilenameMatcher(class_name)
        grouped[dao['log_type_name']].append(factory.from_dao(dao))
    return {
        name: LogType(name, type_matchers)
        for name, type_matchers in six.iteritems(grouped)
    }
def test_add_log_type(self):
    """An added log type must survive a config reload from disk."""
    SettingsFactorySelector.WHYLOG_DIR = TestPaths.WHYLOG_DIR
    config = SettingsFactorySelector.get_settings()['config']
    whylog_dir = SettingsFactorySelector._attach_whylog_dir(os.getcwd())
    # Raw string avoids the invalid '\d' escape-sequence warning.
    super_parser = RegexSuperParser(
        r'^(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d).*', [1], {1: 'date'})
    matcher = WildCardFilenameMatcher('localhost', 'node_1.log', 'test_log_type', super_parser)
    default_log_type = LogType('test_log_type', [matcher])
    config.add_log_type(default_log_type)
    # Reload config from disk: the log type must have been persisted.
    config = SettingsFactorySelector.get_settings()['config']
    assert len(config._log_types) == 1
    log_type = config._log_types['test_log_type']
    assert log_type.name == 'test_log_type'
    assert len(log_type.filename_matchers) == 1
    matcher = log_type.filename_matchers[0]
    assert matcher.host_pattern == 'localhost'
    assert matcher.path_pattern == 'node_1.log'
    assert matcher.log_type_name == 'test_log_type'
    assert matcher.super_parser == super_parser
    shutil.rmtree(whylog_dir)
from whylog.config.filename_matchers import WildCardFilenameMatcher
from whylog.config.log_type import LogType
from whylog.config.super_parser import RegexSuperParser

# NOTE(review): presumably the index of the effect entry in a rule's parser
# list — confirm against callers.
EFFECT_NUMBER = 0

# Fallback log type used when the user has not configured any: matches every
# file on localhost with an empty (match-nothing) super parser.
DEFAULT_NAME = "default"
DEFAULT_MATCHER = WildCardFilenameMatcher("localhost", "", DEFAULT_NAME, RegexSuperParser("", [], {}))
DEFAULT_LOG_TYPE = LogType(
    DEFAULT_NAME, [DEFAULT_MATCHER]
)  # yapf: disable


class YamlFileNames(object):
    """File names of the YAML documents that make up whylog's on-disk config."""
    rules = 'rules.yaml'
    parsers = 'parsers.yaml'
    default_log_types = 'log_types.yaml'
    unix_log_types = 'unix_log_types.yaml'
    windows_log_types = 'windows_log_types.yaml'
    settings = 'settings.yaml'
class AbstractConfig(object):
    """Base class for whylog configuration backends.

    Holds the in-memory index of parsers, rules and log types, and defers
    loading/saving of their definitions to abstract methods implemented by
    concrete storage backends (e.g. YAML files).
    """
    # Number of words used when proposing a generated parser name.
    words_count_in_name = 4
    DEFAULT_NAME = "default"
    # Fallback log type returned by get_all_log_types() when no "default"
    # log type has been configured.
    DEFAULT_LOG_TYPE = LogType(
        DEFAULT_NAME, [
            WildCardFilenameMatcher("localhost", "", DEFAULT_NAME, RegexSuperParser("", [], {}))
        ]
    )  # yapf: disable

    def __init__(self):
        # Load order matters: parsers must exist before they are grouped and
        # before the name generator is built over them.
        self._parsers = self._load_parsers()
        self._parsers_grouped_by_log_type = self._index_parsers_by_log_type(
            six.itervalues(self._parsers))
        self._parser_name_generator = ParserNameGenerator(self._parsers)
        self._rules = self._load_rules()
        self._log_types = self._load_log_types()

    @abstractmethod
    def _load_parsers(self):
        """Return a dict mapping parser name to parser object."""
        pass

    @abstractmethod
    def _load_rules(self):
        """Return the rules collection, keyed by effect parser name."""
        pass

    @abstractmethod
    def _load_log_types(self):
        """Return a dict mapping log type name to LogType."""
        pass

    @classmethod
    def _index_parsers_by_log_type(cls, parsers):
        """Group parsers into a dict keyed by their log type."""
        grouped_parsers = defaultdict(list)
        for parser in parsers:
            grouped_parsers[parser.log_type].append(parser)
        return grouped_parsers

    def add_rule(self, user_rule_intent):
        """Create a rule from a user intent, persist it and index its parsers."""
        created_rule = RegexRuleFactory.create_from_intent(user_rule_intent)
        self._save_rule_definition(created_rule.serialize())
        created_parsers = created_rule.get_new_parsers(self._parser_name_generator)
        self._save_parsers_definition(parser.serialize() for parser in created_parsers)
        self._rules[created_rule.get_effect_name()].append(created_rule)
        for parser in created_parsers:
            self._parsers[parser.name] = parser
            self._parsers_grouped_by_log_type[parser.log_type].append(parser)
        # Rebuild the name generator so it knows about the new parser names.
        self._parser_name_generator = ParserNameGenerator(self._parsers)

    def add_log_type(self, log_type):
        """Persist every matcher of *log_type* and register it in memory."""
        for matcher in log_type.filename_matchers:
            self.add_filename_matcher_to_log_type(matcher)
        self._log_types[log_type.name] = log_type

    def add_filename_matcher_to_log_type(self, matcher):
        """Persist a single filename matcher definition."""
        self._save_filename_matcher_definition(matcher.serialize())

    @abstractmethod
    def _save_rule_definition(self, rule_definition):
        pass

    @abstractmethod
    def _save_parsers_definition(self, parser_definitions):
        pass

    @abstractmethod
    def _save_filename_matcher_definition(self, matcher_definition):
        pass

    def get_all_log_types(self):
        """Yield all log types, prepending DEFAULT_LOG_TYPE when no user-defined
        "default" log type exists."""
        if self.DEFAULT_NAME in self._log_types:
            return six.itervalues(self._log_types)
        return itertools.chain([self.DEFAULT_LOG_TYPE], six.itervalues(self._log_types))

    def get_log_type(self, line_source):
        """Return the first log type containing *line_source*, or None."""
        for log_type in six.itervalues(self._log_types):
            if line_source in log_type:
                return log_type

    def create_investigation_plan(self, front_input, log_type):
        """Assemble the full InvestigationPlan for an effect line.

        Matches parsers against the effect line, collects the rules they can
        fulfill, and builds one investigation step per involved log type.
        """
        matching_parsers, effect_params = self._find_matching_parsers(
            front_input.line_content, log_type.name)
        suspected_rules = self._filter_rule_set(matching_parsers)
        concatenated_parsers = self._create_concatenated_parsers_for_investigation(
            suspected_rules)
        effect_clues = self._create_effect_clues(effect_params, front_input)
        steps = self._create_steps_in_investigation(
            concatenated_parsers, suspected_rules, effect_clues)
        return InvestigationPlan(suspected_rules, steps, effect_clues)

    def _create_effect_clues(self, effect_params, front_input):
        """Build a Clue per matching effect parser from its extracted params."""
        effect_clues = {}
        for parser_name, params in six.iteritems(effect_params):
            parser = self._parsers[parser_name]
            clue = Clue(
                parser.convert_params(
                    params
                ), front_input.line_content, front_input.offset, front_input.line_source
            )  # yapf: disable
            effect_clues[parser_name] = clue
        return effect_clues

    def _find_matching_parsers(self, effect_line_content, log_type_name):
        """Find all parsers of *log_type_name* that match the effect line.

        Returns a pair: (matching parsers, dict of extracted regex params
        keyed by parser name).
        """
        matching_parsers = []
        extracted_params = {}
        for parser in self._parsers_grouped_by_log_type[log_type_name]:
            params = parser.get_regex_params(effect_line_content)
            if params is not None:
                extracted_params[parser.name] = params
                matching_parsers.append(parser)
        return matching_parsers, extracted_params

    def _filter_rule_set(self, parsers_list):
        """Collect all rules that could be fulfilled in a single investigation,
        based on the effect parsers found by _find_matching_parsers."""
        suspected_rules = []
        for parser in parsers_list:
            rules = self._rules.get(parser.name)
            if rules is not None:
                suspected_rules.extend(rules)
        return suspected_rules

    @classmethod
    def _create_concatenated_parsers_for_investigation(cls, rules):
        """Build one ConcatenatedRegexParser per log type taking part in the
        investigation, based on the suspected rules' cause parsers.

        De-duplicates parsers by name across rules.
        """
        grouped_parsers = defaultdict(list)
        inserted_parsers = set()
        for suspected_rule in rules:
            for parser in suspected_rule.get_causes_parsers():
                if parser.name not in inserted_parsers:
                    grouped_parsers[parser.log_type].append(parser)
                    inserted_parsers.add(parser.name)
        return dict(
            (log_type_name, ConcatenatedRegexParser(parsers))
            for log_type_name, parsers in six.iteritems(grouped_parsers))

    def _create_steps_in_investigation(self, concatenated_parsers, suspected_rules, effect_clues):
        """Pair every concatenated parser with its log type and search range."""
        steps = []
        search_ranges = self._get_search_ranges(suspected_rules, effect_clues)
        for log_type_name, parser in six.iteritems(concatenated_parsers):
            log_type = self._log_types[log_type_name]
            # Missing range for a log type degrades to an unbounded search.
            investigation_step = InvestigationStep(
                parser, search_ranges.get(log_type_name, {}))
            steps.append((investigation_step, log_type))
        return steps

    def _get_search_ranges(self, suspected_rules, effect_clues):
        # TODO: remove mock — returns hard-coded date bounds instead of
        # deriving them from the rules' constraints and the effect clues.
        return {
            'database': {
                'date': {
                    'left_bound': datetime(2016, 4, 12, 23, 53, 3),
                    'right_bound': datetime(2016, 4, 12, 23, 54, 33)
                }
            },
            'apache': {
                'date': {
                    'left_bound': datetime(2016, 4, 12, 23, 54, 33),
                    'right_bound': datetime(2016, 4, 12, 23, 54, 43)
                }
            }
        }

    def is_free_parser_name(self, parser_name, black_list):
        """Return True when *parser_name* is not taken (also checking black_list)."""
        return self._parser_name_generator.is_free_parser_name(
            parser_name, black_list)

    def propose_parser_name(self, line, regex_str, black_list):
        """Propose a human-readable parser name derived from the sample line."""
        return self._parser_name_generator.propose_parser_name(
            line, regex_str, black_list, self.words_count_in_name)