@classmethod
def _prepare_config(cls, path):
    # Prepare Whylog structures specifically for tests: temporarily assign
    # every file to the "default" log type.
    whylog_config = YamlConfig(*ConfigPathFactory.get_path_to_config_files(path))
    whylog_config._log_types = {"default": AbstractConfig.DEFAULT_LOG_TYPE}
    for parser in six.itervalues(whylog_config._parsers):
        parser.log_type = "default"
    whylog_config._parser_name_generator = ParserNameGenerator(whylog_config._parsers)
    whylog_config._parsers_grouped_by_log_type["default"] = (
        whylog_config._parsers_grouped_by_log_type.pop("test_log_type")
    )
    return whylog_config
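
# A minimal usage sketch for the test helper above, assuming it lives on a test
# class with access to a directory of Whylog config files; the TestBase owner
# and the path below are hypothetical, not names from this repository:
#
#     whylog_config = TestBase._prepare_config("path/to/test/config/dir")
#     assert list(whylog_config._log_types) == ["default"]
#     assert all(
#         parser.log_type == "default"
#         for parser in six.itervalues(whylog_config._parsers)
#     )
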
class AbstractConfig(object):
    words_count_in_name = 4
    DEFAULT_NAME = "default"
    DEFAULT_LOG_TYPE = LogType(
        DEFAULT_NAME,
        [WildCardFilenameMatcher("localhost", "", DEFAULT_NAME, RegexSuperParser("", [], {}))]
    )  # yapf: disable

    def __init__(self):
        self._parsers = self._load_parsers()
        self._parsers_grouped_by_log_type = self._index_parsers_by_log_type(
            six.itervalues(self._parsers)
        )
        self._parser_name_generator = ParserNameGenerator(self._parsers)
        self._rules = self._load_rules()
        self._log_types = self._load_log_types()

    @abstractmethod
    def _load_parsers(self):
        pass

    @abstractmethod
    def _load_rules(self):
        pass

    @abstractmethod
    def _load_log_types(self):
        pass

    @classmethod
    def _index_parsers_by_log_type(cls, parsers):
        grouped_parsers = defaultdict(list)
        for parser in parsers:
            grouped_parsers[parser.log_type].append(parser)
        return grouped_parsers

    def add_rule(self, user_rule_intent):
        created_rule = RegexRuleFactory.create_from_intent(user_rule_intent)
        self._save_rule_definition(created_rule.serialize())
        created_parsers = created_rule.get_new_parsers(self._parser_name_generator)
        self._save_parsers_definition(parser.serialize() for parser in created_parsers)
        self._rules[created_rule.get_effect_name()].append(created_rule)
        for parser in created_parsers:
            self._parsers[parser.name] = parser
            self._parsers_grouped_by_log_type[parser.log_type].append(parser)
        self._parser_name_generator = ParserNameGenerator(self._parsers)

    def add_log_type(self, log_type):
        for matcher in log_type.filename_matchers:
            self.add_filename_matcher_to_log_type(matcher)
        self._log_types[log_type.name] = log_type

    def add_filename_matcher_to_log_type(self, matcher):
        self._save_filename_matcher_definition(matcher.serialize())

    @abstractmethod
    def _save_rule_definition(self, rule_definition):
        pass

    @abstractmethod
    def _save_parsers_definition(self, parser_definitions):
        pass

    @abstractmethod
    def _save_filename_matcher_definition(self, matcher_definition):
        pass

    def get_all_log_types(self):
        if self.DEFAULT_NAME in self._log_types:
            return six.itervalues(self._log_types)
        return itertools.chain([self.DEFAULT_LOG_TYPE], six.itervalues(self._log_types))

    def get_log_type(self, line_source):
        for log_type in six.itervalues(self._log_types):
            if line_source in log_type:
                return log_type

    def create_investigation_plan(self, front_input, log_type):
        matching_parsers, effect_params = self._find_matching_parsers(
            front_input.line_content, log_type.name
        )
        suspected_rules = self._filter_rule_set(matching_parsers)
        concatenated_parsers = self._create_concatenated_parsers_for_investigation(suspected_rules)
        effect_clues = self._create_effect_clues(effect_params, front_input)
        steps = self._create_steps_in_investigation(
            concatenated_parsers, suspected_rules, effect_clues
        )
        return InvestigationPlan(suspected_rules, steps, effect_clues)

    def _create_effect_clues(self, effect_params, front_input):
        effect_clues = {}
        for parser_name, params in six.iteritems(effect_params):
            parser = self._parsers[parser_name]
            clue = Clue(
                parser.convert_params(params),
                front_input.line_content,
                front_input.offset,
                front_input.line_source
            )  # yapf: disable
            effect_clues[parser_name] = clue
        return effect_clues

    def _find_matching_parsers(self, effect_line_content, log_type_name):
        """
        Finds all parsers from the config that match effect_line_content.
        """
        matching_parsers = []
        extracted_params = {}
        for parser in self._parsers_grouped_by_log_type[log_type_name]:
            params = parser.get_regex_params(effect_line_content)
            if params is not None:
                extracted_params[parser.name] = params
                matching_parsers.append(parser)
        return matching_parsers, extracted_params

    def _filter_rule_set(self, parsers_list):
        """
        Finds all rules from the config that can be fulfilled in a single
        investigation, based on the parsers_list found by _find_matching_parsers.
        """
        suspected_rules = []
        for parser in parsers_list:
            rules = self._rules.get(parser.name)
            if rules is not None:
                suspected_rules.extend(rules)
        return suspected_rules

    @classmethod
    def _create_concatenated_parsers_for_investigation(cls, rules):
        """
        Creates a concatenated parser for every log type that participates in the
        given investigation, based on the suspected rules found by _filter_rule_set.
        """
        grouped_parsers = defaultdict(list)
        inserted_parsers = set()
        for suspected_rule in rules:
            for parser in suspected_rule.get_causes_parsers():
                if parser.name not in inserted_parsers:
                    grouped_parsers[parser.log_type].append(parser)
                    inserted_parsers.add(parser.name)
        return dict(
            (log_type_name, ConcatenatedRegexParser(parsers))
            for log_type_name, parsers in six.iteritems(grouped_parsers)
        )

    def _create_steps_in_investigation(self, concatenated_parsers, suspected_rules, effect_clues):
        steps = []
        search_ranges = self._get_search_ranges(suspected_rules, effect_clues)
        for log_type_name, parser in six.iteritems(concatenated_parsers):
            log_type = self._log_types[log_type_name]
            investigation_step = InvestigationStep(parser, search_ranges.get(log_type_name, {}))
            steps.append((investigation_step, log_type))
        return steps

    def _get_search_ranges(self, suspected_rules, effect_clues):
        # TODO: remove mock
        return {
            'database': {
                'date': {
                    'left_bound': datetime(2016, 4, 12, 23, 53, 3),
                    'right_bound': datetime(2016, 4, 12, 23, 54, 33)
                }
            },
            'apache': {
                'date': {
                    'left_bound': datetime(2016, 4, 12, 23, 54, 33),
                    'right_bound': datetime(2016, 4, 12, 23, 54, 43)
                }
            }
        }

    def is_free_parser_name(self, parser_name, black_list):
        return self._parser_name_generator.is_free_parser_name(parser_name, black_list)

    def propose_parser_name(self, line, regex_str, black_list):
        return self._parser_name_generator.propose_parser_name(
            line, regex_str, black_list, self.words_count_in_name
        )
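
# A minimal concrete-subclass sketch of the class above, assuming only the
# abstract hooks it declares. InMemoryConfig is a hypothetical name used for
# illustration; it keeps everything in memory instead of persisting
# definitions the way a YAML-backed config would.
class InMemoryConfig(AbstractConfig):
    def _load_parsers(self):
        # parser name -> parser object
        return {}

    def _load_rules(self):
        # effect parser name -> list of rules; add_rule() appends to these
        # lists, so a defaultdict keeps that operation safe for new keys
        return defaultdict(list)

    def _load_log_types(self):
        # log type name -> LogType
        return {}

    def _save_rule_definition(self, rule_definition):
        pass  # a persistent subclass would write the serialized rule here

    def _save_parsers_definition(self, parser_definitions):
        pass  # likewise for the serialized parser definitions

    def _save_filename_matcher_definition(self, matcher_definition):
        pass  # likewise for serialized filename matchers
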
class AbstractConfig(object):
    words_count_in_name = 4

    def __init__(self):
        self._parsers = self._load_parsers()
        self._parsers_grouped_by_log_type = self._index_parsers_by_log_type(
            six.itervalues(self._parsers)
        )
        self._parser_name_generator = ParserNameGenerator(self._parsers)
        self._rules = self._load_rules()
        self._log_types = self._load_log_types()

    @abstractmethod
    def _load_parsers(self):
        pass

    @abstractmethod
    def _load_rules(self):
        pass

    @abstractmethod
    def _load_log_types(self):
        pass

    @classmethod
    def _index_parsers_by_log_type(cls, parsers):
        grouped_parsers = defaultdict(list)
        for parser in parsers:
            grouped_parsers[parser.log_type].append(parser)
        return grouped_parsers

    def add_rule(self, user_rule_intent):
        created_rule = RegexRuleFactory.create_from_intent(user_rule_intent)
        self._save_rule_definition(created_rule.serialize())
        created_parsers = created_rule.get_new_parsers(self._parser_name_generator)
        self._save_parsers_definition(parser.serialize() for parser in created_parsers)
        self._rules[created_rule.get_effect_name()].append(created_rule)
        for parser in created_parsers:
            self._parsers[parser.name] = parser
            self._parsers_grouped_by_log_type[parser.log_type].append(parser)
        self._parser_name_generator = ParserNameGenerator(self._parsers)

    def rename_log_type(self, old_name, new_name):
        if old_name == new_name:
            return
        if old_name not in self._log_types:
            raise NoLogTypeError(old_name)
        if new_name in self._log_types:
            raise RenameLogTypeError
        log_type = self._log_types.pop(old_name)
        log_type.name = new_name
        for matcher in log_type.filename_matchers:
            matcher.log_type_name = new_name
        self._log_types[new_name] = log_type
        for parser in six.itervalues(self._parsers):
            if parser.log_type == old_name:
                parser.log_type = new_name
        self._parsers_grouped_by_log_type[new_name] = (
            self._parsers_grouped_by_log_type.pop(old_name)
        )
        all_matchers_definition = tuple()
        for log_type in six.itervalues(self._log_types):
            matchers_definitions = (
                matcher.serialize() for matcher in log_type.filename_matchers
            )
            all_matchers_definition = itertools.chain(
                all_matchers_definition, matchers_definitions
            )
        self._resave_all_log_types(all_matchers_definition)
        self._resave_all_parsers(parser.serialize() for parser in six.itervalues(self._parsers))

    def add_log_type(self, log_type):
        for matcher in log_type.filename_matchers:
            self.add_filename_matcher_to_log_type(matcher)
        self._log_types[log_type.name] = log_type

    def add_filename_matcher_to_log_type(self, matcher):
        self._save_filename_matcher_definition(matcher.serialize())

    @abstractmethod
    def _resave_all_log_types(self, matchers_definition):
        pass

    @abstractmethod
    def _resave_all_parsers(self, parsers_definition):
        pass

    @abstractmethod
    def _save_rule_definition(self, rule_definition):
        pass

    @abstractmethod
    def _save_parsers_definition(self, parser_definitions):
        pass

    @abstractmethod
    def _save_filename_matcher_definition(self, matcher_definition):
        pass

    def get_all_log_types(self):
        return six.itervalues(self._log_types)

    def get_log_type(self, line_source):
        for log_type in six.itervalues(self._log_types):
            if line_source in log_type:
                return log_type

    def create_investigation_plan(self, front_input, log_type):
        matching_parsers, effect_params = self._find_matching_parsers(
            front_input.line_content, log_type.name
        )
        suspected_rules = self._filter_rule_set(matching_parsers)
        concatenated_parsers = self._create_concatenated_parsers_for_investigation(suspected_rules)
        effect_clues = self._create_effect_clues(effect_params, front_input)
        steps = self._create_steps_in_investigation(
            concatenated_parsers, suspected_rules, effect_clues
        )
        return InvestigationPlan(suspected_rules, steps, effect_clues)

    def _create_effect_clues(self, effect_params, front_input):
        effect_clues = {}
        for parser_name, params in six.iteritems(effect_params):
            parser = self._parsers[parser_name]
            clue = Clue(
                parser.convert_params(params),
                front_input.line_content,
                front_input.offset,
                front_input.line_source
            )  # yapf: disable
            effect_clues[parser_name] = clue
        return effect_clues

    def _find_matching_parsers(self, effect_line_content, log_type_name):
        """
        Finds all parsers from the config that match effect_line_content.
        """
        matching_parsers = []
        extracted_params = {}
        for parser in self._parsers_grouped_by_log_type[log_type_name]:
            params = parser.get_regex_params(effect_line_content)
            if params is not None:
                extracted_params[parser.name] = params
                matching_parsers.append(parser)
        return matching_parsers, extracted_params

    def _filter_rule_set(self, parsers_list):
        """
        Finds all rules from the config that can be fulfilled in a single
        investigation, based on the parsers_list found by _find_matching_parsers.
        """
        suspected_rules = []
        for parser in parsers_list:
            rules = self._rules.get(parser.name)
            if rules is not None:
                suspected_rules.extend(rules)
        return suspected_rules

    @classmethod
    def _create_concatenated_parsers_for_investigation(cls, rules):
        """
        Creates a concatenated parser for every log type that participates in the
        given investigation, based on the suspected rules found by _filter_rule_set.
        """
        grouped_parsers = defaultdict(list)
        inserted_parsers = set()
        for suspected_rule in rules:
            for parser in suspected_rule.get_causes_parsers():
                if parser.name not in inserted_parsers:
                    grouped_parsers[parser.log_type].append(parser)
                    inserted_parsers.add(parser.name)
        return dict(
            (log_type_name, ConcatenatedRegexParser(parsers))
            for log_type_name, parsers in six.iteritems(grouped_parsers)
        )

    def _create_steps_in_investigation(self, concatenated_parsers, suspected_rules, effect_clues):
        steps = []
        search_ranges = self._get_search_ranges(suspected_rules, effect_clues)
        for log_type_name, parser in six.iteritems(concatenated_parsers):
            log_type = self._log_types[log_type_name]
            investigation_step = InvestigationStep(parser, search_ranges.get(log_type_name, {}))
            steps.append((investigation_step, log_type))
        return steps

    @classmethod
    def _get_search_ranges(cls, suspected_rules, effect_clues):
        """
        A search range is a dictionary with two keys, InvestigationStep.LEFT_BOUND
        and InvestigationStep.RIGHT_BOUND. Its values are concrete values of the
        same type as some primary key; taken together they represent an interval.
        Sample search range:
            {
                InvestigationStep.LEFT_BOUND: datetime(2016, 5, 29, 12, 33, 0),
                InvestigationStep.RIGHT_BOUND: datetime(2016, 5, 29, 12, 33, 30)
            }
        A log type's search ranges, in the context of a rule, is a dictionary with
        one search range for every primary key type of the rule's parsers that
        belong to this log type. Sample log type's search ranges:
            {
                'date': {
                    InvestigationStep.LEFT_BOUND: datetime(2016, 5, 29, 12, 33, 0),
                    InvestigationStep.RIGHT_BOUND: datetime(2016, 5, 29, 12, 33, 30)
                }
            }
        A rule's search ranges is the sum of the search ranges of all log types
        that the rule's parsers belong to. Sample rule's search ranges (1):
            {
                'apache': {
                    'date': {
                        InvestigationStep.LEFT_BOUND: datetime(2016, 5, 29, 12, 33, 0),
                        InvestigationStep.RIGHT_BOUND: datetime(2016, 5, 29, 12, 33, 30)
                    }
                },
                'database': {
                    'date': {
                        InvestigationStep.LEFT_BOUND: datetime(2016, 5, 29, 12, 32, 0),
                        InvestigationStep.RIGHT_BOUND: datetime(2016, 5, 29, 12, 33, 20)
                    }
                }
            }
        Sample rule's search ranges (2):
            {
                'apache': {
                    'date': {
                        InvestigationStep.LEFT_BOUND: datetime(2016, 5, 29, 12, 32, 0),
                        InvestigationStep.RIGHT_BOUND: datetime(2016, 5, 29, 12, 33, 20)
                    }
                },
            }
        This method merges the search ranges of every rule in suspected_rules.
        Expected return value based on (1) and (2):
            {
                'apache': {
                    'date': {
                        InvestigationStep.LEFT_BOUND: datetime(2016, 5, 29, 12, 32, 0),
                        InvestigationStep.RIGHT_BOUND: datetime(2016, 5, 29, 12, 33, 30)
                    }
                },
                'database': {
                    'date': {
                        InvestigationStep.LEFT_BOUND: datetime(2016, 5, 29, 12, 32, 0),
                        InvestigationStep.RIGHT_BOUND: datetime(2016, 5, 29, 12, 33, 20)
                    }
                }
            }
        """
        # This implementation assumes that every primary key group is a one-element list.
        # TODO: implement support for longer primary key groups
        search_ranges = {}
        for rule in suspected_rules:
            rule_search_ranges = rule.get_search_ranges(effect_clues)
            for log_type_name, log_type_ranges in six.iteritems(rule_search_ranges):
                log_type_search_range = search_ranges.get(log_type_name)
                if log_type_search_range is None:
                    search_ranges[log_type_name] = log_type_ranges
                    continue
                for type_, range_ in six.iteritems(rule_search_ranges[log_type_name]):
                    old_search_range = log_type_search_range.get(type_)
                    if old_search_range is None:
                        log_type_search_range[type_] = range_
                        continue
                    left_bound, right_bound = cls._calculate_new_bounds(range_, old_search_range)
                    old_search_range[InvestigationStep.LEFT_BOUND] = left_bound
                    old_search_range[InvestigationStep.RIGHT_BOUND] = right_bound
        return search_ranges

    @classmethod
    def _calculate_new_bounds(cls, range_, old_search_range):
        left_bound_candidate = range_[InvestigationStep.LEFT_BOUND]
        right_bound_candidate = range_[InvestigationStep.RIGHT_BOUND]
        left_bound = old_search_range[InvestigationStep.LEFT_BOUND]
        right_bound = old_search_range[InvestigationStep.RIGHT_BOUND]
        return min(left_bound_candidate, left_bound), max(right_bound_candidate, right_bound)

    def is_free_parser_name(self, parser_name, black_list):
        return self._parser_name_generator.is_free_parser_name(parser_name, black_list)

    def propose_parser_name(self, line, regex_str, black_list):
        return self._parser_name_generator.propose_parser_name(
            line, regex_str, black_list, self.words_count_in_name
        )
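
# A worked sketch of the bound merging performed by _calculate_new_bounds,
# using the datetimes from the _get_search_ranges docstring: merging the
# 'apache'/'date' ranges of samples (1) and (2) widens the interval to the
# earliest left bound and the latest right bound.
#
#     range_ = {
#         InvestigationStep.LEFT_BOUND: datetime(2016, 5, 29, 12, 33, 0),
#         InvestigationStep.RIGHT_BOUND: datetime(2016, 5, 29, 12, 33, 30),
#     }
#     old_search_range = {
#         InvestigationStep.LEFT_BOUND: datetime(2016, 5, 29, 12, 32, 0),
#         InvestigationStep.RIGHT_BOUND: datetime(2016, 5, 29, 12, 33, 20),
#     }
#     left, right = AbstractConfig._calculate_new_bounds(range_, old_search_range)
#     assert left == datetime(2016, 5, 29, 12, 32, 0)    # min of the left bounds
#     assert right == datetime(2016, 5, 29, 12, 33, 30)  # max of the right bounds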