def __init__(self, raw_format=None):
    """
    Takes raw format and generates regex

    The format is walked character by character: every ``$variable`` is
    turned into a named regex group, every other character is emitted as a
    literal (backslash-escaped unless it is a letter or a digit).

    Sets ``self.raw_format``, ``self.keys``, ``self.regex_string`` and
    ``self.regex``.

    :param raw_format: raw log format; ``self.combined_format`` is used when None
    """
    self.raw_format = self.combined_format if raw_format is None else raw_format
    self.keys = []
    self.regex_string = r''
    self.regex = None

    # preprocess raw format and if we have trailing spaces in format we should remove them
    self.raw_format = prep_raw(self.raw_format).rstrip()

    def escape_literal(char):
        """Returns char as a regex literal: letters/digits as is, the rest escaped"""
        if char.isalpha() or char.isdigit():
            return char
        return '\\%s' % char

    def finalize_key(key):
        """Registers key (given with its leading $) and appends its regex group"""
        key_without_dollar = key[1:]
        self.keys.append(key_without_dollar)
        rxp = self.common_variables.get(key_without_dollar, self.default_variable)[0]

        # Handle formats with multiple instances of the same variable.
        # The first occurrence keeps the plain name, later ones are named
        # var_occurance_2, var_occurance_3, etc. (group names must be unique)
        var_count = self.keys.count(key_without_dollar)
        if var_count > 1:
            regex_var_name = '%s_occurance_%s' % (key_without_dollar, var_count)
        else:
            regex_var_name = key_without_dollar

        self.regex_string += '(?P<%s>%s)' % (regex_var_name, rxp)

    current_key = None
    for char in self.raw_format:
        if current_key:
            if char.isalpha() or char.isdigit() or char == '_':
                # continue building key
                current_key += char
                continue

            # any other char ends the key; the char itself is handled below
            finalize_key(current_key)
            current_key = None

        if char == '$':
            # a new key starts here
            current_key = char
        else:
            self.regex_string += escape_literal(char)

    # key can be the last one element in a string
    if current_key:
        finalize_key(current_key)

    self.regex = re.compile(self.regex_string)
def __init__(self, raw_format=None):
    """
    Takes raw format and generates regex

    The format is walked character by character: every ``$variable`` or
    ``${variable}`` is turned into a named regex group, every other character
    is emitted as a literal and collected into the separators list.

    Sets ``self.raw_format``, ``self.keys``, ``self.regex_string``,
    ``self.regex``, ``self.separators``, ``self.start_from_separator``,
    ``self.keys_amount`` and ``self.separators_amount``.

    :param raw_format: raw log format; ``self.combined_format`` is used when None
    """
    self.raw_format = self.combined_format if raw_format is None else raw_format
    self.keys = []
    self.regex_string = r''
    self.regex = None
    self.separators = []
    self.start_from_separator = False

    # preprocess raw format and if we have trailing spaces in format we should remove them
    self.raw_format = prep_raw(self.raw_format).rstrip()

    def escape_literal(char):
        """Returns char as a regex literal: letters/digits as is, the rest escaped"""
        if char.isalpha() or char.isdigit():
            return char
        return '\\%s' % char

    def finalize_key(key):
        """
        Finalizes key:
        1) removes $ and {} from it
        2) adds a regex for the key to the regex_string
        """
        # a plain filter works for both byte and unicode strings, while
        # translate(None, chars) is only accepted by byte strings
        plain_key = ''.join(c for c in key if c not in '${}')

        self.keys.append(plain_key)
        rxp = self.common_variables.get(plain_key, self.default_variable)[0]

        # Handle formats with multiple instances of the same variable.
        # The first occurrence keeps the plain name, later ones are named
        # var_occurance_2, var_occurance_3, etc. (group names must be unique)
        var_count = self.keys.count(plain_key)
        if var_count > 1:
            regex_var_name = '%s_occurance_%s' % (plain_key, var_count)
        else:
            regex_var_name = plain_key

        self.regex_string += '(?P<%s>%s)' % (regex_var_name, rxp)

    current_key = None
    current_separator = None

    for char_index, char in enumerate(self.raw_format):
        if current_key:
            if char.isalpha() or char.isdigit() or char == '_' or (
                    char == '{' and current_key == '$'):
                # continue building key
                current_key += char
                continue

            if char == '}':
                # the end of ${key} format; the key must be closed here,
                # otherwise the next char (or the end of the string) would
                # finalize the very same key one more time
                finalize_key(current_key + char)
                current_key = None
                continue

            # any other char ends the key; the char itself is handled below
            finalize_key(current_key)
            current_key = None

        if char == '$':
            # a new key starts here, so the separator collected so far is complete
            current_key = char
            if current_separator is not None:
                self.separators.append(current_separator)
                current_separator = None
        else:
            self.regex_string += escape_literal(char)
            if current_separator is not None:
                current_separator += char
            else:
                current_separator = char
            if char_index == 0:
                self.start_from_separator = True

    # key can be the last element in a string
    if current_key:
        finalize_key(current_key)

    # separator also can be the last element in a string
    if current_separator:
        self.separators.append(current_separator)

    self.regex = re.compile(self.regex_string)

    # these two values are used for every line, so let's have them saved
    self.keys_amount = len(self.keys)
    self.separators_amount = len(self.separators)
def __logic_parse(self, files, result=None):
    """
    Parses input files and updates result dict

    Rows whose first element is a list are blocks: their body is parsed
    recursively and stored under the block key ("server" blocks are collected
    in a list, blocks with a compound key in a dict by sub key). All other
    rows are plain directives stored as indexed values, with special handling
    for log_format, include, access_log, error_log and ssl_certificate.

    :param files: dict of files from pyparsing
    :param result: dict to update; a new dict is created when None
    :return: dict of config tree
    """
    if result is None:
        result = {}

    for file_index, rows in files.iteritems():
        # walk over a snapshot: the caller's parse results are left intact
        # and we avoid the quadratic cost of popping from the front
        for row in list(rows):
            row_as_list = row.asList()

            if isinstance(row_as_list[0], list):
                # this is a new key
                key_bucket, value_bucket = row
                key = key_bucket[0]

                if len(key_bucket) == 1:
                    # simple key, with one param
                    subtree_indexed = self.__idx_save(
                        self.__logic_parse({file_index: value_bucket}),
                        file_index, row.line_number
                    )
                    if key == 'server':
                        # work with servers
                        result.setdefault(key, []).append(subtree_indexed)
                    else:
                        result[key] = subtree_indexed
                else:
                    # compound key (for locations and upstreams for example)
                    # remove all redundant spaces
                    parts = [part for part in ' '.join(key_bucket[1:]).split(' ') if part]
                    sub_key = ' '.join(parts)
                    subtree_indexed = self.__idx_save(
                        self.__logic_parse({file_index: value_bucket}),
                        file_index, row.line_number
                    )
                    result.setdefault(key, {})[sub_key] = subtree_indexed
                continue

            # can be just an assigment, without value
            key = row[0]
            value = ''.join(row[1:]) if len(row) >= 2 else ''

            # transform multiline values to single one
            if "'" in value or "\n" in value:
                value = re.sub(r"\'\s*\n\s*\'", '', value)
                # NOTE(review): as written this replaces a quote with a quote;
                # possibly meant to unescape backslash-quote - confirm
                value = re.sub(r"\'", "'", value)

            if key in IGNORED_DIRECTIVES:
                continue  # Pass ignored directives.
            elif key == 'log_format':
                # work with log formats
                gwe = re.match(r"([\w\d_-]+)\s+'(.+)'", value)
                if gwe:
                    format_name, format_value = gwe.group(1), gwe.group(2)

                    indexed_value = self.__idx_save(format_value, file_index, row.line_number)
                    # Handle odd Python auto-escaping of raw strings when packing/unpacking.
                    indexed_value = (prep_raw(indexed_value[0]), indexed_value[1])

                    result.setdefault(key, {})[format_name] = indexed_value
            elif key == 'include':
                indexed_value = self.__idx_save(value, file_index, row.line_number)
                result.setdefault(key, []).append(indexed_value)

                # included files are merged into the very same result dict
                included_files = self.__pyparse(value)
                self.__logic_parse(included_files, result=result)
            elif key in ('access_log', 'error_log'):
                # Handle access_log and error_log edge cases
                if value == '':
                    continue  # skip log directives that are empty

                if '$' in value and ' if=$' not in value:
                    continue  # skip directives that use nginx variables, unless it is an if=

                indexed_value = self.__idx_save(value, file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)
            elif key == 'ssl_certificate':
                if value == '':
                    continue  # skip empty values

                if '$' in value and ' if=$' not in value:
                    continue  # skip directives that use nginx variables, unless it is an if=

                cert_path = self.resolve_local_path(value)
                self.ssl_certificates.append(cert_path)  # Add value to ssl_certificates
                self.populate_directories(cert_path)

                # save config value
                indexed_value = self.__idx_save(value, file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)
            else:
                indexed_value = self.__idx_save(value, file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)

    return result
def __logic_parse(self, files, result=None):
    """
    Parses input files and updates result dict

    Rows whose first element is a list are blocks: their body is parsed
    recursively and stored under the block key ("server" blocks are collected
    in a list, blocks with a compound key in a dict by sub key). All other
    rows are plain directives stored as indexed values, with special handling
    for log_format, include, access_log, error_log, ssl_certificate and
    add_header.

    :param files: dict of files from pyparsing
    :param result: dict to update; a new dict is created when None
    :return: dict of config tree
    """
    if result is None:
        result = {}

    def flatten(items):
        """Helper function that flattens a list of lists into a single list"""
        flattened = []
        for element in items:
            # ParseResults has to be checked first: it is not a list subclass,
            # so a "not a list" test in front of it would make this branch dead
            if isinstance(element, ParseResults):
                flattened += flatten(element.asList())
            elif isinstance(element, list):
                flattened += flatten(element)
            else:
                flattened.append(element)
        return flattened

    for file_index, rowsp in files.iteritems():
        # walk over a snapshot: the caller's parse results are left intact
        # and we avoid the quadratic cost of popping from the front
        for row in list(rowsp):
            row_as_list = row.asList()

            if isinstance(row_as_list[0], list):
                # this is a new key
                key_bucket, value_bucket = row
                key = key_bucket[0]

                if len(key_bucket) == 1:
                    # simple key, with one param
                    subtree_indexed = self.__idx_save(
                        self.__logic_parse({file_index: value_bucket}),
                        file_index, row.line_number)
                    if key == 'server':
                        # work with servers
                        result.setdefault(key, []).append(subtree_indexed)
                    else:
                        result[key] = subtree_indexed
                else:
                    # compound key (for locations and upstreams for example)

                    # with some changes to how we use pyparse we now might get "ParseResults" back...handle it here
                    # typically occurs on "if" statements/blocks
                    if not isinstance(key_bucket[1], (str, unicode)):
                        if isinstance(key_bucket, ParseResults):
                            key_bucket = key_bucket.asList()
                        parse_results = key_bucket.pop()
                        key_bucket += flatten(parse_results)

                    # remove all redundant spaces
                    parts = [part for part in ' '.join(key_bucket[1:]).split(' ') if part]
                    sub_key = ' '.join(parts)
                    subtree_indexed = self.__idx_save(
                        self.__logic_parse({file_index: value_bucket}),
                        file_index, row.line_number)
                    result.setdefault(key, {})[sub_key] = subtree_indexed
                continue

            # can be just an assigment, without value
            key = row[0]
            if len(row) >= 2:
                # add special "spacer" character combination
                # this special spacer only appears in complex "add_header" directives at the moment
                value = '/s/'.join(row[1:])
            else:
                value = ''

            # transform multiline values to single one
            if "'" in value or "\n" in value:
                value = re.sub(r"\'\s*\n\s*\'", '', value)
                # NOTE(review): as written this replaces a quote with a quote;
                # possibly meant to unescape backslash-quote - confirm
                value = re.sub(r"\'", "'", value)

            # remove spaces
            value = value.strip()

            if key in IGNORED_DIRECTIVES:
                continue  # Pass ignored directives.
            elif key == 'log_format':
                # the first spacer separates the format name from the format
                # body, the remaining ones are dropped
                value = value.replace('/s/', " '", 1) + "'"
                value = value.replace('/s/', '')

                # work with log formats
                gwe = re.match(r"([\w\d_-]+)\s+'(.+)'", value)
                if gwe:
                    format_name, format_value = gwe.group(1), gwe.group(2)

                    indexed_value = self.__idx_save(
                        format_value, file_index, row.line_number)
                    # Handle odd Python auto-escaping of raw strings when packing/unpacking.
                    indexed_value = (prep_raw(indexed_value[0]), indexed_value[1])

                    result.setdefault(key, {})[format_name] = indexed_value
            elif key == 'include':
                indexed_value = self.__idx_save(
                    value, file_index, row.line_number)
                result.setdefault(key, []).append(indexed_value)

                # included files are merged into the very same result dict
                included_files = self.__pyparse(value)
                self.__logic_parse(included_files, result=result)
            elif key in ('access_log', 'error_log'):
                value = value.replace('/s/', ' ')

                # Handle access_log and error_log edge cases
                if value == '':
                    continue  # skip log directives that are empty

                if '$' in value and ' if=$' not in value:
                    continue  # skip directives that use nginx variables, unless it is an if=

                indexed_value = self.__idx_save(
                    value, file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)
            elif key == 'ssl_certificate':
                if value == '':
                    continue  # skip empty values

                if '$' in value and ' if=$' not in value:
                    continue  # skip directives that use nginx variables, unless it is an if=

                cert_path = self.resolve_local_path(value)
                self.ssl_certificates.append(cert_path)  # Add value to ssl_certificates
                self.populate_directories(cert_path)

                # save config value
                indexed_value = self.__idx_save(
                    value, file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)
            elif key == 'add_header':
                indexed_value = self.__idx_save(
                    value.replace('/s/', ' '), file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)
            else:
                indexed_value = self.__idx_save(
                    value, file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)

    return result
def __logic_parse(self, files, result=None):
    """
    Parses input files and updates result dict

    Rows whose first element is a list are blocks: their body is parsed
    recursively and stored under the block key ("server" blocks are collected
    in a list, blocks with a compound key in a dict by sub key). All other
    rows are plain directives stored as indexed values, with special handling
    for log_format, include, access_log, error_log, ssl_certificate and
    add_header.

    :param files: dict of files from pyparsing
    :param result: dict to update; a new dict is created when None
    :return: dict of config tree
    """
    if result is None:
        result = {}

    def flatten(items):
        """Helper function that flattens a list of lists into a single list"""
        flattened = []
        for element in items:
            # ParseResults has to be checked first: it is not a list subclass,
            # so a "not a list" test in front of it would make this branch dead
            if isinstance(element, ParseResults):
                flattened += flatten(element.asList())
            elif isinstance(element, list):
                flattened += flatten(element)
            else:
                flattened.append(element)
        return flattened

    for file_index, rowsp in files.iteritems():
        # walk over a snapshot: the caller's parse results are left intact
        # and we avoid the quadratic cost of popping from the front
        for row in list(rowsp):
            row_as_list = row.asList()

            if isinstance(row_as_list[0], list):
                # this is a new key
                key_bucket, value_bucket = row
                key = key_bucket[0]

                if len(key_bucket) == 1:
                    # simple key, with one param
                    subtree_indexed = self.__idx_save(
                        self.__logic_parse({file_index: value_bucket}), file_index, row.line_number
                    )
                    if key == "server":
                        # work with servers
                        result.setdefault(key, []).append(subtree_indexed)
                    else:
                        result[key] = subtree_indexed
                else:
                    # compound key (for locations and upstreams for example)

                    # with some changes to how we use pyparse we now might get "ParseResults" back...handle it here
                    # typically occurs on "if" statements/blocks
                    if not isinstance(key_bucket[1], (str, unicode)):
                        if isinstance(key_bucket, ParseResults):
                            key_bucket = key_bucket.asList()
                        parse_results = key_bucket.pop()
                        key_bucket += flatten(parse_results)

                    # remove all redundant spaces
                    parts = [part for part in " ".join(key_bucket[1:]).split(" ") if part]
                    sub_key = " ".join(parts)
                    subtree_indexed = self.__idx_save(
                        self.__logic_parse({file_index: value_bucket}), file_index, row.line_number
                    )
                    result.setdefault(key, {})[sub_key] = subtree_indexed
                continue

            # can be just an assigment, without value
            key = row[0]
            if len(row) >= 2:
                # add special "spacer" character combination
                # this special spacer only appears in complex "add_header" directives at the moment
                value = "/s/".join(row[1:])
            else:
                value = ""

            # transform multiline values to single one
            if "'" in value or "\n" in value:
                value = re.sub(r"\'\s*\n\s*\'", "", value)
                # NOTE(review): as written this replaces a quote with a quote;
                # possibly meant to unescape backslash-quote - confirm
                value = re.sub(r"\'", "'", value)

            # remove spaces
            value = value.strip()

            if key in IGNORED_DIRECTIVES:
                continue  # Pass ignored directives.
            elif key == "log_format":
                # work with log formats
                gwe = re.match(r"([\w\d_-]+)\s+'(.+)'", value)
                if gwe:
                    format_name, format_value = gwe.group(1), gwe.group(2)

                    indexed_value = self.__idx_save(format_value, file_index, row.line_number)
                    # Handle odd Python auto-escaping of raw strings when packing/unpacking.
                    indexed_value = (prep_raw(indexed_value[0]), indexed_value[1])

                    result.setdefault(key, {})[format_name] = indexed_value
            elif key == "include":
                indexed_value = self.__idx_save(value, file_index, row.line_number)
                result.setdefault(key, []).append(indexed_value)

                # included files are merged into the very same result dict
                included_files = self.__pyparse(value)
                self.__logic_parse(included_files, result=result)
            elif key in ("access_log", "error_log"):
                # Handle access_log and error_log edge cases
                if value == "":
                    continue  # skip log directives that are empty

                if "$" in value and " if=$" not in value:
                    continue  # skip directives that use nginx variables, unless it is an if=

                indexed_value = self.__idx_save(value, file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)
            elif key == "ssl_certificate":
                if value == "":
                    continue  # skip empty values

                if "$" in value and " if=$" not in value:
                    continue  # skip directives that use nginx variables, unless it is an if=

                cert_path = self.resolve_local_path(value)
                self.ssl_certificates.append(cert_path)  # Add value to ssl_certificates
                self.populate_directories(cert_path)

                # save config value
                indexed_value = self.__idx_save(value, file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)
            elif key == "add_header":
                indexed_value = self.__idx_save(value.replace("/s/", " "), file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)
            else:
                indexed_value = self.__idx_save(value, file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)

    return result
def __logic_parse(self, files, result=None):
    """
    Parses input files and updates result dict

    Rows whose first element is a list are blocks: their body is parsed
    recursively and stored under the block key ("server" blocks are collected
    in a list, blocks with a compound key in a dict by sub key). All other
    rows are plain directives stored as indexed values, with special handling
    for log_format, include, access_log, error_log and ssl_certificate.

    :param files: dict of files from pyparsing
    :param result: dict to update; a new dict is created when None
    :return: dict of config tree
    """
    if result is None:
        result = {}

    for file_index, rows in files.iteritems():
        # walk over a snapshot: the caller's parse results are left intact
        # and we avoid the quadratic cost of popping from the front
        for row in list(rows):
            row_as_list = row.asList()

            if isinstance(row_as_list[0], list):
                # this is a new key
                key_bucket, value_bucket = row
                key = key_bucket[0]

                if len(key_bucket) == 1:
                    # simple key, with one param
                    subtree_indexed = self.__idx_save(
                        self.__logic_parse({file_index: value_bucket}),
                        file_index, row.line_number
                    )
                    if key == 'server':
                        # work with servers
                        result.setdefault(key, []).append(subtree_indexed)
                    else:
                        result[key] = subtree_indexed
                else:
                    # compound key (for locations and upstreams for example)
                    # remove all redundant spaces
                    parts = [part for part in ' '.join(key_bucket[1:]).split(' ') if part]
                    sub_key = ' '.join(parts)
                    subtree_indexed = self.__idx_save(
                        self.__logic_parse({file_index: value_bucket}),
                        file_index, row.line_number
                    )
                    result.setdefault(key, {})[sub_key] = subtree_indexed
                continue

            # can be just an assigment, without value
            key = row[0]
            value = ''.join(row[1:]) if len(row) >= 2 else ''

            # transform multiline values to single one
            if "'" in value or "\n" in value:
                value = re.sub(r"\'\s*\n\s*\'", '', value)
                # NOTE(review): as written this replaces a quote with a quote;
                # possibly meant to unescape backslash-quote - confirm
                value = re.sub(r"\'", "'", value)

            if key in IGNORED_DIRECTIVES:
                continue  # Pass ignored directives.
            elif key == 'log_format':
                # work with log formats
                gwe = re.match(r"([\w\d_-]+)\s+'(.+)'", value)
                if gwe:
                    format_name, format_value = gwe.group(1), gwe.group(2)

                    indexed_value = self.__idx_save(format_value, file_index, row.line_number)
                    # Handle odd Python auto-escaping of raw strings when packing/unpacking.
                    indexed_value = (prep_raw(indexed_value[0]), indexed_value[1])

                    result.setdefault(key, {})[format_name] = indexed_value
            elif key == 'include':
                indexed_value = self.__idx_save(value, file_index, row.line_number)
                result.setdefault(key, []).append(indexed_value)

                # included files are merged into the very same result dict
                included_files = self.__pyparse(value)
                self.__logic_parse(included_files, result=result)
            elif key in ('access_log', 'error_log'):
                # Handle access_log and error_log edge cases
                if value == '':
                    continue  # skip log directives that are empty

                if '$' in value and ' if=$' not in value:
                    continue  # skip directives that use nginx variables, unless it is an if=

                indexed_value = self.__idx_save(value, file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)
            elif key == 'ssl_certificate':
                if value == '':
                    continue  # skip empty values

                if '$' in value and ' if=$' not in value:
                    continue  # skip directives that use nginx variables, unless it is an if=

                # Add value to ssl_certificates
                self.ssl_certificates.append(self.resolve_local_path(value))

                # save config value
                indexed_value = self.__idx_save(value, file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)
            else:
                indexed_value = self.__idx_save(value, file_index, row.line_number)
                self.__simple_save(result, key, indexed_value)

    return result