示例#1
0
    def __init__(self, raw_format=None):
        """
        Takes raw format and generates regex

        The format is scanned character by character: a ``$`` followed by
        letters, digits or underscores becomes a named capture group, every
        other character is appended to the regex (backslash-escaped unless it
        is alphanumeric).

        :param raw_format: raw log format (self.combined_format is used when None)
        """
        self.raw_format = self.combined_format if raw_format is None else raw_format

        self.keys = []
        self.regex_string = r''
        self.regex = None
        current_key = None

        # preprocess raw format and if we have trailing spaces in format we should remove them
        self.raw_format = prep_raw(self.raw_format).rstrip()

        def finalize_key():
            """Registers current_key in self.keys and appends its named group to the regex"""
            key_without_dollar = current_key[1:]
            self.keys.append(key_without_dollar)
            rxp = self.common_variables.get(key_without_dollar,
                                            self.default_variable)[0]
            # Handle formats with multiple instances of the same variable:
            # repeats are named var_occurance_2, var_occurance_3, etc...
            var_count = self.keys.count(key_without_dollar)
            if var_count > 1:
                regex_var_name = '%s_occurance_%s' % (key_without_dollar,
                                                      var_count)
            else:
                regex_var_name = key_without_dollar
            self.regex_string += '(?P<%s>%s)' % (regex_var_name, rxp)

        def add_literal(literal):
            """Appends a non-variable character to the regex, escaping non-alphanumerics"""
            if literal.isalpha() or literal.isdigit():
                self.regex_string += literal
            else:
                # the backslash is spelled out explicitly: '\%s' only produced
                # it through an invalid (deprecated) escape sequence
                self.regex_string += '\\%s' % literal

        for char in self.raw_format:
            if current_key:
                if char.isalpha() or char.isdigit() or char == '_':
                    # continue building key
                    current_key += char
                    continue

                # any other char ends the current key
                finalize_key()
                current_key = None

            if char == '$':
                # start of a new key
                current_key = char
            else:
                add_literal(char)

        # key can be the last one element in a string
        if current_key:
            finalize_key()

        self.regex = re.compile(self.regex_string)
示例#2
0
    def __init__(self, raw_format=None):
        """
        Takes raw format and generates regex

        Besides the regex this also records the literal runs between
        variables (self.separators), whether the format begins with such a
        run (self.start_from_separator) and the number of keys/separators.
        Variables may be written as ``$key`` or ``${key}``.

        :param raw_format: raw log format (self.combined_format is used when None)
        """
        self.raw_format = self.combined_format if raw_format is None else raw_format

        self.keys = []
        self.regex_string = r''
        self.regex = None
        self.separators = []
        self.start_from_separator = False

        # preprocess raw format and if we have trailing spaces in format we should remove them
        self.raw_format = prep_raw(self.raw_format).rstrip()

        current_key = None
        current_separator = None

        def finalize_key():
            """
            Finalizes key:
            1) removes $ and {} from it
            2) adds a regex for the key to the regex_string
            """
            # a join/filter is used instead of str.translate(None, chars): the
            # two-argument form exists only on Python 2 byte strings and
            # raises TypeError for unicode input
            plain_key = ''.join(c for c in current_key if c not in '${}')

            self.keys.append(plain_key)
            rxp = self.common_variables.get(plain_key,
                                            self.default_variable)[0]

            # Handle formats with multiple instances of the same variable:
            # repeats are named var_occurance_2, var_occurance_3, etc...
            var_count = self.keys.count(plain_key)
            if var_count > 1:
                regex_var_name = '%s_occurance_%s' % (plain_key, var_count)
            else:
                regex_var_name = plain_key
            self.regex_string += '(?P<%s>%s)' % (regex_var_name, rxp)

        for char_index, char in enumerate(self.raw_format):
            if current_key:
                if char.isalpha() or char.isdigit() or char == '_' or (
                        char == '{' and current_key == '$'):
                    current_key += char
                    continue

                if char == '}':  # the end of ${key} format
                    current_key += char
                    finalize_key()
                    # the key is complete: without this reset it would be
                    # finalized a second time by the next character (or at the
                    # end of the string)
                    current_key = None
                    continue

                # any other char ends the current key
                finalize_key()
                current_key = None

            if char == '$':
                # start of a new key; the separator collected so far is complete
                current_key = char

                if current_separator is not None:
                    self.separators.append(current_separator)
                    current_separator = None
                continue

            # literal char: goes both to the regex and to the running separator;
            # the backslash is spelled out ('\%s' was an invalid escape sequence)
            if char.isalpha() or char.isdigit():
                self.regex_string += char
            else:
                self.regex_string += '\\%s' % char

            if current_separator is not None:
                current_separator += char
            else:
                current_separator = char

            if char_index == 0:
                self.start_from_separator = True

        # key can be the last element in a string
        if current_key:
            finalize_key()

        # separator also can be the last element in a string
        if current_separator:
            self.separators.append(current_separator)

        self.regex = re.compile(self.regex_string)

        # these two values are used for every line, so let's have them saved
        self.keys_amount = len(self.keys)
        self.separators_amount = len(self.separators)
示例#3
0
    def __logic_parse(self, files, result=None):
        """
        Parses input files and updates result dict

        Rows whose first element is a list are blocks and are parsed
        recursively into a subtree; all other rows are plain directives.

        :param files: dict of files from pyparsing
        :param result: dict to update; a new one is created when None
        :return: dict of config tree
        """
        if result is None:
            result = {}

        for file_index, rows in files.iteritems():
            # plain iteration: rows.pop(0) emptied the caller's rows and
            # shifted the whole sequence on every step
            for row in rows:
                if isinstance(row.asList()[0], list):
                    # this is a new key
                    key_bucket, value_bucket = row
                    key = key_bucket[0]

                    if len(key_bucket) == 1:
                        # simple key, with one param
                        subtree_indexed = self.__idx_save(
                            self.__logic_parse({file_index: value_bucket}),
                            file_index, row.line_number
                        )
                        if key == 'server':
                            # work with servers
                            result.setdefault(key, []).append(subtree_indexed)
                        else:
                            result[key] = subtree_indexed
                    else:
                        # compound key (for locations and upstreams for example)

                        # remove all redundant spaces
                        sub_key = ' '.join(
                            part for part in ' '.join(key_bucket[1:]).split(' ') if part
                        )

                        subtree_indexed = self.__idx_save(
                            self.__logic_parse({file_index: value_bucket}),
                            file_index, row.line_number
                        )
                        result.setdefault(key, {})[sub_key] = subtree_indexed
                    continue

                # plain directive; can be just an assignment, without value
                key = row[0]
                value = ''.join(row[1:]) if len(row) >= 2 else ''

                # transform multiline values to single one
                if "'" in value or '\n' in value:
                    value = re.sub(r"\'\s*\n\s*\'", '', value)
                    # NOTE(review): this pattern matches a plain quote, so the
                    # substitution leaves the value unchanged - confirm intent
                    value = re.sub(r"\'", "'", value)

                if key in IGNORED_DIRECTIVES:
                    continue  # Pass ignored directives.
                elif key == 'log_format':
                    # work with log formats
                    gwe = re.match(r"([\w\d_-]+)\s+'(.+)'", value)
                    if gwe:
                        format_name, format_value = gwe.group(1), gwe.group(2)

                        indexed_value = self.__idx_save(format_value, file_index, row.line_number)
                        # Handle odd Python auto-escaping of raw strings when packing/unpacking.
                        indexed_value = (prep_raw(indexed_value[0]), indexed_value[1])

                        result.setdefault(key, {})[format_name] = indexed_value
                elif key == 'include':
                    indexed_value = self.__idx_save(value, file_index, row.line_number)
                    result.setdefault(key, []).append(indexed_value)

                    # included files are merged into the same result dict
                    included_files = self.__pyparse(value)
                    self.__logic_parse(included_files, result=result)
                elif key in ('access_log', 'error_log'):
                    # Handle access_log and error_log edge cases
                    if value == '':
                        continue  # skip log directives that are empty

                    if '$' in value and ' if=$' not in value:
                        continue  # skip directives that use nginx variables outside of an if= condition

                    indexed_value = self.__idx_save(value, file_index, row.line_number)
                    self.__simple_save(result, key, indexed_value)
                elif key == 'ssl_certificate':
                    if value == '':
                        continue  # skip empty values

                    if '$' in value and ' if=$' not in value:
                        continue  # skip directives that use nginx variables outside of an if= condition

                    cert_path = self.resolve_local_path(value)
                    self.ssl_certificates.append(cert_path)  # Add value to ssl_certificates
                    self.populate_directories(cert_path)

                    # save config value
                    indexed_value = self.__idx_save(value, file_index, row.line_number)
                    self.__simple_save(result, key, indexed_value)
                else:
                    indexed_value = self.__idx_save(value, file_index, row.line_number)
                    self.__simple_save(result, key, indexed_value)

        return result
示例#4
0
    def __init__(self, raw_format=None):
        """
        Takes raw format and generates regex

        Each ``$name`` run (letters, digits, underscores) in the format turns
        into a named capture group; any other character is appended to the
        regex, backslash-escaped unless it is alphanumeric.

        :param raw_format: raw log format (self.combined_format is used when None)
        """
        self.raw_format = self.combined_format if raw_format is None else raw_format

        self.keys = []
        self.regex_string = r''
        self.regex = None
        current_key = None

        # preprocess raw format and if we have trailing spaces in format we should remove them
        self.raw_format = prep_raw(self.raw_format).rstrip()

        def finalize_key():
            """Registers current_key in self.keys and appends its named group to the regex"""
            key_without_dollar = current_key[1:]
            self.keys.append(key_without_dollar)
            rxp = self.common_variables.get(key_without_dollar, self.default_variable)[0]
            # Handle formats with multiple instances of the same variable:
            # repeats are named var_occurance_2, var_occurance_3, etc...
            var_count = self.keys.count(key_without_dollar)
            if var_count > 1:
                regex_var_name = '%s_occurance_%s' % (key_without_dollar, var_count)
            else:
                regex_var_name = key_without_dollar
            self.regex_string += '(?P<%s>%s)' % (regex_var_name, rxp)

        def add_literal(literal):
            """Appends a non-variable character to the regex, escaping non-alphanumerics"""
            if literal.isalpha() or literal.isdigit():
                self.regex_string += literal
            else:
                # explicit backslash: '\%s' only produced it through an
                # invalid (deprecated) escape sequence
                self.regex_string += '\\%s' % literal

        for char in self.raw_format:
            if current_key:
                if char.isalpha() or char.isdigit() or char == '_':
                    # continue building key
                    current_key += char
                    continue

                # any other char ends the current key
                finalize_key()
                current_key = None

            if char == '$':
                # start of a new key
                current_key = char
            else:
                add_literal(char)

        # key can be the last one element in a string
        if current_key:
            finalize_key()

        self.regex = re.compile(self.regex_string)
示例#5
0
    def __logic_parse(self, files, result=None):
        """
        Parses input files and updates result dict

        Rows whose first element is a list are blocks and are parsed
        recursively into a subtree; all other rows are plain directives whose
        arguments are joined with the temporary '/s/' spacer.

        :param files: dict of files from pyparsing
        :param result: dict to update; a new one is created when None
        :return: dict of config tree
        """
        if result is None:
            result = {}

        def flatten(items):
            """Helper function that flattens a list of lists into a single list"""
            flattened = []
            for element in items:
                # ParseResults is tested before the generic list check so that
                # nested parse results are always expanded; the previous order
                # tested "not a list" first and never reached this branch
                if isinstance(element, ParseResults):
                    flattened += flatten(element.asList())
                elif isinstance(element, list):
                    flattened += flatten(element)
                else:
                    flattened.append(element)
            return flattened

        for file_index, rowsp in files.iteritems():
            # iterate over a snapshot so the caller's rows are left untouched
            for row in rowsp[:]:
                if isinstance(row.asList()[0], list):
                    # this is a new key
                    key_bucket, value_bucket = row
                    key = key_bucket[0]

                    if len(key_bucket) == 1:
                        # simple key, with one param
                        subtree_indexed = self.__idx_save(
                            self.__logic_parse({file_index: value_bucket}),
                            file_index, row.line_number)
                        if key == 'server':
                            # work with servers
                            result.setdefault(key, []).append(subtree_indexed)
                        else:
                            result[key] = subtree_indexed
                    else:
                        # compound key (for locations and upstreams for example)

                        # with some changes to how we use pyparse we now might get "ParseResults" back...handle it here
                        # typically occurs on "if" statements/blocks
                        if not isinstance(key_bucket[1], (str, unicode)):
                            if isinstance(key_bucket, ParseResults):
                                key_bucket = key_bucket.asList()
                            parse_results = key_bucket.pop()
                            key_bucket += flatten(parse_results)

                        # remove all redundant spaces
                        sub_key = ' '.join(
                            part for part in ' '.join(key_bucket[1:]).split(' ')
                            if part)

                        subtree_indexed = self.__idx_save(
                            self.__logic_parse({file_index: value_bucket}),
                            file_index, row.line_number)
                        result.setdefault(key, {})[sub_key] = subtree_indexed
                    continue

                # plain directive; can be just an assignment, without value
                key = row[0]
                if len(row) >= 2:
                    # add special "spacer" character combination
                    # this special spacer only appears in complex "add_header" directives at the moment
                    value = '/s/'.join(row[1:])
                else:
                    value = ''

                # transform multiline values to single one
                if "'" in value or '\n' in value:
                    value = re.sub(r"\'\s*\n\s*\'", '', value)
                    # NOTE(review): this pattern matches a plain quote, so the
                    # substitution leaves the value unchanged - confirm intent
                    value = re.sub(r"\'", "'", value)

                # remove spaces
                value = value.strip()

                if key in IGNORED_DIRECTIVES:
                    continue  # Pass ignored directives.
                elif key == 'log_format':
                    # the first spacer becomes " '" and a closing quote is
                    # appended, the remaining spacers are dropped
                    value = value.replace('/s/', " '", 1) + "'"
                    value = value.replace('/s/', '')

                    # work with log formats
                    gwe = re.match(r"([\w\d_-]+)\s+'(.+)'", value)
                    if gwe:
                        format_name, format_value = gwe.group(1), gwe.group(2)

                        indexed_value = self.__idx_save(
                            format_value, file_index, row.line_number)
                        # Handle odd Python auto-escaping of raw strings when packing/unpacking.
                        indexed_value = (prep_raw(indexed_value[0]),
                                         indexed_value[1])

                        result.setdefault(key, {})[format_name] = indexed_value
                elif key == 'include':
                    indexed_value = self.__idx_save(
                        value, file_index, row.line_number)
                    result.setdefault(key, []).append(indexed_value)

                    # included files are merged into the same result dict
                    included_files = self.__pyparse(value)
                    self.__logic_parse(included_files, result=result)
                elif key in ('access_log', 'error_log'):
                    value = value.replace('/s/', ' ')

                    # Handle access_log and error_log edge cases
                    if value == '':
                        continue  # skip log directives that are empty

                    if '$' in value and ' if=$' not in value:
                        continue  # skip directives that use nginx variables outside of an if= condition

                    indexed_value = self.__idx_save(
                        value, file_index, row.line_number)
                    self.__simple_save(result, key, indexed_value)
                elif key == 'ssl_certificate':
                    if value == '':
                        continue  # skip empty values

                    if '$' in value and ' if=$' not in value:
                        continue  # skip directives that use nginx variables outside of an if= condition

                    cert_path = self.resolve_local_path(value)
                    self.ssl_certificates.append(
                        cert_path)  # Add value to ssl_certificates
                    self.populate_directories(cert_path)

                    # save config value
                    indexed_value = self.__idx_save(
                        value, file_index, row.line_number)
                    self.__simple_save(result, key, indexed_value)
                elif key == 'add_header':
                    indexed_value = self.__idx_save(
                        value.replace('/s/', ' '), file_index,
                        row.line_number)
                    self.__simple_save(result, key, indexed_value)
                else:
                    indexed_value = self.__idx_save(
                        value, file_index, row.line_number)
                    self.__simple_save(result, key, indexed_value)

        return result
示例#6
0
    def __logic_parse(self, files, result=None):
        """
        Parses input files and updates result dict

        Rows whose first element is a list are blocks and are parsed
        recursively into a subtree; all other rows are plain directives whose
        arguments are joined with the temporary "/s/" spacer.

        :param files: dict of files from pyparsing
        :param result: dict to update; a new one is created when None
        :return: dict of config tree
        """
        if result is None:
            result = {}

        def flatten(items):
            """Helper function that flattens a list of lists into a single list"""
            flattened = []
            for element in items:
                # ParseResults is tested before the generic list check so that
                # nested parse results are always expanded; the previous order
                # tested "not a list" first and never reached this branch
                if isinstance(element, ParseResults):
                    flattened += flatten(element.asList())
                elif isinstance(element, list):
                    flattened += flatten(element)
                else:
                    flattened.append(element)
            return flattened

        for file_index, rowsp in files.iteritems():
            # iterate over a snapshot so the caller's rows are left untouched
            for row in rowsp[:]:
                if isinstance(row.asList()[0], list):
                    # this is a new key
                    key_bucket, value_bucket = row
                    key = key_bucket[0]

                    if len(key_bucket) == 1:
                        # simple key, with one param
                        subtree_indexed = self.__idx_save(
                            self.__logic_parse({file_index: value_bucket}), file_index, row.line_number
                        )
                        if key == "server":
                            # work with servers
                            result.setdefault(key, []).append(subtree_indexed)
                        else:
                            result[key] = subtree_indexed
                    else:
                        # compound key (for locations and upstreams for example)

                        # with some changes to how we use pyparse we now might get "ParseResults" back...handle it here
                        # typically occurs on "if" statements/blocks
                        if not isinstance(key_bucket[1], (str, unicode)):
                            if isinstance(key_bucket, ParseResults):
                                key_bucket = key_bucket.asList()
                            parse_results = key_bucket.pop()
                            key_bucket += flatten(parse_results)

                        # remove all redundant spaces
                        sub_key = " ".join(part for part in " ".join(key_bucket[1:]).split(" ") if part)

                        subtree_indexed = self.__idx_save(
                            self.__logic_parse({file_index: value_bucket}), file_index, row.line_number
                        )
                        result.setdefault(key, {})[sub_key] = subtree_indexed
                    continue

                # plain directive; can be just an assignment, without value
                key = row[0]
                if len(row) >= 2:
                    # add special "spacer" character combination
                    # this special spacer only appears in complex "add_header" directives at the moment
                    value = "/s/".join(row[1:])
                else:
                    value = ""

                # transform multiline values to single one
                if "'" in value or "\n" in value:
                    value = re.sub(r"\'\s*\n\s*\'", "", value)
                    # NOTE(review): this pattern matches a plain quote, so the
                    # substitution leaves the value unchanged - confirm intent
                    value = re.sub(r"\'", "'", value)

                # remove spaces
                value = value.strip()

                if key in IGNORED_DIRECTIVES:
                    continue  # Pass ignored directives.
                elif key == "log_format":
                    # work with log formats
                    gwe = re.match(r"([\w\d_-]+)\s+'(.+)'", value)
                    if gwe:
                        format_name, format_value = gwe.group(1), gwe.group(2)

                        indexed_value = self.__idx_save(format_value, file_index, row.line_number)
                        # Handle odd Python auto-escaping of raw strings when packing/unpacking.
                        indexed_value = (prep_raw(indexed_value[0]), indexed_value[1])

                        result.setdefault(key, {})[format_name] = indexed_value
                elif key == "include":
                    indexed_value = self.__idx_save(value, file_index, row.line_number)
                    result.setdefault(key, []).append(indexed_value)

                    # included files are merged into the same result dict
                    included_files = self.__pyparse(value)
                    self.__logic_parse(included_files, result=result)
                elif key in ("access_log", "error_log"):
                    # Handle access_log and error_log edge cases
                    if value == "":
                        continue  # skip log directives that are empty

                    if "$" in value and " if=$" not in value:
                        continue  # skip directives that use nginx variables outside of an if= condition

                    indexed_value = self.__idx_save(value, file_index, row.line_number)
                    self.__simple_save(result, key, indexed_value)
                elif key == "ssl_certificate":
                    if value == "":
                        continue  # skip empty values

                    if "$" in value and " if=$" not in value:
                        continue  # skip directives that use nginx variables outside of an if= condition

                    cert_path = self.resolve_local_path(value)
                    self.ssl_certificates.append(cert_path)  # Add value to ssl_certificates
                    self.populate_directories(cert_path)

                    # save config value
                    indexed_value = self.__idx_save(value, file_index, row.line_number)
                    self.__simple_save(result, key, indexed_value)
                elif key == "add_header":
                    indexed_value = self.__idx_save(value.replace("/s/", " "), file_index, row.line_number)
                    self.__simple_save(result, key, indexed_value)
                else:
                    indexed_value = self.__idx_save(value, file_index, row.line_number)
                    self.__simple_save(result, key, indexed_value)

        return result
示例#7
0
    def __logic_parse(self, files, result=None):
        """
        Parses input files and updates result dict

        Rows whose first element is a list are blocks and are parsed
        recursively into a subtree; all other rows are plain directives.

        :param files: dict of files from pyparsing
        :param result: dict to update; a new one is created when None
        :return: dict of config tree
        """
        if result is None:
            result = {}

        for file_index, rows in files.iteritems():
            # plain iteration: rows.pop(0) emptied the caller's rows and
            # shifted the whole sequence on every step
            for row in rows:
                if isinstance(row.asList()[0], list):
                    # this is a new key
                    key_bucket, value_bucket = row
                    key = key_bucket[0]

                    if len(key_bucket) == 1:
                        # simple key, with one param
                        subtree_indexed = self.__idx_save(
                            self.__logic_parse({file_index: value_bucket}),
                            file_index, row.line_number
                        )
                        if key == 'server':
                            # work with servers
                            result.setdefault(key, []).append(subtree_indexed)
                        else:
                            result[key] = subtree_indexed
                    else:
                        # compound key (for locations and upstreams for example)

                        # remove all redundant spaces
                        sub_key = ' '.join(
                            part for part in ' '.join(key_bucket[1:]).split(' ') if part
                        )

                        subtree_indexed = self.__idx_save(
                            self.__logic_parse({file_index: value_bucket}),
                            file_index, row.line_number
                        )
                        result.setdefault(key, {})[sub_key] = subtree_indexed
                    continue

                # plain directive; can be just an assignment, without value
                key = row[0]
                value = ''.join(row[1:]) if len(row) >= 2 else ''

                # transform multiline values to single one
                if "'" in value or '\n' in value:
                    value = re.sub(r"\'\s*\n\s*\'", '', value)
                    # NOTE(review): this pattern matches a plain quote, so the
                    # substitution leaves the value unchanged - confirm intent
                    value = re.sub(r"\'", "'", value)

                if key in IGNORED_DIRECTIVES:
                    continue  # Pass ignored directives.
                elif key == 'log_format':
                    # work with log formats
                    gwe = re.match(r"([\w\d_-]+)\s+'(.+)'", value)
                    if gwe:
                        format_name, format_value = gwe.group(1), gwe.group(2)

                        indexed_value = self.__idx_save(format_value, file_index, row.line_number)
                        # Handle odd Python auto-escaping of raw strings when packing/unpacking.
                        indexed_value = (prep_raw(indexed_value[0]), indexed_value[1])

                        result.setdefault(key, {})[format_name] = indexed_value
                elif key == 'include':
                    indexed_value = self.__idx_save(value, file_index, row.line_number)
                    result.setdefault(key, []).append(indexed_value)

                    # included files are merged into the same result dict
                    included_files = self.__pyparse(value)
                    self.__logic_parse(included_files, result=result)
                elif key in ('access_log', 'error_log'):
                    # Handle access_log and error_log edge cases
                    if value == '':
                        continue  # skip log directives that are empty

                    if '$' in value and ' if=$' not in value:
                        continue  # skip directives that use nginx variables outside of an if= condition

                    indexed_value = self.__idx_save(value, file_index, row.line_number)
                    self.__simple_save(result, key, indexed_value)
                elif key == 'ssl_certificate':
                    if value == '':
                        continue  # skip empty values

                    if '$' in value and ' if=$' not in value:
                        continue  # skip directives that use nginx variables outside of an if= condition

                    self.ssl_certificates.append(self.resolve_local_path(value))  # Add value to ssl_certificates

                    # save config value
                    indexed_value = self.__idx_save(value, file_index, row.line_number)
                    self.__simple_save(result, key, indexed_value)
                else:
                    indexed_value = self.__idx_save(value, file_index, row.line_number)
                    self.__simple_save(result, key, indexed_value)

        return result