Exemplo n.º 1
0
    def __init__(self, pattern, flags=0):
        """
        Compile ``pattern`` into a native regex owned by this object.

        The signature mirrors ``re.compile`` so this class can be used in
        its place.  ``flags`` is accepted but not implemented yet: every
        pattern is compiled with REG_EXTENDED.

        Raises TypeError if ``pattern`` is not a str/unicode object, and
        sre_constants.error if the compile function reports anything other
        than REG_OK.
        """
        if not isinstance(pattern, basestring):
            raise TypeError("first argument must be string or unicode")
        char_type, compile_fn = _get_specialized('comp', pattern)

        # The regex_t instance that receives the compiled expression.  Only
        # the byref() wrapper is stored; the struct itself is reached
        # through its ``_obj`` attribute.
        self.preg = byref(regex_t())

        # Copy the pattern into a ctypes array of exactly len(pattern)
        # items; the length is handed to the compile function explicitly.
        pattern_len = len(pattern)
        raw_pattern = (char_type * pattern_len)()
        raw_pattern.value = pattern
        status = compile_fn(self.preg, raw_pattern, pattern_len, REG_EXTENDED)

        status_name = reg_errcode_t[status]
        if status_name != 'REG_OK':
            if status_name in ('REG_EBRACK', 'REG_EPAREN', 'REG_EBRACE'):
                raise sre_constants.error("unbalanced parenthesis")
            raise sre_constants.error(
                'Parse error, symbol %s code %d' % (status_name, status))

        # Number of match slots to reserve later: the regex_t's re_nsub
        # field plus one.
        self.match_buffers = self.preg._obj.re_nsub + 1
Exemplo n.º 2
0
    def finditer(self, string):
        """
        Return a generator over the matches found in ``string``.

        The compiled expression is executed repeatedly, each time on the
        text that follows the end of the previous whole match (slot 0 of
        ``pmatch``).  After every successful execution the text of each
        ``pmatch`` slot is yielded in slot order, sliced from the text that
        was searched in that execution.

        Iteration ends when the exec function reports REG_NOMATCH.  Any
        other non-REG_OK result raises sre_constants.error.
        """
        pmatch = (regmatch_t * self.match_buffers)()
        nmatch = c_size_t(self.match_buffers)
        # get the proper types and functions for the string
        string_type, reg_function = _get_specialized('exec', string)

        remaining = string
        # loop until no matches are found (REG_NOMATCH)
        while True:
            # The buffer is rebuilt on every pass because the searched text
            # shrinks after each match.
            string_buffer = (string_type * len(remaining))()
            string_buffer.value = remaining

            result = reg_function(self.preg, string_buffer, len(remaining),
                    nmatch, pmatch, 0)

            status = reg_errcode_t[result]
            if status == 'REG_NOMATCH':
                # A plain return ends the generator.  Raising StopIteration
                # here would surface as RuntimeError under PEP 479.
                return
            elif status != 'REG_OK':
                raise sre_constants.error('Exec error')

            # Every offset in pmatch refers to ``remaining`` as it was
            # passed to this call, so all slots are sliced before the text
            # is trimmed.  A slot holding -1/-1 slices to an empty string.
            for match in pmatch:
                yield remaining[match.rm_so:match.rm_eo]

            # Advance once per execution, past the end of the whole match.
            consumed = pmatch[0].rm_eo
            if consumed <= 0:
                # Empty match at the very start: without a forced step the
                # same match would be reported forever.
                if not remaining:
                    return
                consumed = 1
            remaining = remaining[consumed:]