Пример #1
0
 def setup(self):
     """Compile the URL-matching regex and store it on ``grab``'s function.

     The pattern is built around the expression returned by ``url_regex()``
     and compiled case-insensitively with ``re.DOTALL``.  It is stored as
     the ``pattern`` attribute of the plain function underlying the bound
     method ``self.grab``.
     """
     url_pattern = (
         r'(?:[^@./]\b(?!\.)|\A)('  # Match a boundary, but not on an e-mail address
         + url_regex() + r')[\[>)\]"\'.,;:]*(?:\s|\Z)'  # End boundary
     )
     # ``__func__`` names the function behind a bound method on Python 2.6+
     # and Python 3 alike; the older ``im_func`` alias only exists on
     # Python 2 and raises AttributeError on Python 3.
     self.grab.__func__.pattern = re.compile(url_pattern, re.I | re.DOTALL)
Пример #2
0
def _match_sub_selectors(regex):
    r"""Expand ``{selector}`` placeholders in *regex* and anchor the result.

    Processing steps, in order:

    1. Every literal space is replaced with ``(?:\s+)``.
    2. ``{pattern}`` becomes an unnamed group ``(<sub-regex>)`` and
       ``{name:pattern}`` becomes ``(?P<name__N_><sub-regex>)``, where ``N``
       counts how many times ``name`` has been used so far (starting at 1),
       so the same name may appear more than once in one expression.
       ``pattern`` must be one of the keys of ``selector_patterns`` below;
       anything else in braces is left untouched.
    3. ``^`` is prepended and ``$`` appended unless already present.

    :param regex: the expression containing optional placeholders.
    :returns: the expanded, anchored regular expression as a string.
    """
    selector_patterns = {
        'alpha': r'[a-zA-Z]+',
        'any': r'.+',
        'chunk': r'\S+',
        'digits': r'\d+',
        'number': r'\d*\.?\d+',
        'url': url_regex(),
        'word': r'\w+',
    }

    regex = regex.replace(' ', r'(?:\s+)')

    name_count = defaultdict(int)

    def selector_to_re(match):
        name, pattern = match.groups()

        if name is None:
            return '(%s)' % selector_patterns[pattern]

        # Prevent conflicts when reusing a name
        name_count[name] += 1
        name = '%s__%d_' % (name, name_count[name])

        return '(?P<%s>%s)' % (name, selector_patterns[pattern])

    # Braces are escaped explicitly so they can never be read as a quantifier.
    placeholder_re = r'\{(?:(\w+):)?(%s)\}' % '|'.join(selector_patterns)
    regex = re.sub(placeholder_re, selector_to_re, regex)

    if not regex.startswith('^'):
        regex = '^' + regex

    # A trailing ``$`` is only an anchor when it is not escaped.  If it is
    # preceded by an odd number of backslashes it is a literal dollar sign
    # (``\$``) and the expression still needs its end anchor.
    anchored = False
    if regex.endswith('$'):
        head = regex[:-1]
        trailing_backslashes = len(head) - len(head.rstrip('\\'))
        anchored = trailing_backslashes % 2 == 0
    if not anchored:
        regex = regex + '$'

    return regex
Пример #3
0
def _match_sub_selectors(regex):
    r"""Turn ``{selector}`` placeholders into regex groups and anchor *regex*.

    Literal spaces are first replaced with ``(?:\s+)``.  Then each
    ``{pattern}`` placeholder is replaced with ``(<sub-regex>)`` and each
    ``{name:pattern}`` placeholder with ``(?P<name__N_><sub-regex>)``; ``N``
    is a per-name counter starting at 1, which keeps group names unique when
    a name is reused.  Only the selector names listed in
    ``selector_patterns`` are recognised.  Finally the expression is wrapped
    in ``^`` ... ``$`` unless those anchors are already there.

    :param regex: the expression containing optional placeholders.
    :returns: the expanded, anchored regular expression as a string.
    """
    selector_patterns = {
        'alpha'   : r'[a-zA-Z]+',
        'any'     : r'.+',
        'chunk'   : r'\S+',
        'digits'  : r'\d+',
        'number'  : r'\d*\.?\d+',
        'url'     : url_regex(),
        'word'    : r'\w+',
    }

    regex = regex.replace(' ', r'(?:\s+)')

    name_count = defaultdict(int)
    def selector_to_re(match):
        name    = match.group(1)
        pattern = match.group(2)
        sub_re  = selector_patterns[pattern]

        if name is None:
            return '(%s)' % sub_re

        # Prevent conflicts when reusing a name
        name_count[name] += 1
        return '(?P<%s__%d_>%s)' % (name, name_count[name], sub_re)

    regex = re.sub(r'\{(?:(\w+):)?(%s)\}' % '|'.join(selector_patterns),
                   selector_to_re, regex)

    if not regex.startswith('^'):
        regex = '^' + regex

    # Only an unescaped ``$`` counts as an end anchor: when the final ``$``
    # follows an odd number of backslashes it is a literal dollar sign, so
    # the anchor must still be appended.
    if regex.endswith('$'):
        before_dollar = regex[:-1]
        escapes = len(before_dollar) - len(before_dollar.rstrip('\\'))
        needs_anchor = escapes % 2 == 1
    else:
        needs_anchor = True
    if needs_anchor:
        regex = regex + '$'

    return regex
Пример #4
0
 def setup(self):
     """Build the URL regex and attach it to the function behind ``self.grab``.

     The expression wraps the result of ``url_regex()`` in a leading and a
     trailing boundary, is compiled with ``re.I | re.DOTALL``, and is stored
     as the ``pattern`` attribute of ``self.grab``'s underlying function.
     """
     compiled = re.compile((
         r'(?:[^@./]\b(?!\.)|\A)('       # Match a boundary, but not on an e-mail address
         + url_regex() +
         r')[\[>)\]"\'.,;:]*(?:\s|\Z)'   # End boundary
     ), re.I | re.DOTALL)
     # Use ``__func__`` rather than ``im_func``: both name the underlying
     # function on Python 2.6+, but only ``__func__`` exists on Python 3.
     self.grab.__func__.pattern = compiled