Exemplo n.º 1
0
    def test_re_match(self):
        self.assertEqual(re.match('a', 'a').groups(), ())
        self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
        self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
        self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
        self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))

        pat = re.compile('((a)|(b))(c)?')
        self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
        self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
        self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
        self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
        self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))

        # A single group
        m = re.match('(a)', 'a')
        self.assertEqual(m.group(0), 'a')
        self.assertEqual(m.group(0), 'a')
        self.assertEqual(m.group(1), 'a')
        self.assertEqual(m.group(1, 1), ('a', 'a'))

        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
                         (None, 'b', None))
        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Exemplo n.º 2
0
 def test_bug_926075(self):
     try:
         unicode
     except NameError:
         self.skipTest('no problem if we have no unicode')
     self.assertTrue(re.compile('bug_926075') is not
                  re.compile(eval("u'bug_926075'")))
Exemplo n.º 3
0
 def test_empty_array(self):
     # SF buf 1647541
     import array
     for typecode in 'cbBuhHiIlLfd':
         a = array.array(typecode)
         self.assertEqual(re.compile("bla").match(a), None)
         self.assertEqual(re.compile("").match(a).groups(), ())
Exemplo n.º 4
0
 def test_compile(self):
     # Test return value when given string and pattern as parameter
     pattern = re.compile('random pattern')
     self.assertIsInstance(pattern, re._pattern_type)
     same_pattern = re.compile(pattern)
     self.assertIsInstance(same_pattern, re._pattern_type)
     self.assertIs(same_pattern, pattern)
     # Test behaviour when not given a string or pattern as parameter
     self.assertRaises(TypeError, re.compile, 0)
Exemplo n.º 5
0
 def test_issue17998(self):
     for reps in '*', '+', '?', '{1}':
         for mod in '', '?':
             pattern = '.' + reps + mod + 'yz'
             self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
                              ['xyz'], msg=pattern)
             pattern = pattern.encode()
             self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
                              [b'xyz'], msg=pattern)
Exemplo n.º 6
0
    def test_dollar_matches_twice(self):
        "$ matches the end of string, and just before the terminating \n"
        pattern = re.compile('$')
        self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
        self.assertEqual(pattern.sub('#', '\n'), '#\n#')

        pattern = re.compile('$', re.MULTILINE)
        self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
Exemplo n.º 7
0
def __validate_mz(warnings, errors, mz):
    """Validate the parts of a matchzone, recording problems as they are found.

    Every entry of ``mz`` is either a bare zone (``ARGS``) or a ``zone:var``
    pair.  Validation stops at the first error; warnings do not stop it.

    :param list of str warnings: warning messages, appended to in place
    :param list of str errors: error messages, appended to in place
    :param mz: sequence of matchzone parts (presumably the matchzone string
        split on ``|`` -- confirm against the caller)
    :return: the ``(errors, warnings)`` pair, i.e. the two lists passed in
    """
    valid_zones = ['ARGS', 'HEADERS', 'BODY', 'URL']
    valid_named_zones = ['$ARGS_VAR', '$HEADERS_VAR', '$BODY_VAR', '$URL']
    valid_regexp_zones = ['$ARGS_VAR_X', '$HEADERS_VAR_X', '$BODY_VAR_X', '$URL_X']
    # Built once instead of on every loop iteration.
    known_zones = valid_zones + valid_named_zones + valid_regexp_zones + ['NAME']

    use_regexp = False

    for matchzone in mz:
        # ``var`` is empty when there is no ':' in the part.
        zone, _, var = matchzone.partition(':')

        if zone not in known_zones:
            errors.append('The matchzone %s is not valid.' % zone)
            return errors, warnings

        if not zone.startswith('$') and use_regexp:
            errors.append('You can not use regexp matchzone with non-regexp one')
            return errors, warnings

        if not var:  # there is no ':' char in the `matchzone`
            if zone.startswith('$'):
                # The zone name has to be interpolated; the message used to
                # be emitted with a literal "%s" in it.
                errors.append('The matchzone %s starts with a $, but has no variables' % zone)
                return errors, warnings
        else:
            if not var.islower():
                warnings.append('The expression %s is not in lowercase.' % var)
            if zone.endswith('_X'):
                # Remember that a regexp zone was seen so that later plain
                # zones can be rejected.
                use_regexp = True

                try:
                    pcre.compile(var)
                except pcre.PCREError:
                    errors.append('The regexp %s is invalid.' % var)
                    return errors, warnings

    if len(mz) > 3:
        errors.append('The matchzone has more than 2 pipes.')
        return errors, warnings
    elif len(mz) == 3:
        if mz[2] != 'NAME':
            errors.append('The last argument of your matchzone with two pipes is not "NAME"')
            return errors, warnings
        if not mz[0].startswith('$URL'):
            warnings.append('Your three parts matchzone does not starts with $URL')
    if 1 < len(mz) < 4 and mz[0].startswith('$URL') and (mz[1] == 'NAME'):
        errors.append('You can not use $URL and NAME')
        return errors, warnings
    return errors, warnings
Exemplo n.º 8
0
 def test_debug_flag(self):
     """Compiling with ``re.DEBUG`` must print the parse dump every time."""
     expected_dump = ['literal 102', 'literal 111', 'literal 111']
     # Two rounds: debug output is output again even a second time
     # (bypassing the cache -- issue #20426).
     for _round in range(2):
         with captured_stdout() as out:
             re.compile('foo', re.DEBUG)
         self.assertEqual(out.getvalue().splitlines(), expected_dump)
Exemplo n.º 9
0
def __validate_detection_rx(warnings, errors, p_str):
    """Check a regex detection string, appending to ``warnings`` / ``errors``.

    A warning is recorded when ``p_str`` is not lower-case.  When the optional
    ``pcre`` module can be imported, everything after the first three
    characters of ``p_str`` is compiled and a failure is recorded as an error.

    Returns the ``(errors, warnings)`` pair that was passed in.
    """
    if not p_str.islower():
        note = "detection {} is not lower-case. naxsi is case-insensitive"
        warnings.append(note.format(p_str))

    regex_body = p_str[3:]
    try:  # python-pcre is an optional dependency
        import pcre
        try:  # a regex that does not compile is reported as invalid
            pcre.compile(regex_body)
        except pcre.PCREError:
            errors.append("{} is not a valid regex:".format(p_str))
    except ImportError:
        pass
    return errors, warnings
Exemplo n.º 10
0
def __validate_detection_rx(warnings, errors, p_str):
    """Validate a regex detection string.

    Appends a warning when ``p_str`` is not lower-case and, if the optional
    ``pcre`` bindings are importable, an error when ``p_str[3:]`` does not
    compile.  The same two lists are returned as ``(errors, warnings)``.
    """
    is_lower = p_str.islower()
    if not is_lower:
        warnings.append(
            "detection {} is not lower-case. naxsi is case-insensitive".format(p_str))

    outcome = (errors, warnings)
    try:
        # Validation is skipped entirely when python-pcre is not installed.
        import pcre
        try:
            pcre.compile(p_str[3:])
        except pcre.PCREError:
            # If the regex cannot be compiled it is reported as invalid.
            errors.append("{} is not a valid regex:".format(p_str))
    except ImportError:
        pass
    return outcome
Exemplo n.º 11
0
def get_groups_details_of_results(raw_results, matched_results):
    """Collect portinfo entries whose raw value contains a ``$N`` reference.

    For every key in ``raw_results['rule']['portinfo']`` whose stringified
    value contains ``$`` followed by one or two digits, a
    ``[key, raw_value, matched_value]`` triple is produced, where
    ``matched_value`` is the entry under the same key in
    ``matched_results['rule']['portinfo']``.

    :return: list of ``[key, raw_value, matched_value]`` lists
    """
    # Raw string: "\$" and "\d" are not valid escapes in a normal literal.
    group_reference = pcre.compile(r'\$([\d]{1,2})')
    # ``items()`` works on both Python 2 and Python 3 dicts, unlike
    # ``iteritems()`` which is Python 2 only.
    return [
        [key, value, matched_results['rule']['portinfo'][key]]
        for key, value in raw_results['rule']['portinfo'].items()
        if group_reference.search(str(value))
    ]
Exemplo n.º 12
0
 def __init__(self, linenum, *params):
     """Initialise the base class, then compile the pattern and target.

     ``self.regex`` is the PCRE-compiled ``self.pattern``.  ``self.target_repl``
     comes from ``_get_target_repl()`` when ``self.target`` is truthy and is
     ``None`` otherwise.
     """
     super(RedirectMatch, self).__init__(linenum, *params)
     self.regex = pcre.compile(self.pattern)
     self.target_repl = self._get_target_repl() if self.target else None
Exemplo n.º 13
0
 def test_bug_931848(self):
     try:
         unicode
     except NameError:
         self.skipTest('no problem if we have no unicode')
     pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
     self.assertEqual(re.compile(pattern).split("a.b.c"), ['a', 'b', 'c'])
Exemplo n.º 14
0
 def test_bug_1661(self):
     # Verify that flags do not get silently ignored with compiled patterns
     pattern = re.compile('.')
     self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
     self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
     self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
     self.assertRaises(ValueError, re.compile, pattern, re.I)
Exemplo n.º 15
0
    def __validate_detection_rx(self, p_str, assign=False):
        """Validate a regex detection string and optionally store it.

        Records a warning when ``p_str`` is not lower-case.  If the optional
        ``pcre`` bindings are importable and ``p_str[3:]`` does not compile,
        the result of ``self.__fail(...)`` is returned.  Otherwise ``p_str``
        is stored in ``self.detection`` when ``assign`` is ``True`` and
        ``True`` is returned.
        """
        if not p_str.islower():
            note = "detection {} is not lower-case. naxsi is case-insensitive"
            self.warnings.append(note.format(p_str))

        regex_body = p_str[3:]
        try:  # python-pcre is an optional dependency
            import pcre
            try:  # a regex that does not compile is reported as invalid
                pcre.compile(regex_body)
            except pcre.PCREError:
                return self.__fail("{} is not a valid regex:".format(p_str))
        except ImportError:
            pass

        if assign is True:
            self.detection = p_str
        return True
Exemplo n.º 16
0
 def test_bug_931848(self):
     try:
         unicode
     except NameError:
         self.skipTest('no problem if we have no unicode')
     pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
     self.assertEqual(re.compile(pattern).split("a.b.c"),
                      ['a','b','c'])
Exemplo n.º 17
0
    def test_bug_581080(self):
        iter = re.finditer(r"\s", "a b")
        self.assertEqual(iter.next().span(), (1,2))
        self.assertRaises(StopIteration, iter.next)

        scanner = re.compile(r"\s").scanner("a b")
        self.assertEqual(scanner.search().span(), (1, 2))
        self.assertEqual(scanner.search(), None)
Exemplo n.º 18
0
def extract(fingerprint):
    """Match ``fingerprint`` against ``REGEX`` and build a filled-in result.

    Returns ``None`` when the fingerprint does not match.  Otherwise returns a
    deep copy of ``RESULT`` whose ``['rule']['portinfo']`` entry has been
    passed through ``substitute_portinfo_template`` with the match groups.
    """
    flags = parse_compile_flags(RESULT['rule']['pattern']['flags'])
    compiled = pcre.compile(REGEX, flags=flags)
    found = pcre.match(compiled, fingerprint)
    if found:
        # Work on a copy so the module-level template stays untouched.
        filled = copy.deepcopy(RESULT)
        template = filled['rule']['portinfo']
        filled['rule']['portinfo'] = substitute_portinfo_template(template, found.groups())
        return filled
    return None
Exemplo n.º 19
0
def _regex_engine(pattern):
	"""Compile ``pattern`` with PCRE if importable, else with the built-in ``re``.

	A warning is issued when the fallback to ``re`` is taken.
	"""
	try:
		import pcre
		return pcre.compile(pattern)
	except ImportError:
		fallback_notice = (
			"Perl Compatible Regular Expressions (PCRE) library is not available, "
			"falling back to built-in Regular Expressions (RE) library. "
			"Transformation results might not be reproducible between Python and "
			"PMML environments when using more complex patterns"
		)
		warnings.warn(fallback_notice, Warning)
		import re
		return re.compile(pattern)
Exemplo n.º 20
0
 def test_symbolic_groups(self):
     re.compile('(?P<a>x)(?P=a)(?(a)y)')
     re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
     self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
     self.assertRaises(re.error, re.compile, '(?Px)')
     self.assertRaises(re.error, re.compile, '(?P=)')
     self.assertRaises(re.error, re.compile, '(?P=1)')
     self.assertRaises(re.error, re.compile, '(?P=a)')
     self.assertRaises(re.error, re.compile, '(?P=a1)')
     self.assertRaises(re.error, re.compile, '(?P=a.)')
     self.assertRaises(re.error, re.compile, '(?P<)')
     self.assertRaises(re.error, re.compile, '(?P<>)')
     self.assertRaises(re.error, re.compile, '(?P<1>)')
     self.assertRaises(re.error, re.compile, '(?P<a.>)')
     self.assertRaises(re.error, re.compile, '(?())')
     self.assertRaises(re.error, re.compile, '(?(a))')
     self.assertRaises(re.error, re.compile, '(?(1a))')
     self.assertRaises(re.error, re.compile, '(?(a.))')
Exemplo n.º 21
0
class JsonParser:
    """Small JSON reader/writer that validates input with a recursive PCRE grammar."""

    # Recursive grammar built from named sub-patterns inside a DEFINE group.
    # The top-level ``json`` rule accepts an object or an array only, and the
    # final ``\A(?&json)\z`` anchors it to the whole input.
    __pattern = pcre.compile(r'(?(DEFINE)'
                             r'(?<json>(?>\s*(?&object)\s*|\s*(?&array)\s*))'
                             r'(?<object>(?>\{\s*(?>(?&pair)(?>\s*,\s*(?&pair))*)?\s*\}))'
                             r'(?<pair>(?>(?&STRING)\s*:\s*(?&value)))'
                             r'(?<array>(?>\[\s*(?>(?&value)(?>\s*,\s*(?&value))*)?\s*\]))'
                             r'(?<value>(?>true|false|null|(?&STRING)|(?&NUMBER)|(?&object)|(?&array)))'
                             r'(?<STRING>(?>"(?>\\(?>["\\\/bfnrt]|u[a-fA-F0-9]{4})|[^"\\\0-\x1F\x7F]+)*"))'
                             r'(?<NUMBER>(?>-?(?>0|[1-9][0-9]*)(?>\.[0-9]+)?(?>[eE][+-]?[0-9]+)?))'
                             r')'
                             r'\A(?&json)\z')

    @classmethod
    def loads(cls, input_str: str):
        """Parse ``input_str`` and return the corresponding Python value.

        :param input_str: text to parse
        :return: the evaluated object/array; ``input_str`` itself when it is
            a run of digits with an optional leading ``-``; or the literal
            value of a double-quoted string
        :raises SyntaxError: if the text matches none of the accepted forms
        """
        pcre.enable_re_template_mode()
        json = pcre.match(cls.__pattern, input_str)
        # NOTE(review): integer-looking input is returned as the original
        # string, not converted to int -- confirm this is intended.
        if input_str.isdigit() or (input_str.startswith('-') and input_str[1:].isdigit()):
            return input_str
        if json is None:
            if input_str.startswith('"') and input_str.endswith('"'):
                return leval(input_str)
            raise SyntaxError('Invalid json format')
        # All three JSON keywords must be defined for the evaluation;
        # with only ``null`` mapped, any ``true``/``false`` in the document
        # raised NameError.
        keywords = {'null': None, 'true': True, 'false': False}
        # NOTE(review): eval() runs only on text the grammar above accepted,
        # but it is still eval -- a real parser would be safer for untrusted
        # input.
        return eval(json.group(0), {}, keywords)

    @staticmethod
    def dumps(iter_obj) -> str:
        """Serialise ``iter_obj`` to JSON-style text.

        Supported values are ``None``, ``str``, ``bool``, ``int``, ``float``,
        ``tuple``/``set``/``list`` (written as arrays) and ``dict``.

        :raises SyntaxError: for any other object type
        """
        class keyword:
            """Value whose repr is a bare JSON keyword (null / true / false)."""

            def __init__(self, text):
                self.text = text

            def __repr__(self):
                return self.text

        class mstr(str):
            """String whose repr is wrapped in double quotes."""

            # NOTE(review): relies on Python's repr escaping, so a string
            # containing a double quote is not escaped for JSON.
            def __repr__(self):
                return ''.join(('"', super().__repr__()[1:-1], '"'))

        def serialize(iter_obj):
            if iter_obj is None:
                return keyword('null')
            elif isinstance(iter_obj, str):
                return mstr(iter_obj)
            elif isinstance(iter_obj, bool):
                # Must be tested before int (bool is an int subclass).  A
                # keyword object is used instead of a plain 'true'/'false'
                # string, whose repr inside a container would be
                # single-quoted.
                return keyword('true' if iter_obj else 'false')
            elif isinstance(iter_obj, (int, float)):
                return iter_obj
            elif isinstance(iter_obj, (tuple, set, list)):
                return [serialize(el) for el in iter_obj]
            elif isinstance(iter_obj, dict):
                return {mstr(k): serialize(v) for k, v in iter_obj.items()}
            else:
                raise SyntaxError(f'Object is not JSON serializable. Incorrect object type: {type(iter_obj)}')

        # repr() rather than str(): identical for containers, numbers and
        # keywords, but it keeps the double quotes around a top-level string.
        return mstr(repr(serialize(iter_obj)))
Exemplo n.º 22
0
 def test_bug_2537(self):
     # issue 2537: empty submatches
     for outer_op in ('{0,}', '*', '+', '{1,187}'):
         for inner_op in ('{0,}', '*', '?'):
             r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
             m = r.match("xyyzy")
             self.assertEqual(m.group(0), "xyy")
             self.assertEqual(m.group(1), "")
             self.assertEqual(m.group(2), "y")
Exemplo n.º 23
0
 def test_bug_764548(self):
     # bug 764548, re.compile() barfs on str/unicode subclasses
     try:
         unicode
     except NameError:
         self.skipTest('no problem if we have no unicode')
     class my_unicode(unicode): pass
     pat = re.compile(my_unicode("abc"))
     self.assertEqual(pat.match("xyz"), None)
Exemplo n.º 24
0
    def __init__(self, index, input_type, rgx, verboselvl=0):
        """Store the rule settings and compile its regular expression.

        :param index: stored unchanged as ``self.index``
        :param input_type: stored as ``self.input_type``; a warning is written
            to stderr when it is not in ``_valid_input_types`` and
            ``verboselvl`` is non-zero
        :param rgx: expression passed to ``compile`` and kept as ``self.rgx``
        :param verboselvl: verbosity level; ``0`` silences the warning
        """
        self.verboselvl = verboselvl
        self.index = index
        if input_type not in _valid_input_types and self.verboselvl:
            # Format the whole message.  ``.format`` used to bind only to the
            # second literal, leaving the first "{}" unfilled and putting the
            # input type where the list of valid types belongs.
            sys.stderr.write(
                ("WARNING: The input type {} is invalid, valid "
                 "input types: {}").format(input_type, _valid_input_types))

        self.input_type = input_type
        self.rgx = compile(rgx)
Exemplo n.º 25
0
def parse_js_shallow(file):
    """List the functions and methods that ctags reports for a JavaScript file.

    ``ctags`` is run on ``file``; every tag whose kind is ``function`` or
    ``method`` becomes a ``function`` instance with its name, parent file,
    the file's line count, the body text between the outer braces, the
    ``(start, end)`` line numbers and a running ``funcId``.

    :param file: path of the JavaScript source file
    :return: list of populated ``function`` instances (empty if ctags fails)
    """
    global delimiter
    delimiter = "\r\0?\r?\0\r"
    functionInstanceList = []

    # Argument list instead of a shell string, so paths containing spaces or
    # shell metacharacters reach ctags unchanged.
    command = ["ctags", "-f", "-", "--kinds-javascript=*", "--fields=neK", file]
    try:
        astString = subprocess.check_output(command,
                                            stderr=subprocess.STDOUT).decode()
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing ctags binary, which the shell used to
        # report through a non-zero exit status.
        print("Parser Error:", e)
        astString = ""

    # Close the source file as soon as it has been read.
    with open(file, 'r') as f:
        lines = f.readlines()

    number = re.compile(r'(\d+)')
    funcB = pcre.compile(r'function[^{]+({(?:[^{}]+|(?-1))*+})')

    funcId = 1

    for entry in astString.split('\n'):
        elemList = re.sub(r'[\t\s ]{2,}', '', entry).split("\t")
        functionInstance = function(file)
        functionInstance.funcBody = ''
        if entry == '' or len(elemList) < 5:
            continue
        if elemList[3] not in ('function', 'method'):
            continue

        functionInstance.name = elemList[0]
        functionInstance.parentFile = elemList[1]
        functionInstance.parentNumLoc = len(lines)

        # First run of digits in the fifth field is the tag's line number.
        startLine = int(number.search(elemList[4]).group(0))
        # Search for the body from the tag's line to the end of the file.
        bodyMatch = funcB.search(" ".join(lines[startLine - 1:]))
        if bodyMatch:
            # Drop the surrounding braces.
            functionInstance.funcBody = bodyMatch.group(1)[1:-1]
        else:
            functionInstance.funcBody = " "
        functionInstance.lines = (
            startLine,
            startLine + functionInstance.funcBody.count("\n"))
        functionInstance.funcId = funcId
        funcId += 1
        functionInstanceList.append(functionInstance)

    return functionInstanceList
Exemplo n.º 26
0
 def test_keyword_parameters(self):
     # Issue #20283: Accepting the string keyword parameter.
     pat = re.compile(r'(ab)')
     self.assertEqual(
         pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
     self.assertEqual(
         pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
     self.assertEqual(pat.findall(string='abracadabra', pos=3, endpos=10),
                      ['ab'])
     self.assertEqual(pat.split(string='abracadabra', maxsplit=1),
                      ['', 'ab', 'racadabra'])
Exemplo n.º 27
0
 def test_keyword_parameters(self):
     # Issue #20283: Accepting the string keyword parameter.
     pat = re.compile(r'(ab)')
     self.assertEqual(
         pat.match(string='abracadabra', pos=7, endpos=10).span(), (7, 9))
     self.assertEqual(
         pat.search(string='abracadabra', pos=3, endpos=10).span(), (7, 9))
     self.assertEqual(
         pat.findall(string='abracadabra', pos=3, endpos=10), ['ab'])
     self.assertEqual(
         pat.split(string='abracadabra', maxsplit=1),
         ['', 'ab', 'racadabra'])
Exemplo n.º 28
0
    def test_inline_flags(self):
        # Bug #1700
        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow

        p = re.compile(upper_char, re.I | re.U)
        q = p.match(lower_char)
        self.assertNotEqual(q, None)

        p = re.compile(lower_char, re.I | re.U)
        q = p.match(upper_char)
        self.assertNotEqual(q, None)

        p = re.compile('(?i)' + upper_char, re.U)
        q = p.match(lower_char)
        self.assertNotEqual(q, None)

        p = re.compile('(?i)' + lower_char, re.U)
        q = p.match(upper_char)
        self.assertNotEqual(q, None)

        p = re.compile('(?iu)' + upper_char)
        q = p.match(lower_char)
        self.assertNotEqual(q, None)

        p = re.compile('(?iu)' + lower_char)
        q = p.match(upper_char)
        self.assertNotEqual(q, None)
Exemplo n.º 29
0
 def readrgxs(self, ruledir):
     """Load the named regular expressions from ``regularexpressions.xml``.

     Each ``<regex>`` element under ``<regexs>`` is compiled from the text of
     its ``<exp>`` child and stored in ``self.rgxs`` under its ``name``
     attribute.  An expression that cannot be compiled is reported on stderr
     and skipped.

     :param ruledir: directory containing ``regularexpressions.xml``
     """
     if self.verboselvl > 1:
         sys.stderr.write('\tLoading normaliser rules regular expressions\n')
     parser = etree.XMLParser(remove_blank_text=True, strip_cdata=False)
     regularexpressionsxml = etree.parse((ruledir +
                                          '/regularexpressions.xml'),
                                         parser=parser)
     for rgx in regularexpressionsxml.find('regexs').findall('regex'):
         name = rgx.get('name')
         # Read the expression before the try block, so that a missing
         # <exp> child cannot raise a second time inside the handler.
         exp_node = rgx.find('exp')
         expression = exp_node.text if exp_node is not None else None
         try:
             self.rgxs[name] = compile(expression)
         except Exception:
             # ``Exception`` rather than a bare except, so KeyboardInterrupt
             # and SystemExit are no longer swallowed.
             sys.stderr.write(('WARNING Bad regex: ' +
                               '%s %s \n' % (name, expression)))
Exemplo n.º 30
0
    def test_re_groupref_exists(self):
        self.assertEqual(
            re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(), ('(', 'a'))
        self.assertEqual(
            re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(), (None, 'a'))
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
        self.assertEqual(
            re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(), ('a', 'b'))
        self.assertEqual(
            re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(), (None, 'd'))
        self.assertEqual(
            re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(), (None, 'd'))
        self.assertEqual(
            re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(), ('a', ''))

        # Tests for bug #1177831: exercise groups other than the first group
        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
        self.assertEqual(p.match('abc').groups(), ('a', 'b', 'c'))
        self.assertEqual(p.match('ad').groups(), ('a', None, 'd'))
        self.assertEqual(p.match('abd'), None)
        self.assertEqual(p.match('ac'), None)
Exemplo n.º 31
0
    def test_re_groupref_exists(self):
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
                         ('(', 'a'))
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
                         (None, 'a'))
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
                         ('a', 'b'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
                         ('a', ''))

        # Tests for bug #1177831: exercise groups other than the first group
        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
        self.assertEqual(p.match('abc').groups(),
                         ('a', 'b', 'c'))
        self.assertEqual(p.match('ad').groups(),
                         ('a', None, 'd'))
        self.assertEqual(p.match('abd'), None)
        self.assertEqual(p.match('ac'), None)
Exemplo n.º 32
0
 def test_group_name_in_exception(self):
     # Issue 17341: Poor error message when compiling invalid regex
     with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
         re.compile('(?P<?foo>)')
Exemplo n.º 33
0
 def test_weakref(self):
     s = 'QabbbcR'
     x = re.compile('ab+c')
     y = proxy(x)
     self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
Exemplo n.º 34
0
 def test_flags(self):
     for flag in [re.I, re.M, re.X, re.S, re.L]:
         self.assertNotEqual(re.compile('^pattern$', flag), None)
Exemplo n.º 35
0
 def setUp(self):
     """Compile the date pattern (named groups date/year/month/day) for each test."""
     self.regex = pcre.compile(
         r'(?<date>(?<year>(\d\d)?\d\d) - (?<month>\d\d) - (?<day>\d\d))')
Exemplo n.º 36
0
 def test_bug_612074(self):
     pat=u"["+re.escape(u"\u2039")+u"]"
     self.assertEqual(re.compile(pat) and 1, 1)
Exemplo n.º 37
0
def run_re_tests():
    """Run every entry of ``test.re_tests.tests`` and print unexpected outcomes.

    Each test tuple is ``(pattern, s, outcome)`` or
    ``(pattern, s, outcome, repl, expected)``; any other length raises
    ValueError.  The pattern is compiled and searched for in ``s``, and the
    result is compared with ``outcome`` (SUCCEED, FAIL or SYNTAX_ERROR).  For
    a successful match, ``repl`` is evaluated against the match groups and
    compared with ``expected``, and the search is repeated with unicode
    input, a unicode pattern, a range-limited search and the IGNORECASE,
    LOCALE and UNICODE flags.  Problems are printed, not raised.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print 'Running re_tests test suite'
    else:
        # To save time, only run the first and last 10 tests
        #tests = tests[:10] + tests[-10:]
        pass

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError, ('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            if outcome == SYNTAX_ERROR: pass  # Expected a syntax error
            else:
                print '=== Syntax error:', t
        except KeyboardInterrupt: raise KeyboardInterrupt
        except:
            print '*** Unexpected error ***', t
            if verbose:
                traceback.print_exc(file=sys.stdout)
        else:
            # NOTE(review): if search() raises re.error, ``result`` keeps the
            # value from a previous iteration (or is unbound on the first
            # one) when it is inspected below.
            try:
                result = obj.search(s)
            except re.error, msg:
                print '=== Unexpected exception', t, repr(msg)
            if outcome == SYNTAX_ERROR:
                # This should have been a syntax error; forget it.
                pass
            elif outcome == FAIL:
                if result is None: pass   # No match, as expected
                else: print '=== Succeeded incorrectly', t
            elif outcome == SUCCEED:
                if result is not None:
                    # Matched, as expected, so now we compute the
                    # result string and compare it to our expected result.
                    start, end = result.span(0)
                    # Namespace in which the ``repl`` expression is evaluated.
                    vardict={'found': result.group(0),
                             'groups': result.group(),
                             'flags': result.re.flags}
                    # Expose groups 1..99 as g1..g99; groups that do not
                    # exist become the string "Error".
                    for i in range(1, 100):
                        try:
                            gi = result.group(i)
                            # Special hack because else the string concat fails:
                            if gi is None:
                                gi = "None"
                        except IndexError:
                            gi = "Error"
                        vardict['g%d' % i] = gi
                    # Named groups are exposed under their own names.
                    for i in result.re.groupindex.keys():
                        try:
                            gi = result.group(i)
                            if gi is None:
                                gi = "None"
                        except IndexError:
                            gi = "Error"
                        vardict[i] = gi
                    repl = eval(repl, vardict)
                    if repl != expected:
                        print '=== grouping error', t,
                        print repr(repl) + ' should be ' + repr(expected)
                else:
                    print '=== Failed incorrectly', t

                # Try the match on a unicode string, and check that it
                # still succeeds.
                try:
                    result = obj.search(unicode(s, "latin-1"))
                    if result is None:
                        print '=== Fails on unicode match', t
                except NameError:
                    continue # 1.5.2
                except TypeError:
                    continue # unicode test case

                # Try the match on a unicode pattern, and check that it
                # still succeeds.
                obj=re.compile(unicode(pattern, "latin-1"))
                result = obj.search(s)
                if result is None:
                    print '=== Fails on unicode pattern match', t

                # Try the match with the search area limited to the extent
                # of the match and see if it still succeeds.  \B will
                # break (because it won't match at the end or start of a
                # string), so we'll ignore patterns that feature it.

                if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
                               and result is not None:
                    obj = re.compile(pattern)
                    result = obj.search(s, result.start(0), result.end(0) + 1)
                    if result is None:
                        print '=== Failed on range-limited match', t

                # Try the match with IGNORECASE enabled, and check that it
                # still succeeds.
                obj = re.compile(pattern, re.IGNORECASE)
                result = obj.search(s)
                if result is None:
                    print '=== Fails on case-insensitive match', t

                # Try the match with LOCALE enabled, and check that it
                # still succeeds.
                obj = re.compile(pattern, re.LOCALE)
                result = obj.search(s)
                if result is None:
                    print '=== Fails on locale-sensitive match', t

                # Try the match with UNICODE locale enabled, and check
                # that it still succeeds.
                obj = re.compile(pattern, re.UNICODE)
                result = obj.search(s)
                if result is None:
                    print '=== Fails on unicode-sensitive match', t
Exemplo n.º 38
0
 def pickle_test(self, pickle):
     """Round-trip a compiled pattern through ``pickle.dumps`` / ``pickle.loads``.

     ``pickle`` is the object providing ``dumps`` and ``loads``; the restored
     pattern must compare equal to the original.
     """
     oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
     newpat = pickle.loads(pickle.dumps(oldpat))
     self.assertEqual(oldpat, newpat)
Exemplo n.º 39
0
 def test_big_codesize(self):
     # Issue #1160
     r = re.compile('|'.join(('%d'%x for x in range(10000))))
     self.assertIsNotNone(r.match('1000'))
     self.assertIsNotNone(r.match('9999'))
Exemplo n.º 40
0
 def test_bug_3629(self):
     # A regex that triggered a bug in the sre-code validator
     re.compile("(?P<quote>)(?(quote))")