Пример #1
0
 def test_bug_926075(self):
     try:
         unicode
     except NameError:
         return # no problem if we have no unicode
     self.assertTrue(re.compile('bug_926075') is not
                  re.compile(eval("u'bug_926075'")))
Пример #2
0
    def test_re_match(self):
        self.assertEqual(re.match('a', 'a').groups(), ())
        self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
        self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
        self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
        self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))

        pat = re.compile('((a)|(b))(c)?')
        self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
        self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
        self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
        self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
        self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))

        # A single group
        m = re.match('(a)', 'a')
        self.assertEqual(m.group(0), 'a')
        self.assertEqual(m.group(0), 'a')
        self.assertEqual(m.group(1), 'a')
        self.assertEqual(m.group(1, 1), ('a', 'a'))

        pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
        self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
        self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
                         (None, 'b', None))
        self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
Пример #3
0
 def test_empty_array(self):
     # SF buf 1647541
     import array
     for typecode in 'cbBuhHiIlLfd':
         a = array.array(typecode)
         self.assertEqual(re.compile("bla").match(a), None)
         self.assertEqual(re.compile("").match(a).groups(), ())
Пример #4
0
    def test_dollar_matches_twice(self):
        "$ matches the end of string, and just before the terminating \n"
        pattern = re.compile('$')
        self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
        self.assertEqual(pattern.sub('#', '\n'), '#\n#')

        pattern = re.compile('$', re.MULTILINE)
        self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
        self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
        self.assertEqual(pattern.sub('#', '\n'), '#\n#')
Пример #5
0
def test_regex_compile(count):
    """Time repeated compilation of the captured regexes.

    ``re._cache`` is first replaced with an ``EmptyCache`` instance
    (presumably so compiled patterns are not reused between passes --
    confirm against the EmptyCache definition).  Every
    ``(regex, flags)`` pair returned by ``capture_regexes()`` is then
    compiled ``count`` times over.

    Returns a list with one ``time.time()`` difference per pass.
    """
    re._cache = EmptyCache()
    patterns = capture_regexes()
    durations = []

    for _ in xrange(count):
        started = time.time()
        for source, flags in patterns:
            re.compile(source, flags)
        durations.append(time.time() - started)
    return durations
Пример #6
0
 def test_bug_1661(self):
     # Verify that flags do not get silently ignored with compiled patterns
     pattern = re.compile('.')
     self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
     self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
     self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
     self.assertRaises(ValueError, re.compile, pattern, re.I)
Пример #7
0
 def test_bug_931848(self):
     try:
         unicode
     except NameError:
         pass
     pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
     self.assertEqual(re.compile(pattern).split("a.b.c"),
                      ['a','b','c'])
Пример #8
0
    def test_bug_581080(self):
        iter = re.finditer(r"\s", "a b")
        self.assertEqual(iter.next().span(), (1,2))
        self.assertRaises(StopIteration, iter.next)

        scanner = re.compile(r"\s").scanner("a b")
        self.assertEqual(scanner.search().span(), (1, 2))
        self.assertEqual(scanner.search(), None)
Пример #9
0
 def test_bug_764548(self):
     # bug 764548, re.compile() barfs on str/unicode subclasses
     try:
         unicode
     except NameError:
         return  # no problem if we have no unicode
     class my_unicode(unicode): pass
     pat = re.compile(my_unicode("abc"))
     self.assertEqual(pat.match("xyz"), None)
Пример #10
0
    def test_inline_flags(self):
        # Bug #1700
        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow

        p = re.compile(upper_char, re.I | re.U)
        q = p.match(lower_char)
        self.assertNotEqual(q, None)

        p = re.compile(lower_char, re.I | re.U)
        q = p.match(upper_char)
        self.assertNotEqual(q, None)

        p = re.compile('(?i)' + upper_char, re.U)
        q = p.match(lower_char)
        self.assertNotEqual(q, None)

        p = re.compile('(?i)' + lower_char, re.U)
        q = p.match(upper_char)
        self.assertNotEqual(q, None)

        p = re.compile('(?iu)' + upper_char)
        q = p.match(lower_char)
        self.assertNotEqual(q, None)

        p = re.compile('(?iu)' + lower_char)
        q = p.match(upper_char)
        self.assertNotEqual(q, None)
Пример #11
0
    def test_re_groupref_exists(self):
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
                         ('(', 'a'))
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
                         (None, 'a'))
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
        self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
                         ('a', 'b'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
                         (None, 'd'))
        self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
                         ('a', ''))

        # Tests for bug #1177831: exercise groups other than the first group
        p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
        self.assertEqual(p.match('abc').groups(),
                         ('a', 'b', 'c'))
        self.assertEqual(p.match('ad').groups(),
                         ('a', None, 'd'))
        self.assertEqual(p.match('abd'), None)
        self.assertEqual(p.match('ac'), None)
Пример #12
0
# Python imports
import optparse
import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
import pcre_re as re
import time

# Local imports
import util

# These are the regular expressions to be tested. These sync up,
# index-for-index with the list of strings generated by gen_string_table()
# below.
regexs = [
    re.compile('Python|Perl'),
    re.compile('Python|Perl'),
    re.compile('(Python|Perl)'),
    re.compile('(?:Python|Perl)'),
    re.compile('Python'),
    re.compile('Python'),
    re.compile('.*Python'),
    re.compile('.*Python.*'),
    re.compile('.*(Python)'),
    re.compile('.*(?:Python)'),
    re.compile('Python|Perl|Tcl'),
    re.compile('Python|Perl|Tcl'),
    re.compile('(Python|Perl|Tcl)'),
    re.compile('(?:Python|Perl|Tcl)'),
    re.compile('(Python)\\1'),
    re.compile('(Python)\\1'),
Пример #13
0
def run_re_tests():
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    if verbose:
        print 'Running re_tests test suite'
    else:
        # To save time, only run the first and last 10 tests
        #tests = tests[:10] + tests[-10:]
        pass

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError, ('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            if outcome == SYNTAX_ERROR: pass  # Expected a syntax error
            else:
                print '=== Syntax error:', t
        except KeyboardInterrupt: raise KeyboardInterrupt
        except:
            print '*** Unexpected error ***', t
            if verbose:
                traceback.print_exc(file=sys.stdout)
        else:
            try:
                result = obj.search(s)
            except re.error, msg:
                print '=== Unexpected exception', t, repr(msg)
            if outcome == SYNTAX_ERROR:
                # This should have been a syntax error; forget it.
                pass
            elif outcome == FAIL:
                if result is None: pass   # No match, as expected
                else: print '=== Succeeded incorrectly', t
            elif outcome == SUCCEED:
                if result is not None:
                    # Matched, as expected, so now we compute the
                    # result string and compare it to our expected result.
                    start, end = result.span(0)
                    vardict={'found': result.group(0),
                             'groups': result.group(),
                             'flags': result.re.flags}
                    for i in range(1, 100):
                        try:
                            gi = result.group(i)
                            # Special hack because else the string concat fails:
                            if gi is None:
                                gi = "None"
                        except IndexError:
                            gi = "Error"
                        vardict['g%d' % i] = gi
                    for i in result.re.groupindex.keys():
                        try:
                            gi = result.group(i)
                            if gi is None:
                                gi = "None"
                        except IndexError:
                            gi = "Error"
                        vardict[i] = gi
                    repl = eval(repl, vardict)
                    if repl != expected:
                        print '=== grouping error', t,
                        print repr(repl) + ' should be ' + repr(expected)
                else:
                    print '=== Failed incorrectly', t

                # Try the match on a unicode string, and check that it
                # still succeeds.
                try:
                    result = obj.search(unicode(s, "latin-1"))
                    if result is None:
                        print '=== Fails on unicode match', t
                except NameError:
                    continue # 1.5.2
                except TypeError:
                    continue # unicode test case

                # Try the match on a unicode pattern, and check that it
                # still succeeds.
                obj=re.compile(unicode(pattern, "latin-1"))
                result = obj.search(s)
                if result is None:
                    print '=== Fails on unicode pattern match', t

                # Try the match with the search area limited to the extent
                # of the match and see if it still succeeds.  \B will
                # break (because it won't match at the end or start of a
                # string), so we'll ignore patterns that feature it.

                if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
                               and result is not None:
                    obj = re.compile(pattern)
                    result = obj.search(s, result.start(0), result.end(0) + 1)
                    if result is None:
                        print '=== Failed on range-limited match', t

                # Try the match with IGNORECASE enabled, and check that it
                # still succeeds.
                obj = re.compile(pattern, re.IGNORECASE)
                result = obj.search(s)
                if result is None:
                    print '=== Fails on case-insensitive match', t

                # Try the match with LOCALE enabled, and check that it
                # still succeeds.
                obj = re.compile(pattern, re.LOCALE)
                result = obj.search(s)
                if result is None:
                    print '=== Fails on locale-sensitive match', t

                # Try the match with UNICODE locale enabled, and check
                # that it still succeeds.
                obj = re.compile(pattern, re.UNICODE)
                result = obj.search(s)
                if result is None:
                    print '=== Fails on unicode-sensitive match', t
Пример #14
0
 def test_bug_3629(self):
     # A regex that triggered a bug in the sre-code validator
     re.compile("(?P<quote>)(?(quote))")
Пример #15
0
 def test_weakref(self):
     s = 'QabbbcR'
     x = re.compile('ab+c')
     y = proxy(x)
     self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
Пример #16
0
 def pickle_test(self, pickle):
     # Round-trip a compiled pattern through dumps()/loads() of the given
     # pickle-like module and require the result to compare equal to the
     # original.
     original = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
     restored = pickle.loads(pickle.dumps(original))
     self.assertEqual(original, restored)
Пример #17
0
 def test_flags(self):
     for flag in [re.I, re.M, re.X, re.S, re.L]:
         self.assertNotEqual(re.compile('^pattern$', flag), None)
Пример #18
0
 def test_bug_612074(self):
     pat=u"["+re.escape(u"\u2039")+u"]"
     self.assertEqual(re.compile(pat) and 1, 1)