def test_not_literal(self):
    """Check the negated set [^a], alone and as an anchored repetition."""
    single = get_code(r"[^a]")
    assert rsre_core.match(single, "A")
    assert not rsre_core.match(single, "a")
    # Same set, repeated and anchored with '$'.
    repeated = get_code(r"[^a]+$")
    assert rsre_core.match(repeated, "Bx123")
    assert not rsre_core.match(repeated, "--a--")
def test_lookbehind(self):
    """Check a pattern that ends in the lookbehind assertion (?<=de)."""
    code = get_code(r"([a-z]*)(?<=de)")
    assert rsre_core.match(code, "ade")
    found = rsre_core.match(code, "adefg")
    # Mark 1 is expected at offset 3, i.e. right after "ade".
    assert found is not None and found.get_mark(1) == 3
    for rejected in ("abc", "X", "eX"):
        assert not rsre_core.match(code, rejected)
def test_group_branch(self):
    """Check marks 0 and 1 for each alternative of the group (ab|c)."""
    code = get_code(r'<abc>(ab|c)</abc>')
    expectations = (
        ('<abc>ab</abc>def', (5, 7)),
        ('<abc>c</abc>def', (5, 6)),
    )
    for subject, marks in expectations:
        found = rsre_core.match(code, subject)
        assert (found.get_mark(0), found.get_mark(1)) == marks
    # Neither alternative fits "de", so there must be no match at all.
    found = rsre_core.match(code, '<abc>de</abc>def')
    assert found is None
def test_not_literal_ignore(self):
    """Check the negated set [^a] under the (?i) flag."""
    single = get_code(r"(?i)[^a]")
    assert rsre_core.match(single, "G")
    # Both cases of the excluded letter must be rejected.
    for excluded in ("a", "A"):
        assert not rsre_core.match(single, excluded)
    repeated = get_code(r"(?i)[^a]+$")
    assert rsre_core.match(repeated, "Gx123")
    assert not rsre_core.match(repeated, "--A--")
def test_group_branch_max_until(self):
    """Check the marks left by a greedily repeated branching group."""
    subject = '<abc>ccabcccaba</abc>def'

    flat_code = get_code(r'<abc>(ab|c)*a</abc>')
    found = rsre_core.match(flat_code, subject)
    assert (found.get_mark(0), found.get_mark(1)) == (12, 14)

    # Same pattern, but each alternative is wrapped in its own group.
    nested_code = get_code(r'<abc>((ab)|(c))*a</abc>')
    found = rsre_core.match(nested_code, subject)
    expected_pairs = (
        (0, 1, (12, 14)),
        (2, 3, (12, 14)),
        (4, 5, (11, 12)),
    )
    for first, second, marks in expected_pairs:
        assert (found.get_mark(first), found.get_mark(second)) == marks
def test_min_until_0_65535(self):
    """Check the lazy repetition (?:xy)*? followed by a mandatory 'xy'."""
    code = get_code(r'<abc>(?:xy)*?xy</abc>')
    # With no 'xy' at all the trailing mandatory 'xy' cannot match.
    found = rsre_core.match(code, '<abc></abc>def')
    assert found is None
    accepted = (
        '<abc>xy</abc>def',
        '<abc>xyxyxy</abc>def',
        '<abc>' + 'xy' * 1000 + '</abc>def',
    )
    for subject in accepted:
        found = rsre_core.match(code, subject)
        assert found is not None
def test_min_until_0_65535(self):
    """Match <abc>(?:xy)*?xy</abc> against zero, one, three and 1000 'xy'."""
    code = get_code(r'<abc>(?:xy)*?xy</abc>')

    def attempt(subject):
        # Small local shortcut around the core matcher.
        return rsre_core.match(code, subject)

    assert attempt('<abc></abc>def') is None
    assert attempt('<abc>xy</abc>def') is not None
    assert attempt('<abc>xyxyxy</abc>def') is not None
    long_subject = '<abc>' + 'xy' * 1000 + '</abc>def'
    assert attempt(long_subject) is not None
def test_groupref_exists(self):
    """Check the conditional group (?(2)...), with and without an else part."""

    def check(code, cases):
        # Each case is (subject, whether a match is expected).
        for subject, should_match in cases:
            found = rsre_core.match(code, subject)
            if should_match:
                assert found
            else:
                assert not found

    without_else = get_code(r"((a)|(b))c(?(2)d)$")
    check(without_else, [
        ("ac", False),
        ("acd", True),
        ("bc", True),
        ("bcd", False),
    ])
    #
    with_else = get_code(r"((a)|(b))c(?(2)d|e)$")
    check(with_else, [
        ("ac", False),
        ("acd", True),
        ("ace", False),
        ("bc", False),
        ("bcd", False),
        ("bce", True),
    ])
def test_empty_minuntil(self):
    """Lazy repeats of a possibly-empty group must fail cleanly on 'z'."""
    # The second element says whether the pattern object returned by
    # get_code_and_re is also exercised.  It is skipped for the unbounded
    # repeats: "CPython bug (at least 2.5) eats all memory".
    variants = (
        (r'(a?)+?y', False),
        (r'(a?){4,6}?y', True),
        (r'(a?)*?y', False),
    )
    for pattern, check_reference in variants:
        code, reference = get_code_and_re(pattern)
        if check_reference:
            assert not reference.match('z')
        found = rsre_core.match(code, 'z')
        assert not found
def test_empty_maxuntil(self):
    """Greedy repeats of a possibly-empty group must still match 'y'."""
    for pattern in (r'(a?)+y', r'(a?){4,6}y', r'(a?)*y'):
        code, reference = get_code_and_re(pattern)
        # The pattern object from get_code_and_re is checked first,
        # then the core matcher on the same input.
        assert reference.match('y')
        found = rsre_core.match(code, 'y')
        assert found
def test_min_until_3_5(self):
    """Check the lazy bounded repeat (?:xy){3,5}? for 0 to 7 copies of 'xy'."""
    code, reference = get_code_and_re(r'<abc>(?:xy){3,5}?xy</abc>')
    for count in range(8):
        subject = '<abc>' + 'xy' * count + '</abc>defdefdefdefdef'
        # One 'xy' is consumed by the mandatory trailing 'xy'.
        should_match = (3 <= count - 1 <= 5)
        assert (reference.match(subject) is not None) is should_match
        found = rsre_core.match(code, subject)
        assert (found is not None) is should_match
def run_external(t, use_search): from pypy.rlib.rsre.test.re_tests import SUCCEED, FAIL, SYNTAX_ERROR pattern, s, outcome = t[:3] if len(t) == 5: repl, expected = t[3:5] else: assert len(t) == 3 print 'trying:', t try: obj = get_code(pattern) except re.error: if outcome == SYNTAX_ERROR: return # Expected a syntax error raise if outcome == SYNTAX_ERROR: raise Exception("this should have been a syntax error") # if use_search: result = rsre_core.search(obj, s) else: # Emulate a poor man's search() with repeated match()s for i in range(len(s) + 1): result = rsre_core.match(obj, s, start=i) if result: break # if outcome == FAIL: if result is not None: raise Exception("succeeded incorrectly") elif outcome == SUCCEED: if result is None: raise Exception("failed incorrectly") # Matched, as expected, so now we compute the # result string and compare it to our expected result. start, end = result.span(0) vardict = { 'found': result.group(0), 'groups': result.group(), } #'flags': result.re.flags} for i in range(1, 100): try: gi = result.group(i) # Special hack because else the string concat fails: if gi is None: gi = "None" except IndexError: gi = "Error" vardict['g%d' % i] = gi #for i in result.re.groupindex.keys(): # try: # gi = result.group(i) # if gi is None: # gi = "None" # except IndexError: # gi = "Error" # vardict[i] = gi repl = eval(repl, vardict) if repl != expected: raise Exception("grouping error: %r should be %r" % (repl, expected))
def test_min_until_3_5(self):
    """Compare both matchers on <abc>(?:xy){3,5}?xy</abc> for 0..7 repeats."""
    code, reference = get_code_and_re(r'<abc>(?:xy){3,5}?xy</abc>')
    head, tail = '<abc>', '</abc>defdefdefdefdef'
    for repeats in range(8):
        subject = head + 'xy' * repeats + tail
        reference_hit = reference.match(subject) is not None
        assert reference_hit is (3 <= repeats - 1 <= 5)
        core_hit = rsre_core.match(code, subject) is not None
        assert core_hit is (3 <= repeats - 1 <= 5)
def run_external(t, use_search): from pypy.rlib.rsre.test.re_tests import SUCCEED, FAIL, SYNTAX_ERROR pattern, s, outcome = t[:3] if len(t) == 5: repl, expected = t[3:5] else: assert len(t) == 3 print 'trying:', t try: obj = get_code(pattern) except re.error: if outcome == SYNTAX_ERROR: return # Expected a syntax error raise if outcome == SYNTAX_ERROR: raise Exception("this should have been a syntax error") # if use_search: result = rsre_core.search(obj, s) else: # Emulate a poor man's search() with repeated match()s for i in range(len(s)+1): result = rsre_core.match(obj, s, start=i) if result: break # if outcome == FAIL: if result is not None: raise Exception("succeeded incorrectly") elif outcome == SUCCEED: if result is None: raise Exception("failed incorrectly") # Matched, as expected, so now we compute the # result string and compare it to our expected result. start, end = result.span(0) vardict={'found': result.group(0), 'groups': result.group(), }#'flags': result.re.flags} for i in range(1, 100): try: gi = result.group(i) # Special hack because else the string concat fails: if gi is None: gi = "None" except IndexError: gi = "Error" vardict['g%d' % i] = gi #for i in result.re.groupindex.keys(): # try: # gi = result.group(i) # if gi is None: # gi = "None" # except IndexError: # gi = "Error" # vardict[i] = gi repl = eval(repl, vardict) if repl != expected: raise Exception("grouping error: %r should be %r" % (repl, expected))
def test_minuntil_bug(self):
    """Check group positions for ((x|yz)+?(y)??c)* on 'xycxyzxc'."""
    subject = 'xycxyzxc'
    code, reference = get_code_and_re(r'((x|yz)+?(y)??c)*')
    reference_match = reference.match(subject)
    assert reference_match.span(2) == (6, 7)
    # span(3) == (1, 2) is not asserted on the reference object:
    # "bug of CPython".
    found = rsre_core.match(code, subject)
    second_group = (found.get_mark(2), found.get_mark(3))
    assert second_group == (6, 7)
    third_group = (found.get_mark(4), found.get_mark(5))
    assert third_group == (1, 2)
def test_assert_not_group(self):
    """Check the spans reported around a group inside a negative lookahead."""
    code = get_code(r"abc(?!(de)f)(.)")
    found = rsre_core.match(code, "abcdeFghi")
    assert found is not None
    assert found.span(2) == (3, 4)
    # The span of the group inside (?!...) is "Horrendously Implementation
    # Dependent": CPython answers (3, 5), here it must stay unset.
    assert found.span(1) == (-1, -1)
def test_minuntil_lastmark_restore(self):
    """Check group positions for (x|yz)+?(y)??c on 'xyzxc'."""
    subject = 'xyzxc'
    code, reference = get_code_and_re(r'(x|yz)+?(y)??c')
    reference_match = reference.match(subject)
    assert reference_match.span(1) == (3, 4)
    assert reference_match.span(2) == (-1, -1)
    # The core matcher must report the same positions through its marks.
    found = rsre_core.match(code, subject)
    assert (found.get_mark(0), found.get_mark(1)) == (3, 4)
    assert (found.get_mark(2), found.get_mark(3)) == (-1, -1)
def test_groupref(self):
    """Check the backreference pattern (xx+)\\1+$ on runs of 'x'."""
    code = get_code(r"(xx+)\1+$")     # match non-prime numbers of x
    # (subject, whether a match is expected)
    cases = [
        ("xx", False),
        ("xxx", False),
        ("xxxx", True),
        ("xxxxx", False),
        ("xxxxxx", True),
        ("xxxxxxx", False),
        ("xxxxxxxx", True),
        ("xxxxxxxxx", True),
    ]
    for subject, should_match in cases:
        found = rsre_core.match(code, subject)
        if should_match:
            assert found
        else:
            assert not found
def test_groupref_ignore(self):
    """Check the backreference pattern (xx+)\\1+$ under the (?i) flag."""
    code = get_code(r"(?i)(xx+)\1+$")     # match non-prime numbers of x
    # (subject, whether a match is expected)
    cases = [
        ("xX", False),
        ("xxX", False),
        ("Xxxx", True),
        ("xxxXx", False),
        ("xXxxxx", True),
        ("xxxXxxx", False),
        ("xxxxxxXx", True),
        ("xxxXxxxxx", True),
    ]
    for subject, should_match in cases:
        found = rsre_core.match(code, subject)
        if should_match:
            assert found
        else:
            assert not found
def test_group_7(self):
    """Check group positions for <abc>((a)?(b))*</abc>."""
    subject = '<abc>bbbabbbb</abc>'
    code, reference = get_code_and_re(r'<abc>((a)?(b))*</abc>')

    reference_match = reference.match(subject)
    assert reference_match.span(1) == (12, 13)
    assert reference_match.span(3) == (12, 13)
    assert reference_match.span(2) == (8, 9)

    # The same positions, read back from the core matcher's marks.
    found = rsre_core.match(code, subject)
    for first, second, marks in ((0, 1, (12, 13)),
                                 (4, 5, (12, 13)),
                                 (2, 3, (8, 9))):
        assert (found.get_mark(first), found.get_mark(second)) == marks
def test_group_branch_repeat_complex_case(self):
    """Check group positions for <abc>((a)|(b))*</abc> on '<abc>ab</abc>'."""
    subject = '<abc>ab</abc>'
    code, reference = get_code_and_re(r'<abc>((a)|(b))*</abc>')

    reference_match = reference.match(subject)
    assert reference_match.span(1) == (6, 7)
    assert reference_match.span(3) == (6, 7)
    assert reference_match.span(2) == (5, 6)

    # The same positions, read back from the core matcher's marks.
    found = rsre_core.match(code, subject)
    for first, second, marks in ((0, 1, (6, 7)),
                                 (4, 5, (6, 7)),
                                 (2, 3, (5, 6))):
        assert (found.get_mark(first), found.get_mark(second)) == marks
def entrypoint1(r, string, repeat):
    """Match `string` against the code `r`, `repeat` times in a row.

    Returns the `match_end` of the last attempt, or -1 when that attempt
    failed (or when no attempt was made because `repeat` is not positive).
    """
    code = array2list(r)
    text = hlstr(string)
    last = None
    for _round in range(repeat):
        last = rsre_core.match(code, text)
    if last is not None:
        return last.match_end
    return -1
def test_match_end(self):
    """Check how the `end` argument limits matching of 'ab' in 'abc'."""
    code = get_code("ab")
    assert rsre_core.match(code, "abc")
    # Limits that still leave room for both characters.
    for limit in (333, 3, 2):
        assert rsre_core.match(code, "abc", end=limit)
    # Limits that cut the subject too short.
    for limit in (1, 0, -1):
        assert not rsre_core.match(code, "abc", end=limit)
def test_in_ignore(self):
    """Check the range set [a-f] under the (?i) flag."""
    single = get_code(r"(?i)[a-f]")
    for accepted in ("b", "C"):
        assert rsre_core.match(single, accepted)
    assert not rsre_core.match(single, "g")
    # Same set, repeated and anchored with '$'.
    repeated = get_code(r"(?i)[a-f]+$")
    assert rsre_core.match(repeated, "bCdEf")
    for rejected in ("g", "aaagaaa"):
        assert not rsre_core.match(repeated, rejected)
def test_match_start(self):
    """Check '^' with a non-zero `start`, without and with (?m)."""
    plain = get_code(r"^ab")
    assert rsre_core.match(plain, "abc")
    for subject in ("xxxabc", "xx\nabc"):
        assert not rsre_core.match(plain, subject, start=3)
    #
    # With (?m), the position right after the newline is accepted.
    multiline = get_code(r"(?m)^ab")
    assert rsre_core.match(multiline, "abc")
    assert not rsre_core.match(multiline, "xxxabc", start=3)
    assert rsre_core.match(multiline, "xx\nabc", start=3)
def test_bug1(self):
    """Check nested repetitions: every (pattern, subject) pair must match."""
    cases = [
        # REPEAT_ONE inside REPEAT
        (r"(?:.+)?B", "AB"),
        (r"(?:AA+?)+B", "AAAB"),
        (r"(?:AA+)+?B", "AAAB"),
        (r"(?:AA+?)+?B", "AAAB"),
        # REPEAT inside REPEAT
        (r"(?:(?:xy)+)?B", "xyB"),
        (r"(?:xy(?:xy)+?)+B", "xyxyxyB"),
        (r"(?:xy(?:xy)+)+?B", "xyxyxyB"),
        (r"(?:xy(?:xy)+?)+?B", "xyxyxyB"),
    ]
    for pattern, subject in cases:
        code = get_code(pattern)
        assert rsre_core.match(code, subject) is not None
def test_min_repeat_one(self):
    """Check the lazy bounded repeat .{3,5}? for 0 to 7 filler characters."""
    code = get_code(r'<abc>.{3,5}?y')
    for count in range(8):
        subject = '<abc>' + 'x' * count + 'y'
        found = rsre_core.match(code, subject)
        # A match is expected exactly when the filler length is 3, 4 or 5.
        within_bounds = (3 <= count <= 5)
        assert (found is not None) is within_bounds
def test_code3(self):
    """Check that a lazy (.*?) group between <title> tags finds a match."""
    code = get_code(r'<item>\s*<title>(.*?)</title>')
    subject = "<item> <title>abc</title>def"
    found = rsre_core.match(code, subject)
    assert found is not None
def test_category(self):
    r"""Check a set mixing the \s category with a literal 'x'."""
    code = get_code(r"[\sx]")
    for accepted in ("x", " "):
        assert rsre_core.match(code, accepted)
    assert not rsre_core.match(code, "n")
def test_simple_group(self):
    """Check marks 0 and 1 for the single group in <abc>(x.)</abc>."""
    code = get_code(r'<abc>(x.)</abc>')
    found = rsre_core.match(code, '<abc>xa</abc>def')
    assert found is not None
    # The group covers "xa", i.e. offsets 5 to 7.
    group_start = found.get_mark(0)
    assert group_start == 5
    group_end = found.get_mark(1)
    assert group_end == 7
def match(self, string, pos=0, endpos=sys.maxint):
    """Run rsre_core.match() on `string` and wrap what it returns.

    `pos` and `endpos` are passed through positionally, together with
    this object's `_code` and `flags`; the outcome is handed to
    `self._make_match`.
    """
    wrap = self._make_match
    core_result = rsre_core.match(self._code, string, pos, endpos,
                                  flags=self.flags)
    return wrap(core_result)
def test_min_repeat_one(self):
    """Match <abc>.{3,5}?y against 0..7 'x' characters followed by 'y'."""
    code = get_code(r'<abc>.{3,5}?y')
    for filler_length in range(8):
        filler = 'x' * filler_length
        found = rsre_core.match(code, '<abc>' + filler + 'y')
        matched = found is not None
        # Only filler lengths inside the {3,5} bounds may match.
        assert matched is (3 <= filler_length <= 5)
def test_max_until_groups(self):
    """Check marks 0 and 1 after the greedy repeated group (x.)*."""
    code = get_code(r'<abc>(x.)*xy</abc>')
    found = rsre_core.match(code, '<abc>xaxbxy</abc>def')
    assert found is not None
    # The marks must describe the last iteration of the group ("xb").
    group_start = found.get_mark(0)
    assert group_start == 7
    group_end = found.get_mark(1)
    assert group_end == 9
def test_repeated_set(self):
    """Check a repeated character set followed by a literal 'f'."""
    code = get_code(r"[a0x]+f")
    accepted = rsre_core.match(code, "a0af")
    assert accepted
    # 'y' is outside the set, so the run is interrupted before any 'f'.
    rejected = rsre_core.match(code, "a0yaf")
    assert not rejected
def match(self, string, pos=0, endpos=sys.maxint):
    """Match `string` with the core engine and return the wrapped result.

    Calls rsre_core.match() with `self._code`, the given bounds and
    `self.flags`, then passes its return value to `self._make_match`.
    """
    make = self._make_match
    return make(rsre_core.match(self._code, string, pos, endpos,
                                flags=self.flags))