Пример #1
0
def test_charclass_in_range():
    r"""Check the \d, \D, \s, \S, \w and \W escapes when used inside [...]."""
    # Each row: pattern, inputs that must be recognized, inputs that must not.
    table = (
        (r"[\de]", ('0', '5', '9', 'e'), ('d',)),
        (r"[\de]{2,}", ('09', '158', '3eee'), ('1', 'ddee')),
        (r"[\D5]", ('d', '\n', '5'), ('0',)),
        (r"[\s][\S]", (' d', '\t9'), ('d ', '99', '\r\r')),
        (r"[\w]+\W[\w]+", ('hey hey',), ('word', 'variable_name')),
    )
    for pattern, accepted, rejected in table:
        runner = make_runner(pattern)
        for text in accepted:
            assert runner.recognize(text)
        for text in rejected:
            assert not runner.recognize(text)
Пример #2
0
def test_range():
    """Check bracketed ranges: a single range, two ranges, and their negation."""
    upper = make_runner("[A-Z]")
    for ch in "AFZ":
        assert upper.recognize(ch)
    assert not upper.recognize("j")

    # Characters inside / outside the union of a-c, e and g-i.
    inside, outside = "abceghi", "df"

    positive = make_runner("[a-ceg-i]")
    for ch in inside:
        assert positive.recognize(ch)
    for ch in outside:
        assert not positive.recognize(ch)

    # The negated class must give exactly the opposite answers.
    negated = make_runner("[^a-ceg-i]")
    for ch in inside:
        assert not negated.recognize(ch)
    for ch in outside:
        assert negated.recognize(ch)
Пример #3
0
def test_charclass_in_range():
    r"""Check the \d, \D, \s, \S, \w and \W escapes when used inside [...]."""
    # Each row: pattern, inputs that must be recognized, inputs that must not.
    table = (
        (r"[\de]", ('0', '5', '9', 'e'), ('d',)),
        (r"[\de]{2,}", ('09', '158', '3eee'), ('1', 'ddee')),
        (r"[\D5]", ('d', '\n', '5'), ('0',)),
        (r"[\s][\S]", (' d', '\t9'), ('d ', '99', '\r\r')),
        (r"[\w]+\W[\w]+", ('hey hey',), ('word', 'variable_name')),
    )
    for pattern, accepted, rejected in table:
        runner = make_runner(pattern)
        for text in accepted:
            assert runner.recognize(text)
        for text in rejected:
            assert not runner.recognize(text)
Пример #4
0
def test_range():
    """Check bracketed ranges: a single range, two ranges, and their negation."""
    upper = make_runner("[A-Z]")
    for ch in "AFZ":
        assert upper.recognize(ch)
    assert not upper.recognize("j")

    # Characters inside / outside the union of a-c, e and g-i.
    inside, outside = "abceghi", "df"

    positive = make_runner("[a-ceg-i]")
    for ch in inside:
        assert positive.recognize(ch)
    for ch in outside:
        assert not positive.recognize(ch)

    # The negated class must give exactly the opposite answers.
    negated = make_runner("[^a-ceg-i]")
    for ch in inside:
        assert not negated.recognize(ch)
    for ch in outside:
        assert negated.recognize(ch)
Пример #5
0
def test_quotes():
    """Check a double-quoted string pattern and newline/hex escapes."""
    # Each row: pattern, then (input, expected verdict) pairs in call order.
    table = [
        ('"[^\\"]*"', [('"abc"', True),
                       ('"asdfefveeaa"', True),
                       ('"""', False)]),
        ('\\n\\x0a', [("n\n", False),
                      ("\n\n", True)]),
    ]
    for pattern, checks in table:
        runner = make_runner(pattern)
        for text, should_match in checks:
            outcome = runner.recognize(text)
            assert outcome if should_match else not outcome
Пример #6
0
def test_quoted():
    """Check a backslash-quoted parenthesis and a hex escape in a group."""
    paren = make_runner("\\(*")
    assert paren.recognize("(")
    assert not paren.recognize("\\(")

    pairs = make_runner("(\\x61a)*")
    for good in ("aa", "aaaaaa"):
        assert pairs.recognize(good)
    for bad in ("a", "aabb"):
        assert not pairs.recognize(bad)
Пример #7
0
def test_plus():
    """Check the + quantifier on a character class and on literals."""
    digits = make_runner("[0-9]+")
    assert digits.recognize("09123")
    assert not digits.recognize("")

    a_then_b = make_runner("a+b+")
    for text in ("ab", "aaaaabbb"):
        assert a_then_b.recognize(text)
    for text in ("b", "a", "c"):
        assert not a_then_b.recognize(text)
Пример #8
0
def test_plus():
    """Check the + quantifier on a character class and on literals."""
    digits = make_runner("[0-9]+")
    assert digits.recognize("09123")
    assert not digits.recognize("")

    a_then_b = make_runner("a+b+")
    for text in ("ab", "aaaaabbb"):
        assert a_then_b.recognize(text)
    for text in ("b", "a", "c"):
        assert not a_then_b.recognize(text)
Пример #9
0
def test_singlequote():
    """Check patterns that contain single-quote characters."""
    # Each row: pattern, inputs that must be recognized, inputs that must not.
    table = [
        ("'", ["'"], ['"']),
        ("'..*'", ["'adadf'"], ["'adfasdf"]),
        ("([a-z]([a-zA-Z0-9]|_)*)|('..*')", ["aasdf", "'X'"], ["''"]),
    ]
    for pattern, accepted, rejected in table:
        runner = make_runner(pattern)
        for text in accepted:
            assert runner.recognize(text)
        for text in rejected:
            assert not runner.recognize(text)
Пример #10
0
def test_questionmark():
    """Check the ? quantifier on a literal and on an optional sign group."""
    optional_b = make_runner("ab?")
    for text in ("a", "ab"):
        assert optional_b.recognize(text)

    integer = make_runner("0|(\\+|\\-)?[1-9][0-9]*")
    # (input, expected verdict) pairs, kept in the original call order.
    checks = [
        ("0", True),
        ("00", False),
        ("12341", True),
        ("021314", False),
        ("+12314", True),
        ("-12314", True),
    ]
    for text, should_match in checks:
        outcome = integer.recognize(text)
        assert outcome if should_match else not outcome
Пример #11
0
def test_singlequote():
    """Check patterns that contain single-quote characters."""
    # Each row: pattern, inputs that must be recognized, inputs that must not.
    table = [
        ("'", ["'"], ['"']),
        ("'..*'", ["'adadf'"], ["'adfasdf"]),
        ("([a-z]([a-zA-Z0-9]|_)*)|('..*')", ["aasdf", "'X'"], ["''"]),
    ]
    for pattern, accepted, rejected in table:
        runner = make_runner(pattern)
        for text in accepted:
            assert runner.recognize(text)
        for text in rejected:
            assert not runner.recognize(text)
Пример #12
0
def test_questionmark():
    """Check the ? quantifier on a literal and on an optional sign group."""
    optional_b = make_runner("ab?")
    for text in ("a", "ab"):
        assert optional_b.recognize(text)

    integer = make_runner("0|(\\+|\\-)?[1-9][0-9]*")
    # (input, expected verdict) pairs, kept in the original call order.
    checks = [
        ("0", True),
        ("00", False),
        ("12341", True),
        ("021314", False),
        ("+12314", True),
        ("-12314", True),
    ]
    for text, should_match in checks:
        outcome = integer.recognize(text)
        assert outcome if should_match else not outcome
Пример #13
0
def test_repetition():
    """Check exact ({n}) and bounded ({m,n}) repetition counts."""
    r = make_runner('a{15}')
    assert r.recognize("a" * 15)
    assert not r.recognize("a" * 14)
    assert not r.recognize("a" * 16)
    # Right length, wrong character: a 16-character input is already rejected
    # on length alone, so it cannot show that the character itself is checked.
    assert not r.recognize("b" * 15)
    assert not r.recognize("b" * 16)
    r = make_runner('a{2,10}')
    assert r.recognize("a" * 2)
    assert r.recognize("a" * 5)
    assert r.recognize("a" * 10)
    assert not r.recognize("a")
    assert not r.recognize("a" + "b")
    assert not r.recognize("a" * 11)
    assert not r.recognize("a" * 12)
Пример #14
0
def test_quoted():
    """Check a backslash-quoted parenthesis and a hex escape in a group."""
    r = make_runner("\\(*")
    assert r.recognize("(")
    assert not r.recognize("\\(")
    # This stanza used to appear twice verbatim; one copy checks the same thing.
    r = make_runner("(\\x61a)*")
    assert r.recognize("aa")
    assert r.recognize("aaaaaa")
    assert not r.recognize("a")
    assert not r.recognize("aabb")
Пример #15
0
def test_quotes():
    """Check a double-quoted string pattern and newline/hex/octal escapes."""
    # Each row: pattern, then (input, expected verdict) pairs in call order.
    table = [
        ('"[^\\"]*"', [('"abc"', True),
                       ('"asdfefveeaa"', True),
                       ('"""', False)]),
        ('\\n\\x0a', [("n\n", False),
                      ("\n\n", True)]),
        ('\\12\\012', [("\n\n", True)]),
        ('\\377\\xff', [("\xff\xff", True)]),
        ('\\?', [("?", True),
                 ("a", False)]),
    ]
    for pattern, checks in table:
        runner = make_runner(pattern)
        for text, should_match in checks:
            outcome = runner.recognize(text)
            assert outcome if should_match else not outcome
Пример #16
0
def test_quotes():
    """Check a double-quoted string pattern and newline/hex/octal escapes."""
    # Each row: pattern, then (input, expected verdict) pairs in call order.
    table = [
        ('"[^\\"]*"', [('"abc"', True),
                       ('"asdfefveeaa"', True),
                       ('"""', False)]),
        ('\\n\\x0a', [("n\n", False),
                      ("\n\n", True)]),
        ('\\12\\012', [("\n\n", True)]),
        ('\\377\\xff', [("\xff\xff", True)]),
        ('\\?', [("?", True),
                 ("a", False)]),
    ]
    for pattern, checks in table:
        runner = make_runner(pattern)
        for text, should_match in checks:
            outcome = runner.recognize(text)
            assert outcome if should_match else not outcome
Пример #17
0
def test_escaped_quote():
    """Check a double-quoted string pattern that allows backslash escapes."""
    runner = make_runner(r'"[^\\"]*(\\.[^\\"]*)*"')
    well_formed = [r'""', r'"a"', r'"a\"b"', r'"\\\""']
    for literal in well_formed:
        assert runner.recognize(literal)
    assert not runner.recognize(r'"\\""')
Пример #18
0
def test_escaped_quote():
    """Check a double-quoted string pattern that allows backslash escapes."""
    runner = make_runner(r'"[^\\"]*(\\.[^\\"]*)*"')
    well_formed = [r'""', r'"a"', r'"a\"b"', r'"\\\""']
    for literal in well_formed:
        assert runner.recognize(literal)
    assert not runner.recognize(r'"\\""')
Пример #19
0
def test_simple():
    """Check star, concatenation, alternation, grouping and the dot."""
    # Each row: pattern, inputs that must be recognized, inputs that must not.
    table = [
        ("a*", ["aaaaa", ""], ["aaaaaaaaaaaaaaaaaaaaaaaaaa "]),
        ("a*bc|d", ["aaaaabc", "bc", "d"], ["abcd"]),
        ("(ab)*|a*b*", ["ababababab", "aaaabb"], ["abababaabb"]),
        (".*", ["kjsadfq3jlflASDF@#$", "vka afj ASF# A"], []),
    ]
    for pattern, accepted, rejected in table:
        runner = make_runner(pattern)
        for text in accepted:
            assert runner.recognize(text)
        for text in rejected:
            assert not runner.recognize(text)
Пример #20
0
def test_simple():
    """Check star, concatenation, alternation, grouping and the dot."""
    # Each row: pattern, inputs that must be recognized, inputs that must not.
    table = [
        ("a*", ["aaaaa", ""], ["aaaaaaaaaaaaaaaaaaaaaaaaaa "]),
        ("a*bc|d", ["aaaaabc", "bc", "d"], ["abcd"]),
        ("(ab)*|a*b*", ["ababababab", "aaaabb"], ["abababaabb"]),
        (".*", ["kjsadfq3jlflASDF@#$", "vka afj ASF# A"], []),
    ]
    for pattern, accepted, rejected in table:
        runner = make_runner(pattern)
        for text in accepted:
            assert runner.recognize(text)
        for text in rejected:
            assert not runner.recognize(text)
def test_triple_regex():
    """Check a pattern assembled with the ``any`` and ``group`` helpers.

    Text with doubled quote characters is recognized; text containing a run
    of three quote characters is not.
    """
    quote = '"'
    non_quote = r"[^\%s]" % (quote, )
    quote_run = group(quote, 2 * quote)
    # NOTE(review): `any` here is a pattern-building helper that returns a
    # string, not the builtin -- its definition is outside this block.
    pattern = non_quote + "*" + any(quote_run + non_quote + "+")
    runner = make_runner(pattern)
    assert runner.recognize('""a""a""a""a')
    assert not runner.recognize('""a""a"""a""a')
Пример #22
0
def test_triple_regex():
    """Check a pattern assembled with the ``any`` and ``group`` helpers.

    Text with doubled quote characters is recognized; text containing a run
    of three quote characters is not.
    """
    quote = '"'
    non_quote = r"[^\%s]" % (quote, )
    quote_run = group(quote, 2 * quote)
    # NOTE(review): `any` here is a pattern-building helper that returns a
    # string, not the builtin -- its definition is outside this block.
    pattern = non_quote + "*" + any(quote_run + non_quote + "+")
    runner = make_runner(pattern)
    assert runner.recognize('""a""a""a""a')
    assert not runner.recognize('""a""a"""a""a')
Пример #23
0
def test_charclass():
    r"""Check the \d, \D, \s, \S, \w and \W escapes outside brackets."""
    # Each row: pattern, inputs that must be recognized, inputs that must not.
    table = (
        (r"\d", ('0', '5', '9'), ('d',)),
        (r"\d{2,}", ('09', '158'), ('1',)),
        (r"\D", ('d', '\n'), ('0', '1234')),
        (r"\s\S", (' d', '\t9'), ('d ', '99', '\r\r')),
        (r"\w+", ('word', 'variable_name', 'abc123'), ('word\n', 'hey hey')),
        (r"\w\W\w", ('9 9', '_\fx'), ('\n\r\t',)),
    )
    for pattern, accepted, rejected in table:
        runner = make_runner(pattern)
        for text in accepted:
            assert runner.recognize(text)
        for text in rejected:
            assert not runner.recognize(text)
Пример #24
0
def test_charclass():
    r"""Check the \d, \D, \s, \S, \w and \W escapes outside brackets."""
    # Each row: pattern, inputs that must be recognized, inputs that must not.
    table = (
        (r"\d", ('0', '5', '9'), ('d',)),
        (r"\d{2,}", ('09', '158'), ('1',)),
        (r"\D", ('d', '\n'), ('0', '1234')),
        (r"\s\S", (' d', '\t9'), ('d ', '99', '\r\r')),
        (r"\w+", ('word', 'variable_name', 'abc123'), ('word\n', 'hey hey')),
        (r"\w\W\w", ('9 9', '_\fx'), ('\n\r\t',)),
    )
    for pattern, accepted, rejected in table:
        runner = make_runner(pattern)
        for text in accepted:
            assert runner.recognize(text)
        for text in rejected:
            assert not runner.recognize(text)
Пример #25
0
def test_repetition():
    """Check exact ({n}), bounded ({m,n}) and open-ended ({m,}) repetition."""
    # Each row: pattern, inputs that must be recognized, inputs that must not.
    table = [
        ('a{15}',
         ["a" * 15],
         ["a" * 14, "a" * 16, "b" * 15]),
        ('a{2,10}',
         ["a" * 2, "a" * 5, "a" * 10],
         ["a", "a" + "b", "a" * 11, "a" * 12]),
        ('a{3,}',
         ["a" * 3, "a" * 5, "a" * 10, "a" * 12],
         ["a", "a" + "b", "a" * 2]),
    ]
    for pattern, accepted, rejected in table:
        runner = make_runner(pattern)
        for text in accepted:
            assert runner.recognize(text)
        for text in rejected:
            assert not runner.recognize(text)
Пример #26
0
def run_individual_test(regex, tests):
    """Run a test from the PCRE suite.

    A leading ``^`` and a trailing ``$`` (unless written as ``\\$``) are
    stripped from *regex* and emulated by restricting which substrings of
    each subject are offered to the runner.

    :param regex: pattern text; may start with ``^`` and/or end with ``$``.
    :param tests: iterable of ``(test, match)`` pairs, where ``match`` is the
        expected matched substring, or ``None`` when no match is expected.
    :raises AssertionError: if the first recognized substring differs from
        ``match``, or nothing is recognized although ``match`` is not ``None``.
    """
    # Process the regex and make it ready for make_runner
    anchor_left = regex.startswith('^')
    anchor_right = regex.endswith('$') and not regex.endswith('\\$')
    regex_to_use = regex
    if anchor_left:
        regex_to_use = regex_to_use[1:]  # chop the ^ if it's there
    if anchor_right:
        regex_to_use = regex_to_use[:-1]  # chop the $ if it's there

    if not regex_to_use:
        # Nothing is left to compile once the anchors are removed.
        return

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("%s:" % regex_to_use)

    runner = make_runner(regex_to_use)

    # Now run the test expressions against the Regex
    for test, match in tests:
        print("/%r/%r/" % (test, match))

        last = len(test)

        # Create possible subsequences that we should test
        if anchor_left:
            start_range = [0]
        else:
            # Include `last` itself: otherwise an empty subject yields no
            # candidates at all, and the empty substring at the very end is
            # never offered to a right-anchored pattern.
            start_range = range(0, last + 1)

        if anchor_right:
            subseq_gen = ((start, last) for start in start_range)
        else:
            # Go backwards to simulate greediness. Starting at `last` (not
            # last + 1) avoids trying the identical full-length slice twice.
            subseq_gen = ((start, end) for start in start_range
                          for end in range(last, start - 1, -1))

        # Search the possibilities for a match...
        for start, end in subseq_gen:
            attempt = test[start:end]
            if runner.recognize(attempt):
                assert attempt == match
                break
        else:
            assert match is None
Пример #27
0
def run_individual_test(regex, tests):
    """Run a test from the PCRE suite.

    A leading ``^`` and a trailing ``$`` (unless written as ``\\$``) are
    stripped from *regex* and emulated by restricting which substrings of
    each subject are offered to the runner.

    :param regex: pattern text; may start with ``^`` and/or end with ``$``.
    :param tests: iterable of ``(test, match)`` pairs, where ``match`` is the
        expected matched substring, or ``None`` when no match is expected.
    :raises AssertionError: if the first recognized substring differs from
        ``match``, or nothing is recognized although ``match`` is not ``None``.
    """
    # Process the regex and make it ready for make_runner
    anchor_left = regex.startswith('^')
    anchor_right = regex.endswith('$') and not regex.endswith('\\$')
    regex_to_use = regex
    if anchor_left:
        regex_to_use = regex_to_use[1:]   # chop the ^ if it's there
    if anchor_right:
        regex_to_use = regex_to_use[:-1]  # chop the $ if it's there

    if not regex_to_use:
        # Nothing is left to compile once the anchors are removed.
        return

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("%s:" % regex_to_use)

    runner = make_runner(regex_to_use)

    # Now run the test expressions against the Regex
    for test, match in tests:
        print("/%r/%r/" % (test, match))

        last = len(test)

        # Create possible subsequences that we should test
        if anchor_left:
            start_range = [0]
        else:
            # Include `last` itself: otherwise an empty subject yields no
            # candidates at all, and the empty substring at the very end is
            # never offered to a right-anchored pattern.
            start_range = range(0, last + 1)

        if anchor_right:
            subseq_gen = ((start, last) for start in start_range)
        else:
            # Go backwards to simulate greediness. Starting at `last` (not
            # last + 1) avoids trying the identical full-length slice twice.
            subseq_gen = ((start, end) for start in start_range
                          for end in range(last, start - 1, -1))

        # Search the possibilities for a match...
        for start, end in subseq_gen:
            attempt = test[start:end]
            if runner.recognize(attempt):
                assert attempt == match
                break
        else:
            assert match is None
Пример #28
0
def test_file():
    """Open the PCRE tests and run them.

    Reads ``testinput1`` and ``testoutput1`` from the current directory,
    walks the regexes produced by ``create_regex_iterator`` and prints one
    status line per subject. Mismatches are printed, not asserted.

    Regexes are skipped when they carry the ``x`` flag, contain a non-greedy
    operator or a ``(?`` construct, or are empty after anchor stripping.
    """
    # Context managers close the files even if reading fails.
    with open('testinput1', 'r') as infile:
        tests = [line.rstrip() for line in infile]
    with open('testoutput1', 'r') as outfile:
        results = [line.rstrip() for line in outfile]

    # Flag string -> text transformation applied to both regex and subject.
    regex_flag_mapping = {'': lambda s: s, 'i': lambda s: s.upper()}

    regex_set = create_regex_iterator(tests, results)
    for regex, regex_flags in regex_set:
        try:
            print('%r' % regex)

            # Create an iterator to grab the test/results for this regex
            result_set = create_result_iterator(tests, results)

            # Handle the flags:
            if regex_flags in regex_flag_mapping:
                text_prepare = regex_flag_mapping[regex_flags]
            elif 'x' in regex_flags:
                raise SkipException("Cant do extended PRCE expressions")
            else:
                print("UNKNOWN FLAGS: %s" % regex_flags)
                # Drain the subjects exactly as the SkipException path does,
                # so both skip routes leave the iterators in the same state.
                for _ in result_set:
                    pass
                continue

            skipped = any(op in regex for op in ('*?', '??', '+?', '}?', '(?'))
            if skipped:
                raise SkipException(
                    "Cant do non-greedy operators or '(?' constructions)")

            regex_to_use = text_prepare(regex)

            anchor_left = regex_to_use.startswith('^')
            anchor_right = regex_to_use.endswith(
                '$') and not regex_to_use.endswith('\\$')
            if anchor_left:
                regex_to_use = regex_to_use[1:]  # chop the ^ if it's there
            if anchor_right:
                regex_to_use = regex_to_use[:-1]  # chop the $ if it's there

            if not regex_to_use:
                raise SkipException("Cant do blank regex")
        except SkipException as e:
            # NOTE(review): relies on SkipException exposing `.message`;
            # its definition is outside this block -- confirm.
            print("  SKIPPED (%s)" % e.message)
            # now burn all the tests for this regex
            for _ in result_set:
                pass
            continue

        # Finally, we make the pypy regex runner
        runner = make_runner(regex_to_use)

        # Now run the test expressions against the Regex
        for test, result in result_set:
            last = len(test)

            # Create possible subsequences that we should test
            if anchor_left:
                start_range = [0]
            else:
                start_range = range(0, last)

            if anchor_right:
                subseq_gen = ((start, last) for start in start_range)
            else:
                # Go backwards to simulate greediness. Starting at `last`
                # (not last + 1) avoids trying the full-length slice twice.
                subseq_gen = ((start, end) for start in start_range
                              for end in range(last, start, -1))

            # Reset per subject: with no candidate slices (e.g. an empty
            # subject) the loop body never runs, and these would otherwise be
            # undefined or left over from the previous subject.
            matched = False
            attempt = None

            # Search the possibilities for a match...
            for start, end in subseq_gen:
                attempt = text_prepare(test[start:end])
                matched = runner.recognize(attempt)
                if matched:
                    break

            # Did we get what we expected?
            if result == 'No match':
                if matched:
                    print("  FALSE MATCH: regex==%r test==%r" % (regex, test))
                else:
                    print("  pass:        regex==%r test==%r" % (regex, test))
            elif result.startswith(' 0: '):
                if not matched:
                    print("  MISSED:      regex==%r test==%r" % (regex, test))
                elif not attempt == text_prepare(result[4:]):
                    print("  BAD MATCH:   regex==%r test==%r found==%r expect==%r" % (
                        regex, test, attempt, result[4:]))
                else:
                    print("  pass:        regex==%r test==%r" % (regex, test))
Пример #29
0
def test_file():
    """Open the PCRE tests and run them.

    Reads ``testinput1`` and ``testoutput1`` from the current directory,
    walks the regexes produced by ``create_regex_iterator`` and prints one
    status line per subject. Mismatches are printed, not asserted.

    Regexes are skipped when they carry the ``x`` flag, contain a non-greedy
    operator or a ``(?`` construct, or are empty after anchor stripping.
    """
    # Context managers close the files even if reading fails.
    with open('testinput1', 'r') as infile:
        tests = [line.rstrip() for line in infile]
    with open('testoutput1', 'r') as outfile:
        results = [line.rstrip() for line in outfile]

    # Flag string -> text transformation applied to both regex and subject.
    regex_flag_mapping = {'': lambda s: s,
                          'i': lambda s: s.upper()}

    regex_set = create_regex_iterator(tests, results)
    for regex, regex_flags in regex_set:
        try:
            print('%r' % regex)

            # Create an iterator to grab the test/results for this regex
            result_set = create_result_iterator(tests, results)

            # Handle the flags:
            if regex_flags in regex_flag_mapping:
                text_prepare = regex_flag_mapping[regex_flags]
            elif 'x' in regex_flags:
                raise SkipException("Cant do extended PRCE expressions")
            else:
                print("UNKNOWN FLAGS: %s" % regex_flags)
                # Drain the subjects exactly as the SkipException path does,
                # so both skip routes leave the iterators in the same state.
                for _ in result_set:
                    pass
                continue

            skipped = any(op in regex for op in ('*?', '??', '+?', '}?', '(?'))
            if skipped:
                raise SkipException("Cant do non-greedy operators or '(?' constructions)")

            regex_to_use = text_prepare(regex)

            anchor_left = regex_to_use.startswith('^')
            anchor_right = regex_to_use.endswith('$') and not regex_to_use.endswith('\\$')
            if anchor_left:
                regex_to_use = regex_to_use[1:]   # chop the ^ if it's there
            if anchor_right:
                regex_to_use = regex_to_use[:-1]  # chop the $ if it's there

            if not regex_to_use:
                raise SkipException("Cant do blank regex")
        except SkipException as e:
            # NOTE(review): relies on SkipException exposing `.message`;
            # its definition is outside this block -- confirm.
            print("  SKIPPED (%s)" % e.message)
            # now burn all the tests for this regex
            for _ in result_set:
                pass
            continue

        # Finally, we make the pypy regex runner
        runner = make_runner(regex_to_use)

        # Now run the test expressions against the Regex
        for test, result in result_set:
            last = len(test)

            # Create possible subsequences that we should test
            if anchor_left:
                start_range = [0]
            else:
                start_range = range(0, last)

            if anchor_right:
                subseq_gen = ((start, last) for start in start_range)
            else:
                # Go backwards to simulate greediness. Starting at `last`
                # (not last + 1) avoids trying the full-length slice twice.
                subseq_gen = ((start, end) for start in start_range
                              for end in range(last, start, -1))

            # Reset per subject: with no candidate slices (e.g. an empty
            # subject) the loop body never runs, and these would otherwise be
            # undefined or left over from the previous subject.
            matched = False
            attempt = None

            # Search the possibilities for a match...
            for start, end in subseq_gen:
                attempt = text_prepare(test[start:end])
                matched = runner.recognize(attempt)
                if matched:
                    break

            # Did we get what we expected?
            if result == 'No match':
                if matched:
                    print("  FALSE MATCH: regex==%r test==%r" % (regex, test))
                else:
                    print("  pass:        regex==%r test==%r" % (regex, test))
            elif result.startswith(' 0: '):
                if not matched:
                    print("  MISSED:      regex==%r test==%r" % (regex, test))
                elif not attempt == text_prepare(result[4:]):
                    print("  BAD MATCH:   regex==%r test==%r found==%r expect==%r" % (regex, test, attempt, result[4:]))
                else:
                    print("  pass:        regex==%r test==%r" % (regex, test))
Пример #30
0
def test_comment():
    """Check that a /* ... */ style comment is recognized."""
    comment_pattern = "(/\\*[^\\*/]*\\*/)"
    runner = make_runner(comment_pattern)
    sample = "/*asdfasdfasdf*/"
    assert runner.recognize(sample)
Пример #31
0
def test_quoted_2():
    """Check escaped brackets and an escaped pipe joined by alternation."""
    runner = make_runner('\\[|\\]|\\|')
    for symbol in ("[", "|", "]"):
        assert runner.recognize(symbol)
    assert not runner.recognize("]]")
Пример #32
0
def test_number():
    """Check a numeric-literal pattern with optional fraction and exponent."""
    number = make_runner(
        r"\-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][\+\-]?[0-9]+)?")
    # (input, expected verdict) pairs, kept in the original call order.
    checks = (
        ("-0.912E+0001", True),
        ("-0.a912E+0001", False),
        ("5", True),
    )
    for text, should_match in checks:
        outcome = number.recognize(text)
        assert outcome if should_match else not outcome
Пример #33
0
def test_quoted():
    """Check escaped brackets and an escaped pipe joined by alternation."""
    runner = make_runner('\\[|\\]|\\|')
    for symbol in ("[", "|", "]"):
        assert runner.recognize(symbol)
    assert not runner.recognize("]]")
Пример #34
0
def test_number():
    """Check a numeric-literal pattern with optional fraction and exponent."""
    number = make_runner(
        r"\-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][\+\-]?[0-9]+)?")
    # (input, expected verdict) pairs, kept in the original call order.
    checks = (
        ("-0.912E+0001", True),
        ("-0.a912E+0001", False),
        ("5", True),
    )
    for text, should_match in checks:
        outcome = number.recognize(text)
        assert outcome if should_match else not outcome
Пример #35
0
def test_comment():
    """Check that a /* ... */ style comment is recognized."""
    comment_pattern = "(/\\*[^\\*/]*\\*/)"
    runner = make_runner(comment_pattern)
    sample = "/*asdfasdfasdf*/"
    assert runner.recognize(sample)