Пример #1
0
    def test_explicit(self):
        '''
        Letters introduce numbers in an indented block, using the
        ``explicit`` block policy (indentation is taken from the input).
        '''
        #basicConfig(level=DEBUG)
        number = Token(Digit())
        letter = Token(Letter())
        
        # A block is a sequence of lines; a line is either a number or a
        # letter that opens a new (recursively nested) indented block.
        block = Delayed()
        line = Or(Line(number), 
                  Line(letter) & block) > list
        block += Block(line[1:])
        
        program = Trace(line[1:])
        
        text = '''1
2
a
 3
 b
  4
  5
 6
'''
        program.config.lines(block_policy=explicit)
        parser = program.get_parse_string()
        result = parser(text)
        # The nested lists mirror the indentation structure of the input.
        assert result == [['1'], 
                          ['2'], 
                          ['a', ['3'], 
                                ['b', ['4'], 
                                      ['5']], 
                                ['6']]], result
Пример #2
0
    def test_bline(self):
        '''
        Test a simple example: letters introduce numbers in an indented block.
        '''
        #basicConfig(level=DEBUG)

        number = Token(Digit())
        letter = Token(Letter())

        # the simplest whitespace grammar i can think of - lines are either
        # numbers (which are single, simple statements) or letters (which
        # mark the start of a new, indented block).
        block = Delayed()
        line = Or(BLine(number), BLine(letter) & block) > list
        # and a block is simply a collection of lines, as above
        block += Block(line[1:])

        program = Trace(line[1:])

        text = '''1
2
a
 3
 b
  4
  5
 6
'''
        # block_policy=1: each nested block is indented by one extra space
        program.config.default_line_aware(block_policy=1)
        parser = program.get_parse_string()
        result = parser(text)
        # the nested lists mirror the indentation structure of the input
        assert result == [['1'], ['2'],
                          ['a', ['3'], ['b', ['4'], ['5']], ['6']]], result
Пример #3
0
 def test_literal(self):
     '''
     A plain literal matcher should compile directly to the literal text.
     '''
     tok = Token(Literal('abc'))
     tok.compile()
     assert tok.regexp == 'abc', repr(tok.regexp)
Пример #4
0
    def test_continued_explicit(self):
        '''
        Same block-structured grammar as the plain Line() tests, but the
        line matcher comes from ContinuedLineFactory (lines ending in 'x'
        would continue onto the next line).
        '''
        number = Token(Digit())
        letter = Token(Letter())
        
        block = Delayed()
        # bline behaves like Line(), with continuation support
        bline = ContinuedLineFactory(r'x')
        line = Or(bline(number), 
                  bline(letter) & block) > list
        block += Block(line[1:])
        
        program = Trace(line[1:])
        
        text = '''1
2
a
 3
 b
  4
  5
 6
'''
        program.config.lines(block_policy=explicit)
        parser = program.get_parse_string()
        result = parser(text)
        # The nested lists mirror the indentation structure of the input.
        assert result == [['1'], 
                          ['2'], 
                          ['a', ['3'], 
                                ['b', ['4'], 
                                      ['5']], 
                                ['6']]], result
Пример #5
0
 def test_string_arg(self):
     '''
     The lexer's discard pattern can skip any character, not just spaces.
     '''
     matcher = Token('[a-z]+')[:]
     matcher.config.lexer(discard='.')
     found = matcher.get_parse()('abc defXghi')
     assert found == ['abc', 'def', 'ghi'], found
Пример #6
0
 def test_real(self):
     '''
     A real is more complex, but still compiles.
     '''
     tok = Token(Real(exponent='Ee'))
     tok.compile()
     expected = '(?:[\\+\\-])?(?:(?:[0-9](?:[0-9])*)?\\.[0-9](?:[0-9])*|[0-9](?:[0-9])*(?:\\.)?)(?:[Ee](?:[\\+\\-])?[0-9](?:[0-9])*)?'
     assert tok.regexp == expected, repr(tok.regexp)
Пример #7
0
 def test_impossible(self):
     '''
     Cannot compile arbitrary functions.
     '''
     try:
         tok = Token(Real() > (lambda x: x))
         tok.compile()
     except LexerError:
         # expected: the transformation cannot be turned into a regexp
         return
     assert False, 'Expected error'
Пример #8
0
 def test_good_error_msg(self):
     '''
     Better error message with streams.
     '''
     #basicConfig(level=DEBUG)
     matcher = Token('[a-z]+')[:]
     matcher.config.lexer()
     parser = matcher.get_parse_string()
     try:
         parser('abc defXghi')
     except RuntimeLexerError as err:
         # string parsing reports line/character positions
         assert str(err) == "No token for 'Xghi' at line 1, character 8 of 'abc defXghi'.", str(err)
         return
     assert False, 'expected error'
Пример #9
0
 def test_bad_error_msg(self):
     '''
     An ugly error message.
     '''
     #basicConfig(level=DEBUG)
     matcher = Token('[a-z]+')[:]
     matcher.config.lexer()
     parser = matcher.get_parse_sequence()
     try:
         parser('abc defXghi')
     except RuntimeLexerError as err:
         # sequence parsing only reports an offset, not line/character
         assert str(err) == "No token for 'Xghi' at offset 7, value 'X' of 'abc defXghi'.", str(err)
         return
     assert False, 'expected error'
Пример #10
0
 def test_bad_space(self):
     '''
     An unexpected character fails to match.
     '''
     matcher = Token('a')
     matcher.config.clear().lexer(discard='b')
     parse = matcher.get_parse()
     # 'a' is the token; 'b' is discarded (no match, no error)
     assert parse('a') == ['a'], parse('a')
     assert parse('b') == None, parse('b')
     try:
         parse('c')
     except RuntimeLexerError as err:
         # 'c' is neither token nor discard, so the lexer raises
         assert str(err) == "No token for 'c' at line 1, character 1 of 'c'.", str(err)
         return
     assert False, 'expected failure'
Пример #11
0
 def test_file(self):
     '''
     Tokens can parse directly from an open (seekable) file object.
     '''
     # Python 3 text-mode temp files need an explicit encoding.
     if version[0] == '3':
         f = TemporaryFile('w+', encoding='utf8')
     else:
         f = TemporaryFile('w+')
     # Close the file even if parsing fails (the original leaked it).
     try:
         print("hello world\n", file=f)
         f.flush()
         f.seek(0)
         w = Token('[a-z]+')
         s = Token(' +')
         v = w & s & w
         v.parse_iterable(f)
     finally:
         f.close()
Пример #12
0
 def test_invert_bug_4(self):
     #basicConfig(level=DEBUG)
     # a negated character class inside a line-aware token
     matcher = BLine(Token('[^a]*'))
     matcher.config.line_aware(block_policy=2).left_memoize()
     outcome = matcher.get_parse_string()('123')
     assert outcome == ['123'], outcome
Пример #13
0
 def test_mixed(self):
     '''
     Cannot mix tokens and non-tokens at same level.
     '''
     bad = Token(Any()) & Any()
     # NOTE: the previous version had a try/except/else where the else
     # asserted 'wrong exception' - that branch was unreachable, because
     # the assert below raises AssertionError (never caught by
     # `except LexerError`) whenever get_parse() does not raise.
     try:
         bad.get_parse()
         assert False, 'expected failure'
     except LexerError as err:
         assert str(err) == 'The grammar contains a mix of Tokens and ' \
                            'non-Token matchers at the top level.  If ' \
                            'Tokens are used then non-token matchers ' \
                            'that consume input must only appear "inside" ' \
                            'Tokens.  The non-Token matchers include: ' \
                            'Any(None).', str(err)
Пример #14
0
 def test_line(self):
     #basicConfig(level=DEBUG)
     content = Token('[^\n\r]+')
     quoted = Regexp("'[^']'")
     matcher = BLine(content(quoted))
     matcher.config.default_line_aware(block_start=0)
     parse = matcher.get_parse_string()
     assert parse("'a'") == ["'a'"]
Пример #15
0
 def test_incomplete(self):
     '''
     A token is not completely consumed (this doesn't raise error messages,
     it just fails to match).
     '''
     # by default the sub-matcher must consume the entire token...
     strict = Token('[a-z]+')(Any())
     strict.config.no_full_first_match()
     parse = strict.get_parse_string()
     assert parse('a') == ['a'], parse('a')
     # ...so Any() cannot match a two-character token
     assert parse('ab') == None, parse('ab')
     # with complete=False a partial match of the token is accepted
     relaxed = Token('[a-z]+')(Any(), complete=False)
     relaxed.config.no_full_first_match()
     parse = relaxed.get_parse_string()
     assert parse('a') == ['a'], parse('a')
     assert parse('ab') == ['a'], parse('ab')
Пример #16
0
 def test_invert_bug_6(self):
     #basicConfig(level=DEBUG)
     # NOTE(review): the str() wrappers appear deliberate - presumably they
     # force byte strings (under Python 2 with unicode_literals) to match
     # make_str_parser; confirm before removing them.
     bad = BLine(Token(str('[^(*SOL)(*EOL)a]*')))
     bad.config.default_line_aware(block_policy=2,
                                   parser_factory=make_str_parser)
     bad.config.trace(True)
     parser = bad.get_parse_string()
     result = parser(str('123'))
     assert result == [str('123')], result
Пример #17
0
 def simple_grammar(self):
     '''
     Test a simple example: letters introduce numbers in an indented block.
     
     Returns the configured (but not yet compiled) program matcher.
     '''
     #basicConfig(level=DEBUG)
     
     number = Token(Digit())
     letter = Token(Letter())
     
     # the simplest whitespace grammar i can think of - lines are either
     # numbers (which are single, simple statements) or letters (which
     # mark the start of a new, indented block).
     block = Delayed()
     line = Or(Line(number), 
               Line(letter) & block) > list
     # and a block is simply a collection of lines, as above
     block += Block(line[1:])
     
     program = Trace(line[1:])
     # block_policy=1: each nested block is indented by one extra space
     program.config.lines(block_policy=1)
     return program
Пример #18
0
def ContinuedLineFactory(matcher):
    '''
    Create a replacement for ``Line()`` that can match multiple lines if they
    end in the given character/matcher.
    '''
    # Coerce a plain regexp string into a Token, then restrict how the
    # line-start/line-end tokens are generated around it.
    continuation = coerce_(matcher, lambda regexp: Token(regexp))
    restricted = RestrictTokensBy(continuation, LineEnd(), LineStart())

    def factory(matcher, indent=True):
        # Same signature as Line(), wrapped in the restriction above.
        return restricted(Line(matcher, indent=indent))

    return factory
Пример #19
0
 def test_offset(self):
     #basicConfig(level=DEBUG)
     content = Token('[^\n\r]+')
     matcher = BLine(content(~Literal('aa') & Regexp('.*')))
     matcher.config.default_line_aware(block_start=0)
     parse = matcher.get_parse_string()
     assert parse('aabc') == ['bc']
     # what happens with an empty match?
     inner = ~Literal('aa') & Regexp('.*')
     inner.config.no_full_first_match()
     assert inner.parse('aa') == ['']
     assert parse('aa') == ['']
Пример #20
0
 def test_bad_config(self):
     #basicConfig(level=DEBUG)
     content = Token('[^\n\r]+')
     quoted = Regexp("'[^']'")
     matcher = BLine(content(quoted))
     # no block_start given, so offside parsing cannot initialise
     matcher.config.default_line_aware()
     parse = matcher.get_parse_string()
     try:
         parse("'a'")
     except OffsideError as error:
         assert str(error).startswith('No initial indentation has been set.')
         return
     assert False, 'Expected error'
Пример #21
0
    def test_indent(self):
        '''
        Test simple matches against leading spaces.
        '''
        #basicConfig(level=DEBUG)
        text = '''
left
    four'''
        word = Token(Word(Letter()))
        # LineStart(...) matches the given indentation at the line start
        indent = LineStart()
        line1 = indent('') + LineEnd()
        line2 = indent('') & word('left') + LineEnd()
        line3 = indent('    ') & word('four') + LineEnd()
        expr = (line1 & line2 & line3)
        # NO_BLOCKS: no offside/block handling, just raw indentation
        expr.config.lines(block_start=NO_BLOCKS)
        parser = expr.get_parse_string()
        result = parser(text)
        assert result == ['', '', 'left', '    ', 'four'], result
Пример #22
0
 def test_tabs(self):
     '''
     Use block_policy here so that the regexp parser that excludes SOL
     and EOL is used; otherwise Any()[:] matches those and we end up
     with a single monster token.
     '''
     line = Indent() & Token(Any()) & Eol()
     # tabsize=8: a leading tab is expanded to eight spaces
     line.config.default_line_aware(tabsize=8, block_policy=0).trace(True)
     result = line.parse('a')
     assert result == ['', 'a', ''], result
     result = line.parse('\ta')
     assert result == ['        ', 'a', ''], result
     # tabsize=None: the tab is left unexpanded in the indent
     line.config.default_line_aware(tabsize=None, block_policy=0)
     result = line.parse('\ta')
     assert result == ['\t', 'a', ''], result
     # default tabsize behaves like 8 here - TODO confirm against docs
     line.config.default_line_aware(block_policy=0)
     result = line.parse('\ta')
     assert result == ['        ', 'a', ''], result
Пример #23
0
    def test_indent(self):
        '''
        Test simple matches against leading spaces.
        '''
        #basicConfig(level=DEBUG)
        text = '''
left
    four'''
        word = Token(Word(Letter()))
        # Indent(...) matches the given leading whitespace of a line
        indent = Indent()
        line1 = indent('') + Eol()
        line2 = indent('') & word('left') + Eol()
        line3 = indent('    ') & word('four') + Eol()
        expr = (line1 & line2 & line3)
        expr.config.default_line_aware()
        parser = expr.get_parse_string()
        result = parser(text)
        assert result == ['', '', 'left', '    ', 'four'], result
Пример #24
0
    def test_indent(self):
        '''
        Test simple matches against leading spaces.
        '''
        #basicConfig(level=DEBUG)
        text = '''
 onespace
 \tspaceandtab'''
        word = Token(Word(Letter()))
        indent = LineStart()
        line1 = indent('') & ~LineEnd()
        line2 = indent(' ') & word('onespace') & ~LineEnd()
        # with tabsize=4, the tab after the single space expands to four
        # spaces, giving five spaces of indentation in total
        line3 = indent('     ') & word('spaceandtab') & ~LineEnd()
        expr = line1 & line2 & line3
        expr.config.lines(tabsize=4, block_start=NO_BLOCKS).trace_stack(True)
        parser = expr.get_parse_string()
        result = parser(text)
        #print(result)
        assert result == ['', ' ', 'onespace', '     ', 'spaceandtab'], result
Пример #25
0
    def test_indent(self):
        '''
        Test simple matches against leading spaces.
        '''
        #basicConfig(level=DEBUG)
        text = '''
 onespace
 \tspaceandtab'''
        word = Token(Word(Letter()))
        indent = Indent()
        line1 = indent('') & ~Eol()
        line2 = indent(' ') & word('onespace') & ~Eol()
        # with tabsize=4, the tab after the single space expands to four
        # spaces, giving five spaces of indentation in total
        line3 = indent('     ') & word('spaceandtab') & ~Eol()
        expr = line1 & line2 & line3
        expr.config.default_line_aware(tabsize=4).trace(True)
        parser = expr.get_parse_string()
        result = parser(text)
        #print(result)
        assert result == ['', ' ', 'onespace', '     ', 'spaceandtab'], result
Пример #26
0
 def test_default(self):
     '''
     The default configuration parses a simple tokenised string.
     '''
     word = Token('[a-z]+')
     space = Token(' +')
     phrase = word & space & word
     phrase.parse_string("hello world\n")
Пример #27
0
 def test_bug(self):
     '''
     A Word token restricted to two Any() characters and end-of-stream.
     '''
     #basicConfig(level=DEBUG)
     matcher = Token(Word())(Any()[2] & Eos())
     matcher.match("ab cd")
Пример #28
0
 def test_bug(self):
     #basicConfig(level=DEBUG)
     word = Token(Word())
     contents = Any()[2] & Eos()
     word(contents).match("ab cd")