Пример #1
0
 def test_single(self):
     '''
     Smoke test: exhaust the matches of ``Literal('*')[:,...][3]`` against
     ``'****'`` with the configuration cleared and ``low_memory(5)`` set.

     There is no assertion; the test passes when nothing raises.
     '''
     #basicConfig(level=DEBUG)
     repeated = Literal('*')[:,...][3]
     repeated.config.clear().low_memory(5)
     string_matcher = repeated.get_match_string()
     results = string_matcher('*' * 4)
     # Force the generator to run to completion.
     list(results)
     
Пример #2
0
 def assert_range(self, n_match, direcn, results, multiplier):
     '''
     Check the match count for a series of ``low_memory`` queue lengths.

     For every position ``index`` in `results` a fresh matcher
     ``Literal('*')[::direcn,...][n_match] & Eos()`` is built, its
     configuration is cleared and ``low_memory(index * multiplier)`` is
     applied; the string matcher is then passed to ``self.assert_count``
     together with the queue length, the index and the expected value.

     :param n_match: Index applied to the repeated literal (``[n_match]``).
     :param direcn: Step used in the repeat slice (``[::direcn,...]``).
     :param results: Sequence of expected values, one per queue length.
     :param multiplier: Factor turning a position in `results` into a
         queue length.
     '''
     # enumerate() yields the position and the expected value together,
     # replacing the range(len(...)) loop and the results[index] lookup.
     for index, expected in enumerate(results):
         queue_len = index * multiplier
         expr = Literal('*')[::direcn,...][n_match] & Eos()
         expr.config.clear().low_memory(queue_len)
         matcher = expr.get_match_string()
         self.assert_count(matcher, queue_len, index, expected)
Пример #3
0
    def test_workaround(self):
        '''
        Right-recursive grammar built with `Delayed`, where the alternation
        is wrapped with ``>= list`` before being bound to the delayed
        matcher.  Checked with ``self.assert_literal('ab', ...)``.
        '''
        recursive = Delayed()

        a_then_rest = Literal("a") + recursive
        lone_b = Literal("b")

        # Bind the delayed matcher: either "a" followed by itself, or "b".
        recursive += (a_then_rest | lone_b) >= list

        self.assert_literal('ab', recursive)
Пример #4
0
    def test_as_given(self):
        '''
        Right-recursive grammar built with `Delayed`: the delayed matcher
        is bound to "a" followed by itself, or "b".  Checked with
        ``self.assert_literal('ab', ...)``.
        '''
        recursive = Delayed()

        a_then_rest = Literal("a") + recursive
        lone_b = Literal("b")

        # Bind the delayed matcher to the plain alternation.
        recursive += a_then_rest | lone_b

        self.assert_literal('ab', recursive)
Пример #5
0
 def test_offset(self):
     '''
     Check a `BLine` (configured with ``block_start=0``) around a token
     whose content is matched by ``~Literal('aa') & Regexp('.*')``,
     including the case where only the ``'aa'`` prefix is present.
     '''
     #basicConfig(level=DEBUG)
     token = Token('[^\n\r]+')
     block_line = BLine(token(~Literal('aa') & Regexp('.*')))
     block_line.config.default_line_aware(block_start=0)
     parse = block_line.get_parse_string()
     assert parse('aabc') == ['bc']

     # what happens with an empty match?
     # First confirm the content matcher on its own, then inside the line.
     bare = ~Literal('aa') & Regexp('.*')
     bare.config.no_full_first_match()
     assert bare.parse('aa') == ['']
     assert parse('aa') == ['']
Пример #6
0
    def build(self):
        '''
        Construct the parser.

        Assembles the grammar for Lepl's own regular expression syntax from
        matchers and returns the ``parse_string`` attribute of the top-level
        expression, after clearing that expression's configuration.

        Callbacks on ``self`` (``extend``, ``dot``, ``dup``, ``tup``,
        ``invert``, ``char``, ``sequence``, ``choice``, ``star``, ``plus``,
        ``option``) and values on ``self.alphabet`` (``escape``, ``escaped``,
        ``illegal``, ``range``) are supplied by the enclosing class, which is
        not shown here.
        '''

        # Avoid dependency loops
        # NOTE(review): `Any` is imported below but not referenced anywhere
        # in this method.
        from lepl.matchers.derived import Drop, Eos, AnyBut, Upper
        from lepl.matchers.core import Any, Lookahead, Literal, Delayed
        from lepl.matchers.error import make_error
        from lepl.matchers.variables import TraceVariables
        from lepl.support.node import node_throw

        # NOTE(review): local names inside this block may be significant to
        # TraceVariables - confirm before renaming anything.
        with TraceVariables(False):

            # these two definitions enforce the conditions above, providing only
            # special characters appear as literals in the grammar
            escaped = Drop(self.alphabet.escape) & self.alphabet.escaped
            raw      = ~Lookahead(self.alphabet.escape) & \
                            AnyBut(self.alphabet.illegal)
            close = Drop(')')
            # `>>` binds tighter than `&` in Python, hence the parentheses.
            extend = (Drop('(*') & Upper()[1:, ...] & close) >> self.extend

            single = escaped | raw | extend

            any_ = Literal('.') >> self.dot
            letter = single >> self.dup
            # `&` and `|` bind tighter than the comparison operators, so the
            # trailing `> ...` / `>= ...` below apply to the whole expression
            # on their left.
            pair = single & Drop('-') & single > self.tup

            interval = pair | letter
            brackets = Drop('[') & interval[1:] & Drop(']')
            inverted = Drop('[^') & interval[1:] & Drop(']') >= self.invert
            char = inverted | brackets | letter | any_ | extend > self.char

            # Forward reference; bound with `+=` once the alternatives exist.
            item = Delayed()

            # NOTE(review): `open` and `range` shadow the builtins for the
            # remainder of this method.
            open = Drop('(?:')
            range = Drop(self.alphabet.escape) & self.alphabet.range
            seq = (char | item | range)[0:] > self.sequence
            group = open & seq & close
            # At least two sequences, separated by a dropped '|'.
            alts = open & seq[2:, Drop('|')] & close > self.choice
            star = (alts | group | char) & Drop('*') > self.star
            plus = (alts | group | char) & Drop('+') > self.plus
            opt = (alts | group | char) & Drop('?') > self.option
            # A '(' that is not followed by '?:' is turned into an error
            # carrying the message below.
            bad_grp  = (Drop('(') & ~Lookahead('?:') & seq & close) \
                            ** make_error(
                                "Lepl's own regular expressions do not currently "
                                "support matched groups.\n"
                                "Use '(?:...)' to group expressions without "
                                "matching.")

            item += alts | group | star | plus | opt | bad_grp

            # The whole input must be consumed (Eos); results are passed
            # through node_throw.
            expr = ((char | item)[:] & Drop(Eos())) >> node_throw

        # Empty config here avoids loops if the default config includes
        # references to alphabets
        expr.config.clear()
        return expr.parse_string
Пример #7
0
def SingleLineString(quote='"', escape='\\', exclude='\n'):
    '''
    Like `String`,  but will not match across multiple lines.

    The content is any character other than `quote` or a character in
    `exclude`; when `escape` is truthy, `escape` followed by `quote` is
    accepted as well, with the escape dropped.  The surrounding quotes are
    dropped from the result.
    '''
    delimiter = Literal(quote)
    plain = AnyBut(Or(delimiter, Any(exclude)))
    if escape:
        char = Or(plain, And(Drop(escape), delimiter))
    else:
        char = plain
    body = Repeat(char, add_=True)
    return And(Drop(delimiter), body, Drop(delimiter))
Пример #8
0
def String(quote='"', escape='\\'):
    '''
    Match a string with quotes that can be escaped.  This will match across
    newlines (see `SingleLineString` for an alternative).

    When `escape` is truthy, `escape` followed by `quote` is tried before
    the plain "anything but `quote`" alternative, and the escape is
    dropped.  The surrounding quotes are dropped from the result.
    '''
    delimiter = Literal(quote)
    plain = AnyBut(delimiter)
    if escape:
        char = Or(And(Drop(escape), delimiter), plain)
    else:
        char = plain
    body = Repeat(char, add_=True)
    return And(Drop(delimiter), body, Drop(delimiter))
Пример #9
0
 def mkchr(char, range, invert=False):
     '''
     Build a matcher for the literal `char` whose result is mapped to a
     `Character`.

     Each element ``x`` of `range` becomes the interval ``(x, x)``.  With
     `invert` set, the intervals are passed through ``self.invert`` when
     the mapping function is called.  ``self`` is taken from the enclosing
     scope.
     '''
     from lepl.matchers.core import Literal
     from lepl.matchers.derived import Map
     from lepl.regexp.core import Character
     pairs = lmap(lambda point: (point, point), range)
     if not invert:
         to_character = lambda _: Character(pairs, self)
     else:
         # this delays call to invert until after creation of self
         to_character = lambda _: Character(self.invert(pairs), self)
     return Map(Literal(char), to_character)
Пример #10
0
def SkipString(quote='"', escape='\\', ignore='\n', empty='', join=__add__):
    '''
    Like `String`, matching across multiple lines, but will silently
    drop newlines.

    More precisely, any character in `ignore` is matched and dropped.
    When `escape` is truthy, `escape` followed by `quote` is accepted with
    the escape dropped.  Results are combined by `Repeat` using
    ``reduce=(empty, join)``; the surrounding quotes are dropped.
    '''
    delimiter = Literal(quote)
    kept = AnyBut(Or(delimiter, Any(ignore)))
    if escape:
        kept = Or(kept, And(Drop(escape), delimiter))
    # Ignored characters are consumed but contribute nothing.
    piece = Or(kept, Drop(Any(ignore)))
    body = Repeat(piece, reduce=(empty, join))
    return And(Drop(delimiter), body, Drop(delimiter))
Пример #11
0
def String(quote='"', escape='\\', empty='', join=__add__):
    '''
    Match a string with quotes that can be escaped.  This will match across
    newlines (see `SingleLineString` for an alternative).

    More generally, a string is a grouping of results.  Setting `empty` and
    `join` correctly will allow this matcher to work with a variety of types.

    When `escape` is truthy, `escape` followed by `quote` is tried before
    the plain "anything but `quote`" alternative, and the escape is
    dropped.  The surrounding quotes are dropped from the result.
    '''
    delimiter = Literal(quote)
    plain = AnyBut(delimiter)
    if escape:
        char = Or(And(Drop(escape), delimiter), plain)
    else:
        char = plain
    body = Repeat(char, reduce=(empty, join))
    return And(Drop(delimiter), body, Drop(delimiter))
Пример #12
0
def SingleLineString(quote='"',
                     escape='\\',
                     exclude='\n',
                     empty='',
                     join=__add__):
    '''
    Like `String`,  but will not match across multiple lines.

    The content is any character other than `quote` or a character in
    `exclude`; when `escape` is truthy, `escape` followed by `quote` is
    accepted as well, with the escape dropped.  Results are combined by
    `Repeat` using ``reduce=(empty, join)``; the surrounding quotes are
    dropped.
    '''
    delimiter = Literal(quote)
    plain = AnyBut(Or(delimiter, Any(exclude)))
    if escape:
        char = Or(plain, And(Drop(escape), delimiter))
    else:
        char = plain
    body = Repeat(char, reduce=(empty, join))
    return And(Drop(delimiter), body, Drop(delimiter))
Пример #13
0
    def build(self):
        '''
        Construct the parser.

        Assembles the grammar for regular expressions from matchers and
        returns the ``parse_string`` attribute of the top-level expression,
        after clearing that expression's configuration.

        Callbacks on ``self`` (``dot``, ``dup``, ``tup``, ``extend``,
        ``invert``, ``char``, ``sequence``, ``choice``, ``star``, ``plus``,
        ``option``) and values on ``self.alphabet`` (``escape``,
        ``escaped``) are supplied by the enclosing class, which is not
        shown here.
        '''

        # Avoid dependency loops
        from lepl.matchers.derived import Drop, Eos, AnyBut, Upper
        from lepl.matchers.core import Any, Lookahead, Literal, Delayed

        # these two definitions enforce the conditions above, providing only
        # special characters appear as literals in the grammar
        escaped = Drop(self.alphabet.escape) + Any(self.alphabet.escaped)
        raw      = ~Lookahead(self.alphabet.escape) + \
                        AnyBut(self.alphabet.escaped)

        single = escaped | raw

        any_ = Literal('.') >> self.dot
        letter = single >> self.dup
        # `&` and `|` bind tighter than the comparison operators, so the
        # trailing `> ...` / `>= ...` below apply to the whole expression on
        # their left.
        pair = single & Drop('-') & single > self.tup
        # `>>` binds tighter than `&` in Python, hence the parentheses.
        extend = (Drop('(*') & Upper()[1:, ...] & Drop(')')) >> self.extend

        interval = pair | letter | extend
        brackets = Drop('[') & interval[1:] & Drop(']')
        inverted = Drop('[^') & interval[1:] & Drop(']') >= self.invert
        char = inverted | brackets | letter | any_ | extend > self.char

        # Forward reference; bound with `+=` once the alternatives exist.
        item = Delayed()

        seq = (char | item)[0:] > self.sequence
        group = Drop('(') & seq & Drop(')')
        # At least two sequences, separated by a dropped '|'.
        alts = Drop('(') & seq[2:, Drop('|')] & Drop(')') > self.choice
        star = (alts | group | char) & Drop('*') > self.star
        plus = (alts | group | char) & Drop('+') > self.plus
        opt = (alts | group | char) & Drop('?') > self.option

        item += alts | group | star | plus | opt

        # The whole input must be consumed (Eos).
        expr = (char | item)[:] & Drop(Eos())

        # Empty config here avoids loops if the default config includes
        # references to alphabets
        expr.config.clear()
        return expr.parse_string
Пример #14
0
 def test_nested(self):
     '''
     Chain ``>>`` and ``>`` on a literal and check the nested result via
     ``self.assert_direct``.
     '''
     # note extra list
     inner = Literal('foo') >> self.mkappend('b')
     outer = inner > self.mkappend(['c'])
     self.assert_direct('foo ', outer, [[['foob', 'c']]])
Пример #15
0
 def test_apply(self):
     '''
     Apply ``self.mkappend(['b'])`` to a literal with ``>`` and check the
     result via ``self.assert_direct``.
     '''
     # note extra list
     applied = Literal('foo') > self.mkappend(['b'])
     self.assert_direct('foo ', applied, [[['foo', 'b']]])
Пример #16
0
 def test_kapply(self):
     '''
     Apply ``self.mkappend('b')`` to a literal with ``>>`` and check the
     result via ``self.assert_direct``.
     '''
     # note extra list
     applied = Literal('foo') >> self.mkappend('b')
     self.assert_direct('foo ', applied, [['foob']])
Пример #17
0
 def test_literal(self):
     '''
     Check a bare ``Literal('foo')`` against ``'foo '`` via
     ``self.assert_direct``.
     '''
     matcher = Literal('foo')
     self.assert_direct('foo ', matcher, [['foo']])
Пример #18
0
 def test_commit(self):
     '''
     Check the results for ``'abcd'`` when one of three alternative tails
     is followed by ``Commit()``.
     '''
     prefix = Any()[0::'b']
     # `+` binds tighter than `|`, so Commit() belongs to the 'cd' branch.
     tails = (Literal('d') |
              Literal('cd') + Commit() |
              Literal('bcd'))
     matcher = prefix + tails + Eof()
     self.assert_direct('abcd', matcher, [['abcd'], ['abcd']])
Пример #19
0
 def test_example(self):
     '''
     Wrap the ``b`` repeat in an `Exclude` factory built from
     ``x == 'a'`` and check that ``'abababccc'`` parses to ``['bbbccc']``.
     '''
     without_a = Exclude(lambda ch: ch == 'a')
     bs = without_a(Literal('b')[:, ...])
     cs = Literal('c')[:, ...]
     parsed = (bs + cs).parse_string('abababccc')
     assert parsed == ['bbbccc'], parsed
Пример #20
0
 def test_flatten(self):
     '''
     Check the string form of a three-literal ``&`` chain before and after
     building a parser with only ``flatten()`` configured.
     '''
     chain = Literal('a') & Literal('b') & Literal('c')
     before = str(chain)
     assert before == "And(And, Literal)", before

     chain.config.clear().flatten()
     parser = chain.get_parse_string()
     after = str(parser.matcher)
     assert after == "And(Literal, Literal, Literal)", after
Пример #21
0
def Newline():
    '''
    Match newline (Unix) or carriage return newline (Windows).

    The Unix form is the first alternative.
    '''
    unix = Literal('\n')
    windows = Literal('\r\n')
    return Or(unix, windows)