Пример #1
0
    def test_trace(self):
        """
        Check the text that TraceVariables writes while a small query
        grammar parses an input.

        The matchers are defined inside a TraceVariables block whose output
        goes to an in-memory buffer.  After parsing, the buffer must equal
        the expected text exactly, and must still equal it after the same
        input has been parsed a second time.
        """
        # Trace output is collected here instead of going to a real stream.
        buffer = StringIO()
        with TraceVariables(out=buffer):
            # The labels in the expected text below (word, phrase, text,
            # query) are the same as these variable names.
            # NOTE(review): presumably TraceVariables takes the labels from
            # the variable names, so renaming them would change the trace -
            # confirm before refactoring.
            word = ~Lookahead('OR') & Word()
            phrase = String()
            with DroppedSpace():
                text = (phrase | word)[1:] > list
                query = text[:, Drop('OR')]
        # Compared character-for-character with the buffer, so the spacing
        # and the trailing newline inside this literal are significant.
        expected = '''      phrase failed                             stream = 'spicy meatballs OR...
        word = ['spicy']                        stream = ' meatballs OR "el ...
      phrase failed                             stream = 'meatballs OR "el b...
        word = ['meatballs']                    stream = ' OR "el bulli rest...
      phrase failed                             stream = 'OR "el bulli resta...
        word failed                             stream = 'OR "el bulli resta...
      phrase failed                             stream = ' OR "el bulli rest...
        word failed                             stream = ' OR "el bulli rest...
        text = [['spicy', 'meatballs']]         stream = ' OR "el bulli rest...
      phrase = ['el bulli restaurant']          stream = ''
      phrase failed                             stream = ''
        word failed                             stream = ''
        text = [['el bulli restaurant']]        stream = ''
       query = [['spicy', 'meatballs'], ['el... stream = ''
'''
        query.config.auto_memoize(full=True)
        query.parse('spicy meatballs OR "el bulli restaurant"')
        trace = buffer.getvalue()
        # On failure the message shows the actual trace wrapped in triple
        # quotes.
        assert trace == expected, '"""' + trace + '"""'
        # Check caching works: the buffer is not reset between parses, so
        # this assertion only holds if the second parse adds no trace output.
        query.parse('spicy meatballs OR "el bulli restaurant"')
        trace = buffer.getvalue()
        assert trace == expected, '"""' + trace + '"""'

        
Пример #2
0
    def __init__(self, clean_html=True):
        """
        Store the `clean_html` flag, the punctuation and context-exception
        tables, and build the LEPL matchers used to find real numbers.

        :param clean_html: stored unchanged as `self.clean_html`.
        """
        self.clean_html = clean_html

        self._punctuation = '!"#&\'()*+,.;<=>?@[\\]^_`{|}~'

        # Exception tables: one entry per whitespace-separated token.
        self._lctx_1_exceptions = set('/ :'.split())
        self._lctx_2_exceptions = set('discount redeem voucher'.split())
        self._rctx_1_exceptions = set('/ : th am pm hour hours %'.split())
        self._rctx_2_exceptions = set('discount redeem voucher'.split())

        # Real-number matchers that accept ",ddd" thousands groups.
        thousands = Join(Drop(','), Add(Digit()[3]))[:]
        with_fraction = Join(thousands, Any('.'), UnsignedInteger())
        without_fraction = Join(thousands, Optional(Any('.')))
        grouped_tail = Or(with_fraction, without_fraction)
        grouped_number = Join(UnsignedInteger(), grouped_tail)
        number = Or(grouped_number, UnsignedReal()) >> float

        # A number surrounded on both sides by joined non-number text.
        filler = Join(Star(AnyBut(number)))
        self._real_partition_matcher = Star(And(filler, number, filler))

        # Numbers separated by dropped runs of whitespace, ',' or '-'.
        separator = Drop(Star(Or(Whitespace(), Any(',-'))))
        self._real_simple_matcher = number[:, separator]
Пример #3
0
    def test_node(self):
        
        class Term(Node): pass

        number      = Any('1')                             > 'number'
        term        = number                               > Term
        factor      = term | Drop(Optional(term))
        
        factor.config.clear().compose_transforms()
        p = factor.get_parse_string()
        ast = p('1')[0]
        assert type(ast) == Term, type(ast)
        assert ast[0] == '1', ast[0]
        assert str26(ast) == """Term
 `- number '1'""", ast
Пример #4
0
 def mkhex(char, n):
     """
     Build a matcher for a hexadecimal character escape.

     The matcher is a dropped `Any(char)` marker followed by hexadecimal
     digits repeated according to the `[n, ...]` specification; the matched
     text is converted to a character with `chr(int(text, 16))`.
     """
     from lepl.matchers.derived import Drop

     def to_char(digits):
         # Read the digits as a base-16 code point.
         return chr(int(digits, 16))

     marker = Drop(Any(char))
     hex_digits = Any('0123456789abcdefABCDEF')[n, ...]
     return (marker + hex_digits) >> to_char
Пример #5
0
 def __init__(self, space=None):
     """
     Initialise the parent class with a dropped space matcher.

     :param space: matcher to wrap in Drop; when None, `Space()[:]` is
         used instead.
     """
     from lepl.matchers.derived import Space, Drop
     dropped = Drop(Space()[:] if space is None else space)
     super(DroppedSpace, self).__init__(dropped)
Пример #6
0
 def test_not(self):
     """
     With not_=True and the pattern [0-9], parsing '12a' must yield only
     the result ['a'].
     """
     inner = Drop(Any()[:]) & Any()
     post = PostMatch(inner, r'[0-9]', not_=True)
     post.config.no_full_first_match()
     found = list(post.parse_all('12a'))
     assert found == [['a']], found
Пример #7
0
 def test_normal(self):
     """
     With the pattern [0-9] and no negation, parsing '12a' must yield the
     results ['2'] and ['1'], in that order.
     """
     inner = Drop(Any()[:]) & Any()
     post = PostMatch(inner, r'[0-9]')
     post.config.no_full_first_match()
     found = list(post.parse_all('12a'))
     assert found == [['2'], ['1']], found
Пример #8
0
    def build(self):
        '''
        Construct the parser.

        Assembles a grammar for regular expressions from the characters
        described by `self.alphabet`, attaching the callbacks defined on
        this object (dot, dup, tup, extend, invert, char, sequence, choice,
        star, plus, option) to the corresponding constructs.

        Returns the `parse_string` attribute of the top-level matcher,
        after that matcher's configuration has been cleared.
        '''

        # Avoid dependency loops
        # NOTE(review): Any is imported but not referenced in this function.
        from lepl.matchers.derived import Drop, Eos, AnyBut, Upper
        from lepl.matchers.core import Any, Lookahead, Literal, Delayed
        from lepl.matchers.error import make_error
        from lepl.matchers.variables import TraceVariables
        from lepl.support.node import node_throw

        # NOTE(review): the False argument presumably switches tracing off
        # for the matchers defined below - confirm against TraceVariables.
        with TraceVariables(False):

            # these two definitions enforce the conditions above, providing only
            # special characters appear as literals in the grammar
            # (the "conditions above" are outside this function)
            escaped = Drop(self.alphabet.escape) & self.alphabet.escaped
            raw      = ~Lookahead(self.alphabet.escape) & \
                            AnyBut(self.alphabet.illegal)
            close = Drop(')')
            # '(*' + upper-case name + ')' is handed to self.extend
            extend = (Drop('(*') & Upper()[1:, ...] & close) >> self.extend

            single = escaped | raw | extend

            any_ = Literal('.') >> self.dot
            letter = single >> self.dup
            # & binds tighter than >, so the whole 'a-b' match goes to tup
            pair = single & Drop('-') & single > self.tup

            interval = pair | letter
            brackets = Drop('[') & interval[1:] & Drop(']')
            # same contents as brackets, but opened with '[^' and passed
            # through self.invert
            inverted = Drop('[^') & interval[1:] & Drop(']') >= self.invert
            # | binds tighter than >, so self.char applies to the whole
            # alternative
            char = inverted | brackets | letter | any_ | extend > self.char

            # placeholder for the recursive part of the grammar; its
            # definition is supplied by the += further down
            item = Delayed()

            # NOTE: `open` and `range` shadow the builtins of the same name
            # for the rest of this function.
            open = Drop('(?:')
            range = Drop(self.alphabet.escape) & self.alphabet.range
            seq = (char | item | range)[0:] > self.sequence
            group = open & seq & close
            # at least two sequences separated by '|'
            alts = open & seq[2:, Drop('|')] & close > self.choice
            # postfix operators: the '*', '+' or '?' suffix is dropped and
            # the preceding match is passed to the matching callback
            star = (alts | group | char) & Drop('*') > self.star
            plus = (alts | group | char) & Drop('+') > self.plus
            opt = (alts | group | char) & Drop('?') > self.option
            # a '(' that is not followed by '?:' is paired with the error
            # message below
            bad_grp  = (Drop('(') & ~Lookahead('?:') & seq & close) \
                            ** make_error(
                                "Lepl's own regular expressions do not currently "
                                "support matched groups.\n"
                                "Use '(?:...)' to group expressions without "
                                "matching.")

            item += alts | group | star | plus | opt | bad_grp

            # the whole input must be consumed (Eos); results are passed
            # through node_throw
            # NOTE(review): node_throw presumably raises any error created
            # by make_error above - confirm.
            expr = ((char | item)[:] & Drop(Eos())) >> node_throw

        # Empty config here avoids loops if the default config includes
        # references to alphabets
        expr.config.clear()
        return expr.parse_string
Пример #9
0
    def build(self):
        '''
        Assemble the regular expression grammar for `self.alphabet` and
        return the `parse_string` attribute of the top-level matcher.

        The callbacks defined on this object (dot, dup, tup, extend, invert,
        char, sequence, choice, star, plus, option) are attached to the
        corresponding constructs.
        '''

        # Imported here rather than at module level to avoid dependency
        # loops
        from lepl.matchers.derived import Drop, Eos, AnyBut, Upper
        from lepl.matchers.core import Any, Lookahead, Literal, Delayed

        # Single characters: either the escape followed by an escaped
        # character, or an unescaped character that does not start an escape.
        quoted = Drop(self.alphabet.escape) + Any(self.alphabet.escaped)
        plain = (~Lookahead(self.alphabet.escape)
                 + AnyBut(self.alphabet.escaped))
        atom = quoted | plain

        wildcard = Literal('.') >> self.dot
        singleton = atom >> self.dup
        span = (atom & Drop('-') & atom) > self.tup
        named = (Drop('(*') & Upper()[1:, ...] & Drop(')')) >> self.extend

        # Character classes: '[...]' and the inverted form '[^...]'.
        member = span | singleton | named
        bracketed = Drop('[') & member[1:] & Drop(']')
        negated = (Drop('[^') & member[1:] & Drop(']')) >= self.invert
        char = (negated | bracketed | singleton | wildcard | named) > self.char

        # Placeholder for the recursive part; defined by the += below.
        nested = Delayed()

        sequence = (char | nested)[0:] > self.sequence
        group = Drop('(') & sequence & Drop(')')
        choice = (Drop('(') & sequence[2:, Drop('|')] & Drop(')')) > self.choice

        # Postfix operators; the operator character itself is dropped.
        starred = ((choice | group | char) & Drop('*')) > self.star
        plussed = ((choice | group | char) & Drop('+')) > self.plus
        optional = ((choice | group | char) & Drop('?')) > self.option

        nested += choice | group | starred | plussed | optional

        grammar = (char | nested)[:] & Drop(Eos())

        # An empty config avoids loops if the default config includes
        # references to alphabets
        grammar.config.clear()
        return grammar.parse_string