def test_trace(self):
    '''
    Check the text that TraceVariables writes while a small query grammar
    parses its input.

    The grammar is built inside ``TraceVariables(out=buffer)``, the input
    is parsed, and the accumulated contents of ``buffer`` are compared with
    ``expected``.  The same parse and comparison are then repeated.
    '''
    buffer = StringIO()
    # NOTE(review): the labels in the expected trace below (phrase, word,
    # text, query) match the local names bound inside this block, so
    # presumably renaming these locals would change the trace -- confirm
    # before editing.
    with TraceVariables(out=buffer):
        word = ~Lookahead('OR') & Word()
        phrase = String()
        with DroppedSpace():
            # `|` and the slice bind tighter than `>`, so `list` is applied
            # to the whole repeated (phrase | word) expression.
            text = (phrase | word)[1:] > list
            query = text[:, Drop('OR')]
    # NOTE(review): this literal appears on a single line in the view under
    # review; the trace presumably has one entry per line with aligned
    # columns -- confirm the exact whitespace against real trace output.
    expected = ''' phrase failed stream = 'spicy meatballs OR... word = ['spicy'] stream = ' meatballs OR "el ... phrase failed stream = 'meatballs OR "el b... word = ['meatballs'] stream = ' OR "el bulli rest... phrase failed stream = 'OR "el bulli resta... word failed stream = 'OR "el bulli resta... phrase failed stream = ' OR "el bulli rest... word failed stream = ' OR "el bulli rest... text = [['spicy', 'meatballs']] stream = ' OR "el bulli rest... phrase = ['el bulli restaurant'] stream = '' phrase failed stream = '' word failed stream = '' text = [['el bulli restaurant']] stream = '' query = [['spicy', 'meatballs'], ['el... stream = '' '''
    query.config.auto_memoize(full=True)
    query.parse('spicy meatballs OR "el bulli restaurant"')
    trace = buffer.getvalue()
    # On failure the message shows the actual trace wrapped in triple
    # quotes, ready to be compared with (or pasted over) `expected`.
    assert trace == expected, '"""' + trace + '"""'
    # check caching works: after a second, identical parse the buffer
    # contents must still equal `expected`
    query.parse('spicy meatballs OR "el bulli restaurant"')
    trace = buffer.getvalue()
    assert trace == expected, '"""' + trace + '"""'
def __init__(self, clean_html=True):
    '''
    Store the configuration flag and build the number-matching helpers.

    clean_html -- stored unchanged on ``self.clean_html``.

    Besides the flag, this sets the punctuation string, four sets of
    exception tokens and two LEPL matchers used to find real numbers.
    '''
    self.clean_html = clean_html
    self._punctuation = '!"#&\'()*+,.;<=>?@[\\]^_`{|}~'

    # Exception tokens, one set per attribute.
    # NOTE(review): "lctx"/"rctx" presumably mean left/right context and the
    # digit a distance from the number -- confirm against the code that
    # reads these sets.
    self._lctx_1_exceptions = set('/ :'.split())
    self._lctx_2_exceptions = set('discount redeem voucher'.split())
    self._rctx_1_exceptions = set('/ : th am pm hour hours %'.split())
    self._rctx_2_exceptions = set('discount redeem voucher'.split())

    # LEPL Real Number Matchers (w/thousands)
    # Zero or more ",ddd" groups; the same matcher object is reused in both
    # alternatives below.
    digit_triple = Add(Digit()[3])
    comma_groups = Join(Drop(','), digit_triple)[:]
    groups_with_fraction = Join(comma_groups, Any('.'), UnsignedInteger())
    groups_without_fraction = Join(comma_groups, Optional(Any('.')))
    thousands_tail = Or(groups_with_fraction, groups_without_fraction)

    # Either an integer followed by the grouped tail, or a plain unsigned
    # real; `float` is applied to the result through `>>`.
    grouped_number = Join(UnsignedInteger(), thousands_tail)
    real_number = Or(grouped_number, UnsignedReal()) >> float

    # Anything that is not a real number; shared on both sides of the number.
    filler = Join(Star(AnyBut(real_number)))
    self._real_partition_matcher = Star(And(filler, real_number, filler))

    # Numbers separated by dropped runs of whitespace, commas or hyphens.
    separator = Drop(Star(Or(Whitespace(), Any(',-'))))
    self._real_simple_matcher = real_number[:, separator]
def test_node(self):
    '''
    Check that parsing '1' yields a ``Term`` node wrapping the named
    number, and that the node renders as expected.
    '''
    # The class name appears in the rendered tree, so it must stay `Term`.
    class Term(Node):
        pass

    named_digit = Any('1') > 'number'
    term = named_digit > Term
    factor = term | Drop(Optional(term))

    cleared_config = factor.config.clear()
    cleared_config.compose_transforms()
    parse = factor.get_parse_string()

    parsed = parse('1')
    ast = parsed[0]
    assert type(ast) == Term, type(ast)
    assert ast[0] == '1', ast[0]
    # NOTE(review): the literal is reproduced as it appears in the view under
    # review; the rendered tree presumably spans two lines -- confirm the
    # line break and indentation against the real output of str26.
    assert str26(ast) == """Term `- number '1'""", ast
def mkhex(char, n):
    '''
    Build a matcher for a hexadecimal escape introduced by `char`.

    char -- passed to ``Any`` to match the introducing character, whose
            result is dropped.
    n    -- repetition count used in the ``[n, ...]`` slice applied to the
            hex-digit matcher.

    Each result is converted with ``chr(int(x, 16))``, i.e. the matched hex
    text is read as a base-16 code point and replaced by that character.
    '''
    from lepl.matchers.derived import Drop

    introducer = Drop(Any(char))
    hex_digits = Any('0123456789abcdefABCDEF')[n, ...]
    # `+` binds tighter than `>>`, so the conversion applies to the whole
    # introducer-plus-digits expression; the parentheses make that explicit.
    return (introducer + hex_digits) >> (lambda digits: chr(int(digits, 16)))
def __init__(self, space=None):
    '''
    Initialise the parent class with a dropped space matcher.

    space -- matcher to use as the separator; when ``None`` (the default)
             ``Space()[:]`` is used instead.

    Whichever matcher is chosen is wrapped in ``Drop`` before being passed
    to the parent constructor.
    '''
    # Imported here rather than at module level, as in the original.
    from lepl.matchers.derived import Space, Drop

    chosen = Space()[:] if space is None else space
    super(DroppedSpace, self).__init__(Drop(chosen))
def test_not(self):
    '''
    With ``not_=True`` and the pattern ``[0-9]``, parsing '12a' must yield
    only the result ['a'].
    '''
    # Any number of dropped characters followed by one kept character.
    last_char = Drop(Any()[:]) & Any()
    matcher = PostMatch(last_char, r'[0-9]', not_=True)
    matcher.config.no_full_first_match()

    expected = [['a']]
    results = list(matcher.parse_all('12a'))
    assert results == expected, results
def test_normal(self):
    '''
    With the pattern ``[0-9]`` and no inversion, parsing '12a' must yield
    ['2'] and then ['1'].
    '''
    # Any number of dropped characters followed by one kept character.
    last_char = Drop(Any()[:]) & Any()
    matcher = PostMatch(last_char, r'[0-9]')
    matcher.config.no_full_first_match()

    expected = [['2'], ['1']]
    results = list(matcher.parse_all('12a'))
    assert results == expected, results
def build(self): ''' Construct the parser. ''' # Avoid dependency loops from lepl.matchers.derived import Drop, Eos, AnyBut, Upper from lepl.matchers.core import Any, Lookahead, Literal, Delayed from lepl.matchers.error import make_error from lepl.matchers.variables import TraceVariables from lepl.support.node import node_throw with TraceVariables(False): # these two definitions enforce the conditions above, providing only # special characters appear as literals in the grammar escaped = Drop(self.alphabet.escape) & self.alphabet.escaped raw = ~Lookahead(self.alphabet.escape) & \ AnyBut(self.alphabet.illegal) close = Drop(')') extend = (Drop('(*') & Upper()[1:, ...] & close) >> self.extend single = escaped | raw | extend any_ = Literal('.') >> self.dot letter = single >> self.dup pair = single & Drop('-') & single > self.tup interval = pair | letter brackets = Drop('[') & interval[1:] & Drop(']') inverted = Drop('[^') & interval[1:] & Drop(']') >= self.invert char = inverted | brackets | letter | any_ | extend > self.char item = Delayed() open = Drop('(?:') range = Drop(self.alphabet.escape) & self.alphabet.range seq = (char | item | range)[0:] > self.sequence group = open & seq & close alts = open & seq[2:, Drop('|')] & close > self.choice star = (alts | group | char) & Drop('*') > self.star plus = (alts | group | char) & Drop('+') > self.plus opt = (alts | group | char) & Drop('?') > self.option bad_grp = (Drop('(') & ~Lookahead('?:') & seq & close) \ ** make_error( "Lepl's own regular expressions do not currently " "support matched groups.\n" "Use '(?:...)' to group expressions without " "matching.") item += alts | group | star | plus | opt | bad_grp expr = ((char | item)[:] & Drop(Eos())) >> node_throw # Empty config here avoids loops if the default config includes # references to alphabets expr.config.clear() return expr.parse_string
def build(self):
    '''
    Construct the parser.

    The grammar for regular expressions is assembled from Lepl matchers,
    using ``self.alphabet`` for the escape and escaped definitions and the
    methods of ``self`` (dot, dup, tup, extend, invert, char, sequence,
    choice, star, plus, option) as the actions applied to matched
    fragments.

    Returns the ``parse_string`` attribute of the top-level expression,
    after its configuration has been cleared.
    '''
    # Avoid dependency loops
    from lepl.matchers.derived import Drop, Eos, AnyBut, Upper
    from lepl.matchers.core import Any, Lookahead, Literal, Delayed

    # A single character is either an escape followed by one of the
    # escaped characters, or (when not at an escape) anything outside the
    # escaped set -- so special characters only appear as literals in the
    # grammar when escaped.
    escaped_char = Drop(self.alphabet.escape) + Any(self.alphabet.escaped)
    plain_char = (~Lookahead(self.alphabet.escape)) + \
        AnyBut(self.alphabet.escaped)
    single = escaped_char | plain_char

    wildcard = Literal('.') >> self.dot
    letter = single >> self.dup
    # Parentheses below only spell out the grouping Python already applies:
    # `&` and `|` bind tighter than `>` and `>=`.
    pair = (single & Drop('-') & single) > self.tup
    extend = (Drop('(*') & Upper()[1:, ...] & Drop(')')) >> self.extend
    interval = pair | letter | extend
    brackets = Drop('[') & interval[1:] & Drop(']')
    inverted = (Drop('[^') & interval[1:] & Drop(']')) >= self.invert
    char = (inverted | brackets | letter | wildcard | extend) > self.char

    # Forward declaration: `item` is used by `seq` before its alternatives
    # are supplied by the `+=` below.
    item = Delayed()
    seq = (char | item)[0:] > self.sequence
    group = Drop('(') & seq & Drop(')')
    alts = (Drop('(') & seq[2:, Drop('|')] & Drop(')')) > self.choice
    star = ((alts | group | char) & Drop('*')) > self.star
    plus = ((alts | group | char) & Drop('+')) > self.plus
    opt = ((alts | group | char) & Drop('?')) > self.option
    item += alts | group | star | plus | opt

    # The whole input must be consumed (Eos).
    expr = (char | item)[:] & Drop(Eos())

    # Empty config here avoids loops if the default config includes
    # references to alphabets
    expr.config.clear()
    return expr.parse_string