def just_tokenize(*tokens):
    """Build a checker that tokenizes text and verifies a lossless round trip.

    Returns a function which tokenizes its argument using a grammar made
    from *tokens*, asserts that concatenating the token values reproduces
    the input exactly, and returns the token list.
    """
    grammar = pgm.Grammar(noop, tokens)

    def check(text):
        found = grammar.get_tokens(text)
        # Re-joining every token's text must give back the original input.
        rebuilt = ''.join(one.value for one in found)
        assert rebuilt == text
        return found

    return check
from codetalker import pgm
from codetalker.pgm.tokens import STRING, ID, NUMBER, WHITE, NEWLINE
from codetalker.pgm.special import star, plus, _or
from codetalker.pgm.grammar import ParseError


def start(rule):
    """Top-level rule: one or more values."""
    rule | plus(value)


def value(rule):
    """A value is a string, an identifier, or a number."""
    rule | STRING | ID | NUMBER


grammar = pgm.Grammar(start=start,
                      tokens=[STRING, ID, NUMBER, WHITE, NEWLINE],
                      ignore=[WHITE, NEWLINE])


def test_one():
    """Round trip: stringifying the parse tree reproduces the input text."""
    text = '"a string" an_id 12 14.3\n"and\\"12" .3'
    tree = grammar.process(text)
    assert str(tree) == text


if __name__ == '__main__':
    # Snapshot with list(): iterating globals() directly would raise
    # "dictionary changed size during iteration" on Python 3, because the
    # loop variables (name, fn) are themselves inserted into globals().
    for name, fn in list(globals().items()):
        if name.startswith('test_'):
            fn()
            # print() with a single argument behaves identically on
            # Python 2 and 3, unlike the bare print statement used before.
            print('test passed')
    print('Finished!')
#!/usr/bin/env python from codetalker import pgm from codetalker.pgm.tokens import INT, WHITE, CharToken, ID, STRING, SSTRING from codetalker.pgm.special import star, plus, _or, no_ignore, _not from codetalker.pgm.errors import ParseError class SYMBOL(CharToken): chars = '@;}' def at(rule): rule | (no_ignore('@', ID), _or(STRING, SSTRING, star(_not(_or(';','}')))), ';') rule | star(_not(_or(';','}'))) g = pgm.Grammar(start=at, tokens=[SYMBOL, ID, STRING, SSTRING, WHITE], ignore=[WHITE]) from codetalker import testing parse_rule = testing.parse_rule(__name__, g) parse_rule(at, ( '@one "hi";', '@two "ho" ;', '@three lots of stuff;', '@four many" m"ore;', 'random junk', '@I know you can', '@do "it" yes', )) if __name__ == '__main__':
def test_three():
    """Building a grammar from start3 is expected to raise AstError.

    Fixes: the Python-2-only ``except AstError, e`` syntax (invalid on
    Python 3) is replaced by the version-portable ``except AstError:``,
    and the unused locals ``g3`` / ``e`` are dropped.
    """
    try:
        pgm.Grammar(start=start3, tokens=[], ignore=[])
    except AstError:
        # Expected: start3 does not yield a valid AST definition.
        pass
# NOTE(review): this chunk starts mid-file -- pgm, re, ReToken, WHITE,
# NEWLINE, INT and INDENT are presumably imported above this view; confirm
# against the full file.
from codetalker.pgm.special import star, plus, _or
from codetalker.pgm.grammar import ParseError
from codetalker.cgrammar import TokenError


def start(rule):
    # Trivial start rule: matches the literal word 'what'.
    rule | 'what'


class SMALL(ReToken):
    # Regex token matching exactly the text 'hello'.
    rx = re.compile('hello')


# Both grammars enable indentation tracking (indent=True), so synthetic
# indent/dedent tokens are generated from leading-whitespace changes.
grammar = pgm.Grammar(start=start, tokens=[SMALL, WHITE, NEWLINE], indent=True, ignore=[WHITE])
g2 = pgm.Grammar(start=start, tokens=[INT, WHITE, NEWLINE], indent=True, ignore=[WHITE])


def test_indent():
    # 'hello\n hello' tokenizes to five tokens, with a synthesized INDENT
    # at position 2 (presumably SMALL, NEWLINE, INDENT, SMALL, EOF --
    # TODO confirm the exact sequence).
    tokens = grammar.get_tokens('hello\n hello')
    assert len(tokens) == 5
    assert isinstance(tokens[2], INDENT) # tokens[2][0] == INDENT


# NOTE(review): truncated at this chunk boundary; the body is out of view.
def test_dedent():
&& || ; not using +- */% ** () '''
# ^ tail of a module docstring opened before this chunk (the ''' closes it).

# Arithmetic expression grammar built with the binop() helper; the three
# operator groups appear to define precedence tiers -- additive (- +),
# multiplicative (* / %), and power (**) -- over NUMBER values, with
# parenthesized sub-expressions enabled (paren=True). TODO confirm the
# tier ordering against binop()'s definition.
expression = binop(list('-+'), list('*/%'), ['**'], value=NUMBER, ops_token=OP, name='BinOp', paren=True)

grammar = pgm.Grammar(start=expression, tokens=[SYMBOL, OP], ignore=[WHITE, NEWLINE], ast_tokens=[NUMBER])
m = pgm.Translator(grammar)
ast = grammar.ast_classes

import operator

# Map each operator's text to its Python implementation.
# NOTE(review): operator.div exists only on Python 2; Python 3 would need
# operator.truediv (or floordiv) -- confirm the target interpreter version.
ops = {
    '**': operator.pow,
    '*': operator.mul,
    '/': operator.div,
    '%': operator.mod,
    '+': operator.add,
    '-': operator.sub
}
def start(rule):
    """Top-level rule: an identifier, a colon, then a value."""
    rule | (ID, ':', value)


def value(rule):
    """A value: an ID optionally extended by '-'-joined INT/ID segments."""
    rule | (ID, star('-', _or(INT, ID)))
    # Whitespace inside a value is significant ('value - 2' must not match),
    # so the usual ignore list is suspended within this rule.
    rule.dont_ignore = True


# First tuple: inputs that must parse; second tuple: inputs that must fail.
strings = (('name : value', 'name : value-2', 'name : value-or-34-others',
            'name: lots-of-white '),
           ('name: -value', '32: hi', 'name: value - 2', 'name : some-value- end'))

g = pgm.Grammar(start=start, tokens=[SYMBOL, ID, INT, WHITE], ignore=[WHITE])


def mpass(what):
    """Build a test callable asserting that *what* parses successfully."""
    def meta():
        g.process(what)
    return meta


def mfail(what):
    """Build a test callable asserting that parsing *what* raises ParseError.

    Bug fix: the original swallowed the ParseError but also returned
    silently when parsing *succeeded*, so the "must fail" inputs could
    never actually fail the test run. The ``else`` branch now reports a
    parse that should not have succeeded.
    """
    def meta():
        try:
            g.process(what)
        except ParseError:
            pass
        else:
            raise AssertionError('ParseError expected for %r' % (what,))
    return meta
def make_grammar(start):
    """Construct the standard grammar for *start*.

    Uses the full token set, with whitespace and C-style comments ignored.
    """
    token_set = [STRING, ID, NUMBER, CCOMMENT, SYMBOL, NEWLINE, EOF, WHITE]
    skipped = [WHITE, CCOMMENT]
    return pgm.Grammar(start=start, tokens=token_set, ignore=skipped)