# NOTE: assumed import header — the original section starts without one. The
# grammar/parser module paths appear verbatim in the import blocks further down
# in this section; where exactly LexerGenerator, SyntaxTree, TreePosition,
# AttributeEvalGenerator, ParseError, the first1 helpers and the assert_*
# test helpers live is an assumption.
from nose.tools import assert_equal, assert_raises, assert_almost_equal  # assumed
from sleepy.syntactical_analysis.grammar import (  # assumed: all of these exported here
  Grammar, Production, AttributeGrammar, AttributeEvalGenerator, SyntaxTree,
  TreePosition, EPSILON, IGNORED_TOKEN)
from sleepy.syntactical_analysis.lexer import LexerGenerator  # assumed module path
from sleepy.syntactical_analysis.parser import (  # ParseError and first1 helpers assumed here
  ParserGenerator, ParseError, make_first1_sets, get_first1_set_for_word)


def test_AttributeGrammar_syn():
  g = Grammar(
    Production('S', 'S', '+', 'S'),
    Production('S', 'zero'),
    Production('S', 'digit')
  )
  attr_g = AttributeGrammar(
    g,
    syn_attrs={'res'},
    prod_attr_rules=[
      {'res.0': lambda res: res(1) + res(3)},
      {'res.0': lambda: 0},
      {'res.0': lambda res: res(1)}
    ],
    terminal_attr_rules={
      'digit': {'res.0': lambda word: int(word)}
    }
  )
  assert_equal(attr_g.attrs, {'res'})
  assert_equal(attr_g.syn_attrs, {'res'})
  assert_equal(attr_g.inh_attrs, set())
  assert_equal(attr_g.get_terminal_syn_attr_eval('digit', '6'), {'res': 6})
  assert_equal(attr_g.get_terminal_syn_attr_eval('zero', '0'), {})
  assert_equal(attr_g.eval_prod_syn_attr(g.prods[0], {}, [{'res': 4}, {}, {'res': 7}]), {'res': 4 + 7})
  assert_equal(attr_g.eval_prod_syn_attr(g.prods[2], {}, [{'res': 8}]), {'res': 8})
  assert_equal(attr_g.eval_prod_syn_attr(g.prods[1], {}, [{}]), {'res': 0})
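# A note on the rule convention above (as the assertions confirm): in a key
# like 'res.0', the index selects a symbol of the production, with 0 the
# left-hand side and i >= 1 the i-th right-hand-side symbol. Inside a rule,
# the callable res(i) reads the evaluated 'res' attribute of symbol i. For
# S -> S + S, 'res.0': lambda res: res(1) + res(3) hence adds the two S
# children and skips the '+' terminal at position 2, matching
# eval_prod_syn_attr(g.prods[0], {}, [{'res': 4}, {}, {'res': 7}]) == {'res': 11}.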
def test_ParserGenerator_simple_ast():
  class Ast:
    def __init__(self, pos: TreePosition):
      self.pos = pos

  class ConstantAst(Ast):
    def __init__(self, pos, num):
      super().__init__(pos)
      self.num = num

  class SumAst(Ast):
    def __init__(self, pos, ast, num):
      super().__init__(pos)
      self.ast = ast
      self.num = num

  g = Grammar(
    Production('A', 'B'),
    Production('B', 'num'),
    Production('B', 'B', '+', 'num')
  )
  attr_g = AttributeGrammar(
    g,
    inh_attrs=set(),
    syn_attrs={'ast', 'num'},
    prod_attr_rules=[
      {'ast': 'ast.1'},
      {'ast': lambda _pos, num: ConstantAst(_pos, num(1))},
      {'ast': lambda _pos, ast, num: SumAst(_pos, ast(1), num(3))}
    ],
    terminal_attr_rules={
      'num': {'num': lambda val: int(val)}
    }
  )
  assert_equal(set(g.terminals), {'num', '+'})
  assert_equal(attr_g.attrs, {'ast', 'num'})
  assert_equal(attr_g.syn_attrs, {'ast', 'num'})
  assert_equal(attr_g.inh_attrs, set())
  word = '1+2+3'
  tokens = ['num', '+', 'num', '+', 'num']
  tokens_pos = [0, 1, 2, 3, 4]
  print('tokens:', tokens, 'with decomposition', tokens_pos)
  parser = ParserGenerator(g)
  right_analysis, attr_eval = parser.parse_syn_attr_analysis(attr_g, word, tokens, tokens_pos)
  print('right analysis:', right_analysis)
  print('attribute eval:', attr_eval)
  assert 'ast' in attr_eval
  ast3 = attr_eval['ast']
  assert isinstance(ast3, SumAst)
  assert ast3.pos == TreePosition(word, 0, 5)
  assert ast3.num == 3
  ast2 = ast3.ast
  assert isinstance(ast2, SumAst)
  assert ast2.pos == TreePosition(word, 0, 3)
  assert ast2.num == 2
  ast1 = ast2.ast
  assert isinstance(ast1, ConstantAst)
  assert ast1.pos == TreePosition(word, 0, 1)
  assert ast1.num == 1
def test_Grammar():
  g = Grammar(
    Production('S2', 'S'),
    Production('S', 'A', 'b'),
    Production('A', 'a', 'a')
  )
  assert_equal(set(g.symbols), {'S2', 'S', 'A', 'a', 'b'})
  assert_equal(set(g.terminals), {'a', 'b'})
  assert_equal(set(g.non_terminals), {'S2', 'S', 'A'})
  assert_equal(set(g.get_prods_for('S')), {g.prods[1]})
def test_ParserGenerator_simple_lookahead():
  g = Grammar(
    Production('S2', 'S'),
    Production('S', 'b', 'b'),
    Production('S', 'a', 'b')
  )
  parser = ParserGenerator(g)
  assert_equal(parser.parse_analysis('bb', ['b', 'b'], [0, 1]), (g.prods[0], g.prods[1]))
  assert_equal(parser.parse_analysis('ab', ['a', 'b'], [0, 1]), (g.prods[0], g.prods[2]))
  with assert_raises(ParseError):
    parser.parse_analysis('ba', ['b', 'a'], [0, 1])
def test_ParserGenerator_simple_left_recursive_epsilon():
  g = Grammar(
    Production('S2', 'S'),
    Production('S', 'S', 'a'),
    Production('S')
  )
  parser = ParserGenerator(g)
  for count in [0, 1, 2, 3, 20, 100, 10000]:
    assert_equal(
      parser.parse_analysis('a' * count, ['a'] * count, list(range(count))),
      (g.prods[0],) + count * (g.prods[1],) + (g.prods[2],))
def test_ParserGenerator_simple_left_recursive():
  g = Grammar(
    Production('S2', 'S'),
    Production('S', 'S', 'a'),
    Production('S', 'a')
  )
  parser = ParserGenerator(g)
  for count in [1, 2, 3, 20, 100, 10000]:
    assert_equal(
      parser.parse_analysis('a' * count, ['a'] * count, list(range(count))),
      (g.prods[0],) + (count - 1) * (g.prods[1],) + (g.prods[2],))
  with assert_raises(ParseError):
    parser.parse_analysis('', [], [])
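# Both left-recursion tests above use grammars that no top-down (LL) parser
# accepts; that ParserGenerator handles them, including 10000 repetitions,
# suggests a bottom-up (LR-style) construction, where left recursion is
# unproblematic and keeps the analysis of 'aaa...' linear in its length.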
def test_ParserGenerator_arithmetic():
  # left associative, with operator precedence
  op_plus = Production('Sum', 'Sum', '+', 'Prod')
  op_minus = Production('Sum', 'Sum', '-', 'Prod')
  op_mult = Production('Prod', 'Prod', '*', 'Term')
  op_div = Production('Prod', 'Prod', '/', 'Term')
  const_terms = {Production('Term', str(i)): i for i in range(11)}  # we can clean this up once we have proper tokens
  g = Grammar(*[
    Production('Expr', 'Sum'),
    op_plus, op_minus,
    Production('Sum', 'Prod'),
    op_mult, op_div,
    Production('Prod', 'Term'),
    Production('Term', '(', 'Sum', ')')] + list(const_terms.keys()))
  parser = ParserGenerator(g)

  def evaluate(word: str):
    assert isinstance(word, str)
    tokens = list(word)
    tokens_pos = list(range(len(tokens)))
    print('input word:', word)
    analysis = parser.parse_analysis(word, tokens, tokens_pos)
    print('analysis:', analysis)
    stack = []
    for prod in reversed(analysis):
      if prod in const_terms:
        stack.append(const_terms[prod])
      elif prod == op_plus:
        b, a = stack.pop(-1), stack.pop(-1)
        stack.append(a + b)
      elif prod == op_minus:
        b, a = stack.pop(-1), stack.pop(-1)
        stack.append(a - b)
      elif prod == op_mult:
        b, a = stack.pop(-1), stack.pop(-1)
        stack.append(a * b)
      elif prod == op_div:
        b, a = stack.pop(-1), stack.pop(-1)
        stack.append(a / b)
    assert len(stack) == 1
    result_value = stack[0]
    print('result:', result_value)
    assert result_value == eval(word)

  evaluate('1*2+3')
  evaluate('1+2*3')
  evaluate('(1+2)*3')
  evaluate('1+2+3')
  evaluate('1+2-3')
  evaluate('1-2+3')
  evaluate('3-2-1')
  evaluate('1*2+3*4')
  evaluate('4/2-1')
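# Why evaluate() walks the analysis in reverse: parse_analysis returns a
# right-most analysis, i.e. productions ordered from the start symbol
# downwards. Reversed, that is exactly the order in which a bottom-up parser
# would have reduced, so a value stack that pops two operands per operator
# production and pushes the result replays the computation: for '1*2+3' the
# reductions produce 1*2 = 2 first and then 2+3 = 5.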
def test_ParserGenerator_parse_tree_epsilon2():
  g = Grammar(
    Production('TopLevelExpr', 'ExprList'),
    Production('ExprList'),
    Production('ExprList', 'Expr', 'ExprList'),
    Production('Expr', 'Val', ';'),
    Production('Val', 'number')
  )
  parser = ParserGenerator(g)
  word = '42;'
  tokens = ['number', ';']
  tokens_pos = [0, 2]
  print(parser.parse_analysis(word, tokens, tokens_pos))
  print(parser.parse_tree(word, tokens, tokens_pos))
def test_make_first1_sets_epsilon():
  g = Grammar(
    Production('S', 'A', 'B'),
    Production('A', 'a'),
    Production('A'),
    Production('B', 'A'),
    Production('B', 'b', 'c')
  )
  first1 = make_first1_sets(g)
  print('first1 sets:', first1)
  assert_equal(first1['A'], {EPSILON, 'a'})
  assert_equal(first1['B'], {EPSILON, 'a', 'b'})
  assert_equal(first1['S'], {EPSILON, 'a', 'b'})
  assert_equal(get_first1_set_for_word(first1, ('B', 'c')), {'a', 'b', 'c'})
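# How these sets come about: A -> a | eps gives first1(A) = {'a', EPSILON};
# B -> A inherits both while B -> b c adds 'b'; and S -> A B contains EPSILON
# only because both A and B are nullable. For the word ('B', 'c'), EPSILON in
# first1(B) makes 'c' reachable as a first symbol, and EPSILON itself is
# dropped since the word as a whole cannot derive the empty word.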
def test_ParserGenerator_attr_syn():
  g = Grammar(
    Production('A', 'S'),
    Production('S', 'T', '+', 'S'),
    Production('S', 'T'),
    Production('T', 'zero'),
    Production('T', 'digit')
  )
  attr_g = AttributeGrammar(
    g,
    inh_attrs=set(),
    syn_attrs={'res'},
    prod_attr_rules=[
      {'res.0': lambda res: res(1)},
      {'res.0': lambda res: res(1) + res(3)},
      {'res.0': lambda res: res(1)},
      {'res.0': lambda res: 0},
      {'res.0': lambda res: res(1)}
    ],
    terminal_attr_rules={
      'digit': {'res.0': lambda value_: int(value_)}
    }
  )
  assert_equal(attr_g.attrs, {'res'})
  assert_equal(attr_g.syn_attrs, {'res'})
  assert_equal(attr_g.inh_attrs, set())
  word = '5+7'
  tokens = ['digit', '+', 'digit']
  tokens_pos = [0, 1, 2]
  print('tokens:', tokens, 'with decomposition', tokens_pos)
  parser = ParserGenerator(g)
  right_analysis, attr_eval = parser.parse_syn_attr_analysis(attr_g, word, tokens, tokens_pos)
  print('right analysis:', right_analysis)
  print('attribute eval (online):', attr_eval)
  assert_equal(right_analysis, (g.prods[0], g.prods[1], g.prods[2], g.prods[4], g.prods[4]))
  assert_equal(attr_eval, {'res': 5 + 7})
  tree = parser.parse_tree(word, tokens, tokens_pos)
  print('parse tree:', tree)
  assert_equal(
    tree,
    SyntaxTree(
      g.prods[0],
      SyntaxTree(
        g.prods[1],
        SyntaxTree(g.prods[4], None),
        None,
        SyntaxTree(g.prods[2], SyntaxTree(g.prods[4], None)))))
  attr_eval_gen = AttributeEvalGenerator(attr_g)
  tree_attr_eval = attr_eval_gen.eval_attrs(tree, word, tokens, tokens_pos)
  print('attribute eval (using tree):', tree_attr_eval)
  assert_equal(tree_attr_eval, {'res': 5 + 7})
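# This test deliberately computes the same attribute evaluation twice: once
# online while parsing (parse_syn_attr_analysis) and once over the explicit
# SyntaxTree afterwards (AttributeEvalGenerator.eval_attrs). Both routes must
# agree on {'res': 5 + 7} for the input '5+7'.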
def test_ParserGenerator_parse_tree_epsilon():
  g = Grammar(
    Production('S', 'A'),
    Production('A'),
    Production('A', 'B'),
    Production('B', 'a'),
    Production('B', 'a', 'B')
  )
  parser = ParserGenerator(g)
  word = 'aaa'
  tokens = ['a', 'a', 'a']
  tokens_pos = [0, 1, 2]
  assert_equal(
    parser.parse_analysis(word, tokens, tokens_pos),
    (g.prods[0], g.prods[2], g.prods[4], g.prods[4], g.prods[3]))
  assert_equal(
    parser.parse_tree(word, tokens, tokens_pos),
    SyntaxTree(
      g.prods[0],
      SyntaxTree(
        g.prods[2],
        SyntaxTree(
          g.prods[4], None,
          SyntaxTree(
            g.prods[4], None,
            SyntaxTree(g.prods[3], None))))))
def test_ParserGenerator_cfg_with_lexer():
  g = Grammar(
    Production('Grammar', 'Decl'),
    Production('Decl', 'Prod', ';', 'Decl'),
    Production('Decl', 'Prod'),
    Production('Prod', 'Symb', '->', 'Right'),
    Production('Right', 'Symbs', '|', 'Symbs'),
    Production('Right', 'Symbs'),
    Production('Symbs', 'Symb', 'Symbs'),
    Production('Symbs'),
  )
  lexer = LexerGenerator(
    [';', '->', '|', 'Symb'],
    ['; *|[\n\r ]+', ' *\\-> *', ' *\\| *', '([a-z]|[A-Z])+'])
  parser = ParserGenerator(g)
  word = 'A->Bc|B; B->ca'
  print('input word:', word)
  tokens, tokens_pos = lexer.tokenize(word)
  print('tokenized word:', tokens, 'with decomposition', tokens_pos)
  analysis = parser.parse_analysis(word, tokens, tokens_pos)
  print('right-most analysis:', analysis)
def test_make_first1_sets():
  g = Grammar(
    Production('S', 'S', 'O', 'S'),
    Production('S', '(', 'S', ')'),
    Production('S', '0'),
    Production('S', '1'),
    Production('O', '+'),
    Production('O', '*'),
  )
  assert_equal(set(g.terminals), {'0', '1', '(', ')', '+', '*'})
  assert_equal(set(g.non_terminals), {'S', 'O'})
  first1 = make_first1_sets(g)
  print('first1 sets:', first1)
  assert_equal(first1['S'], {'0', '1', '('})
  assert_equal(first1['O'], {'+', '*'})
  assert_equal(get_first1_set_for_word(first1, ('(', 'S')), {'('})
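# In this grammar no non-terminal is nullable, so the first1 set of a word is
# simply the first1 set of its first symbol: ('(', 'S') starts with the
# terminal '(', hence {'('} regardless of first1(S).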
def test_AttributeEvalGenerator_typed_arithmetic():
  import math
  import numpy as np
  lexer = LexerGenerator(
    token_names=[
      '(', ')', '+', '-', '*', '**', '/', '[', ']', ',', 'const', 'name', IGNORED_TOKEN],
    token_regex_table=[
      '\\(', '\\)', '\\+', '\\-', '\\*', '\\*\\*', '/', '\\[', '\\]', ',',
      '(0|[1-9][0-9]*)(\\.[0-9]+)?', '([a-z]|[A-Z])+', ' +'])
  # left associative (except **, which is right associative), with operator precedence
  g = Grammar(
    Production('Expr', 'Sum'),
    Production('Sum', 'Sum', '+', 'Prod'),
    Production('Sum', 'Sum', '-', 'Prod'),
    Production('Sum', 'Prod'),
    Production('Prod', 'Prod', '*', 'Pow'),
    Production('Prod', 'Prod', '/', 'Pow'),
    Production('Prod', 'Pow'),
    Production('Pow', 'Term', '**', 'Pow'),
    Production('Pow', 'Term'),
    Production('Term', '-', 'NegTerm'),
    Production('Term', 'NegTerm'),
    Production('NegTerm', '(', 'Sum', ')'),
    Production('NegTerm', 'name', '(', 'Sum', ')'),
    Production('NegTerm', 'const'),
    Production('NegTerm', '[', 'ExprList', ']'),
    Production('ExprList', 'Sum'),
    Production('ExprList', 'ExprList', ',', 'Sum'))
  ERROR = None

  def op_plus_res(type, res):
    left, right, left_type, right_type = res(1), res(3), type(1), type(3)
    if left is ERROR or right is ERROR:
      return ERROR
    if left_type == right_type in {'num', 'vec', 'mat'}:
      if np.shape(left) != np.shape(right):
        return ERROR
      return left + right
    return ERROR

  def op_minus_res(type, res):
    left, right, left_type, right_type = res(1), res(3), type(1), type(3)
    if left is ERROR or right is ERROR:
      return ERROR
    if left_type == right_type in {'num', 'vec', 'mat'}:
      if np.shape(left) != np.shape(right):
        return ERROR
      return left - right
    return ERROR

  def op_times_type(type):
    left_type, right_type = type(1), type(3)
    if left_type == right_type == 'num':
      return 'num'
    if (left_type == 'vec' and right_type == 'num') or (left_type == 'num' and right_type == 'vec'):
      return 'vec'
    if left_type == 'mat' or right_type == 'mat':
      return 'mat'
    return ERROR

  def op_times_res(type, res):
    left, right, left_type, right_type = res(1), res(3), type(1), type(3)
    if left is ERROR or right is ERROR:
      return ERROR
    if left_type not in {'num', 'vec', 'mat'} or right_type not in {'num', 'vec', 'mat'}:
      return ERROR
    if left_type == 'num' or right_type == 'num':
      return left * right
    if left_type == 'mat' or right_type == 'mat':
      if left_type == 'vec':
        assert right_type == 'mat'
        if np.shape(left)[0] != np.shape(right)[0]:
          return ERROR
        return np.matmul(left, right)
      if right_type == 'vec':
        assert left_type == 'mat'
        if np.shape(left)[1] != np.shape(right)[0]:
          return ERROR
        return np.matmul(left, right)
      assert left_type == right_type == 'mat'
      if np.shape(left)[1] != np.shape(right)[0]:
        return ERROR
      return np.matmul(left, right)
    return ERROR

  def op_divided_res(type, res):
    left, right, left_type, right_type = res(1), res(3), type(1), type(3)
    if left is ERROR or right is ERROR:
      return ERROR
    if left_type not in {'num', 'vec', 'mat'} or right_type not in {'num', 'vec', 'mat'}:
      return ERROR
    if right_type == 'num':
      return left / right
    return ERROR

  def op_pow_res(type, res):
    left, right, left_type, right_type = res(1), res(3), type(1), type(3)
    if left is ERROR or right is ERROR:
      return ERROR
    if left_type == right_type == 'num':
      assert isinstance(left, (float, int)) and isinstance(right, (float, int))
      return left**right
    if left_type == 'mat' and right_type == 'num':
      if np.shape(left)[0] != np.shape(left)[1]:
        return ERROR
      return left**right
    return ERROR

  def op_neg_res(type, res):
    type_, res_ = type(2), res(2)
    if res_ is ERROR:
      return ERROR
    if type_ in {'num', 'vec', 'mat'}:
      return -res_
    return ERROR

  def op_func_type(type, name):
    arg_type = type(3)
    if name(1) in {'ones', 'zeros'}:
      return 'vec'
    if name(1) in {'len'}:
      return 'num'
    return arg_type

  def op_func_res(type, res, name):
    arg, arg_type = res(3), type(3)
    if arg is ERROR:
      return ERROR
    if name(1) in {'ones', 'zeros'}:
      if arg_type != 'num' or arg <= 1:
        return ERROR
      assert isinstance(arg, (float, int))
      return (np.ones if name(1) == 'ones' else np.zeros)(int(arg))
    if name(1) == 'len':
      if arg_type != 'vec':
        return ERROR
      return np.linalg.norm(arg)
    if arg_type == 'num':
      if not hasattr(math, name(1)):
        return ERROR
      return getattr(math, name(1))(arg)
    return ERROR

  def op_const_vec_type(type_list, res_list):
    type_list_, res_list_ = type_list(2), res_list(2)
    assert len(type_list_) == len(res_list_)
    if len(type_list_) == 0:
      return ERROR
    type = type_list_[0]
    if not all(t == type for t in type_list_):
      return ERROR
    if type == 'num':
      return 'vec'
    if type == 'vec':
      return 'mat'
    return ERROR

  def op_const_vec_res(type_list, res_list):
    type_list_, res_list_ = type_list(2), res_list(2)
    assert len(type_list_) == len(res_list_)
    if len(type_list_) == 0:
      return ERROR
    type = type_list_[0]
    if not all(t == type for t in type_list_):
      return ERROR
    if type in {'num', 'vec'}:
      if type == 'vec':
        length = len(res_list_[0])
        if not all(len(vec) == length for vec in res_list_):
          return ERROR
      return np.array(res_list_)
    return ERROR

  attr_g = AttributeGrammar(
    g,
    syn_attrs={'type', 'res', 'name', 'type_list', 'res_list'},
    prod_attr_rules=[
      {'type': 'type.1', 'res': 'res.1'},
      {'type': 'type.1', 'res': op_plus_res},
      {'type': 'type.1', 'res': op_minus_res},
      {'type': 'type.1', 'res': 'res.1'},
      {'type': op_times_type, 'res': op_times_res},
      {'type': op_times_type, 'res': op_divided_res},
      {'type': 'type.1', 'res': 'res.1'},
      {'type': 'type.1', 'res': op_pow_res},
      {'type': 'type.1', 'res': 'res.1'},
      {'type': 'type.2', 'res': op_neg_res},
      {'type': 'type.1', 'res': 'res.1'},
      {'type': 'type.2', 'res': 'res.2'},
      {'type': op_func_type, 'res': op_func_res},
      {'type': 'type.1', 'res': 'res.1'},
      {'type': op_const_vec_type, 'res': op_const_vec_res},
      {'type_list': lambda type: [type(1)], 'res_list': lambda res: [res(1)]},
      {'type_list': lambda type, type_list: type_list(1) + [type(3)],
       'res_list': lambda res, res_list: res_list(1) + [res(3)]}],
    terminal_attr_rules={
      'const': {'res': lambda lit: float(lit), 'type': lambda _: 'num'},
      'name': {'name': lambda lit: lit}
    })
  parser = ParserGenerator(g)

  def evaluate(word, expected_result, expected_type=None):
    print('----')
    print('input word:', word)
    tokens, tokens_pos = lexer.tokenize(word)
    print('tokens:', tokens, 'with decomposition', tokens_pos)
    analysis, result = parser.parse_syn_attr_analysis(attr_g, word, tokens, tokens_pos)
    print('result:')
    print(result['res'])
    if isinstance(expected_result, (float, int)):
      assert_almost_equal(result['res'], expected_result)
    else:
      np.testing.assert_equal(result['res'], expected_result)
    if result['res'] is not ERROR:
      print('result type:', result['type'])
      assert_equal(result['type'], expected_type)

  evaluate('1*2+3', 5, 'num')
  evaluate('5-3', 2, 'num')
  evaluate('(1+2)*3', 9, 'num')
  evaluate('1+2+3', 6, 'num')
  evaluate('1-2+3', 2, 'num')
  evaluate('3-2-1', 0, 'num')
  evaluate('1*2+3*4', 14, 'num')
  evaluate('4/2-1', 1, 'num')
  evaluate('sin(3.1415)', 9.26535897e-5, 'num')
  evaluate('2**2**3', 256, 'num')
  evaluate('(2**2)**3', 64, 'num')
  evaluate('(3**2+4**2)**0.5', 5, 'num')
  evaluate('ones(4)', [1, 1, 1, 1], 'vec')
  evaluate('ones(4) * 5', [5, 5, 5, 5], 'vec')
  evaluate('ones(2) - ones(2) * 0', [1, 1], 'vec')
  evaluate('2 ** ones(2)', ERROR)
  evaluate('[1 , 2 , 3]', [1, 2, 3], 'vec')
  evaluate('[1, 2] + 2 * [3, 2]', [7, 6], 'vec')
  evaluate('[[1,2],[3,4]]', [[1, 2], [3, 4]], 'mat')
  evaluate('[ones(2)]', [[1, 1]], 'mat')
  evaluate('[ones(3), ones(4)]', ERROR)
  evaluate('zeros(0+2-2)', ERROR)
  evaluate('[[1,0,0],[0,1,0],[0,0,1]]', [[1, 0, 0], [0, 1, 0], [0, 0, 1]], 'mat')
  evaluate('[[1,0,0],[0,1,0],[0,0,1]]+[[0,1]]', ERROR)
  evaluate('[[1,0,0],[0,1,0],[0,0,1]]+[[1,0,0],[0,1,0],[0,0,1]]', [[2, 0, 0], [0, 2, 0], [0, 0, 2]], 'mat')
  evaluate('[[1,0,0],[0,1,0],[0,0,1]]+0*[[1,0,0],[0,1,0],[0,0,1]]', [[1, 0, 0], [0, 1, 0], [0, 0, 1]], 'mat')
  evaluate('[[1,0],[0,1]] ** 5', [[1, 0], [0, 1]], 'mat')
  evaluate('[[0,2],[1,1]] + 3 * [[3,0],[1,1]]', [[9, 2], [4, 4]], 'mat')
  evaluate('[[1,0,3,-2],[3,2,1,0]] * [[2,1,0],[0,1,0],[-1,4,1],[3,2,-1]]', [[-7, 9, 5], [5, 9, 1]], 'mat')
  evaluate('2**-3', 2**-3, 'num')
  evaluate('2--3', 5, 'num')
  evaluate('-[1,0]*4', [-4, 0], 'vec')
  evaluate('[1,2,3]*[1,2]', ERROR)
  evaluate('len([3,4])', 5, 'num')
  evaluate('len(4)', ERROR)
  evaluate('len([[2,3],[1,2]])', ERROR)
  evaluate('(((((((((7)))))))))', 7, 'num')
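# ERROR is deliberately defined as None: every semantic rule above first
# checks its operands for ERROR and propagates it, so ill-typed expressions
# (shape mismatches, len() of a matrix, powers of non-square matrices, ...)
# surface as a None result that evaluate() can assert on, instead of raising
# in the middle of attribute evaluation.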
def test_AttributeEvalGenerator_check_declaredness():
  lexer = LexerGenerator(
    [';', '=', 'const', 'name'],
    [';', '=', '(0|[1-9])[0-9]*(\\.[0-9]*)?', '([a-z]|[A-Z])+'])
  g = Grammar(
    Production('Prog0', 'Decls'),
    Production('Decls', 'Decl'),
    Production('Decls', 'Decl', ';', 'Decls'),
    Production('Decl', 'name', '=', 'Var'),
    Production('Var', 'name'),
    Production('Var', 'const'),
  )
  parser = ParserGenerator(g)
  attr_g = AttributeGrammar(
    g,
    inh_attrs={'decl_i'},
    syn_attrs={'decl_s', 'name', 'ok'},
    prod_attr_rules=[
      {'decl_i.1': set(), 'decl_s.0': 'decl_s.1', 'ok': 'ok.1'},
      {'decl_i.1': 'decl_i.0', 'decl_s.0': 'decl_s.1', 'ok': 'ok.1'},
      {'decl_i.1': 'decl_i.0', 'decl_i.3': 'decl_s.1', 'decl_s.0': 'decl_s.3',
       'ok': lambda ok: ok(1) and ok(3)},
      {'decl_i.3': 'decl_i.0', 'decl_s.0': lambda decl_i, name: decl_i(0) | {name(1)}, 'ok.0': 'ok.3'},
      {'ok.0': lambda decl_i, name: name(1) in decl_i(0)},
      {'ok.0': True}],
    terminal_attr_rules={
      'name': {'name.0': lambda value: value}
    })
  attr_eval_gen = AttributeEvalGenerator(attr_g)

  def evaluate(word, target_eval):
    print('----')
    print('input word:', word)
    tokens, tokens_pos = lexer.tokenize(word)
    print('tokens:', tokens, 'with decomposition', tokens_pos)
    tree = parser.parse_tree(word, tokens, tokens_pos)
    print('parsed tree:', tree)
    tree_attr_eval = attr_eval_gen.eval_attrs(tree, word, tokens, tokens_pos)
    print('attribute eval:', tree_attr_eval)
    assert_equal(tree_attr_eval, target_eval)

  evaluate('alpha=5;beta=7;gamma=alpha', {'decl_s': {'alpha', 'beta', 'gamma'}, 'ok': True})
  evaluate('alpha=5;beta=alpha;gamma=alpha', {'decl_s': {'alpha', 'beta', 'gamma'}, 'ok': True})
  evaluate('alpha=1;beta=alpha;gamma=beta', {'decl_s': {'alpha', 'beta', 'gamma'}, 'ok': True})
  evaluate('alpha=alpha', {'decl_s': {'alpha'}, 'ok': False})
  evaluate('alpha=1.0;beta=gamma', {'decl_s': {'alpha', 'beta'}, 'ok': False})
  evaluate('alpha=1.0;beta=4.0;gamma=beta;delta=alpha;alpha=alpha', {'decl_s': {'alpha', 'beta', 'gamma', 'delta'}, 'ok': True})
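# This is the one test exercising an inherited attribute: decl_i carries the
# set of already-declared names *down* into each Decl, decl_s synthesizes the
# extended set back *up*, and Var -> name checks name(1) in decl_i(0). That is
# why 'alpha=alpha' alone yields ok=False (alpha is not yet declared at its
# use), while the last input accepts its trailing 'alpha=alpha' because alpha
# was already declared at the start.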
from sleepy.ast import FileAst, AbstractScopeAst, annotate_ast, ExpressionStatementAst, ReturnStatementAst, \
  AssignStatementAst, IdentifierExpressionAst, MemberExpressionAst, \
  IfStatementAst, WhileStatementAst, ConstantExpressionAst, \
  StringLiteralExpressionAst, CallExpressionAst, AnnotationAst, UnionTypeAst, IdentifierTypeAst, ImportsAst, \
  UnbindExpressionAst, StructDeclarationAst
from sleepy.builtin_symbols import SLEEPY_DOUBLE, SLEEPY_FLOAT, SLEEPY_INT, SLEEPY_LONG, SLEEPY_CHAR
from sleepy.functions import FunctionDeclarationAst
from sleepy.syntactical_analysis.grammar import AttributeGrammar, Production
from sleepy.syntactical_analysis.parser import ParserGenerator
from sleepy.token_value_parsing import parse_assign_op, parse_long, parse_double, parse_float, parse_char, parse_string, \
  parse_hex_int

SLEEPY_ATTR_GRAMMAR = AttributeGrammar.from_dict(
  prods_attr_rules={
    Production('S', 'File'): {
      'ast': 'ast.1'
    },
    Production('File', 'SeparatedImportDecl?', 'BracelessScope'): {
      'ast': lambda _pos, ast: FileAst(_pos, scope=ast(2), imports_ast=ast(1))
    },
    Production('SeparatedImportDecl?'): {
      'ast': lambda _pos: ImportsAst(pos=_pos, imports=[])
    },
    Production('SeparatedImportDecl?', 'ImportDecl'): {
      'ast': 'ast.1'
    },
    Production('ImportDecl', 'import', 'Import', 'ImportNames'): {
      'ast': lambda _pos, path, paths: ImportsAst(_pos, imports=[path(2)] + paths(3))
from typing import List, Dict, Set, Optional, Tuple

from sleepy.syntactical_analysis.automaton import NonDeterministicAutomaton, make_dfa_from_nfa, DeterministicAutomaton, OTHER_CHAR
from sleepy.errors import LexError
from sleepy.syntactical_analysis.grammar import Grammar, Production, EPSILON, IGNORED_TOKEN, get_token_word_from_tokens_pos, DummyPath
from sleepy.syntactical_analysis.parser import ParserGenerator

REGEX_LIT_TOKEN = 'a'
REGEX_SPECIAL_TOKENS = frozenset({'(', ')', '\\', '-', '[', ']', '*', '+', '?', '|', '^', '.'})
# Currently we only recognize 7-bit ASCII
REGEX_RECOGNIZED_CHARS = frozenset({OTHER_CHAR} | {chr(c) for c in range(0, 128)})

REGEX_CHOICE_OP = Production('Choice', 'Choice', '|', 'Concat')
REGEX_CONCAT_OP = Production('Concat', 'Concat', 'Repeat')
REGEX_REPEAT_OP = Production('Repeat', 'Range', '*')
REGEX_REPEAT_EXISTS_OP = Production('Repeat', 'Range', '+')
REGEX_OPTIONAL_OP = Production('Repeat', 'Range', '?')
REGEX_RANGE_OP = Production('Range', '[', 'a', '-', 'a', ']')
REGEX_RANGE_LITS_OP = Production('Range', '[', 'Lits', ']')
REGEX_INV_RANGE_OP = Production('Range', '[', '^', 'a', '-', 'a', ']')
REGEX_INV_RANGE_LITS_OP = Production('Range', '[', '^', 'Lits', ']')
REGEX_LIT_OP = Production('Lit', REGEX_LIT_TOKEN)
REGEX_LIT_ANY_OP = Production('Lit', '.')
REGEX_LITS_MULTIPLE_OP = Production('Lits', 'a', 'Lits')
REGEX_LITS_SINGLE_OP = Production('Lits', 'a')

REGEX_GRAMMAR = Grammar(
  Production('Regex', 'Choice'),
  REGEX_CHOICE_OP,
  Production('Choice', 'Concat'),
  REGEX_CONCAT_OP,
def test_ParserGenerator_cfg():
  g = Grammar(
    Production('Grammar', 'Decl'),
    Production('Decl', 'Prod', ';', 'Decl'),
    Production('Decl', 'Prod', '\n', 'Decl'),
    Production('Decl', 'Prod'),
    Production('Prod', 'Symb', '->', 'Right'),
    Production('Right', 'Symbs', '|', 'Symbs'),
    Production('Right', 'Symbs'),
    Production('Symbs', 'Symb', 'Symbs'),
    Production('Symbs'),
    Production('Symb', 'A'),
    Production('Symb', 'B'),
    Production('Symb', 'C'),
    Production('Symb', 'a'),
    Production('Symb', 'b'),
    Production('Symb', 'c')
  )
  parser = ParserGenerator(g)
  tokens = ['A', '->', 'B', 'c', '|', 'B', ';', 'B', '->', 'c', 'a']
  print('tokenized word:', tokens)
  analysis = parser.parse_analysis(''.join(tokens), tokens, list(range(len(tokens))))
  print('right-most analysis:', analysis)
def test_ParserGenerator_arithmetic_syn():
  import math
  lexer = LexerGenerator(
    token_names=['(', ')', '+', '-', '*', '**', '/', 'const', 'func'],
    token_regex_table=[
      '\\(', '\\)', '\\+', '\\-', '\\*', '\\*\\*', '/',
      '(0|[1-9][0-9]*)(\\.[0-9]+)?', '([a-z]|[A-Z])+'])
  # left associative (except **, which is right associative), with operator precedence
  g = Grammar(
    Production('Expr', 'Sum'),
    Production('Sum', 'Sum', '+', 'Prod'),
    Production('Sum', 'Sum', '-', 'Prod'),
    Production('Sum', 'Prod'),
    Production('Prod', 'Prod', '*', 'Pow'),
    Production('Prod', 'Prod', '/', 'Pow'),
    Production('Prod', 'Pow'),
    Production('Pow', 'Term', '**', 'Pow'),
    Production('Pow', 'Term'),
    Production('Term', '(', 'Sum', ')'),
    Production('Term', 'func', '(', 'Sum', ')'),
    Production('Term', 'const')
  )
  attr_g = AttributeGrammar(
    g,
    syn_attrs={'res', 'name'},
    prod_attr_rules=[
      {'res': lambda res: res(1)},
      {'res': lambda res: res(1) + res(3)},
      {'res': lambda res: res(1) - res(3)},
      {'res': lambda res: res(1)},
      {'res': lambda res: res(1) * res(3)},
      {'res': lambda res: res(1) / res(3)},
      {'res': lambda res: res(1)},
      {'res': lambda res: res(1) ** res(3)},
      {'res': lambda res: res(1)},
      {'res': lambda res: res(2)},
      {'res': lambda res, name: getattr(math, name(1))(res(3))},
      {'res': lambda res: res(1)},
    ],
    terminal_attr_rules={
      'const': {'res': lambda lit: float(lit)},
      'func': {'name': lambda lit: lit}
    }
  )
  parser = ParserGenerator(g)

  def evaluate(word_):
    print('----')
    print('input word:', word_)
    tokens, tokens_pos = lexer.tokenize(word_)
    print('tokens:', tokens, 'with decomposition', tokens_pos)
    analysis, result = parser.parse_syn_attr_analysis(attr_g, word_, tokens, tokens_pos)
    print('result:', result['res'])
    # import common operator names for python eval()
    sin, cos, tan, exp, sqrt = math.sin, math.cos, math.tan, math.exp, math.sqrt  # noqa
    assert_equal(result['res'], eval(word_))

  for word in [
      '1*2+3', '5-3', '(1+2)*3', '1+2+3', '1+2-3', '1-2+3', '3-2-1', '1*2+3*4', '4/2-1',
      'sin(3.1415)', '2**2**3', '(2**2)**3', '(3**2+4**2)**0.5']:
    evaluate(word)
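# The Pow production recurses on the right ('Pow' -> 'Term' '**' 'Pow'), which
# is what makes '**' right associative: '2**2**3' evaluates as 2**(2**3) = 256,
# while '(2**2)**3' forces the left grouping and gives 64. All other operators
# recurse on the left and therefore associate left, e.g. '3-2-1' == 0.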