def test_fruitflies_ambig(self):
    """The classic 'fruit flies like bananas' sentence: both parses expected."""
    grammar = """
        start: noun verb noun        -> simple
             | noun verb "like" noun -> comparative

        noun: adj? NOUN
        verb: VERB
        adj: ADJ

        NOUN: "flies" | "bananas" | "fruit"
        VERB: "like" | "flies"
        ADJ: "fruit"

        %import common.WS
        %ignore WS
    """
    parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
    tree = parser.parse('fruit flies like bananas')

    comparative = Tree('comparative', [
        Tree('noun', ['fruit']),
        Tree('verb', ['flies']),
        Tree('noun', ['bananas']),
    ])
    simple = Tree('simple', [
        Tree('noun', [Tree('adj', ['fruit']), 'flies']),
        Tree('verb', ['like']),
        Tree('noun', ['bananas']),
    ])
    expected = Tree('_ambig', [comparative, simple])

    # The order of alternatives under _ambig is not guaranteed, so compare
    # the root tag and the set of children rather than the whole tree.
    self.assertEqual(tree.data, expected.data)
    self.assertEqual(set(tree.children), set(expected.children))
def test_propagate_positions(self):
    """With propagate_positions=True, subtree nodes carry source line info."""
    parser = Lark("""start: a
                     a: "a"
                  """, propagate_positions=True)
    result = parser.parse('a')
    # The single 'a' rule match starts on line 1 of the input.
    self.assertEqual(result.children[0].line, 1)
def test_earley(self):
    """Earley must resolve the overlapping boundary between A+ and "abc"."""
    grammar = """start: A "b" c
                 A: "a"+
                 c: "abc"
                 """
    parser = Lark(grammar, parser="earley", lexer=LEXER)
    parser.parse('aaaababc')
def test_earley_scanless(self):
    """Same grammar as test_earley, but scannerless (lexer=None)."""
    grammar = """start: A "b" c
                 A: "a"+
                 c: "abc"
                 """
    parser = Lark(grammar, parser="earley", lexer=None)
    parser.parse('aaaababc')
def test_anon_in_scanless(self):
    """Anonymous sub-patterns inside a terminal, parsed scannerlessly."""
    # Fails an Earley implementation without special handling for empty rules,
    # or re-processing of already completed rules.
    parser = Lark(r"""start: B
                      B: ("ab"|/[^b]/)*
                  """, lexer=LEXER)
    self.assertEqual(parser.parse('abc').children[0], 'abc')
def test_earley4(self):
    """'aaa' against A A? may split as ('aa','a') or match as one 'aaa'."""
    grammar = """
    start: A A?
    A: "a"+
    """
    parser = Lark(grammar, parser='earley', lexer=LEXER)
    result = parser.parse("aaa")
    # Either disambiguation is acceptable here.
    assert set(result.children) == {'aa', 'a'} or result.children == ['aaa']
def test_earley4(self):
    """Greedy A+ should swallow the whole input, yielding a single 'aaa'.

    NOTE(review): this method name duplicates an earlier test_earley4 and
    will shadow it inside the same class — confirm which version is wanted.
    """
    grammar = """
    start: A A?
    A: "a"+
    """
    parser = Lark(grammar, parser='earley', lexer=LEXER)
    result = parser.parse("aaa")
    self.assertEqual(result.children, ['aaa'])
def test_undefined_terminals_require_explicit_strategies():
    """%declare'd terminals need an explicit strategy for from_lark()."""
    elem_grammar = r"""
    list : "[" [ELEMENT ("," ELEMENT)*] "]"
    %declare ELEMENT
    """
    # Without a strategy for ELEMENT, generation must be rejected.
    with pytest.raises(InvalidArgument):
        from_lark(Lark(elem_grammar, start="list")).example()
    # Supplying one makes generation work.
    strategy = {"ELEMENT": just("200")}
    from_lark(Lark(elem_grammar, start="list"), explicit=strategy).example()
def test_expand1(self):
    """?rules collapse single-child nodes; multi-child aliases keep the node."""
    parser = Lark("""start: a
                     ?a: b
                     b: "x"
                  """)
    self.assertEqual(parser.parse('x').children[0].data, "b")

    parser = Lark("""start: a
                     ?a: b -> c
                     b: "x"
                  """)
    self.assertEqual(parser.parse('x').children[0].data, "b")

    parser = Lark("""start: a
                     ?a: b b -> c
                     b: "x"
                  """)
    # Two children: the ?a alternative does not collapse, alias 'c' applies.
    self.assertEqual(parser.parse('xx').children[0].data, "c")
def test_term_ambig_resolve(self):
    """Default resolution splits NAME+ at the ignored space."""
    grammar = r"""
    !start: NAME+
    NAME: /\w+/
    %ignore " "
    """
    parser = Lark(grammar)
    tree = parser.parse("""foo bar""")
    self.assertEqual(tree.children, ['foo', 'bar'])
def test_earley3(self):
    "Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)"
    grammar = """
    start: A A
    A: "a"+
    """
    parser = Lark(grammar, parser='earley', lexer=LEXER)
    result = parser.parse("aaa")
    # Greedy matching: the first A takes as much input as possible.
    self.assertEqual(result.children, ['aa', 'a'])
def test_infinite_recurse(self):
    """Left-recursive rule: LALR rejects the grammar; Earley raises on parse."""
    grammar = """start: a
                 a: a | "a"
              """
    # LALR cannot build a parser for this grammar at all.
    self.assertRaises(GrammarError, Lark, grammar, parser='lalr')

    # Earley accepts the grammar, but parsing 'a' is expected to raise here.
    parser = Lark(grammar, parser='earley', lexer=None)
    self.assertRaises(ParseError, parser.parse, 'a')

    parser = Lark(grammar, parser='earley', lexer='dynamic')
    self.assertRaises(ParseError, parser.parse, 'a')
def test_earley3(self):
    """Tests prioritization and disambiguation for pseudo-terminals
    (there should be only one result).

    By default, `+` should imitate regexp greedy-matching.

    NOTE(review): this method name duplicates an earlier test_earley3 and
    will shadow it inside the same class — confirm which version is wanted.
    """
    grammar = """
    start: A A
    A: "a"+
    """
    parser = Lark(grammar, parser='earley', lexer=LEXER)
    result = parser.parse("aaa")
    self.assertEqual(set(result.children), {'aa', 'a'})
def puzzle1():
    """Count the lines of day19.txt that match grammar rule 'a' (rule 0).

    The input file has two blank-line-separated sections: the grammar rules
    and the messages to validate.

    :return: number of messages accepted by the grammar.
    """
    # Use a context manager so the file handle is closed deterministically
    # (the original leaked it).
    with open("day19.txt") as f:
        rules, lines = f.read().split('\n\n')
    # Lark rule names cannot be bare digits, so remap 0-9 -> a-j;
    # rule 0 therefore becomes the start rule 'a'.
    rules = rules.translate(str.maketrans('0123456789', 'abcdefghij'))
    parser = Lark(rules, start='a')
    total = 0
    for line in lines.splitlines():
        try:
            parser.parse(line)
            total += 1
        except LarkError:
            # Non-matching message: deliberately ignored, just not counted.
            pass
    return total
def parse(mod, fname, tests):
    """Parse and execute the `tests` source, returning (passed, errors) counts.

    :param mod: module object whose namespace is seeded with BUILTINS.
    :param fname: source filename, forwarded to the AST transformer.
    :param tests: test source text to parse and execute.
    :return: tuple of (tests passed, test errors).
    """
    test_results = TestResults()
    mod.__dict__.update(BUILTINS)
    # NOTE(review): `path` is not a parameter — presumably a module-level
    # grammar path; confirm it should not be derived from `fname`.
    # Close the grammar file after Lark has consumed it (it was leaked before).
    with open(path) as grammar_file:
        parser = Lark(grammar_file,
                      parser='lalr',
                      postlex=GrammarIndenter(),
                      transformer=ParseTreeToAST(mod, test_results, fname))
    tree = parser.parse(tests)
    execute(tree)
    return test_results.passed, test_results.errors
def test_not_all_derivations(self):
    """earley__all_derivations=False suppresses the _ambig node for 'cde'."""
    grammar = """
    start: cd+ "e"
    !cd: "c"
       | "d"
       | "cd"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit',
                  lexer=LEXER, earley__all_derivations=False)
    result = parser.parse('cde')
    # Only a single derivation should be produced.
    assert result.data != '_ambig', result
    assert len(result.children) == 1
def test_ambiguity1(self):
    """'cde' derives both as c,d,"e" and as cd,"e": expect an _ambig root."""
    grammar = """
    start: cd+ "e"
    !cd: "c"
       | "d"
       | "cd"
    """
    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    result = parser.parse('cde')
    assert result.data == '_ambig', result
    assert len(result.children) == 2
def process(self):
    """Parse self.source and register the enum/consts/struct types found in it.

    Reports syntax errors (unexpected token/character/EOF) and semantic errors
    such as duplicate type names or non-integer enum base types via
    self.report_message, then populates self.type_registry.
    """
    parser = Lark(grammar, propagate_positions=True, parser='lalr')
    lines = self.source.split('\n')
    parsed = None
    try:
        parsed = parser.parse(self.source)
    except UnexpectedToken as e:
        self.report_message(e, 'error',
                            f'unexpected token \'{e.token}\'',
                            f'was expecting {expected_to_human_readable(e.expected)} here')
    except UnexpectedCharacters as e:
        self.report_message(e, 'error',
                            f'unexpected character \'{lines[e.line - 1][e.column - 1]}\'',
                            f'was expecting {expected_to_human_readable(e.allowed)} here')
    except UnexpectedEOF as e:
        # BUG FIX: this handler previously passed the undefined name `eof`,
        # raising a NameError whenever an unexpected-EOF error was reported.
        self.report_message(e, 'error',
                            f'unexpected end of file',
                            f'was expecting {expected_to_human_readable(e.expected)} here')
    # NOTE(review): if parsing failed above, `parsed` is still None here and
    # transform(None) will fail — confirm report_message aborts processing.
    self.tokens = IdlTransformer().transform(parsed)
    for t in self.tokens:
        if type(t) is Enum:
            if self.type_registry.is_known_type(t.name):
                self.report_message(t.type, 'error',
                                    f'name {t.name} is already in use.', '')
            subtype = self.type_registry.get_type(t.type.name)
            if not subtype:
                self.report_message(t.type, 'error',
                                    f'unknown type for this {t.mode} block',
                                    f'{t.type.name} is not a known type', True)
            # NOTE(review): assumes the fatal report above raises/aborts;
            # otherwise `subtype.identity` would fail on None — confirm.
            if subtype.identity is not TypeIdentity.INTEGER:
                self.report_message(t.type, 'error',
                                    f'{t.name} {t.mode} block\'s type is not an integer',
                                    f'{t.type.name} is not an integer')
            # Enums inherit size/signedness from their integer base type.
            self.type_registry.register_type(
                Type(t.name,
                     TypeIdentity.CONSTS if t.mode == 'consts' else TypeIdentity.ENUM,
                     fixed_size=subtype.fixed_size,
                     signed=subtype.signed,
                     subtype=subtype)
            )
            t.type = self.type_registry.get_type(t.name)
        if type(t) is Struct:
            if self.type_registry.is_known_type(t.name):
                self.report_message(t.type, 'error',
                                    f'name {t.name} is already in use.', '')
            self.type_registry.register_type(
                Type(t.name, TypeIdentity.STRUCT, dynamic=True)
            )
def test_earley_explicit_ambiguity(self):
    # This was a sneaky bug!
    grammar = """
    start: a b | ab
    a: "a"
    b: "b"
    ab: "ab"
    """
    parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
    ambig_tree = parser.parse('ab')
    # Both derivations ('a' 'b' and 'ab') must be reported under _ambig.
    self.assertEqual(ambig_tree.data, '_ambig')
    self.assertEqual(len(ambig_tree.children), 2)
def __init__(self):
    """Build the IPL parser and initialize empty slots for models and state."""
    self.parser = Lark(ipl_grammar)
    # Model handles — populated lazily elsewhere.
    self.yolo = None
    self.agender = None
    self.emotion = None
    self.food = None
    self.pspnet = None
    # Per-image working state.
    self.original_img = None
    self.img_path = ''
    self.msg = ''
    self.persons = []
    self.objects = []
def test_earley_repeating_empty(self):
    """Repeated nullable rules must each yield their own empty subtree."""
    # This was a sneaky bug!
    grammar = """
    !start: "a" empty empty "b"
    empty: empty2
    empty2:
    """
    expected_empty = Tree('empty', [Tree('empty2', [])])
    result = Lark(grammar, parser='earley', lexer=LEXER).parse('ab')
    self.assertSequenceEqual(result.children,
                             ['a', expected_empty, expected_empty, 'b'])
def test_earley2(self):
    """Statements with overlapping prefixes parse under an ignored space."""
    grammar = """
    start: statement+
    statement: "r"
             | "c" /[a-z]/+
    %ignore " "
    """
    program = """c b r"""
    Lark(grammar, parser='earley', lexer=LEXER).parse(program)
def test_earley_prioritization_sum(self):
    "Tests effect of priority on result"
    # With ambiguity='resolve__antiscore_sum' the derivation with the LOWEST
    # summed rule priority wins; four grammars below vary only in priorities.

    # Case 1: bb_ penalized -> the direct 'ab_ b_ a_' derivation wins.
    grammar = """
    start: ab_ b_ a_ | indirection
    indirection: a_ bb_ a_
    a_: "a"
    b_: "b"
    ab_: "ab"
    bb_.1: "bb"
    """
    # NOTE(review): this first case uses _Lark while the remaining three use
    # Lark(..., parser='earley') directly — confirm the mix is intentional.
    l = _Lark(grammar, ambiguity='resolve__antiscore_sum')
    res = l.parse('abba')
    self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

    # Case 2: ab_ penalized -> 'indirection' wins.
    grammar = """
    start: ab_ b_ a_ | indirection
    indirection: a_ bb_ a_
    a_: "a"
    b_: "b"
    ab_.1: "ab"
    bb_: "bb"
    """
    l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
    res = l.parse('abba')
    self.assertEqual(''.join(child.data for child in res.children), 'indirection')

    # Case 3: sums favor the direct derivation again.
    grammar = """
    start: ab_ b_ a_ | indirection
    indirection: a_ bb_ a_
    a_.2: "a"
    b_.1: "b"
    ab_.3: "ab"
    bb_.3: "bb"
    """
    l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
    res = l.parse('abba')
    self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

    # Case 4: heavier ab_ flips the result back to 'indirection'.
    grammar = """
    start: ab_ b_ a_ | indirection
    indirection: a_ bb_ a_
    a_.1: "a"
    b_.1: "b"
    ab_.4: "ab"
    bb_.3: "bb"
    """
    l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
    res = l.parse('abba')
    self.assertEqual(''.join(child.data for child in res.children), 'indirection')
def test_ambiguity2(self):
    """Rule priorities make a+b claim 'AB', leaving 'X' for the catch-all c."""
    grammar = """
    ANY: /[a-zA-Z0-9 ]+/
    a.2: "A" b+
    b.2: "B"
    c: ANY
    start: (a|c)*
    """
    parser = Lark(grammar, parser='earley', lexer=LEXER)
    expected = Tree('start', [
        Tree('a', [Tree('b', [])]),
        Tree('c', ['X']),
    ])
    self.assertEqual(parser.parse('ABX'), expected)
def test_can_not_use_undefined_terminals_yet():
    """from_lark() must reject a %declare'd terminal with no strategy."""
    grammar = r"""
    list : "[" ELEMENT ("," ELEMENT)* "]"
    %declare ELEMENT
    """
    with pytest.raises(InvalidArgument):
        from_lark(Lark(grammar, start="list")).example()
def test_explicit_ambiguity2(self):
    r"""Every tokenization of 'cat' into \w+ words must appear under _ambig."""
    grammar = r"""
    start: NAME+
    NAME: /\w+/
    %ignore " "
    """
    parser = Lark(grammar, start='start', ambiguity='explicit')
    tree = parser.parse("""cat""")
    self.assertEqual(tree.data, '_ambig')
    seen = {tuple(str(tok) for tok in alt.children) for alt in tree.children}
    self.assertEqual(seen, {('cat', ), ('ca', 't'), ('c', 'at'), ('c', 'a', 't')})
def add_mismatch_offsets(
    netlist_in: Union[Path, str],
    netlist_out: Optional[Union[Path, str]] = None,
    debug: bool = False,
) -> None:
    """Parse a netlist, insert mismatch offsets, and write the rewritten file.

    :param netlist_in: input netlist path; the suffix selects the grammar
        ('.cdl'/'.sp'/'.spf' -> CDL, '.scs'/'.net' -> spectre).
    :param netlist_out: output path; defaults to the input name with 'out'
        appended to the stem.
    :param debug: if True, dump the parse tree to "test0.png".
    :raises ValueError: if the input suffix is not recognized.
    """
    if isinstance(netlist_in, str):
        netlist_in = Path(netlist_in)

    if netlist_in.suffix in ['.cdl', '.sp', '.spf']:
        parser = Lark(grammar_cdl, parser='lalr')
        scs = False
    elif netlist_in.suffix in ['.scs', '.net']:
        parser = Lark(grammar_scs, parser='lalr')
        scs = True
    else:
        raise ValueError(
            f'Unknown netlist suffix={netlist_in.suffix}. Use ".cdl" or ".scs".'
        )

    lines = read_spectre_cdl_unwrap(netlist_in)
    lines[-1] += '\n'  # the LALR grammar needs a trailing newline to reduce
    tree = parser.parse('\n'.join(lines))
    if debug:
        pydot__tree_to_png(tree, "test0.png")
    obj_list = CktTransformer().transform(tree).children
    obj_list[-1].last = True  # last object may need different netlist output

    if netlist_out is None:
        # NOTE(review): this drops the original suffix ('foo.cdl' -> 'fooout')
        # — confirm that is the intended default output name.
        netlist_out = netlist_in.with_name(netlist_in.stem + 'out')
    elif isinstance(netlist_out, str):
        netlist_out = Path(netlist_out)

    full_netlist = ''
    used_names = []
    offset_map = {}
    for obj in obj_list:
        full_netlist += obj.netlist(used_names, offset_map, scs)

    # Only the mapped offset names are printed; the keys were never used.
    for val in offset_map.values():
        print(f'{val}: 0.0')

    with open_file(netlist_out, 'w') as f:
        f.write(full_netlist)
def test_cannot_convert_EBNF_to_strategy_directly():
    """from_lark() validates its arguments before building a strategy."""
    # Not a Lark object
    with pytest.raises(InvalidArgument):
        from_lark(EBNF_GRAMMAR).example()
    # Not even the right number of arguments
    with pytest.raises(TypeError):
        from_lark(EBNF_GRAMMAR, start="value").example()
    # Wrong type for explicit_strategies
    with pytest.raises(InvalidArgument):
        from_lark(Lark(LIST_GRAMMAR, start="list"), explicit=[]).example()
def test_generation_without_whitespace():
    """A grammar with no %ignore must never generate stray spaces."""
    list_grammar = r"""
    list : "[" [NUMBER ("," NUMBER)*] "]"
    NUMBER: /[0-9]+/
    """

    @given(from_lark(Lark(list_grammar, start="list")))
    def check(generated):
        assert " " not in generated

    check()
def test_can_generate_ignored_tokens():
    """from_lark() may emit %ignore'd tokens (e.g. whitespace) in its output."""
    list_grammar = r"""
    list : "[" [STRING ("," STRING)*] "]"
    STRING : /"[a-z]*"/
    WS : /[ \t\r\n]+/
    %ignore WS
    """
    strategy = from_lark(Lark(list_grammar, start="list"))
    # A JSON list of strings in canonical form which does not round-trip,
    # must contain ignorable whitespace in the initial string.
    find_any(strategy, lambda s: "\t" in s)