def try_parsing(self, parser: ParserPython, text_to_parse: str):
    try:
        parser.parse(text_to_parse)
    except Exception:
        self.fail("Could not parse {0} using the {1} parser!".format(
            text_to_parse, parser.__class__))
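# Hedged usage sketch for the helper above: `greeting` is a hypothetical
# grammar, and the helper is assumed to be available on the TestCase
# (e.g. defined on a shared base class).
import unittest

from arpeggio import ParserPython

def greeting():
    return "hello", "world"

class SmokeTest(unittest.TestCase):
    def test_greeting(self):
        self.try_parsing(ParserPython(greeting), "hello world")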
def convert_trog(inf: str, outf: TextIO) -> None:
    pp = ParserPython(trogfile, skipws=False)
    try:
        tree = pp.parse_file(inf)
        print(tree)
    except Exception as e:
        print(e)
def test_memoization_nomatch(capsys):
    '''
    Test that an already failed match is found in the cache on
    subsequent matches.
    '''
    def grammar():
        return [(rule1, ruleb), [rule1, rulec]]

    def rule1():
        return rulea, ruleb

    def rulea():
        return "a"

    def ruleb():
        return "b"

    def rulec():
        return "c"

    parser = ParserPython(grammar, memoization=True, debug=True)
    parse_tree = parser.parse("c")

    assert "Cache hit for [rule1=Sequence, 0] = '0'" in capsys.readouterr()[0]
    assert parser.cache_hits == 1
    assert parser.cache_misses == 4
def test_with_some_words_2(self):
    parser = ParserPython(document, skipws=False)

    text = f"{self.words1}\n\n"
    parsed = parser.parse(text)
    # print('\n: parsed') ; pp(parsed)

    expect = NonTerminal(document(), [
        NonTerminal(body(), [
            NonTerminal(element(), [
                Terminal(words(), 0, self.words1),
            ]),
            NonTerminal(element(), [
                Terminal(newline(), 0, '\n'),
            ]),
            NonTerminal(element(), [
                Terminal(blank_line(), 0, '\n'),
            ]),
        ]),
        Terminal(EOF(), 0, ''),
    ])
    # print('\n: expect') ; pp(expect)

    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def test_direct_rule_call():
    '''
    Test a regression where, in a direct rule call, the semantic action
    was erroneously attached to both caller and callee.
    '''
    def grammar():
        return rule1, rule2

    def rule1():
        return "a"

    def rule2():
        return rule1

    call_count = [0]

    class DummySemAction(SemanticAction):
        def first_pass(self, parser, node, nodes):
            call_count[0] += 1
            return SemanticAction.first_pass(self, parser, node, nodes)

    # The sem action is attached to rule2 only, but this bug would attach
    # it to rule1 as well, resulting in a wrong call count.
    rule2.sem = DummySemAction()

    parser = ParserPython(grammar)
    parse_tree = parser.parse("aa")
    parser.getASG()

    assert call_count[0] == 1, "Semantic action should be called once!"
def test_02_text_line_pair(self):
    def body():
        return OneOrMore([text_line], rule_name='body')

    def document():
        return Sequence((body, EOF), rule_name='document')
    # print('\n: document') ; pp(document())

    parser = ParserPython(document, skipws=False)

    text = self.line1 + self.line2
    parsed = parser.parse(text)
    # print('\n: parsed') ; pp(parsed)
    # print('\n: flatten') ; pp(flatten(parsed))

    p_newline = Terminal(newline(), 0, '\n')
    p_l1_words = Terminal(words(), 0, self.line1[:-1])
    p_l1_text_line = NonTerminal(text_line(), [p_l1_words, p_newline])
    p_l2_words = Terminal(words(), 0, self.line2[:-1])
    p_l2_text_line = NonTerminal(text_line(), [p_l2_words, p_newline])
    p_body = NonTerminal(body(), [p_l1_text_line, p_l2_text_line])
    p_eof = Terminal(EOF(), 0, '')
    expect = NonTerminal(document(), [p_body, p_eof])
    # print('\n: expect') ; pp(expect)

    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def test_05_paragraph_multiple(self):
    def body():
        return OneOrMore(OrderedChoice([paragraph, newline]), rule_name='body')

    def document():
        return Sequence((body, EOF), rule_name='document')
    # print('\n: document') ; pp(document())

    parser = ParserPython(document, skipws=False)

    text = self.line1 + self.line2 + '\n'
    text = text * 3
    parsed = parser.parse(text)
    # print('\n: parsed') ; pp(parsed)

    p_newline = Terminal(newline(), 0, '\n')
    p_l1_words = Terminal(words(), 0, self.line1[:-1])
    p_l1_text_line = NonTerminal(text_line(), [p_l1_words, p_newline])
    p_l2_words = Terminal(words(), 0, self.line2[:-1])
    p_l2_text_line = NonTerminal(text_line(), [p_l2_words, p_newline])
    p_paragraph = NonTerminal(paragraph(), [p_l1_text_line, p_l2_text_line])
    p_body = NonTerminal(body(), [
        p_paragraph, p_newline,
        p_paragraph, p_newline,
        p_paragraph, p_newline,
    ])
    p_eof = Terminal(EOF(), 0, '')
    expect = NonTerminal(document(), [p_body, p_eof])
    # print('\n: expect') ; pp(expect)

    assert parsed == expect, (
        f"text = '{text}' :\n"
        f"[expect]\n{pp_str(expect)}\n[parsed]\n{pp_str(parsed)}")
def test_memoization_positive(capsys):
    '''
    Test that an already matched rule is found in the cache on
    subsequent matches.

    Args:
        capsys - pytest fixture for output capture
    '''
    def grammar():
        return [(rule1, ruleb), (rule1, rulec)]

    def rule1():
        return rulea, ruleb

    def rulea():
        return "a"

    def ruleb():
        return "b"

    def rulec():
        return "c"

    parser = ParserPython(grammar, memoization=True, debug=True)

    # Parse input where rule1 will match but ruleb will fail.
    # The second sequence will try rule1 again at the same location
    # and the result should be found in the cache.
    parse_tree = parser.parse("a b c")

    # Assert that the cached result is used.
    assert "Cache hit" in capsys.readouterr()[0]
    assert parser.cache_hits == 1
    assert parser.cache_misses == 4
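# Self-contained sketch of observing the memoization counters asserted
# above; `ab` is a hypothetical grammar, and the exact hit/miss counts
# depend on the grammar shape, so only the counters are printed.
from arpeggio import ParserPython, EOF

def ab():
    return [("a", "b"), ("a", "c")], EOF

p = ParserPython(ab, memoization=True)
p.parse("a c")  # "a" is retried at position 0 by the second alternative
print(p.cache_hits, p.cache_misses)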
def parse_string(self, src, grammar=program, filename=None):
    oldsrcs = self.input_sources
    self.context.optimization_level = self.optimization_level
    self.input_sources = src
    parser = ParserPython(
        grammar,
        comment_def=comment,
        skipws=True,
        reduce_tree=False,
        memoization=True,
        debug=False,
    )
    self.context.parsers.append(parser)
    self.context.filenames.append(filename)
    try:
        parse_tree = parser.parse(self.input_sources)
        visitor = MuvVisitor(debug=False)
        visitor.muvparser = self
        parse_tree = visit_parse_tree(parse_tree, visitor)
        out = parse_tree.generate_code(self.context)
        if self.error_found:
            return False
        if len(self.context.filenames) == 1:
            if self.context.filenames[-1]:
                filetext = " from {0}".format(self.context.filenames[-1])
            else:
                filetext = ''
            self.output = (
                "( Generated{0} by the MUV compiler. )\n"
                "( https://github.com/revarbat/pymuv )\n"
                "{1}\n"
            ).format(filetext, self.output)
        self.output += out
        if not self.error_found and len(self.context.filenames) == 1:
            if self.wrapper_program:
                self.output = (
                    "@program {0}\n"
                    "1 99999 d\n"
                    "1 i\n"
                    "{1}\n"
                    ".\n"
                    "c\n"
                    "q\n"
                ).format(self.wrapper_program, self.output)
        return True
    except MuvError as e:
        line, col = parser.pos_to_linecol(e.position)
        self.print_error(filename, line, col, str(e))
        return False
    except NoMatch as e:
        line, col = parser.pos_to_linecol(e.position)
        expected = self.simplify_parse_error(e)
        self.print_error(filename, line, col, "Expected %s" % expected)
        return False
    finally:
        self.input_sources = oldsrcs
        self.context.parsers.pop()
        self.context.filenames.pop()
def parse_tree():
    def grammar():
        return ("first", "second", "third")

    parser = ParserPython(grammar)
    return parser.parse(" first \n\n second third")
def parse(file, enc):
    with codecs.open(file, "r", encoding=enc) as opened_file:
        opened_file_content = opened_file.read()

    parser = ParserPython(segnetics_file, reduce_tree=True)
    parse_tree = visit_parse_tree(parser.parse(opened_file_content),
                                  SegneticsVisitor())
    return parse_tree
def setUp(self):
    # First get the defaults; all boolean flags should be False.
    super().setUp()

    global parse_debug, record, analyzing
    self.parse_debug = parse_debug
    self.record = record
    self.analyzing = analyzing

    # quiet, no parse trees displayed
    # self.debug = False
    # show parse tree for pass >= self.debug
    # self.debug = 2

    # Show text being parsed
    # self.show = True

    # and again, to apply behavior per altered settings
    super().setUp()

    self.grammar = document
    self.parser = ParserPython(
        language_def=self.grammar,
        reduce_tree=False,
        debug=self.parse_debug,
    )
    if self.record:
        write_scratch(_clean=True)
def main(text):
    from arpeggio import Optional, ZeroOrMore, OneOrMore, EOF
    from arpeggio import RegExMatch as _

    def number():
        return _(r'\d*\.\d*|\d+')

    def factor():
        return Optional(["+", "-"]), [number, ("(", expression, ")")]

    def term():
        return factor, ZeroOrMore(["*", "/"], factor)

    def expression():
        return term, ZeroOrMore(["+", "-"], term)

    def calc():
        return OneOrMore(expression), EOF

    from arpeggio import ParserPython

    # calc is the root rule of your grammar.
    # Use param debug=True for verbose debugging messages and for grammar
    # and parse tree visualization using graphviz and dot.
    parser = ParserPython(calc)

    parse_tree = parser.parse(text)
    return parse_tree
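# Hedged usage sketch for main() above: any arithmetic string should parse,
# and the returned parse tree can be printed for inspection.
if __name__ == "__main__":
    print(main("-(4-1)*5+(2+4.67)+5.89/(.2+7)"))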
def test_memoization_nomatch(capsys: FixtureRequest) -> None:
    """
    Test that an already failed match is found in the cache on
    subsequent matches.
    """
    def grammar() -> List[Any]:
        return [(rule1, ruleb), [rule1, rulec]]

    def rule1() -> Tuple[Any, ...]:
        return rulea, ruleb

    def rulea() -> str:
        return "a"

    def ruleb() -> str:
        return "b"

    def rulec() -> str:
        return "c"

    parser = ParserPython(grammar, memoization=True, debug=True)
    parse_tree = parser.parse("c")

    assert "Cache hit for [rule1=Sequence, 0] = '0'" in capsys.readouterr()[0]
    assert parser.cache_hits == 1
    assert parser.cache_misses == 4
def test_combine_python():
    # This will result in a NonTerminal node.
    def root():
        return my_rule(), "."

    # This will result in a Terminal node.
    def my_rule():
        return Combine(ZeroOrMore("a"), OneOrMore("b"))

    parser = ParserPython(root)

    input1 = "abbb."

    # Whitespaces are preserved in lexical rules, so the following input
    # should not be recognized.
    input2 = "a b bb."

    ptree1 = parser.parse(input1)

    with pytest.raises(NoMatch):
        parser.parse(input2)

    assert isinstance(ptree1, NonTerminal)
    assert isinstance(ptree1[0], Terminal)
    assert ptree1[0].value == "abbb"
def test_direct_rule_call() -> None:
    """
    Test a regression where, in a direct rule call, the semantic action
    was erroneously attached to both caller and callee.
    """
    def grammar():
        return rule1, rule2

    def rule1():
        return "a"

    def rule2():
        return rule1

    call_count = [0]

    class DummySemAction(SemanticAction):
        def first_pass(self, parser, node, nodes):
            call_count[0] += 1
            return SemanticAction.first_pass(self, parser, node, nodes)

    # The sem action is attached to rule2 only, but this bug would attach
    # it to rule1 as well, resulting in a wrong call count.
    rule2.sem = DummySemAction()  # type: ignore

    parser = ParserPython(grammar)
    parse_tree = parser.parse("aa")
    parser.getASG()

    assert call_count[0] == 1, "Semantic action should be called once!"
def parse(content: str) -> 'Atom':
    from foil.language.grammar import atom
    from foil.language.grammar import comment

    parser = ParserPython(atom, comment_def=comment)
    parse_tree = parser.parse(content)

    return visit_parse_tree(parse_tree, FoilVisitor())
def parse(content: str) -> 'Literal':
    from foil.language.grammar import literal
    from foil.language.grammar import comment

    parser = ParserPython(literal, comment_def=comment)
    parse_tree = parser.parse(content)

    return visit_parse_tree(parse_tree, FoilVisitor())
def test_nondeterministic_unordered_group():
    def root():
        return 'word1', UnorderedGroup(some_rule, 'word2', some_rule), EOF

    def some_rule():
        return Optional('word2'), Optional('word3')

    content = '''word1 word2 '''

    # If the 'word2' from the unordered group in the `root` rule matches
    # first, the input parses; otherwise it fails.
    # We repeat parser construction and parsing many times to check that it
    # fails every time. The current fix iterates in order from left to right
    # and repeats matching until all rules in the unordered group succeed.
    fail = 0
    success = 0
    for _ in range(100):
        try:
            parser = ParserPython(root)
            parser.parse(content)
            success += 1
        except NoMatch:
            fail += 1

    assert fail == 100
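# A minimal UnorderedGroup sketch for contrast (hypothetical rule): the
# three keywords may appear in any order, each exactly once.
from arpeggio import ParserPython, UnorderedGroup, EOF

def flags():
    return UnorderedGroup("red", "green", "blue"), EOF

parser = ParserPython(flags)
parser.parse("green blue red")  # any permutation parses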
def test_reduce_tree():
    input = "34 a 3 3 b 3 b"

    parser = ParserPython(grammar, reduce_tree=False)
    result = parser.parse(input)
    # PTDOTExporter().exportFile(result, 'test_reduce_tree_pt.dot')

    assert result[0].rule_name == 'first'
    assert isinstance(result[0], NonTerminal)
    assert result[3].rule_name == 'first'
    assert result[0][0].rule_name == 'fourth'
    # Check reduction for direct OrderedChoice
    assert result[2][0].rule_name == 'third'

    parser = ParserPython(grammar, reduce_tree=True)
    result = parser.parse(input)
    # PTDOTExporter().exportFile(result, 'test_reduce_tree_pt.dot')

    assert result[0].rule_name == 'fourth'
    assert isinstance(result[0], Terminal)
    assert result[3].rule_name == 'fourth'
    # Check reduction for direct OrderedChoice
    assert result[2][0].rule_name == 'third_str'
def main(argv):
    # Parsing. Adjacent operators of different kinds, e.g. a*|b,
    # require brackets: (a*)|b.
    parser = ParserPython(formula)  # , debug=True)  # , reduce_tree=True)
    parse_tree = parser.parse(argv)
    result = visit_parse_tree(parse_tree, formVisitor())
    result.tostr()
    return result
def load_from_str(self, content: str) -> 'Builder':
    parser = ParserPython(cypher, comment_def=comment)
    parsed = parser.parse(content)
    visited = visit_parse_tree(parsed, KnowledgeVisitor())
    base = RuleBase(visited['data'])
    self.load_from_base(base)

    return self
def language_from_str(language_def, metamodel):
    """
    Constructs a parser and initializes the metamodel from the language
    description given in textX language.

    Args:
        language_def (str): A language description in textX.
        metamodel (TextXMetaModel): A metamodel to initialize.

    Returns:
        Parser for the new language.
    """
    if metamodel.debug:
        metamodel.dprint("*** PARSING LANGUAGE DEFINITION ***")

    # Check the cache for an already constructed textX parser.
    if metamodel.debug in textX_parsers:
        parser = textX_parsers[metamodel.debug]
    else:
        # Create a parser for textX grammars using the arpeggio grammar
        # specified in this module.
        parser = ParserPython(textx_model, comment_def=comment,
                              ignore_case=False,
                              reduce_tree=False,
                              memoization=metamodel.memoization,
                              debug=metamodel.debug,
                              file=metamodel.file)

        # Cache it for subsequent calls.
        textX_parsers[metamodel.debug] = parser

    # Parse the language description with the textX parser.
    try:
        parse_tree = parser.parse(language_def)
    except NoMatch as e:
        line, col = parser.pos_to_linecol(e.position)
        raise TextXSyntaxError(text(e), line, col)

    # Construct a new parser and meta-model based on the given language
    # description.
    lang_parser = visit_parse_tree(parse_tree,
                                   TextXVisitor(parser, metamodel))

    # Meta-model is constructed. Validate its semantics.
    metamodel.validate()

    # Here we connect meta-model and language parser for convenience.
    lang_parser.metamodel = metamodel
    metamodel._parser_blueprint = lang_parser

    if metamodel.debug:
        # Create a dot file for debugging purposes.
        PMDOTExporter().exportFile(
            lang_parser.parser_model,
            "{}_parser_model.dot".format(metamodel.rootcls.__name__))

    return lang_parser
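# For context, a hedged sketch of how language_from_str is normally reached
# through textX's public API; the grammar below is a minimal hypothetical
# example, not taken from the original code.
from textx import metamodel_from_str

mm = metamodel_from_str('''
    Model: greetings+=Greeting;
    Greeting: 'hello' name=ID;
''')
model = mm.model_from_str("hello world hello arpeggio")
print([g.name for g in model.greetings])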
def parse(text):
    parser = ParserPython(mspaText, debug=False, reduce_tree=False,
                          skipws=False, ws="\t ", memoization=True)
    return parser.parse(text)
def __init__(self, graph, platform, cfg, mappingDict=None, debug=False):
    self.__graph = graph
    self.__platform = platform
    # Avoid a mutable default argument by defaulting to a fresh dict.
    self.__mappingDict = mappingDict if mappingDict is not None else {}
    self.__parser = ParserPython(
        Grammar.logicLanguage, reduce_tree=True, debug=debug
    )
    self.__debug = debug
    self.__cfg = cfg
def test_parse_input():
    parser = ParserPython(calc)

    input = "4+5*7/3.45*-45*(2.56+32)/-56*(2-1.34)"
    result = parser.parse(input)

    assert isinstance(result, NonTerminal)
    assert str(result) == "4 | + | 5 | * | 7 | / | 3.45 | * | - | 45 | * | ( | 2.56 | + | 32 | ) | / | - | 56 | * | ( | 2 | - | 1.34 | ) | "
    assert repr(result) == "[ [ [ [ number '4' [0] ] ], '+' [1], [ [ number '5' [2] ], '*' [3], [ number '7' [4] ], '/' [5], [ number '3.45' [6] ], '*' [10], [ '-' [11], number '45' [12] ], '*' [14], [ '(' [15], [ [ [ number '2.56' [16] ] ], '+' [20], [ [ number '32' [21] ] ] ], ')' [23] ], '/' [24], [ '-' [25], number '56' [26] ], '*' [28], [ '(' [29], [ [ [ number '2' [30] ] ], '-' [31], [ [ number '1.34' [32] ] ] ], ')' [36] ] ] ], EOF [37] ]"
def test_sequence_suppress():
    """
    Test that a Sequence with suppress=True matches but is excluded
    from the parse tree.
    """
    def grammar():
        return Sequence("one", "two", "three", suppress=True), "four"

    parser = ParserPython(grammar)

    result = parser.parse("one two three four")
    assert result[0] == "four"
def _from_peg(self, language_def):
    parser = ParserPython(peggrammar, comment, reduce_tree=False,
                          debug=self.debug)
    parser.root_rule_name = self.root_rule_name
    parse_tree = parser.parse(language_def)

    return visit_parse_tree(parse_tree,
                            PEGVisitor(self.root_rule_name,
                                       self.comment_rule_name,
                                       self.ignore_case,
                                       debug=self.debug))
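# Context sketch: _from_peg above backs Arpeggio's textual PEG frontend,
# which is normally reached through ParserPEG; the grammar below is a
# minimal hypothetical example in the "clean" PEG dialect.
from arpeggio.cleanpeg import ParserPEG

grammar = r"""
    number = r'\d+'
    sum    = number ("+" number)* EOF
"""
parser = ParserPEG(grammar, "sum")
parser.parse("3 + 4 + 5")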
def test_sequence():
    def grammar():
        return ("a", "b", "c")

    parser = ParserPython(grammar)

    parsed = parser.parse("a b c")

    assert str(parsed) == "a | b | c"
    assert repr(parsed) == "[ 'a' [0], 'b' [2], 'c' [4] ]"
def test_zeroormore_with_separator():
    def grammar():
        return ZeroOrMore(['a', 'b'], sep=','), EOF

    parser = ParserPython(grammar, reduce_tree=False)

    result = parser.parse('a, b, b, b, a')
    assert result

    with pytest.raises(NoMatch):
        parser.parse('a, b a')
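# Companion sketch: the `sep` modifier works with OneOrMore as well; a
# hypothetical comma-separated identifier list.
from arpeggio import ParserPython, OneOrMore, EOF
from arpeggio import RegExMatch as _

def idents():
    return OneOrMore(_(r'\w+'), sep=','), EOF

ParserPython(idents).parse("alpha, beta, gamma")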
def main():
    # calc is the root rule of your grammar.
    # Use param debug=True for verbose debugging messages and for grammar
    # and parse tree visualization using graphviz and dot.
    parser = ParserPython(calc)

    parse_tree = parser.parse("-(4-1)*5+(2+4.67)+5.89/(.2+7)")
    print(parse_tree)

    result = visit_parse_tree(parse_tree, CalcVisitor(debug=False))
    print(result)
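# A plausible sketch of the CalcVisitor referenced above, assuming the
# classic Arpeggio calc grammar (number/factor/term/expression); a hedged
# reconstruction, not necessarily the original implementation.
from arpeggio import PTNodeVisitor

class CalcVisitor(PTNodeVisitor):
    def visit_number(self, node, children):
        # Convert the matched text to a float.
        return float(node.value)

    def visit_factor(self, node, children):
        # Apply an optional leading sign to the number or
        # parenthesized expression.
        if len(children) == 1:
            return children[0]
        sign = -1 if children[0] == '-' else 1
        return sign * children[-1]

    def visit_term(self, node, children):
        # Multiply or divide already-evaluated factors, left to right.
        term = children[0]
        for i in range(2, len(children), 2):
            if children[i - 1] == "*":
                term *= children[i]
            else:
                term /= children[i]
        return term

    def visit_expression(self, node, children):
        # Add or subtract already-evaluated terms, left to right.
        expr = children[0]
        for i in range(2, len(children), 2):
            if children[i - 1] == "-":
                expr -= children[i]
            else:
                expr += children[i]
        return expr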
def test_rrel_basic_parser1():
    parser = ParserPython(rrel_standalone)

    parse_tree = parser.parse("^pkg*.cls")
    assert len(parse_tree) == 2  # always true (one path, one EOF)

    parse_tree = parser.parse("obj.ref.~extension *.methods")
    assert len(parse_tree) == 2  # always true (one path, one EOF)

    parse_tree = parser.parse("instance.(type.vals)*")
    assert len(parse_tree) == 2  # always true (one path, one EOF)
def parse(content: str) -> 'Literal':
    from arpeggio import ParserPython
    from arpeggio import visit_parse_tree

    from depysible.language.grammar import literal, comment
    from depysible.language.visitor import DefeasibleVisitor

    parser = ParserPython(literal, comment_def=comment)
    parse_tree = parser.parse(content)

    return visit_parse_tree(parse_tree, DefeasibleVisitor())
def main(debug=False):
    parser = ParserPython(initial, debug=debug)

    # Read the input with a context manager so the file is closed.
    with open("input.txt", 'r') as file_input:
        input_expr = file_input.read()

    parse_tree = parser.parse(input_expr)
    result = parser.getASG()
    return result
def test_eolterm():
    # The first rule should match only the first line,
    # so that the second rule will match "a" on the new line.
    input = """a a b a b b
    a"""

    parser = ParserPython(grammar, reduce_tree=False)
    result = parser.parse(input)

    assert result
def test_default_action_disabled():
    parser = ParserPython(grammar)

    parser.parse('(-34) strmatch')

    parser.getASG(defaults=False)

    assert not p_removed
    assert not number_str
    assert parse_tree_node
def test_empty_nested_parse():
    def grammar():
        return [first]

    def first():
        return ZeroOrMore("second")

    parser = ParserPython(grammar)

    # The parse tree will be empty, as nothing will be parsed.
    tree = parser.parse("something")
    assert not tree
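# Related sketch: adding EOF to a rule like the one above turns the empty
# match into a parse error, since the whole input must then be consumed.
from arpeggio import ParserPython, ZeroOrMore, EOF, NoMatch

def strict():
    return ZeroOrMore("second"), EOF

try:
    ParserPython(strict).parse("something")
except NoMatch:
    print("EOF makes the parser require full-input consumption")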
def test_not_match_as_alternative():
    """
    Test that Not is not reported if it is part of an OrderedChoice.
    """
    def grammar():
        return ['one', Not('two')], _(r'\w+')

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as e:
        parser.parse('   two ident')
    assert "Expected 'one' at " in str(e.value)
def parse_bibtex(file_name, debug=False):
    global parser
    if parser is None:
        parser = ParserPython(bibfile, debug=debug)

    with codecs.open(file_name, "r", encoding="utf-8") as bibtexfile:
        bibtexfile_content = bibtexfile.read()

    parse_tree = parser.parse(bibtexfile_content)

    return visit_parse_tree(parse_tree, BibtexVisitor(debug=debug))
def test_optional_no_error():
    """
    Test that an optional match failure does not show up in the NoMatch
    errors.
    """
    def grammar():
        return Optional('a'), 'b'

    parser = ParserPython(grammar)

    try:
        parser.parse('c')
        assert False
    except NoMatch as e:
        assert "Expected 'b'" in str(e)
def test_file_name_reporting():
    """
    Test that if the parser has a file name set it will be reported.
    """
    def grammar():
        return Optional('a'), 'b', EOF

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as e:
        parser.parse("\n\n   a c", file_name="test_file.peg")
    assert "Expected 'b' at position test_file.peg:(3, 6)" in str(e.value)
    assert (e.value.line, e.value.col) == (3, 6)
def test_sequence_of_nots():
    """
    Test that a sequence of Not rules is handled properly.
    """
    def grammar():
        return Not('one'), Not('two'), _(r'\w+')

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as e:
        parser.parse('   two ident')
    assert "Not expected input" in str(e.value)
def test_file_name_reporting():
    """
    Test that if the parser has a file name set it will be reported.
    """
    def grammar():
        return Optional('a'), 'b', EOF

    parser = ParserPython(grammar)

    try:
        parser.parse("\n\n   a c", file_name="test_file.peg")
        assert False
    except NoMatch as e:
        assert "Expected 'b' at test_file.peg:(3, 6)" in str(e)
def test_not_match_at_beginning():
    """
    Test that the matching of a Not ParsingExpression is not reported in
    the error message.
    """
    def grammar():
        return Not('one'), _(r'\w+')

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as e:
        parser.parse('   one ident')
    assert "Not expected input" in str(e.value)
def test_pos_to_linecol():
    def grammar():
        return ("a", "b", "c")

    parser = ParserPython(grammar)

    parse_tree = parser.parse("a\n\n\n b\nc")

    a_pos = parse_tree[0].position
    assert parser.pos_to_linecol(a_pos) == (1, 1)
    b_pos = parse_tree[1].position
    assert parser.pos_to_linecol(b_pos) == (4, 2)
    c_pos = parse_tree[2].position
    assert parser.pos_to_linecol(c_pos) == (5, 1)
def test_alternative_added():
    """
    Test that matches from alternative branches at the same position
    are reported.
    """
    def grammar():
        return ['one', 'two'], _(r'\w+')

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch) as e:
        parser.parse('   three ident')
    assert "Expected 'one' or 'two'" in str(e.value)
    assert (e.value.line, e.value.col) == (1, 4)
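# Sketch of the NoMatch attributes exercised in these error-reporting tests
# (position, line, col), with a hypothetical one-rule grammar.
from arpeggio import ParserPython, NoMatch

def only_one():
    return 'one'

try:
    ParserPython(only_one).parse('two')
except NoMatch as e:
    print(e.position, e.line, e.col)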
def test_optional_no_error():
    """
    Test that optional match failure does not show up in the NoMatch
    errors.
    """
    def grammar():
        return Optional('a'), 'b'

    parser = ParserPython(grammar)

    try:
        parser.parse('c')
        assert False
    except NoMatch as e:
        assert "Expected 'b'" in str(e)
def test_comment_matching_not_reported():
    """
    Test that the matching of comments is not reported.
    """
    def grammar():
        return Optional('a'), 'b', EOF

    def comments():
        return _(r'//.*$')

    parser = ParserPython(grammar, comments)

    try:
        parser.parse('\n\n a // This is a comment \n c')
        assert False
    except NoMatch as e:
        assert "Expected 'b' at position (4, 2)" in str(e)
def test_skipws():
    """
    skipws may be defined per Sequence.
    """
    def grammar():
        return Sequence("one", "two", "three"), "four"

    parser = ParserPython(grammar)

    # By default, skipws is True and whitespaces will be skipped.
    parser.parse("one two three four")

    def grammar():
        return Sequence("one", "two", "three", skipws=False), "four"

    parser = ParserPython(grammar)

    # If we disable skipws for the sequence only, then whitespace skipping
    # should not be done inside the sequence.
    with pytest.raises(NoMatch):
        parser.parse("one two three four")

    # But it will be done outside of it.
    parser.parse("onetwothree four")
def test_comment_matching_not_reported():
    """
    Test that the matching of comments is not reported.
    """
    def grammar():
        return Optional('a'), 'b', EOF

    def comments():
        return _(r'//.*$')

    parser = ParserPython(grammar, comments)

    with pytest.raises(NoMatch) as e:
        parser.parse('\n\n a // This is a comment \n c')
    assert "Expected 'b' at position (4, 2)" in str(e.value)
    assert (e.value.line, e.value.col) == (4, 2)
def test_optional_inside_zeroormore():
    """
    Test an optional match inside a zero-or-more. Optional always succeeds,
    thus inducing ZeroOrMore to try the match again. Arpeggio handles this
    using soft failures.
    """
    def grammar():
        return ZeroOrMore(Optional('a'))

    parser = ParserPython(grammar)

    with pytest.raises(NoMatch):
        # This could lead to an infinite loop.
        parser.parse('b')
def test_ws():
    """
    ws consists of chars that will be skipped if skipws is enabled.
    By default it consists of space, tab and newline.
    """
    def grammar():
        return ("one", "two", "three")

    parser = ParserPython(grammar)

    # With the default ws this should parse without error.
    parser.parse("""one
                 two
                 three""")

    # If we make only the space char ws, then the same input will raise
    # an exception.
    parser = ParserPython(grammar, ws=" ")
    with pytest.raises(NoMatch):
        parser.parse("""one
                     two
                     three""")

    # But if only spaces are between words then it will parse.
    parser.parse("one two three")