def build_generator(
    tokenizer: Tokenizer,
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    # TODO: Allow other extensions; pass the output type as an argument.
    if not output_file.endswith((".c", ".py")):
        raise RuntimeError("Your output file must either be a .c or .py file")

    with open(output_file, "w") as file:
        gen: ParserGenerator
        if output_file.endswith(".c"):
            gen = CParserGenerator(grammar, file, skip_actions=skip_actions)
        elif output_file.endswith(".py"):
            gen = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        else:
            assert False  # Should have been checked above
        gen.generate(grammar_file)

    if compile_extension and output_file.endswith(".c"):
        compile_c_extension(
            output_file, verbose=verbose_c_extension, keep_asserts=keep_asserts_in_extension
        )
    return gen
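
# Usage sketch (illustrative, not part of the module): assumes pegen-style
# imports for Tokenizer, grammar_tokenizer, and GrammarParser, and a grammar
# file named "my_grammar.gram"; the helper name is hypothetical.
def _example_build_python_parser() -> ParserGenerator:
    with open("my_grammar.gram") as file:
        tokenizer = Tokenizer(grammar_tokenizer(tokenize.generate_tokens(file.readline)))
        grammar = GrammarParser(tokenizer).start()
    # Emitting a .py file selects PythonParserGenerator above.
    return build_generator(tokenizer, grammar, "my_grammar.gram", "parse.py")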
def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    # Read the token definitions (e.g. a CPython-style Tokens file) up front,
    # since the C generator needs them before any code is emitted.
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        # Compile in a throwaway build directory so repeated builds don't
        # reuse stale artifacts.
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen
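
# Usage sketch for the C backend: "Tokens" is assumed to be a CPython-style
# token definitions file and "my_grammar.gram" an existing grammar file; both
# names and the helper are illustrative. Writes parse.c and returns the generator.
def _example_build_c_parser(grammar: Grammar) -> ParserGenerator:
    return build_c_generator(grammar, "my_grammar.gram", "Tokens", "parse.c")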
def generate_parser_c_extension(
    grammar: Grammar, path: pathlib.PurePath, debug: bool = False
) -> Any:
    """Generate a parser c extension for the given grammar in the given path

    Returns a module object with a parse_string() method.
    TODO: express that using a Protocol.
    """
    # Make sure that the working directory is empty: reusing non-empty temporary
    # directories when generating extensions can lead to segmentation faults.
    # Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
    # context.
    assert not os.listdir(path)
    source = path / "parse.c"
    with open(source, "w", encoding="utf-8") as file:
        genr = CParserGenerator(
            grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
        )
        genr.generate("parse.c")
    extension_path = compile_c_extension(str(source), build_dir=str(path / "build"))
    extension = import_file("parse", extension_path)
    return extension
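
# Usage sketch: the function asserts that `path` is empty, so a fresh temporary
# directory is the natural fit. The input string and helper name are
# illustrative; the exact parse_string() signature depends on the generated
# module.
def _example_generate_and_parse(grammar: Grammar) -> Any:
    with tempfile.TemporaryDirectory() as tmp:
        extension = generate_parser_c_extension(grammar, pathlib.Path(tmp))
        return extension.parse_string("1 + 2")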
def build_generator(
    tokenizer: Tokenizer,
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(output_file, "w") as file:
        gen: ParserGenerator
        if output_file.endswith(".c"):
            gen = CParserGenerator(grammar, file, skip_actions=skip_actions)
        elif output_file.endswith(".py"):
            gen = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        else:
            raise Exception("Your output file must either be a .c or .py file")
        gen.generate(grammar_file)

    if compile_extension and output_file.endswith(".c"):
        compile_c_extension(
            output_file, verbose=verbose_c_extension, keep_asserts=keep_asserts_in_extension
        )
    return gen
def generate_parser_c_extension(rules, path):
    """Generate a parser c extension for the given rules in the given path"""
    source = path / "parse.c"
    with open(source, "w") as file:
        genr = CParserGenerator(rules, file)
        genr.generate("parse.c")
    extension_path = compile_c_extension(str(source), build_dir=str(path / "build"))
    extension = import_file("parse", extension_path)
    return extension
def generate_parser_c_extension(grammar: Grammar, path: pathlib.PurePath) -> Any:
    """Generate a parser c extension for the given grammar in the given path

    Returns a module object with a parse_string() method.
    TODO: express that using a Protocol.
    """
    source = path / "parse.c"
    with open(source, "w") as file:
        genr = CParserGenerator(grammar, file)
        genr.generate("parse.c")
    extension_path = compile_c_extension(str(source), build_dir=str(path / "build"))
    extension = import_file("parse", extension_path)
    return extension
def build_generator(
    tokenizer,
    rules,
    grammar_file,
    output_file,
    compile_extension=False,
    verbose_c_extension=False,
):
    with open(output_file, "w") as file:
        gen: ParserGenerator
        if output_file.endswith(".c"):
            gen = CParserGenerator(rules.rules, file)
        elif output_file.endswith(".py"):
            gen = PythonParserGenerator(rules.rules, file)
        else:
            raise Exception("Your output file must either be a .c or .py file")
        gen.generate(grammar_file)

    if compile_extension and output_file.endswith(".c"):
        compile_c_extension(output_file, verbose=verbose_c_extension)
    return gen
def generate_c_parser_source(grammar: Grammar) -> str:
    out = io.StringIO()
    genr = CParserGenerator(grammar, out)
    genr.generate("<string>")
    return out.getvalue()
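
# Usage sketch: render the C parser in memory, e.g. for golden-file tests or
# quick inspection, without writing to disk (helper name is hypothetical).
def _example_preview_c_source(grammar: Grammar) -> None:
    source = generate_c_parser_source(grammar)
    print(source[:120])  # preview the start of the generated C file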
def main() -> None:
    args = argparser.parse_args()
    verbose = args.verbose
    # -v -v traces the parser; -v -v -v traces the tokenizer;
    # four or more -v flags trace both.
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    t0 = time.time()

    with open(args.filename) as file:
        tokenizer = Tokenizer(
            grammar_tokenizer(tokenize.generate_tokens(file.readline)),
            verbose=verbose_tokenizer,
        )
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        rules = parser.start()
        if not rules:
            err = parser.make_syntax_error(args.filename)
            traceback.print_exception(err.__class__, err, None)
            sys.exit(1)
        endpos = file.tell()

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for rule in rules.rules.values():
                print(" ", repr(rule))
        print("Clean Grammar:")
        for rule in rules.rules.values():
            print(" ", rule)

    output = args.output
    if not output:
        if args.cpython:
            output = "parse.c"
        else:
            output = "parse.py"

    with open(output, "w") as file:
        gen: ParserGenerator
        if args.cpython:
            gen = CParserGenerator(rules.rules, file)
        else:
            gen = PythonParserGenerator(rules.rules, file)
        gen.generate(args.filename)

    if args.cpython and args.compile_extension:
        compile_c_extension(output, verbose=args.verbose)

    if args.verbose:
        print("First Graph:")
        for src, dsts in gen.first_graph.items():
            print(f"  {src} -> {', '.join(dsts)}")
        print("First SCCS:")
        for scc in gen.first_sccs:
            print(" ", scc, end="")
            if len(scc) > 1:
                print("  # Indirectly left-recursive")
            else:
                name = next(iter(scc))
                if name in gen.first_graph[name]:
                    print("  # Left-recursive")
                else:
                    print()

    t1 = time.time()

    if args.verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if endpos:
            print(f" ({endpos} bytes)", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Cache sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"  cache : {len(parser._cache):10}")
        if not print_memstats():
            print("(Can't find psutil; install it for memory stats.)")
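
# Entry-point sketch: when this lives in a package's __main__.py (as in
# pegen), running e.g. `python -m pegen my_grammar.gram -o parse.py -v`
# drives main() directly.
if __name__ == "__main__":
    main()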