def recompile_grammar(ebnf_filename, force=False,
                      notify: Callable = lambda: None) -> bool:
    """
    Re-compiles an EBNF-grammar if necessary, that is, if either no
    corresponding 'XXXXParser.py'-file exists or if that file is
    outdated.

    Parameters:
        ebnf_filename(str):  The filename of the ebnf-source of the
            grammar. In case this is a directory and not a file, all
            files within this directory ending with .ebnf will be
            compiled.
        force(bool):  If False (default), the grammar will only be
            recompiled if it has been changed.
        notify(Callable):  'notify' is a function without parameters
            that is called when recompilation actually takes place.
            This can be used to inform the user.
    Returns:
        bool:  True, if recompilation of grammar has been successful or
            did not take place, because the Grammar hasn't changed
            since the last compilation. False, if the recompilation of
            the grammar has been attempted but failed. For a directory,
            True only if this holds for every grammar inside it.
    """
    if os.path.isdir(ebnf_filename):
        success = True
        for entry in sorted(os.listdir(ebnf_filename)):
            # os.listdir() yields bare names, so the directory must be
            # prepended before the entry can be tested or compiled.
            path = os.path.join(ebnf_filename, entry)
            if entry.lower().endswith('.ebnf') and os.path.isfile(path):
                # Call first, combine afterwards: a failing grammar must
                # not prevent the remaining grammars from being compiled.
                success = recompile_grammar(path, force, notify) and success
        return success

    base, _ = os.path.splitext(ebnf_filename)
    compiler_name = base + 'Parser.py'
    error_file_name = base + '_ebnf_ERRORS.txt'
    messages = []  # type: Iterable[Error]
    if (not os.path.exists(compiler_name) or force
            or grammar_changed(compiler_name, ebnf_filename)):
        notify()
        messages = compile_on_disk(ebnf_filename)
        if messages:
            # Errors as well as mere warnings are written to the error file.
            with open(error_file_name, 'w', encoding="utf-8") as f:
                for e in messages:
                    f.write(str(e))
                    f.write('\n')
            if has_errors(messages):
                return False

    # No messages (any more): remove a stale error file of an earlier run.
    if not messages and os.path.exists(error_file_name):
        os.remove(error_file_name)
    return True
def load_compiler_suite(compiler_suite: str) -> \
        Tuple[PreprocessorFactoryFunc, ParserFactoryFunc,
              TransformerFactoryFunc, CompilerFactoryFunc]:
    """
    Extracts a compiler suite from file or string `compiler_suite`
    and returns it as a tuple (preprocessor, parser, ast, compiler).

    If `compiler_suite` is Python code, it is split into its sections
    and the factory functions are compiled from the respective
    sections. Otherwise it is assumed to be an EBNF grammar, which is
    compiled with the EBNF compiler; in that case the EBNF factory
    functions are returned for preprocessor, parser and AST
    transformation.

    Returns:
        4-tuple (preprocessor function, parser class,
                 ast transformer function, compiler class)
    Raises:
        DefinitionError:  if compiling the EBNF grammar yielded errors.
        ValueError:  if any of the four extracted objects is not
            callable.
    """
    assert isinstance(compiler_suite, str)
    source = load_if_file(compiler_suite)
    dhpath = relative_path(os.path.dirname('.'), DHPARSER_PARENTDIR)
    imports = DHPARSER_IMPORTS.format(dhparser_parentdir=dhpath)
    if is_python_code(compiler_suite):
        sections = split_source(compiler_suite, source)
        # The import section of the suite replaces the default imports.
        _, imports, preprocessor_py, parser_py, ast_py, compiler_py, _ = sections
        # TODO: Compile in one step and pick parts from namespace later ?
        preprocessor = compile_python_object(imports + preprocessor_py,
                                             r'get_(?:\w+_)?preprocessor$')
        parser = compile_python_object(imports + parser_py,
                                       r'get_(?:\w+_)?grammar$')
        ast = compile_python_object(imports + ast_py,
                                    r'get_(?:\w+_)?transformer$')
    else:
        # Assume source is an ebnf grammar.
        # Is there really any reasonable application case for this?
        lg_dir = suspend_logging()
        try:
            compiler_py, messages, _ = compile_source(
                source, None, get_ebnf_grammar(), get_ebnf_transformer(),
                get_ebnf_compiler(compiler_suite, source))
        finally:
            # Make sure logging is switched back on even if compilation crashes.
            resume_logging(lg_dir)
        if has_errors(messages):
            raise DefinitionError(only_errors(messages), source)
        preprocessor = get_ebnf_preprocessor
        parser = get_ebnf_grammar
        ast = get_ebnf_transformer
    compiler = compile_python_object(imports + compiler_py,
                                     r'get_(?:\w+_)?compiler$')
    # All four parts of the suite must be callable factories.
    if callable(preprocessor) and callable(parser) and callable(ast) \
            and callable(compiler):
        return preprocessor, parser, ast, compiler
    raise ValueError('Could not generate compiler suite from source code!')
def grammar_instance(grammar_representation) -> Tuple[Grammar, str]:
    """
    Returns a grammar object and the source code of the grammar, from
    the given `grammar`-data which can be either a file name, ebnf-code,
    python-code, a Grammar-derived grammar class or an instance of
    such a class (i.e. a grammar object already).

    Returns:
        2-tuple (grammar object, grammar source). The grammar source is
        the empty string if a Grammar class or Grammar object was
        passed rather than a string.
    Raises:
        DefinitionError:  if the ebnf-code could not be compiled
            without errors.
        ValueError:  if no Grammar class could be obtained from the
            (generated) python-code.
    """
    if isinstance(grammar_representation, str):
        # read grammar
        grammar_src = load_if_file(grammar_representation)
        if is_python_code(grammar_src):
            parser_py = grammar_src  # type: str
            messages = []  # type: List[Error]
        else:
            lg_dir = suspend_logging()
            try:
                result, messages, _ = compile_source(
                    grammar_src, None,
                    get_ebnf_grammar(), get_ebnf_transformer(), get_ebnf_compiler())
            finally:
                # Never leave logging suspended, even if compilation crashes.
                resume_logging(lg_dir)
            parser_py = cast(str, result)
        if has_errors(messages):
            raise DefinitionError(only_errors(messages), grammar_src)
        imports = DHPARSER_IMPORTS.format(
            dhparser_parentdir=relative_path('.', DHPARSER_PARENTDIR))
        grammar_class = compile_python_object(imports + parser_py, r'\w+Grammar$')
        if inspect.isclass(grammar_class) and issubclass(grammar_class, Grammar):
            parser_root = grammar_class()
        else:
            raise ValueError('Could not compile or Grammar class!')
    else:
        # assume that grammar_representation is a Grammar object or a Grammar class
        grammar_src = ''
        if isinstance(grammar_representation, Grammar):
            parser_root = grammar_representation
        else:
            # assume ``grammar_representation`` is a grammar class and get the root object
            parser_root = grammar_representation()
    return parser_root, grammar_src
def compileDSL(text_or_file: str,
               preprocessor: Optional[PreprocessorFunc],
               dsl_grammar: Union[str, Grammar],
               ast_transformation: TransformationFunc,
               compiler: Compiler) -> Any:
    """
    Compiles a text written in a domain specific language (DSL) whose
    grammar is given by `dsl_grammar`, and returns the compilation
    result.

    The grammar is first turned into a grammar object, then the text
    is run through the pipeline of preprocessor, parser,
    AST-transformation and compiler.

    Raises:
        CompilationError if any errors occurred during compilation
    """
    assert isinstance(text_or_file, str)
    assert isinstance(compiler, Compiler)

    grammar_obj, grammar_source = grammar_instance(dsl_grammar)
    compiled, issues, syntax_tree = compile_source(
        text_or_file, preprocessor, grammar_obj, ast_transformation, compiler)

    # Warnings alone do not count as failure: hand back the result.
    if not has_errors(issues):
        return compiled

    source_text = load_if_file(text_or_file)
    raise CompilationError(only_errors(issues), source_text, grammar_source,
                           syntax_tree, compiled)
def compile_source(source: str,
                   preprocessor: Optional[PreprocessorFunc],  # str -> str
                   parser: GrammarCallable,  # str -> Node (concrete syntax tree (CST))
                   transformer: TransformationFunc,  # Node (CST) -> Node (abstract ST (AST))
                   compiler: CompilerCallable,  # Node (AST), Source -> Any
                   # out_source_data: list = NOPE,  # Tuple[str, SourceMapFunc]
                   *, preserve_AST: bool = False) \
        -> Tuple[Optional[Any], List[Error], Optional[Node]]:
    """Compiles a source in four stages:

    1. Pre-Processing (if needed)
    2. Parsing
    3. AST-transformation
    4. Compiling.

    The later stages AST-transformation, compilation will only be invoked if
    no fatal errors occurred in any of the earlier stages of the processing
    pipeline. If (non-fatal) errors occurred earlier, Python exceptions
    raised by the transformer or the compiler are caught and reported as
    errors; otherwise they are passed through to the caller.

    :param source: The input text for compilation or the name of a
            file containing the input text.
    :param preprocessor:  text -> text. A preprocessor function or None,
            if no preprocessor is needed.
    :param parser:  A parsing function or grammar class
    :param transformer:  A transformation function that takes
            the root-node of the concrete syntax tree as an argument and
            transforms it (in place) into an abstract syntax tree.
    :param compiler: A compiler function or compiler class instance
    :param preserve_AST: Preserves the AST-tree.

    :returns: The result of the compilation as a 3-tuple
        (result, errors, abstract syntax tree). In detail:

        1. The result as returned by the compiler or ``None`` in case of failure
        2. A list of error or warning messages
        3. The root-node of the abstract syntax tree if `preserve_AST` is True
           or `None` otherwise.
    """
    ast = None  # type: Optional[Node]
    original_text = load_if_file(source)  # type: str
    source_name = source if is_filename(source) else 'source'
    compiler.source = original_text
    log_file_name = logfile_basename(source, compiler) if is_logging() else ''  # type: str
    if not hasattr(parser, 'free_char_parsefunc__') or parser.history_tracking__:
        # log only for custom parser/transformer/compilers
        log_syntax_trees = get_config_value('log_syntax_trees')
    else:
        log_syntax_trees = set()

    # preprocessing

    errors = []
    if preprocessor is None:
        source_text = original_text  # type: str
        source_mapping = gen_neutral_srcmap_func(source_text, source_name)
        # lambda i: SourceLocation(source_name, 0, i)    # type: SourceMapFunc
    else:
        _, source_text, source_mapping, errors = preprocessor(original_text, source_name)

    if has_errors(errors, FATAL):
        return None, errors, None

    # parsing

    syntax_tree = parser(source_text, source_mapping=source_mapping)  # type: RootNode
    # carry the (non-fatal) preprocessor messages over to the syntax tree
    for e in errors:
        syntax_tree.add_error(None, e)
    syntax_tree.source = original_text
    syntax_tree.source_mapping = source_mapping
    if 'cst' in log_syntax_trees:
        log_ST(syntax_tree, log_file_name + '.cst')
    # A parser that is a plain callable need not have a
    # `history_tracking__` attribute (see the hasattr-check above),
    # so do not access the attribute unconditionally.
    if getattr(parser, 'history_tracking__', False):
        log_parsing_history(parser, log_file_name)

    # assert is_error(syntax_tree.error_flag) or str(syntax_tree) == strip_tokens(source_text), \
    #     str(syntax_tree)  # Ony valid if neither tokens or whitespace are dropped early

    result = None
    if not is_fatal(syntax_tree.error_flag):

        # AST-transformation

        if is_error(syntax_tree.error_flag):
            # catch Python exception, because if an error has occurred
            # earlier, the syntax tree might not look like expected,
            # which could (fatally) break AST transformations.
            try:
                transformer(syntax_tree)
            except Exception as e:
                syntax_tree.new_error(
                    syntax_tree,
                    "AST-Transformation failed due to earlier parser errors. "
                    "Crash Message: %s: %s" % (e.__class__.__name__, str(e)),
                    AST_TRANSFORM_CRASH)
        else:
            transformer(syntax_tree)

        if 'ast' in log_syntax_trees:
            log_ST(syntax_tree, log_file_name + '.ast')

        if not is_fatal(syntax_tree.error_flag):
            if preserve_AST:
                # copy the tree before it is handed to the compiler
                ast = copy.deepcopy(syntax_tree)

            # Compilation

            if is_error(syntax_tree.error_flag):
                # assume Python crashes are merely a consequence of earlier
                # errors, so let's catch them
                try:
                    result = compiler(syntax_tree)
                except Exception as e:
                    # raise e
                    node = syntax_tree  # type: Node
                    if isinstance(compiler, Compiler) and compiler.context:
                        # attach the error to the last node of the compiler's context
                        node = compiler.context[-1]
                    st = traceback.format_list(traceback.extract_tb(e.__traceback__))
                    trace = ''.join(filter_stacktrace(st))
                    syntax_tree.new_error(
                        node,
                        "Compilation failed, most likely, due to errors earlier "
                        "in the processing pipeline. Crash Message: %s: %s\n%s"
                        % (e.__class__.__name__, str(e), trace),
                        COMPILER_CRASH)
            else:
                # assume Python crashes are programming mistakes, so let
                # the exceptions through
                result = compiler(syntax_tree)

    messages = syntax_tree.errors_sorted  # type: List[Error]
    # Obsolete, because RootNode adjusts error locations whenever an error is added:
    # adjust_error_locations(messages, original_text, source_mapping)
    return result, messages, ast
def compile_on_disk(source_file: str, compiler_suite="", extension=".xml") -> Iterable[Error]:
    """
    Compiles a source file with a given compiler and writes the
    result to a file.

    If no ``compiler_suite`` is given it is assumed that the source
    file is an EBNF grammar. In this case the result will be a Python
    script containing a parser for that grammar as well as the
    skeletons for a preprocessor, AST transformation table, and compiler.
    If the Python script already exists only the parser section in the
    script will be updated; any other section is only replaced by a
    generated skeleton if it is empty. (For this to work, the different
    sections need to be delimited by section marker blocks.).
    `compile_on_disk()` returns a list of error messages or an empty list
    if no errors occurred.

    Failures to write the output file are not raised but printed to
    stdout together with the compilation result.

    Parameters:
        source_file(str):  The file name of the source text to be
            compiled.
        compiler_suite(str):  The file name of the parser/compiler-suite
            (usually ending with 'Parser.py'), with which the source
            file shall be compiled. If this is left empty, the source
            file is assumed to be an EBNF-Grammar that will be compiled
            with the internal EBNF-Compiler.
        extension(str):  The result of the compilation (if successful)
            is written to a file with the same name but a different
            extension than the source file. This parameter sets the
            extension.

    Returns:
        A (potentially empty) list of error or warning messages.
    """
    filepath = os.path.normpath(source_file)
    with open(source_file, encoding="utf-8") as f:
        source = f.read()
    rootname = os.path.splitext(filepath)[0]
    dhpath = relative_path(os.path.dirname(rootname), DHPARSER_PARENTDIR)
    compiler_name = as_identifier(os.path.basename(rootname))
    if compiler_suite:
        sfactory, pfactory, tfactory, cfactory = load_compiler_suite(compiler_suite)
    else:
        sfactory = get_ebnf_preprocessor  # PreprocessorFactoryFunc
        pfactory = get_ebnf_grammar       # ParserFactoryFunc
        tfactory = get_ebnf_transformer   # TransformerFactoryFunc
        cfactory = get_ebnf_compiler      # CompilerFactoryFunc
    compiler1 = cfactory()                # Compiler

    is_ebnf_compiler = isinstance(compiler1, EBNFCompiler)  # type: bool
    if is_ebnf_compiler:
        compiler1.set_grammar_name(compiler_name, source_file)

    result, messages, _ = compile_source(source, sfactory(), pfactory(), tfactory(), compiler1)

    if has_errors(messages):
        return messages

    elif is_ebnf_compiler:
        # either an EBNF- or no compiler suite given
        ebnf_compiler = cast(EBNFCompiler, compiler1)  # type: EBNFCompiler
        parser_name = rootname + 'Parser.py'
        try:
            # Read the sections of an already existing parser script, so
            # that everything but the parser section can be preserved.
            with open(parser_name, 'r', encoding="utf-8") as parser_file:
                source = parser_file.read()
            sections = split_source(parser_name, source)
            intro, imports, preprocessor, _, ast, compiler, outro = sections
        except (PermissionError, FileNotFoundError, IOError):
            # No readable parser script yet: start with empty sections
            # which will be filled with skeletons below.
            intro, imports, preprocessor, _, ast, compiler, outro = '', '', '', '', '', '', ''
        else:
            ast_trans_python_src = imports + ast
            ast_trans_table = dict()  # type: TransformationDict
            try:
                ast_trans_table = compile_python_object(ast_trans_python_src,
                                                        r'(?:\w+_)?AST_transformation_table$')
            except Exception as e:
                if isinstance(e, NameError):
                    err_str = ('NameError "{}" while compiling AST-Transformation. '
                               'Possibly due to a forgotten import at the beginning '
                               'of the AST-Block (!)').format(str(e))
                else:
                    err_str = 'Exception {} while compiling AST-Transformation: {}' \
                        .format(str(type(e)), str(e))
                messages.append(Error(err_str, 0, CANNOT_VERIFY_TRANSTABLE_WARNING))
                if is_logging():
                    # The log is merely a debugging aid. A failure to
                    # write it must neither abort the compilation nor be
                    # mistaken for an unreadable parser script (which
                    # would replace the existing sections by skeletons).
                    try:
                        log_path = os.path.join(log_dir(), rootname + '_AST_src.py')
                        with open(log_path, 'w', encoding='utf-8') as log_file:
                            log_file.write(ast_trans_python_src)
                    except OSError as log_error:
                        print('# Could not write log file for "' + rootname
                              + '" because of: '
                              + "\n# ".join(str(log_error).split('\n')))
            messages.extend(ebnf_compiler.verify_transformation_table(ast_trans_table))
            # TODO: Verify compiler

        # Fill in defaults for all sections that are empty.
        if RX_WHITESPACE.fullmatch(intro):
            intro = '#!/usr/bin/env python3'
        if RX_WHITESPACE.fullmatch(outro):
            outro = read_template('DSLParser.pyi').format(NAME=compiler_name)
        if RX_WHITESPACE.fullmatch(imports):
            imports = DHParser.ebnf.DHPARSER_IMPORTS.format(dhparser_parentdir=dhpath)
        if RX_WHITESPACE.fullmatch(preprocessor):
            preprocessor = ebnf_compiler.gen_preprocessor_skeleton()
        if RX_WHITESPACE.fullmatch(ast):
            ast = ebnf_compiler.gen_transformer_skeleton()
        if RX_WHITESPACE.fullmatch(compiler):
            compiler = ebnf_compiler.gen_compiler_skeleton()

        compilerscript = rootname + 'Parser.py'
        try:
            with open(compilerscript, 'w', encoding="utf-8") as f:
                f.write(intro)
                f.write(SECTION_MARKER.format(marker=SYMBOLS_SECTION))
                f.write(imports)
                f.write(SECTION_MARKER.format(marker=PREPROCESSOR_SECTION))
                f.write(preprocessor)
                f.write(SECTION_MARKER.format(marker=PARSER_SECTION))
                f.write(cast(str, result))
                f.write(SECTION_MARKER.format(marker=AST_SECTION))
                f.write(ast)
                f.write(SECTION_MARKER.format(marker=COMPILER_SECTION))
                f.write(compiler)
                f.write(SECTION_MARKER.format(marker=END_SECTIONS_MARKER))
                f.write(outro)
        except (PermissionError, FileNotFoundError, IOError) as error:
            print('# Could not write file "' + compilerscript + '" because of: '
                  + "\n# ".join(str(error).split('\n')))
            print(result)
        else:
            if platform.system() != "Windows":
                # set file permissions so that the compilerscript can be executed
                file_status = os.stat(compilerscript)
                os.chmod(compilerscript, file_status.st_mode | stat.S_IEXEC)

    else:
        # Serialize before opening the file, so that an illegal result
        # type does not leave an empty output file behind.
        if isinstance(result, Node):
            if extension.lower() == '.xml':
                output = result.as_xml()
            else:
                output = result.as_sxpr()
        elif isinstance(result, str):
            output = result
        else:
            raise AssertionError('Illegal result type: ' + str(type(result)))
        try:
            with open(rootname + extension, 'w', encoding="utf-8") as f:
                f.write(output)
        except (PermissionError, FileNotFoundError, IOError) as error:
            print('# Could not write file "' + rootname + extension + '" because of: '
                  + "\n# ".join(str(error).split('\n')))
            print(result)

    return messages