def parse(self, inputstring: str, document: nodes.document) -> None:
    """Parse source text.

    :param inputstring: The source string to parse
    :param document: The root docutils node to add AST elements to
    """
    # start from the project-wide (global) MyST configuration
    md_config: MdParserConfig = document.settings.env.myst_config

    # overlay any file-level configuration found in the top-matter
    front_matter = None
    try:
        front_matter = read_topmatter(inputstring)
    except TopmatterReadError:
        # malformed top-matter will be reported during the render
        pass
    if front_matter:

        def _warn(wtype, msg):
            return create_warning(
                document, msg, line=1, append_to=document, subtype=wtype
            )

        md_config = merge_file_level(md_config, front_matter, _warn)

    # build the parser and render the source into the document
    md = create_md_parser(md_config, SphinxRenderer)
    md.options["document"] = document
    md.render(inputstring)
def print_anchors(args=None):
    """Print the heading anchors that MyST would generate for a Markdown file.

    Reads Markdown from ``input`` (default: stdin), renders only the heading
    tokens up to and including ``--level``, and writes the resulting HTML
    (headings with their generated anchor slugs) to ``output``
    (default: stdout).

    :param args: command-line argument list (defaults to ``sys.argv[1:]``)
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "input",
        nargs="?",
        type=argparse.FileType("r"),
        default=sys.stdin,
        help="Input file (default stdin)",
    )
    arg_parser.add_argument(
        "-o",
        "--output",
        type=argparse.FileType("w"),
        default=sys.stdout,
        help="Output file (default stdout)",
    )
    arg_parser.add_argument(
        "-l", "--level", type=int, default=2, help="Maximum heading level."
    )
    args = arg_parser.parse_args(args)

    parser = create_md_parser(MdParserConfig(heading_anchors=args.level), RendererHTML)

    def _filter_plugin(state):
        # keep only heading open/close/content tokens at or below the max level
        state.tokens = [
            t
            for t in state.tokens
            if t.type.startswith("heading_") and int(t.tag[1]) <= args.level
        ]

    parser.use(lambda p: p.core.ruler.push("filter", _filter_plugin))
    text = parser.render(args.input.read())
    args.output.write(text)
def _render_markdown_base(
    self, data: MimeData, *, fmt: str, inline: bool, allow_headings: bool
) -> list[nodes.Element]:
    """Base render for a notebook markdown mime output (block or inline)."""
    container = nodes.Element()  # temporary parent for the parsed markdown
    saved_parser = self.renderer.md
    saved_config = self.renderer.md_config
    try:
        # optionally swap in an isolated parser for restricted flavours;
        # "myst" keeps the current parser/config untouched
        if fmt == "commonmark":
            # use an isolated, CommonMark only, parser
            self.renderer.md_config = MdParserConfig(commonmark_only=True)
            self.renderer.md = create_md_parser(
                self.renderer.md_config, self.renderer.__class__
            )
        elif fmt == "gfm":
            # use an isolated, GitHub Flavoured Markdown only, parser
            self.renderer.md_config = MdParserConfig(gfm_only=True)
            self.renderer.md = create_md_parser(
                self.renderer.md_config, self.renderer.__class__
            )
        elif fmt != "myst":
            self.logger.warning(
                f"skipping unknown markdown format: {fmt}",
                subtype="unknown_markdown_format",
                line=data.line,
            )
            return []
        with self.renderer.current_node_context(container):
            self.renderer.nested_render_text(
                data.string,
                data.line or 0,
                inline=inline,
                allow_headings=allow_headings,
            )
    finally:
        # always restore the original parser and its configuration
        self.renderer.md = saved_parser
        self.renderer.md_config = saved_config
    return container.children
def parse(self, inputstring: str, document: nodes.document) -> None:
    """Parse source text.

    :param inputstring: The source string to parse
    :param document: The root docutils node to add AST elements to
    """
    self.setup_parse(inputstring, document)

    # reject documents containing exorbitantly long lines
    if hasattr(document.settings, "line_length_limit"):
        for lineno, text in enumerate(inputstring.split("\n"), start=1):
            if len(text) > document.settings.line_length_limit:
                document.append(
                    document.reporter.error(
                        f"Line {lineno} exceeds the line-length-limit:"
                        f" {document.settings.line_length_limit}."
                    )
                )
                return

    # build the parsing configuration from the global (docutils) settings
    try:
        config = create_myst_config(document.settings, DOCUTILS_EXCLUDED_ARGS)
    except Exception as exc:
        document.append(
            document.reporter.error(f"Global myst configuration invalid: {exc}")
        )
        config = MdParserConfig()

    # overlay any file-level configuration found in the top-matter
    front_matter = None
    try:
        front_matter = read_topmatter(inputstring)
    except TopmatterReadError:
        # malformed top-matter will be reported during the render
        pass
    if front_matter:

        def _warn(wtype, msg):
            return create_warning(
                document, msg, line=1, append_to=document, subtype=wtype
            )

        config = merge_file_level(config, front_matter, _warn)

    # parse the content into the document
    md = create_md_parser(config, DocutilsRenderer)
    md.options["document"] = document
    md.render(inputstring)

    # post-processing: replace raw nodes if raw is not allowed
    if not getattr(document.settings, "raw_enabled", True):
        for raw_node in document.traverse(nodes.raw):
            raw_node.parent.replace(
                raw_node, document.reporter.warning("Raw content disabled.")
            )

    self.finish_parse()
def test_commonmark(entry):
    """Check a single CommonMark spec example renders to the expected HTML."""
    example = entry["example"]
    if example == 14:
        # This is just a test that +++ are not parsed as thematic breaks
        pytest.skip("Expects '+++' to be unconverted (not block break).")
    if example in (66, 68):
        # Front matter is supported by numerous Markdown flavours,
        # but not strictly CommonMark,
        # see: https://talk.commonmark.org/t/metadata-in-documents/721/86
        pytest.skip(
            "Thematic breaks on the first line conflict with front matter syntax"
        )
    md = create_md_parser(MdParserConfig(), RendererHTML)
    rendered = md.render(entry["markdown"])
    if example == 593:
        # this doesn't have any bearing on the output
        rendered = rendered.replace("mailto", "MAILTO")
    if example in (187, 209, 210):
        # this doesn't have any bearing on the output
        rendered = rendered.replace(
            "<blockquote></blockquote>", "<blockquote>\n</blockquote>"
        )
    assert rendered == entry["html"]
def parse(self, inputstring: str, document: nodes.document) -> None:
    """Parse source text.

    :param inputstring: The source string to parse
    :param document: The root docutils node to add AST elements to
    """
    assert self.env is not None, "env not set"
    self.env: SphinxEnvType
    document_path = self.env.doc2path(self.env.docname)

    # get a logger for this document
    logger = SphinxDocLogger(document)

    # get markdown parsing configuration
    md_config: MdParserConfig = self.env.myst_config
    # get notebook rendering configuration
    nb_config: NbParserConfig = self.env.mystnb_config

    # create a reader for the notebook
    nb_reader = create_nb_reader(document_path, md_config, nb_config, inputstring)
    # If the nb_reader is None, then we default to a standard Markdown parser
    if nb_reader is None:
        return super().parse(inputstring, document)
    notebook = nb_reader.read(inputstring)

    # update the global markdown config with the file-level config
    warning = lambda wtype, msg: create_warning(  # noqa: E731
        document, msg, line=1, append_to=document, subtype=wtype
    )
    nb_reader.md_config = merge_file_level(
        nb_reader.md_config, notebook.metadata, warning
    )

    # potentially replace kernel name with alias
    # (first matching regex wins, hence the break)
    kernel_name = notebook.metadata.get("kernelspec", {}).get("name", None)
    if kernel_name is not None and nb_config.kernel_rgx_aliases:
        for rgx, alias in nb_config.kernel_rgx_aliases.items():
            if re.fullmatch(rgx, kernel_name):
                logger.debug(
                    f"Replaced kernel name: {kernel_name!r} -> {alias!r}",
                    subtype="kernel",
                )
                notebook.metadata["kernelspec"]["name"] = alias
                break

    # Update mystnb configuration with notebook level metadata
    if nb_config.metadata_key in notebook.metadata:
        overrides = nb_node_to_dict(notebook.metadata[nb_config.metadata_key])
        overrides.pop("output_folder", None)  # this should not be overridden
        try:
            nb_config = nb_config.copy(**overrides)
        except Exception as exc:
            # invalid overrides are reported but do not abort the parse
            logger.warning(
                f"Failed to update configuration with notebook metadata: {exc}",
                subtype="config",
            )
        else:
            logger.debug(
                "Updated configuration with notebook metadata", subtype="config"
            )

    # Setup the parser
    mdit_parser = create_md_parser(nb_reader.md_config, SphinxNbRenderer)
    mdit_parser.options["document"] = document
    mdit_parser.options["nb_config"] = nb_config
    mdit_renderer: SphinxNbRenderer = mdit_parser.renderer  # type: ignore
    mdit_env: dict[str, Any] = {}

    # load notebook element renderer class from entry-point name
    # this is separate from SphinxNbRenderer, so that users can override it
    renderer_name = nb_config.render_plugin
    nb_renderer: NbElementRenderer = load_renderer(renderer_name)(
        mdit_renderer, logger
    )
    # we temporarily store nb_renderer on the document,
    # so that roles/directives can access it
    document.attributes["nb_renderer"] = nb_renderer
    # we currently do this early, so that the nb_renderer has access to things
    mdit_renderer.setup_render(mdit_parser.options, mdit_env)

    # parse notebook structure to markdown-it tokens
    # note, this does not assume that the notebook has been executed yet
    mdit_tokens = notebook_to_tokens(notebook, mdit_parser, mdit_env, logger)

    # open the notebook execution client,
    # this may execute the notebook immediately or during the page render
    with create_client(
        notebook, document_path, nb_config, logger, nb_reader.read_fmt
    ) as nb_client:
        mdit_parser.options["nb_client"] = nb_client
        # convert to docutils AST, which is added to the document
        mdit_renderer.render(mdit_tokens, mdit_parser.options, mdit_env)

    # save final execution data
    # NOTE(review): block structure reconstructed from collapsed source;
    # exec_metadata is read after the client context exits — confirm upstream
    if nb_client.exec_metadata:
        NbMetadataCollector.set_exec_data(
            self.env, self.env.docname, nb_client.exec_metadata
        )
        if nb_client.exec_metadata["traceback"]:
            # store error traceback in outdir and log its path
            reports_file = Path(self.env.app.outdir).joinpath(
                "reports", *(self.env.docname + ".err.log").split("/")
            )
            reports_file.parent.mkdir(parents=True, exist_ok=True)
            reports_file.write_text(
                nb_client.exec_metadata["traceback"], encoding="utf8"
            )
            logger.warning(
                f"Notebook exception traceback saved in: {reports_file}",
                subtype="exec",
            )

    # write final (updated) notebook to output folder (utf8 is standard encoding)
    path = self.env.docname.split("/")
    ipynb_path = path[:-1] + [path[-1] + ".ipynb"]
    content = nbformat.writes(notebook).encode("utf-8")
    nb_renderer.write_file(ipynb_path, content, overwrite=True)

    # write glue data to the output folder,
    # and store the keys to environment doc metadata,
    # so that they may be used in any post-transform steps
    if nb_client.glue_data:
        glue_path = path[:-1] + [path[-1] + ".glue.json"]
        nb_renderer.write_file(
            glue_path,
            json.dumps(nb_client.glue_data, cls=BytesEncoder).encode("utf8"),
            overwrite=True,
        )
        NbMetadataCollector.set_doc_data(
            self.env, self.env.docname, "glue", list(nb_client.glue_data.keys())
        )

    # move some document metadata to environment metadata,
    # so that we can later read it from the environment,
    # rather than having to load the whole doctree
    for key, (uri, kwargs) in document.attributes.pop("nb_js_files", {}).items():
        NbMetadataCollector.add_js_file(
            self.env, self.env.docname, key, uri, kwargs
        )

    # remove temporary state
    document.attributes.pop("nb_renderer")
def read_myst_markdown_notebook(
    text,
    config: MdParserConfig | None = None,
    code_directive="{code-cell}",
    raw_directive="{raw-cell}",
    add_source_map=False,
    path: str | Path | None = None,
) -> nbf.NotebookNode:
    """Convert text written in the myst format to a notebook.

    :param text: the file text
    :param code_directive: the name of the directive to search for containing
        code cells
    :param raw_directive: the name of the directive to search for containing
        raw cells
    :param add_source_map: add a `source_map` key to the notebook metadata,
        which is a list of the starting source line number for each cell.
    :param path: path to notebook (required for :load:)

    :raises MystMetadataParsingError: if the metadata block is not valid JSON/YAML

    NOTE: we assume here that all of these directives are at the top-level,
    i.e. not nested in other directives.
    """
    config = config or MdParserConfig()
    # parse markdown file up to the block level (i.e. don't worry about inline text)
    inline_config = dc.replace(
        config, disable_syntax=(list(config.disable_syntax) + ["inline"])
    )
    parser = create_md_parser(inline_config, RendererHTML)
    tokens = parser.parse(text + "\n")
    lines = text.splitlines()
    md_start_line = 0

    # get the document metadata (YAML front matter, if present)
    metadata_nb = {}
    if tokens[0].type == "front_matter":
        metadata = tokens.pop(0)
        md_start_line = metadata.map[1] if metadata.map else 0
        try:
            metadata_nb = yaml.safe_load(metadata.content)
        except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error:
            raise MystMetadataParsingError(f"Notebook metadata: {error}")

    # add missing display name to the metadata, as required by the nbformat schema:
    # https://github.com/jupyter/nbformat/blob/f712d60f13c5b168313222cbf4bee7face98a081/nbformat/v4/nbformat.v4.5.schema.json#L16
    if (
        "kernelspec" in metadata_nb
        and "name" in metadata_nb["kernelspec"]
        and "display_name" not in metadata_nb["kernelspec"]
    ):
        metadata_nb["kernelspec"]["display_name"] = metadata_nb["kernelspec"]["name"]

    # create an empty notebook
    nbf_version = nbf.v4
    kwargs = {"metadata": nbf.from_dict(metadata_nb)}
    notebook = nbf_version.new_notebook(**kwargs)
    source_map = []  # this is a list of the starting line number for each cell

    def _flush_markdown(start_line, token, md_metadata):
        """When we find a cell we check if there is preceding text."""
        # flush any markdown lines between `start_line` and the start of `token`
        # (or the end of the file when `token` is None) into a markdown cell
        endline = token.map[0] if token else len(lines)
        md_source = _strip_blank_lines("\n".join(lines[start_line:endline]))
        meta = nbf.from_dict(md_metadata)
        if md_source:
            source_map.append(start_line)
            notebook.cells.append(
                nbf_version.new_markdown_cell(source=md_source, metadata=meta)
            )

    # iterate through the tokens to identify notebook cells
    nesting_level = 0
    md_metadata: dict = {}

    for token in tokens:
        nesting_level += token.nesting

        if nesting_level != 0:
            # we ignore fenced block that are nested, e.g. as part of lists, etc
            continue

        token_map = token.map or [0, 0]

        if token.type == "fence" and token.info.startswith(code_directive):
            _flush_markdown(md_start_line, token, md_metadata)
            options, body_lines = _read_fenced_cell(token, len(notebook.cells), "Code")
            # Parse :load: or load: tags and populate body with contents of file
            if "load" in options:
                body_lines = _load_code_from_file(
                    path, options["load"], token, body_lines
                )
            meta = nbf.from_dict(options)
            source_map.append(token_map[0] + 1)
            notebook.cells.append(
                nbf_version.new_code_cell(source="\n".join(body_lines), metadata=meta)
            )
            md_metadata = {}
            md_start_line = token_map[1]

        elif token.type == "fence" and token.info.startswith(raw_directive):
            _flush_markdown(md_start_line, token, md_metadata)
            options, body_lines = _read_fenced_cell(token, len(notebook.cells), "Raw")
            meta = nbf.from_dict(options)
            source_map.append(token_map[0] + 1)
            notebook.cells.append(
                nbf_version.new_raw_cell(source="\n".join(body_lines), metadata=meta)
            )
            md_metadata = {}
            md_start_line = token_map[1]

        elif token.type == "myst_block_break":
            # a block break starts a new markdown cell, carrying its metadata
            _flush_markdown(md_start_line, token, md_metadata)
            md_metadata = _read_cell_metadata(token, len(notebook.cells))
            md_start_line = token_map[1]

    # flush any trailing markdown after the last cell marker
    _flush_markdown(md_start_line, None, md_metadata)

    if add_source_map:
        notebook.metadata["source_map"] = source_map

    return notebook