Example #1
    def parse(self, inputstring: str, document: nodes.document) -> None:
        """Parse source text.

        :param inputstring: The source string to parse
        :param document: The root docutils node to add AST elements to

        """
        # get the global config
        config: MdParserConfig = document.settings.env.myst_config

        # update the global config with the file-level config
        try:
            topmatter = read_topmatter(inputstring)
        except TopmatterReadError:
            pass  # this will be reported during the render
        else:
            if topmatter:
                warning = lambda wtype, msg: create_warning(  # noqa: E731
                    document, msg, line=1, append_to=document, subtype=wtype
                )
                config = merge_file_level(config, topmatter, warning)

        parser = create_md_parser(config, SphinxRenderer)
        parser.options["document"] = document
        parser.render(inputstring)
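
For comparison, the same `create_md_parser` helper also works outside of Sphinx; a minimal sketch, assuming the import paths of recent myst-parser releases:

from markdown_it.renderer import RendererHTML

from myst_parser.config.main import MdParserConfig
from myst_parser.parsers.mdit import create_md_parser

# render a MyST string straight to HTML, with no Sphinx document involved
parser = create_md_parser(MdParserConfig(), RendererHTML)
print(parser.render("# Title\n\nSome *emphasised* text."))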
Example #2
def print_anchors(args=None):
    """ """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "input",
        nargs="?",
        type=argparse.FileType("r"),
        default=sys.stdin,
        help="Input file (default stdin)",
    )
    arg_parser.add_argument(
        "-o",
        "--output",
        type=argparse.FileType("w"),
        default=sys.stdout,
        help="Output file (default stdout)",
    )
    arg_parser.add_argument("-l",
                            "--level",
                            type=int,
                            default=2,
                            help="Maximum heading level.")
    args = arg_parser.parse_args(args)
    parser = create_md_parser(MdParserConfig(heading_anchors=args.level),
                              RendererHTML)

    def _filter_plugin(state):
        state.tokens = [
            t for t in state.tokens
            if t.type.startswith("heading_") and int(t.tag[1]) <= args.level
        ]

    parser.use(lambda p: p.core.ruler.push("filter", _filter_plugin))
    text = parser.render(args.input.read())
    args.output.write(text)
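
For a quick check, `print_anchors` can be fed an in-memory stream, since the `input` argument only defaults to `sys.stdin` at the moment the function runs; a small sketch (the stdin swap is purely for illustration):

import io
import sys

sys.stdin = io.StringIO("# Top\n\n## Section one\n")
print_anchors(["--level", "2"])  # prints the heading tags with their anchor ids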
Example #3
    def _render_markdown_base(
        self, data: MimeData, *, fmt: str, inline: bool, allow_headings: bool
    ) -> list[nodes.Element]:
        """Base render for a notebook markdown mime output (block or inline)."""
        pseudo_element = nodes.Element()  # element to hold the parsed markdown
        current_parser = self.renderer.md
        current_md_config = self.renderer.md_config
        try:
            # potentially replace the parser temporarily
            if fmt == "myst":
                # use the current configuration to render the markdown
                pass
            elif fmt == "commonmark":
                # use an isolated, CommonMark only, parser
                self.renderer.md_config = MdParserConfig(commonmark_only=True)
                self.renderer.md = create_md_parser(
                    self.renderer.md_config, self.renderer.__class__
                )
            elif fmt == "gfm":
                # use an isolated, GitHub Flavoured Markdown only, parser
                self.renderer.md_config = MdParserConfig(gfm_only=True)
                self.renderer.md = create_md_parser(
                    self.renderer.md_config, self.renderer.__class__
                )
            else:
                self.logger.warning(
                    f"skipping unknown markdown format: {fmt}",
                    subtype="unknown_markdown_format",
                    line=data.line,
                )
                return []

            with self.renderer.current_node_context(pseudo_element):
                self.renderer.nested_render_text(
                    data.string,
                    data.line or 0,
                    inline=inline,
                    allow_headings=allow_headings,
                )
        finally:
            # restore the parser
            self.renderer.md = current_parser
            self.renderer.md_config = current_md_config

        return pseudo_element.children
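
The try/finally above is a save-and-restore pattern around the renderer's parser. The same idea can be packaged as a context manager; a hedged sketch (`temporary_parser` is a hypothetical helper, not part of myst-nb; the `md`/`md_config` attribute names mirror the renderer above):

from contextlib import contextmanager

@contextmanager
def temporary_parser(renderer, md_config, renderer_cls):
    """Swap in an isolated parser for the duration of the block, then restore."""
    saved_md, saved_config = renderer.md, renderer.md_config
    renderer.md_config = md_config
    renderer.md = create_md_parser(md_config, renderer_cls)
    try:
        yield renderer.md
    finally:
        renderer.md, renderer.md_config = saved_md, saved_config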
Example #4
    def parse(self, inputstring: str, document: nodes.document) -> None:
        """Parse source text.

        :param inputstring: The source string to parse
        :param document: The root docutils node to add AST elements to
        """

        self.setup_parse(inputstring, document)

        # check for exorbitantly long lines
        if hasattr(document.settings, "line_length_limit"):
            for i, line in enumerate(inputstring.split("\n")):
                if len(line) > document.settings.line_length_limit:
                    error = document.reporter.error(
                        f"Line {i+1} exceeds the line-length-limit:"
                        f" {document.settings.line_length_limit}."
                    )
                    document.append(error)
                    return

        # create parsing configuration from the global config
        try:
            config = create_myst_config(document.settings, DOCUTILS_EXCLUDED_ARGS)
        except Exception as exc:
            error = document.reporter.error(f"Global myst configuration invalid: {exc}")
            document.append(error)
            config = MdParserConfig()

        # update the global config with the file-level config
        try:
            topmatter = read_topmatter(inputstring)
        except TopmatterReadError:
            pass  # this will be reported during the render
        else:
            if topmatter:
                warning = lambda wtype, msg: create_warning(  # noqa: E731
                    document, msg, line=1, append_to=document, subtype=wtype
                )
                config = merge_file_level(config, topmatter, warning)

        # parse content
        parser = create_md_parser(config, DocutilsRenderer)
        parser.options["document"] = document
        parser.render(inputstring)

        # post-processing

        # replace raw nodes if raw is not allowed
        if not getattr(document.settings, "raw_enabled", True):
            for node in document.traverse(nodes.raw):
                warning = document.reporter.warning("Raw content disabled.")
                node.parent.replace(node, warning)

        self.finish_parse()
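
Because this `parse` method implements the standard docutils parser interface, the same machinery can be driven through `docutils.core`; a minimal sketch, assuming the Parser class lives where recent myst-parser releases put it:

from docutils.core import publish_string
from myst_parser.parsers.docutils_ import Parser  # older releases: myst_parser.docutils_

html = publish_string(
    source="# Heading\n\nSome *text*.",
    parser=Parser(),
    writer_name="html5",
)
print(html.decode("utf8"))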
Example #5
def test_commonmark(entry):
    if entry["example"] == 14:
        # This is just a test that +++ are not parsed as thematic breaks
        pytest.skip("Expects '+++' to be unconverted (not block break).")
    if entry["example"] in [66, 68]:
        # Front matter is supported by numerous Markdown flavours,
        # but not strictly CommonMark,
        # see: https://talk.commonmark.org/t/metadata-in-documents/721/86
        pytest.skip(
            "Thematic breaks on the first line conflict with front matter syntax"
        )
    test_case = entry["markdown"]
    md = create_md_parser(MdParserConfig(), RendererHTML)
    output = md.render(test_case)

    if entry["example"] == 593:
        # this doesn't have any bearing on the output
        output = output.replace("mailto", "MAILTO")
    if entry["example"] in [187, 209, 210]:
        # this doesn't have any bearing on the output
        output = output.replace("<blockquote></blockquote>",
                                "<blockquote>\n</blockquote>")

    assert output == entry["html"]
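
The `entry` argument above presumably comes from the CommonMark spec test cases, whose entries carry `markdown`, `html`, `example`, and `section` keys; a hedged sketch of the parametrisation (the JSON path is an assumption):

import json
from pathlib import Path

import pytest

# hypothetical location of the CommonMark spec test cases
ENTRIES = json.loads(Path("commonmark_spec.json").read_text("utf8"))

@pytest.mark.parametrize("entry", ENTRIES, ids=[str(e["example"]) for e in ENTRIES])
def test_commonmark(entry):
    ...  # body as shown above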
Example #6
    def parse(self, inputstring: str, document: nodes.document) -> None:
        """Parse source text.

        :param inputstring: The source string to parse
        :param document: The root docutils node to add AST elements to
        """
        assert self.env is not None, "env not set"
        self.env: SphinxEnvType
        document_path = self.env.doc2path(self.env.docname)

        # get a logger for this document
        logger = SphinxDocLogger(document)

        # get markdown parsing configuration
        md_config: MdParserConfig = self.env.myst_config

        # get notebook rendering configuration
        nb_config: NbParserConfig = self.env.mystnb_config

        # create a reader for the notebook
        nb_reader = create_nb_reader(document_path, md_config, nb_config,
                                     inputstring)
        # If the nb_reader is None, then we default to a standard Markdown parser
        if nb_reader is None:
            return super().parse(inputstring, document)
        notebook = nb_reader.read(inputstring)

        # update the global markdown config with the file-level config
        warning = lambda wtype, msg: create_warning(  # noqa: E731
            document,
            msg,
            line=1,
            append_to=document,
            subtype=wtype)
        nb_reader.md_config = merge_file_level(nb_reader.md_config,
                                               notebook.metadata, warning)

        # potentially replace kernel name with alias
        kernel_name = notebook.metadata.get("kernelspec", {}).get("name", None)
        if kernel_name is not None and nb_config.kernel_rgx_aliases:
            for rgx, alias in nb_config.kernel_rgx_aliases.items():
                if re.fullmatch(rgx, kernel_name):
                    logger.debug(
                        f"Replaced kernel name: {kernel_name!r} -> {alias!r}",
                        subtype="kernel",
                    )
                    notebook.metadata["kernelspec"]["name"] = alias
                    break

        # Update mystnb configuration with notebook level metadata
        if nb_config.metadata_key in notebook.metadata:
            overrides = nb_node_to_dict(
                notebook.metadata[nb_config.metadata_key])
            overrides.pop("output_folder",
                          None)  # this should not be overridden
            try:
                nb_config = nb_config.copy(**overrides)
            except Exception as exc:
                logger.warning(
                    f"Failed to update configuration with notebook metadata: {exc}",
                    subtype="config",
                )
            else:
                logger.debug("Updated configuration with notebook metadata",
                             subtype="config")

        # Setup the parser
        mdit_parser = create_md_parser(nb_reader.md_config, SphinxNbRenderer)
        mdit_parser.options["document"] = document
        mdit_parser.options["nb_config"] = nb_config
        mdit_renderer: SphinxNbRenderer = mdit_parser.renderer  # type: ignore
        mdit_env: dict[str, Any] = {}

        # load notebook element renderer class from entry-point name
        # this is separate from SphinxNbRenderer, so that users can override it
        renderer_name = nb_config.render_plugin
        nb_renderer: NbElementRenderer = load_renderer(renderer_name)(
            mdit_renderer, logger)
        # we temporarily store nb_renderer on the document,
        # so that roles/directives can access it
        document.attributes["nb_renderer"] = nb_renderer
        # we currently do this early, so that the nb_renderer has access to things
        mdit_renderer.setup_render(mdit_parser.options, mdit_env)

        # parse notebook structure to markdown-it tokens
        # note, this does not assume that the notebook has been executed yet
        mdit_tokens = notebook_to_tokens(notebook, mdit_parser, mdit_env,
                                         logger)

        # open the notebook execution client,
        # this may execute the notebook immediately or during the page render
        with create_client(notebook, document_path, nb_config, logger,
                           nb_reader.read_fmt) as nb_client:
            mdit_parser.options["nb_client"] = nb_client
            # convert to docutils AST, which is added to the document
            mdit_renderer.render(mdit_tokens, mdit_parser.options, mdit_env)

        # save final execution data
        if nb_client.exec_metadata:
            NbMetadataCollector.set_exec_data(self.env, self.env.docname,
                                              nb_client.exec_metadata)
            if nb_client.exec_metadata["traceback"]:
                # store error traceback in outdir and log its path
                reports_file = Path(self.env.app.outdir).joinpath(
                    "reports",
                    *(self.env.docname + ".err.log").split("/"))
                reports_file.parent.mkdir(parents=True, exist_ok=True)
                reports_file.write_text(nb_client.exec_metadata["traceback"],
                                        encoding="utf8")
                logger.warning(
                    f"Notebook exception traceback saved in: {reports_file}",
                    subtype="exec",
                )

        # write final (updated) notebook to output folder (utf8 is standard encoding)
        path = self.env.docname.split("/")
        ipynb_path = path[:-1] + [path[-1] + ".ipynb"]
        content = nbformat.writes(notebook).encode("utf-8")
        nb_renderer.write_file(ipynb_path, content, overwrite=True)

        # write glue data to the output folder,
        # and store the keys to environment doc metadata,
        # so that they may be used in any post-transform steps
        if nb_client.glue_data:
            glue_path = path[:-1] + [path[-1] + ".glue.json"]
            nb_renderer.write_file(
                glue_path,
                json.dumps(nb_client.glue_data,
                           cls=BytesEncoder).encode("utf8"),
                overwrite=True,
            )
            NbMetadataCollector.set_doc_data(self.env, self.env.docname,
                                             "glue",
                                             list(nb_client.glue_data.keys()))

        # move some document metadata to environment metadata,
        # so that we can later read it from the environment,
        # rather than having to load the whole doctree
        for key, (uri, kwargs) in document.attributes.pop("nb_js_files",
                                                          {}).items():
            NbMetadataCollector.add_js_file(self.env, self.env.docname, key,
                                            uri, kwargs)

        # remove temporary state
        document.attributes.pop("nb_renderer")
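
One small detail worth noting: the traceback report path is derived by splitting the (possibly nested) docname on "/". A tiny illustration with made-up names:

from pathlib import Path

docname = "guide/intro"        # hypothetical nested Sphinx docname
outdir = Path("_build/html")   # hypothetical Sphinx output directory
reports_file = outdir.joinpath("reports", *(docname + ".err.log").split("/"))
print(reports_file)            # _build/html/reports/guide/intro.err.log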
Example #7
def read_myst_markdown_notebook(
    text,
    config: MdParserConfig | None = None,
    code_directive="{code-cell}",
    raw_directive="{raw-cell}",
    add_source_map=False,
    path: str | Path | None = None,
) -> nbf.NotebookNode:
    """Convert text written in the myst format to a notebook.

    :param text: the file text
    :param config: the parser configuration (defaults to ``MdParserConfig()``)
    :param code_directive: the name of the directive to search for containing code cells
    :param raw_directive: the name of the directive to search for containing raw cells
    :param add_source_map: add a `source_map` key to the notebook metadata,
        which is a list of the starting source line number for each cell.
    :param path: path to notebook (required for ``:load:``)

    :raises MystMetadataParsingError: if the metadata block is not valid JSON/YAML

    NOTE: we assume here that all of these directives are at the top-level,
    i.e. not nested in other directives.
    """
    config = config or MdParserConfig()
    # parse markdown file up to the block level (i.e. don't worry about inline text)
    inline_config = dc.replace(
        config, disable_syntax=(list(config.disable_syntax) + ["inline"])
    )
    parser = create_md_parser(inline_config, RendererHTML)
    tokens = parser.parse(text + "\n")
    lines = text.splitlines()
    md_start_line = 0

    # get the document metadata
    metadata_nb = {}
    if tokens and tokens[0].type == "front_matter":
        metadata = tokens.pop(0)
        md_start_line = metadata.map[1] if metadata.map else 0
        try:
            metadata_nb = yaml.safe_load(metadata.content)
        except (yaml.parser.ParserError, yaml.scanner.ScannerError) as error:
            raise MystMetadataParsingError(f"Notebook metadata: {error}")

    # add missing display name to the metadata, as required by the nbformat schema:
    # https://github.com/jupyter/nbformat/blob/f712d60f13c5b168313222cbf4bee7face98a081/nbformat/v4/nbformat.v4.5.schema.json#L16
    if (
        "kernelspec" in metadata_nb
        and "name" in metadata_nb["kernelspec"]
        and "display_name" not in metadata_nb["kernelspec"]
    ):
        metadata_nb["kernelspec"]["display_name"] = metadata_nb["kernelspec"]["name"]

    # create an empty notebook
    nbf_version = nbf.v4
    kwargs = {"metadata": nbf.from_dict(metadata_nb)}
    notebook = nbf_version.new_notebook(**kwargs)
    source_map = []  # this is a list of the starting line number for each cell

    def _flush_markdown(start_line, token, md_metadata):
        """When we find a cell we check if there is preceding text.o"""
        endline = token.map[0] if token else len(lines)
        md_source = _strip_blank_lines("\n".join(lines[start_line:endline]))
        meta = nbf.from_dict(md_metadata)
        if md_source:
            source_map.append(start_line)
            notebook.cells.append(
                nbf_version.new_markdown_cell(source=md_source, metadata=meta)
            )

    # iterate through the tokens to identify notebook cells
    nesting_level = 0
    md_metadata: dict = {}

    for token in tokens:

        nesting_level += token.nesting

        if nesting_level != 0:
            # ignore fenced blocks that are nested, e.g. as part of lists, etc.
            continue

        token_map = token.map or [0, 0]

        if token.type == "fence" and token.info.startswith(code_directive):
            _flush_markdown(md_start_line, token, md_metadata)
            options, body_lines = _read_fenced_cell(token, len(notebook.cells), "Code")
            # Parse :load: or load: tags and populate body with contents of file
            if "load" in options:
                body_lines = _load_code_from_file(
                    path, options["load"], token, body_lines
                )
            meta = nbf.from_dict(options)
            source_map.append(token_map[0] + 1)
            notebook.cells.append(
                nbf_version.new_code_cell(source="\n".join(body_lines), metadata=meta)
            )
            md_metadata = {}
            md_start_line = token_map[1]

        elif token.type == "fence" and token.info.startswith(raw_directive):
            _flush_markdown(md_start_line, token, md_metadata)
            options, body_lines = _read_fenced_cell(token, len(notebook.cells), "Raw")
            meta = nbf.from_dict(options)
            source_map.append(token_map[0] + 1)
            notebook.cells.append(
                nbf_version.new_raw_cell(source="\n".join(body_lines), metadata=meta)
            )
            md_metadata = {}
            md_start_line = token_map[1]

        elif token.type == "myst_block_break":
            _flush_markdown(md_start_line, token, md_metadata)
            md_metadata = _read_cell_metadata(token, len(notebook.cells))
            md_start_line = token_map[1]

    _flush_markdown(md_start_line, None, md_metadata)

    if add_source_map:
        notebook.metadata["source_map"] = source_map
    return notebook
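
A short round trip, using the defaults from the signature above (the notebook text is a made-up minimal example):

text = '''\
---
kernelspec:
  name: python3
---
Some introductory markdown.

```{code-cell}
print("hello")
```
'''

nb = read_myst_markdown_notebook(text, add_source_map=True)
print(len(nb.cells))              # 2: one markdown cell, one code cell
print(nb.metadata["source_map"])  # starting source line of each cell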