Exemplo n.º 1
0
def iter_all_export_paths(config_folder_paths=(), regex="*.json"):
    """we iterate through all json files in the
    supplied plugin_folder_paths, and then in the `export_plugins` folder
    """
    def _scan(folder):
        # yield (file-stem, Path) pairs for every match of ``regex``
        for match in glob.glob(os.path.join(str(folder), regex)):
            stem = os.path.splitext(os.path.basename(match))[0]
            yield stem, pathlib.Path(match)

    for folder in config_folder_paths:
        for item in _scan(folder):
            yield item

    # finally, the configurations bundled with the package itself
    for item in _scan(get_module_path(export_plugins)):
        yield item
Exemplo n.º 2
0
def load_export_config(export_config_path):
    """load the export configuration"""
    global _EXPORT_SCHEMA

    if isinstance(export_config_path, string_types):
        export_config_path = pathlib.Path(export_config_path)

    data = read_file_from_directory(
        export_config_path.parent,
        export_config_path.name,
        "export configuration",
        logger,
        interp_ext=True,
    )

    if _EXPORT_SCHEMA is None:
        # the schema is loaded lazily, once, and cached at module level
        _EXPORT_SCHEMA = read_file_from_directory(
            get_module_path(schema),
            _EXPORT_SCHEMA_FILE,
            "export configuration schema",
            logger,
            interp_ext=True,
        )

    # validate against schema
    try:
        jsonschema.validate(data, _EXPORT_SCHEMA)
    except jsonschema.ValidationError as err:
        handle_error(
            "validation of export config {} failed against {}: {}".format(
                export_config_path, _EXPORT_SCHEMA_FILE, err.message),
            jsonschema.ValidationError,
            logger=logger,
        )

    return data
Exemplo n.º 3
0
    def postprocess(self, stream, mimetype, filepath, resources=None):
        """ Post-process output.

        Parameters
        ----------
        stream: str
            the main file contents
        mimetype: str
            the mimetype of the file
        filepath: None or str or pathlib.Path
            the path to the output file
            the path does not have to exist, but must be absolute
        resources: None or dict
            a resources dict, output from exporter.from_notebook_node

        Returns
        -------
        stream: str
        filepath: None or str or pathlib.Path

        """
        mime_allowed = (self.allowed_mimetypes is None
                        or mimetype in self.allowed_mimetypes)
        if not mime_allowed:
            if self.skip_mime:
                # silently pass the inputs through unchanged
                self.logger.debug(
                    "skipping incorrect mime type: {}".format(mimetype))
                return stream, filepath, resources
            self.handle_error(
                "the mimetype {0} is not in the allowed list: {1}".format(
                    mimetype, self.allowed_mimetypes), TypeError)

        if self.requires_path and filepath is None:
            self.handle_error(
                "the filepath is None, but the post-processor requires a folder",
                IOError)

        # isinstance is False for None, so no separate None guard is needed
        if isinstance(filepath, string_types):
            filepath = pathlib.Path(filepath)

        if self.requires_path:

            if not filepath.is_absolute():
                self.handle_error(
                    "the post-processor requires an absolute folder path",
                    IOError)

            parent = filepath.parent
            if parent.exists():
                if not parent.is_dir():
                    self.handle_error(
                        "the filepath's parent is not a folder: {}".format(
                            filepath), TypeError)
            else:
                parent.mkdir(parents=True)

        return self.run_postprocess(
            stream, mimetype, filepath,
            {} if resources is None else resources)
Exemplo n.º 4
0
def get_export_config_path(export_key, config_folder_paths=()):
    # type (string, Tuple[str]) -> Union[string, None]
    """we search for a plugin name, which matches the supplied plugin name
    """
    matches = (
        jsonpath
        for name, jsonpath in iter_all_export_paths(config_folder_paths)
        if name == export_key
    )
    # first match wins; None when nothing matches
    return next((pathlib.Path(path) for path in matches), None)
Exemplo n.º 5
0
    def _load_config_file(self, replacements):
        """resolve ``self.conversion`` to an export configuration file,
        then build the exporter class, jinja template and
        (post-)processor configurations from it
        """
        # find conversion configuration
        self.logger.info("finding conversion configuration: {}".format(
            self.conversion))
        candidate = (pathlib.Path(self.conversion)
                     if isinstance(self.conversion, string_types)
                     else self.conversion)
        if candidate.exists():  # TODO use pathlib approach
            # an existing file path takes precedence over a named plugin
            export_config_path = candidate
        else:
            # else search internally
            export_config_path = get_export_config_path(
                self.conversion, self.plugin_folder_paths)

        if export_config_path is None:
            handle_error(
                "could not find conversion configuration: {}".format(
                    self.conversion),
                IOError,
                self.logger,
            )

        # read conversion configuration and create
        self.logger.info("loading conversion configuration")
        data = load_export_config(export_config_path)
        self.logger.info("creating exporter")
        exporter_cls = create_exporter_cls(data["exporter"]["class"])
        self.logger.info("creating template and loading filters")
        template_name = "template_file"
        jinja_template = load_template(template_name, data["template"])
        self.logger.info("creating process configuration")
        export_config = self._create_export_config(
            data["exporter"], template_name, replacements)
        pprocs, pproc_config = self._create_pproc_config(
            data.get("postprocessors", {}), replacements)

        return (exporter_cls, jinja_template, export_config,
                pprocs, pproc_config)
Exemplo n.º 6
0
def external_export_plugin():
    """return the path to the example plugin configuration file"""
    plugin_file = os.path.join(TEST_FILES_DIR, 'example_new_plugin.json')
    return pathlib.Path(plugin_file)
Exemplo n.º 7
0
 def expected_path(self):
     """the expected-output folder, as a ``pathlib.Path``"""
     folder = self._expected_folder_path
     return pathlib.Path(folder)
Exemplo n.º 8
0
 def converted_path(self):
     """the converted-output folder, as a ``pathlib.Path``"""
     folder = self._converted_folder_path
     return pathlib.Path(folder)
Exemplo n.º 9
0
 def source_path(self):
     """the source folder, as a ``pathlib.Path``"""
     folder = self._src_folder_path
     return pathlib.Path(folder)
Exemplo n.º 10
0
def _read_notebook(fileobj, as_version):
    """read a notebook from an open file handle

    works around ``nbformat.read`` mishandling encoded bytes for
    python 3.x < 3.6 on non-windows platforms
    """
    if (sys.version_info.major == 3 and sys.version_info.minor < 6
            and "win" not in sys.platform):
        data = fileobj.read()
        if hasattr(data, "decode"):
            data = data.decode("utf-8")
        return nbformat.reads(data, as_version=as_version)
    return nbformat.read(fileobj, as_version=as_version)


def merge_notebooks(ipynb_path, ignore_prefix="_", to_str=False, as_version=4):
    """ merge one or more ipynb's,
    if more than one, then the meta data is taken from the first

    Parameters
    ----------
    ipynb_path: str or pathlib.Path
    ignore_prefix : str
        ignore filename starting with this prefix
    to_str: bool
        return as a string, else return nbformat object
    as_version: int
        notebook format version

    Returns
    ------
    finalnb: jupyter.notebook
    meta_path : pathlib.Path
        path to notebook containing meta file

    Raises
    ------
    IOError
        if the path does not exist, or a directory contains no
        acceptable notebooks

    """
    meta_path = ""
    if isinstance(ipynb_path, string_types):
        ipynb_path = pathlib.Path(ipynb_path)
    if not ipynb_path.exists():
        handle_error("the notebook path does not exist: {}".format(ipynb_path),
                     IOError, logger)

    final_nb = None
    if ipynb_path.is_dir():
        logger.info("Merging all notebooks in directory")
        for ipath in alphanumeric_sort(ipynb_path.glob("*.ipynb")):
            if os.path.basename(ipath.name).startswith(ignore_prefix):
                continue
            with ipath.open("r", encoding="utf-8") as f:
                nb = _read_notebook(f, as_version)
            if final_nb is None:
                # the first acceptable notebook supplies the metadata
                meta_path = ipath
                final_nb = nb
            else:
                final_nb.cells.extend(nb.cells)
    else:
        logger.info("Reading notebook")
        with ipynb_path.open("r", encoding="utf-8") as f:
            final_nb = _read_notebook(f, as_version)
        meta_path = ipynb_path

    # BUG FIX: this guard previously ran only after final_nb.metadata was
    # accessed (and after the to_str early return), so a directory with no
    # acceptable notebooks raised AttributeError instead of the intended
    # IOError; it must come before any use of final_nb
    if final_nb is None:
        handle_error(
            "no acceptable notebooks found for path: {}".format(
                ipynb_path.name),
            IOError,
            logger,
        )

    if not hasattr(final_nb.metadata, "name"):
        final_nb.metadata.name = ""
    final_nb.metadata.name += "_merged"

    if to_str:
        if sys.version_info > (3, 0):
            return nbformat.writes(final_nb)
        else:
            return nbformat.writes(final_nb).encode("utf-8")

    return final_nb, meta_path
Exemplo n.º 11
0
    def publish(self, ipynb_path, nb_node=None):
        """ convert one or more Jupyter notebooks to a published format

        paths can be string of an existing file or folder,
        or a pathlib.Path like object

        all files linked in the documents are placed into a single files_folder

        Parameters
        ----------
        ipynb_path: str or pathlib.Path
            notebook file or directory
        nb_node: None or nbformat.NotebookNode
            a pre-converted notebook

        Returns
        --------
        outdata: dict
            containing keys;
            "outpath", "exporter", "stream", "main_filepath", "resources"

        """
        # setup the input and output paths
        if isinstance(ipynb_path, string_types):
            ipynb_path = pathlib.Path(ipynb_path)
        ipynb_name, ipynb_ext = os.path.splitext(ipynb_path.name)
        # default output folder is ./converted unless self.outpath is set
        outdir = (os.path.join(os.getcwd(), "converted")
                  if self.outpath is None else str(self.outpath))

        # attach log handlers (file + stream) for the duration of the run
        with self._log_handlers(ipynb_name, outdir):

            # a missing path is only an error when no pre-converted
            # notebook node was supplied
            if not ipynb_path.exists() and not nb_node:
                handle_error(
                    "the notebook path does not exist: {}".format(ipynb_path),
                    IOError,
                    self.logger,
                )

            # log start of conversion
            self.logger.info("started ipypublish v{0} at {1}".format(
                ipypublish.__version__, time.strftime("%c")))
            self.logger.info("logging to: {}".format(
                os.path.join(outdir, ipynb_name + ".nbpub.log")))
            self.logger.info("running for ipynb(s) at: {0}".format(ipynb_path))
            self.logger.info("with conversion configuration: {0}".format(
                self.conversion))

            # pre-converters are registered per input file extension
            # (NOTE(review): presumably e.g. ".Rmd" -> converter; confirm
            # against where pre_conversion_funcs is populated)
            if nb_node is None and ipynb_ext in self.pre_conversion_funcs:
                func = self.pre_conversion_funcs[ipynb_ext]
                self.logger.info("running pre-conversion with: {}".format(
                    inspect.getmodule(func)))
                try:
                    nb_node = func(ipynb_path)
                except Exception as err:
                    # the caught exception instance is forwarded as the
                    # error type for handle_error to re-raise
                    handle_error(
                        "pre-conversion failed for {}: {}".format(
                            ipynb_path, err),
                        err,
                        self.logger,
                    )

            # doesn't work with folders
            # if (ipynb_ext != ".ipynb" and nb_node is None):
            #     handle_error(
            #         'the file extension is not associated with any '
            #         'pre-converter: {}'.format(ipynb_ext),
            # TypeError, self.logger)

            if nb_node is None:
                # merge all notebooks
                # TODO allow notebooks to remain separate
                # (would require creating a main.tex with the preamble in etc )
                # Could make everything a 'PyProcess',
                # with support for multiple streams
                final_nb, meta_path = merge_notebooks(
                    ipynb_path, ignore_prefix=self.ignore_prefix)
            else:
                # a pre-converted node is used as-is; its "meta path" is
                # the original input path
                final_nb, meta_path = (nb_node, ipynb_path)

            # validate the notebook metadata against the schema
            if self.validate_nb_metadata:
                nb_metadata_schema = read_file_from_directory(
                    get_module_path(schema),
                    "doc_metadata.schema.json",
                    "doc_metadata.schema",
                    self.logger,
                    interp_ext=True,
                )
                try:
                    jsonschema.validate(final_nb.metadata, nb_metadata_schema)
                except jsonschema.ValidationError as err:
                    handle_error(
                        "validation of notebook level metadata failed: {}\n"
                        "see the doc_metadata.schema.json for full spec".
                        format(err.message),
                        jsonschema.ValidationError,
                        logger=self.logger,
                    )

            # set text replacements for export configuration
            # (placeholder strings in the config are substituted with the
            # meta-notebook path and the external-files folder name)
            replacements = {
                self.meta_path_placeholder:
                str(meta_path),
                self.files_folder_placeholder:
                "{}{}".format(get_valid_filename(ipynb_name),
                              self.folder_suffix),
            }

            self.logger.debug("notebooks meta path: {}".format(meta_path))

            # load configuration file
            (
                exporter_cls,
                jinja_template,
                econfig,
                pprocs,
                pconfig,
            ) = self._load_config_file(replacements)

            # run nbconvert
            self.logger.info("running nbconvert")
            exporter, stream, resources = self.export_notebook(
                final_nb, exporter_cls, econfig, jinja_template)

            # postprocess results
            # the main output file name is derived from the input name plus
            # the exporter's native extension (e.g. ".tex", ".html")
            main_filepath = os.path.join(outdir,
                                         ipynb_name + exporter.file_extension)

            # post-processors are resolved by name through the
            # "ipypublish.postprocessors" entry-point group and chained:
            # each one receives the previous one's outputs
            for post_proc_name in pprocs:
                proc_class = find_entry_point(
                    post_proc_name,
                    "ipypublish.postprocessors",
                    self.logger,
                    "ipypublish",
                )
                proc = proc_class(pconfig)
                stream, main_filepath, resources = proc.postprocess(
                    stream, exporter.output_mimetype, main_filepath, resources)

            self.logger.info("process finished successfully")

        return {
            "outpath": outdir,
            "exporter": exporter,
            "stream": stream,
            "main_filepath": main_filepath,
            "resources": resources,
        }