def iter_all_export_paths(config_folder_paths=(), regex="*.json"): """we iterate through all json files in the supplied plugin_folder_paths, and then in the `export_plugins` folder """ for plugin_folder_path in config_folder_paths: for jsonpath in glob.glob(os.path.join(plugin_folder_path, regex)): name = os.path.splitext(os.path.basename(jsonpath))[0] yield name, pathlib.Path(jsonpath) module_path = get_module_path(export_plugins) for jsonpath in glob.glob(os.path.join(str(module_path), regex)): name = os.path.splitext(os.path.basename(jsonpath))[0] yield name, pathlib.Path(jsonpath)
def load_export_config(export_config_path):
    """load the export configuration"""
    if isinstance(export_config_path, string_types):
        export_config_path = pathlib.Path(export_config_path)

    data = read_file_from_directory(
        export_config_path.parent,
        export_config_path.name,
        "export configuration",
        logger,
        interp_ext=True,
    )

    # validate against schema
    global _EXPORT_SCHEMA
    if _EXPORT_SCHEMA is None:
        # lazy load schema once
        _EXPORT_SCHEMA = read_file_from_directory(
            get_module_path(schema),
            _EXPORT_SCHEMA_FILE,
            "export configuration schema",
            logger,
            interp_ext=True,
        )
    try:
        jsonschema.validate(data, _EXPORT_SCHEMA)
    except jsonschema.ValidationError as err:
        handle_error(
            "validation of export config {} failed against {}: {}".format(
                export_config_path, _EXPORT_SCHEMA_FILE, err.message),
            jsonschema.ValidationError,
            logger=logger,
        )

    return data
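# --- usage sketch (illustrative, not part of the original module) ---
# load_export_config accepts either a str or pathlib.Path and returns the
# schema-validated configuration dict; "export_configs/my_config.json" is a
# hypothetical path, and the "exporter"/"class" keys mirror the lookup done
# in _load_config_file below.
def _show_exporter_class(config_path="export_configs/my_config.json"):
    config = load_export_config(config_path)
    print(config["exporter"]["class"])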
def postprocess(self, stream, mimetype, filepath, resources=None):
    """ Post-process output.

    Parameters
    ----------
    stream: str
        the main file contents
    mimetype: str
        the mimetype of the file
    filepath: None or str or pathlib.Path
        the path to the output file;
        the path does not have to exist, but must be absolute
    resources: None or dict
        a resources dict, output from exporter.from_notebook_node

    Returns
    -------
    stream: str
    filepath: None or str or pathlib.Path

    """
    if (self.allowed_mimetypes is not None
            and mimetype not in self.allowed_mimetypes):
        if not self.skip_mime:
            self.handle_error(
                "the mimetype {0} is not in the allowed list: {1}".format(
                    mimetype, self.allowed_mimetypes),
                TypeError)
        else:
            self.logger.debug(
                "skipping incorrect mime type: {}".format(mimetype))
            return stream, filepath, resources

    if self.requires_path and filepath is None:
        self.handle_error(
            "the filepath is None, "
            "but the post-processor requires a folder",
            IOError)

    if filepath is not None and isinstance(filepath, string_types):
        filepath = pathlib.Path(filepath)

    if self.requires_path:

        if not filepath.is_absolute():
            self.handle_error(
                "the post-processor requires an absolute folder path",
                IOError)

        if filepath.parent.exists() and not filepath.parent.is_dir():
            self.handle_error(
                "the filepath's parent is not a folder: {}".format(filepath),
                TypeError)

        if not filepath.parent.exists():
            filepath.parent.mkdir(parents=True)

    if resources is None:
        resources = {}

    return self.run_postprocess(stream, mimetype, filepath, resources)
def get_export_config_path(export_key, config_folder_paths=()):
    # type: (str, Tuple[str]) -> Union[pathlib.Path, None]
    """search for an export configuration whose name matches
    the supplied export key
    """
    for name, jsonpath in iter_all_export_paths(config_folder_paths):
        if name == export_key:
            return pathlib.Path(jsonpath)
    return None
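# --- usage sketch (illustrative, not part of the original module) ---
# Resolve an export key to its json configuration path; the key falls back
# to None when no matching configuration is found. "html_ipypublish_main"
# is only an assumed example key.
def _resolve_export_key(key="html_ipypublish_main"):
    path = get_export_config_path(key)
    if path is None:
        print("no export configuration named {}".format(key))
    else:
        print("found configuration at {}".format(path))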
def _load_config_file(self, replacements):
    # find conversion configuration
    self.logger.info("finding conversion configuration: {}".format(
        self.conversion))
    export_config_path = None
    if isinstance(self.conversion, string_types):
        outformat_path = pathlib.Path(self.conversion)
    else:
        outformat_path = self.conversion
    if outformat_path.exists():  # TODO use pathlib approach
        # if the outformat is a path that exists, use that
        export_config_path = outformat_path
    else:
        # else search internally
        export_config_path = get_export_config_path(
            self.conversion, self.plugin_folder_paths)

    if export_config_path is None:
        handle_error(
            "could not find conversion configuration: {}".format(
                self.conversion),
            IOError,
            self.logger,
        )

    # read conversion configuration and create components
    self.logger.info("loading conversion configuration")
    data = load_export_config(export_config_path)
    self.logger.info("creating exporter")
    exporter_cls = create_exporter_cls(data["exporter"]["class"])
    self.logger.info("creating template and loading filters")
    template_name = "template_file"
    jinja_template = load_template(template_name, data["template"])
    self.logger.info("creating process configuration")
    export_config = self._create_export_config(
        data["exporter"], template_name, replacements)
    pprocs, pproc_config = self._create_pproc_config(
        data.get("postprocessors", {}), replacements)

    return (exporter_cls, jinja_template,
            export_config, pprocs, pproc_config)
def external_export_plugin():
    return pathlib.Path(
        os.path.join(TEST_FILES_DIR, 'example_new_plugin.json'))


def expected_path(self):
    return pathlib.Path(self._expected_folder_path)


def converted_path(self):
    return pathlib.Path(self._converted_folder_path)


def source_path(self):
    return pathlib.Path(self._src_folder_path)
def merge_notebooks(ipynb_path, ignore_prefix="_", to_str=False, as_version=4): """ merge one or more ipynb's, if more than one, then the meta data is taken from the first Parameters ---------- ipynb_path: str or pathlib.Path ignore_prefix : str ignore filename starting with this prefix to_str: bool return as a string, else return nbformat object as_version: int notebook format vesion Returns ------ finalnb: jupyter.notebook meta_path : pathlib.Path path to notebook containing meta file """ meta_path = "" if isinstance(ipynb_path, string_types): ipynb_path = pathlib.Path(ipynb_path) if not ipynb_path.exists(): handle_error("the notebook path does not exist: {}".format(ipynb_path), IOError, logger) final_nb = None if ipynb_path.is_dir(): logger.info("Merging all notebooks in directory") for ipath in alphanumeric_sort(ipynb_path.glob("*.ipynb")): if os.path.basename(ipath.name).startswith(ignore_prefix): continue with ipath.open("r", encoding="utf-8") as f: if (sys.version_info.major == 3 and sys.version_info.minor < 6 and "win" not in sys.platform): data = f.read() if hasattr(data, "decode"): data = data.decode("utf-8") nb = nbformat.reads(data, as_version=as_version) else: nb = nbformat.read(f, as_version=as_version) if final_nb is None: meta_path = ipath final_nb = nb else: final_nb.cells.extend(nb.cells) else: logger.info("Reading notebook") with ipynb_path.open("r", encoding="utf-8") as f: if (sys.version_info.major == 3 and sys.version_info.minor < 6 and "win" not in sys.platform): data = f.read() if hasattr(data, "decode"): data = data.decode("utf-8") final_nb = nbformat.reads(data, as_version=as_version) else: final_nb = nbformat.read(f, as_version=as_version) meta_path = ipynb_path if not hasattr(final_nb.metadata, "name"): final_nb.metadata.name = "" final_nb.metadata.name += "_merged" if to_str: if sys.version_info > (3, 0): return nbformat.writes(final_nb) else: return nbformat.writes(final_nb).encode("utf-8") if final_nb is None: handle_error( "no acceptable notebooks found for path: {}".format( ipynb_path.name), IOError, logger, ) return final_nb, meta_path
def publish(self, ipynb_path, nb_node=None):
    """ convert one or more Jupyter notebooks to a published format

    paths can be a string of an existing file or folder,
    or a pathlib.Path like object

    all files linked in the documents are placed into a single files_folder

    Parameters
    ----------
    ipynb_path: str or pathlib.Path
        notebook file or directory
    nb_node: None or nbformat.NotebookNode
        a pre-converted notebook

    Returns
    -------
    outdata: dict
        containing keys;
        "outpath", "exporter", "stream", "main_filepath", "resources"

    """
    # setup the input and output paths
    if isinstance(ipynb_path, string_types):
        ipynb_path = pathlib.Path(ipynb_path)
    ipynb_name, ipynb_ext = os.path.splitext(ipynb_path.name)
    outdir = (os.path.join(os.getcwd(), "converted")
              if self.outpath is None else str(self.outpath))

    with self._log_handlers(ipynb_name, outdir):

        if not ipynb_path.exists() and not nb_node:
            handle_error(
                "the notebook path does not exist: {}".format(ipynb_path),
                IOError,
                self.logger,
            )

        # log start of conversion
        self.logger.info("started ipypublish v{0} at {1}".format(
            ipypublish.__version__, time.strftime("%c")))
        self.logger.info("logging to: {}".format(
            os.path.join(outdir, ipynb_name + ".nbpub.log")))
        self.logger.info("running for ipynb(s) at: {0}".format(ipynb_path))
        self.logger.info("with conversion configuration: {0}".format(
            self.conversion))

        if nb_node is None and ipynb_ext in self.pre_conversion_funcs:
            func = self.pre_conversion_funcs[ipynb_ext]
            self.logger.info("running pre-conversion with: {}".format(
                inspect.getmodule(func)))
            try:
                nb_node = func(ipynb_path)
            except Exception as err:
                handle_error(
                    "pre-conversion failed for {}: {}".format(
                        ipynb_path, err),
                    err,
                    self.logger,
                )

        # doesn't work with folders
        # if (ipynb_ext != ".ipynb" and nb_node is None):
        #     handle_error(
        #         'the file extension is not associated with any '
        #         'pre-converter: {}'.format(ipynb_ext),
        #         TypeError, self.logger)

        if nb_node is None:
            # merge all notebooks
            # TODO allow notebooks to remain separate
            # (would require creating a main.tex with the preamble in etc)
            # Could make everything a 'PyProcess',
            # with support for multiple streams
            final_nb, meta_path = merge_notebooks(
                ipynb_path, ignore_prefix=self.ignore_prefix)
        else:
            final_nb, meta_path = (nb_node, ipynb_path)

        # validate the notebook metadata against the schema
        if self.validate_nb_metadata:
            nb_metadata_schema = read_file_from_directory(
                get_module_path(schema),
                "doc_metadata.schema.json",
                "doc_metadata.schema",
                self.logger,
                interp_ext=True,
            )
            try:
                jsonschema.validate(final_nb.metadata, nb_metadata_schema)
            except jsonschema.ValidationError as err:
                handle_error(
                    "validation of notebook level metadata failed: {}\n"
                    "see the doc_metadata.schema.json for full spec".format(
                        err.message),
                    jsonschema.ValidationError,
                    logger=self.logger,
                )

        # set text replacements for export configuration
        replacements = {
            self.meta_path_placeholder: str(meta_path),
            self.files_folder_placeholder: "{}{}".format(
                get_valid_filename(ipynb_name), self.folder_suffix),
        }

        self.logger.debug("notebooks meta path: {}".format(meta_path))

        # load configuration file
        (
            exporter_cls,
            jinja_template,
            econfig,
            pprocs,
            pconfig,
        ) = self._load_config_file(replacements)

        # run nbconvert
        self.logger.info("running nbconvert")
        exporter, stream, resources = self.export_notebook(
            final_nb, exporter_cls, econfig, jinja_template)

        # postprocess results
        main_filepath = os.path.join(
            outdir, ipynb_name + exporter.file_extension)

        for post_proc_name in pprocs:
            proc_class = find_entry_point(
                post_proc_name,
                "ipypublish.postprocessors",
                self.logger,
                "ipypublish",
            )
            proc = proc_class(pconfig)
            stream, main_filepath, resources = proc.postprocess(
                stream, exporter.output_mimetype, main_filepath, resources)

        self.logger.info("process finished successfully")

    return {
        "outpath": outdir,
        "exporter": exporter,
        "stream": stream,
        "main_filepath": main_filepath,
        "resources": resources,
    }
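# --- usage sketch (illustrative, not part of the original module) ---
# Drive a full conversion through the publish method above.  The class name
# IpyPubMain, its import path and its constructor keywords are assumptions
# inferred from the attributes referenced in publish (conversion, outpath,
# ignore_prefix, ...); only the publish call itself is taken from the code.
def _convert_notebook(ipynb="notebook.ipynb"):
    from ipypublish.convert.main import IpyPubMain  # assumed import path
    publisher = IpyPubMain(conversion="latex_ipypublish_main",  # assumed key
                           outpath="converted")
    outdata = publisher.publish(ipynb)
    print("main output written to: {}".format(outdata["main_filepath"]))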