def write_files(export_list, nb_node, file_name):
    """
    Export and write files from a notebook node.

    Args:
        export_list (list of str) -- names of valid nbconvert exporters
        nb_node (nbformat node object) -- notebook to be converted
        file_name (str) -- base name of the file to be written

    Returns:
        None
    """
    try:
        # export and write file.
        for export in export_list:
            if export == "html":
                exporter = HTMLExporter()
            elif export == "py":
                exporter = PythonExporter()
            (body, resources) = exporter.from_notebook_node(nb_node)
            write_file = FilesWriter()
            write_file.write(
                output=body, resources=resources, notebook_name=file_name
            )
    except Exception as e:
        print(f"There was a problem exporting the file(s): {e}")
        return None
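# A minimal, self-contained sketch of the pattern used by write_files above:
# read a notebook with nbformat, export it with an nbconvert exporter, and
# write the result with FilesWriter. The path "example.ipynb" and the output
# name "example_report" are hypothetical placeholders.
import nbformat
from nbconvert import HTMLExporter
from nbconvert.writers import FilesWriter


def example_html_export(nb_path="example.ipynb", out_name="example_report"):
    nb = nbformat.read(nb_path, as_version=4)                 # load the notebook node
    body, resources = HTMLExporter().from_notebook_node(nb)   # convert to HTML
    writer = FilesWriter()                                    # writes body + extracted resources to disk
    writer.write(output=body, resources=resources, notebook_name=out_name)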
def convert_hugo(nb_path, hugo_path):
    from os import environ
    from os.path import abspath

    # Total hack. Would like the -H to be allowed to have no arg, and then use the env var,
    # but I don't know how to do that. This is the case where the user types
    # -H nb_path, so just go with it.
    if hugo_path and not nb_path:
        nb_path = hugo_path
        hugo_path = environ.get('METAPACK_HUGO_DIR')

    if not hugo_path:
        err("Must specify value for -H or the METAPACK_HUGO_DIR environment var")

    if not exists(nb_path):
        err("Notebook path does not exist: '{}' ".format(nb_path))

    c = Config()
    c.HugoExporter.hugo_dir = abspath(hugo_path)  # Exports assume rel path is rel to notebook

    he = HugoExporter(config=c, log=logger)

    output, resources = he.from_filename(nb_path)

    prt('Writing Notebook to Hugo Markdown')

    prt(' Writing ', resources['unique_key'] + resources['output_extension'])

    for k, v in resources['outputs'].items():
        prt(' Writing ', k)

    fw = FilesWriter()
    fw.write(output, resources, notebook_name=resources['unique_key'])
def notebook_to_rst(nbfilename):
    nbfilepath = os.path.join(EXPATH, nbfilename)
    rstfilename = get_rstfilename(nbfilename)
    output_files_dir = only_filename_no_ext(rstfilename)
    metadata_path = os.path.dirname(rstfilename)
    unique_key = nbfilename.rstrip('.ipynb')

    resources = {
        'metadata': {'path': metadata_path},
        'output_files_dir': output_files_dir,
        # Prefix for the output image filenames
        'unique_key': unique_key
    }

    # Read notebook
    with open(nbfilepath, 'r') as f:
        nb = nbformat.read(f, as_version=4)

    # Export
    exporter = nbsphinx.Exporter(execute='never', allow_errors=True)
    (body, resources) = exporter.from_notebook_node(nb, resources)

    # Correct path for the resources
    for filename in list(resources['outputs'].keys()):
        tmp = os.path.join(RST_PATH, filename)
        resources['outputs'][tmp] = resources['outputs'].pop(filename)

    fw = FilesWriter()
    fw.build_directory = RST_PATH

    # Prevent "not in doctree" complaints
    resources['output_extension'] = ''

    body = 'Examples\n--------\n' + body
    fw.write(body, resources, notebook_name=rstfilename)
def export_tex(
    combined_nb: NotebookNode, output_file: Path, pdf=False, template_file=None
):
    """A function that exports a .tex file from a notebook node object."""
    resources = {}
    resources["unique_key"] = "combined"
    resources["output_files_dir"] = "combined_files"

    # log.info('Converting to %s', 'pdf' if pdf else 'latex')
    exporter = MyLatexPDFExporter() if pdf else MyLatexExporter()
    if template_file is not None:
        exporter.template_file = str(template_file)

    # Create an instance of the RegexRemovePreprocessor and supply a regex
    # pattern (in a list) to its .patterns attribute
    mypreprocessor = RegexRemovePreprocessor()
    mypreprocessor.patterns = [r"\s*\Z"]

    # apply the preprocessor to the exporter
    exporter.register_preprocessor(mypreprocessor, enabled=True)

    writer = FilesWriter(build_directory=str(output_file.parent))
    output, resources = exporter.from_notebook_node(combined_nb, resources)
    writer.write(output, resources, notebook_name=output_file.stem)
def export_tex(nbnode, outfile="export_tex_out", template="classicm"):
    latex_exporter = LatexExporter()
    latex_exporter.template_file = template
    (body, resources) = latex_exporter.from_notebook_node(nbnode)
    writer = FilesWriter()
    writer.write(body, resources, notebook_name=outfile)  # will end up with .tex extension
def convert_documentation(nb_path):
    """Run only the document conversion portion of the notebook conversion.

    The final document will not be complete.
    """

    with open(nb_path) as f:
        nb = nbformat.reads(f.read(), as_version=4)

    doc = ExtractInlineMetatabDoc(package_url="metapack+file:" + dirname(nb_path)).run(nb)

    package_name = doc.as_version(None)

    output_dir = join(getcwd(), package_name)

    de = DocumentationExporter(config=Config(), log=logger, metadata=doc_metadata(doc))

    prt('Converting documentation')
    output, resources = de.from_filename(nb_path)

    fw = FilesWriter()
    fw.build_directory = join(output_dir, 'docs')
    fw.write(output, resources, notebook_name='notebook')

    prt("Wrote documentation to {}".format(fw.build_directory))
def write_body_resources(notebook_filename, body, resources, output_dir=None):
    """Write the converted notebook body and accompanying files to the output dir.

    Uses the notebook's own directory if output_dir is None.
    """
    output_dir = determine_output_dir(notebook_filename, output_dir)

    config = Config()
    config.FilesWriter.build_directory = output_dir
    file_writer = FilesWriter(config=config)
    file_writer.write(body, resources, notebook_name=to_notebook_basename(notebook_filename))
def _convert(self, tmpdir: Path, entry: Path, outdir: Path, depth: int):
    """Convert a notebook.

    Args:
        tmpdir: Temporary working directory
        entry: notebook to convert
        outdir: output directory for .html and .rst files
        depth: depth below root, for fixing image paths
    """
    test_mode = self.s.get("test_mode")

    # strip special cells.
    if self._has_tagged_cells(entry, set(self._cell_tags.values())):
        _log.debug(f"notebook '{entry.name}' has test cell(s)")
        orig_entry, entry = entry, self._strip_tagged_cells(
            tmpdir, entry, ("remove", "exercise"), "testing")
        notify(f"Stripped tags from: {orig_entry.name}", 3)
    else:
        # copy to temporary directory just to protect from output cruft
        tmp_entry = tmpdir / entry.name
        shutil.copy(entry, tmp_entry)
        orig_entry, entry = entry, tmp_entry

    # convert all tag-stripped versions of the notebook.
    # before running, check if converted result is newer than source file
    if self._already_converted(orig_entry, entry, outdir):
        notify(f"Skip notebook conversion, output is newer, for: {entry.name}", 3)
        self._results.cached.append(entry)
        return

    notify(f"Running notebook: {entry.name}", 3)
    nb = self._parse_and_execute(entry)

    if test_mode:
        # don't do conversion in test mode
        return

    notify(f"Exporting notebook '{entry.name}' to directory {outdir}", 3)

    wrt = FilesWriter()

    # export each notebook into multiple target formats
    created_wrapper = False
    for (exp, postprocess_func, pp_args) in (
            (RSTExporter(), self._postprocess_rst, ()),
            (HTMLExporter(), self._postprocess_html, (depth,)),
    ):
        _log.debug(f"export '{orig_entry}' with {exp} to notebook '{entry}'")
        (body, resources) = exp.from_notebook_node(nb)
        body = postprocess_func(body, *pp_args)
        wrt.build_directory = str(outdir)
        wrt.write(body, resources, notebook_name=entry.stem)

        # create a 'wrapper' page
        if not created_wrapper:
            _log.debug(f"create wrapper page for '{entry.name}' in '{outdir}'")
            self._create_notebook_wrapper_page(entry.stem, outdir)
            created_wrapper = True

    # move notebooks into docs directory
    _log.debug(f"move notebook '{entry}' to output directory: {outdir}")
    shutil.copy(entry, outdir / entry.name)
def convert(self, file):
    assert os.path.exists(file), f"this should not happen, path {file} must exist"
    body, resources = self.export(file)
    fw = FilesWriter()
    fw.build_directory = os.path.dirname(file)
    f_name = os.path.basename(file).replace(".ipynb", "")
    fw.write(body, resources, notebook_name=f_name)
def write_only_body(notebook_filename, body, output_dir=None):
    output_dir = determine_output_dir(notebook_filename, output_dir)

    config = Config()
    config.FilesWriter.build_directory = output_dir
    file_writer = FilesWriter(config=config)

    # no resources since we don't want files written, but add .pdf extension
    resources = dict(output_extension='.pdf')  # add pdf to filename
    file_writer.write(body, resources, notebook_name=to_notebook_basename(notebook_filename))
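# A small sketch (with hypothetical names) of the trick used in write_only_body
# above: FilesWriter appends resources['output_extension'] to notebook_name, so
# setting it explicitly controls the suffix of the written file.
from nbconvert.writers import FilesWriter


def write_with_extension(body, base_name="report", extension=".pdf", build_dir="."):
    writer = FilesWriter()
    writer.build_directory = build_dir               # directory to write into
    resources = {"output_extension": extension}      # e.g. ".pdf", ".html", ".tex"
    writer.write(body, resources, notebook_name=base_name)  # -> report.pdf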
def convert(input_fn, output_fn=None):
    """Execute a notebook and save it as HTML.

    Executes the notebook, extracts the package list, and saves everything to HTML.

    Arguments:
        input_fn {str} -- Input filename of notebook
        output_fn {str} -- Output filename for html file
    """
    _logger.info(f'Reading notebook "{input_fn}"')
    with open(input_fn, encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=4)

    # Execute notebook
    _logger.info(f'Executing notebook...')
    ep = ExecutePreprocessor(timeout=-1)
    starttime = datetime.datetime.today()
    ep.preprocess(nb)
    endtime = datetime.datetime.today()
    _logger.info(f'Executed notebook')
    timestamp = "Executed {} in {}.".format(
        starttime.strftime('%Y-%m-%d %H:%M:%S'),
        pretty_duration((endtime - starttime).total_seconds()))

    # Extract .ipynb file
    scrubbed_nb = scrub_output(copy.deepcopy(nb))
    ipynb_data = nbformat.writes(scrubbed_nb).encode('utf-8')
    ipynb_link = create_embedded_link(os.path.basename(input_fn), ipynb_data)

    # Get package list
    _logger.info(f'Getting package list...')
    packages_data = get_package_list()
    if packages_data:
        packages_link = create_embedded_link('packages.txt', packages_data)
    else:
        packages_link = ''

    # Add files / links
    md = f"---\n {timestamp} {ipynb_link} {packages_link}"
    nb['cells'].append(new_markdown_cell(md))

    # Export to html
    _logger.info(f'Exporting to "{output_fn}"')
    exporter = HTMLExporter()
    (body, resources) = exporter.from_notebook_node(nb)

    if not output_fn:
        output_fn = build_output_filename(input_fn)

    writer = FilesWriter()
    resources['output_extension'] = None
    writer.write(output=body, resources=resources, notebook_name=output_fn)
    _logger.info(f'Finished')
def export(combined_nb: NotebookNode, output_file: Path, pdf=False):
    resources = {}
    resources['unique_key'] = 'combined'
    resources['output_files_dir'] = 'combined_files'

    log.info('Converting to %s', 'pdf' if pdf else 'latex')
    exporter = MyLatexPDFExporter() if pdf else MyLatexExporter()
    writer = FilesWriter(build_directory=str(output_file.parent))
    output, resources = exporter.from_notebook_node(combined_nb, resources)
    writer.write(output, resources, notebook_name=output_file.stem)
def convert(self, file):
    assert os.path.exists(file), f"this should not happen, path {file} must exist"
    body, resources = self.export(file)
    fw = FilesWriter()
    fw._makedir(self.dst_folder(file))
    fw.build_directory = self.dst_folder(file)
    fw.write(body, resources, notebook_name=self.dest_file(file, withFormat=False))
def main(input, output, name):
    exporter = MarkdownExporter(template_file=HIDE_TEMPLATE)
    writer = FilesWriter()

    with open(input, 'rb') as f:
        nb = nbformat.read(f, as_version=4)

    (body, res) = exporter.from_notebook_node(nb)
    writer.write(MdFormatter().clear(body),
                 PostProcesser().res_path(res, output),
                 notebook_name=output + name)
def nbnode_to_tex(nb_node, filename="texout"):
    """
    Export a .tex file given a notebookNode object as input.

    :param nb_node: notebookNode object
    :param filename: str, the name of the output .tex file. Don't need .tex extension
    :return: nothing returned, but the function will output a new .tex file
    """
    e = MyLatexExporter()
    body, resources = e.from_notebook_node(nb_node)
    writer = FilesWriter()
    writer.write(body, resources, notebook_name=filename)
def nbnode_to_ipynb(nb_node, filename="notebookout"):
    """
    Export a .ipynb file given a notebookNode object as input.

    :param nb_node: notebookNode object
    :param filename: str, the name of the output .ipynb file. Don't need extension
    :return: nothing returned, but the function will output a new .ipynb file
    """
    e = nbconvert.NotebookExporter()
    body, resources = e.from_notebook_node(nb_node)
    writer = FilesWriter()
    writer.write(body, resources, notebook_name=filename)
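# Hypothetical usage of the two helpers above: build a notebook node in memory
# with nbformat and write it out as both .ipynb and .tex. The cell contents and
# output names are made up for illustration.
import nbformat.v4 as nbf


def example_roundtrip():
    nb = nbf.new_notebook()
    nb.cells.append(nbf.new_markdown_cell("# Example"))
    nb.cells.append(nbf.new_code_cell("print('hello')"))
    nbnode_to_ipynb(nb, filename="demo_notebook")   # writes demo_notebook.ipynb
    nbnode_to_tex(nb, filename="demo_notebook")     # writes demo_notebook.tex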
def main_report_generation_function():
    print('START: downloading and executing notebook from github')
    pm.execute_notebook(
        'https://raw.githubusercontent.com/arkulkarni/COVID-19-Analysis/master/COVID-19-Analysis.ipynb',
        SCRATCH_DIR + '/output.ipynb'
    )
    print('SUCCESS: downloading and executing notebook from github')

    print('START: convert to html')
    # jupyter nbconvert output.ipynb --no-input
    nb = nbformat.read(SCRATCH_DIR + '/output.ipynb', as_version=4)

    # Instantiate the exporter.
    html_exporter = HTMLExporter()
    html_exporter.template_file = 'nbhtml'
    (body, resources) = html_exporter.from_notebook_node(nb)
    print('SUCCESS: convert to html')

    print('START: save html file')
    write_file = FilesWriter()
    write_file.write(
        output=body, resources=resources, notebook_name=SCRATCH_DIR + '/index'
    )
    print('SUCCESS: save html file')

    print('START: uploading html file to Cloud Storage')
    storage_client = storage.Client()
    bucket = storage_client.bucket(BUCKET_NAME)

    # upload the index.html file
    blob = bucket.blob('index.html')
    blob.upload_from_filename(SCRATCH_DIR + '/index.html')
    print('File index.html uploaded')

    # upload the custom.css file
    blob = bucket.blob('custom.css')
    blob.upload_from_filename('./custom.css')
    print('File custom.css uploaded')
    print('SUCCESS: uploading html and css file to Cloud Storage')

    gc.collect()
    return 'Done processing and updating the report'
def export(combined_nb: NotebookNode, output_file: Path, pdf=False, template_file=None):
    resources = {}
    resources['unique_key'] = 'combined'
    resources['output_files_dir'] = 'combined_files'

    log.info('Converting to %s', 'pdf' if pdf else 'latex')
    exporter = MyLatexPDFExporter() if pdf else MyLatexExporter()
    if template_file is not None:
        exporter.template_file = str(template_file)
    writer = FilesWriter(build_directory=str(output_file.parent))
    output, resources = exporter.from_notebook_node(combined_nb, resources)
    writer.write(output, resources, notebook_name=output_file.stem)
def convertNotebooktoLaTeX(notebookPath, outfilePath='latex_out1', template='classicm'):
    REG_nb = re.compile(r'(\d\d)\.(\d\d)-(.*)\.ipynb')
    base_nb_filename = os.path.basename(notebookPath)
    if REG_nb.match(base_nb_filename):
        with open(notebookPath) as fh:
            nbnode = nbformat.read(fh, as_version=4)

        exporter = LatexExporter()
        exporter.template_file = template  # classicm style if not specified
        exporter.file_extension = '.tex'
        exporter.register_preprocessor(RegexRemovePreprocessor, enabled=False)

        body, resources = exporter.from_notebook_node(nbnode)
        writer = FilesWriter()
        writer.write(body, resources, notebook_name=outfilePath)  # will end up with .tex extension
def convert_notebook(nb_path):
    prt('Convert notebook to Metatab source package')

    if not exists(nb_path):
        err("Notebook path does not exist: '{}' ".format(nb_path))

    c = Config()

    pe = NotebookExecutor(config=c, log=logger)

    prt('Running the notebook')
    output, resources = pe.from_filename(nb_path)

    fw = FilesWriter()
    fw.build_directory = pe.output_dir
    fw.write(output, resources, notebook_name=DEFAULT_METATAB_FILE)

    de = DocumentationExporter(config=c, log=logger, metadata=doc_metadata(pe.doc))

    prt('Exporting documentation')
    output, resources = de.from_filename(nb_path)

    fw.build_directory = join(pe.output_dir, 'docs')
    fw.write(output, resources, notebook_name='notebook')

    new_mt_file = join(pe.output_dir, DEFAULT_METATAB_FILE)

    doc = MetapackDoc(new_mt_file)

    de.update_metatab(doc, resources)

    for lib_dir in pe.lib_dirs:
        lib_dir = normpath(lib_dir).lstrip('./')

        doc['Resources'].new_term("Root.PythonLib", lib_dir)

        path = abspath(lib_dir)
        dest = join(pe.output_dir, lib_dir)

        ensure_dir(dest)
        copytree(path, join(pe.output_dir, lib_dir))

    doc.write_csv()

    # Reset the input to use the new data
    prt('Running with new package file: {}'.format(new_mt_file))
def export_nbnode(
    combined_nb: NotebookNode, output_file: Path, pdf=False, template_file=None
):
    resources = {}
    resources["unique_key"] = "combined"
    resources["output_files_dir"] = "combined_files"

    # log.info('Converting to %s', 'pdf' if pdf else 'latex')
    exporter = MyLatexPDFExporter() if pdf else MyLatexExporter()
    if template_file is not None:
        exporter.template_file = str(template_file)
    writer = FilesWriter(build_directory=str(output_file.parent))
    output, resources = exporter.from_notebook_node(combined_nb, resources)
    writer.write(output, resources, notebook_name=output_file.stem)
def from_meeting(self, meeting: Meeting):
    notebook_path = repositories.local_meeting_root(meeting) / "".join(
        [repr(meeting), FileExtensions.Solutionbook])

    # TODO concatenate front matter to notebook output
    front_matter = templates.load("meeting/hugo-front-matter.md.j2")
    front_matter = front_matter.render(
        **{
            "group": repr(meeting.group),
            "meeting": {
                "title": meeting.required["title"],
                "date": meeting.meta.date.isoformat(),
                # TODO decide on what date qualifies to be `lastmod`
                "lastmod": meeting.meta.date.isoformat(),
                "authors": meeting.required["instructors"],
                "tags": meeting.optional["tags"],
                "description": meeting.required["description"],
                "weight": meeting.number,
                "room": meeting.meta.room,
                "cover": meeting.required["cover"],
            },
            "semester": {
                "full": str(meeting.group.semester),
                "short": repr(meeting.group.semester),
            },
            "urls": {
                "youtube": urlgen.youtube(meeting),
                "slides": urlgen.slides(meeting),
                "github": urlgen.github(meeting),
                "kaggle": urlgen.kaggle(meeting),
                "colab": urlgen.colab(meeting),
            },
        })

    # the notebook is output as a string, so treat it as such when concatenating
    notebook, resources = self.from_filename(str(notebook_path), resources=None)
    resources.update({"output_extension": ".md"})

    writer = FilesWriter(
        build_directory=str(paths.site_group_folder_from_meeting(meeting)))

    front_matter_plus_notebook = f"{front_matter}\n{notebook}"

    writer.write(front_matter_plus_notebook, resources, meeting.required["filename"])
    # writer.write(notebook, resources, meeting.required["filename"])

    return notebook, resources
def ipynb_to_jupyter(path):
    """Replace given ``.ipynb`` file with a ``.jupyter`` file.

    WARNING: This deletes the original file!

    :param path: Path to ``.ipynb`` file.
    :type path: os.PathLike or str
    """
    path = Path(path)
    exporter = JupyterExporter()
    nb, resources = exporter.from_filename(str(path))
    writer = FilesWriter()
    writer.write(nb, resources, notebook_name=path.with_suffix('').name)
    path.unlink()
def _load_documentation_files(self):
    from metapack_jupyter.exporters import DocumentationExporter

    notebook_docs = []

    # First find and remove notebooks from the docs. These will get processed to create
    # normal documents.
    try:
        for term in list(self.doc['Documentation'].find('Root.Documentation')):
            u = parse_app_url(term.value)
            if u is not None and u.target_format == 'ipynb' and u.proto == 'file':
                notebook_docs.append(term)
                self.doc.remove_term(term)
    except KeyError:
        self.warn("No documentation defined in metadata")

    # Process all of the normal files
    super()._load_documentation_files()

    fw = FilesWriter()
    fw.build_directory = join(self.package_path.path, 'docs')

    # Now, generate the notebook documents directly into the filesystem package
    for term in notebook_docs:
        de = DocumentationExporter(base_name=term.name or slugify(term.title))
        u = parse_app_url(term.value)

        nb_path = join(self.source_dir, u.path)  # Only works if the path is relative.

        try:
            output, resources = de.from_filename(nb_path)
            fw.write(output, resources, notebook_name=de.base_name + '_full')  # Write notebook html with inputs

            de.update_metatab(self.doc, resources)
        except Exception as e:
            from metapack.cli.core import warn
            warn("Failed to convert document for {}: {}".format(term.name, e))
def convert_nb2inject() -> str:
    '''Converts the current notebook to an executable .py file

    Returns
    -------
    str : the full path and filename of the converted file
    '''
    try:
        import ipykernel
        import notebook.notebookapp
    except ImportError as e:
        # log.exception('ImportError : This only runs in a Jupyter Notebook environment ' + str(e))
        return

    from nbconvert import PythonExporter
    from nbconvert.writers import FilesWriter
    import nbformat
    # from traitlets.config import Config

    exporter = PythonExporter()

    nbfile = get_notebook_name()
    nb = nbformat.read(nbfile, nbformat.NO_CONVERT)

    start = 0
    # Don't use the last cell
    nb.cells = nb.cells[start:-2]
    (output, resources) = exporter.from_notebook_node(nb)

    filename = nbfile.split('/')[-1].split('.')[0]
    user = os.getenv("JUPYTERHUB_USER")

    # FIX: This should not be hardcoded
    tmpdir = f"/tmp/cloudflow/inject/{user}"
    if not os.path.exists(tmpdir):
        os.makedirs(tmpdir)

    outfile = f"{tmpdir}/{filename}"

    # Save to file
    writer = FilesWriter()
    writer.write(output, resources, outfile)

    return outfile + ".py"
def write_hugo_formatted_nb_to_md(
        notebook: Union[Path, str],
        render_to: Optional[Union[Path, str]] = None) -> Path:
    """
    Convert Jupyter notebook to markdown and write it to the appropriate file.

    Args:
        notebook: The path to the notebook to be rendered
        render_to: The directory we want to render the notebook to
    """
    notebook = Path(notebook)
    notebook_metadata = json.loads(notebook.read_text())['metadata']
    rendered_markdown_string, res = notebook_to_markdown(notebook)
    slug = notebook_metadata['front-matter']['slug']
    print("render-to: " + str(notebook_metadata['hugo-jupyter']['render-to']))
    render_to = render_to or notebook_metadata['hugo-jupyter']['render-to'] or 'content/post/'

    if not render_to.endswith('/'):
        render_to += '/'

    rendered_markdown_file = Path(render_to, slug + '.md')

    if not rendered_markdown_file.parent.exists():
        rendered_markdown_file.parent.mkdir(parents=True)

    b_dir = render_to.replace('content/', '')
    static_dir = os.path.join('static', b_dir, slug)

    c = Config()
    c.FilesWriter.build_directory = render_to
    fw = FilesWriter(config=c)
    fw.write(rendered_markdown_string, res, notebook_name=slug)

    if not os.path.exists(static_dir):
        os.makedirs(static_dir)

    # Move all .png files from render_to to static_dir
    for fn in os.listdir(render_to):
        if fn.endswith('.png'):
            src = os.path.join(render_to, fn)
            dst = os.path.join(static_dir, fn)
            print("Moving: {} -> {}".format(src, dst))
            os.rename(src, dst)

    # rendered_markdown_file.write_text(rendered_markdown_string)
    print(notebook.name, '->', rendered_markdown_file.name)
    return rendered_markdown_file
def convertNotebooktoHTML(notebookPath, outfilePath='nb_out', template='md_not_converted'):
    REG_nb = re.compile(r'(\d\d)\.(\d\d)-(.*)\.ipynb')
    notebook_basename = os.path.basename(notebookPath)
    if REG_nb.match(notebook_basename):
        with open(notebookPath) as fh:
            nbnode = nbformat.read(notebookPath, as_version=4)

        exporter = HTMLExporter()
        exporter.template_file = template  # leaves markdown not converted, converts input and output cells
        exporter.file_extension = '.md'

        # Create an instance of the RegexRemovePreprocessor and supply a regex
        # pattern (in a list) to its .patterns attribute
        mypreprocessor = RegexRemovePreprocessor()
        mypreprocessor.patterns = [r'\s*\Z']

        # apply the preprocessor to the exporter
        exporter.register_preprocessor(mypreprocessor, enabled=True)

        body, resources = exporter.from_notebook_node(nbnode)
        writer = FilesWriter()
        writer.write(body, resources, notebook_name=outfilePath)  # will end up with .html extension
def export(combined_nb: NotebookNode, output_file: Path, pdf=False, template_file=None):
    resources = {}
    resources['unique_key'] = 'combined'
    resources['output_files_dir'] = 'combined_files'

    # log.info('Converting to %s', 'pdf' if pdf else 'latex')
    exporter = MyLatexPDFExporter() if pdf else MyLatexExporter()
    if template_file is not None:
        exporter.template_file = str(template_file)

    # Create an instance of the RegexRemovePreprocessor and supply a regex
    # pattern (in a list) to its .patterns attribute
    mypreprocessor = RegexRemovePreprocessor()
    mypreprocessor.patterns = [r'\s*\Z']

    # apply the preprocessor to the exporter
    exporter.register_preprocessor(mypreprocessor, enabled=True)

    writer = FilesWriter(build_directory=str(output_file.parent))
    output, resources = exporter.from_notebook_node(combined_nb, resources)
    writer.write(output, resources, notebook_name=output_file.stem)
def write_page(html, path_out, resources, standalone=False, custom_css=None, custom_js=None):
    """Write an HTML page to disk and extract images if desired.

    Meant for running after converting a page with `page_html`. This uses
    the nbconvert `FilesWriter` class to write the HTML content.

    html : string
        The HTML to be written to disk.
    path_out : string
        The path to the folder where the HTML will be output.
    resources : dictionary
        NBConvert resources to be used in the conversion process.
        These are generated from the `build_book` function.
    standalone : bool
        Whether to write the page as a full standalone HTML file with its own
        <head> and <body> sections. If False, just the converted HTML will be
        written with the expectation that it will be compiled to "full" HTML
        by Jupyter Book later.
    custom_css : string of css | path to css file
        A collection of custom CSS rules to include in the output HTML,
        or a path to a CSS file. Only used if `standalone=True`.
    custom_js : string of javascript | path to js file
        A collection of custom Javascript to include in the output HTML,
        or a path to a JS file. Only used if `standalone=True`.
    """
    c = Config()
    c.FilesWriter.build_directory = path_out
    notebook_name = op.split(resources.get("unique_key", "notebook"))[-1]

    if custom_css is None:
        custom_css = ''
    elif op.exists(custom_css):
        with open(custom_css, 'r') as ff:
            custom_css = ff.read()

    if custom_js is None:
        custom_js = ''
    elif op.exists(custom_js):
        with open(custom_js, 'r') as ff:
            custom_js = ff.read()

    # If standalone, add a head and body
    if standalone is True:
        head = page_head(custom_css=custom_css, custom_js=custom_js)
        html = f"""
        <!DOCTYPE html>
        {head}
        <body>
        {html}
        <nav class="onthispage"></nav>
        </body>
        </html>\n
        """

    # Now write the html and resources
    writer = FilesWriter(config=c)
    path_html = writer.write(html, resources, notebook_name=notebook_name)
    return path_html
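# A minimal sketch of the Config-driven setup used in write_page above: setting
# FilesWriter.build_directory through a traitlets Config object instead of
# assigning the attribute directly. The "site/out" path is a placeholder.
from traitlets.config import Config
from nbconvert.writers import FilesWriter


def configured_writer(build_dir="site/out"):
    c = Config()
    c.FilesWriter.build_directory = build_dir   # same effect as writer.build_directory = build_dir
    return FilesWriter(config=c)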
def convertNotebooktoLaTeX(notebookPath, outfilePath="latex_out1", template="classicm"): REG_nb = re.compile(r"(\d\d)\.(\d\d)-(.*)\.ipynb") base_nb_filename = os.path.basename(notebookPath) if REG_nb.match(base_nb_filename): with open(notebookPath) as fh: nbnode = nbformat.read(fh, as_version=4) exporter = LatexExporter() exporter.template_file = template # classicm style if not specified exporter.file_extension = ".tex" body, resources = exporter.from_notebook_node(nbnode) writer = FilesWriter() writer.write( body, resources, notebook_name=outfilePath) # will end up with .tex extension
def convertNotebooktoHTML(notebookPath, outfilePath='nb_out', template='md_not_converted'):
    REG_nb = re.compile(r'(\d\d)\.(\d\d)-(.*)\.ipynb')
    notebook_basename = os.path.basename(notebookPath)
    if REG_nb.match(notebook_basename):
        with open(notebookPath) as fh:
            nbnode = nbformat.read(notebookPath, as_version=4)

        exporter = HTMLExporter()
        exporter.template_file = template  # leaves markdown not converted, converts input and output cells
        exporter.file_extension = '.md'

        body, resources = exporter.from_notebook_node(nbnode)
        writer = FilesWriter()
        writer.write(body, resources, notebook_name=outfilePath)  # will end up with .html extension
def convert_notebooks(in_directory, html_directory, static_directory):
    dl = DictLoader({
        'post.tpl': """
{%- extends 'basic.tpl' -%}

{% block body %}---
title: {{nb.metadata['title']}}
notebook: {{resources['metadata']['path']}}/{{resources['metadata']['name']}}.ipynb
date: {{nb.metadata['date']}}
---

{{ super() }}
{% endblock body %}
"""
    })

    c = Config()
    c.HTMLExporter.preprocessors = [
        'nbconvert.preprocessors.ExtractOutputPreprocessor'
    ]

    html_exporter = HTMLExporter(config=c, extra_loaders=[dl])
    html_exporter.template_file = 'post.tpl'

    writer = FilesWriter(build_directory=html_directory)

    for notebook_file in glob(path.join(in_directory, '*.ipynb')):
        out_name, _ = path.splitext(path.basename(notebook_file))
        out_name = out_name.lower().replace(' ', '-')

        print('Converting {}'.format(notebook_file))
        (body, resources) = html_exporter.from_filename(
            notebook_file, resources={'output_files_dir': out_name})
        writer.write(body, resources, notebook_name=out_name)

        shutil.rmtree(path.join(static_directory, out_name), True)
        rename(path.join(html_directory, out_name),
               path.join(static_directory, out_name))
def init_notebooks(self):
    filenames = []
    pipeline_path = Path(self.config.pipeline)
    output_path = Path(self.config.output_dir)

    if pipeline_path.is_dir():
        filenames = sorted(pipeline_path.glob('*.ipynb'))
    elif is_zipfile(str(pipeline_path)):
        filenames = get_notebooks_in_zip(str(pipeline_path))
    elif pipeline_path.is_file():
        if is_valid_notebook(str(pipeline_path)):
            filenames = [pipeline_path]  # list with one notebook
        else:
            raise Exception("Could not validate notebook")

    _notebooks = [str(f) for f in filenames]

    # copy notebooks to pipeline subfolder
    copied_notebooks = []
    for notebook in _notebooks:
        notebook_pth = Path(notebook)
        from nbconvert.exporters import NotebookExporter
        pipeline_writer = FilesWriter(build_directory=str(output_path / 'pipeline'))
        pipeline_output, resources = export(NotebookExporter, notebook, resources={})
        pipeline_writer.write(pipeline_output, resources, notebook_name=notebook_pth.stem)
        copied_notebook_pth = output_path / 'pipeline' / notebook_pth.name
        assert copied_notebook_pth.exists()
        copied_notebooks.append(str(copied_notebook_pth.absolute()))

    self.notebooks = copied_notebooks

    # add it into the Pipeline metadata
    self.config.pipeline_notebooks = [nb for nb in self.notebooks]
    self.config.executed_notebooks = []
def write(self, output, resources, **kwargs):
    output = ":orphan:\n\n" + output
    FilesWriter.write(self, output, resources, **kwargs)
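# For context, a hedged sketch of how an override like the one above is
# typically wired up: subclass FilesWriter and prepend the Sphinx ":orphan:"
# directive before delegating to the parent class. The class name is invented.
from nbconvert.writers import FilesWriter


class OrphanRSTWriter(FilesWriter):
    def write(self, output, resources, **kwargs):
        output = ":orphan:\n\n" + output            # mark the page as orphan for Sphinx
        return super().write(output, resources, **kwargs)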
def convert(self, remove_executed=False):
    """
    Convert the executed notebook to a restructured text (RST) file.

    Parameters
    ----------
    remove_executed : bool, optional
        Controls whether to remove the executed notebook or not.
    """
    if not path.exists(self._executed_nb_path):
        raise IOError("Executed notebook file doesn't exist! Expected: {0}"
                      .format(self._executed_nb_path))

    if path.exists(self._rst_path) and not self.overwrite:
        logger.debug("RST version of notebook already exists at {0}. Use "
                     "overwrite=True or --overwrite (at cmd line) to re-run"
                     .format(self._rst_path))
        return self._rst_path

    # Initialize the resources dict - see:
    # https://github.com/jupyter/nbconvert/blob/master/nbconvert/nbconvertapp.py#L327
    resources = {}
    resources['config_dir'] = ''  # we don't need to specify config
    resources['unique_key'] = self.nb_name

    # path to store extra files, like plots generated
    resources['output_files_dir'] = 'nboutput'

    # these keywords are used to build the filter keywords
    # TODO: add a pre-processor that extracts the keywords from the markdown
    # cell in the header and adds them to this list
    # NOTE: the split[-4] trick below is brittle in that it will break if
    # a notebook is, say, nested two layers deep instead of just one like
    # all of our notebooks thus far.
    resources['nb_keywords'] = [self.nb_path.split(sep)[-4]]

    # Exports the notebook to RST
    logger.debug('Exporting notebook to RST...')
    exporter = RSTExporter()

    if self.template_file:
        exporter.template_file = self.template_file
    output, resources = exporter.from_filename(self._executed_nb_path,
                                               resources=resources)

    # Write the output RST file
    writer = FilesWriter()
    output_file_path = writer.write(output, resources, notebook_name=self.nb_name)

    # read the executed notebook, grab the keywords from the header,
    # add them in to the RST as filter keywords
    with open(self._executed_nb_path) as f:
        nb = nbformat.read(f, as_version=IPYTHON_VERSION)
    top_cell_text = nb['cells'][0]['source']
    match = re.search(r'## [kK]eywords\s+(.*)', top_cell_text)

    if match:
        keywords = match.groups()[0].split(',')
        keywords = [clean_keyword(k) for k in keywords if k.strip()]
        keyword_filters = ['filter{0}'.format(k) for k in keywords]
    else:
        keyword_filters = []

    # Add metatags to top of RST files to get rendered into HTML, used for
    # the search and filter functionality in Learn Astropy
    meta_tutorials = '.. meta::\n :keywords: {0}\n'
    filters = ['filterTutorials'] + keyword_filters
    meta_tutorials = meta_tutorials.format(', '.join(filters))
    with open(output_file_path, 'r') as f:
        rst_text = f.read()

    with open(output_file_path, 'w') as f:
        rst_text = '{0}\n{1}'.format(meta_tutorials, rst_text)
        f.write(rst_text)

    if remove_executed:
        # optionally, clean up the executed notebook file
        remove(self._executed_nb_path)

    return output_file_path
class BaseConverter(LoggingConfigurable):

    notebooks = List([])
    assignments = Dict({})
    writer = Instance(FilesWriter)
    exporter = Instance(Exporter)
    exporter_class = Type(NotebookExporter, klass=Exporter)
    preprocessors = List([])

    force = Bool(False, help="Whether to overwrite existing assignments/submissions").tag(config=True)

    permissions = Integer(
        help=dedent(
            """
            Permissions to set on files output by nbgrader. The default is
            generally read-only (444), with the exception of nbgrader
            assign and nbgrader feedback, in which case the user also has
            write permission.
            """
        )
    ).tag(config=True)

    @default("permissions")
    def _permissions_default(self):
        return 444

    coursedir = Instance(CourseDirectory, allow_none=True)

    def __init__(self, coursedir=None, **kwargs):
        self.coursedir = coursedir
        super(BaseConverter, self).__init__(**kwargs)
        if self.parent and hasattr(self.parent, "logfile"):
            self.logfile = self.parent.logfile
        else:
            self.logfile = None

        c = Config()
        c.Exporter.default_preprocessors = []
        self.update_config(c)

    def start(self):
        self.init_notebooks()
        self.writer = FilesWriter(parent=self, config=self.config)
        self.exporter = self.exporter_class(parent=self, config=self.config)
        for pp in self.preprocessors:
            self.exporter.register_preprocessor(pp)
        currdir = os.getcwd()
        os.chdir(self.coursedir.root)
        try:
            self.convert_notebooks()
        finally:
            os.chdir(currdir)

    @default("classes")
    def _classes_default(self):
        classes = super(BaseConverter, self)._classes_default()
        classes.append(FilesWriter)
        classes.append(Exporter)
        for pp in self.preprocessors:
            if len(pp.class_traits(config=True)) > 0:
                classes.append(pp)
        return classes

    @property
    def _input_directory(self):
        raise NotImplementedError

    @property
    def _output_directory(self):
        raise NotImplementedError

    def _format_source(self, assignment_id, student_id, escape=False):
        return self.coursedir.format_path(self._input_directory, student_id,
                                          assignment_id, escape=escape)

    def _format_dest(self, assignment_id, student_id, escape=False):
        return self.coursedir.format_path(self._output_directory, student_id,
                                          assignment_id, escape=escape)

    def init_notebooks(self):
        self.assignments = {}
        self.notebooks = []
        fullglob = self._format_source(self.coursedir.assignment_id, self.coursedir.student_id)
        for assignment in glob.glob(fullglob):
            found = glob.glob(os.path.join(assignment, self.coursedir.notebook_id + ".ipynb"))
            if len(found) == 0:
                self.log.warning("No notebooks were matched in '%s'", assignment)
                continue
            self.assignments[assignment] = found

        if len(self.assignments) == 0:
            msg = "No notebooks were matched by '%s'" % fullglob
            self.log.error(msg)
            raise NbGraderException(msg)

    def init_single_notebook_resources(self, notebook_filename):
        regexp = re.escape(os.path.sep).join([
            self._format_source("(?P<assignment_id>.*)", "(?P<student_id>.*)", escape=True),
            "(?P<notebook_id>.*).ipynb"
        ])

        m = re.match(regexp, notebook_filename)
        if m is None:
            msg = "Could not match '%s' with regexp '%s'" % (notebook_filename, regexp)
            self.log.error(msg)
            raise NbGraderException(msg)

        gd = m.groupdict()
        self.log.debug("Student: %s", gd['student_id'])
        self.log.debug("Assignment: %s", gd['assignment_id'])
        self.log.debug("Notebook: %s", gd['notebook_id'])

        resources = {}
        resources['unique_key'] = gd['notebook_id']
        resources['output_files_dir'] = '%s_files' % gd['notebook_id']

        resources['nbgrader'] = {}
        resources['nbgrader']['student'] = gd['student_id']
        resources['nbgrader']['assignment'] = gd['assignment_id']
        resources['nbgrader']['notebook'] = gd['notebook_id']
        resources['nbgrader']['db_url'] = self.coursedir.db_url

        return resources

    def write_single_notebook(self, output, resources):
        # configure the writer build directory
        self.writer.build_directory = self._format_dest(
            resources['nbgrader']['assignment'], resources['nbgrader']['student'])

        # write out the results
        self.writer.write(output, resources, notebook_name=resources['unique_key'])

    def init_destination(self, assignment_id, student_id):
        """Initialize the destination for an assignment. Returns whether the
        assignment should actually be processed or not (i.e. whether the
        initialization was successful).
        """
        dest = os.path.normpath(self._format_dest(assignment_id, student_id))

        # the destination doesn't exist, so we haven't processed it
        if self.coursedir.notebook_id == "*":
            if not os.path.exists(dest):
                return True
        else:
            # if any of the notebooks don't exist, then we want to process them
            for notebook in self.notebooks:
                filename = os.path.splitext(os.path.basename(notebook))[0] + self.exporter.file_extension
                path = os.path.join(dest, filename)
                if not os.path.exists(path):
                    return True

        # if we have specified --force, then always remove existing stuff
        if self.force:
            if self.coursedir.notebook_id == "*":
                self.log.warning("Removing existing assignment: {}".format(dest))
                rmtree(dest)
            else:
                for notebook in self.notebooks:
                    filename = os.path.splitext(os.path.basename(notebook))[0] + self.exporter.file_extension
                    path = os.path.join(dest, filename)
                    if os.path.exists(path):
                        self.log.warning("Removing existing notebook: {}".format(path))
                        remove(path)
            return True

        src = self._format_source(assignment_id, student_id)
        new_timestamp = self.coursedir.get_existing_timestamp(src)
        old_timestamp = self.coursedir.get_existing_timestamp(dest)

        # if --force hasn't been specified, but the source assignment is newer,
        # then we want to overwrite it
        if new_timestamp is not None and old_timestamp is not None and new_timestamp > old_timestamp:
            if self.coursedir.notebook_id == "*":
                self.log.warning("Updating existing assignment: {}".format(dest))
                rmtree(dest)
            else:
                for notebook in self.notebooks:
                    filename = os.path.splitext(os.path.basename(notebook))[0] + self.exporter.file_extension
                    path = os.path.join(dest, filename)
                    if os.path.exists(path):
                        self.log.warning("Updating existing notebook: {}".format(path))
                        remove(path)
            return True

        # otherwise, we should skip the assignment
        self.log.info("Skipping existing assignment: {}".format(dest))
        return False

    def init_assignment(self, assignment_id, student_id):
        """Initializes resources/dependencies/etc. that are common to all
        notebooks in an assignment.
        """
        source = self._format_source(assignment_id, student_id)
        dest = self._format_dest(assignment_id, student_id)

        # detect other files in the source directory
        for filename in find_all_files(source, self.coursedir.ignore + ["*.ipynb"]):
            # Make sure folder exists.
            path = os.path.join(dest, os.path.relpath(filename, source))
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))
            if os.path.exists(path):
                remove(path)

            self.log.info("Copying %s -> %s", filename, path)
            shutil.copy(filename, path)

    def set_permissions(self, assignment_id, student_id):
        self.log.info("Setting destination file permissions to %s", self.permissions)
        dest = os.path.normpath(self._format_dest(assignment_id, student_id))
        permissions = int(str(self.permissions), 8)
        for dirname, _, filenames in os.walk(dest):
            for filename in filenames:
                os.chmod(os.path.join(dirname, filename), permissions)

    def convert_single_notebook(self, notebook_filename):
        """Convert a single notebook.

        Performs the following steps:
            1. Initialize notebook resources
            2. Export the notebook to a particular format
            3. Write the exported notebook to file
        """
        self.log.info("Converting notebook %s", notebook_filename)
        resources = self.init_single_notebook_resources(notebook_filename)
        output, resources = self.exporter.from_filename(notebook_filename, resources=resources)
        self.write_single_notebook(output, resources)

    def convert_notebooks(self):
        errors = []

        def _handle_failure(gd):
            dest = os.path.normpath(self._format_dest(gd['assignment_id'], gd['student_id']))
            if self.coursedir.notebook_id == "*":
                if os.path.exists(dest):
                    self.log.warning("Removing failed assignment: {}".format(dest))
                    rmtree(dest)
            else:
                for notebook in self.notebooks:
                    filename = os.path.splitext(os.path.basename(notebook))[0] + self.exporter.file_extension
                    path = os.path.join(dest, filename)
                    if os.path.exists(path):
                        self.log.warning("Removing failed notebook: {}".format(path))
                        remove(path)

        for assignment in sorted(self.assignments.keys()):
            # initialize the list of notebooks and the exporter
            self.notebooks = sorted(self.assignments[assignment])

            # parse out the assignment and student ids
            regexp = self._format_source("(?P<assignment_id>.*)", "(?P<student_id>.*)", escape=True)
            m = re.match(regexp, assignment)
            if m is None:
                msg = "Could not match '%s' with regexp '%s'" % (assignment, regexp)
                self.log.error(msg)
                raise NbGraderException(msg)
            gd = m.groupdict()

            try:
                # determine whether we actually even want to process this submission
                should_process = self.init_destination(gd['assignment_id'], gd['student_id'])
                if not should_process:
                    continue

                # initialize the destination
                self.init_assignment(gd['assignment_id'], gd['student_id'])

                # convert all the notebooks
                for notebook_filename in self.notebooks:
                    self.convert_single_notebook(notebook_filename)

                # set assignment permissions
                self.set_permissions(gd['assignment_id'], gd['student_id'])

            except UnresponsiveKernelError:
                self.log.error(
                    "While processing assignment %s, the kernel became "
                    "unresponsive and we could not interrupt it. This probably "
                    "means that the students' code has an infinite loop that "
                    "consumes a lot of memory or something similar. nbgrader "
                    "doesn't know how to deal with this problem, so you will "
                    "have to manually edit the students' code (for example, to "
                    "just throw an error rather than enter an infinite loop). ",
                    assignment)
                errors.append((gd['assignment_id'], gd['student_id']))
                _handle_failure(gd)

            except sqlalchemy.exc.OperationalError:
                _handle_failure(gd)
                self.log.error(traceback.format_exc())
                msg = (
                    "There was an error accessing the nbgrader database. This "
                    "may occur if you recently upgraded nbgrader. To resolve "
                    "the issue, first BACK UP your database and then run the "
                    "command `nbgrader db upgrade`."
                )
                self.log.error(msg)
                raise NbGraderException(msg)

            except SchemaMismatchError:
                _handle_failure(gd)
                msg = (
                    "One or more notebooks in the assignment use an old version \n"
                    "of the nbgrader metadata format. Please **back up your class files \n"
                    "directory** and then update the metadata using:\n\nnbgrader update .\n"
                )
                self.log.error(msg)
                raise NbGraderException(msg)

            except KeyboardInterrupt:
                _handle_failure(gd)
                self.log.error("Canceled")
                raise

            except Exception:
                self.log.error("There was an error processing assignment: %s", assignment)
                self.log.error(traceback.format_exc())
                errors.append((gd['assignment_id'], gd['student_id']))
                _handle_failure(gd)

        if len(errors) > 0:
            for assignment_id, student_id in errors:
                self.log.error(
                    "There was an error processing assignment '{}' for student '{}'".format(
                        assignment_id, student_id))

            if self.logfile:
                msg = (
                    "Please see the error log ({}) for details on the specific "
                    "errors on the above failures.".format(self.logfile))
            else:
                msg = (
                    "Please see the above traceback for details on the specific "
                    "errors on the above failures.")

            self.log.error(msg)
            raise NbGraderException(msg)
def convert(source_path: Path, output_dir: Path):
    exporter = MyHTMLExporter()
    writer = FilesWriter(build_directory=str(output_dir))
    output, resources = exporter.from_filename(str(source_path))
    notebook_name = source_path.stem
    writer.write(output, resources, notebook_name=notebook_name)
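# Hypothetical invocation of the convert() helper above, converting every
# notebook in a source directory into an output directory; the directory
# names are made up for illustration.
from pathlib import Path


def convert_all(src_dir="notebooks", out_dir="build/html"):
    output_dir = Path(out_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    for source_path in sorted(Path(src_dir).glob("*.ipynb")):
        convert(source_path, output_dir)    # uses the FilesWriter-backed convert() above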