def write(cells, nb_version=4):
    """Turn a list of cells into the JSON text of an IPython notebook.

    Parameters
    ----------
    cells : iterable of (cell_tp, language, block) tuples
        ``cell_tp`` is ``'markdown'`` or ``'codecell'``; cells of any other
        type are ignored. ``language`` is not used. ``block`` is the cell
        source text.
    nb_version : int
        3 builds a version 3 notebook and upgrades it to version 4 before
        serialization; 4 builds a version 4 notebook directly.

    Returns
    -------
    str
        The notebook serialized as JSON.

    Raises
    ------
    ValueError
        If ``nb_version`` is neither 3 nor 4.
    ImportError
        If neither ``nbformat`` nor the older ``IPython.nbformat`` package
        can be imported.
    """
    # Fail early with a clear message instead of an UnboundLocalError at
    # the end of the function.
    if nb_version not in (3, 4):
        raise ValueError('nb_version must be 3 or 4, not %r' % (nb_version,))

    if nb_version == 3:
        # The notebook format lives in the stand-alone nbformat package;
        # IPython.nbformat is the deprecated old location.
        try:
            from nbformat.v3 import (new_code_cell, new_text_cell,
                                     new_worksheet, new_notebook,
                                     new_metadata)
            from nbformat.v4 import upgrade, nbjson
        except ImportError:
            from IPython.nbformat.v3 import (new_code_cell, new_text_cell,
                                             new_worksheet, new_notebook,
                                             new_metadata)
            from IPython.nbformat.v4 import upgrade, nbjson

        worksheet = new_worksheet()
        for cell_tp, language, block in cells:
            if cell_tp == 'markdown':
                worksheet.cells.append(
                    new_text_cell(u'markdown', source=block))
            elif cell_tp == 'codecell':
                worksheet.cells.append(new_code_cell(input=block))

        nb = new_notebook(worksheets=[worksheet], metadata=new_metadata())
        # Make a v4 notebook by upgrading, then convert it to JSON
        return nbjson.writes(upgrade(nb))

    try:
        from nbformat import writes
        from nbformat.v4 import (new_code_cell, new_markdown_cell,
                                 new_notebook)
    except ImportError:
        from IPython.nbformat import writes
        from IPython.nbformat.v4 import (new_code_cell, new_markdown_cell,
                                         new_notebook)

    nb_cells = []
    for cell_tp, language, block in cells:
        if cell_tp == 'markdown':
            nb_cells.append(new_markdown_cell(source=block))
        elif cell_tp == 'codecell':
            nb_cells.append(new_code_cell(source=block))

    return writes(new_notebook(cells=nb_cells), version=4)
def convert(self):
    """Build a version 3 notebook from the parsed document chunks.

    Every chunk in ``self.doc.parsed`` of type ``"doc"`` becomes a markdown
    text cell (its content passed through ``self.format_docchunk``) and
    every chunk of type ``"code"`` becomes a Python code cell. The JSON
    serialization of the notebook is stored in ``self.converted``.
    """
    try:
        from IPython.nbformat import v3 as nbf
    except ImportError:
        # The `IPython.nbformat` package has been deprecated
        from nbformat import v3 as nbf

    worksheet = nbf.new_worksheet()
    for chunk in self.doc.parsed:
        kind = chunk["type"]
        if kind == "doc":
            # TODO: this relies on pandoc converting into
            # markdown
            text = self.format_docchunk(chunk['content'])
            cell = nbf.new_text_cell(u'markdown', source=text)
        elif kind == "code":
            cell = nbf.new_code_cell(input=chunk['content'],
                                     language=u'python')
        else:
            continue
        worksheet.cells.append(cell)

    notebook = nbf.new_notebook(name='Pweaved ipython notebook',
                                worksheets=[worksheet])
    self.converted = nbf.writes_json(notebook)
def write(cells, nb_version=4):
    """Turn a list of cells into the JSON text of an IPython notebook.

    Parameters
    ----------
    cells : iterable of (cell_tp, language, block) tuples
        ``cell_tp`` is ``'markdown'`` or ``'codecell'``; cells of any other
        type are ignored. ``language`` is not used. ``block`` is the cell
        source text.
    nb_version : int
        3 builds a version 3 notebook and upgrades it to version 4 before
        serialization; 4 builds a version 4 notebook directly.

    Returns
    -------
    str
        The notebook serialized as JSON.

    Raises
    ------
    ValueError
        If ``nb_version`` is neither 3 nor 4.
    ImportError
        If neither ``nbformat`` nor the older ``IPython.nbformat`` package
        can be imported.
    """
    # Fail early with a clear message instead of an UnboundLocalError at
    # the end of the function.
    if nb_version not in (3, 4):
        raise ValueError('nb_version must be 3 or 4, not %r' % (nb_version,))

    if nb_version == 3:
        # The notebook format lives in the stand-alone nbformat package;
        # IPython.nbformat is the deprecated old location.
        try:
            from nbformat.v3 import (new_code_cell, new_text_cell,
                                     new_worksheet, new_notebook,
                                     new_metadata)
            from nbformat.v4 import upgrade, nbjson
        except ImportError:
            from IPython.nbformat.v3 import (new_code_cell, new_text_cell,
                                             new_worksheet, new_notebook,
                                             new_metadata)
            from IPython.nbformat.v4 import upgrade, nbjson

        worksheet = new_worksheet()
        for cell_tp, language, block in cells:
            if cell_tp == 'markdown':
                worksheet.cells.append(
                    new_text_cell(u'markdown', source=block))
            elif cell_tp == 'codecell':
                worksheet.cells.append(new_code_cell(input=block))

        nb = new_notebook(worksheets=[worksheet], metadata=new_metadata())
        # Make a v4 notebook by upgrading, then convert it to JSON
        return nbjson.writes(upgrade(nb))

    try:
        from nbformat import writes
        from nbformat.v4 import (new_code_cell, new_markdown_cell,
                                 new_notebook)
    except ImportError:
        from IPython.nbformat import writes
        from IPython.nbformat.v4 import (new_code_cell, new_markdown_cell,
                                         new_notebook)

    nb_cells = []
    for cell_tp, language, block in cells:
        if cell_tp == 'markdown':
            nb_cells.append(new_markdown_cell(source=block))
        elif cell_tp == 'codecell':
            nb_cells.append(new_code_cell(source=block))

    return writes(new_notebook(cells=nb_cells), version=4)
def _ipynb_strip_prompt(line):
    """Return the code on an interactive-session line, or None for output.

    Lines starting with a Python prompt (``>>>``, ``... ``) or an IPython
    prompt (``In [``, ``   ...: ``) carry code; every other line is treated
    as session output.
    """
    if line.startswith(('>>>', '... ')):
        return line[4:]
    if line.startswith('In ['):
        # Keep what follows the first colon of "In [n]: code"
        return ':'.join(line.split(':')[1:]).strip()
    if line.startswith('   ...: '):
        # Continuation prompt; its 8 characters match the [8:] slice
        return line[8:]
    return None


def ipynb_code(filestr, code_blocks, code_block_types,
               tex_blocks, format):
    """Translate Markdown text plus code/math blocks into notebook JSON.

    ``filestr`` is Markdown text in which code and math blocks appear as
    one-line markers of the form ``<number> <block-indicator> <type>``; the
    number indexes ``code_blocks`` or ``tex_blocks``.

    Parameters
    ----------
    filestr : str
        The document text with block markers.
    code_blocks : list of str
        Code block contents. Modified in place; an entry may end up as a
        list of strings when an interactive session is split into cells.
    code_block_types : list of str
        Type of each code block (e.g. a name ending in ``-t`` or ``hid``,
        or starting with ``pyshell``, ``ipy``, ``sys`` or ``py``).
    tex_blocks : list of str
        Math block contents. Modified in place (tags, ``$$`` wrapping and
        label anchors are added).
    format : str
        Passed on to ``indent_lines``.

    Returns
    -------
    str
        The notebook as JSON text. If the ``ipynb_version`` option is
        neither 3 nor 4, no notebook is built and the processed Markdown
        text is returned.

    Side effects: creates (or removes) the ``ipynb-<basename>-src.tar.gz``
    archive in the current directory and reports problems via ``errwarn``;
    fatal problems call ``_abort``.
    """
    # Fix pandoc citations to normal internal links: [[key]](#key)
    filestr = re.sub(r'\[@(.+?)\]', r'[[\g<1>]](#\g<1>)', filestr)

    # filestr becomes json list after this function so we must typeset
    # envirs here.
    from common import _CODE_BLOCK, _MATH_BLOCK
    envir_format = option('ipynb_admon=', 'paragraph')

    # Remove all !bpop-!epop environments (they cause only problems and
    # have no use)
    for envir in 'pop', 'slidecell':
        filestr = re.sub('^<!-- !b%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub('^<!-- !e%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
    filestr = re.sub('^<!-- !bnotes.*?<!-- !enotes -->\n', '', filestr,
                     flags=re.DOTALL | re.MULTILINE)
    filestr = re.sub('^<!-- !split -->\n', '', filestr, flags=re.MULTILINE)

    def typeset_envir(m, envir):
        """Return the Markdown replacement for one !b<envir>...!e<envir>."""
        title = m.group(1).strip()
        # Text size specified in parenthesis?
        size_spec = re.search(r'^\s*\((.+?)\)', title)

        if title == '' and envir not in ('block', 'quote'):
            title = envir.capitalize() + '.'
        elif title.lower() == 'none':
            title = ''
        elif size_spec:
            # Remove the size spec exactly as written (any letter case)
            title = title.replace(size_spec.group(0), '', 1).strip()
        elif title and title[-1] not in ('.', ':', '!', '?'):
            # Make sure the title ends with punctuation
            title += '.'
        # Recall that this formatting is called very late
        # so native format must be used!
        if title:
            title = '**' + title + '**\n'
        block = m.group(2)

        # Always use quote typesetting for quotes
        if envir_format == 'quote' or envir == 'quote':
            # Make Markdown quote of the block: lines start with >
            quoted = []
            for line in block.splitlines():
                # Just quote plain text; block markers, figures, movies
                # and table rows are set off by blank lines instead
                if (_MATH_BLOCK in line or _CODE_BLOCK in line or
                        line.startswith(('FIGURE:', 'MOVIE:', '|'))):
                    quoted.append('\n' + line + '\n')
                else:
                    quoted.append('> ' + line)
            block = '\n'.join(quoted) + '\n\n'
            # Add quote and a blank line after title
            if title:
                title = '> ' + title + '>\n'
        elif title:
            # Add a blank line after title
            title += '\n'

        if envir_format == 'hrule':
            # Native ------ does not work, use <hr/>
            return '\n\n<hr/>\n' + title + block + '\n<hr/>\n\n'
        return title + block + '\n\n'

    from doconce import doconce_envirs
    envirs = doconce_envirs()[8:-2]
    if envir_format in ('quote', 'paragraph', 'hrule'):
        for envir in envirs:
            pattern = r'^!b%s(.*?)\n(.+?)\s*^!e%s' % (envir, envir)
            filestr = re.sub(
                pattern,
                lambda m, envir=envir: typeset_envir(m, envir),
                filestr, flags=re.DOTALL | re.MULTILINE)
    elif envirs:
        errwarn('*** error: --ipynb_admon=%s is not supported'
                % envir_format)
        _abort()

    # Fix pyshell and ipy interactive sessions: remove prompt and output,
    # or split in multiple cells such that output comes out at the end of
    # a cell.
    # Fix sys environments and use run prog.py so programs can be run in cell
    # Insert %matplotlib inline in the first block using matplotlib
    # Only typeset Python code as blocks, otherwise !bc environments
    # become plain indented Markdown.
    from doconce import dofile_basename
    ipynb_tarfile = 'ipynb-%s-src.tar.gz' % dofile_basename
    # Must stay a set for the whole loop below: every sys block adds to it
    src_paths = set()
    mpl_inline = False

    split_pyshell = option('ipynb_split_pyshell=', 'on')
    split_pyshell = (split_pyshell is not None and
                     split_pyshell not in ('no', 'False', 'off'))

    plotting_import = re.compile(r'(?:import|from) +(?:matplotlib|scitools)')

    ipynb_code_tp = [None]*len(code_blocks)
    for i, code in enumerate(code_blocks):
        # Check if continuation lines are in the code block, because
        # doconce.py inserts a blank after the backslash
        code = code.replace('\\ \n', '\\\n')

        if not mpl_inline and plotting_import.search(code):
            code = '%matplotlib inline\n\n' + code
            mpl_inline = True

        tp = code_block_types[i]
        if tp.endswith('-t'):
            # Standard Markdown code with pandoc/github extension
            language_spec = language2pandoc.get(tp[:-2], '')
            code = "```%s\n" % language_spec + \
                   indent_lines(code.strip(), format) + \
                   "```"
            cell_tp = 'markdown'
        elif tp.startswith('pyshell') or tp.startswith('ipy'):
            lines = code.splitlines()
            if split_pyshell:
                # Split for each output and put in separate cell
                cell_sources = []
                last_cell_end = -1
                for j, line in enumerate(lines):
                    stripped = _ipynb_strip_prompt(line)
                    if stripped is not None:
                        lines[j] = stripped
                    else:
                        # output (no prefix or Out) ends the current cell
                        lines[j] = ''
                        cell_sources.append(
                            '\n'.join(lines[last_cell_end+1:j+1]))
                        last_cell_end = j
                if last_cell_end < len(lines) - 1:
                    # Code after the last output would otherwise be lost
                    cell_sources.append(
                        '\n'.join(lines[last_cell_end+1:]))
                code = cell_sources
            else:
                # Remove prompt and output lines; leave code executable
                # in cell
                stripped_lines = [_ipynb_strip_prompt(line)
                                  for line in lines]
                code = '\n'.join([line for line in stripped_lines if line])
            cell_tp = 'cell'
        elif tp.startswith('sys'):
            # Do we find execution of python file? If so, collect the file
            # and make a run file command in a cell.
            # Otherwise, it is just a plain verbatim Markdown block.
            found_unix_lines = False
            lines = code.splitlines()
            for j, line in enumerate(lines):
                m = re.search(r'(.+?>|\$) *python +([A-Za-z_0-9]+?\.py)',
                              line)
                if m:
                    name = m.group(2).strip()
                    if os.path.isfile(name):
                        # Collect the program's directory, or the program
                        # itself when it lives in the current directory
                        src_paths.add(os.path.dirname(name) or name)
                        lines[j] = '%%run "%s"' % name
                else:
                    found_unix_lines = True
            code = '\n'.join(lines)
            if src_paths and not found_unix_lines:
                # This is a sys block with run commands only
                cell_tp = 'cell'
            else:
                # Standard Markdown code
                code = indent_lines(code, format)
                cell_tp = 'markdown'
        elif tp.endswith('hid'):
            cell_tp = 'cell_hidden'
        elif tp.startswith('py'):
            cell_tp = 'cell'
        else:
            # Should support other languages as well, but not for now
            code = indent_lines(code, format)
            cell_tp = 'markdown'

        code_blocks[i] = code
        ipynb_code_tp[i] = cell_tp

    # figure_files and movie_files are global variables and contain
    # all figures and movies referred to
    src_paths = sorted(src_paths)
    if figure_files:
        src_paths += figure_files
    if movie_files:
        src_paths += movie_files

    if src_paths:
        # Make tar file with all the source dirs with files
        # that need to be executed
        # NOTE(review): the command is built by string concatenation, so
        # paths containing spaces or shell metacharacters will break it.
        os.system('tar cfz %s %s' % (ipynb_tarfile, ' '.join(src_paths)))
        errwarn('collected all required additional files in ' +
                ipynb_tarfile +
                ' which must be distributed with the notebook')
    elif os.path.isfile(ipynb_tarfile):
        os.remove(ipynb_tarfile)

    # Parse document into markdown text, code blocks, and tex blocks.
    # Store in nested list notebook_blocks: text is collected line by line
    # in a list that is joined to a string when the next marker arrives.
    notebook_blocks = [[]]

    def close_text_block():
        # The last entry is already a string when two markers are adjacent
        if isinstance(notebook_blocks[-1], list):
            notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()

    authors = ''
    for line in filestr.splitlines():
        if line.startswith('authors = [new_author(name='):
            # old author method
            authors = line[10:]
        elif _CODE_BLOCK in line:
            code_block_tp = line.split()[-1]
            if code_block_tp in ('pyhid',) or \
               not code_block_tp.endswith('hid'):
                close_text_block()
                notebook_blocks.append(line)
            # else: hidden block to be dropped (may include more languages
            # with time in the above tuple)
        elif _MATH_BLOCK in line:
            close_text_block()
            notebook_blocks.append(line)
        else:
            if not isinstance(notebook_blocks[-1], list):
                notebook_blocks.append([])
            notebook_blocks[-1].append(line)
    close_text_block()

    # Add block type info
    code_marker = re.compile(r'(\d+) +%s' % _CODE_BLOCK)
    math_marker = re.compile(r'(\d+) +%s' % _MATH_BLOCK)
    for i, block in enumerate(notebook_blocks):
        m = code_marker.match(block)
        if m:
            block_tp = ipynb_code_tp[int(m.group(1))]
            if block_tp not in ('cell', 'cell_hidden'):
                block_tp = 'text'
        elif math_marker.match(block):
            block_tp = 'math'
        else:
            block_tp = 'text'
        notebook_blocks[i] = [block_tp, block]

    # Go through tex_blocks and wrap in $$
    # (doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks)
    label2tag = {}
    tag_counter = 1
    for i, tex in enumerate(tex_blocks):
        # Extract labels and add tags
        labels = re.findall(r'label\{(.+?)\}', tex)
        for label in labels:
            label2tag[label] = tag_counter
            # Insert tag to get labeled equation
            tex = tex.replace(
                'label{%s}' % label,
                'label{%s} \\tag{%s}' % (label, tag_counter))
            tag_counter += 1

        # Remove \[ and \] or \begin/end{equation*} in single equations
        for delimiter in (r'\[', r'\]',
                          r'\begin{equation*}', r'\end{equation*}'):
            tex = tex.replace(delimiter, '')

        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex)
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                errwarn(
                    '*** warning: latex envir \\begin{%s} does not work '
                    'well in Markdown.\n'
                    '    Stick to \\[ ... \\], equation, equation*, '
                    'align, or align*\n'
                    '    environments in math environments.\n' % envir)

        # Markdown: add $$ on each side of the equation and make sure
        # there is no newline after the equation
        tex = '$$\n' + tex.strip() + '\n$$'

        # Add labels for the eqs above the block (for reference)
        if labels:
            label_tp = '<div id="%s"></div>'
            tex = '<!-- Equation labels as ordinary links -->\n' + \
                  ' '.join([label_tp % label for label in labels]) + \
                  '\n\n' + tex
        tex_blocks[i] = tex

    # notebook_blocks is now a list of text chunks in markdown and
    # math/code line instructions. Insert code and tex blocks.
    for entry in notebook_blocks:
        marker = entry[1]
        # start of marker: number block-indicator code-type
        if _CODE_BLOCK in marker:
            entry[1] = code_blocks[int(marker.split()[0])]  # can be list!
        elif _MATH_BLOCK in marker:
            entry[1] = tex_blocks[int(marker.split()[0])]

    # Make IPython structures
    nb_version = int(option('ipynb_version=', '4'))
    if nb_version == 3:
        try:
            from IPython.nbformat.v3 import (
                new_code_cell, new_text_cell, new_worksheet,
                new_notebook, new_metadata, new_author)
            nb = new_worksheet()
        except ImportError:
            errwarn('*** error: could not import IPython.nbformat.v3!')
            errwarn(' set --ipynb_version=4 or leave out --ipynb_version=3')
            _abort()
    elif nb_version == 4:
        try:
            from nbformat.v4 import (
                new_code_cell, new_markdown_cell, new_notebook)
        except ImportError:
            # Try old style
            try:
                from IPython.nbformat.v4 import (
                    new_code_cell, new_markdown_cell, new_notebook)
            except ImportError:
                errwarn('*** error: cannot do import nbformat.v4 or '
                        'IPython.nbformat.v4')
                errwarn(' make sure IPython notebook or Jupyter is '
                        'installed correctly')
                _abort()

    cells = []
    prompt_number = 1
    for block_tp, block in notebook_blocks:
        if block_tp in ('text', 'math'):
            # Pure comments between math/code and math/code come
            # out as empty blocks and are skipped here
            if block != '':
                if nb_version == 3:
                    nb.cells.append(
                        new_text_cell(u'markdown', source=block))
                elif nb_version == 4:
                    cells.append(new_markdown_cell(source=block))
            continue
        if block_tp not in ('cell', 'cell_hidden'):
            continue

        collapsed = block_tp == 'cell_hidden'
        # A split interactive session arrives as a list of cell sources
        sources = block if isinstance(block, list) else [block]
        for source in sources:
            source = source.rstrip()
            if source == '':
                continue
            if nb_version == 3:
                nb.cells.append(new_code_cell(
                    input=source,
                    prompt_number=prompt_number,
                    collapsed=collapsed))
            elif nb_version == 4:
                cells.append(new_code_cell(
                    source=source,
                    execution_count=prompt_number,
                    metadata=dict(collapsed=collapsed)))
            prompt_number += 1

    if nb_version == 3:
        # Catch the title as the first heading
        m = re.search(r'^#+\s*(.+)$', filestr, flags=re.MULTILINE)
        title = m.group(1).strip() if m else ''
        # md below is not used for anything
        if authors:
            # NOTE(review): eval() executes text taken from the document;
            # unsafe for untrusted input, and the result is unused.
            authors = eval(authors)
            md = new_metadata(name=title, authors=authors)
        else:
            md = new_metadata(name=title)
        nb = new_notebook(worksheets=[nb], metadata=new_metadata())
        # Let us make v4 notebook here by upgrading
        from IPython.nbformat.v4 import upgrade
        nb = upgrade(nb)
        import IPython.nbformat.v4.nbjson as nbjson
        # Convert nb to json format
        filestr = nbjson.writes(nb)
    elif nb_version == 4:
        nb = new_notebook(cells=cells)
        # Same package preference as for the cell constructors above
        try:
            from nbformat import writes
        except ImportError:
            from IPython.nbformat import writes
        filestr = writes(nb, version=4)

    # Check that there are no empty cells:
    # NOTE(review): "input" is the v3 name of the cell text; the JSON
    # written above is v4, so this test may never match - confirm.
    if '"input": []' in filestr:
        errwarn('*** error: empty cells in notebook - report bug in DocOnce')
        _abort()

    # must do the replacements here at the very end when json is written out
    # \eqref and labels will not work, but labels (only in math) do no harm
    filestr = re.sub(r'([^\\])label\{', r'\g<1>\\\\label{', filestr,
                     flags=re.MULTILINE)

    # Now we use explicit references to tags
    def link_to_tag(m):
        label = m.group(1)
        try:
            return r'[(%s)](#%s)' % (label2tag[label], label)
        except KeyError:
            errwarn('*** error: label "%s" is not defined' % label)
            # Keep the unresolved reference visible instead of deleting it
            return m.group(0)

    filestr = re.sub(r'\(ref\{(.+?)\}\)', link_to_tag, filestr)
    return filestr
def ipynb_code(filestr, code_blocks, code_block_types, tex_blocks, format): """ # We expand all newcommands now from html import embed_newcommands newcommands = embed_newcommands(filestr) if newcommands: filestr = newcommands + filestr """ # Fix pandoc citations to normal internal links: [[key]](#key) filestr = re.sub(r'\[@(.+?)\]', r'[[\g<1>]](#\g<1>)', filestr) # filestr becomes json list after this function so we must typeset # envirs here. All envirs are typeset as pandoc_quote. from .common import _CODE_BLOCK, _MATH_BLOCK envir_format = option('ipynb_admon=', 'paragraph') # Remove all !bpop-!epop environments (they cause only problens and # have no use) for envir in 'pop', 'slidecell': filestr = re.sub('^<!-- !b%s .*\n' % envir, '', filestr, flags=re.MULTILINE) filestr = re.sub('^<!-- !e%s .*\n' % envir, '', filestr, flags=re.MULTILINE) filestr = re.sub('^<!-- !bnotes.*?<!-- !enotes -->\n', '', filestr, flags=re.DOTALL | re.MULTILINE) filestr = re.sub('^<!-- !split -->\n', '', filestr, flags=re.MULTILINE) from .doconce import doconce_envirs envirs = doconce_envirs()[8:-2] for envir in envirs: pattern = r'^!b%s(.*?)\n(.+?)\s*^!e%s' % (envir, envir) if envir_format in ('quote', 'paragraph', 'hrule'): def subst(m): title = m.group(1).strip() # Text size specified in parenthesis? m2 = re.search('^\s*\((.+?)\)', title) if title == '' and envir not in ('block', 'quote'): title = envir.capitalize() + '.' elif title.lower() == 'none': title == '' elif m2: text_size = m2.group(1).lower() title = title.replace('(%s)' % text_size, '').strip() elif title and title[-1] not in ('.', ':', '!', '?'): # Make sure the title ends with puncuation title += '.' # Recall that this formatting is called very late # so native format must be used! 
if title: title = '**' + title + '**\n' # Could also consider subsubsection formatting block = m.group(2) # Always use quote typesetting for quotes if envir_format == 'quote' or envir == 'quote': # Make Markdown quote of the block: lines start with > lines = [] for line in block.splitlines(): # Just quote plain text if not (_MATH_BLOCK in line or _CODE_BLOCK in line or line.startswith('FIGURE:') or line.startswith('MOVIE:') or line.startswith('|')): lines.append('> ' + line) else: lines.append('\n' + line + '\n') block = '\n'.join(lines) + '\n\n' # Add quote and a blank line after title if title: title = '> ' + title + '>\n' else: # Add a blank line after title if title: title += '\n' if envir_format == 'hrule': # Native ------ does not work, use <hr/> #text = '\n\n----------\n' + title + '----------\n' + \ # block + '\n----------\n\n' text = '\n\n<hr/>\n' + title + \ block + '\n<hr/>\n\n' else: text = title + block + '\n\n' return text else: errwarn('*** error: --ipynb_admon=%s is not supported' % envir_format) filestr = re.sub(pattern, subst, filestr, flags=re.DOTALL | re.MULTILINE) # Fix pyshell and ipy interactive sessions: remove prompt and output. # or split in multiple cells such that output comes out at the end of a cell # Fix sys environments and use run prog.py so programs can be run in cell # Insert %matplotlib inline in the first block using matplotlib # Only typeset Python code as blocks, otherwise !bc environmens # become plain indented Markdown. 
from .doconce import dofile_basename ipynb_tarfile = 'ipynb-%s-src.tar.gz' % dofile_basename src_paths = set() mpl_inline = False split_pyshell = option('ipynb_split_pyshell=', 'on') if split_pyshell is None: split_pyshell = False elif split_pyshell in ('no', 'False', 'off'): split_pyshell = False else: split_pyshell = True ipynb_code_tp = [None] * len(code_blocks) for i in range(len(code_blocks)): # Check if continuation lines are in the code block, because # doconce.py inserts a blank after the backslash if '\\ \n' in code_blocks[i]: code_blocks[i] = code_blocks[i].replace('\\ \n', '\\\n') if not mpl_inline and ( re.search(r'import +matplotlib', code_blocks[i]) or \ re.search(r'from +matplotlib', code_blocks[i]) or \ re.search(r'import +scitools', code_blocks[i]) or \ re.search(r'from +scitools', code_blocks[i])): code_blocks[i] = '%matplotlib inline\n\n' + code_blocks[i] mpl_inline = True tp = code_block_types[i] if tp.endswith('-t'): # Standard Markdown code with pandoc/github extension language = tp[:-2] language_spec = language2pandoc.get(language, '') #code_blocks[i] = '\n' + indent_lines(code_blocks[i], format) + '\n' code_blocks[i] = "```%s\n" % language_spec + \ indent_lines(code_blocks[i].strip(), format) + \ "```" ipynb_code_tp[i] = 'markdown' elif tp.startswith('pyshell') or tp.startswith('ipy'): lines = code_blocks[i].splitlines() last_cell_end = -1 if split_pyshell: new_code_blocks = [] # Split for each output an put in separate cell for j in range(len(lines)): if lines[j].startswith('>>>') or lines[j].startswith( '... 
'): lines[j] = lines[j][4:] elif lines[j].startswith('In ['): # IPython lines[j] = ':'.join(lines[j].split(':')[1:]).strip() elif lines[j].startswith(' ...: '): # IPython lines[j] = lines[j][8:] else: # output (no prefix or Out) lines[j] = '' new_code_blocks.append('\n'.join(lines[last_cell_end + 1:j + 1])) last_cell_end = j code_blocks[i] = new_code_blocks ipynb_code_tp[i] = 'cell' else: # Remove prompt and output lines; leave code executable in cell for j in range(len(lines)): if lines[j].startswith('>>> ') or lines[j].startswith( '... '): lines[j] = lines[j][4:] elif lines[j].startswith('In ['): lines[j] = ':'.join(lines[j].split(':')[1:]).strip() else: # output lines[j] = '' for j in range(lines.count('')): lines.remove('') code_blocks[i] = '\n'.join(lines) ipynb_code_tp[i] = 'cell' elif tp.startswith('sys'): # Do we find execution of python file? If so, copy the file # to separate subdir and make a run file command in a cell. # Otherwise, it is just a plain verbatim Markdown block. 
found_unix_lines = False lines = code_blocks[i].splitlines() for j in range(len(lines)): m = re.search(r'(.+?>|\$) *python +([A-Za-z_0-9]+?\.py)', lines[j]) if m: name = m.group(2).strip() if os.path.isfile(name): src_paths.add(os.path.dirname(name)) lines[j] = '%%run "%s"' % fullpath else: found_unix_lines = True src_paths = list(src_paths) if src_paths and not found_unix_lines: # This is a sys block with run commands only code_blocks[i] = '\n'.join(lines) ipynb_code_tp[i] = 'cell' else: # Standard Markdown code code_blocks[i] = '\n'.join(lines) code_blocks[i] = indent_lines(code_blocks[i], format) ipynb_code_tp[i] = 'markdown' elif tp.endswith('hid'): ipynb_code_tp[i] = 'cell_hidden' elif tp.endswith('out'): ipynb_code_tp[i] = 'cell_output' elif tp.startswith('py'): ipynb_code_tp[i] = 'cell' else: # Should support other languages as well, but not for now code_blocks[i] = indent_lines(code_blocks[i], format) ipynb_code_tp[i] = 'markdown' # figure_files and movie_files are global variables and contain # all figures and movies referred to src_paths = list(src_paths) if figure_files: src_paths += figure_files if movie_files: src_paths += movie_files if src_paths: # Make tar file with all the source dirs with files # that need to be executed os.system('tar cfz %s %s' % (ipynb_tarfile, ' '.join(src_paths))) errwarn('collected all required additional files in ' + ipynb_tarfile + ' which must be distributed with the notebook') elif os.path.isfile(ipynb_tarfile): os.remove(ipynb_tarfile) # Parse document into markdown text, code blocks, and tex blocks. # Store in nested list notebook_blocks. 
notebook_blocks = [[]] authors = '' for line in filestr.splitlines(): if line.startswith('authors = [new_author(name='): # old author method authors = line[10:] elif _CODE_BLOCK in line: code_block_tp = line.split()[-1] if code_block_tp in ( 'pyhid', ) or not code_block_tp.endswith('hid'): notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip() notebook_blocks.append(line) # else: hidden block to be dropped (may include more languages # with time in the above tuple) elif _MATH_BLOCK in line: notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip() notebook_blocks.append(line) else: if not isinstance(notebook_blocks[-1], list): notebook_blocks.append([]) notebook_blocks[-1].append(line) if isinstance(notebook_blocks[-1], list): notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip() # Add block type info pattern = r'(\d+) +%s' for i in range(len(notebook_blocks)): if re.match(pattern % _CODE_BLOCK, notebook_blocks[i]): m = re.match(pattern % _CODE_BLOCK, notebook_blocks[i]) idx = int(m.group(1)) if ipynb_code_tp[idx] == 'cell': notebook_blocks[i] = ['cell', notebook_blocks[i]] elif ipynb_code_tp[idx] == 'cell_hidden': notebook_blocks[i] = ['cell_hidden', notebook_blocks[i]] elif ipynb_code_tp[idx] == 'cell_output': notebook_blocks[i] = ['cell_output', notebook_blocks[i]] else: notebook_blocks[i] = ['text', notebook_blocks[i]] elif re.match(pattern % _MATH_BLOCK, notebook_blocks[i]): notebook_blocks[i] = ['math', notebook_blocks[i]] else: notebook_blocks[i] = ['text', notebook_blocks[i]] # Go through tex_blocks and wrap in $$ # (doconce.py runs align2equations so there are no align/align* # environments in tex blocks) label2tag = {} tag_counter = 1 for i in range(len(tex_blocks)): # Extract labels and add tags labels = re.findall(r'label\{(.+?)\}', tex_blocks[i]) for label in labels: label2tag[label] = tag_counter # Insert tag to get labeled equation tex_blocks[i] = tex_blocks[i].replace( 'label{%s}' % label, 'label{%s} \\tag{%s}' % (label, 
tag_counter)) tag_counter += 1 # Remove \[ and \] or \begin/end{equation*} in single equations tex_blocks[i] = tex_blocks[i].replace(r'\[', '') tex_blocks[i] = tex_blocks[i].replace(r'\]', '') tex_blocks[i] = tex_blocks[i].replace(r'\begin{equation*}', '') tex_blocks[i] = tex_blocks[i].replace(r'\end{equation*}', '') # Check for illegal environments m = re.search(r'\\begin\{(.+?)\}', tex_blocks[i]) if m: envir = m.group(1) if envir not in ('equation', 'equation*', 'align*', 'align', 'array'): errwarn("""\ *** warning: latex envir \\begin{%s} does not work well in Markdown. Stick to \\[ ... \\], equation, equation*, align, or align* environments in math environments. """ % envir) eq_type = 'heading' # or '$$' eq_type = '$$' # Markdown: add $$ on each side of the equation if eq_type == '$$': # Make sure there are no newline after equation tex_blocks[i] = '$$\n' + tex_blocks[i].strip() + '\n$$' # Here: use heading (###) and simple formula (remove newline # in math expressions to keep everything within a heading) as # the equation then looks bigger elif eq_type == 'heading': tex_blocks[i] = '### $ ' + ' '.join( tex_blocks[i].splitlines()) + ' $' # Add labels for the eqs above the block (for reference) if labels: #label_tp = '<a name="%s"></a>' label_tp = '<div id="%s"></div>' tex_blocks[i] = '<!-- Equation labels as ordinary links -->\n' + \ ' '.join([label_tp % label for label in labels]) + '\n\n' + \ tex_blocks[i] # blocks is now a list of text chunks in markdown and math/code line # instructions. Insert code and tex blocks for i in range(len(notebook_blocks)): if _CODE_BLOCK in notebook_blocks[i][ 1] or _MATH_BLOCK in notebook_blocks[i][1]: words = notebook_blocks[i][1].split() # start of notebook_blocks[i]: number block-indicator code-type n = int(words[0]) if _CODE_BLOCK in notebook_blocks[i][1]: notebook_blocks[i][1] = code_blocks[n] # can be list! 
if _MATH_BLOCK in notebook_blocks[i][1]: notebook_blocks[i][1] = tex_blocks[n] # Make IPython structures nb_version = int(option('ipynb_version=', '4')) if nb_version == 3: try: from IPython.nbformat.v3 import (new_code_cell, new_text_cell, new_worksheet, new_notebook, new_metadata, new_author) nb = new_worksheet() except ImportError: errwarn('*** error: could not import IPython.nbformat.v3!') errwarn(' set --ipynb_version=4 or leave out --ipynb_version=3') _abort() elif nb_version == 4: try: from nbformat.v4 import (new_code_cell, new_markdown_cell, new_notebook) except ImportError: # Try old style try: from IPython.nbformat.v4 import (new_code_cell, new_markdown_cell, new_notebook) except ImportError: errwarn( '*** error: cannot do import nbformat.v4 or IPython.nbformat.v4' ) errwarn( ' make sure IPython notebook or Jupyter is installed correctly' ) _abort() cells = [] mdstr = [] # plain md format of the notebook prompt_number = 1 for block_tp, block in notebook_blocks: if (block_tp == 'text' or block_tp == 'math') and block != '' and block != '<!-- -->': if nb_version == 3: nb.cells.append(new_text_cell(u'markdown', source=block)) elif nb_version == 4: cells.append(new_markdown_cell(source=block)) mdstr.append(('markdown', block)) elif block_tp == 'cell' and block != '' and block != []: if isinstance(block, list): for block_ in block: block_ = block_.rstrip() if block_ != '': if nb_version == 3: nb.cells.append( new_code_cell(input=block_, prompt_number=prompt_number, collapsed=False)) elif nb_version == 4: cells.append( new_code_cell(source=block_, execution_count=prompt_number, metadata=dict(collapsed=False))) prompt_number += 1 mdstr.append(('codecell', block_)) else: block = block.rstrip() if block != '': if nb_version == 3: nb.cells.append( new_code_cell(input=block, prompt_number=prompt_number, collapsed=False)) elif nb_version == 4: cells.append( new_code_cell(source=block, execution_count=prompt_number, metadata=dict(collapsed=False))) prompt_number += 1 
mdstr.append(('codecell', block)) elif block_tp == 'cell_output' and block != '': block = block.rstrip() if nb_version == 3: print("WARNING: Output not implemented for nbformat v3.") elif nb_version == 4: outputs = [{ "data": { "text/plain": [block] }, "execution_count": prompt_number - 1, "metadata": {}, "output_type": "execute_result" }] previous_cell = cells[-1] if previous_cell.cell_type == "code": previous_cell.outputs = outputs else: print("WARNING: DocOnce ipynb got code output,", "but previous was not code.") cells.append( new_code_cell(source="#", outputs=outputs, execution_count=prompt_number, metadata=dict(collapsed=False))) mdstr.append(('codecell', block)) elif block_tp == 'cell_hidden' and block != '': block = block.rstrip() if nb_version == 3: nb.cells.append( new_code_cell(input=block, prompt_number=prompt_number, collapsed=True)) elif nb_version == 4: cells.append( new_code_cell(source=block, execution_count=prompt_number, metadata=dict(collapsed=True))) prompt_number += 1 mdstr.append(('codecell', block)) """ # Dump the notebook cells in a simple ASCII format # (doc/src/ipynb/ipynb_generator.py can translate it back to .ipynb file) f = open(dofile_basename + '.md-ipynb', 'w') for cell_tp, block in mdstr: if cell_tp == 'markdown': f.write('\n-----\n\n') elif cell_tp == 'codecell': f.write('\n-----py\n\n') f.write(block) f.close() """ if nb_version == 3: # Catch the title as the first heading m = re.search(r'^#+\s*(.+)$', filestr, flags=re.MULTILINE) title = m.group(1).strip() if m else '' # md below is not used for anything if authors: authors = eval(authors) md = new_metadata(name=title, authors=authors) else: md = new_metadata(name=title) nb = new_notebook(worksheets=[nb], metadata=new_metadata()) # Let us make v4 notebook here by upgrading from IPython.nbformat.v4 import upgrade nb = upgrade(nb) import IPython.nbformat.v4.nbjson as nbjson # Convert nb to json format filestr = nbjson.writes(nb) elif nb_version == 4: nb = new_notebook(cells=cells) 
try: from nbformat import writes except ImportError: from IPython.nbformat import writes filestr = writes(nb, version=4) # Check that there are no empty cells: if '"input": []' in filestr: errwarn('*** error: empty cells in notebook - report bug in DocOnce') _abort() # must do the replacements here at the very end when json is written out # \eqref and labels will not work, but labels (only in math) do no harm filestr = re.sub(r'([^\\])label\{', r'\g<1>\\\\label{', filestr, flags=re.MULTILINE) # \\eqref{} just gives (???) link at this stage - future versions # will probably support labels #filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr) # Now we use explicit references to tags def subst(m): label = m.group(1) try: return r'[(%s)](#%s)' % (label2tag[label], label) except KeyError as e: errwarn('*** error: label "%s" is not defined' % str(e)) filestr = re.sub(r'\(ref\{(.+?)\}\)', subst, filestr) """ # MathJax reference to tag (recall that the equations have both label # and tag (know that tag only works well in HTML, but this mjx-eqn-no # label does not work in ipynb) filestr = re.sub(r'\(ref\{(.+?)\}\)', lambda m: r'[(%s)](#mjx-eqn-%s)' % (label2tag[m.group(1)], label2tag[m.group(1)]), filestr) """ #filestr = re.sub(r'\(ref\{(.+?)\}\)', r'Eq (\g<1>)', filestr) ''' # Final fixes: replace all text between cells by markdown code cells # Note: the patterns are overlapping so a plain re.sub will not work, # here we run through all blocks found and subsitute the first remaining # one, one by one. 
pattern = r' \},\n(.+?)\{\n "cell_type":' begin_pattern = r'^(.+?)\{\n "cell_type":' remaining_block_begin = re.findall(begin_pattern, filestr, flags=re.DOTALL) remaining_blocks = re.findall(pattern, filestr, flags=re.DOTALL) import string for block in remaining_block_begin + remaining_blocks: filestr = string.replace(filestr, block, json_markdown(block) + ' ', maxreplace=1) filestr_end = re.sub(r' \{\n "cell_type": .+?\n \},\n', '', filestr, flags=re.DOTALL) filestr = filestr.replace(filestr_end, json_markdown(filestr_end)) filestr = """{ "metadata": { "name": "SOME NAME" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ """ + filestr.rstrip() + '\n'+ \ json_pycode('', final_prompt_no+1, 'python').rstrip()[:-1] + """ ], "metadata": {} } ] }""" ''' return filestr
def ipynb_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Turn a Markdown document with block markers into notebook JSON.

    The text in ``filestr`` is split into markdown chunks and marker lines
    (lines containing ``_CODE_BLOCK`` or ``_MATH_BLOCK``).  A marker line
    starts with the block number, which is used as index into
    ``code_blocks`` or ``tex_blocks``.  Text and math chunks become
    markdown cells, code chunks become code cells with consecutive
    prompt numbers, and the resulting v3 notebook is written out as JSON.

    Parameters:
        filestr: the document text, one marker line per code/math block.
        code_blocks: list of code block strings, indexed by block number.
        code_block_types: not used by this function.
        tex_blocks: list of LaTeX block strings, indexed by block number.
            NOTE: the list is modified in place (environment delimiters
            are stripped and each entry is wrapped in ``$$``).
        format: not used by this function.

    Returns:
        The JSON text produced by the nbformat v3 ``nbjson.writes``
        function, with ``label{`` and ``(ref{...})`` rewritten afterwards.

    Raises:
        ImportError: if neither ``IPython.nbformat.v3`` nor ``nbformat.v3``
            can be imported.
        ValueError, IndexError: if a marker line does not start with a
            valid block number.
    """
    from common import _CODE_BLOCK, _MATH_BLOCK

    # --- Parse document into markdown text, code markers, math markers ---
    authors = ''
    blocks = [[]]
    for line in filestr.splitlines():
        if line.startswith('authors = [new_author(name='):
            authors = line
        elif _CODE_BLOCK in line or _MATH_BLOCK in line:
            # Close the pending text chunk.  Only join when it really is a
            # list of lines: if the previous entry is itself a marker line
            # (two markers in a row), joining would split that string into
            # one character per line.
            if isinstance(blocks[-1], list):
                blocks[-1] = '\n'.join(blocks[-1]).strip()
            blocks.append(line)
        else:
            if not isinstance(blocks[-1], list):
                blocks.append([])
            blocks[-1].append(line)
    if isinstance(blocks[-1], list):
        blocks[-1] = '\n'.join(blocks[-1]).strip()

    # --- Add block type info ---
    pattern = r'(\d+) +%s'
    code_pattern = pattern % _CODE_BLOCK
    math_pattern = pattern % _MATH_BLOCK
    for i, text in enumerate(blocks):
        if re.match(code_pattern, text):
            blocks[i] = ['code', text]
        elif re.match(math_pattern, text):
            blocks[i] = ['math', text]
        else:
            blocks[i] = ['text', text]

    # --- Go through tex_blocks and wrap in $$ ---
    # (doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks)
    for i in range(len(tex_blocks)):
        tex = tex_blocks[i]
        # Remove \[ and \] or \begin/end{equation*} in single equations
        for delimiter in (r'\[', r'\]',
                          r'\begin{equation*}', r'\end{equation*}'):
            tex = tex.replace(delimiter, '')

        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex)
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align'):
                print("""\
*** warning: latex envir \\begin{%s} does not work well in Markdown.
    Stick to \\[ ... \\], equation, equation*, align, or align*
    environments in math environments.
""" % envir)

        # Markdown: add $$ on each side of the equation and make sure
        # there is no stray whitespace/newline around the equation itself
        tex_blocks[i] = '$$\n' + tex.strip() + '\n$$'

    # --- Insert code and tex blocks in place of their marker lines ---
    for entry in blocks:
        marker = entry[1]
        is_code = _CODE_BLOCK in marker
        if not (is_code or _MATH_BLOCK in marker):
            continue
        # start of marker line: number block-indicator code-type
        n = int(marker.split()[0])
        # Decide on the marker line itself, not on the substituted text,
        # so that code mentioning the math marker is left untouched.
        entry[1] = code_blocks[n] if is_code else tex_blocks[n]

    # --- Make IPython structures ---
    try:
        from IPython.nbformat.v3 import (
            new_code_cell, new_text_cell, new_worksheet,
            new_notebook, new_metadata, new_author)
        from IPython.nbformat.v3 import nbjson
    except ImportError:
        # The `IPython.nbformat` package has been deprecated
        from nbformat.v3 import (
            new_code_cell, new_text_cell, new_worksheet,
            new_notebook, new_metadata, new_author)
        from nbformat.v3 import nbjson

    ws = new_worksheet()
    prompt_number = 1
    for block_tp, block in blocks:
        if block_tp in ('text', 'math') and block != '':
            ws.cells.append(new_text_cell(u'markdown', source=block))
        elif block_tp == 'code' and block != '':
            ws.cells.append(new_code_cell(input=block,
                                          prompt_number=prompt_number,
                                          collapsed=False))
            # Each code cell gets its own, consecutive prompt number
            prompt_number += 1

    # Catch the title as the first heading
    m = re.search(r'^#+\s*(.+)$', filestr, flags=re.MULTILINE)
    title = m.group(1).strip() if m else ''
    if authors:
        # NOTE(review): this executes a line taken from the document text
        # ("authors = [new_author(name=...)]").  Only safe for trusted
        # documents; consider parsing the author data instead.
        # An explicit namespace is used so the result can be read back on
        # both Python 2 and 3 (a bare exec cannot rebind a function local
        # on Python 3).
        namespace = {'new_author': new_author}
        exec(authors, namespace)
        authors = namespace['authors']
        md = new_metadata(name=title, authors=authors)
    else:
        md = new_metadata(name=title)
    # NOTE(review): md is built but an empty metadata object is what goes
    # into the notebook (unchanged from before) - confirm whether title
    # and authors should be attached.
    nb = new_notebook(worksheets=[ws], metadata=new_metadata())

    # Convert nb to json format
    filestr = nbjson.writes(nb)

    # must do the replacements here at the very end when json is written out
    # \eqref and labels will not work, but labels do no harm
    filestr = re.sub(r'([^\\])label\{', r'\g<1>\\\\label{', filestr,
                     flags=re.MULTILINE)
    # \eqref crashes the notebook (may be better with full MathJax
    # support?), so references are written as plain text
    filestr = re.sub(r'\(ref\{(.+?)\}\)', r'Eq (\g<1>)', filestr)
    return filestr