def cwiki_figure(m):
    """Typeset a figure as a Creole wiki image: ``{{filename|caption}}``.

    ``m`` is a regex match object with the named groups ``filename`` and
    ``caption``.  A filename starting with ``http`` is used as it is.  A
    local file whose extension is not .png, .gif, .jpg or .jpeg is
    converted to PNG with ImageMagick's ``convert`` program, and the PNG
    file is referenced instead.  A ``label{...}`` in the caption is kept
    as ``(...)``.

    Raises IOError if a local figure file does not exist.
    """
    filename = m.group('filename')
    is_link = filename.startswith('http')
    if not is_link and not os.path.isfile(filename):
        raise IOError('no figure file %s' % filename)

    root, ext = os.path.splitext(filename)
    if not is_link and ext not in ('.png', '.gif', '.jpg', '.jpeg'):
        # try to convert image file to PNG, using
        # convert from ImageMagick:
        png_file = root + '.png'
        # Pass an argument list (no shell) so that file names containing
        # spaces or shell metacharacters are handled correctly.
        cmd = ['convert', filename, 'png:' + png_file]
        try:
            subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        except (subprocess.CalledProcessError, OSError):
            # OSError: the convert program could not be started at all
            errwarn('\n**** Warning: could not run ' + ' '.join(cmd))
            errwarn('Convert %s to PNG format manually' % filename)
            _abort()
        filename = png_file

    caption = m.group('caption')
    # keep label if it's there:
    caption = re.sub(r'label\{(.+?)\}', r'(\g<1>)', caption)
    return r"""{{%s|%s}}""" % (filename, caption)
def cwiki_figure(m):
    """Typeset a figure as a Creole wiki image: ``{{filename|caption}}``.

    ``m`` is a regex match object with the named groups ``filename`` and
    ``caption``.  A filename starting with ``http`` is used as it is.  A
    local file whose extension is not .png, .gif, .jpg or .jpeg is
    converted to PNG with ImageMagick's ``convert`` program, and the PNG
    file is referenced instead.  A ``label{...}`` in the caption is kept
    as ``(...)``.

    Raises IOError if a local figure file does not exist.
    """
    filename = m.group('filename')
    is_link = filename.startswith('http')
    if not is_link and not os.path.isfile(filename):
        raise IOError('no figure file %s' % filename)

    root, ext = os.path.splitext(filename)
    if not is_link and ext not in ('.png', '.gif', '.jpg', '.jpeg'):
        # try to convert image file to PNG, using
        # convert from ImageMagick:
        png_file = root + '.png'
        # Pass an argument list (no shell) so that file names containing
        # spaces or shell metacharacters are handled correctly.
        cmd = ['convert', filename, 'png:' + png_file]
        try:
            subprocess.check_output(cmd, stderr=subprocess.STDOUT)
        except (subprocess.CalledProcessError, OSError):
            # OSError: the convert program could not be started at all
            errwarn('\n**** Warning: could not run ' + ' '.join(cmd))
            errwarn('Convert %s to PNG format manually' % filename)
            _abort()
        filename = png_file

    caption = m.group('caption')
    # keep label if it's there:
    caption = re.sub(r'label\{(.+?)\}', r'(\g<1>)', caption)
    return r"""{{%s|%s}}""" % (filename, caption)
def subst_footnote(m):
    """Return the XML footnote markup for a footnote reference.

    ``m`` is a regex match object with a named group ``name``.  The
    name is stripped of surrounding whitespace and looked up in the
    module-level ``name2index`` mapping; an unknown name is reported
    through ``errwarn`` followed by ``_abort()``.
    """
    name = m.group('name').strip()
    if name in name2index:
        # Look up with the stripped name, i.e. the same key that was just
        # tested for membership (the raw group may carry whitespace).
        i = name2index[name]
    else:
        errwarn('*** error: found footnote with name "%s", but this one is not defined' % name)
        _abort()
    # NOTE(review): the element is terminated by "<footnote>", not
    # "</footnote>" - looks like a typo, but it is left unchanged here in
    # case later processing depends on it; confirm.
    xml = r'<footnote id="%s">%s<footnote>' % (i, name)
    return xml
def mwiki_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Insert code and math blocks in ``filestr`` using MediaWiki markup.

    Every entry of ``tex_blocks`` is run through ``equation2nothing`` and
    ``remove_labels`` (the list is updated in place), the blocks are put
    into the text by ``insert_code_and_tex``, and the ``!bc``/``!ec`` and
    ``!bt``/``!et`` delimiters are replaced by ``<syntaxhighlight>`` and
    ``<math>`` tags.  Returns the new text.
    """
    # Background: http://en.wikipedia.org/wiki/Help:Displaying_a_formula
    # MediaWiki math has no support for equation labels, and the
    # equation and \[ \] environments must be taken away.
    # align environments are kept as they are (no align2equations call):
    # they work in Wikipedia and Wikibooks, and splitting them into
    # separate equations does not look much better elsewhere.
    for pos, tex in enumerate(tex_blocks):
        tex_blocks[pos], eq_labels = remove_labels(equation2nothing(tex))
        for eq_label in eq_labels:
            if eq_label in filestr:
                errwarn('*** warning: reference to label "%s" in an equation does not work in MediaWiki' % eq_label)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    # Languages known to the syntax highlighter, see
    # http://www.mediawiki.org/wiki/Extension:SyntaxHighlight_GeSHi#Supported_languages
    envir2lang = {
        'cod': 'python', 'pycod': 'python', 'cycod': 'python',
        'fcod': 'fortran', 'ccod': 'c', 'cppcod': 'cpp',
        'mcod': 'matlab', 'plcod': 'perl', 'shcod': 'bash',
        'pro': 'python', 'pypro': 'python', 'cypro': 'python',
        'fpro': 'fortran', 'cpro': 'c', 'cpppro': 'cpp',
        'mpro': 'matlab', 'plpro': 'perl', 'shpro': 'bash',
        'rbpro': 'ruby', 'rbcod': 'ruby',
        'javacod': 'java', 'javapro': 'java',
        'htmlcod': 'html5', 'xmlcod': 'xml',
        'htmlpro': 'html5', 'xmlpro': 'xml',
        'html': 'html5', 'xml': 'xml',
        'sys': 'bash', 'dat': 'text', 'csv': 'text', 'txt': 'text',
        'pyoptpro': 'python', 'pyscpro': 'python',
        'ipy': 'python', 'pyshell': 'python',
    }
    for envir, lang in envir2lang.items():
        filestr = re.sub(r'^!bc\s+%s\s*\n' % envir,
                         '<syntaxhighlight lang="%s">\n' % lang,
                         filestr, flags=re.MULTILINE)
    # Any remaining !bc (unknown or missing environment) becomes plain text
    filestr = re.sub(r'^!bc.*$\n', '<syntaxhighlight lang="text">\n',
                     filestr, flags=re.MULTILINE)
    filestr = re.sub(r'!ec\n', '</syntaxhighlight>\n', filestr)
    filestr = re.sub(r'^!bt\n', ':<math>\n', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'!et\n', '</math>\n', filestr)

    # The <<<TOC>>> placeholder is swapped for the real __TOC__ syntax only
    # here at the end, since __TOC__ would otherwise be taken for a
    # paragraph heading.
    return filestr.replace('<<<TOC>>>', '__TOC__')
def sphinx_ref_and_label(section_label2title, format, filestr):
    """Translate references and labels in ``filestr`` to Sphinx syntax.

    If the text contains ``!split`` and the ``sphinx_keep_splits`` option
    is not set, all existing ``!split`` lines are removed and a new
    ``!split`` is inserted before every heading of the topmost section
    level found (9, 7 or 5 ``=`` characters).  Then
    ``ref_and_label_commoncode`` is applied and every ``ref{x}`` is
    rewritten as ``:ref:`x```.  Returns the new text.
    """
    # Special fix early in the process:
    # Deal with !split - by default we place splits before
    # all the topmost sections
    # (This must be done before labels are put above section headings)
    if "!split" in filestr and not option("sphinx_keep_splits"):
        errwarn("*** warning: new !split inserted (override all existing !split)")
        # Note: the title is at this stage translated to a chapter heading!
        # This title/heading must be removed for the algorithm below to work
        # (remove it, then insert afterwards)
        pattern = r"^.. Document title:\n\n={3,9}.+?={3,9}"
        m = re.search(pattern, filestr, flags=re.MULTILINE)
        title_replacement = "<<<<<<<DOCUMENT TITLE>>>>>>>>>>>>"  # "unlikely" str
        if m:
            title = m.group()
            filestr = filestr.replace(title, title_replacement)
        else:
            title = ""

        topmost_section = 0
        for no_of_eq in [9, 7, 5]:
            if re.search(r"^%s" % ("=" * no_of_eq), filestr, flags=re.MULTILINE):
                topmost_section = no_of_eq
                errwarn(" before every %s heading %s" % ("=" * topmost_section, "=" * topmost_section))
                errwarn(" because this strategy gives a well-functioning")
                errwarn(" table of contents in Sphinx")
                errwarn(" (use --sphinx_keep_splits to enforce your own !split commands)")
                break

        if topmost_section:
            # First remove all !split
            filestr = re.sub(r"^!split *\n", "", filestr, flags=re.MULTILINE)
            # Insert new splits before all topmost sections
            heading = re.compile(r"^%s (.+?) %s" % ("=" * topmost_section, "=" * topmost_section))
            filestr = "\n".join(
                ["!split\n" + line if heading.search(line) else line
                 for line in filestr.splitlines()])
        # Put the document title back (no-op if no title was found)
        filestr = filestr.replace(title_replacement, title)

    filestr = ref_and_label_commoncode(section_label2title, format, filestr)

    # replace all references to sections:
    for label in section_label2title:
        filestr = filestr.replace("ref{%s}" % label, ":ref:`%s`" % label)

    # Replace remaining ref{x} as :ref:`x`
    # (raw replacement string: "\g" is not a valid escape in a plain string)
    filestr = re.sub(r"ref\{(.+?)\}", r":ref:`\g<1>`", filestr)
    return filestr
def rst_movie(m):
    """Return reStructuredText for a movie as an indented raw HTML block.

    ``m`` is a regex match object that is passed on to ``html_movie`` and
    that has a named group ``filename``.  A warning is issued through
    ``errwarn`` when the filename starts with neither ``http`` nor ``mov``.
    """
    indented_html = indent_lines(html_movie(m), 'sphinx')
    rst_text = '.. raw:: html\n' + indented_html + '\n'

    filename = m.group('filename')
    if not filename.startswith(('http', 'mov')):
        errwarn('*** warning: movie file %s' % filename)
        errwarn(' is not in mov* subdirectory - this will give problems with sphinx')
    return rst_text
def ipynb_index_bib(filestr, index, citations, pubfile, pubdata):
    """Typeset citations, bibliography, index entries and labels for ipynb.

    ``citations`` maps citation labels to numbers; every ``cite{label}``
    becomes a ``<cite data-cite=...>`` tag.  If ``pubfile`` is not None, a
    BibTeX file is exported by running ``publish export`` in the
    directory of ``pubfile`` and the ``BIBFILE:`` line is replaced by an
    nbconvert bibliography template.  ``idx{...}`` and ``label{...}`` are
    saved as ``<!-- dom:... -->`` comments (labels also get a ``div``
    tag).  ``index`` and ``pubdata`` are not used.  Returns the new text.
    """
    # ipynb has support for latex-style bibliography.
    # Quite some code here is copied from latex_index_bib
    # http://nbviewer.ipython.org/github/ipython/nbconvert-examples/blob/master/citations/Tutorial.ipynb
    if citations:
        from common import cite_with_multiple_args2multiple_cites
        filestr = cite_with_multiple_args2multiple_cites(filestr)
    for label in citations:
        filestr = filestr.replace(
            'cite{%s}' % label,
            '<cite data-cite="%s">[%d]</cite>' % (label, citations[label]))

    if pubfile is not None:
        # Always produce a new bibtex file
        bibtexfile = pubfile[:-3] + 'bib'
        errwarn('\nexporting publish database %s to %s:' % (pubfile, bibtexfile))
        publish_cmd = 'publish export %s' % os.path.basename(bibtexfile)
        # Note: we have to run publish in the directory where pubfile resides
        this_dir = os.getcwd()
        pubfile_dir = os.path.dirname(pubfile)
        if not pubfile_dir:
            pubfile_dir = os.curdir
        os.chdir(pubfile_dir)
        try:
            os.system(publish_cmd)
        finally:
            # Always return to the original working directory
            os.chdir(this_dir)

        bibstyle = option('latex_bibstyle=', 'plain')
        from latex import fix_latex_command_regex
        # NOTE(review): the template text is kept exactly as found; it
        # looks like it once spanned several lines - confirm.
        bibtext = fix_latex_command_regex(r""" ((*- extends 'latex_article.tplx' -*)) ((* block bibliography *)) \bibliographystyle{%s} \bibliography{%s} ((* endblock bibliography *)) """ % (bibstyle, bibtexfile[:-4]), application='replacement')
        filestr = re.sub(r'^BIBFILE:.+$', bibtext, filestr, flags=re.MULTILINE)

    # Save idx{} and label{} as metadata, also have labels as div tags
    # (the idx pattern must have balanced parentheses, otherwise re.sub
    # raises re.error for every document)
    filestr = re.sub(r'(idx\{.+?\})', r'<!-- dom:\g<1> -->', filestr)
    filestr = re.sub(r'(label\{(.+?)\})', r'<!-- dom:\g<1> --><div id="\g<2>"></div>', filestr)
    # Also treat special cell delimiter comments that might appear from
    # doconce ipynb2doconce conversions
    filestr = re.sub(r'^# ---------- (markdown|code) cell$', '', filestr, flags=re.MULTILINE)
    return filestr
def ipynb_index_bib(filestr, index, citations, pubfile, pubdata):
    """Typeset citations, bibliography, index entries and labels for ipynb.

    ``citations`` maps citation labels to numbers; every ``cite{label}``
    becomes a ``<cite data-cite=...>`` tag.  If ``pubfile`` is not None, a
    BibTeX file is exported by running ``publish export`` in the
    directory of ``pubfile`` and the ``BIBFILE:`` line is replaced by an
    nbconvert bibliography template.  ``idx{...}`` and ``label{...}`` are
    saved as ``<!-- dom:... -->`` comments (labels also get a ``div``
    tag).  ``index`` and ``pubdata`` are not used.  Returns the new text.
    """
    # ipynb has support for latex-style bibliography.
    # Quite some code here is copied from latex_index_bib
    # http://nbviewer.ipython.org/github/ipython/nbconvert-examples/blob/master/citations/Tutorial.ipynb
    if citations:
        from common import cite_with_multiple_args2multiple_cites
        filestr = cite_with_multiple_args2multiple_cites(filestr)
    for label in citations:
        filestr = filestr.replace(
            'cite{%s}' % label,
            '<cite data-cite="%s">[%d]</cite>' % (label, citations[label]))

    if pubfile is not None:
        # Always produce a new bibtex file
        bibtexfile = pubfile[:-3] + 'bib'
        errwarn('\nexporting publish database %s to %s:' % (pubfile, bibtexfile))
        publish_cmd = 'publish export %s' % os.path.basename(bibtexfile)
        # Note: we have to run publish in the directory where pubfile resides
        this_dir = os.getcwd()
        pubfile_dir = os.path.dirname(pubfile)
        if not pubfile_dir:
            pubfile_dir = os.curdir
        os.chdir(pubfile_dir)
        try:
            os.system(publish_cmd)
        finally:
            # Always return to the original working directory
            os.chdir(this_dir)

        bibstyle = option('latex_bibstyle=', 'plain')
        from latex import fix_latex_command_regex
        # NOTE(review): the template text is kept exactly as found; it
        # looks like it once spanned several lines - confirm.
        bibtext = fix_latex_command_regex(r""" ((*- extends 'latex_article.tplx' -*)) ((* block bibliography *)) \bibliographystyle{%s} \bibliography{%s} ((* endblock bibliography *)) """ % (bibstyle, bibtexfile[:-4]), application='replacement')
        filestr = re.sub(r'^BIBFILE:.+$', bibtext, filestr, flags=re.MULTILINE)

    # Save idx{} and label{} as metadata, also have labels as div tags
    # (the idx pattern must have balanced parentheses, otherwise re.sub
    # raises re.error for every document)
    filestr = re.sub(r'(idx\{.+?\})', r'<!-- dom:\g<1> -->', filestr)
    filestr = re.sub(r'(label\{(.+?)\})', r'<!-- dom:\g<1> --><div id="\g<2>"></div>', filestr)
    # Also treat special cell delimiter comments that might appear from
    # doconce ipynb2doconce conversions
    filestr = re.sub(r'^# ---------- (markdown|code) cell$', '', filestr, flags=re.MULTILINE)
    return filestr
def YouTubeVideo(filename):
    """Return Python code that shows a YouTube movie via IPython.display.

    The video name is whatever follows ``watch?v=`` or ``youtu.be/`` in
    ``filename``.  The import statement is emitted only if the module
    variable ``movie_encountered`` is false; the variable is then set
    to True.
    """
    global movie_encountered

    for marker in ('watch?v=', 'youtu.be/'):
        if marker in filename:
            video = filename.split(marker)[1]
            break
    else:
        errwarn('*** error: youtube movie name "%s" could not be interpreted' % filename)
        _abort()

    pieces = []
    if not movie_encountered:
        pieces.append('from IPython.display import YouTubeVideo\n')
        movie_encountered = True
    pieces.append('YouTubeVideo("%s")\n' % video)
    return ''.join(pieces)
def YouTubeVideo(filename):
    """Build the notebook code for embedding a YouTube movie.

    ``filename`` must contain ``watch?v=`` or ``youtu.be/``; the text
    after that marker is used as the video name.  The first call (as
    recorded in the module variable ``movie_encountered``) also emits
    the ``from IPython.display import YouTubeVideo`` line.
    """
    global movie_encountered

    long_form, short_form = 'watch?v=', 'youtu.be/'
    if long_form in filename:
        video = filename.split(long_form)[1]
    elif short_form in filename:
        video = filename.split(short_form)[1]
    else:
        errwarn('*** error: youtube movie name "%s" could not be interpreted' % filename)
        _abort()

    if movie_encountered:
        header = ''
    else:
        header = 'from IPython.display import YouTubeVideo\n'
        movie_encountered = True
    return header + 'YouTubeVideo("%s")\n' % video
def pdflatex_emoji(m):
    """Return LaTeX code that includes an emoji image.

    ``m`` is a regex match object: group 2 is the emoji name, and groups
    1 and 3 are reproduced before and after the generated code.  The
    image ``<name>.png`` is looked for in the module-level directory
    ``latexfigdir`` (created if missing) and downloaded from
    ``common.emoji_url`` when it is not there.  If the downloaded file
    contains the text "Not Found", the file is removed, an error is
    reported through ``errwarn`` and ``_abort()`` is called.
    """
    space1 = m.group(1)
    space2 = m.group(3)
    name = m.group(2)
    if not os.path.isdir(latexfigdir):
        os.mkdir(latexfigdir)
    emojifile = os.path.join(latexfigdir, name + '.png')
    if not os.path.isfile(emojifile):
        # Download emoji image
        from common import emoji_url
        url = emoji_url + name + '.png'
        try:
            from urllib.request import urlretrieve  # Python 3
        except ImportError:
            from urllib import urlretrieve  # Python 2
        urlretrieve(url, filename=emojifile)
        # Check that this was successful (binary mode: a real PNG file
        # cannot be decoded as text)
        with open(emojifile, 'rb') as f:
            not_found = b'Not Found' in f.read()
        if not_found:
            # Do not leave the bogus file behind - it would be taken for
            # a valid image the next time
            os.remove(emojifile)
            errwarn('*** error: emoji "%s" is probably misspelled - cannot find any emoji with that name' % name)
            _abort()
    s = space1 + r'\raisebox{-\height+\ht\strutbox}{\includegraphics[height=1.5em]{%s}}' % emojifile + space2
    # NOTE: \ht needs the calc package!
    return s
def rst_author(authors_and_institutions, auth2index, inst2index, index2inst, auth2email):
    """Return the reStructuredText author information.

    Each item of ``authors_and_institutions`` is a 3-tuple whose first
    element is the author name and whose last element is the email (or
    a false value).  With the ``rst_uio`` option the first author is
    written as a ``uio-meta`` directive; otherwise an ``:Authors:``
    field with all authors is produced, with ``@`` in emails written
    as `` at ``.  Institutions are not used.
    """
    if not option('rst_uio'):
        # (text is already r-stripped in typeset_authors)
        # we skip institutions in rst
        def with_email(person, address):
            if address:
                return person + ' (%s)' % address.replace('@', ' at ')
            return person

        entries = [with_email(person, address)
                   for person, _, address in authors_and_institutions]
        return ':Authors: ' + ', '.join(entries)

    if not authors_and_institutions:
        errwarn('*** error: with --rst_uio there must be an AUTHOR:')
        errwarn(' field with (at least) one author w/email who will be')
        errwarn(' listed as the resposible under uio-meta::')
        _abort()
    else:
        # Use first author and email
        first = authors_and_institutions[0]
        responsible, email = first[0], first[2]
        # NOTE(review): the literal is kept exactly as found; it looks
        # like it once spanned several lines - confirm.
        text = """ .. uio-meta:: :responsible-name: %s """ % responsible
        if email:
            text += ' :responsible-email: %s\n\n' % email
    return text
def pdflatex_emoji(m):
    """Return LaTeX code that includes an emoji image.

    ``m`` is a regex match object: group 2 is the emoji name, and groups
    1 and 3 are reproduced before and after the generated code.  The
    image ``<name>.png`` is looked for in the module-level directory
    ``latexfigdir`` (created if missing) and downloaded from
    ``common.emoji_url`` when it is not there.  If the downloaded file
    contains the text "Not Found", the file is removed, an error is
    reported through ``errwarn`` and ``_abort()`` is called.
    """
    space1 = m.group(1)
    space2 = m.group(3)
    name = m.group(2)
    if not os.path.isdir(latexfigdir):
        os.mkdir(latexfigdir)
    emojifile = os.path.join(latexfigdir, name + '.png')
    if not os.path.isfile(emojifile):
        # Download emoji image
        from common import emoji_url
        url = emoji_url + name + '.png'
        try:
            from urllib.request import urlretrieve  # Python 3
        except ImportError:
            from urllib import urlretrieve  # Python 2
        urlretrieve(url, filename=emojifile)
        # Check that this was successful (binary mode: a real PNG file
        # cannot be decoded as text)
        with open(emojifile, 'rb') as f:
            not_found = b'Not Found' in f.read()
        if not_found:
            # Do not leave the bogus file behind - it would be taken for
            # a valid image the next time
            os.remove(emojifile)
            errwarn('*** error: emoji "%s" is probably misspelled - cannot find any emoji with that name' % name)
            _abort()
    s = space1 + r'\raisebox{-\height+\ht\strutbox}{\includegraphics[height=1.5em]{%s}}' % emojifile + space2
    # NOTE: \ht needs the calc package!
    return s
def gwiki_figure(m):
    """Return a Google wiki placeholder for a figure.

    ``m`` is a regex match object with the named groups ``filename`` and
    ``caption``.  A local file whose extension is not .png, .gif, .jpg
    or .jpeg is converted to PNG with ImageMagick's ``convert``
    program.  The returned text contains the caption and a comment
    telling that the URL of the image file must be inserted manually; a
    note about this is also issued through ``errwarn``.

    Raises IOError if a local figure file does not exist.
    """
    filename = m.group('filename')
    link = filename if filename.startswith('http') else None
    if not link and not os.path.isfile(filename):
        raise IOError('no figure file %s' % filename)

    root, ext = os.path.splitext(filename)
    if link is None:
        if ext not in ('.png', '.gif', '.jpg', '.jpeg'):
            # try to convert image file to PNG, using
            # convert from ImageMagick:
            # (subprocess instead of the Python 2-only commands module; an
            # argument list avoids shell quoting problems in file names)
            import subprocess
            cmd = ['convert', filename, 'png:' + root + '.png']
            try:
                subprocess.check_output(cmd, stderr=subprocess.STDOUT)
            except (subprocess.CalledProcessError, OSError):
                # OSError: the convert program could not be started at all
                errwarn('\n**** Warning: could not run ' + ' '.join(cmd))
                errwarn('Convert %s to PNG format manually' % filename)
                _abort()
            filename = root + '.png'

    caption = m.group('caption')
    # keep label if it's there:
    caption = re.sub(r'label\{(.+?)\}', r'(\g<1>)', caption)

    # NOTE(review): the two literals below are kept exactly as found; they
    # look like they once spanned several lines - confirm.
    errwarn(""" NOTE: Place %s at some place on the web and edit the .gwiki page, either manually (seach for 'Figure: ') or use the doconce script: doconce gwiki_figsubst.py mydoc.gwiki URL """ % filename)

    result = r""" --------------------------------------------------------------- Figure: %s (the URL of the image file %s must be inserted here) <wiki:comment> Put the figure file %s on the web (e.g., as part of the googlecode repository) and substitute the line above with the URL. </wiki:comment> --------------------------------------------------------------- """ % (caption, filename, filename)
    return result
def gwiki_figure(m):
    """Return a Google wiki placeholder for a figure.

    ``m`` is a regex match object with the named groups ``filename`` and
    ``caption``.  A local file whose extension is not .png, .gif, .jpg
    or .jpeg is converted to PNG with ImageMagick's ``convert``
    program.  The returned text contains the caption and a comment
    telling that the URL of the image file must be inserted manually; a
    note about this is also issued through ``errwarn``.

    Raises IOError if a local figure file does not exist.
    """
    filename = m.group('filename')
    link = filename if filename.startswith('http') else None
    if not link and not os.path.isfile(filename):
        raise IOError('no figure file %s' % filename)

    root, ext = os.path.splitext(filename)
    if link is None:
        if ext not in ('.png', '.gif', '.jpg', '.jpeg'):
            # try to convert image file to PNG, using
            # convert from ImageMagick:
            # (subprocess instead of the Python 2-only commands module; an
            # argument list avoids shell quoting problems in file names)
            import subprocess
            cmd = ['convert', filename, 'png:' + root + '.png']
            try:
                subprocess.check_output(cmd, stderr=subprocess.STDOUT)
            except (subprocess.CalledProcessError, OSError):
                # OSError: the convert program could not be started at all
                errwarn('\n**** Warning: could not run ' + ' '.join(cmd))
                errwarn('Convert %s to PNG format manually' % filename)
                _abort()
            filename = root + '.png'

    caption = m.group('caption')
    # keep label if it's there:
    caption = re.sub(r'label\{(.+?)\}', r'(\g<1>)', caption)

    # NOTE(review): the two literals below are kept exactly as found; they
    # look like they once spanned several lines - confirm.
    errwarn(""" NOTE: Place %s at some place on the web and edit the .gwiki page, either manually (seach for 'Figure: ') or use the doconce script: doconce gwiki_figsubst.py mydoc.gwiki URL """ % filename)

    result = r""" --------------------------------------------------------------- Figure: %s (the URL of the image file %s must be inserted here) <wiki:comment> Put the figure file %s on the web (e.g., as part of the googlecode repository) and substitute the line above with the URL. </wiki:comment> --------------------------------------------------------------- """ % (caption, filename, filename)
    return result
def ipynb_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Translate the document text to the JSON text of a notebook.

    The steps are:

    1. typeset admonition-like environments (``!b<envir>``/``!e<envir>``)
       in the style given by the ``ipynb_admon`` option,
    2. classify every code block as a notebook code cell, a hidden code
       cell, or Markdown text (``code_blocks`` is updated in place),
    3. pack source files, figures and movies in a tar file,
    4. split ``filestr`` into text, code and math blocks,
    5. wrap the math blocks in ``$$`` and give labeled equations a tag
       (``tex_blocks`` is updated in place),
    6. build notebook cells (format version from the ``ipynb_version``
       option, 3 or 4) and write the notebook as JSON,
    7. fix labels and equation references in the JSON text.

    Returns the JSON text.
    """
    # Fix pandoc citations to normal internal links: [[key]](#key)
    filestr = re.sub(r'\[@(.+?)\]', r'[[\g<1>]](#\g<1>)', filestr)

    # filestr becomes json list after this function so we must typeset
    # envirs here. All envirs are typeset as pandoc_quote.
    from common import _CODE_BLOCK, _MATH_BLOCK
    envir_format = option('ipynb_admon=', 'paragraph')

    # Remove all !bpop-!epop environments (they cause only problems and
    # have no use)
    for envir in 'pop', 'slidecell':
        filestr = re.sub('^<!-- !b%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub('^<!-- !e%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
    filestr = re.sub('^<!-- !bnotes.*?<!-- !enotes -->\n', '', filestr,
                     flags=re.DOTALL | re.MULTILINE)
    filestr = re.sub('^<!-- !split -->\n', '', filestr, flags=re.MULTILINE)

    from doconce import doconce_envirs
    envirs = doconce_envirs()[8:-2]
    if envir_format not in ('quote', 'paragraph', 'hrule'):
        errwarn('*** error: --ipynb_admon=%s is not supported' % envir_format)
        _abort()
        envirs = []  # nothing can be typeset with an unknown format

    for envir in envirs:
        pattern = r'^!b%s(.*?)\n(.+?)\s*^!e%s' % (envir, envir)

        # subst is used right away in the re.sub call below, so it always
        # sees the current value of envir
        def subst(m):
            title = m.group(1).strip()
            # Text size specified in parenthesis?
            m2 = re.search(r'^\s*\((.+?)\)', title)

            if title == '' and envir not in ('block', 'quote'):
                title = envir.capitalize() + '.'
            elif title.lower() == 'none':
                title = ''  # (was a comparison without any effect)
            elif m2:
                # Remove the size specification exactly as it was written
                title = title.replace(m2.group(), '').strip()
            elif title and title[-1] not in ('.', ':', '!', '?'):
                # Make sure the title ends with punctuation
                title += '.'
            # Recall that this formatting is called very late
            # so native format must be used!
            if title:
                title = '**' + title + '**\n'
                # Could also consider subsubsection formatting
            block = m.group(2)

            # Always use quote typesetting for quotes
            if envir_format == 'quote' or envir == 'quote':
                # Make Markdown quote of the block: lines start with >
                lines = []
                for line in block.splitlines():
                    # Just quote plain text
                    special = (_MATH_BLOCK in line or
                               _CODE_BLOCK in line or
                               line.startswith('FIGURE:') or
                               line.startswith('MOVIE:') or
                               line.startswith('|'))
                    if special:
                        lines.append('\n' + line + '\n')
                    else:
                        lines.append('> ' + line)
                block = '\n'.join(lines) + '\n\n'

                # Add quote and a blank line after title
                if title:
                    title = '> ' + title + '>\n'
            else:
                # Add a blank line after title
                if title:
                    title += '\n'

            if envir_format == 'hrule':
                # Native ------ does not work, use <hr/>
                text = '\n\n<hr/>\n' + title + block + '\n<hr/>\n\n'
            else:
                text = title + block + '\n\n'
            return text

        filestr = re.sub(pattern, subst, filestr,
                         flags=re.DOTALL | re.MULTILINE)

    # Fix pyshell and ipy interactive sessions: remove prompt and output.
    # or split in multiple cells such that output comes out at the end of a cell
    # Fix sys environments and use run prog.py so programs can be run in cell
    # Insert %matplotlib inline in the first block using matplotlib
    # Only typeset Python code as blocks, otherwise !bc environments
    # become plain indented Markdown.
    from doconce import dofile_basename
    ipynb_tarfile = 'ipynb-%s-src.tar.gz' % dofile_basename
    src_paths = set()  # builtin set (the sets module is Python 2 only)
    mpl_inline = False

    split_pyshell = option('ipynb_split_pyshell=', 'on')
    split_pyshell = not (split_pyshell is None or
                         split_pyshell in ('no', 'False', 'off'))

    ipynb_code_tp = [None]*len(code_blocks)
    for i in range(len(code_blocks)):
        # Check if continuation lines are in the code block, because
        # doconce.py inserts a blank after the backslash
        if '\\ \n' in code_blocks[i]:
            code_blocks[i] = code_blocks[i].replace('\\ \n', '\\\n')

        if not mpl_inline and re.search(
                r'(import|from) +(matplotlib|scitools)', code_blocks[i]):
            code_blocks[i] = '%matplotlib inline\n\n' + code_blocks[i]
            mpl_inline = True

        tp = code_block_types[i]
        if tp.endswith('-t'):
            # Standard Markdown code with pandoc/github extension
            language = tp[:-2]
            language_spec = language2pandoc.get(language, '')
            code_blocks[i] = "```%s\n" % language_spec + \
                             indent_lines(code_blocks[i].strip(), format) + \
                             "```"
            ipynb_code_tp[i] = 'markdown'
        elif tp.startswith('pyshell') or tp.startswith('ipy'):
            lines = code_blocks[i].splitlines()
            last_cell_end = -1
            if split_pyshell:
                new_code_blocks = []
                # Split for each output and put in separate cell
                for j in range(len(lines)):
                    line = lines[j]
                    if line.startswith('>>>') or line.startswith('... '):
                        lines[j] = line[4:]
                    elif line.startswith('In ['):  # IPython
                        lines[j] = ':'.join(line.split(':')[1:]).strip()
                    elif line.lstrip().startswith('...: '):
                        # IPython continuation line: the prompt is padded
                        # with blanks, so strip the padding and the
                        # 5-character '...: ' instead of a fixed width
                        lines[j] = line.lstrip()[5:]
                    else:
                        # output (no prefix or Out)
                        lines[j] = ''
                        new_code_blocks.append(
                            '\n'.join(lines[last_cell_end+1:j+1]))
                        last_cell_end = j
                if last_cell_end < len(lines) - 1:
                    # Statements after the last output must not be lost
                    new_code_blocks.append(
                        '\n'.join(lines[last_cell_end+1:]))
                code_blocks[i] = new_code_blocks
                ipynb_code_tp[i] = 'cell'
            else:
                # Remove prompt and output lines; leave code executable in cell
                for j in range(len(lines)):
                    line = lines[j]
                    if line.startswith('>>> ') or line.startswith('... '):
                        lines[j] = line[4:]
                    elif line.startswith('In ['):
                        lines[j] = ':'.join(line.split(':')[1:]).strip()
                    else:
                        # output
                        lines[j] = ''
                code_blocks[i] = '\n'.join(
                    [line for line in lines if line != ''])
                ipynb_code_tp[i] = 'cell'
        elif tp.startswith('sys'):
            # Do we find execution of python file? If so, collect the file
            # and make a run file command in a cell.
            # Otherwise, it is just a plain verbatim Markdown block.
            found_unix_lines = False
            lines = code_blocks[i].splitlines()
            for j in range(len(lines)):
                m = re.search(r'(.+?>|\$) *python +([A-Za-z_0-9]+?\.py)',
                              lines[j])
                if m:
                    name = m.group(2).strip()
                    if os.path.isfile(name):
                        # The pattern only matches names without any
                        # directory part, so the program file itself is
                        # what must follow the notebook.
                        # NOTE(review): the directory name (always empty)
                        # was collected before - confirm that the file is
                        # what should go into the tar file.
                        src_paths.add(name)
                        lines[j] = '%%run "%s"' % name
                else:
                    found_unix_lines = True
            # (src_paths stays a set so that later sys blocks can add to it)
            if src_paths and not found_unix_lines:
                # This is a sys block with run commands only
                code_blocks[i] = '\n'.join(lines)
                ipynb_code_tp[i] = 'cell'
            else:
                # Standard Markdown code
                code_blocks[i] = indent_lines('\n'.join(lines), format)
                ipynb_code_tp[i] = 'markdown'
        elif tp.endswith('hid'):
            ipynb_code_tp[i] = 'cell_hidden'
        elif tp.startswith('py'):
            ipynb_code_tp[i] = 'cell'
        else:
            # Should support other languages as well, but not for now
            code_blocks[i] = indent_lines(code_blocks[i], format)
            ipynb_code_tp[i] = 'markdown'

    # figure_files and movie_files are global variables and contain
    # all figures and movies referred to
    src_paths = sorted(src_paths)
    if figure_files:
        src_paths += figure_files
    if movie_files:
        src_paths += movie_files

    if src_paths:
        # Make tar file with all the source files that need to follow
        # the notebook
        os.system('tar cfz %s %s' % (ipynb_tarfile, ' '.join(src_paths)))
        errwarn('collected all required additional files in ' + ipynb_tarfile + ' which must be distributed with the notebook')
    elif os.path.isfile(ipynb_tarfile):
        os.remove(ipynb_tarfile)

    # Parse document into markdown text, code blocks, and tex blocks.
    # Store in nested list notebook_blocks.
    notebook_blocks = [[]]
    authors = ''

    def finish_text_block():
        # Join the lines of an open text block.  A previous entry that is
        # already a string (a code/math line) must be left alone: joining
        # a string would put a newline between all its characters.
        if isinstance(notebook_blocks[-1], list):
            notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()

    for line in filestr.splitlines():
        if line.startswith('authors = [new_author(name='):  # old author method
            authors = line[10:]
        elif _CODE_BLOCK in line:
            code_block_tp = line.split()[-1]
            if code_block_tp in ('pyhid',) or not code_block_tp.endswith('hid'):
                finish_text_block()
                notebook_blocks.append(line)
            # else: hidden block to be dropped (may include more languages
            # with time in the above tuple)
        elif _MATH_BLOCK in line:
            finish_text_block()
            notebook_blocks.append(line)
        else:
            if not isinstance(notebook_blocks[-1], list):
                notebook_blocks.append([])
            notebook_blocks[-1].append(line)
    finish_text_block()

    # Add block type info
    pattern = r'(\d+) +%s'
    for i in range(len(notebook_blocks)):
        m = re.match(pattern % _CODE_BLOCK, notebook_blocks[i])
        if m:
            idx = int(m.group(1))
            if ipynb_code_tp[idx] in ('cell', 'cell_hidden'):
                notebook_blocks[i] = [ipynb_code_tp[idx], notebook_blocks[i]]
            else:
                notebook_blocks[i] = ['text', notebook_blocks[i]]
        elif re.match(pattern % _MATH_BLOCK, notebook_blocks[i]):
            notebook_blocks[i] = ['math', notebook_blocks[i]]
        else:
            notebook_blocks[i] = ['text', notebook_blocks[i]]

    # Go through tex_blocks and wrap in $$
    # (doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks)
    label2tag = {}
    tag_counter = 1
    for i in range(len(tex_blocks)):
        # Extract labels and add tags
        labels = re.findall(r'label\{(.+?)\}', tex_blocks[i])
        for label in labels:
            label2tag[label] = tag_counter
            # Insert tag to get labeled equation
            tex_blocks[i] = tex_blocks[i].replace(
                'label{%s}' % label,
                'label{%s} \\tag{%s}' % (label, tag_counter))
            tag_counter += 1

        # Remove \[ and \] or \begin/end{equation*} in single equations
        for delimiter in (r'\[', r'\]',
                          r'\begin{equation*}', r'\end{equation*}'):
            tex_blocks[i] = tex_blocks[i].replace(delimiter, '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex_blocks[i])
        if m:
            tex_envir = m.group(1)
            if tex_envir not in ('equation', 'equation*', 'align*',
                                 'align', 'array'):
                errwarn('*** warning: latex envir \\begin{%s} does not work well in Markdown.\n'
                        '    Stick to \\[ ... \\], equation, equation*, align, or align*\n'
                        '    environments in math environments.\n' % tex_envir)
        eq_type = '$$'  # alternative: 'heading'
        if eq_type == '$$':
            # Markdown: add $$ on each side of the equation
            # (make sure there is no newline after the equation)
            tex_blocks[i] = '$$\n' + tex_blocks[i].strip() + '\n$$'
        elif eq_type == 'heading':
            # Use heading (###) and simple formula (remove newline
            # in math expressions to keep everything within a heading) as
            # the equation then looks bigger
            tex_blocks[i] = '### $ ' + ' '.join(tex_blocks[i].splitlines()) + ' $'

        # Add labels for the eqs above the block (for reference)
        if labels:
            label_tp = '<div id="%s"></div>'
            tex_blocks[i] = '<!-- Equation labels as ordinary links -->\n' + \
                            ' '.join([label_tp % label for label in labels]) + \
                            '\n\n' + tex_blocks[i]

    # notebook_blocks is now a list of text chunks in markdown and math/code
    # line instructions. Insert code and tex blocks
    for i in range(len(notebook_blocks)):
        placeholder = notebook_blocks[i][1]
        if _CODE_BLOCK in placeholder or _MATH_BLOCK in placeholder:
            # start of placeholder: number block-indicator code-type
            n = int(placeholder.split()[0])
            # Test the placeholder line, not the text just inserted
            if _CODE_BLOCK in placeholder:
                notebook_blocks[i][1] = code_blocks[n]  # can be list!
            elif _MATH_BLOCK in placeholder:
                notebook_blocks[i][1] = tex_blocks[n]

    # Make IPython structures
    nb_version = int(option('ipynb_version=', '4'))
    if nb_version == 3:
        try:
            # new_author may be needed by eval(authors) further down
            from IPython.nbformat.v3 import (
                new_code_cell, new_text_cell, new_worksheet,
                new_notebook, new_metadata, new_author)
            nb = new_worksheet()
        except ImportError:
            errwarn('*** error: could not import IPython.nbformat.v3!')
            errwarn(' set --ipynb_version=4 or leave out --ipynb_version=3')
            _abort()
    elif nb_version == 4:
        try:
            from nbformat.v4 import (
                new_code_cell, new_markdown_cell, new_notebook)
        except ImportError:
            # Try old style
            try:
                from IPython.nbformat.v4 import (
                    new_code_cell, new_markdown_cell, new_notebook)
            except ImportError:
                errwarn('*** error: cannot do import nbformat.v4 or IPython.nbformat.v4')
                errwarn(' make sure IPython notebook or Jupyter is installed correctly')
                _abort()
    else:
        errwarn('*** error: --ipynb_version=%s is not supported (use 3 or 4)' % nb_version)
        _abort()

    cells = []
    mdstr = []  # plain md format of the notebook (kept for debugging dumps)
    prompt_number = 1
    for block_tp, block in notebook_blocks:
        if block_tp in ('text', 'math'):
            # Pure comments between math/code and math/code come
            # out as empty blocks, should detect that situation
            # (challenging - can have multiple lines of comments,
            # or begin and end comment lines with important things between)
            if block != '':
                if nb_version == 3:
                    nb.cells.append(new_text_cell(u'markdown', source=block))
                elif nb_version == 4:
                    cells.append(new_markdown_cell(source=block))
                mdstr.append(('markdown', block))
        elif block_tp in ('cell', 'cell_hidden'):
            collapsed = block_tp == 'cell_hidden'
            # A split interactive session is a list of cell sources
            sources = block if isinstance(block, list) else [block]
            for source in sources:
                source = source.rstrip()
                if source == '':
                    continue  # never make empty cells
                if nb_version == 3:
                    nb.cells.append(new_code_cell(
                        input=source,
                        prompt_number=prompt_number,
                        collapsed=collapsed))
                elif nb_version == 4:
                    cells.append(new_code_cell(
                        source=source,
                        execution_count=prompt_number,
                        metadata=dict(collapsed=collapsed)))
                prompt_number += 1
                mdstr.append(('codecell', source))

    if nb_version == 3:
        # Catch the title as the first heading
        m = re.search(r'^#+\s*(.+)$', filestr, flags=re.MULTILINE)
        title = m.group(1).strip() if m else ''
        # md below is not used for anything
        if authors:
            # NOTE(review): eval of a line taken from the document text -
            # unsafe if the document is not trusted
            authors = eval(authors)
            md = new_metadata(name=title, authors=authors)
        else:
            md = new_metadata(name=title)
        nb = new_notebook(worksheets=[nb], metadata=new_metadata())
        # Let us make v4 notebook here by upgrading
        from IPython.nbformat.v4 import upgrade
        nb = upgrade(nb)
        import IPython.nbformat.v4.nbjson as nbjson

        # Convert nb to json format
        filestr = nbjson.writes(nb)
    elif nb_version == 4:
        nb = new_notebook(cells=cells)
        # Same order as for the cell functions: new package first
        try:
            from nbformat import writes
        except ImportError:
            from IPython.nbformat import writes
        filestr = writes(nb, version=4)

    # Check that there are no empty cells:
    if '"input": []' in filestr:
        errwarn('*** error: empty cells in notebook - report bug in DocOnce')
        _abort()

    # must do the replacements here at the very end when json is written out
    # \eqref and labels will not work, but labels (only in math) do no harm
    filestr = re.sub(r'([^\\])label\{', r'\g<1>\\\\label{', filestr,
                     flags=re.MULTILINE)

    # Equation references become explicit links showing the tag number
    def subst(m):
        label = m.group(1)
        try:
            return r'[(%s)](#%s)' % (label2tag[label], label)
        except KeyError:
            errwarn('*** error: label "%s" is not defined' % label)
            return ''  # the reference is removed, as before

    filestr = re.sub(r'\(ref\{(.+?)\}\)', subst, filestr)
    return filestr
def ipynb_movie(m):
    """
    Translate a matched movie specification into text for the ipynb format.

    m is a regex match object with the named groups 'filename' and
    'caption'. The command-line option --ipynb_movie= selects the output:
    'md' (output of html_movie), 'HTML...' (code cell wrapping the HTML in
    IPython.display.HTML, or a YouTubeVideo object when the value contains
    'YouTube'), or 'ipynb' (YouTubeVideo object or a base64-embedded
    <video> tag). Local movie files are recorded in the global movie_files
    list. Returns the generated text.
    """
    global html_encountered, movie_encountered, movie_files

    # m.group() must be called before m.group('name')
    dom_comment = '<!-- dom:%s -->' % m.group()
    filename = m.group('filename')
    caption = m.group('caption').strip()
    is_youtube = 'youtu.be' in filename or 'youtube.com' in filename
    is_remote = filename.startswith('http')

    if '*' in filename or '->' in filename:
        errwarn('*** warning: * or -> in movie filenames is not supported in ipynb')
        return dom_comment

    def youtube_cell(url):
        # Code lines that display the movie through a YouTubeVideo object
        global movie_encountered
        if 'watch?v=' in url:
            video_id = url.split('watch?v=')[1]
        elif 'youtu.be/' in url:
            video_id = url.split('youtu.be/')[1]
        else:
            errwarn('*** error: youtube movie name "%s" could not be interpreted' % url)
            _abort()

        code = ''
        if not movie_encountered:
            # The import is only emitted for the first YouTube movie
            code += 'from IPython.display import YouTubeVideo\n'
            movie_encountered = True
        return code + 'YouTubeVideo("%s")\n' % video_id

    # Statement printing the caption in the generated cell (if any caption)
    caption_code = '\nprint "%s"' % caption if caption else ''

    pieces = [dom_comment, '\n<!-- begin movie -->\n']
    display_method = option('ipynb_movie=', 'HTML')

    if display_method == 'md':
        pieces.append(html_movie(m))

    elif display_method.startswith('HTML'):
        pieces.append('\n!bc pycod\n')
        if is_youtube and 'YouTube' in display_method:
            pieces.append(youtube_cell(filename))
            pieces.append(caption_code)
        else:
            # Use HTML formatting
            if not html_encountered:
                pieces.append('from IPython.display import HTML\n')
                html_encountered = True
            pieces.append('_s = """' + html_movie(m) + '"""\n')
            pieces.append('HTML(_s)\n')
            if not is_remote:
                movie_files.append(filename)
        pieces.append('!ec\n')

    elif display_method == 'ipynb':
        pieces.append('!bc pycod\n')
        if is_youtube:
            pieces.append(youtube_cell(filename))
            pieces.append(caption_code)
        else:
            # see http://nbviewer.ipython.org/github/ipython/ipython/blob/1.x/examples/notebooks/Part%205%20-%20Rich%20Display%20System.ipynb
            # http://stackoverflow.com/questions/18019477/how-can-i-play-a-local-video-in-my-ipython-notebook
            # http://python.6.x6.nabble.com/IPython-User-embedding-non-YouTube-movies-in-the-IPython-notebook-td5024035.html
            # Just support .mp4, .ogg, and .webm
            ext = os.path.splitext(filename)[1]
            if ext not in ('.mp4', '.ogg', '.webm'):
                errwarn('*** error: movie "%s" in format %s is not supported for --ipynb_movie=%s' % (filename, ext, display_method))
                errwarn(' use --ipynb_movie=HTML instead')
                _abort()
            height = 365
            width = 640
            if is_remote:
                file_open = 'import urllib\nvideo = urllib.urlopen("%s").read()' % filename
            else:
                file_open = 'video = open("%s", "rb").read()' % filename
            pieces.append("""
%s
from base64 import b64encode
video_encoded = b64encode(video)
video_tag = '<video controls loop alt="%s" height="%s" width="%s" src="data:video/%s;base64,{0}">'.format(video_encoded)
""" % (file_open, filename, height, width, ext[1:]))
            if not is_remote:
                movie_files.append(filename)
            if not html_encountered:
                pieces.append('from IPython.display import HTML\n')
                html_encountered = True
            pieces.append('HTML(data=video_tag)\n')
            pieces.append(caption_code)
        pieces.append('!ec\n')

    else:
        errwarn('*** error: --ipynb_movie=%s is not supported' % display_method)
        _abort()

    pieces.append('<!-- end movie -->\n')
    return ''.join(pieces)
def rst_quiz(quiz):
    """
    Typeset a quiz as reStructuredText.

    quiz is a dict; the keys read here are 'question', 'choices' and the
    optional 'question prefix', 'choice prefix', 'new page' and 'embedding'.
    Each choice is a sequence (right/wrong, text[, explanation]). The
    right/wrong answer and the explanation are placed in :abbr: tooltips.
    Returns the rst text of the quiz.
    """
    import string

    # basestring exists only in Python 2; fall back to str elsewhere so the
    # 'choice prefix' type check below does not raise NameError.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    question_prefix = quiz.get('question prefix',
                               option('quiz_question_prefix=', 'Question:'))
    common_choice_prefix = option('quiz_choice_prefix=', 'Choice')
    quiz_expl = option('quiz_explanations=', 'on')

    # Sphinx tooltip: :abbr:`TERM (explanation in tooltip)`
    # Can e.g. just have the right answer number as tooltip!

    text = '\n\n'
    if 'new page' in quiz:
        text += '.. !split\n%s\n%s' % (quiz['new page'],
                                       '-' * len(quiz['new page']))

    text += '.. begin quiz\n\n'
    # Don't write Question: ... if inside an exercise section
    if quiz.get('embedding', 'None') not in ['exercise']:
        text += '\n\n'
        if question_prefix:
            text += '**%s** ' % (question_prefix)
            if quiz['question'].lstrip().startswith('..'):
                # block, add extra \n
                text += '\n\n'

    text += quiz['question'] + '\n\n\n'

    # Patterns for markup that cannot appear inside an :abbr: tooltip.
    # The '<' of the URL is kept outside the captured group so that the
    # result is "text (http://...)" and not "text (<http://...)".
    url_pattern = r'`(.+?) <(https?.+?)>`__'
    verbatim_pattern = r'``(.+?)``'
    math_pattern = r':math:`(.+?)`'
    # rst bold/emphasis have no leading colon. The lookarounds require the
    # asterisks to hug the text and not touch a word character on the
    # outside, so expressions like 2*3 or 2**3 are left alone.
    bold_pattern = r'(?<![\w*])\*\*(?=\S)(.+?)(?<=\S)\*\*(?![\w*])'
    emph_pattern = r'(?<![\w*])\*(?=[^\s*])(.+?)(?<=[^\s*])\*(?![\w*])'

    # List choices as paragraphs
    for i, choice in enumerate(quiz['choices']):
        choice_no = string.ascii_uppercase[i]
        answer = choice[0].capitalize() + '!'
        choice_prefix = common_choice_prefix
        if 'choice prefix' in quiz:
            if isinstance(quiz['choice prefix'][i], string_types):
                choice_prefix = quiz['choice prefix'][i]
        if choice_prefix == '' or choice_prefix[-1] in ['.', ':', '?']:
            pass  # don't add choice number/letter
        else:
            choice_prefix += ' %s:' % choice_no

        expl = ''
        tooltip = ''
        if len(choice) == 3 and quiz_expl == 'on':
            expl = choice[2]
            if '.. figure::' in expl or 'math::' in expl or '.. code-block::' in expl:
                errwarn('*** warning: quiz explanation contains block (fig/code/math)')
                errwarn(' and is therefore skipped')
                errwarn(expl + '\n')
                expl = ''  # drop explanation when it needs blocks
            # Remove inline markup
            expl = re.sub(url_pattern, r'\g<1> (\g<2>)', expl)
            expl = re.sub(verbatim_pattern, r'\g<1>', expl)
            expl = re.sub(math_pattern, r'\g<1>', expl)
            expl = re.sub(bold_pattern, r'\g<1>', expl, flags=re.DOTALL)
            expl = re.sub(emph_pattern, r'\g<1>', expl, flags=re.DOTALL)
            # A tooltip must be a single line
            tooltip = ' '.join(expl.splitlines())

        if expl:
            text += '**%s** %s\n\n:abbr:`? (%s)` :abbr:`# (%s)`\n\n' % (
                choice_prefix, choice[1], answer, tooltip)
        else:  # no explanation
            text += '**%s** %s\n\n:abbr:`? (%s)`\n\n' % (
                choice_prefix, choice[1], answer)

    text += '.. end quiz\n\n'
    return text
def sphinx_inline_comment(m):
    """
    Typeset an inline comment for sphinx/rst.

    m is a regex match with the named groups "name" and "comment".
    Names starting with "del " or "add " and comments containing " -> "
    are treated as edit markup. Explicit HTML typesetting does not work
    in this format, so bold is used. Returns the replacement text.
    """
    global edit_markup_warning
    name = m.group("name").strip()
    comment = m.group("comment").strip()

    looks_like_edit = name[:3] in ("add", "del", "edi") or "->" in comment
    if looks_like_edit and not edit_markup_warning:
        # Warn only once per run (the flag is a module-level global)
        for message in (
            "*** warning: sphinx/rst is a suboptimal format for",
            " typesetting edit markup such as",
            " " + m.group(),
            " Use HTML or LaTeX output instead, implement the",
            " edits (doconce apply_edit_comments) and then use sphinx.",
        ):
            errwarn(message)
        edit_markup_warning = True

    # Single punctuation characters are spelled out to be visible
    punctuation = {",": "comma", ";": "semicolon", ".": "period"}
    prefix, who = name[:4], name[4:]

    if prefix == "del ":
        if comment in punctuation:
            return r" (**edit %s**: delete %s)" % (who, punctuation[comment])
        return r"(**edit %s**: **delete** %s)" % (who, comment)

    if prefix == "add ":
        if comment in punctuation:
            return r"%s (**edit %s: add %s**)" % (comment, who, punctuation[comment])
        return r" (**edit %s: add**) %s (**end add**)" % (who, comment)

    # Ordinary name
    comment = " ".join(comment.splitlines())  # '\s->\s' -> ' -> '
    if " -> " not in comment:
        # Ordinary comment
        return r"[**%s**: %s]" % (name, comment)

    # Replacement
    if comment.count(" -> ") != 1:
        errwarn("*** wrong syntax in inline comment:")
        errwarn(comment)
        errwarn("(more than two ->)")
        _abort()
    orig, new = comment.split(" -> ")
    return r"(**%s: remove** %s) (**insert:**)%s (**end insert**)" % (name, orig, new)
def sphinx_inline_comment(m):
    """
    Return the sphinx/rst text for an inline comment match.

    The match m provides the groups 'name' and 'comment'. A name with
    prefix 'del ' or 'add ', or a comment of the form 'old -> new',
    is edit markup; anything else is an ordinary comment. Bold is used
    since explicit HTML typesetting does not work.
    """
    name = m.group('name').strip()
    comment = m.group('comment').strip()

    global edit_markup_warning
    if not edit_markup_warning:
        if name[:3] in ('add', 'del', 'edi') or '->' in comment:
            errwarn('*** warning: sphinx/rst is a suboptimal format for')
            errwarn(' typesetting edit markup such as')
            errwarn(' ' + m.group())
            errwarn(' Use HTML or LaTeX output instead, implement the')
            errwarn(' edits (doconce apply_edit_comments) and then use sphinx.')
            edit_markup_warning = True

    # Name of the character if the comment is a lone , ; or . (else None)
    char_name = {',': 'comma', ';': 'semicolon', '.': 'period'}.get(comment)
    action = name[:4]
    editor = name[4:]

    if action == 'del ':
        if char_name is None:
            result = r'(**edit %s**: **delete** %s)' % (editor, comment)
        else:
            result = r' (**edit %s**: delete %s)' % (editor, char_name)
    elif action == 'add ':
        if char_name is None:
            result = r' (**edit %s: add**) %s (**end add**)' % (editor, comment)
        else:
            result = r'%s (**edit %s: add %s**)' % (comment, editor, char_name)
    else:
        # Ordinary name
        comment = ' '.join(comment.splitlines())  # '\s->\s' -> ' -> '
        arrows = comment.count(' -> ')
        if arrows == 0:
            # Ordinary comment
            result = r'[**%s**: %s]' % (name, comment)
        else:
            # Replacement
            if arrows != 1:
                errwarn('*** wrong syntax in inline comment:')
                errwarn(comment)
                errwarn('(more than two ->)')
                _abort()
            orig, new = comment.split(' -> ')
            result = r'(**%s: remove** %s) (**insert:**)%s (**end insert**)' % (
                name, orig, new)
    return result
def sphinx_code(filestr, code_blocks, code_block_types,
                tex_blocks, format):
    """
    Typeset code and math blocks for the sphinx format.

    In rst syntax, code blocks are typeset with a directive followed by
    indented text. This function indents the code and TeX blocks, turns
    LaTeX labels into :label: options, inserts the blocks into filestr,
    and replaces the !bc/!ec/!bt/!et markers by code-block and math
    directives.

    filestr: the document text with markers for the removed blocks.
    code_blocks, tex_blocks: lists of block texts (modified in place).
    code_block_types: list with the !bc environment name of each code block
        (hidden runestone types are renamed in place).
    format: output format name, passed on to indent_lines.

    Returns the new filestr.
    """
    # default mappings of !bc environments and pygments languages:
    envir2pygments = dict(
        cod='python', pro='python',
        pycod='python', cycod='cython',
        pypro='python', cypro='cython',
        fcod='fortran', fpro='fortran',
        ccod='c', cppcod='c++',
        cpro='c', cpppro='c++',
        mcod='matlab', mpro='matlab',
        plcod='perl', plpro='perl',
        shcod='bash', shpro='bash',
        rbcod='ruby', rbpro='ruby',
        #sys='console',
        sys='text',
        rst='rst',
        css='css', csspro='css', csscod='css',
        dat='text', csv='text', txt='text',
        cc='text', ccq='text',  # not possible with extra indent for ccq
        ipy='ipy',
        xmlcod='xml', xmlpro='xml', xml='xml',
        htmlcod='html', htmlpro='html', html='html',
        texcod='latex', texpro='latex', tex='latex',
        latexcod='latex', latexpro='latex', latex='latex',
        do='doconce',
        pyshell='python',
        pyoptpro='python',
        pyscpro='python',
        )

    # grab line with: # sphinx code-blocks: cod=python cpp=c++ etc
    # (do this before code is inserted in case verbatim blocks contain
    # such specifications for illustration)
    # defs_line is also used in a warning further down, so it must have a
    # value even when the document has no such specification line.
    defs_line = '(no sphinx code-blocks line, default mapping used)'
    m = re.search(r'.. *[Ss]phinx +code-blocks?:(.+)', filestr)
    if m:
        defs_line = m.group(1)
        # turn specifications into a dictionary:
        for definition in defs_line.split():
            key, value = definition.split('=')
            envir2pygments[key] = value

    # First indent all code blocks
    for i in range(len(code_blocks)):
        if code_block_types[i].startswith('pyoptpro') and \
           not option('runestone'):
            code_blocks[i] = online_python_tutor(code_blocks[i],
                                                 return_tp='iframe')
        if code_block_types[i].endswith('-h'):
            # Show/hide blocks are nested inside a container directive
            indentation = ' ' * 8
        else:
            indentation = ' ' * 4
        code_blocks[i] = indent_lines(code_blocks[i], format,
                                      indentation)

    # After transforming align environments to separate equations
    # the problem with math labels in multiple eqs has disappeared.
    # (doconce.py applies align2equations, which takes all align
    # envirs and translates them to separate equations, but align*
    # environments are allowed.
    # Any output of labels in align means an error in the
    # align -> equation transformation...)

    # latex constructions that do not work with sphinx math (just removed).
    # eqnarray, align, split and gather environments are deliberately kept.
    commands = [
        r'\begin{equation}',
        r'\end{equation}',
        r'\begin{equation*}',
        r'\end{equation*}',
        r'\begin{multline}',
        r'\end{multline}',
        r'\begin{multline*}',
        r'\end{multline*}',
        r'\[', r'\]',
        # some common abbreviations (newcommands):
        r'\beqan', r'\eeqan',
        r'\beqa', r'\eeqa',
        r'\balnn', r'\ealnn',
        r'\baln', r'\ealn',
        r'\beq', r'\eeq',  # the simplest name, contained in others, must come last!
        ]

    math_labels = []
    multiple_math_labels = []  # sphinx has problems with multiple math labels
    for i in range(len(tex_blocks)):
        tex_blocks[i] = indent_lines(tex_blocks[i], format)
        # extract all \label{}s inside tex blocks and typeset them
        # with :label: tags
        label_regex = fix_latex(r'label\{(.+?)\}', application='match')
        labels = re.findall(label_regex, tex_blocks[i])
        if len(labels) == 1:
            tex_blocks[i] = ' :label: %s\n' % labels[0] + tex_blocks[i]
        elif len(labels) > 1:
            multiple_math_labels.append(labels)
        if len(labels) > 0:
            math_labels.extend(labels)
            tex_blocks[i] = re.sub(label_regex, '', tex_blocks[i])

        for command in commands:
            tex_blocks[i] = tex_blocks[i].replace(command, '')
        # &=& -> &=
        tex_blocks[i] = re.sub(r'&\s*=\s*&', ' &= ', tex_blocks[i])

    # Replace all references to equations that have labels in math environments:
    for label in math_labels:
        filestr = filestr.replace('(:ref:`%s`)' % label, ':eq:`%s`' % label)

    multiple_math_labels_with_refs = []  # collect the labels with references
    for labels in multiple_math_labels:
        for label in labels:
            ref = ':eq:`%s`' % label  # ref{} is translated to eq:``
            if ref in filestr:
                multiple_math_labels_with_refs.append(label)

    if multiple_math_labels_with_refs:
        errwarn("""
*** warning: detected non-align math environment with multiple labels
    (Sphinx cannot handle this equation system - labels will be removed
    and references to them will be empty):""")
        for label in multiple_math_labels_with_refs:
            errwarn(' label{%s}' % label)
        # Blank line after the list (works as statement and as function call)
        print('')

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'sphinx')

    # Remove all !bc ipy and !bc pyshell since interactive sessions
    # are automatically handled by sphinx without indentation
    # (just a blank line before and after).
    # The flags must be given by keyword: the fourth positional argument
    # of re.sub is count, so positional flags are silently not applied.
    filestr = re.sub(r'^!bc +d?ipy *\n(.*?)^!ec *\n',
                     r'\n\g<1>\n', filestr,
                     flags=re.DOTALL | re.MULTILINE)
    filestr = re.sub(r'^!bc +d?pyshell *\n(.*?)^!ec *\n',
                     r'\n\g<1>\n', filestr,
                     flags=re.DOTALL | re.MULTILINE)

    # Check if we have custom pygments lexers
    if 'ipy' in code_block_types:
        if not has_custom_pygments_lexer('ipy'):
            envir2pygments['ipy'] = 'python'
    if 'do' in code_block_types:
        if not has_custom_pygments_lexer('doconce'):
            envir2pygments['do'] = 'text'

    # Make correct code-block:: language constructions
    legal_pygments_languages = get_legal_pygments_lexers()
    for key in set(code_block_types):
        if key in envir2pygments:
            if envir2pygments[key] not in legal_pygments_languages:
                errwarn("""*** warning: %s is not a legal Pygments language (lexer)
found in line:
  %s

    The 'text' lexer will be used instead.
""" % (envir2pygments[key], defs_line))
                envir2pygments[key] = 'text'

        # Check that we have code installed to handle pyscpro
        if 'pyscpro' in filestr and key == 'pyscpro':
            try:
                import icsecontrib.sagecellserver
            except ImportError:
                errwarn("""
*** warning: pyscpro for computer code (sage cells) is requested, but'
    icsecontrib.sagecellserver from https://github.com/kriskda/sphinx-sagecell
    is not installed. Using plain Python typesetting instead.""")
                key = 'pypro'

        if key == 'pyoptpro':
            if option('runestone'):
                filestr = re.sub(
                    r'^!bc\s+%s\s*\n' % key,
                    '\n.. codelens:: codelens_\n :showoutput:\n\n',
                    filestr, flags=re.MULTILINE)
            else:
                filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                                 '\n.. raw:: html\n\n',
                                 filestr, flags=re.MULTILINE)
        elif key == 'pyscpro':
            if option('runestone'):
                filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                                 """
.. activecode:: activecode_
   :language: python

""", filestr, flags=re.MULTILINE)
            else:
                filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                                 '\n.. sagecellserver::\n\n',
                                 filestr, flags=re.MULTILINE)
        elif key == 'pysccod':
            if option('runestone'):
                # Include (i.e., run) all previous code segments...
                # NOTE: this is most likely not what we want
                # NOTE(review): activecode_counter is first assigned in the
                # "Insert counters for runestone blocks" step below, so this
                # line raises UnboundLocalError when reached; the intended
                # value cannot be determined from this function alone.
                include = ', '.join([str(i) for i in range(1, activecode_counter)])
                filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                                 """
.. activecode:: activecode_
   :language: python
   :include: %s
""" % include, filestr, flags=re.MULTILINE)
            else:
                errwarn(
                    '*** error: pysccod for sphinx is not supported without the --runestone flag\n (but pyscpro is via Sage Cell Server)')
                _abort()

        elif key == '':
            # any !bc with/without argument becomes a text block:
            filestr = re.sub(r'^!bc$', '\n.. code-block:: text\n\n', filestr,
                             flags=re.MULTILINE)
        elif key.endswith('hid'):
            if key in ('pyhid', 'jshid', 'htmlhid') and option('runestone'):
                # Allow runestone books to run hidden code blocks
                # (replace pyhid by pycod, then remove all !bc *hid)
                for i in range(len(code_block_types)):
                    if code_block_types[i] == key:
                        code_block_types[i] = key.replace('hid', 'cod')

                key2language = dict(py='python', js='javascript', html='html')
                language = key2language[key.replace('hid', '')]
                # NOTE(review): same use of activecode_counter before its
                # assignment as in the pysccod branch above.
                include = ', '.join([str(i) for i in range(1, activecode_counter)])
                filestr = re.sub(r'^!bc +%s\s*\n' % key,
                                 """
.. activecode:: activecode_
   :language: %s
   :include: %s
   :hidecode:

""" % (language, include), filestr, flags=re.MULTILINE)
            else:
                # Remove hidden code block
                pattern = r'^!bc +%s\n.+?^!ec' % key
                filestr = re.sub(pattern, '', filestr,
                                 flags=re.MULTILINE | re.DOTALL)
        else:
            show_hide = False
            if key.endswith('-h'):
                key_orig = key
                key = key[:-2]
                show_hide = True
            # Use the standard sphinx code-block directive
            if key in envir2pygments:
                pygments_language = envir2pygments[key]
            elif key in legal_pygments_languages:
                pygments_language = key
            else:
                errwarn('*** error: detected code environment "%s"' % key)
                errwarn(' which is not registered in sphinx.py (sphinx_code)')
                errwarn(' or not a language registered in pygments')
                _abort()
            if show_hide:
                # The code-block directive sits 4 spaces inside the toggle
                # container; the code itself was indented 8 spaces above.
                filestr = re.sub(
                    r'^!bc +%s\s*\n' % key_orig,
                    '\n.. container:: toggle\n\n'
                    '    .. container:: header\n\n'
                    '        **Show/Hide Code**\n\n'
                    '    .. code-block:: %s\n\n' % pygments_language,
                    filestr, flags=re.MULTILINE)
            else:
                filestr = re.sub(r'^!bc +%s\s*\n' % key,
                                 '\n.. code-block:: %s\n\n' %
                                 pygments_language, filestr,
                                 flags=re.MULTILINE)

    # any !bc with/without argument becomes a text block:
    filestr = re.sub(r'^!bc.*$', '\n.. code-block:: text\n\n', filestr,
                     flags=re.MULTILINE)
    filestr = re.sub(r'^!ec *\n', '\n', filestr, flags=re.MULTILINE)

    filestr = re.sub(r'^!bt *\n', '\n.. math::\n', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!et *\n', '\n', filestr, flags=re.MULTILINE)
    # Fix lacking blank line after :label:
    filestr = re.sub(r'^( :label: .+?)(\n *[^ ]+)', r'\g<1>\n\n\g<2>',
                     filestr, flags=re.MULTILINE)

    # Insert counters for runestone blocks
    if option('runestone'):
        codelens_counter = 0
        activecode_counter = 0
        lines = filestr.splitlines()
        for i in range(len(lines)):
            if '.. codelens:: codelens_' in lines[i]:
                codelens_counter += 1
                lines[i] = lines[i].replace('codelens_', 'codelens_%d' %
                                            codelens_counter)
            if '.. activecode:: activecode_' in lines[i]:
                activecode_counter += 1
                lines[i] = lines[i].replace('activecode_', 'activecode_%d' %
                                            activecode_counter)
        filestr = '\n'.join(lines)

    # Final fixes
    filestr = fix_underlines_in_headings(filestr)
    # Ensure blank line before and after comments
    filestr = re.sub(r'([.:;?!])\n^\.\. ', r'\g<1>\n\n.. ',
                     filestr, flags=re.MULTILINE)
    filestr = re.sub(r'(^\.\. .+)\n([^ \n]+)', r'\g<1>\n\n\g<2>',
                     filestr, flags=re.MULTILINE)

    # Line breaks interfere with tables and need a final blank line too
    lines = filestr.splitlines()
    inside_block = False
    for i in range(len(lines)):
        if lines[i].startswith('<linebreakpipe>') and not inside_block:
            inside_block = True
            lines[i] = lines[i].replace('<linebreakpipe> ', '') + '\n'
            continue
        if lines[i].startswith('<linebreakpipe>') and inside_block:
            lines[i] = '|' + lines[i].replace('<linebreakpipe>', '')
            continue
        if inside_block and not lines[i].startswith('<linebreakpipe>'):
            inside_block = False
            lines[i] = '| ' + lines[i] + '\n'
    filestr = '\n'.join(lines)

    # Remove double !split (TOC with a prefix !split gives two !splits)
    pattern = r'^.. !split\s+.. !split'
    filestr = re.sub(pattern, '.. !split', filestr, flags=re.MULTILINE)

    if option('html_links_in_new_window'):
        # Use JavaScript to make external links open in a new window
        filestr = """.. raw:: html

        <script type="text/javascript">
        $(document).ready(function() {
            $("a[href^='http']").attr('target','_blank');
        });
        </script>

""" + filestr

    # Remove too much vertical space
    filestr = re.sub(r'\n{3,}', '\n\n', filestr)

    return filestr
def fix_text(s, tp='answer'):
    """
    Answers and feedback in RunestoneInteractive book quizzes cannot
    contain math, figure and rst markup. Perform fixes.

    s is the rst text and tp names the kind of text (used in warnings
    only). Text with a math block, code block or figure is dropped with a
    warning and '' is returned. Otherwise the text is joined to one line,
    links, verbatim and inline math are turned into HTML, and emphasis
    markers are removed. Returns the fixed text.
    """
    drop = False
    blocks = (('math::', 'math block'),
              ('.. code-block::', 'code block'),
              ('.. figure::', 'figure'))
    for directive, description in blocks:
        if directive in s:
            errwarn('\n*** warning: quiz %s with %s not supported:'
                    % (tp, description))
            errwarn(s)
            drop = True
    if drop:
        return ''

    # Make multi-line paragraph a one-liner
    s = ' '.join(s.splitlines()).rstrip()

    # URL: the '<' stays outside the captured group, otherwise the link
    # target becomes href="<http://..."
    pattern = r'`(.+?) <(https?.+?)>`__'
    s = re.sub(pattern, r'<a href="\g<2>"> \g<1> </a>', s)
    pattern = r'``(.+?)``'  # verbatim
    s = re.sub(pattern, r'<tt>\g<1></tt>', s)
    pattern = r':math:`(.+?)`'  # inline math
    s = re.sub(pattern, r'<em>\g<1></em>', s)  # mimic italic....
    # emphasize: rst emphasis is *text* with no leading colon. The
    # lookarounds require the asterisks to hug the text and not touch a
    # word character on the outside, so 2*3 and **bold** are left alone.
    pattern = r'(?<![\w*])\*(?=[^\s*])(.+?)(?<=[^\s*])\*(?![\w*])'
    s = re.sub(pattern, r'\g<1>', s, flags=re.DOTALL)
    return s
def sphinx_code_orig(filestr, format):
    """
    Old version of the code/math typesetting for sphinx.

    NOTE: THIS FUNCTION IS NOT USED!!!!!!

    In rst syntax, code blocks are typeset with :: (verbatim)
    followed by indented blocks. This function indents everything
    inside code (or TeX) blocks and replaces the !bc/!ec/!bt/!et markers
    by code-block and math directives. Returns the new filestr.

    Raises TypeError if a requested language is not a legal Pygments
    lexer (legal_pygments_languages is not defined in this function;
    presumably a module-level name - verify before reviving this code).
    """
    # grab #sphinx code-blocks: cod=python cpp=c++ etc line
    # (do this before code is inserted in case verbatim blocks contain
    # such specifications for illustration)
    m = re.search(r"#\s*[Ss]phinx\s+code-blocks?:(.+?)\n", filestr)
    if m:
        defs_line = m.group(1)
        # turn defs into a dictionary definition:
        defs = {}
        for definition in defs_line.split():
            key, value = definition.split("=")
            defs[key] = value
    else:
        # defs_line is used in the error message below and must exist
        # also when the default mappings are in use
        defs_line = "(default mappings)"
        # default mappings:
        defs = dict(
            cod="python", pro="python",
            pycod="python", cycod="cython",
            pypro="python", cypro="cython",
            fcod="fortran", fpro="fortran",
            ccod="c", cppcod="c++",
            cpro="c", cpppro="c++",
            mcod="matlab", mpro="matlab",
            plcod="perl", plpro="perl",
            shcod="bash", shpro="bash",
            rbcod="ruby", rbpro="ruby",
            sys="console",
            dat="python",
            ipy="python",
            xmlcod="xml", xmlpro="xml", xml="xml",
            htmlcod="html", htmlpro="html", html="html",
            texcod="latex", texpro="latex", tex="latex",
        )
        # (the "python" typesetting is neutral if the text
        # does not parse as python)

    # fix latex constructions that do not work with sphinx math
    commands = [
        r"\begin{equation}",
        r"\end{equation}",
        r"\begin{equation*}",
        r"\end{equation*}",
        r"\begin{eqnarray}",
        r"\end{eqnarray}",
        r"\begin{eqnarray*}",
        r"\end{eqnarray*}",
        r"\begin{align}",
        r"\end{align}",
        r"\begin{align*}",
        r"\end{align*}",
        r"\begin{multline}",
        r"\end{multline}",
        r"\begin{multline*}",
        r"\end{multline*}",
        r"\begin{split}",
        r"\end{split}",
        r"\begin{gather}",
        r"\end{gather}",
        r"\begin{gather*}",
        r"\end{gather*}",
        r"\[",
        r"\]",
        # some common abbreviations (newcommands):
        r"\beqan", r"\eeqan",
        r"\beqa", r"\eeqa",
        r"\balnn", r"\ealnn",
        r"\baln", r"\ealn",
        r"\beq", r"\eeq",  # the simplest, contained in others, must come last!
    ]

    # first indent all code/tex blocks by 1) extracting all blocks,
    # 2) indenting each block, and 3) inserting the blocks:
    filestr, code_blocks, tex_blocks = remove_code_and_tex(filestr, format)
    for i in range(len(code_blocks)):
        code_blocks[i] = indent_lines(code_blocks[i], format)
    for i in range(len(tex_blocks)):
        tex_blocks[i] = indent_lines(tex_blocks[i], format)
        # remove all \label{}s inside tex blocks:
        tex_blocks[i] = re.sub(fix_latex(r"\label\{.+?\}", application="match"),
                               "", tex_blocks[i])
        # remove those without \ if there are any:
        tex_blocks[i] = re.sub(r"label\{.+?\}", "", tex_blocks[i])
        # side effects: `label{eq1}` as verbatim, but this is mostly a
        # problem for doconce documentation and can be rephrased...

        for command in commands:
            tex_blocks[i] = tex_blocks[i].replace(command, "")
        tex_blocks[i] = re.sub(r"&\s*=\s*&", " &= ", tex_blocks[i])

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, "rst")

    for key, language in defs.items():
        if language not in legal_pygments_languages:
            raise TypeError(
                "%s is not a legal Pygments language "
                "(lexer) in line with:\n %s" % (language, defs_line)
            )
        cpattern = re.compile(r"^!bc\s+%s\s*\n" % key, flags=re.MULTILINE)
        filestr, n = cpattern.subn("\n.. code-block:: %s\n\n" % language, filestr)
        # n is an int and must be converted before concatenation
        errwarn(key + " " + str(n))
        if n > 0:
            errwarn("sphinx: %d subst %s by %s" % (n, key, language))

    # any !bc with/without argument becomes a py (python) block:
    cpattern = re.compile(r"^!bc.+$", flags=re.MULTILINE)
    filestr = cpattern.sub("\n.. code-block:: py\n\n", filestr)

    filestr = re.sub(r"^!ec *\n", "\n", filestr, flags=re.MULTILINE)
    filestr = re.sub(r"^!bt *\n", "\n.. math::\n\n", filestr, flags=re.MULTILINE)
    filestr = re.sub(r"^!et *\n", "\n\n", filestr, flags=re.MULTILINE)

    return filestr
def subst(m):
    """
    Replace a reference to a label by a Markdown link to its tag.

    m is a regex match whose group 1 is the label. The link text is
    looked up in label2tag. For an undefined label an error is reported
    and None is returned.
    """
    label = m.group(1)
    try:
        return r'[(%s)](#%s)' % (label2tag[label], label)
    except KeyError:
        # Report the label itself: str() of a KeyError is the repr of the
        # key and would put an extra pair of quotes in the message.
        errwarn('*** error: label "%s" is not defined' % label)
        return None
def ipynb_figure(m):
    """
    Translate a matched figure specification into text for the ipynb format.

    m is a regex match with the named groups 'filename', 'caption' and
    'options'. The command-line option --ipynb_figure= selects the output:
    'md' (Markdown image syntax), 'imgtag' (plain <img> tag, default) or
    'Image' (code cell with an IPython.display.Image object). Local
    filenames are recorded in the global list figure_files. Returns the
    generated text.
    """
    global figure_files, figure_encountered

    # m.group() must be called before m.group('name')
    text = '<!-- dom:%s -->\n<!-- begin figure -->\n' % m.group()

    filename = m.group('filename')
    caption = m.group('caption').strip()
    opts = m.group('options').strip()
    if opts:
        # Keep all name=value options except those with no meaning here
        info = [s.split('=') for s in opts.split()]
        opts = ' '.join(['%s=%s' % (opt, value)
                         for opt, value in info
                         if opt not in ['frac', 'sidecap']])

    if not filename.startswith('http'):
        figure_files.append(filename)

    # Extract optional label in caption
    label = None
    pattern = r' *label\{(.+?)\}'
    label_match = re.search(pattern, caption)
    if label_match:
        label = label_match.group(1).strip()
        caption = re.sub(pattern, '', caption)

    display_method = option('ipynb_figure=', 'imgtag')
    if display_method == 'md':
        # Markdown image syntax for embedded image in text
        # (no control of size, then one must use HTML syntax)
        if label is not None:
            text += '<div id="%s"></div>\n' % label
        text += '![%s](%s)' % (caption, filename)
    elif display_method == 'imgtag':
        # Plain <img tag, allows specifying the image size
        if label is not None:
            text += '<div id="%s"></div>\n' % label
        # Fix caption markup so it becomes html
        from doconce import INLINE_TAGS_SUBST, INLINE_TAGS
        for tag in 'bold', 'emphasize', 'verbatim':
            caption = re.sub(INLINE_TAGS[tag],
                             INLINE_TAGS_SUBST['html'][tag],
                             caption, flags=re.MULTILINE)
        text += """
<p>%s</p>
<img src="%s" %s>

""" % (caption, filename, opts)
    elif display_method == 'Image':
        # Image object
        # NOTE: This code will normally not work because it inserts a verbatim
        # block in the file *after* all such blocks have been removed and
        # numbered. doconce.py makes a test prior to removal of blocks and
        # runs the handle_figures and movie substitution if ipynb format
        # and Image or movie object display.
        text += '\n'
        if label is not None:
            text += '<div id="%s"></div>' % label
        text += '<!-- options: %s -->\n' % opts
        # Append (not assign): assignment would throw away the dom and
        # begin-figure comments, the label anchor and the options comment.
        text += '!bc pycod\n'
        if not figure_encountered:
            # First time we have a figure, we must import Image
            text += 'from IPython.display import Image\n'
            figure_encountered = True
        if caption:
            # The newline keeps the Image call off the comment line
            text += '# ' + caption + '\n'
        if filename.startswith('http'):
            keyword = 'url'
        else:
            keyword = 'filename'
        text += 'Image(%s="%s")\n' % (keyword, filename)
        text += '!ec\n'
    else:
        errwarn('*** error: --ipynb_figure=%s is illegal, must be md, imgtag or Image' % display_method)
        _abort()
    text += '<!-- end figure -->\n'
    return text
def ipynb_code(filestr, code_blocks, code_block_types,
               tex_blocks, format):
    """
    Convert the Markdown text in `filestr` to Jupyter notebook JSON.

    Admonition environments (!b<envir> ... !e<envir>) are typeset as
    Markdown, every code block is classified as an executable cell, a
    hidden cell or verbatim Markdown, every tex block is wrapped in $$
    and gets equation tags, and the text is finally cut into notebook
    cells at the code/math block marker lines.

    Arguments:
    filestr           -- document text containing block marker lines
    code_blocks       -- list of code block texts (modified in place)
    code_block_types  -- list of code block type names, same order
    tex_blocks        -- list of LaTeX block texts (modified in place)
    format            -- format name, passed on to indent_lines

    Return the notebook serialized as a JSON string.

    Side effect: creates (or removes, if nothing is needed) the archive
    ipynb-<dofile_basename>-src.tar.gz with files the notebook refers to.
    """
    # Fix pandoc citations to normal internal links: [[key]](#key)
    filestr = re.sub(r'\[@(.+?)\]', r'[[\g<1>]](#\g<1>)', filestr)

    # filestr becomes a json list after this function so we must typeset
    # envirs here.
    from common import _CODE_BLOCK, _MATH_BLOCK

    envir_format = option('ipynb_admon=', 'paragraph')
    if envir_format not in ('quote', 'paragraph', 'hrule'):
        # Abort up front; there is no way to typeset the environments
        errwarn('*** error: --ipynb_admon=%s is not supported' % envir_format)
        _abort()

    # Remove all !bpop-!epop environments (they cause only problems and
    # have no use)
    for envir in 'pop', 'slidecell':
        filestr = re.sub('^<!-- !b%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub('^<!-- !e%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
    filestr = re.sub('^<!-- !bnotes.*?<!-- !enotes -->\n', '', filestr,
                     flags=re.DOTALL | re.MULTILINE)
    filestr = re.sub('^<!-- !split -->\n', '', filestr, flags=re.MULTILINE)

    def typeset_envir(m, envir):
        """Return the Markdown text replacing one !b<envir>...!e<envir>."""
        title = m.group(1).strip()
        # Text size specified in parenthesis?
        m2 = re.search(r'^\s*\((.+?)\)', title)

        if title == '' and envir not in ('block', 'quote'):
            title = envir.capitalize() + '.'
        elif title.lower() == 'none':
            title = ''
        elif m2:
            # Remove the size specification exactly as it was written
            title = title.replace('(%s)' % m2.group(1), '').strip()
        elif title and title[-1] not in ('.', ':', '!', '?'):
            # Make sure the title ends with punctuation
            title += '.'

        # Recall that this formatting is called very late
        # so native format must be used!
        if title:
            title = '**' + title + '**\n'
            # Could also consider subsubsection formatting
        block = m.group(2)

        # Always use quote typesetting for quotes
        if envir_format == 'quote' or envir == 'quote':
            # Make Markdown quote of the block: lines start with >
            quoted = []
            for line in block.splitlines():
                # Just quote plain text
                special = (_MATH_BLOCK in line or
                           _CODE_BLOCK in line or
                           line.startswith('FIGURE:') or
                           line.startswith('MOVIE:') or
                           line.startswith('|'))
                if special:
                    quoted.append('\n' + line + '\n')
                else:
                    quoted.append('> ' + line)
            block = '\n'.join(quoted) + '\n\n'

            # Add quote and a blank line after title
            if title:
                title = '> ' + title + '>\n'
        else:
            # Add a blank line after title
            if title:
                title += '\n'

        if envir_format == 'hrule':
            # Native ------ does not work, use <hr/>
            return '\n\n<hr/>\n' + title + block + '\n<hr/>\n\n'
        return title + block + '\n\n'

    from doconce import doconce_envirs
    for envir in doconce_envirs()[8:-2]:
        pattern = r'^!b%s(.*?)\n(.+?)\s*^!e%s' % (envir, envir)
        filestr = re.sub(
            pattern,
            lambda m, envir=envir: typeset_envir(m, envir),
            filestr, flags=re.DOTALL | re.MULTILINE)

    # Fix pyshell and ipy interactive sessions: remove prompt and output.
    # or split in multiple cells such that output comes out at the end of
    # a cell
    # Fix sys environments and use run prog.py so programs can be run in
    # cell
    # Insert %matplotlib inline in the first block using matplotlib
    # Only typeset Python code as blocks, otherwise !bc environments
    # become plain indented Markdown.
    from doconce import dofile_basename
    ipynb_tarfile = 'ipynb-%s-src.tar.gz' % dofile_basename
    src_paths = set()
    mpl_inline = False

    split_pyshell = option('ipynb_split_pyshell=', 'on') not in (
        None, 'no', 'False', 'off')

    ipynb_code_tp = [None] * len(code_blocks)
    for i in range(len(code_blocks)):
        # Check if continuation lines are in the code block, because
        # doconce.py inserts a blank after the backslash
        if '\\ \n' in code_blocks[i]:
            code_blocks[i] = code_blocks[i].replace('\\ \n', '\\\n')

        if not mpl_inline and (
            re.search(r'import +matplotlib', code_blocks[i]) or
            re.search(r'from +matplotlib', code_blocks[i]) or
            re.search(r'import +scitools', code_blocks[i]) or
            re.search(r'from +scitools', code_blocks[i])):
            code_blocks[i] = '%matplotlib inline\n\n' + code_blocks[i]
            mpl_inline = True

        tp = code_block_types[i]
        if tp.endswith('-t'):
            # Standard Markdown code with pandoc/github extension
            language = tp[:-2]
            language_spec = language2pandoc.get(language, '')
            code_blocks[i] = "```%s\n" % language_spec + \
                             indent_lines(code_blocks[i].strip(), format) + \
                             "```"
            ipynb_code_tp[i] = 'markdown'
        elif tp.startswith('pyshell') or tp.startswith('ipy'):
            lines = code_blocks[i].splitlines()
            if split_pyshell:
                new_code_blocks = []
                last_cell_end = -1
                # Split for each output and put in separate cell
                for j in range(len(lines)):
                    if lines[j].startswith('>>>') or \
                       lines[j].startswith('... '):
                        lines[j] = lines[j][4:]
                    elif lines[j].startswith('In ['):
                        # IPython
                        lines[j] = ':'.join(lines[j].split(':')[1:]).strip()
                    elif lines[j].startswith('   ...: '):
                        # IPython continuation prompt (8 characters).
                        # NOTE(review): literal restored to 8 chars to
                        # agree with the [8:] slice - confirm.
                        lines[j] = lines[j][8:]
                    else:
                        # output (no prefix or Out)
                        lines[j] = ''
                        new_code_blocks.append(
                            '\n'.join(lines[last_cell_end + 1:j + 1]))
                        last_cell_end = j
                # Code after the last output line must not get lost
                tail = '\n'.join(lines[last_cell_end + 1:])
                if tail.strip():
                    new_code_blocks.append(tail)
                code_blocks[i] = new_code_blocks
            else:
                # Remove prompt and output lines; leave code executable
                # in cell
                for j in range(len(lines)):
                    if lines[j].startswith('>>> ') or \
                       lines[j].startswith('... '):
                        lines[j] = lines[j][4:]
                    elif lines[j].startswith('In ['):
                        lines[j] = ':'.join(lines[j].split(':')[1:]).strip()
                    elif lines[j].startswith('   ...: '):
                        # IPython continuation line is code, not output
                        lines[j] = lines[j][8:]
                    else:
                        # output
                        lines[j] = ''
                code_blocks[i] = '\n'.join(
                    [line for line in lines if line != ''])
            ipynb_code_tp[i] = 'cell'
        elif tp.startswith('sys'):
            # Do we find execution of python file? If so, record the file
            # for the tar archive and make a run file command in a cell.
            # Otherwise, it is just a plain verbatim Markdown block.
            found_unix_lines = False
            found_run_lines = False
            lines = code_blocks[i].splitlines()
            for j in range(len(lines)):
                m = re.search(r'(.+?>|\$) *python +([A-Za-z_0-9]+?\.py)',
                              lines[j])
                if m:
                    name = m.group(2).strip()
                    if os.path.isfile(name):
                        # The regex admits no directory part, so dirname
                        # is normally empty: archive the file itself then
                        src_paths.add(os.path.dirname(name) or name)
                        lines[j] = '%%run "%s"' % name
                        found_run_lines = True
                else:
                    found_unix_lines = True
            code_blocks[i] = '\n'.join(lines)
            if found_run_lines and not found_unix_lines:
                # This is a sys block with run commands only
                ipynb_code_tp[i] = 'cell'
            else:
                # Standard Markdown code
                code_blocks[i] = indent_lines(code_blocks[i], format)
                ipynb_code_tp[i] = 'markdown'
        elif tp.endswith('hid'):
            ipynb_code_tp[i] = 'cell_hidden'
        elif tp.startswith('py'):
            ipynb_code_tp[i] = 'cell'
        else:
            # Should support other languages as well, but not for now
            code_blocks[i] = indent_lines(code_blocks[i], format)
            ipynb_code_tp[i] = 'markdown'

    # figure_files and movie_files are global variables and contain
    # all figures and movies referred to
    src_paths = sorted(src_paths)
    if figure_files:
        src_paths += figure_files
    if movie_files:
        src_paths += movie_files

    if src_paths:
        # Make tar file with all the source dirs with files
        # that need to be executed
        os.system('tar cfz %s %s' % (ipynb_tarfile, ' '.join(src_paths)))
        errwarn('collected all required additional files in ' +
                ipynb_tarfile +
                ' which must be distributed with the notebook')
    elif os.path.isfile(ipynb_tarfile):
        os.remove(ipynb_tarfile)

    # Parse document into markdown text, code blocks, and tex blocks.
    # Store in nested list notebook_blocks.
    notebook_blocks = [[]]
    for line in filestr.splitlines():
        if line.startswith('authors = [new_author(name='):
            # old author method: the line is left out of the notebook
            # text. (It used to be eval'ed into a metadata object that
            # was never attached to the notebook; that is dropped.)
            continue
        is_code = _CODE_BLOCK in line
        is_math = not is_code and _MATH_BLOCK in line
        if is_code:
            code_block_tp = line.split()[-1]
            if not (code_block_tp in ('pyhid',) or
                    not code_block_tp.endswith('hid')):
                # hidden block to be dropped (may include more
                # languages with time in the above tuple)
                continue
        if is_code or is_math:
            # Finish the text chunk collected so far; the previous entry
            # is already a string if two marker lines are adjacent
            if isinstance(notebook_blocks[-1], list):
                notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()
            notebook_blocks.append(line)
        else:
            if not isinstance(notebook_blocks[-1], list):
                notebook_blocks.append([])
            notebook_blocks[-1].append(line)
    if isinstance(notebook_blocks[-1], list):
        notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()

    # Add block type info
    marker = r'(\d+) +%s'
    for i, chunk in enumerate(notebook_blocks):
        block_tp = 'text'
        m = re.match(marker % _CODE_BLOCK, chunk)
        if m:
            code_tp = ipynb_code_tp[int(m.group(1))]
            if code_tp in ('cell', 'cell_hidden'):
                block_tp = code_tp
        elif re.match(marker % _MATH_BLOCK, chunk):
            block_tp = 'math'
        notebook_blocks[i] = [block_tp, chunk]

    # Go through tex_blocks and wrap in $$
    # (doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks)
    label2tag = {}
    tag_counter = 1
    for i in range(len(tex_blocks)):
        # Extract labels and add tags
        labels = re.findall(r'label\{(.+?)\}', tex_blocks[i])
        for label in labels:
            label2tag[label] = tag_counter
            # Insert tag to get labeled equation
            tex_blocks[i] = tex_blocks[i].replace(
                'label{%s}' % label,
                'label{%s} \\tag{%s}' % (label, tag_counter))
            tag_counter += 1

        # Remove \[ and \] or \begin/end{equation*} in single equations
        for delimiter in (r'\[', r'\]',
                          r'\begin{equation*}', r'\end{equation*}'):
            tex_blocks[i] = tex_blocks[i].replace(delimiter, '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex_blocks[i])
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                errwarn("""\
*** warning: latex envir \\begin{%s} does not work well in Markdown.
    Stick to \\[ ... \\], equation, equation*, align, or align*
    environments in math environments.
""" % envir)
        # Markdown: add $$ on each side of the equation and make sure
        # there is no newline after the equation
        tex_blocks[i] = '$$\n' + tex_blocks[i].strip() + '\n$$'
        # Add labels for the eqs above the block (for reference)
        if labels:
            label_tp = '<div id="%s"></div>'
            tex_blocks[i] = \
                '<!-- Equation labels as ordinary links -->\n' + \
                ' '.join([label_tp % label for label in labels]) + \
                '\n\n' + tex_blocks[i]

    # notebook_blocks is now a list of text chunks in markdown and
    # math/code line instructions. Insert code and tex blocks
    for entry in notebook_blocks:
        chunk = entry[1]
        has_code = _CODE_BLOCK in chunk
        if has_code or _MATH_BLOCK in chunk:
            # start of chunk: number block-indicator code-type
            n = int(chunk.split()[0])
            # Decide from the marker line, not from the inserted text
            if has_code:
                entry[1] = code_blocks[n]  # can be list!
            else:
                entry[1] = tex_blocks[n]

    # Make IPython structures
    nb_version = int(option('ipynb_version=', '4'))
    if nb_version == 3:
        try:
            from IPython.nbformat.v3 import (
                new_code_cell, new_text_cell, new_worksheet,
                new_notebook, new_metadata)
            nb = new_worksheet()
        except ImportError:
            errwarn('*** error: could not import IPython.nbformat.v3!')
            errwarn(' set --ipynb_version=4 or leave out --ipynb_version=3')
            _abort()
    elif nb_version == 4:
        try:
            from nbformat.v4 import (
                new_code_cell, new_markdown_cell, new_notebook)
        except ImportError:
            # Try old style
            try:
                from IPython.nbformat.v4 import (
                    new_code_cell, new_markdown_cell, new_notebook)
            except ImportError:
                errwarn('*** error: cannot do import nbformat.v4 or IPython.nbformat.v4')
                errwarn(' make sure IPython notebook or Jupyter is installed correctly')
                _abort()
    else:
        errwarn('*** error: --ipynb_version=%s is not supported' % nb_version)
        _abort()

    cells = []  # version 4 cells (version 3 cells go into nb.cells)

    def add_code_cell(source, number, collapsed):
        """Append one code cell in the chosen notebook format."""
        if nb_version == 3:
            nb.cells.append(
                new_code_cell(input=source,
                              prompt_number=number,
                              collapsed=collapsed))
        else:
            cells.append(
                new_code_cell(source=source,
                              execution_count=number,
                              metadata=dict(collapsed=collapsed)))

    prompt_number = 1
    for block_tp, block in notebook_blocks:
        if block_tp in ('text', 'math') and block != '':
            # Pure comments between math/code and math/code come
            # out as empty blocks, should detect that situation
            # (challenging - can have multiple lines of comments,
            # or begin and end comment lines with important things between)
            if nb_version == 3:
                nb.cells.append(new_text_cell(u'markdown', source=block))
            else:
                cells.append(new_markdown_cell(source=block))
        elif block_tp == 'cell' and block != '' and block != []:
            # A split interactive session is a list of cell sources
            sources = block if isinstance(block, list) else [block]
            for source in sources:
                source = source.rstrip()
                if source != '':
                    add_code_cell(source, prompt_number, False)
                    prompt_number += 1
        elif block_tp == 'cell_hidden' and block != '':
            add_code_cell(block.rstrip(), prompt_number, True)
            prompt_number += 1

    if nb_version == 3:
        nb = new_notebook(worksheets=[nb], metadata=new_metadata())
        # Let us make v4 notebook here by upgrading
        from IPython.nbformat.v4 import upgrade
        nb = upgrade(nb)
        import IPython.nbformat.v4.nbjson as nbjson

        # Convert nb to json format
        filestr = nbjson.writes(nb)
    else:
        nb = new_notebook(cells=cells)
        # Same import order as for the cell constructors above
        try:
            from nbformat import writes
        except ImportError:
            from IPython.nbformat import writes
        filestr = writes(nb, version=4)

    # Check that there are no empty cells:
    if '"input": []' in filestr:
        errwarn('*** error: empty cells in notebook - report bug in DocOnce')
        _abort()
    # must do the replacements here at the very end when json is written out
    # \eqref and labels will not work, but labels (only in math) do no harm
    filestr = re.sub(r'([^\\])label\{', r'\g<1>\\\\label{', filestr,
                     flags=re.MULTILINE)

    # \\eqref{} just gives (???) link at this stage, so we use explicit
    # references to tags
    def ref2tag(m):
        label = m.group(1)
        if label in label2tag:
            return r'[(%s)](#%s)' % (label2tag[label], label)
        errwarn('*** error: label "%s" is not defined' % label)
        # Leave the unresolved reference visible in the text
        return m.group()

    filestr = re.sub(r'\(ref\{(.+?)\}\)', ref2tag, filestr)
    return filestr
def rst_bib(filestr, citations, pubfile, pubdata, numbering=True):
    """
    Replace doconce citations and bibliography with reST syntax.
    If numbering is True, the keys used in the bibliography are
    replaced by numbers (RefX). This will often look better.

    Arguments:
    filestr    -- document text with cite{key} and a BIBFILE: line
    citations  -- dict mapping citation key to its number; if empty,
                  filestr is returned unchanged
    pubfile    -- name of the publication file, or None if there is no
                  bibliography to insert (name only used in messages)
    pubdata    -- publication data handed to bibliography()
    numbering  -- use [RefNN] labels instead of the citation keys

    Return the modified text.
    """
    if not citations:
        return filestr

    filestr = cite_with_multiple_args2multiple_cites(filestr)
    if numbering:
        # Find max no of digits
        n = len(str(max(citations.values())))
        cite = '[Ref%%0%dd]' % n  # cannot have blanks in ref label
    for label in citations:
        if numbering:
            filestr = filestr.replace('cite{%s}' % label,
                                      cite % citations[label] + '_')
        else:
            filestr = filestr.replace('cite{%s}' % label, '[%s]_' % label)

    if pubfile is not None:
        # Could use rst format, but we stick to the common doconce format
        bibtext = bibliography(pubdata, citations, format='rst')
        if numbering:
            for label in citations:
                key = '[%s]' % label
                number = cite % citations[label]
                try:
                    bibtext = bibtext.replace(key, number)
                except UnicodeDecodeError as e:
                    if "can't decode byte" in str(e):
                        try:
                            bibtext = bibtext.decode('utf-8').replace(
                                key, number)
                        except UnicodeDecodeError as e2:
                            # str() is needed: str + exception is a
                            # TypeError
                            errwarn('UnicodeDecodeError: ' + str(e2))
                            errwarn('*** error: problems in %s' % pubfile)
                            errwarn(' with key ' + label)
                            errwarn(' tried to do decode("utf-8"), but it did not work')
                    else:
                        errwarn(str(e))
                        errwarn('*** error: problems in %s' % pubfile)
                        errwarn(' with key ' + label)
                        _abort()

        # Use a function as replacement so that backslashes in the
        # bibliography text are inserted literally and not interpreted
        # as regex template escapes
        filestr = re.sub(r'^BIBFILE:.+$', lambda m: bibtext, filestr,
                         flags=re.MULTILINE)
    return filestr
def ipynb_movie(m):
    """
    Return the notebook replacement text for a MOVIE: line.

    m is a match object with the named groups 'filename' and 'caption'.
    The option --ipynb_movie= selects the output: 'md' (plain HTML from
    html_movie), 'HTML...' (a code cell displaying the HTML via
    IPython.display.HTML, or YouTubeVideo if the value contains
    'YouTube' and the movie is on YouTube), or 'ipynb' (a code cell with
    YouTubeVideo or a base64-embedded video tag).

    The generated cell code runs on both Python 2 and 3 kernels.
    Local movie files are appended to the global list movie_files.
    """
    # m.group() must be called before m.group('name')
    text = '<!-- dom:%s -->' % m.group()

    global html_encountered, movie_encountered, movie_files
    filename = m.group('filename')
    caption = m.group('caption').strip()
    youtube = 'youtu.be' in filename or 'youtube.com' in filename

    if '*' in filename or '->' in filename:
        errwarn('*** warning: * or -> in movie filenames is not supported in ipynb')
        return text

    def YouTubeVideo(filename):
        # Use YouTubeVideo object
        if 'watch?v=' in filename:
            name = filename.split('watch?v=')[1]
        elif 'youtu.be/' in filename:
            name = filename.split('youtu.be/')[1]
        else:
            errwarn('*** error: youtube movie name "%s" could not be interpreted' % filename)
            _abort()

        text = ''
        global movie_encountered
        if not movie_encountered:
            text += 'from IPython.display import YouTubeVideo\n'
            movie_encountered = True
        text += 'YouTubeVideo("%s")\n' % name
        return text

    def caption_code():
        # print with a single parenthesized string is valid in Python 2
        # and 3; escape backslash and quote to keep the literal intact
        escaped = caption.replace('\\', '\\\\').replace('"', '\\"')
        return '\nprint("%s")' % escaped

    text += '\n<!-- begin movie -->\n'
    display_method = option('ipynb_movie=', 'HTML')
    if display_method == 'md':
        text += html_movie(m)
    elif display_method.startswith('HTML'):
        text += '\n!bc pycod\n'
        if youtube and 'YouTube' in display_method:
            text += YouTubeVideo(filename)
            if caption:
                text += caption_code()
        else:
            # Use HTML formatting
            if not html_encountered:
                text += 'from IPython.display import HTML\n'
                html_encountered = True
            text += '_s = """' + html_movie(m) + '"""\n'
            text += 'HTML(_s)\n'
            if not filename.startswith('http'):
                movie_files.append(filename)
        text += '!ec\n'
    elif display_method == 'ipynb':
        text += '!bc pycod\n'
        if youtube:
            text += YouTubeVideo(filename)
            if caption:
                text += caption_code()
        else:
            # see http://nbviewer.ipython.org/github/ipython/ipython/blob/1.x/examples/notebooks/Part%205%20-%20Rich%20Display%20System.ipynb
            # http://stackoverflow.com/questions/18019477/how-can-i-play-a-local-video-in-my-ipython-notebook
            # http://python.6.x6.nabble.com/IPython-User-embedding-non-YouTube-movies-in-the-IPython-notebook-td5024035.html
            # Just support .mp4, .ogg, and .webm
            stem, ext = os.path.splitext(filename)
            if ext not in ('.mp4', '.ogg', '.webm'):
                errwarn('*** error: movie "%s" in format %s is not supported for --ipynb_movie=%s' % (filename, ext, display_method))
                errwarn(' use --ipynb_movie=HTML instead')
                _abort()
            height = 365
            width = 640
            if filename.startswith('http'):
                # urlopen lives in different modules in Python 2 and 3
                file_open = (
                    'try:\n'
                    '    from urllib.request import urlopen\n'
                    'except ImportError:\n'
                    '    from urllib import urlopen\n'
                    'video = urlopen("%s").read()' % filename)
            else:
                file_open = 'video = open("%s", "rb").read()' % filename
            # b64encode returns bytes in Python 3: decode before it is
            # formatted into the tag
            text += """
%s
from base64 import b64encode
video_encoded = b64encode(video).decode("ascii")
video_tag = '<video controls loop alt="%s" height="%s" width="%s" src="data:video/%s;base64,{0}">'.format(video_encoded)
""" % (file_open, filename, height, width, ext[1:])
            if not filename.startswith('http'):
                movie_files.append(filename)
            if not html_encountered:
                text += 'from IPython.display import HTML\n'
                html_encountered = True
            text += 'HTML(data=video_tag)\n'
            if caption:
                text += caption_code()
        text += '!ec\n'
    else:
        errwarn('*** error: --ipynb_movie=%s is not supported' % display_method)
        _abort()
    text += '<!-- end movie -->\n'
    return text
def ref_and_label_commoncode(section_label2title, format, filestr):
    """
    Prepare section headings, labels and the document title for reST.

    Steps: typeset the TITLE: line as a heading one level above the
    topmost section, make repeated section titles unique by appending a
    counter, put a ".. _label:" target above every labeled heading, and
    remove all remaining label{...} constructs.

    Arguments:
    section_label2title -- dict from section label to title; entries of
                           renamed (repeated) titles are updated in place
    format              -- output format; for 'sphinx' a title longer
                           than 63 characters aborts
    filestr             -- document text

    Return the modified text.
    """
    filestr = fix_ref_section_chapter(filestr, format)

    # Deal with the problem of identical titles, which makes problem
    # with non-unique links in reST: add a counter to the title
    debugtext = ''
    section_pattern = r'^\s*(={3,9})(.+?)(={3,9})(\s*label\{(.+?)\})?'
    all_sections = re.findall(section_pattern, filestr, flags=re.MULTILINE)
    # First count the no of titles with the same wording
    titles = {}
    max_heading = 1  # track the top heading level for correct TITLE typesetting
    for heading, title, _, _, label in all_sections:
        entry = None if label == '' else label
        titles.setdefault(title, []).append(entry)
        max_heading = max(max_heading, len(heading))

    # Typeset TITLE so that it gets the highest+1 (but no higher)
    # section level
    max_heading = min(max_heading + 2, 9)  # one level up (2 =)
    pattern = r'^TITLE:\s*(.+)$'
    if format == 'sphinx':
        # Title cannot be more than 63 chars...
        m = re.search(pattern, filestr, flags=re.MULTILINE)
        if m:
            title = m.group(1).strip()
            if len(title) > 63:
                errwarn('*** error: sphinx title cannot be longer than 63 characters')
                errwarn(' current title: "%s" (%d characters)' % (title, len(title)))
                _abort()
    underline = '=' * max_heading
    filestr = re.sub(pattern,
                     '.. Document title:\n\n%s \\g<1> %s\n' %
                     (underline, underline),
                     filestr, flags=re.MULTILINE)
    # Make new titles
    title_counter = {}  # count repeated titles (need to append counter to make unique links)
    sections = []
    for heading, title, _, _, label in all_sections:
        label = None if label == '' else label
        if len(titles[title]) > 1:
            title_counter[title] = title_counter.get(title, 0) + 1
            # Add much whitespace so we can recognize the titles after
            # formats are compiled and remove the number
            # NOTE(review): the amount of whitespace around (%d) may have
            # been collapsed in this view - confirm against the code that
            # removes the number again.
            new_title = title + ' (%d) ' % title_counter[title]
            sections.append((heading, new_title, label, title))
            if label in section_label2title:
                section_label2title[label] = new_title
        else:
            sections.append((heading, title, label, title))
    # Make replacements
    for heading, title, label, old_title in sections:
        if title != old_title:
            debugtext += '\nchanged title: %s -> %s\n' % (old_title, title)
        new_heading = '%s %s %s' % (heading, title, heading)
        # The substitution depends on whether we have a label or not.
        # title and label may contain ? () . etc., that's why we take
        # re.escape on both
        if label is not None:
            title_pattern = r'%s\s*%s\s*%s\s*label\{%s\}' % \
                (heading, re.escape(old_title), heading, re.escape(label))
            replacement = '.. _%s:\n\n' % label + new_heading
        else:
            title_pattern = r'%s\s*%s\s*%s' % \
                (heading, re.escape(old_title), heading)
            replacement = new_heading
        # A function replacement inserts the text literally and avoids
        # trouble with \t, \n etc. in titles and labels
        filestr = re.sub(title_pattern,
                         lambda m, text=replacement: text,
                         filestr, count=1)

    # remove label{...} from output
    cpattern = re.compile(r'^label\{[^}]+?\}\s*$', flags=re.MULTILINE)
    filestr = cpattern.sub('', filestr)
    filestr = re.sub(r'label\{[^}]+?\}', '', filestr)  # all the remaining

    import doconce
    doconce.debugpr(debugtext)

    return filestr
def sphinx_quiz_runestone(quiz):
    """
    Return a RunestoneInteractive multiple-choice directive for quiz.

    quiz is a dict with the keys 'question' (text), 'choices' (list of
    ['right' or other flag, answer text, optional explanation]) and
    optionally 'new page' (heading text). At most the first five
    choices are used. The global question_counter is incremented and
    used to name the question. Explanations are included only if the
    option --quiz_explanations= is 'on' (default).
    """
    quiz_feedback = option('quiz_explanations=', 'on')

    text = ''
    if 'new page' in quiz:
        heading = quiz['new page']
        # End the underline with a blank line; otherwise the comment
        # below ends up on the same line as the underline
        text += '.. !split\n%s\n%s\n\n' % (heading, '-' * len(heading))

    text += '.. begin quiz\n\n'
    global question_counter
    question_counter += 1
    # Multiple correct answers?
    no_correct = sum(1 for choice in quiz['choices']
                     if choice[0] == 'right')
    if no_correct > 1:
        text += '.. mchoicema:: question_%d' % question_counter + '\n'
    else:
        text += '.. mchoicemf:: question_%d' % question_counter + '\n'

    def fix_text(s, tp='answer'):
        """
        Answers and feedback in RunestoneInteractive book quizzes
        cannot contain math, figure and rst markup. Perform fixes.
        Return '' if s contains a math block, code block or figure.
        """
        drop = False
        if 'math::' in s:
            errwarn('\n*** warning: quiz %s with math block not supported:' % tp)
            errwarn(s)
            drop = True
        if '.. code-block::' in s:
            errwarn('\n*** warning: quiz %s with code block not supported:' % tp)
            errwarn(s)
            drop = True
        if '.. figure::' in s:
            errwarn('\n*** warning: quiz %s with figure not supported:' % tp)
            errwarn(s)
            drop = True
        if drop:
            return ''
        # Make multi-line paragraph a one-liner
        s = ' '.join(s.splitlines()).rstrip()
        # Fixes
        # URL: `text <http://...>`__ (keep the < out of the href)
        pattern = r'`(.+?) <(https?.+?)>`__'
        s = re.sub(pattern, r'<a href="\g<2>"> \g<1> </a>', s)
        pattern = r'``(.+?)``'  # verbatim
        s = re.sub(pattern, r'<tt>\g<1></tt>', s)
        pattern = r':math:`(.+?)`'  # inline math
        s = re.sub(pattern, r'<em>\g<1></em>', s)  # mimic italic....
        # NOTE(review): the leading ':' in this emphasize pattern looks
        # unintended (plain *text* is not matched) - confirm.
        pattern = r':\*(.+?)\*'  # emphasize
        s = re.sub(pattern, r'\g<1>', s, flags=re.DOTALL)
        return s

    import string
    # Directive fields are indented like the question text below.
    # NOTE(review): 3-space field indent restored from the ' ' * 3 used
    # for the question - confirm.
    indent = ' ' * 3
    max_choices = 5  # more is not supported
    choices = quiz['choices']
    if len(choices) > max_choices:
        errwarn('*** warning: quiz with %d choices gets truncated (first 5)' % len(choices))
    correct = []
    for i, choice in enumerate(choices[:max_choices]):
        letter = string.ascii_lowercase[i]
        answer = fix_text(choice[1], tp='answer')
        if not answer:
            answer = 'Too advanced typesetting prevents the text from being rendered'
        text += indent + ':answer_%s: ' % letter + answer + '\n'
        if choice[0] == 'right':
            correct.append(letter)
    if correct:
        text += indent + ':correct: ' + ', '.join(correct) + '\n'
    else:
        errwarn('*** error: correct choice in quiz has index > 5 (max 5 allowed for RunestoneInteractive books)')
        errwarn(quiz['question'])
        _abort()
    for i, choice in enumerate(choices[:max_choices]):
        letter = string.ascii_lowercase[i]
        text += indent + ':feedback_%s: ' % letter  # must be present
        if len(choice) == 3 and quiz_feedback == 'on':
            feedback = fix_text(choice[2], tp='explanation')
            if not feedback:
                feedback = '(Too advanced typesetting prevents the text from being rendered)'
            text += feedback
        text += '\n'

    text += '\n' + indent_lines(quiz['question'], 'sphinx', indent) + '\n\n\n'
    return text
def matlabnb_code(filestr, code_blocks, code_block_types,
                  tex_blocks, format):
    """
    Typeset code and tex blocks and the running text for the matlabnb
    (Matlab notebook) format.

    Tex blocks are stripped of \\begin/\\end, \\[ \\] and labels and
    joined to one line inside $$...$$. Code blocks that are not
    executable Matlab (types other than 'mcod' and 'mpro') are commented
    out. Text lines are turned into % comments and headings into %%
    lines before the blocks are inserted in the text.

    code_blocks and tex_blocks are modified in place; the final text is
    returned.
    """
    # Remove all begin-end and \[ \] in tex blocks, join to one line,
    # embed in $$. Write error message if anything else than a single
    # equation.
    pattern = r'begin\{(.+?)\}'
    for i in range(len(tex_blocks)):
        m = re.search(pattern, tex_blocks[i])
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*'):
                errwarn('*** warning: \\begin{%s}-\\end{%s} does not work in Matlab notebooks' % (envir, envir))
            # The name must be escaped: the * in equation*, align* is a
            # regex quantifier. \s* also removes an \end at the very end
            name = re.escape(envir)
            tex_blocks[i] = re.sub(r'\\begin\{%s\}\s*' % name, '',
                                   tex_blocks[i])
            tex_blocks[i] = re.sub(r'\\end\{%s\}\s*' % name, '',
                                   tex_blocks[i])
        tex_blocks[i] = re.sub(r'\\\[', '', tex_blocks[i])
        tex_blocks[i] = re.sub(r'\\\]', '', tex_blocks[i])
        tex_blocks[i] = re.sub(r'label\{(.+?)\}', '', tex_blocks[i])
        tex_blocks[i] = '$$' + ' '.join(
            tex_blocks[i].strip().splitlines()).strip() + '$$'
        # Note: now the tex block ends with $$!et

    # Insert % in code if envir with -t name or if not Matlab code
    for i in range(len(code_blocks)):
        executable_matlab = code_block_types[i] in ('mcod', 'mpro')
        if not executable_matlab:
            # Note that monospace font requires two blanks after %
            # NOTE(review): second blank restored to agree with the
            # note above - confirm.
            code_blocks[i] = '\n'.join([
                '%  ' + line for line in code_blocks[i].splitlines()
                if not (line.startswith('!bc') or line.startswith('!ec'))
            ]) + '\n'

    # Insert % at the beginning of each line
    from common import _CODE_BLOCK, _MATH_BLOCK
    code_line = r'^\d+ ' + _CODE_BLOCK
    code_line_problem = r' (\d+ ' + _CODE_BLOCK + ')'
    math_line = r'^\d+ ' + _MATH_BLOCK
    math_line_problem = r' (\d+ ' + _MATH_BLOCK + ')'
    heading_no = 0
    lines = filestr.splitlines()
    for i in range(len(lines)):
        if re.search(code_line, lines[i]):
            if heading_no < 2:
                # Add %% (empty heading) before code block because
                # code cannot come after the first heading, only
                # after the second and onwards
                lines[i] = '%%\n' + lines[i]
        elif re.search(math_line, lines[i]):
            pass  # block marker lines are left as they are
        elif re.search(code_line_problem, lines[i]):
            # Paragraphs can move a block indicator after its heading,
            # insert \n
            lines[i] = re.sub(code_line_problem, '\n\\g<1>', lines[i])
        elif re.search(math_line_problem, lines[i]):
            # Paragraphs can move a block indicator after its heading,
            # insert \n
            lines[i] = re.sub(math_line_problem, '\n\\g<1>', lines[i])
        elif lines[i].startswith('>>>H'):
            # Heading
            lines[i] = '%%' + lines[i].replace('>>>H', '')
            heading_no += 1
        else:
            lines[i] = '% ' + lines[i]

    filestr = '\n'.join(lines)
    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'matlabnb')
    filestr = re.sub(r'\$\$!et', '$$', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!bt\s+\$\$', '% $$', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!bc.+', '', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!ec', '', filestr, flags=re.MULTILINE)
    # Remove all blank lines
    filestr = re.sub(r'^\s+', '', filestr, flags=re.MULTILINE)
    # Fix emphasize markup (conflicts with boldface so we do a hack)
    filestr = re.sub(r'\^\^\^X(.+?)X\^\^\^', r'_\g<1>_', filestr,
                     flags=re.DOTALL)  # emph
    filestr = re.sub(r'\{\{\{X(.+?)X\}\}\}', r'*\g<1>*', filestr,
                     flags=re.DOTALL)  # bold
    filestr = re.sub(r'<<<X(.+?)X>>>', r'|\g<1>|', filestr,
                     flags=re.DOTALL)  # verb

    return filestr
def sphinx_figure(m):
    """
    Return the Sphinx ".. figure::" directive for a FIGURE: line.

    m is a match object with the named groups 'filename', 'options'
    (e.g. "width=600, frac=0.5") and 'caption'. A label{name} in the
    caption becomes a ".. _name:" target above the figure and the
    caption is cut at the label. A final period in the caption is
    removed. The options frac and sidecap are not passed on, and option
    tokens without '=' are ignored.
    """
    result = ''
    filename = m.group('filename')
    caption = m.group('caption').strip()

    # Substitute DocOnce label by rst label in caption
    # (also, remove final period in caption since caption is used as
    # hyperlink text to figures).
    m_label = re.search(r'label\{(.+?)\}', caption)
    if m_label:
        label = m_label.group(1)
        result += '\n.. _%s:\n' % label
        # Keep only the text before the label construct. (Cutting at
        # the first occurrence of the word "label" would truncate
        # captions such as "Axis label ... label{fig}".)
        # Contrary to rst_figure, we do not write label into caption
        # since we just want to remove the whole label as part of
        # the caption (otherwise done when handling ref and label)
        caption = caption[:m_label.start()].rstrip()
        # remove . at the end of the caption text
        if caption and caption[-1] == '.':
            caption = caption[:-1]
        caption = caption.strip()
        # insert emphasize marks if not latex $ at the
        # beginning or end (math subst does not work for *$I=1$*)
        # or if not boldface or emphasize already in the caption
        caption_font = option('sphinx_figure_captions=', 'emphasize')
        if caption and \
           caption_font == 'emphasize' and \
           not caption.startswith('$') and \
           not caption.endswith('$') and \
           '*' not in caption and \
           '_' not in caption:
            caption = '*' + caption + '*'
    else:
        if caption and caption[-1] == '.':
            caption = caption[:-1]

    # math is ignored in references to figures, test for math only
    if caption.startswith('$') and caption.endswith('$'):
        errwarn('*** warning: math only in sphinx figure caption (it will be ignored by sphinx, resulting in empty caption)\n %s\n FIGURE: [%s' % (caption, filename))

    # NOTE(review): the indentation inside the ' :%s: %s' and '\n\n '
    # literals below may have been wider originally - confirm.
    result += '\n.. figure:: ' + filename + '\n'
    opts = m.group('options')
    if opts:
        # opts: width=600 frac=0.5 align=center
        # opts: width=600, frac=0.5, align=center
        fig_info = []
        for token in opts.split():
            if '=' not in token:
                # e.g. a bare "sidecap": nothing to pass on
                continue
            opt, value = token.split('=', 1)
            if opt not in ('frac', 'sidecap'):
                fig_info.append(' :%s: %s' % (opt, value.replace(',', '')))
        result += '\n'.join(fig_info)
    if caption:
        result += '\n\n ' + caption + '\n'
    else:
        result += '\n\n'
    return result
def rst_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """
    Insert code and tex blocks in filestr with reST typesetting.

    In rst syntax, code blocks are typeset with :: (verbatim)
    followed by indented blocks. This function indents everything
    inside code (or TeX) blocks (in place), inserts the blocks and
    replaces the !bc/!ec and !bt/!et instructions. With the option
    --rst_mathjax, tex blocks are written as raw html $$...$$ for
    MathJax instead of verbatim text.

    Aborts if any !bt or !et is left. Returns the final text.
    """
    for i in range(len(code_blocks)):
        code_blocks[i] = indent_lines(code_blocks[i], format)
    for i in range(len(tex_blocks)):
        tex_blocks[i] = indent_lines(tex_blocks[i], format)

    mathjax = option('rst_mathjax')

    # Fix labels
    if mathjax:
        for i in range(len(tex_blocks)):
            tex_blocks[i] = tex_blocks[i].replace(' label{', ' \\label{')

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    # substitute !bc and !ec appropriately:
    # the line before the !bc block must end in [a-zA-z0-9)"...]
    # followed by [\n:.?!,] see the bc_regex_pattern global variable
    # (problems with substituting !bc and !bt may be caused by
    # missing characters in these two families)
    filestr = re.sub(bc_regex_pattern, r'\g<1>::\n\n', filestr,
                     flags=re.MULTILINE | re.DOTALL)
    # Need a fix for :: appended to special comment lines
    # (---:: -> ---\nCode::)
    filestr = re.sub(r' ---::\n\n', ' ---\nCode::\n\n', filestr)
    filestr = re.sub(r'^!ec\n', '\n', filestr, flags=re.MULTILINE)

    if mathjax and (re.search(r'^!bt', filestr, flags=re.MULTILINE) or
                    re.search(r'\\\( .+ \\\)', filestr)):
        # First add MathJax script in the very beginning of the file
        from html import mathjax_header
        latex = indent_lines(mathjax_header(filestr).lstrip(), 'rst')
        filestr = '\n.. raw:: html\n\n' + latex + '\n\n' + filestr
        # Replace all the !bt parts by raw html directive (make sure
        # the coming block is sufficiently indented)
        # NOTE(review): the indentation in front of $$ in the two
        # literals below may have been wider originally - confirm.
        filestr = re.sub(bt_regex_pattern,
                         r'\g<1>\n\n.. raw:: html\n\n $$', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n', ' $$\n\n', filestr,
                         flags=re.MULTILINE)
        # Remove inner \[..\] from equations $$ \[ ... \] $$
        filestr = re.sub(r'\$\$\s*\\\[', '$$', filestr)
        filestr = re.sub(r'\\\]\s*\$\$', '$$', filestr)
        # Equation references (ref{...}) must be \eqref{...} in MathJax
        # (note: this affects also (ref{...}) syntax in verbatim
        # blocks...). The backslash is doubled in the template since
        # \e is not a valid template escape.
        filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)
    else:
        # just use the same substitution for tex blocks as for code
        # blocks:
        filestr = re.sub(bt_regex_pattern, r'\g<1>::\n', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub(r'^!et *\n', '\n', filestr, flags=re.MULTILINE)

    # Fix: if there are !bc-!ec or other environments after each
    # other without text in between, there is a difficulty with the
    # :: symbol before the code block. In these cases, we get
    # !ec::, !et::, !bbox:: etc. from the above substitutions.
    # We just replace these by empty text.
    filestr = re.sub(r'^(!(b|e)[a-z]+)::', r'\g<1>', filestr,
                     flags=re.MULTILINE)

    # Check
    for pattern in '^!bt', '^!et':
        if re.search(pattern, filestr, flags=re.MULTILINE):
            errwarn("""
Still %s left after handling of code and tex blocks. Problem is probably
that %s is not preceded by text which can be extended with :: (required).
""" % (pattern, pattern))
            _abort()

    # Final fixes

    filestr = fix_underlines_in_headings(filestr)
    # Ensure blank line before and after comments
    filestr = re.sub(r'([.:;?!])\n^\.\. ', r'\g<1>\n\n.. ',
                     filestr, flags=re.MULTILINE)
    filestr = re.sub(r'(^\.\. .+)\n([^ \n]+)', r'\g<1>\n\n\g<2>',
                     filestr, flags=re.MULTILINE)

    # Line breaks interfere with tables and need a final blank line too
    lines = filestr.splitlines()
    inside_block = False
    for i in range(len(lines)):
        has_marker = lines[i].startswith('<linebreakpipe>')
        if has_marker and not inside_block:
            # First line of a line block
            inside_block = True
            lines[i] = lines[i].replace('<linebreakpipe> ', '') + '\n'
        elif has_marker:
            lines[i] = '|' + lines[i].replace('<linebreakpipe>', '')
        elif inside_block:
            # First line without marker ends the line block
            inside_block = False
            lines[i] = '| ' + lines[i] + '\n'
    filestr = '\n'.join(lines)

    # Remove too much vertical space
    filestr = re.sub(r'\n\n\n+', '\n\n', filestr)

    return filestr
def sphinx_ref_and_label(section_label2title, format, filestr):
    """
    Translate DocOnce references and labels to Sphinx syntax.

    If the document contains ``!split`` and the ``sphinx_keep_splits``
    option is not set, all existing ``!split`` commands are removed and a
    new one is inserted in front of every heading at the topmost section
    level present (9, 7 or 5 ``=`` characters). Thereafter
    ``ref_and_label_commoncode`` is applied and every ``ref{x}`` is
    rewritten as a Sphinx ``:ref:`` role.

    Arguments:
      section_label2title: container iterated for the section labels.
      format: passed on unchanged to ``ref_and_label_commoncode``.
      filestr: the document text.

    Returns the transformed document text.
    """
    # Special fix early in the process:
    # Deal with !split - by default we place splits before
    # all the topmost sections
    # (This must be done before labels are put above section
    # headings)
    if '!split' in filestr and not option('sphinx_keep_splits'):
        errwarn(
            '*** warning: new !split inserted (override all existing !split)')
        # Note: the title is at this stage translated to a chapter heading!
        # This title/heading must be removed for the algorithm below to work
        # (remove it, then insert afterwards)
        pattern = r'^.. Document title:\n\n={3,9}.+?={3,9}'
        m = re.search(pattern, filestr, flags=re.MULTILINE)
        title_replacement = '<<<<<<<DOCUMENT TITLE>>>>>>>>>>>>'  # "unlikely" str
        if m:
            title = m.group()
            filestr = filestr.replace(title, title_replacement)
        else:
            title = ''

        # Find the highest heading level in use; try the longest marker
        # first since a shorter run of = is a prefix of a longer one
        topmost_section = 0
        for level in [9, 7, 5]:
            if re.search(r'^%s' % ('=' * level), filestr, flags=re.MULTILINE):
                topmost_section = level
                marker = '=' * topmost_section
                errwarn(' before every %s heading %s' % (marker, marker))
                errwarn(' because this strategy gives a well-functioning')
                errwarn(' table of contents in Sphinx')
                errwarn(
                    ' (use --sphinx_keep_splits to enforce your own !split commands)'
                )
                break

        if topmost_section:
            # First remove all !split
            filestr = re.sub(r'^!split *\n', '', filestr, flags=re.MULTILINE)
            # Insert new splits before all topmost sections
            marker = '=' * topmost_section
            pattern = r'^%s (.+?) %s' % (marker, marker)
            lines = filestr.splitlines()
            for lineno, line in enumerate(lines):
                if re.search(pattern, line):
                    lines[lineno] = '!split\n' + line
            filestr = '\n'.join(lines)

        # Put the title heading back
        filestr = filestr.replace(title_replacement, title)

    filestr = ref_and_label_commoncode(section_label2title, format, filestr)

    # Replace all references to sections:
    for label in section_label2title:
        filestr = filestr.replace('ref{%s}' % label, ':ref:`%s`' % label)

    # Replace remaining ref{x} as :ref:`x`
    # (raw replacement string: \g is not a valid Python string escape)
    filestr = re.sub(r'ref\{(.+?)\}', r':ref:`\g<1>`', filestr)

    return filestr
def sphinx_code_orig(filestr, format):
    """
    Old translator of !bc/!ec and !bt/!et blocks to Sphinx directives.

    NOTE: THIS FUNCTION IS NOT USED!!!!!!

    In rst syntax, code blocks are typeset with :: (verbatim)
    followed by indented blocks. This function extracts all code and
    TeX blocks, indents them, strips labels and equation environments
    from the TeX blocks, reinserts the blocks, and finally replaces the
    block delimiters by ``code-block`` and ``math`` directives.

    Arguments:
      filestr: the document text.
      format: passed on to ``remove_code_and_tex`` and ``indent_lines``.

    Returns the transformed document text.
    Raises TypeError if a mapped language is not in
    ``legal_pygments_languages``.
    """
    # grab #sphinx code-blocks: cod=python cpp=c++ etc line
    # (do this before code is inserted in case verbatim blocks contain
    # such specifications for illustration)
    defs_line = ''  # remains empty when the default mappings are used
    m = re.search(r'#\s*[Ss]phinx\s+code-blocks?:(.+?)\n', filestr)
    if m:
        defs_line = m.group(1)
        # turn defs into a dictionary definition:
        defs = {}
        for definition in defs_line.split():
            key, value = definition.split('=')
            defs[key] = value
    else:
        # default mappings:
        defs = dict(
            cod='python', pro='python',
            pycod='python', cycod='cython',
            pypro='python', cypro='cython',
            fcod='fortran', fpro='fortran',
            ccod='c', cppcod='c++',
            cpro='c', cpppro='c++',
            mcod='matlab', mpro='matlab',
            plcod='perl', plpro='perl',
            shcod='bash', shpro='bash',
            rbcod='ruby', rbpro='ruby',
            sys='console',
            dat='python',
            ipy='python',
            xmlcod='xml', xmlpro='xml', xml='xml',
            htmlcod='html', htmlpro='html', html='html',
            texcod='latex', texpro='latex', tex='latex',
        )
        # (the "python" typesetting is neutral if the text
        # does not parse as python)

    # latex constructions that do not work with sphinx math
    # (defined once; the list does not depend on the block)
    commands = [
        r'\begin{equation}', r'\end{equation}',
        r'\begin{equation*}', r'\end{equation*}',
        r'\begin{eqnarray}', r'\end{eqnarray}',
        r'\begin{eqnarray*}', r'\end{eqnarray*}',
        r'\begin{align}', r'\end{align}',
        r'\begin{align*}', r'\end{align*}',
        r'\begin{multline}', r'\end{multline}',
        r'\begin{multline*}', r'\end{multline*}',
        r'\begin{split}', r'\end{split}',
        r'\begin{gather}', r'\end{gather}',
        r'\begin{gather*}', r'\end{gather*}',
        r'\[', r'\]',
        # some common abbreviations (newcommands):
        r'\beqan', r'\eeqan',
        r'\beqa', r'\eeqa',
        r'\balnn', r'\ealnn',
        r'\baln', r'\ealn',
        r'\beq', r'\eeq',  # the simplest, contained in others, must come last!
    ]

    # first indent all code/tex blocks by 1) extracting all blocks,
    # 2) indenting each block, and 3) inserting the blocks:
    filestr, code_blocks, tex_blocks = remove_code_and_tex(filestr, format)
    for i in range(len(code_blocks)):
        code_blocks[i] = indent_lines(code_blocks[i], format)
    for i in range(len(tex_blocks)):
        tex_blocks[i] = indent_lines(tex_blocks[i], format)
        # remove all \label{}s inside tex blocks:
        tex_blocks[i] = re.sub(
            fix_latex(r'\label\{.+?\}', application='match'),
            '', tex_blocks[i])
        # remove those without \ if there are any:
        tex_blocks[i] = re.sub(r'label\{.+?\}', '', tex_blocks[i])
        # side effects: `label{eq1}` as verbatim, but this is mostly a
        # problem for doconce documentation and can be rephrased...

        for command in commands:
            tex_blocks[i] = tex_blocks[i].replace(command, '')
        # &=& -> &=
        tex_blocks[i] = re.sub(r'&\s*=\s*&', ' &= ', tex_blocks[i])

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'rst')

    for key in defs:
        language = defs[key]
        # NOTE(review): legal_pygments_languages is not defined in this
        # function; presumably a module-level name - confirm
        if language not in legal_pygments_languages:
            raise TypeError('%s is not a legal Pygments language '
                            '(lexer) in line with:\n %s' %
                            (language, defs_line))
        cpattern = re.compile(r'^!bc\s+%s\s*\n' % key, flags=re.MULTILINE)
        filestr, n = cpattern.subn('\n.. code-block:: %s\n\n' % language,
                                   filestr)
        # n is an int: format it rather than concatenate it to a str
        errwarn('%s %d' % (key, n))
        if n > 0:
            errwarn('sphinx: %d subst %s by %s' % (n, key, language))

    # any !bc with/without argument becomes a py (python) block:
    cpattern = re.compile(r'^!bc.+$', flags=re.MULTILINE)
    filestr = cpattern.sub('\n.. code-block:: py\n\n', filestr)

    filestr = re.sub(r'^!ec *\n', '\n', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!bt *\n', '\n.. math::\n\n', filestr,
                     flags=re.MULTILINE)
    filestr = re.sub(r'^!et *\n', '\n\n', filestr, flags=re.MULTILINE)

    return filestr
def define(FILENAME_EXTENSION, BLANKLINE, INLINE_TAGS_SUBST, CODE, LIST,
           ARGLIST, TABLE, EXERCISE, FIGURE_EXT, CROSS_REFS, INDEX_BIB,
           TOC, ENVIRS, QUIZ, INTRO, OUTRO, filestr):
    """
    Register the translation rules for the 'rst' format.

    All arguments except ``filestr`` are dicts that are extended in place
    with an entry for the key 'rst'. ``filestr`` (the document text) is
    only searched for non-breaking-space markup and for 'TITLE:'.
    """

    def underlined(char, tail=''):
        # Make a substitution callback that writes the heading text with a
        # row of `char`, equally long, on the next line
        def typeset(m):
            text = m.group('subst')
            return '%s\n%s%s' % (text, char * len(text), tail)
        return typeset

    def comment(text):
        # rst does not like empty comment lines, so these are removed
        # (an extra blank line is inserted first to be safe)
        if text == '' or text.isspace():
            return ''
        return '\n.. %s\n' % text

    def table_of_contents(s):
        return '.. contents:: Table of Contents\n   :depth: 2'

    FILENAME_EXTENSION['rst'] = '.rst'
    BLANKLINE['rst'] = '\n'

    INLINE_TAGS_SUBST['rst'] = {
        # math and math2 are redefined below if --rst_mathjax
        'math': r'\g<begin>\g<subst>\g<end>',
        'math2': r'\g<begin>\g<puretext>\g<end>',
        'emphasize': None,  # => just use doconce markup (*emphasized words*)
        'bold': r'\g<begin>**\g<subst>**\g<end>',
        'verbatim': r'\g<begin>``\g<subst>``\g<end>',
        'label': r'\g<subst>',  # should be improved, rst has cross ref
        'reference': r'\g<subst>',
        # colortext cannot employ pure HTML code, use bold instead
        'colortext': r'**\g<text>**',
        # Anonymous hyperlink references (double underscore) avoid
        # warnings if the link name appears twice
        'linkURL2': r'`\g<link> <\g<url>>`__',
        'linkURL3': r'`\g<link> <\g<url>>`__',
        'linkURL2v': r'`\g<link> <\g<url>>`__',  # no verbatim, does not work well
        'linkURL3v': r'`\g<link> <\g<url>>`__',  # same
        'plainURL': r'`<\g<url>>`_',
        'inlinecomment': r'color{red}{(**\g<name>**: \g<comment>})',
        # Headings: the underline depends on the length of the match
        'chapter': underlined('%'),
        'section': underlined('='),
        'subsection': underlined('-'),
        'subsubsection': underlined('~', '\n'),
        'paragraph': r'**\g<subst>**\n',  # extra newline
        'abstract': rst_abstract,
        'title': None,  # taken care of in ref_and_label_commoncode
        'date': r':Date: \g<subst>\n',
        'author': rst_author,
        'figure': rst_figure,
        'movie': rst_movie,
        'comment': comment,
        # A plain '| text' interferes with tables, so a marker is written
        # here and fixed in rst_code/sphinx_code as a hack
        'linebreak': r'<linebreakpipe> \g<text>',
        'footnote': rst_footnotes,
        'non-breaking-space': ' |nbsp| ',
        'horizontal-rule': '---------',
        'ampersand2': r' \g<1>&\g<2>',
    }
    if option('rst_mathjax'):
        # rst2html conversion requires four backslashes here for one of them
        # to survive
        INLINE_TAGS_SUBST['rst'].update(
            math=r'\g<begin>\\\\( \g<subst> \\\\)\g<end>',
            math2=r'\g<begin>\\\\( \g<latexmath> \\\\)\g<end>',
        )

    ENVIRS['rst'] = {
        'quote': rst_quote,
        'warning': rst_warning,
        'question': rst_question,
        'notice': rst_notice,
        'summary': rst_summary,
        'block': rst_block,
        'box': rst_box,
    }

    CODE['rst'] = rst_code  # function for typesetting code

    # lists must end with a blank line - we insert one extra
    LIST['rst'] = {
        'itemize': {'begin': '', 'item': '*', 'end': '\n'},
        'enumerate': {'begin': '', 'item': '%d.', 'end': '\n'},
        'description': {'begin': '', 'item': '%s', 'end': '\n'},
        'separator': '\n',
    }

    from common import DEFAULT_ARGLIST
    ARGLIST['rst'] = DEFAULT_ARGLIST
    FIGURE_EXT['rst'] = {
        'search': ('.png', '.gif', '.jpg', '.jpeg', '.pdf', '.eps', '.ps'),
        'convert': ('.png', '.gif', '.jpg')}
    CROSS_REFS['rst'] = rst_ref_and_label
    INDEX_BIB['rst'] = rst_index_bib
    TABLE['rst'] = rst_table
    EXERCISE['rst'] = plain_exercise
    TOC['rst'] = table_of_contents
    QUIZ['rst'] = rst_quiz
    INTRO['rst'] = """\
.. Automatically generated reStructuredText file from DocOnce source
   (https://github.com/hplgit/doconce/)

"""

    # http://stackoverflow.com/questions/11830242/non-breaking-space
    from common import INLINE_TAGS
    if re.search(INLINE_TAGS['non-breaking-space'], filestr):
        if 'TITLE:' in filestr:
            # Define the |nbsp| substitution used by 'non-breaking-space'
            INTRO['rst'] += """
.. |nbsp| unicode:: 0xA0
   :trim:

"""
        else:
            import common
            if common.format in ('rst', 'sphinx'):
                errwarn('*** error: non-breaking space character ~ is used,')
                errwarn(' but this will give an error when the document does')
                errwarn(' not have a title.')
                _abort()
def sphinx_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """
    Typeset code and TeX blocks with Sphinx directives.

    In rst syntax, code blocks are typeset with :: (verbatim)
    followed by indented blocks. This function indents everything
    inside code (or TeX) blocks, inserts the blocks in ``filestr`` and
    replaces the !bc/!ec and !bt/!et delimiters by ``code-block``,
    ``math`` and (with the ``runestone`` option) codelens/activecode
    directives.

    Arguments:
      filestr: document text where the blocks are to be inserted.
      code_blocks: list of code block texts (modified in place).
      code_block_types: list of the !bc environment names of the code
        blocks (``*hid`` entries may be renamed in place).
      tex_blocks: list of TeX block texts (modified in place).
      format: passed on to ``indent_lines``.

    Returns the transformed document text.
    """
    # default mappings of !bc environments and pygments languages:
    envir2pygments = dict(
        cod="python", pro="python",
        pycod="python", cycod="cython",
        pypro="python", cypro="cython",
        fcod="fortran", fpro="fortran",
        ccod="c", cppcod="c++",
        cpro="c", cpppro="c++",
        mcod="matlab", mpro="matlab",
        plcod="perl", plpro="perl",
        shcod="bash", shpro="bash",
        rbcod="ruby", rbpro="ruby",
        # sys='console',
        sys="text",
        rst="rst",
        css="css", csspro="css", csscod="css",
        dat="text", csv="text", txt="text",
        cc="text", ccq="text",  # not possible with extra indent for ccq
        ipy="ipy",
        xmlcod="xml", xmlpro="xml", xml="xml",
        htmlcod="html", htmlpro="html", html="html",
        texcod="latex", texpro="latex", tex="latex",
        latexcod="latex", latexpro="latex", latex="latex",
        do="doconce",
        pyshell="python",
        pyoptpro="python",
        pyscpro="python",
    )

    # Marker written in :include: options of activecode directives; it is
    # replaced by the ids of the preceding activecode blocks when the
    # blocks are numbered near the end of this function
    include_placeholder = "<<<PREVIOUS ACTIVECODE BLOCKS>>>"

    # grab line with:
    # sphinx code-blocks: cod=python cpp=c++ etc
    # (do this before code is inserted in case verbatim blocks contain
    # such specifications for illustration)
    defs_line = ""  # referenced in a warning below also when no line is found
    m = re.search(r".. *[Ss]phinx +code-blocks?:(.+)", filestr)
    if m:
        defs_line = m.group(1)
        # turn specifications into a dictionary:
        for definition in defs_line.split():
            key, value = definition.split("=")
            envir2pygments[key] = value

    # First indent all code blocks
    for i in range(len(code_blocks)):
        if code_block_types[i].startswith("pyoptpro") and not option("runestone"):
            code_blocks[i] = online_python_tutor(code_blocks[i], return_tp="iframe")
        # Blocks ending in -h are nested inside a toggle container (below)
        if code_block_types[i].endswith("-h"):
            indentation = " " * 8
        else:
            indentation = " " * 4
        code_blocks[i] = indent_lines(code_blocks[i], format, indentation)

    # After transforming align environments to separate equations
    # the problem with math labels in multiple eqs has disappeared.
    # (doconce.py applies align2equations, which takes all align
    # envirs and translates them to separate equations, but align*
    # environments are allowed.
    # Any output of labels in align means an error in the
    # align -> equation transformation...)
    math_labels = []
    multiple_math_labels = []  # sphinx has problems with multiple math labels

    label_regex = fix_latex(r"label\{(.+?)\}", application="match")
    # latex constructions that do not work with sphinx math (just removed);
    # eqnarray, align, split and gather environments are left intact
    commands = [
        r"\begin{equation}", r"\end{equation}",
        r"\begin{equation*}", r"\end{equation*}",
        r"\begin{multline}", r"\end{multline}",
        r"\begin{multline*}", r"\end{multline*}",
        r"\[", r"\]",
        # some common abbreviations (newcommands):
        r"\beqan", r"\eeqan",
        r"\beqa", r"\eeqa",
        r"\balnn", r"\ealnn",
        r"\baln", r"\ealn",
        r"\beq", r"\eeq",  # the simplest name, contained in others, must come last!
    ]
    for i in range(len(tex_blocks)):
        tex_blocks[i] = indent_lines(tex_blocks[i], format)
        # extract all \label{}s inside tex blocks and typeset them
        # with :label: tags
        labels = re.findall(label_regex, tex_blocks[i])
        if len(labels) == 1:
            tex_blocks[i] = "   :label: %s\n" % labels[0] + tex_blocks[i]
        elif len(labels) > 1:
            multiple_math_labels.append(labels)
        if len(labels) > 0:
            math_labels.extend(labels)
        tex_blocks[i] = re.sub(label_regex, "", tex_blocks[i])

        for command in commands:
            tex_blocks[i] = tex_blocks[i].replace(command, "")
        # &=& -> &=
        tex_blocks[i] = re.sub(r"&\s*=\s*&", " &= ", tex_blocks[i])

    # Replace all references to equations that have labels in math environments:
    for label in math_labels:
        filestr = filestr.replace("(:ref:`%s`)" % label, ":eq:`%s`" % label)

    multiple_math_labels_with_refs = []  # collect the labels with references
    for labels in multiple_math_labels:
        for label in labels:
            ref = ":eq:`%s`" % label  # ref{} is translated to eq:``
            if ref in filestr:
                multiple_math_labels_with_refs.append(label)

    if multiple_math_labels_with_refs:
        errwarn("""
*** warning: detected non-align math environment with multiple labels
 (Sphinx cannot handle this equation system - labels will be removed
 and references to them will be empty):""")
        for label in multiple_math_labels_with_refs:
            errwarn(" label{%s}" % label)
        # NOTE(review): a bare `print` emits a blank line under Python 2
        # but is a no-op expression under Python 3 - confirm intent
        print

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, "sphinx")

    # Remove all !bc ipy and !bc pyshell since interactive sessions
    # are automatically handled by sphinx without indentation
    # (just a blank line before and after)
    # NOTE(review): the flags below are passed positionally and land in
    # re.sub's `count` parameter, so the patterns are used without
    # MULTILINE/DOTALL and can never match. This is left unchanged on
    # purpose: all blocks are already indented above and receive a
    # code-block directive below, so activating the substitutions
    # (flags=...) would leave ipy/pyshell blocks without a directive.
    filestr = re.sub(r"^!bc +d?ipy *\n(.*?)^!ec *\n", "\n\\g<1>\n", filestr, re.DOTALL | re.MULTILINE)
    filestr = re.sub(r"^!bc +d?pyshell *\n(.*?)^!ec *\n", "\n\\g<1>\n", filestr, re.DOTALL | re.MULTILINE)

    # Check if we have custom pygments lexers
    if "ipy" in code_block_types:
        if not has_custom_pygments_lexer("ipy"):
            envir2pygments["ipy"] = "python"
    if "do" in code_block_types:
        if not has_custom_pygments_lexer("doconce"):
            envir2pygments["do"] = "text"

    # Make correct code-block:: language constructions
    legal_pygments_languages = get_legal_pygments_lexers()
    for key in set(code_block_types):
        if key in envir2pygments:
            if envir2pygments[key] not in legal_pygments_languages:
                errwarn("""*** warning: %s is not a legal Pygments language (lexer)
found in line:
 %s

The 'text' lexer will be used instead.
""" % (envir2pygments[key], defs_line))
                envir2pygments[key] = "text"

        # Check that we have code installed to handle pyscpro
        if "pyscpro" in filestr and key == "pyscpro":
            try:
                import icsecontrib.sagecellserver
            except ImportError:
                errwarn("""
*** warning: pyscpro for computer code (sage cells) is requested, but'
 icsecontrib.sagecellserver from https://github.com/kriskda/sphinx-sagecell
 is not installed. Using plain Python typesetting instead.""")
                # NOTE(review): from here on the "!bc pypro" pattern is
                # substituted, not "!bc pyscpro"; remaining pyscpro blocks
                # are caught by the final catch-all (text) - confirm intent
                key = "pypro"

        if key == "pyoptpro":
            if option("runestone"):
                filestr = re.sub(
                    r"^!bc\s+%s\s*\n" % key,
                    "\n.. codelens:: codelens_\n   :showoutput:\n\n",
                    filestr, flags=re.MULTILINE)
            else:
                filestr = re.sub(r"^!bc\s+%s\s*\n" % key,
                                 "\n.. raw:: html\n\n",
                                 filestr, flags=re.MULTILINE)
        elif key == "pyscpro":
            if option("runestone"):
                filestr = re.sub(
                    r"^!bc\s+%s\s*\n" % key,
                    """
.. activecode:: activecode_
   :language: python

""", filestr, flags=re.MULTILINE)
            else:
                filestr = re.sub(r"^!bc\s+%s\s*\n" % key,
                                 "\n.. sagecellserver::\n\n",
                                 filestr, flags=re.MULTILINE)
        elif key == "pysccod":
            if option("runestone"):
                # Include (i.e., run) all previous code segments...
                # NOTE: this is most likely not what we want
                # (the block numbers are not known yet, so a placeholder
                # is written and resolved when the blocks are numbered)
                filestr = re.sub(
                    r"^!bc\s+%s\s*\n" % key,
                    """
.. activecode:: activecode_
   :language: python
   :include: %s
""" % include_placeholder, filestr, flags=re.MULTILINE)
            else:
                errwarn(
                    "*** error: pysccod for sphinx is not supported without the --runestone flag\n (but pyscpro is via Sage Cell Server)"
                )
                _abort()
        elif key == "":
            # any !bc with/without argument becomes a text block:
            filestr = re.sub(r"^!bc$", "\n.. code-block:: text\n\n",
                             filestr, flags=re.MULTILINE)
        elif key.endswith("hid"):
            if key in ("pyhid", "jshid", "htmlhid") and option("runestone"):
                # Allow runestone books to run hidden code blocks
                # (replace pyhid by pycod, then remove all !bc *hid)
                for i in range(len(code_block_types)):
                    if code_block_types[i] == key:
                        code_block_types[i] = key.replace("hid", "cod")

                key2language = dict(py="python", js="javascript", html="html")
                language = key2language[key.replace("hid", "")]
                filestr = re.sub(
                    r"^!bc +%s\s*\n" % key,
                    """
.. activecode:: activecode_
   :language: %s
   :include: %s
   :hidecode:

""" % (language, include_placeholder), filestr, flags=re.MULTILINE)
            else:
                # Remove hidden code block
                pattern = r"^!bc +%s\n.+?^!ec" % key
                filestr = re.sub(pattern, "", filestr,
                                 flags=re.MULTILINE | re.DOTALL)
        else:
            show_hide = False
            if key.endswith("-h"):
                key_orig = key
                key = key[:-2]
                show_hide = True
            # Use the standard sphinx code-block directive
            if key in envir2pygments:
                pygments_language = envir2pygments[key]
            elif key in legal_pygments_languages:
                pygments_language = key
            else:
                errwarn('*** error: detected code environment "%s"' % key)
                errwarn(" which is not registered in sphinx.py (sphinx_code)")
                errwarn(" or not a language registered in pygments")
                _abort()
            if show_hide:
                filestr = re.sub(
                    r"^!bc +%s\s*\n" % key_orig,
                    "\n.. container:: toggle\n\n    .. container:: header\n\n        **Show/Hide Code**\n\n    .. code-block:: %s\n\n" % pygments_language,
                    filestr, flags=re.MULTILINE)
                # Must add 4 indent in corresponding code_blocks[i], done above
            else:
                filestr = re.sub(
                    r"^!bc +%s\s*\n" % key,
                    "\n.. code-block:: %s\n\n" % pygments_language,
                    filestr, flags=re.MULTILINE)

    # any !bc with/without argument becomes a text block:
    filestr = re.sub(r"^!bc.*$", "\n.. code-block:: text\n\n", filestr,
                     flags=re.MULTILINE)
    filestr = re.sub(r"^!ec *\n", "\n", filestr, flags=re.MULTILINE)
    filestr = re.sub(r"^!bt *\n", "\n.. math::\n", filestr, flags=re.MULTILINE)
    filestr = re.sub(r"^!et *\n", "\n", filestr, flags=re.MULTILINE)

    # Fix lacking blank line after :label:
    filestr = re.sub(r"^(   :label: .+?)(\n *[^ ]+)", r"\g<1>\n\n\g<2>",
                     filestr, flags=re.MULTILINE)

    # Insert counters for runestone blocks
    if option("runestone"):
        codelens_counter = 0
        activecode_counter = 0
        numbered = []
        for line in filestr.splitlines():
            if ".. codelens:: codelens_" in line:
                codelens_counter += 1
                line = line.replace("codelens_",
                                    "codelens_%d" % codelens_counter)
            if ".. activecode:: activecode_" in line:
                activecode_counter += 1
                line = line.replace("activecode_",
                                    "activecode_%d" % activecode_counter)
            if include_placeholder in line:
                # The directive line of the current block has already been
                # counted, so range() stops at the previous block
                previous = ", ".join("activecode_%d" % j
                                     for j in range(1, activecode_counter))
                if not previous:
                    continue  # first block: drop the empty :include: option
                line = line.replace(include_placeholder, previous)
            numbered.append(line)
        filestr = "\n".join(numbered)

    # Final fixes

    filestr = fix_underlines_in_headings(filestr)
    # Ensure blank line before and after comments
    filestr = re.sub(r"([.:;?!])\n^\.\. ", r"\g<1>\n\n.. ",
                     filestr, flags=re.MULTILINE)
    filestr = re.sub(r"(^\.\. .+)\n([^ \n]+)", r"\g<1>\n\n\g<2>",
                     filestr, flags=re.MULTILINE)

    # Line breaks interfere with tables and need a final blank line too
    lines = filestr.splitlines()
    inside_block = False
    for i in range(len(lines)):
        if lines[i].startswith("<linebreakpipe>") and not inside_block:
            inside_block = True
            lines[i] = lines[i].replace("<linebreakpipe> ", "") + "\n"
            continue
        if lines[i].startswith("<linebreakpipe>") and inside_block:
            lines[i] = "|" + lines[i].replace("<linebreakpipe>", "")
            continue
        if inside_block and not lines[i].startswith("<linebreakpipe>"):
            inside_block = False
            lines[i] = "| " + lines[i] + "\n"
    filestr = "\n".join(lines)

    # Remove double !split (TOC with a prefix !split gives two !splits)
    pattern = r"^.. !split\s+.. !split"
    filestr = re.sub(pattern, ".. !split", filestr, flags=re.MULTILINE)

    if option("html_links_in_new_window"):
        # Open external links in new windows by means of JavaScript
        filestr = """.. raw:: html

        <script type="text/javascript">
        $(document).ready(function() {
            $("a[href^='http']").attr('target','_blank');
        });
        </script>

""" + filestr

    # Remove too much vertical space
    filestr = re.sub(r"\n{3,}", "\n\n", filestr)

    return filestr
def define(FILENAME_EXTENSION, BLANKLINE, INLINE_TAGS_SUBST, CODE, LIST,
           ARGLIST, TABLE, EXERCISE, FIGURE_EXT, CROSS_REFS, INDEX_BIB,
           TOC, ENVIRS, QUIZ, INTRO, OUTRO, filestr):
    """
    Register the translation rules for the 'rst' format.

    All arguments except ``filestr`` are dicts that are extended in place
    with an entry for the key 'rst'. ``filestr`` (the document text) is
    only searched for non-breaking-space markup and for 'TITLE:'.
    """

    def underlined(char, tail=''):
        # Make a substitution callback that writes the heading text with a
        # row of `char`, equally long, on the next line
        def typeset(m):
            text = m.group('subst')
            return '%s\n%s%s' % (text, char * len(text), tail)
        return typeset

    def comment(text):
        # rst does not like empty comment lines, so these are removed
        # (an extra blank line is inserted first to be safe)
        if text == '' or text.isspace():
            return ''
        return '\n.. %s\n' % text

    def table_of_contents(s, f):
        # The heading text is looked up in the locale at call time
        heading = locale_dict[locale_dict['language']].get(
            'toc', 'Table of contents')
        return '.. contents:: %s\n   :depth: 2' % heading

    FILENAME_EXTENSION['rst'] = '.rst'
    BLANKLINE['rst'] = '\n'

    INLINE_TAGS_SUBST['rst'] = {
        # math and math2 are redefined below if --rst_mathjax
        'math': r'\g<begin>\g<subst>\g<end>',
        'math2': r'\g<begin>\g<puretext>\g<end>',
        'emphasize': None,  # => just use doconce markup (*emphasized words*)
        'bold': r'\g<begin>**\g<subst>**\g<end>',
        'verbatim': r'\g<begin>``\g<subst>``\g<end>',
        'label': r'\g<subst>',  # should be improved, rst has cross ref
        'reference': r'\g<subst>',
        # colortext cannot employ pure HTML code, use bold instead
        'colortext': r'**\g<text>**',
        # Anonymous hyperlink references (double underscore) avoid
        # warnings if the link name appears twice
        'linkURL2': r'`\g<link> <\g<url>>`__',
        'linkURL3': r'`\g<link> <\g<url>>`__',
        'linkURL2v': r'`\g<link> <\g<url>>`__',  # no verbatim, does not work well
        'linkURL3v': r'`\g<link> <\g<url>>`__',  # same
        'plainURL': r'`<\g<url>>`_',
        'inlinecomment': r'color{red}{(**\g<name>**: \g<comment>})',
        # Headings: the underline depends on the length of the match
        'chapter': underlined('%'),
        'section': underlined('='),
        'subsection': underlined('-'),
        'subsubsection': underlined('~', '\n'),
        'paragraph': r'**\g<subst>**' + '\n',  # extra newline
        'abstract': rst_abstract,
        'title': None,  # taken care of in ref_and_label_commoncode
        'date': r':Date: \g<subst>\n',
        'author': rst_author,
        'figure': rst_figure,
        'movie': rst_movie,
        'comment': comment,
        # A plain '| text' interferes with tables, so a marker is written
        # here and fixed in rst_code/sphinx_code as a hack
        'linebreak': r'<linebreakpipe> \g<text>',
        'footnote': rst_footnotes,
        'non-breaking-space': ' |nbsp| ',
        'horizontal-rule': '---------',
        'ampersand2': r' \g<1>&\g<2>',
    }
    if option('rst_mathjax'):
        # rst2html conversion requires four backslashes here for one of them
        # to survive
        INLINE_TAGS_SUBST['rst'].update(
            math=r'\g<begin>\\\\( \g<subst> \\\\)\g<end>',
            math2=r'\g<begin>\\\\( \g<latexmath> \\\\)\g<end>',
        )

    ENVIRS['rst'] = {
        'quote': rst_quote,
        'warning': rst_warning,
        'question': rst_question,
        'notice': rst_notice,
        'summary': rst_summary,
        'block': rst_block,
        'box': rst_box,
    }

    CODE['rst'] = rst_code  # function for typesetting code

    # lists must end with a blank line - we insert one extra
    LIST['rst'] = {
        'itemize': {'begin': '', 'item': '*', 'end': '\n'},
        'enumerate': {'begin': '', 'item': '%d.', 'end': '\n'},
        'description': {'begin': '', 'item': '%s', 'end': '\n'},
        'separator': '\n',
    }

    from common import DEFAULT_ARGLIST
    ARGLIST['rst'] = DEFAULT_ARGLIST
    FIGURE_EXT['rst'] = {
        'search': ('.png', '.gif', '.jpg', '.jpeg', '.pdf', '.eps', '.ps'),
        'convert': ('.png', '.gif', '.jpg')}
    CROSS_REFS['rst'] = rst_ref_and_label
    INDEX_BIB['rst'] = rst_index_bib
    TABLE['rst'] = rst_table
    EXERCISE['rst'] = plain_exercise
    TOC['rst'] = table_of_contents
    QUIZ['rst'] = rst_quiz
    INTRO['rst'] = """\
.. Automatically generated reStructuredText file from DocOnce source
   (https://github.com/hplgit/doconce/)

"""

    # http://stackoverflow.com/questions/11830242/non-breaking-space
    from common import INLINE_TAGS
    if re.search(INLINE_TAGS['non-breaking-space'], filestr):
        if 'TITLE:' in filestr:
            # Define the |nbsp| substitution used by 'non-breaking-space'
            INTRO['rst'] += """
.. |nbsp| unicode:: 0xA0
   :trim:

"""
        else:
            import common
            if common.format in ('rst', 'sphinx'):
                errwarn('*** error: non-breaking space character ~ is used,')
                errwarn(' but this will give an error when the document does')
                errwarn(' not have a title.')
                _abort()
def fix_text(s, tp="answer"):
    """
    Answers and feedback in RunestoneInteractive book quizzes
    cannot contain math, figure and rst markup. Perform fixes.

    Arguments:
      s: the rst text of the answer/feedback.
      tp: name of the kind of text, used in warning messages only.

    Returns "" (after a warning) if ``s`` holds a math block, a code
    block or a figure. Otherwise ``s`` is returned as a one-line string
    where rst links, verbatim text and inline math are turned into HTML.
    """
    unsupported = (
        ("math::", "math block"),
        (".. code-block::", "code block"),
        (".. figure::", "figure"),
    )
    drop = False
    for marker, description in unsupported:
        if marker in s:
            errwarn("\n*** warning: quiz %s with %s not supported:"
                    % (tp, description))
            errwarn(s)
            drop = True
    if drop:
        return ""

    # Make multi-line paragraph a one-liner
    s = " ".join(s.splitlines()).rstrip()

    # Fixes
    # URL: `text <http...>`__ (the < must stay outside the URL group,
    # otherwise it ends up inside the href attribute)
    pattern = r"`(.+?) <(https?.+?)>`__"
    s = re.sub(pattern, r'<a href="\g<2>"> \g<1> </a>', s)
    pattern = r"``(.+?)``"  # verbatim
    s = re.sub(pattern, r"<tt>\g<1></tt>", s)
    pattern = r":math:`(.+?)`"  # inline math
    s = re.sub(pattern, r"<em>\g<1></em>", s)  # mimic italic....
    # NOTE(review): this pattern requires a colon in front of *text*
    # (and removes it); plain *emphasize* is not matched - confirm intent
    pattern = r":\*(.+?)\*"  # emphasize
    s = re.sub(pattern, r"\g<1>", s, flags=re.DOTALL)
    return s
def pandoc_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """
    Typeset code and TeX blocks for pandoc/Markdown output.

    TeX blocks are stripped of ``\\[``, ``\\]`` and equation* environments
    and wrapped in ``$$``. Code blocks are indented (option
    ``strict_markdown_output``) or fenced by tildes or, with the option
    ``github_md``, by backticks. The blocks are then inserted in
    ``filestr`` and the !bc/!ec and !bt/!et delimiters are replaced.

    Arguments:
      filestr: document text where the blocks are to be inserted.
      code_blocks: list of code block texts (modified in place).
      code_block_types: not used by this function.
      tex_blocks: list of TeX block texts (modified in place).
      format: passed on to ``indent_lines`` and ``insert_code_and_tex``.

    Returns the transformed document text.
    """
    # (Expansion of newcommands via html.embed_newcommands was tried here
    # earlier and is disabled.)

    # Note: the tex code require the MathJax fix of doconce md2html
    # to insert right MathJax extensions to interpret align and labels
    # correctly.
    # (Also, doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks.)
    for i in range(len(tex_blocks)):
        # Remove latex envir in single equations
        tex_blocks[i] = tex_blocks[i].replace(r"\[", "")
        tex_blocks[i] = tex_blocks[i].replace(r"\]", "")
        tex_blocks[i] = tex_blocks[i].replace(r"\begin{equation*}", "")
        tex_blocks[i] = tex_blocks[i].replace(r"\end{equation*}", "")

        # Check for illegal environments
        m = re.search(r"\\begin\{(.+?)\}", tex_blocks[i])
        if m:
            envir = m.group(1)
            if envir not in ("equation", "equation*", "align*", "align",
                             "array"):
                errwarn("""\
*** warning: latex envir \\begin{%s} does not work well.
""" % envir)
        # Add $$ on each side of the equation
        tex_blocks[i] = "$$\n" + tex_blocks[i] + "$$\n"
    # Note: HTML output from pandoc requires $$ while latex cannot have
    # them if begin-end inside ($$\begin{...} \end{...}$$)

    strict_markdown = option("strict_markdown_output")
    if strict_markdown:
        # Code blocks are just indented
        for i in range(len(code_blocks)):
            code_blocks[i] = indent_lines(code_blocks[i], format)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    github_md = option("github_md")

    if not strict_markdown:
        if github_md:
            # NOTE: this lowercases the shared language2pandoc mapping in place
            for key in language2pandoc:
                language2pandoc[key] = language2pandoc[key].lower()

        # Code blocks apply the ~~~~~ delimiter, with blank lines before
        # and after
        for key in language2pandoc:
            if github_md:
                replacement = "\n```%s\n" % language2pandoc[key]
            else:
                # pandoc-extended Markdown
                replacement = "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.%s}\n" % language2pandoc[key]
            filestr = re.sub(r"^!bc\s+%s\s*\n" % key, replacement, filestr,
                             flags=re.MULTILINE)

        # any !bc with/without argument becomes an unspecified block
        if github_md:
            replacement = "\n```"
        else:
            replacement = "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
        filestr = re.sub(r"^!bc.*$", replacement, filestr, flags=re.MULTILINE)

        if github_md:
            replacement = "```\n"
        else:
            replacement = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
        filestr = re.sub(r"^!ec\s*$", replacement, filestr, flags=re.MULTILINE)
    else:
        # Strict Markdown: just indented blocks
        filestr = re.sub(r"^!bc.*$", "", filestr, flags=re.MULTILINE)
        filestr = re.sub(r"^!ec\s*$", "", filestr, flags=re.MULTILINE)

    filestr = re.sub(r"^!bt *\n", "", filestr, flags=re.MULTILINE)
    filestr = re.sub(r"^!et *\n", "", filestr, flags=re.MULTILINE)

    # \eqref and labels will not work, but labels do no harm
    filestr = filestr.replace(" label{", " \\label{")
    # In re.sub replacement templates the backslash itself must be escaped:
    # a lone \l or \e is an unknown escape, which is an error in
    # Python 3.7 and later
    pattern = r"^label\{"
    filestr = re.sub(pattern, r"\\label{", filestr, flags=re.MULTILINE)
    filestr = re.sub(r"\(ref\{(.+?)\}\)", r"\\eqref{\g<1>}", filestr)

    # Final fixes

    # Seems that title and author must appear on the very first lines
    filestr = filestr.lstrip()

    # Enable tasks lists:
    #   - [x] task 1 done
    #   - [ ] task 2 not yet done
    if github_md:
        pattern = r"^(\s+)\*\s+(\[[x ]\])\s+"
        filestr = re.sub(pattern, r"\g<1>- \g<2> ", filestr,
                         flags=re.MULTILINE)

    return filestr
def sphinx_figure(m):
    """Render a DocOnce figure match as a Sphinx ``.. figure::`` directive.

    Args:
        m: regex match object providing the named groups ``filename``,
            ``caption`` and ``options``.

    Returns:
        reStructuredText consisting of an optional ``.. _label:`` target
        (when the caption holds ``label{...}``), the ``.. figure::`` line,
        one ``:opt: value`` line per option (``frac`` and ``sidecap`` are
        skipped) and the caption text.
    """
    filename = m.group("filename")
    caption = m.group("caption").strip()
    result = ""

    # Substitute DocOnce label by rst label in caption (also, remove the
    # final period in the caption since the caption is used as hyperlink
    # text to figures).
    m_label = re.search(r"label\{(.+?)\}", caption)
    if m_label:
        result += "\n.. _%s:\n" % m_label.group(1)
        # Keep only the text in front of the matched label{...} construct.
        # Slicing at the match position (rather than splitting on the bare
        # word "label") keeps captions such as "Axis labels. label{fig:a}"
        # intact.
        text = caption[: m_label.start()].rstrip()
        if text.endswith("."):
            text = text[:-1]
        text = text.strip()
        # Insert emphasize marks if not latex $ at the beginning or end
        # (math subst does not work for *$I=1$*) and if no boldface or
        # emphasize markup is already present in the caption.
        caption_font = option("sphinx_figure_captions=", "emphasize")
        if (
            text
            and caption_font == "emphasize"
            and not text.startswith("$")
            and not text.endswith("$")
            and "*" not in text
            and "_" not in text
        ):
            text = "*" + text + "*"
        # Contrary to rst_figure, the label is not written back into the
        # caption: the whole label is removed from the caption text.
        caption = text
    elif caption.endswith("."):
        caption = caption[:-1]

    # Math is ignored in references to figures, test for math only
    if caption.startswith("$") and caption.endswith("$"):
        errwarn(
            "*** warning: math only in sphinx figure caption (it will be ignored by sphinx, resulting in empty caption)\n %s\n FIGURE: [%s"
            % (caption, filename)
        )

    result += "\n.. figure:: " + filename + "\n"
    opts = m.group("options")
    if opts:
        # opts: width=600 frac=0.5 align=center
        # opts: width=600, frac=0.5, align=center
        info = [s.split("=") for s in opts.split()]
        fig_info = [
            " :%s: %s" % (opt, value.replace(",", ""))
            for opt, value in info
            if opt not in ("frac", "sidecap")
        ]
        result += "\n".join(fig_info)
    if caption:
        result += "\n\n " + caption + "\n"
    else:
        result += "\n\n"
    return result
def matlabnb_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Typeset code blocks, math blocks and running text for Matlab notebooks.

    Args:
        filestr: document text in which code/tex blocks are represented by
            numbered placeholder lines (matched via ``_CODE_BLOCK`` and
            ``_MATH_BLOCK`` from ``common``).
        code_blocks: list of code block strings; modified in place.
        code_block_types: environment name for each entry of code_blocks.
        tex_blocks: list of LaTeX block strings; modified in place.
        format: output format name (not used by this function).

    Returns:
        The document text with blocks inserted, non-code text turned into
        ``%`` comments and headings turned into ``%%`` lines.
    """
    # Remove all begin-end and \[ \] in tex blocks, join to one line,
    # embed in $$. Write a warning if anything else than a single equation.
    for i in range(len(tex_blocks)):
        m = re.search(r'begin\{(.+?)\}', tex_blocks[i])
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*'):
                errwarn('*** warning: \\begin{%s}-\\end{%s} does not work in Matlab notebooks' % (envir, envir))
            # The environment name must be escaped: in "equation*" the
            # star would otherwise act as a regex quantifier and the
            # \begin/\end lines would never be matched.
            envir_re = re.escape(envir)
            tex_blocks[i] = re.sub(r'\\begin\{%s\}\s+' % envir_re, '', tex_blocks[i])
            tex_blocks[i] = re.sub(r'\\end\{%s\}\s+' % envir_re, '', tex_blocks[i])
        tex_blocks[i] = re.sub(r'\\\[', '', tex_blocks[i])
        tex_blocks[i] = re.sub(r'\\\]', '', tex_blocks[i])
        tex_blocks[i] = re.sub(r'label\{(.+?)\}', '', tex_blocks[i])
        tex_blocks[i] = '$$' + ' '.join(tex_blocks[i].strip().splitlines()).strip() + '$$'
        # Note: now the tex block ends with $$!et

    # Insert % in code if envir with -t name or if not Matlab code
    for i in range(len(code_blocks)):
        executable_matlab = code_block_types[i] in ('mcod', 'mpro')
        if not executable_matlab:
            # Note that monospace font requires two blanks after %
            code_blocks[i] = '\n'.join([
                '%  ' + line for line in code_blocks[i].splitlines()
                if not line.startswith(('!bc', '!ec'))]) + '\n'

    # Insert % at the beginning of each line
    from common import _CODE_BLOCK, _MATH_BLOCK
    code_line = r'^\d+ ' + _CODE_BLOCK
    code_line_problem = r' (\d+ ' + _CODE_BLOCK + ')'
    math_line = r'^\d+ ' + _MATH_BLOCK
    math_line_problem = r' (\d+ ' + _MATH_BLOCK + ')'
    heading_no = 0
    lines = filestr.splitlines()
    for i in range(len(lines)):
        if re.search(code_line, lines[i], flags=re.MULTILINE):
            if heading_no < 2:
                # Add %% (empty heading) before code block because
                # code cannot come after the first heading, only
                # after the second and onwards
                lines[i] = '%%\n' + lines[i]
            continue
        elif re.search(math_line, lines[i], flags=re.MULTILINE):
            continue
        elif re.search(code_line_problem, lines[i], flags=re.MULTILINE):
            # Paragraphs can move a block indicator after its heading,
            # insert \n
            lines[i] = re.sub(code_line_problem, r'\n\g<1>', lines[i])
        elif re.search(math_line_problem, lines[i], flags=re.MULTILINE):
            # Paragraphs can move a block indicator after its heading,
            # insert \n
            lines[i] = re.sub(math_line_problem, r'\n\g<1>', lines[i])
        elif lines[i].startswith('>>>H'):
            # Heading
            lines[i] = '%%' + lines[i].replace('>>>H', '')
            heading_no += 1
        else:
            lines[i] = '% ' + lines[i]
    filestr = '\n'.join(lines)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, 'matlabnb')
    filestr = re.sub(r'\$\$!et', '$$', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!bt\s+\$\$', '% $$', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!bc.+', '', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!ec', '', filestr, flags=re.MULTILINE)
    # Remove all blank lines
    filestr = re.sub(r'^\s+', '', filestr, flags=re.MULTILINE)
    # Fix emphasize markup (conflicts with boldface so we do a hack)
    filestr = re.sub(r'\^\^\^X(.+?)X\^\^\^', r'_\g<1>_', filestr,
                     flags=re.DOTALL)  # emph
    filestr = re.sub(r'\{\{\{X(.+?)X\}\}\}', r'*\g<1>*', filestr,
                     flags=re.DOTALL)  # bold
    filestr = re.sub(r'<<<X(.+?)X>>>', r'|\g<1>|', filestr,
                     flags=re.DOTALL)  # verb
    return filestr
def mwiki_figure(m):
    """Typeset a DocOnce figure match as MediaWiki ``[[File:...]]`` markup.

    Local files that are not PNG/GIF/JPEG are converted to PNG with
    ImageMagick's ``convert``. For local files, en.wikipedia.org is queried
    to see whether an image with the same name already exists there.

    Args:
        m: regex match object providing the named groups ``filename``,
            ``caption`` and ``options``.

    Returns:
        The MediaWiki markup for the figure.

    Raises:
        IOError: if ``filename`` is not a URL and no such file exists.
    """
    filename = m.group('filename')
    link = filename if filename.startswith('http') else None
    if not link and not os.path.isfile(filename):
        raise IOError('no figure file %s' % filename)

    root, ext = os.path.splitext(filename)
    if link is None and ext not in ('.png', '.gif', '.jpg', '.jpeg'):
        # try to convert image file to PNG, using
        # convert from ImageMagick:
        png_file = root + '.png'
        cmd = 'convert %s png:%s' % (filename, png_file)  # for messages
        try:
            # Argument list without a shell: file names with blanks or
            # shell metacharacters are passed through unharmed.
            subprocess.check_output(['convert', filename, 'png:' + png_file],
                                    stderr=subprocess.STDOUT)
        except (subprocess.CalledProcessError, OSError):
            # OSError: the convert program could not be started at all
            errwarn('\n**** warning: could not run ' + cmd)
            errwarn(' convert %s to PNG format manually' % filename)
            _abort()
        filename = png_file

    caption = m.group('caption').strip()
    if caption != '':
        caption = '|' + caption  # add | for non-empty caption
    else:
        # Avoid filename as caption when caption is empty
        # see http://www.mediawiki.org/wiki/Help:Images
        caption = '|<span title=""></span>'
    # keep label if it's there:
    caption = re.sub(r'label\{(.+?)\}', r'(\g<1>)', caption)

    size = ''
    opts = m.group('options').strip()
    if opts:
        info = dict([s.split('=') for s in opts.split()])
        if 'width' in info and 'height' in info:
            size = '|%sx%spx' % (info['width'], info['height'])
        elif 'width' in info:
            size = '|%spx' % info['width']
        elif 'height' in info:
            size = '|x%spx' % info['height']

    if link:
        # We link to some image on the web
        filename = os.path.basename(filename)
        link = os.path.dirname(link)
        result = r"""
[[File:%s|frame%s|link=%s|alt=%s%s]]
""" % (filename, size, link, filename, caption)
    else:
        # We try to link to a file at wikimedia.org.
        found_wikimedia = False
        orig_filename = filename
        # Check if the file exists and find the appropriate wikimedia name.
        # http://en.wikipedia.org/w/api.php?action=query&titles=Image:filename&prop=imageinfo&format=xml

        # Skip directories - get the basename
        filename = os.path.basename(filename)
        try:
            from urllib.parse import urlencode
            from urllib.request import urlopen
        except ImportError:  # Python 2
            from urllib import urlencode, urlopen
        prms = urlencode({
            'action': 'query', 'titles': 'Image:' + filename,
            'prop': 'imageinfo', 'format': 'xml'})
        url = 'http://en.wikipedia.org/w/api.php?' + prms

        def get_data(name, text):
            """Return the value of XML attribute name in text, or None."""
            hit = re.search('%s="(.*?)"' % name, text)
            if not hit:
                return None
            value = hit.group(1)
            if 'Image:' in value:
                return value.split('Image:')[1]
            if 'File:' in value:
                return value.split('File:')[1]
            return value

        try:
            errwarn(' ...checking if %s is stored at en.wikipedia.org/w/api.php...' % filename)
            f = urlopen(url)
            try:
                imageinfo = f.read()
            finally:
                f.close()
            if not isinstance(imageinfo, str):
                # Python 3 delivers bytes; the regexes above need text
                imageinfo = imageinfo.decode('utf-8', 'replace')

            orig_filename = filename
            title = get_data('title', imageinfo)
            if title:
                # Without a title in the reply we keep the local name
                # instead of ending up with filename = None.
                filename = title
            user = get_data('user', imageinfo)
            timestamp = get_data('timestamp', imageinfo)
            if user:
                found_wikimedia = True
                errwarn(' ...found %s at wikimedia' % filename)
                result = r"""
[[File:%s|frame%s|alt=%s%s]] <!-- user: %s, filename: %s, timestamp: %s -->
""" % (filename, size, filename, caption, user, orig_filename, timestamp)
        except IOError:
            errwarn(' ...no Internet connection...')

        if not found_wikimedia:
            errwarn(' ...for wikipedia/wikibooks you must upload image file %s to\n common.wikimedia.org' % orig_filename)
            # see http://commons.wikimedia.org/wiki/Commons:Upload
            # and http://commons.wikimedia.org/wiki/Special:UploadWizard
            errwarn(' ...for now we use local file %s' % filename)
            # This is fine if we use github wiki
            result = r"""
[[File:%s|frame%s|alt=%s%s]] <!-- not yet uploaded to common.wikimedia.org -->
""" % (filename, size, filename, caption)

    return result
def mwiki_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Typeset code and math blocks in MediaWiki syntax.

    Equation environments and labels are stripped from the tex blocks
    (modified in place), the blocks are inserted into filestr, and the
    ``!bc``/``!ec``/``!bt``/``!et`` markers are turned into
    ``<syntaxhighlight>`` and ``<math>`` tags.

    Returns:
        The transformed document text.
    """
    # http://en.wikipedia.org/wiki/Help:Displaying_a_formula
    # MediaWiki math does not support labels in equations.
    # The enviros equation and \[ \] must be removed (not supported).
    #
    # Standard align works in Wikipedia and Wikibooks. It gives somewhat
    # ugly output on wiiki.com services, but a set of separate equations
    # is not much better, so align is kept (no align2equations here).
    for idx, block in enumerate(tex_blocks):
        block, labels = remove_labels(equation2nothing(block))
        tex_blocks[idx] = block
        for label in labels:
            if label in filestr:
                errwarn(
                    '*** warning: reference to label "%s" in an equation does not work in MediaWiki' % label)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    # Supported programming languages:
    # http://www.mediawiki.org/wiki/Extension:SyntaxHighlight_GeSHi#Supported_languages
    lang2envirs = (
        ('python', 'cod pycod cycod pro pypro cypro '
                   'pyoptpro pyscpro ipy pyshell'),
        ('fortran', 'fcod fpro'),
        ('c', 'ccod cpro'),
        ('cpp', 'cppcod cpppro'),
        ('matlab', 'mcod mpro'),
        ('perl', 'plcod plpro'),
        ('bash', 'shcod shpro sys'),
        ('ruby', 'rbpro rbcod'),
        ('java', 'javacod javapro'),
        ('html5', 'htmlcod htmlpro html'),
        ('xml', 'xmlcod xmlpro xml'),
        ('text', 'dat csv txt'),
    )
    envir2lang = {envir: lang
                  for lang, envirs in lang2envirs
                  for envir in envirs.split()}

    for envir, lang in envir2lang.items():
        filestr = re.sub(r'^!bc\s+%s\s*\n' % envir,
                         '<syntaxhighlight lang="%s">\n' % lang,
                         filestr, flags=re.MULTILINE)
    # Any remaining !bc (with or without argument) becomes plain text
    filestr = re.sub(r'^!bc.*$\n', '<syntaxhighlight lang="text">\n',
                     filestr, flags=re.MULTILINE)
    filestr = filestr.replace('!ec\n', '</syntaxhighlight>\n')
    filestr = re.sub(r'^!bt\n', ':<math>\n', filestr, flags=re.MULTILINE)
    filestr = filestr.replace('!et\n', '</math>\n')

    # Final fix of MediaWiki file:
    # __TOC__ syntax is misinterpreted as a paragraph heading, so
    # <<<TOC>>> is used instead and replaced by the right syntax here.
    return filestr.replace('<<<TOC>>>', '__TOC__')
def ipynb_figure(m):
    """Typeset a DocOnce figure match for Jupyter notebook output.

    The markup depends on the ``--ipynb_figure=`` option: ``md`` (Markdown
    image syntax), ``imgtag`` (plain HTML ``<img>`` tag, the default) or
    ``Image`` (a code cell calling ``IPython.display.Image``).

    Args:
        m: regex match object providing the named groups ``filename``,
            ``caption`` and ``options``.

    Returns:
        The text replacing the figure, wrapped in
        ``<!-- begin figure -->`` / ``<!-- end figure -->`` comments.

    Side effects:
        Appends local (non-http) file names to the global ``figure_files``
        and sets the global ``figure_encountered`` the first time the
        ``Image`` method emits its import line.
    """
    global figure_files, figure_encountered

    # m.group() must be called before m.group('name')
    text = '<!-- dom:%s -->\n<!-- begin figure -->\n' % m.group()

    filename = m.group('filename')
    caption = m.group('caption').strip()
    opts = m.group('options').strip()
    if opts:
        info = [s.split('=') for s in opts.split()]
        opts = ' '.join('%s=%s' % (opt, value) for opt, value in info
                        if opt not in ('frac', 'sidecap'))

    if not filename.startswith('http'):
        figure_files.append(filename)

    # Extract optional label in caption
    label = None
    label_pattern = r' *label\{(.+?)\}'
    m_label = re.search(label_pattern, caption)
    if m_label:
        label = m_label.group(1).strip()
        caption = re.sub(label_pattern, '', caption)

    display_method = option('ipynb_figure=', 'imgtag')
    if display_method == 'md':
        # Markdown image syntax for embedded image in text
        # (no control of size, then one must use HTML syntax)
        if label is not None:
            text += '<div id="%s"></div>\n' % label
        text += '![%s](%s)' % (caption, filename)
    elif display_method == 'imgtag':
        # Plain <img tag, allows specifying the image size
        if label is not None:
            text += '<div id="%s"></div>\n' % label
        # Fix caption markup so it becomes html
        from doconce import INLINE_TAGS_SUBST, INLINE_TAGS
        for tag in 'bold', 'emphasize', 'verbatim':
            caption = re.sub(INLINE_TAGS[tag],
                             INLINE_TAGS_SUBST['html'][tag],
                             caption, flags=re.MULTILINE)
        text += """
<p>%s</p>
<img src="%s" %s>
""" % (caption, filename, opts)
    elif display_method == 'Image':
        # Image object
        # NOTE: This code will normally not work because it inserts a verbatim
        # block in the file *after* all such blocks have been removed and
        # numbered. doconce.py makes a test prior to removal of blocks and
        # runs the handle_figures and movie substitution if ipynb format
        # and Image or movie object display.
        text += '\n'
        if label is not None:
            text += '<div id="%s"></div>' % label
        text += '<!-- options: %s -->\n' % opts
        # Append (do not overwrite): otherwise the figure header, the label
        # anchor and the options comment written above are thrown away.
        text += '!bc pycod\n'
        if not figure_encountered:
            # First time we have a figure, we must import Image
            text += 'from IPython.display import Image\n'
            figure_encountered = True
        if caption:
            # One comment line, terminated by a newline, so the Image call
            # below is not swallowed by the comment.
            text += '# ' + ' '.join(caption.splitlines()) + '\n'
        keyword = 'url' if filename.startswith('http') else 'filename'
        text += 'Image(%s="%s")\n' % (keyword, filename)
        text += '!ec\n'
    else:
        errwarn('*** error: --ipynb_figure=%s is illegal, must be md, imgtag or Image' % display_method)
        _abort()
    text += '<!-- end figure -->\n'
    return text
def mwiki_figure(m):
    """Typeset a DocOnce figure match as MediaWiki ``[[File:...]]`` markup.

    Local files that are not PNG/GIF/JPEG are converted to PNG with
    ImageMagick's ``convert``. For local files, en.wikipedia.org is queried
    to see whether an image with the same name already exists there.

    Args:
        m: regex match object providing the named groups ``filename``,
            ``caption`` and ``options``.

    Returns:
        The MediaWiki markup for the figure.

    Raises:
        IOError: if ``filename`` is not a URL and no such file exists.
    """
    filename = m.group('filename')
    link = filename if filename.startswith('http') else None
    if not link and not os.path.isfile(filename):
        raise IOError('no figure file %s' % filename)

    root, ext = os.path.splitext(filename)
    if link is None and ext not in ('.png', '.gif', '.jpg', '.jpeg'):
        # try to convert image file to PNG, using
        # convert from ImageMagick:
        png_file = root + '.png'
        cmd = 'convert %s png:%s' % (filename, png_file)  # for messages
        try:
            # Argument list without a shell: file names with blanks or
            # shell metacharacters are passed through unharmed.
            subprocess.check_output(['convert', filename, 'png:' + png_file],
                                    stderr=subprocess.STDOUT)
        except (subprocess.CalledProcessError, OSError):
            # OSError: the convert program could not be started at all
            errwarn('\n**** warning: could not run ' + cmd)
            errwarn(' convert %s to PNG format manually' % filename)
            _abort()
        filename = png_file

    caption = m.group('caption').strip()
    if caption != '':
        caption = '|' + caption  # add | for non-empty caption
    else:
        # Avoid filename as caption when caption is empty
        # see http://www.mediawiki.org/wiki/Help:Images
        caption = '|<span title=""></span>'
    # keep label if it's there:
    caption = re.sub(r'label\{(.+?)\}', r'(\g<1>)', caption)

    size = ''
    opts = m.group('options').strip()
    if opts:
        info = dict([s.split('=') for s in opts.split()])
        if 'width' in info and 'height' in info:
            size = '|%sx%spx' % (info['width'], info['height'])
        elif 'width' in info:
            size = '|%spx' % info['width']
        elif 'height' in info:
            size = '|x%spx' % info['height']

    if link:
        # We link to some image on the web
        filename = os.path.basename(filename)
        link = os.path.dirname(link)
        result = r"""
[[File:%s|frame%s|link=%s|alt=%s%s]]
""" % (filename, size, link, filename, caption)
    else:
        # We try to link to a file at wikimedia.org.
        found_wikimedia = False
        orig_filename = filename
        # Check if the file exists and find the appropriate wikimedia name.
        # http://en.wikipedia.org/w/api.php?action=query&titles=Image:filename&prop=imageinfo&format=xml

        # Skip directories - get the basename
        filename = os.path.basename(filename)
        try:
            from urllib.parse import urlencode
            from urllib.request import urlopen
        except ImportError:  # Python 2
            from urllib import urlencode, urlopen
        prms = urlencode({
            'action': 'query', 'titles': 'Image:' + filename,
            'prop': 'imageinfo', 'format': 'xml'})
        url = 'http://en.wikipedia.org/w/api.php?' + prms

        def get_data(name, text):
            """Return the value of XML attribute name in text, or None."""
            hit = re.search('%s="(.*?)"' % name, text)
            if not hit:
                return None
            value = hit.group(1)
            if 'Image:' in value:
                return value.split('Image:')[1]
            if 'File:' in value:
                return value.split('File:')[1]
            return value

        try:
            errwarn(' ...checking if %s is stored at en.wikipedia.org/w/api.php...' % filename)
            f = urlopen(url)
            try:
                imageinfo = f.read()
            finally:
                f.close()
            if not isinstance(imageinfo, str):
                # Python 3 delivers bytes; the regexes above need text
                imageinfo = imageinfo.decode('utf-8', 'replace')

            orig_filename = filename
            title = get_data('title', imageinfo)
            if title:
                # Without a title in the reply we keep the local name
                # instead of ending up with filename = None.
                filename = title
            user = get_data('user', imageinfo)
            timestamp = get_data('timestamp', imageinfo)
            if user:
                found_wikimedia = True
                errwarn(' ...found %s at wikimedia' % filename)
                result = r"""
[[File:%s|frame%s|alt=%s%s]] <!-- user: %s, filename: %s, timestamp: %s -->
""" % (filename, size, filename, caption, user, orig_filename, timestamp)
        except IOError:
            errwarn(' ...no Internet connection...')

        if not found_wikimedia:
            errwarn(' ...for wikipedia/wikibooks you must upload image file %s to\n common.wikimedia.org' % orig_filename)
            # see http://commons.wikimedia.org/wiki/Commons:Upload
            # and http://commons.wikimedia.org/wiki/Special:UploadWizard
            errwarn(' ...for now we use local file %s' % filename)
            # This is fine if we use github wiki
            result = r"""
[[File:%s|frame%s|alt=%s%s]] <!-- not yet uploaded to common.wikimedia.org -->
""" % (filename, size, filename, caption)

    return result
def pandoc_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Typeset code and math blocks for pandoc-flavoured Markdown output.

    Math blocks are wrapped in ``$$``. Code blocks become fenced blocks
    (``~~~`` for pandoc-extended Markdown, triple backticks when the
    ``github_md`` option is set) or plain indented blocks when the
    ``strict_markdown_output`` option is set.

    Args:
        filestr: document text with placeholders for the blocks.
        code_blocks: list of code block strings; modified in place.
        code_block_types: environment name per code block (not used here).
        tex_blocks: list of LaTeX block strings; modified in place.
        format: output format name, passed on to the block helpers.

    Returns:
        The transformed document text.

    Side effects:
        With ``github_md``, the values of the module-level
        ``language2pandoc`` dict are lower-cased in place.
    """
    # Disabled in the original code (kept for reference):
    #   # We expand all newcommands now
    #   from html import embed_newcommands
    #   newcommands = embed_newcommands(filestr)
    #   if newcommands:
    #       filestr = newcommands + filestr

    # Note: the tex code require the MathJax fix of doconce md2html
    # to insert right MathJax extensions to interpret align and labels
    # correctly.
    # (Also, doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks.)
    for i in range(len(tex_blocks)):
        block = tex_blocks[i]
        # Remove latex envir in single equations
        for markup in (r'\[', r'\]',
                       r'\begin{equation*}', r'\end{equation*}'):
            block = block.replace(markup, '')

        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', block)
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                errwarn("""\
*** warning: latex envir \\begin{%s} does not work well.
""" % envir)
        # Add $$ on each side of the equation
        # Note: HTML output from pandoc requires $$ while latex cannot have
        # them if begin-end inside ($$\begin{...} \end{...}$$)
        tex_blocks[i] = '$$\n' + block + '$$\n'

    strict_markdown = option('strict_markdown_output')
    if strict_markdown:
        # Code blocks are just indented
        for i in range(len(code_blocks)):
            code_blocks[i] = indent_lines(code_blocks[i], format)

    filestr = insert_code_and_tex(filestr, code_blocks, tex_blocks, format)

    github_md = option('github_md')

    if not strict_markdown:
        if github_md:
            for key in language2pandoc:
                language2pandoc[key] = language2pandoc[key].lower()

        # Code blocks apply the ~~~~~ delimiter, with blank lines before
        # and after
        for key in language2pandoc:
            if github_md:
                replacement = '\n```%s\n' % language2pandoc[key]
            else:
                # pandoc-extended Markdown
                # (append " ,numberLines" after %s to enable line numbering)
                replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.%s}\n' % language2pandoc[key]
            filestr = re.sub(r'^!bc\s+%s\s*\n' % key,
                             replacement, filestr, flags=re.MULTILINE)

        # any !bc with/without argument becomes an unspecified block
        if github_md:
            replacement = '\n```'
        else:
            replacement = '\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
        filestr = re.sub(r'^!bc.*$', replacement, filestr, flags=re.MULTILINE)

        if github_md:
            replacement = '```\n'
        else:
            replacement = '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'
        filestr = re.sub(r'^!ec\s*$', replacement, filestr, flags=re.MULTILINE)
    else:
        # Strict Markdown: just indented blocks
        filestr = re.sub(r'^!bc.*$', '', filestr, flags=re.MULTILINE)
        filestr = re.sub(r'^!ec\s*$', '', filestr, flags=re.MULTILINE)

    filestr = re.sub(r'^!bt *\n', '', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'^!et *\n', '', filestr, flags=re.MULTILINE)

    # \eqref and labels will not work, but labels do no harm.
    # In a re.sub replacement the backslash must be doubled: a single
    # backslash followed by a letter (\l, \e) is an unknown escape, which
    # the re module rejects with re.error on Python 3.7 and later.
    filestr = filestr.replace(' label{', ' \\label{')
    filestr = re.sub(r'^label\{', r'\\label{', filestr, flags=re.MULTILINE)
    filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)

    # Final fixes

    # Seems that title and author must appear on the very first lines
    filestr = filestr.lstrip()

    # Enable tasks lists:
    #   - [x] task 1 done
    #   - [ ] task 2 not yet done
    if github_md:
        filestr = re.sub(r'^(\s+)\*\s+(\[[x ]\])\s+', r'\g<1>- \g<2> ',
                         filestr, flags=re.MULTILINE)

    return filestr
def sphinx_quiz_runestone(quiz):
    """Typeset a quiz as a RunestoneInteractive multiple-choice directive.

    Args:
        quiz: dict with the keys ``"question"`` and ``"choices"`` and
            optionally ``"new page"``. Each choice is a sequence whose
            first item is ``"right"`` for a correct answer, whose second
            item is the answer text and whose optional third item is the
            explanation.

    Returns:
        reStructuredText with a ``.. mchoicema::`` directive (more than one
        correct answer) or a ``.. mchoicemf::`` directive (otherwise).
        At most five choices are written.

    Side effects:
        Increments the global ``question_counter``. Calls ``_abort()`` if
        none of the written choices is marked as right.
    """
    global question_counter

    quiz_feedback = option("quiz_explanations=", "on")
    # Directive options use the same indentation as the question text,
    # which is indented by 3 blanks at the end of this function.
    indent = " " * 3

    text = ""
    if "new page" in quiz:
        text += ".. !split\n%s\n%s" % (quiz["new page"], "-" * len(quiz["new page"]))

    text += ".. begin quiz\n\n"
    question_counter += 1
    # Multiple correct answers?
    if sum(1 for choice in quiz["choices"] if choice[0] == "right") > 1:
        text += ".. mchoicema:: question_%d" % question_counter + "\n"
    else:
        text += ".. mchoicemf:: question_%d" % question_counter + "\n"

    def fix_text(s, tp="answer"):
        """
        Answers and feedback in RunestoneInteractive book quizzes
        cannot contain math, figure and rst markup. Perform fixes.
        Return an empty string if s holds an unsupported construct.
        """
        unsupported = (
            ("math::", "math block"),
            (".. code-block::", "code block"),
            (".. figure::", "figure"),
        )
        drop = False
        for marker, what in unsupported:
            if marker in s:
                errwarn("\n*** warning: quiz %s with %s not supported:" % (tp, what))
                errwarn(s)
                drop = True
        if drop:
            return ""
        # Make multi-line paragraph a one-liner
        s = " ".join(s.splitlines()).rstrip()
        # Fixes
        # URL: capture the address without the opening "<" so that the
        # generated href holds a clean address
        s = re.sub(r"`(.+?) <(https?.+?)>`__", r'<a href="\g<2>"> \g<1> </a>', s)
        # verbatim
        s = re.sub(r"``(.+?)``", r"<tt>\g<1></tt>", s)
        # inline math (mimic italic....)
        s = re.sub(r":math:`(.+?)`", r"<em>\g<1></em>", s)
        # emphasize
        # NOTE(review): this pattern only matches when a colon precedes
        # the first star - confirm that this is intended.
        s = re.sub(r":\*(.+?)\*", r"\g<1>", s, flags=re.DOTALL)
        return s

    import string

    correct = []
    for i, choice in enumerate(quiz["choices"]):
        if i > 4:  # not supported
            errwarn("*** warning: quiz with %d choices gets truncated (first 5)" % len(quiz["choices"]))
            break
        letter = string.ascii_lowercase[i]
        answer = fix_text(choice[1], tp="answer")
        if not answer:
            answer = "Too advanced typesetting prevents the text from being rendered"
        text += indent + ":answer_%s: " % letter + answer + "\n"
        if choice[0] == "right":
            correct.append(letter)

    if correct:
        text += indent + ":correct: " + ", ".join(correct) + "\n"
    else:
        errwarn("*** error: correct choice in quiz has index > 5 (max 5 allowed for RunestoneInteractive books)")
        errwarn(quiz["question"])
        _abort()

    for i, choice in enumerate(quiz["choices"]):
        if i > 4:  # not supported
            break
        letter = string.ascii_lowercase[i]
        text += indent + ":feedback_%s: " % letter  # must be present
        if len(choice) == 3 and quiz_feedback == "on":
            feedback = fix_text(choice[2], tp="explanation")
            if not feedback:
                feedback = "(Too advanced typesetting prevents the text from being rendered)"
            text += feedback
        text += "\n"

    text += "\n" + indent_lines(quiz["question"], "sphinx", indent) + "\n\n\n"
    return text