def md_factory(allow_links=True, allow_images=True, allow_blocks=True):
    """Build and configure the Markdown parser.

    The parser is created with ``safe_mode='escape'`` and the ``nl2br``
    extension.  Reference-style syntax is always removed and the BBCode
    inline tags plus strikethrough are always added.  The three flags
    decide whether link patterns, image patterns and block-level
    processors stay enabled.

    Returns whatever ``pipeline.extend_markdown`` returns for the
    configured parser.
    """
    md = markdown.Markdown(safe_mode='escape', extensions=['nl2br'])

    # Reference-style links and images are never supported.
    del md.preprocessors['reference']
    for pattern_name in ('reference', 'image_reference', 'short_reference'):
        del md.inlinePatterns[pattern_name]

    # BBCode-style [b], [i] and [u] inline tags.
    bbcode_inlines = (
        ('bb_b', inline.bold, '<strong'),
        ('bb_i', inline.italics, '<emphasis'),
        ('bb_u', inline.underline, '<emphasis2'),
    )
    for name, pattern, location in bbcode_inlines:
        md.inlinePatterns.add(name, pattern, location)

    # ~~deleted~~ text.
    StriketroughExtension().extendMarkdown(md)

    if not allow_links:
        for pattern_name in ('link', 'autolink', 'automail'):
            del md.inlinePatterns[pattern_name]

    if allow_images:
        # [img] shorthand.
        ShortImagesExtension().extendMarkdown(md)
    else:
        del md.inlinePatterns['image_link']

    if allow_blocks:
        # [hr], fenced/BBCode code blocks and [quote] blocks.
        md.parser.blockprocessors.add(
            'bb_hr', blocks.BBCodeHRProcessor(md.parser), '>hr')
        FencedCodeExtension().extendMarkdown(md, None)
        blocks.CodeBlockExtension().extendMarkdown(md)
        blocks.QuoteExtension().extendMarkdown(md)
    else:
        disabled_processors = (
            'hashheader',
            'setextheader',
            'code',
            'quote',
            'hr',
            'olist',
            'ulist',
        )
        for processor_name in disabled_processors:
            del md.parser.blockprocessors[processor_name]

    return pipeline.extend_markdown(md)
def _get_article_content(self, afile, istxt=False):
    """Render an article to HTML and collect its metadata.

    ``afile`` is the Markdown text itself when ``istxt`` is true;
    otherwise it is a path whose content is loaded with ``read_file``.

    Returns a 4-tuple ``(html, metadata, text, medias)``.  When a path is
    given and does not exist, an error is logged and four ``None`` values
    are returned instead.
    """
    if istxt:
        source_text = afile
    elif os.path.exists(afile):
        source_text = read_file(afile)
    else:
        slog.error('The file "%s" is inexistance!' % afile)
        return None, None, None, None

    # Override the fence pattern on the preprocessor class itself, so the
    # assignment affects every FencedBlockPreprocessor, not only this call.
    FencedBlockPreprocessor.FENCED_BLOCK_RE = re.compile(
        r'''
        (?P<fence>^(?:~{3,}|`{3,}))[ ]*         # Opening ``` or ~~~
        # Optional {, lang="lang" or lang
        (\{?\.?(?:lang=")?(?P<lang>[a-zA-Z0-9_+-]*)"?)?[ ]*
        # Optional highlight lines, single- or double-quote-delimited
        (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?[ ]*
        }?[ ]*\n                                # Optional closing }
        (?P<code>.*?)(?<=\n)
        (?P=fence)[ ]*$''',
        re.MULTILINE | re.DOTALL | re.VERBOSE)

    fenced = FencedCodeExtension()
    highlighter = CodeHiliteExtension(linenums=False, guess_lang=False)
    converter = markdown.Markdown(extensions=[
        'markdown.extensions.meta',
        'markdown.extensions.tables',
        fenced,
        highlighter,
    ])

    html = converter.convert(source_text)
    # ``Meta`` is read from the converter after convert().
    metadata = self._get_article_metadata(converter.Meta)
    return html, metadata, source_text, self._get_medias(source_text)
def convert_markdown_to_html(clean_md):
    """Convert the Markdown in the Unicode string `clean_md` to XHTML.

    The highlighting style and the line-number setting are read from
    ``preferences.PREFS``.  Both the input and the result are asserted to
    be ``unicode`` objects.
    """
    assert isinstance(clean_md, unicode), "Input `clean_md` is not Unicode"

    prefs = preferences.PREFS
    enabled_extensions = [
        SmartEmphasisExtension(),
        FencedCodeExtension(),
        FootnoteExtension(),
        AttrListExtension(),
        DefListExtension(),
        TableExtension(),
        AbbrExtension(),
        Nl2BrExtension(),
        CodeHiliteExtension(
            noclasses=True,
            pygments_style=prefs.get(const.MARKDOWN_SYNTAX_STYLE),
            linenums=prefs.get(const.MARKDOWN_LINE_NUMS),
        ),
        SaneListExtension(),
    ]
    new_html = markdown.markdown(
        clean_md,
        output_format="xhtml1",
        extensions=enabled_extensions,
        lazy_ol=False,
    )

    assert isinstance(new_html, unicode)
    return new_html
def __init__(self, settings: MarkdownSettings):
    """Configure the converter from ``settings``.

    Each ``enable_*`` flag on ``settings`` switches one extension on; the
    matching ``*_options`` mapping, where one exists, is passed to that
    extension as keyword arguments.  The selected extensions are kept on
    ``self.extensions`` and handed to the parent initializer together
    with the output format and tab length.
    """
    self.settings = settings
    opts = self.settings

    # (flag, factory) pairs in registration order.  Factories are only
    # called for enabled entries, so option mappings of disabled
    # extensions are never read.
    candidates = (
        (opts.enable_checklist, ChecklistExtension),
        (opts.enable_codehilite,
         lambda: CodeHiliteExtension(**opts.codehilite_options)),
        (opts.enable_fenced_code, FencedCodeExtension),
        (opts.enable_footnotes,
         lambda: FootnoteExtension(**opts.footnotes_options)),
        (opts.enable_smartypants,
         lambda: SmartyExtension(**opts.smartypants_options)),
        (opts.enable_toc,
         lambda: TocExtension(**opts.toc_options)),
        (opts.enable_truly_sane_lists,
         lambda: TrulySaneListExtension(**opts.truly_sane_lists_options)),
    )
    self.extensions = [build() for enabled, build in candidates if enabled]

    super().__init__(
        output_format=opts.output_format,
        tab_length=opts.tab_length,
        extensions=self.extensions,
    )
def extendMarkdown(self, md, md_globals):
    """Register the built-in and custom extensions on ``md``.

    Every extension is instantiated and immediately applied with the same
    ``md`` and ``md_globals``, in the order listed below.
    """
    hilite_options = [
        ('guess_lang', 'False'),
        ('css_class', 'highlight'),
    ]
    extension_factories = (
        # Built-in extensions
        FencedCodeExtension,
        SmartEmphasisExtension,
        TableExtension,
        # Custom extensions
        AutolinkExtension,
        AutomailExtension,
        lambda: HiddenHiliteExtension(hilite_options),
        SemiSaneListExtension,
        SpacedLinkExtension,
        StrikethroughExtension,
    )
    for make_extension in extension_factories:
        make_extension().extendMarkdown(md, md_globals)
def format_text(
    text,
    useMarkdown,
    markdownStyle,
    markdownLineNums,
    markdownTabLength,
):
    """Turn ``text`` into an HTML fragment.

    Args:
        text: The raw text to format.
        useMarkdown: When true, ``text`` is rendered as Markdown;
            otherwise only its whitespace is translated to HTML.
        markdownStyle: Pygments style name for highlighted code.  Any
            value other than ``'default'`` makes the highlighter emit
            inline styles instead of CSS classes.
        markdownLineNums: Passed to the highlighter as ``linenums``.
        markdownTabLength: Passed to Markdown as ``tab_length``.

    Returns:
        The resulting HTML string.
    """
    if not useMarkdown:
        # Preserve whitespace: newlines become <br> and every space becomes
        # a non-breaking space so runs of spaces are not collapsed by the
        # HTML renderer.  The entity is spelled out explicitly; an
        # invisible literal here is indistinguishable from a plain space
        # and turns this into a no-op.
        return text.replace('\n', '<br>').replace(' ', '&nbsp;')

    noclasses = markdownStyle != 'default'
    return markdown.markdown(
        text,
        output_format="xhtml1",
        extensions=[
            SmartEmphasisExtension(),
            FencedCodeExtension(),
            FootnoteExtension(),
            AttrListExtension(),
            DefListExtension(),
            TableExtension(),
            AbbrExtension(),
            Nl2BrExtension(),
            CodeHiliteExtension(noclasses=noclasses,
                                pygments_style=markdownStyle,
                                linenums=markdownLineNums),
            SaneListExtension(),
            SmartyExtension(),
        ],
        lazy_ol=False,
        tab_length=markdownTabLength,
    )
def render_markdown(self, text):
    """Convert ``text`` from Markdown to HTML.

    Code is highlighted with inline styles (``noclasses=True``) and
    without line numbers; fenced code blocks are enabled.
    """
    highlighter = CodeHiliteExtension(linenums=False, noclasses=True)
    fences = FencedCodeExtension()
    return markdown.markdown(text, extensions=[highlighter, fences])
def parse_markdown(text, noclasses, style, line_nums, tab_len, mathext):
    """Render ``text`` as XHTML using the standard extension set.

    ``noclasses``, ``style`` and ``line_nums`` configure the code
    highlighter, ``tab_len`` is the Markdown tab length, and a truthy
    ``mathext`` puts ``MathExtension`` in front of the other extensions.
    """
    math_extensions = [MathExtension()] if mathext else []
    standard_extensions = [
        FencedCodeExtension(),
        FootnoteExtension(),
        AttrListExtension(),
        DefListExtension(),
        TableExtension(),
        AbbrExtension(),
        Nl2BrExtension(),
        CodeHiliteExtension(
            noclasses=noclasses,
            pygments_style=style,
            linenums=line_nums,
        ),
        SaneListExtension(),
        SmartyExtension(),
    ]
    return markdown.markdown(
        text,
        output_format="xhtml1",
        extensions=math_extensions + standard_extensions,
        lazy_ol=False,
        tab_length=tab_len,
    )
def parse(text):
    """Render Markdown ``text`` to HTML and sanitize the result.

    The rendered HTML is passed through ``bleach.clean`` with an explicit
    whitelist of tags and attributes before it is returned.

    https://github.com/Python-Markdown/markdown/wiki/Third-Party-Extensions
    https://facelessuser.github.io/pymdown-extensions
    """
    rendered = markdown.markdown(text, extensions=[
        FootnoteExtension(),
        TableExtension(),
        FencedCodeExtension(),
        CodeHiliteExtension(),
        Nl2BrExtension(),
        TocExtension(slugify=slugs.uslugify, permalink=False),
        TrulySaneExt(),
        tilde.makeExtension(),
        caret.makeExtension(),
    ])

    allowed_tags = [
        'a', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'p', 'div', 'pre', 'code', 'span', 'img', 'br',
        'ul', 'ol', 'li',
        'table', 'tr', 'th', 'td', 'thead', 'tbody', 'blockquote',
        'del', 'em', 'strong', 'sub', 'sup',
    ]

    allowed_attrs = {
        '*': ['class'],
        'a': ['href', 'rel'],
        'img': ['alt'],
    }
    # h1..h6 keep their id so TOC anchors survive sanitizing.
    for level in range(1, 7):
        allowed_attrs[f'h{level}'] = ['id']

    # HTML sanitizer
    return bleach.clean(
        rendered,
        tags=allowed_tags,
        attributes=allowed_attrs,
    )
def parse(self, md):
    """Convert Markdown ``md`` to HTML and feed it to a ``TreeBuilder``.

    The intermediate HTML is logged at debug level.  Returns the
    builder's ``document``.
    """
    tree_builder = TreeBuilder()
    fenced_only = [FencedCodeExtension()]
    rendered = markdown(md, tab_length=self.tab_spaces, extensions=fenced_only)
    self.logger.debug('HTML:\n\n%s\n\nEND HTML', rendered)
    tree_builder.feed(rendered)
    return tree_builder.document
def mdToHtml(md, _extensions, configs):
    """Render Markdown ``md`` to HTML.

    Four fixed extensions are always used; ``configs`` is passed to
    ``DeleteSubExtension`` as keyword arguments.  The extensions produced
    by ``strToClassEXt(_extensions)`` are appended after the fixed ones.
    """
    fixed_extensions = [
        SaneListExtension(),
        TableExtension(),
        FencedCodeExtension(),
        DeleteSubExtension(**configs),
    ]
    requested_extensions = strToClassEXt(_extensions)
    return markdown.markdown(
        md, extensions=fixed_extensions + requested_extensions)
def view_api_docs():
    """Render the ``api.md`` template as the API documentation page.

    The Markdown file is converted to HTML (with a table of contents and
    fenced code blocks), the HTML is then rendered as a template string
    without any context variables, and the result is embedded in
    ``mdpage.html`` as ``content``.
    """
    template_path = settings.TEMPLATES_DIR / 'api.md'
    converter = markdown.Markdown(extensions=[
        TocExtension(title='Table of Contents'),
        FencedCodeExtension(),
    ])
    with open(template_path, 'r') as template_file:
        page_html = converter.convert(template_file.read())

    rendered_body = render_template_string(page_html)
    return render_template('mdpage.html', content=rendered_body)
def markdown_to_html(plain):
    """Convert Markdown to HTML.

    Text made up only of letters, digits, spaces and ``,.?+-`` is
    returned untouched.  Anything else is rendered to HTML5, and the
    original text is stored base64-encoded in a
    ``data-original-markdown`` attribute on the first tag of the result.

    Args:
        plain: The Markdown source text.

    Returns:
        ``plain`` itself for really plain text, otherwise the HTML string.
    """
    import re

    # Don't convert if plain text is really plain.  Checked before the
    # heavier imports below so plain input never pays for them.
    if re.match(r"[a-zA-Z0-9æøåÆØÅ ,.?+-]*$", plain):
        return plain

    import base64
    from bs4 import BeautifulSoup
    import markdown
    from markdown.extensions.abbr import AbbrExtension
    from markdown.extensions.codehilite import CodeHiliteExtension
    from markdown.extensions.def_list import DefListExtension
    from markdown.extensions.fenced_code import FencedCodeExtension
    from markdown.extensions.footnotes import FootnoteExtension

    # Fix whitespaces in input
    plain = plain.replace("\xc2\xa0", " ").replace("\xa0", " ")

    # For convenience: Fix mathjax escaping
    plain = plain.replace(r"\[", r"\\[")
    plain = plain.replace(r"\]", r"\\]")
    plain = plain.replace(r"\(", r"\\(")
    plain = plain.replace(r"\)", r"\\)")

    html = markdown.markdown(plain, extensions=[
        AbbrExtension(),
        CodeHiliteExtension(
            noclasses=True,
            linenums=False,
            pygments_style='friendly',
            guess_lang=False,
        ),
        DefListExtension(),
        FencedCodeExtension(),
        FootnoteExtension(),
    ], output_format="html5")

    html_tree = BeautifulSoup(html, 'html.parser')
    tag = _get_first_tag(html_tree)
    if not tag:
        if not html:
            # Add a non-breaking space to prevent the input field from
            # shrinking in the UI.  A plain space would not do this, so
            # the entity is written out explicitly.
            html = "&nbsp;"
        # No tag to hang the attribute on: wrap the output in a <div>.
        html_tree = BeautifulSoup(f"<div>{html}</div>", "html.parser")
        tag = _get_first_tag(html_tree)

    # Store original text as data-attribute on tree root
    # Note: convert newlines to <br> to make text readable in the Anki viewer
    original_html = base64.b64encode(
        plain.replace("\n", "<br />").encode('utf-8')).decode()
    tag['data-original-markdown'] = original_html

    return str(html_tree)
def markdown(value, header_level=1):
    """Render ``value`` as HTML5 and mark the result safe.

    Rendering uses ``safe_mode='escape'``.  ``header_level`` is passed to
    the TOC extension as ``baselevel``.
    """
    active_extensions = [
        TocExtension(baselevel=header_level),
        CodeHiliteExtension(),
        FencedCodeExtension(),
        TableExtension(),
    ]
    rendered = md(
        value,
        output_format='html5',
        safe_mode='escape',
        extensions=active_extensions,
    )
    return mark_safe(rendered)
def extendMarkdown(self, md, md_globals):
    """Register the built-in and ``gfm`` extensions on ``md``.

    The built-in extensions are applied with ``md`` only.  The ``gfm``
    extensions receive ``md_globals`` as second argument, except
    ``HiddenHiliteExtension``, which receives ``self.config``.
    """
    # Built-in extensions
    for builtin_factory in (FencedCodeExtension, TableExtension):
        builtin_factory().extendMarkdown(md)

    # Custom extensions, paired with the second argument each one gets.
    custom_extensions = (
        (gfm.AutolinkExtension, md_globals),
        (gfm.AutomailExtension, md_globals),
        (gfm.HiddenHiliteExtension, self.config),
        (gfm.SemiSaneListExtension, md_globals),
        (gfm.SpacedLinkExtension, md_globals),
        (gfm.StrikethroughExtension, md_globals),
        (gfm.TaskListExtension, md_globals),
    )
    for custom_factory, second_arg in custom_extensions:
        custom_factory().extendMarkdown(md, second_arg)
def renderContent(self) -> None:
    """Render this page's Markdown content, unless that is pointless.

    Nothing happens when a previous render is still valid, when loading
    or rendering already failed, or when there is no content path.  A
    failure to load or render is logged and recorded in ``self.errors``.
    On success the rendered string and the info gathered by the
    extraction extension are stored on the instance.
    """
    self.checkModified()

    if self.__renderedStr is not None:
        # Previous render is still valid.
        return
    if self.errors & DocErrors.CONTENT or self.errors & DocErrors.RENDERING:
        # Loading failed, or rendering was attempted and failed.
        return

    contentPath = self.contentPath
    if contentPath is None:
        # If the init module fails to import, resources in the package
        # are inaccessible. Don't try to load them, to avoid error spam.
        return

    # Load content.
    packageName = self.resource.packageName
    try:
        content = importlib_resources.read_text(packageName,
                                                contentPath.name)
    except Exception:
        logging.exception('Error loading documentation content "%s"',
                          contentPath.name)
        self.errors |= DocErrors.CONTENT
        return

    # Create a private Markdown converter.
    # Rendering the table of contents will trigger Markdown conversion
    # of child pages, so we can't use a single shared instance.
    extractor = ExtractionExtension()
    converter = Markdown(extensions=[
        extractor,
        FixupExtension(),
        DefListExtension(),
        FencedCodeExtension(),
        CodeHiliteExtension(guess_lang=False),
        TableExtension(),
    ])

    # Do the actual rendering.
    try:
        rendered = converter.convert(content)
    except Exception:
        logging.exception('Error rendering Markdown for %s', packageName)
        self.errors |= DocErrors.RENDERING
        return

    self.__renderedStr = rendered
    self.__extractedInfo = extractor.extracted
def render_markdown(orig_source_f: Path, inp: str):
    """Render Markdown ``inp`` and write it as an HTML page.

    The output path is ``orig_source_f`` with an ``.html`` suffix, placed
    under ``conf.outdir``; missing parent directories are created and the
    path is printed.  The rendered content is wrapped with ``wrap_page``
    before it is written.
    """
    target = conf.outdir / orig_source_f.with_suffix(".html")
    target.parent.mkdir(parents=True, exist_ok=True)
    print(target)

    page_extensions = [
        TocExtension(baselevel=3, permalink=' #'),
        CodeHiliteExtension(),
        FencedCodeExtension(),
    ]
    body = markdown.markdown(inp, extensions=page_extensions)
    target.write_text(wrap_page(orig_source_f, body))
def render_file(file, args):
    """Render the template stored in ``file`` with ``args``.

    Files whose name ends in ``.md`` are rendered through the template
    engine with ``md_preprocessor`` and the result is converted from
    Markdown to HTML.  Any other file is rendered as a template only.

    Args:
        file: Path of the template file.
        args: Mapping passed to the template as keyword arguments.

    Returns:
        The rendered text (HTML for ``.md`` files).
    """
    # Read through a context manager so the handle is closed even when
    # reading fails, instead of being left to the garbage collector.
    with open(file) as source:
        body = source.read()

    if not str(file).endswith('.md'):
        return Template(body, strict_undefined=True).render(**args)

    out = Template(body, preprocessor=md_preprocessor,
                   strict_undefined=True).render(**args)
    return markdown(out, extensions=[
        CodeHiliteExtension(
            guess_lang=False,
            linenums=False,
            noclasses=True,
        ),
        FencedCodeExtension(),
    ])
def md2html(md_text):
    """Render ``md_text`` to HTML, prefixed with a fixed style header.

    The header is a ``<meta>`` charset declaration followed by a scoped
    ``<style>`` element that styles ``div`` and table elements.  Fenced
    code blocks and tables are enabled for the conversion.
    """
    rendered = markdown.markdown(
        md_text, extensions=[FencedCodeExtension(), TableExtension()])

    style_header = '''
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <style scoped="scoped">
        div {float: left;margin-bottom: 2%}
        table {
            border-collapse: collapse;
            border-spacing: 0;
            empty-cells: show;
            border: 1px solid #cbcbcb;
            {#font-size: 3px;#}
            margin: 1% auto;
            table-layout: fixed;
            {#word-break: break-all;#}
            word-wrap: break-word;
        }
        table td, th {
            border-left: 1px solid #cbcbcb;
            border-width: 0 0 0 1px;
            margin: 0;
            padding: 0.3em 0.5em;
            width:100px
        }
        table td:first-child, table th:first-child {
            border-left-width: 0;
        }
        table thead, table tfoot {
            color: #000;
            text-align: left;
            vertical-align: bottom;
        }
        table thead {
            background: #e0e0e0;
        }
        table tfoot {
            background: #ededed;
        }
        table tr:nth-child(2n-1) td {
            background-color: #f2f2f2;
        }
    </style>
    '''
    return style_header + rendered
def typesetFile(self, fileName, e=None, xPath=None):
    u"""Read the XML document *fileName*, parse it into a tree and typeset
    it. Answer the root node for convenience of the caller.

    If the file extension is "md", the Markdown content is first converted
    to XHTML, wrapped in a <document> root and written next to the source
    as "<fileName>.xml"; that XML file is the one that gets parsed.

    If *xPath* is given, only the first node matching it is typeset, and
    nothing is typeset if there is no match. Otherwise the whole tree is
    typeset from the root. The optional *e* is passed on to
    self.typesetNode() unchanged."""
    fileExtension = fileName.split('.')[-1]

    if fileExtension == 'md':
        # If we have MarkDown content, convert to XML (XHTML)
        # Context managers make sure the files are closed, even if reading,
        # converting or writing raises.
        with codecs.open(fileName, mode="r", encoding="utf-8") as f:
            mdText = f.read()

        mdExtensions = [
            FencedCodeExtension(),
            FootnoteExtension(),
            LiteratureExtension(),
            Nl2BrExtension()
        ]
        xml = u'<?xml version="1.0" encoding="utf-8"?>\n<document>%s</document>' % markdown.markdown(
            mdText, extensions=mdExtensions)
        # NOTE(review): as visible here both literals are a plain space,
        # which makes this a no-op; the first one may originally have been
        # a non-breaking space or its entity. Verify against the repository.
        xml = xml.replace(' ', ' ')

        fileName = fileName + '.xml'  # New file name to XML export
        with codecs.open(fileName, mode="w", encoding="utf-8") as f:
            f.write(xml)

    tree = ET.parse(fileName)
    root = tree.getroot()  # Get the root element of the tree.

    # If there is XSL filtering defined, they get the filtered nodes.
    if xPath is not None:
        filteredNodes = root.findall(xPath)
        if filteredNodes:
            # How to handle if there is multiple result nodes?
            self.typesetNode(filteredNodes[0], e)
    else:
        # Collect all flowing text in one formatted string, while simulating
        # the page/flow, because we need to keep track on which page/flow
        # nodes results get positioned (e.g. for toc-head reference, image
        # index and footnote placement.
        self.typesetNode(root, e)

    # Answer the root element of the etree (Note this class also is called
    # "Element", another kind of node than the PageBot Element.
    return root
def render_post_content(content, first_paragraph=False):
    """Render a post's Markdown to sanitized HTML markup.

    The Markdown is converted with code highlighting (line numbers on)
    and fenced code blocks, then cleaned with ``bleach`` so that only the
    whitelisted tags and the ``id``/``class`` attributes survive.

    Args:
        content: Markdown source of the post.
        first_paragraph: When true, only the first ``<p>`` element of the
            sanitized HTML is returned.

    Returns:
        A ``Markup`` object with the sanitized HTML.  It is empty when
        ``first_paragraph`` is requested and there is no ``<p>`` element.
    """
    allowed_tags = [
        'h1', 'h2', 'h3', 'div', 'p', 'code', 'table', 'span', 'pre', 'tr',
        'td', 'tbody'
    ]
    allowed_attributes = {'*': ['id', 'class']}

    html = markdown(content, extensions=[
        CodeHiliteExtension(linenums=True),
        FencedCodeExtension()
    ])
    html = bleach.clean(html, tags=allowed_tags, attributes=allowed_attributes)

    if first_paragraph:
        # Only parse the HTML when the paragraph is actually needed.
        paragraph = BeautifulSoup(html, 'html.parser').p
        # ``.p`` is None when there is no paragraph; Markup(None) would
        # produce the literal text "None", so fall back to empty markup.
        html = paragraph if paragraph is not None else ''

    return Markup(html)
def convert_markdown(filename, stylename=None, linenos=False):
    """Convert a markdown file to an HTML div, and return the result.

    ``stylename`` is lower-cased and used as the Pygments style; when it
    is empty or missing, ``DEFAULT_STYLE`` is used.  ``linenos`` turns
    line numbers on for highlighted code.
    """
    displayname, content = get_file_content(filename)
    if stylename:
        stylename = stylename.lower()
    else:
        stylename = DEFAULT_STYLE
    debug('Converting MD: {}'.format(displayname))

    hilighter = CodeHiliteExtension(
        pygments_style=stylename,
        linenums=linenos,
        noclasses=True,
        css_class='hilight',
    )
    rendered = markdown(
        content,
        output_format='html5',
        extensions=[
            hilighter,
            FencedCodeExtension(),
            SaneListExtension(),
        ],
    )
    pieces = ('<div class="markdown">', rendered, '</div>')
    return '\n'.join(pieces)
def generateHTML(md_filename=None, source=None, css=None):
    """Render Markdown to HTML, write it to disk and return it.

    Either ``md_filename`` or ``source`` must be given.  When
    ``md_filename`` is given its content is used (any ``source`` is
    ignored) and the HTML is written next to it with an ``.html``
    extension.  Otherwise ``source`` is rendered and written to a newly
    created temporary file.

    After conversion, ``~~text~~`` is turned into ``<del>`` elements and
    ``:"text":`` into curly-quoted text.

    Args:
        md_filename: Path of a Markdown file, or None.
        source: Markdown text, used only when ``md_filename`` is None.
        css: Optional CSS text.  When not None it is prepended to the
            returned HTML in a ``<style>`` element; the file written to
            disk does not contain it.

    Returns:
        The HTML string.

    Raises:
        ValueError: If neither ``md_filename`` nor ``source`` is given.
    """
    if md_filename is None:
        if source is None:
            raise ValueError('You must supply md_filename or source')
        # The temporary file is created at write time, see below.
        html_filename = None
    else:
        with open(md_filename, 'r') as f:
            source = f.read()
        html_filename = os.path.splitext(md_filename)[0] + '.html'

    extras = [
        'fenced-code-blocks', 'toc', 'footnotes', 'wiki-tables',
        'code-friendly'
    ]
    extensions = [
        TocExtension(baselevel=3),
        FencedCodeExtension(),
        TableExtension(),
        AdmonitionExtension(),
        CodeHiliteExtension()
    ]
    html = Markdown(extras=extras, extensions=extensions).convert(source)

    # custom postprocess
    # strike
    strike_pattern = re.compile(r'~{2}(.*?)~{2}')
    html = re.sub(strike_pattern, r'<del>\1</del>', html)
    # quoted
    quoted_pattern = re.compile(r'\:\"(.*?)\"\:')
    html = re.sub(quoted_pattern, r'“\1”', html)

    if html_filename is None:
        # mkstemp() creates the file atomically and hands back an open
        # descriptor.  The previously used mktemp() only returns a name,
        # which is deprecated because another process can claim it first.
        fd, html_filename = tempfile.mkstemp('html', 'md-edit')
        out = os.fdopen(fd, 'w')
    else:
        out = open(html_filename, 'w')
    with out as f:
        f.write(html)

    if css is not None:
        html = '<style>' + css + '</style>' + html
    return html
def extendMarkdown(self, md, md_globals):
    """Register the full extension bundle on ``md``.

    Every extension is instantiated and immediately applied with the same
    ``md`` and ``md_globals``, in the order listed below.  The ``nl2br``
    extension is intentionally not part of the bundle.
    """
    extension_factories = (
        FencedCodeExtension,
        SmartEmphasisExtension,
        TableExtension,
        AdmonitionExtension,
        lambda: CodeHiliteExtension(use_pygments=True, css_class='roca_css'),
        lambda: TocExtension(anchorlink=False, permalink=True),
        gfm.AutomailExtension,
        gfm.SemiSaneListExtension,
        gfm.SpacedLinkExtension,
        gfm.StrikethroughExtension,
        gfm.AutolinkExtension,
        gfm.TaskListExtension,
        SubstituteExtension,
    )
    for make_extension in extension_factories:
        make_extension().extendMarkdown(md, md_globals)
def convert(self, file: File, env: Env) -> None:
    """Convert one Markdown ``file`` into a rendered HTML page.

    The file's input text is converted to HTML, placed into the
    ``base.html`` template together with navigation data, and written
    back through ``file.write_output_text``.  URLs are resolved relative
    to ``file`` via ``env.get_url``.
    """
    url = functools.partial(env.get_url, from_file=file)
    current_page = env.nav.lookup_page(file)
    nav = env.nav

    converter = Markdown(extensions=[
        TocExtension(permalink=True),
        FencedCodeExtension(),
        AutoDocExtension(),
        CodeHiliteExtension(),
        ConvertURLs(convert_url=url),
    ])
    content = converter.convert(file.read_input_text())

    # Note that the table of contents is taken from ``file.toc``.
    context = dict(
        content=content,
        url=url,
        nav=nav,
        current_page=current_page,
        toc=file.toc,
    )
    file.write_output_text(env.render_template("base.html", context))
class Typesetter: """The Typesetter takes one or more markdown files or a sequence of markdown strings and builds a galley, using a dictionary of styles for the formatted string attributes. The result is a self.galley that contains a sequence of Element instances, such as formatted images, textboxes (with BabelStrings), ruler elements and other nested galleys. Mostly used by the Composer.""" IMAGE_CLASS = Image TEXTBOX_CLASS = Text RULER_CLASS = Ruler GALLEY_CLASS = Galley CODEBLOCK_CLASS = CodeBlock DEFAULT_BULLET = '•\t' # Used if no valid bullet string can be found in styles. SKIP_TAGS = ( 'document', 'pre', # Ignore as part of a code block 'figure', 'figcaption', # Not implemented by all browsers. Use ![]()*Caption* instead. ) # Default styles for Typesetter, based on the standard markdown HTML-tags # Some ugly colors to show that we're in default mode here, for the user to # supply a better set. DEFAULT_STYLES = dict( document=dict(name='document', font='Georgia', fontSize=pt(10), leading=em(1.2), textFill=blackColor), dropcap=dict(name='dropcap', fontSize=pt(64), leading=em(1.2), textFill=color(1, 0, 0)), h1=dict(name='h1', fontSize=pt(18), leading=em(1.2), textFill=color(1, 0, 0)), h2=dict(name='h2', fontSize=pt(16), leading=em(1.2), textFill=color(1, 0, 0.5)), h3=dict(name='h3', font='Georgia', fontSize=pt(14), leading=em(1.2), textFill=color(1, 0.5, 0.5)), h4=dict(name='h4', font='Georgia', fontSize=pt(12), leading=em(1.2), textFill=color(0, 1, 1)), h5=dict(name='h5', font='Georgia-Bold', fontSize=pt(10), leading=em(1.2), textFill=(1, 0, 1)), p=dict(name='p', font='Georgia', fontSize=pt(10), leading=em(1.2), textFill=(0.5, 1, 0.5)), bullet=dict(name='bullet', font='Georgia', fontSize=pt(10), leading=em(1.2), textFill=(0.5, 1, 0.5)), li=dict(name='li', fontSize=pt(10), leading=em(1.2), textFill=color(0.5)), em=dict(name='em', font='Georgia-Bold'), ) # These extension are needed to make PageBot markdown compatible with # default MacDown behavior. 
MARKDOWN_EXTENSIONS = [ InlineExtension(), FencedCodeExtension(), FootnoteExtension(), LiteratureExtension(), Nl2BrExtension(), ] def __init__(self, context, styles=None, galley=None, skipTags=None, tryExcept=True, return2Space=False, tabs2Space=False, br2Return=True, stripHead=False, stripTail=False, maxImageWidth=None): """The Typesetter instance interprets an XML or Markdown file (.md) and converts it into a Galley instance with formatted string depending on the current context. >>> from pagebot.filepaths import getResourcesPath >>> from pagebot.toolbox.units import em, pt >>> from pagebot.toolbox.color import color, blackColor >>> from pagebot.contexts.htmlcontext.htmlcontext import HtmlContext >>> context = HtmlContext() >>> h1Style = dict(fontSize=pt(24), textFill=color(1, 0, 0)) >>> h2Style = dict(font='Georgia', fontSize=pt(18), textFill=color(1, 0, 0.5)) >>> h3Style = dict(font='Georgia', fontSize=pt(14), textFill=color(0, 1, 0)) >>> pStyle = dict(fontSize=pt(10), leading=em(1.4), textFill=blackColor) >>> styles = dict(h1=h1Style, h2=h2Style, p=pStyle) >>> ts = Typesetter(context, styles=styles) # Create a new typesetter for this context >>> mdText = ''' ... ~~~Python ... box = page.select['content'] ... ~~~ ... # H1 header ... ## H2 header ... ### H3 header ... ~~Delete~~ ... _Underline_ ... ==Mark== ... *Em* ... "Quote" ... **Strong** ... //Emphasis// ... ^Sup ... !!Sub ... ''' >>> #galley = ts.typesetMarkdown(mdText) >>> #len(galley.elements) #3 """ self.context = context # Find the context, in case no doc has be defined yet. if galley is None: galley = self.GALLEY_CLASS(context=context) self.galley = galley if styles is None: styles = self.DEFAULT_STYLES # Set the maximum image width, in case scaling test is necessary. self.maxImageWidth = maxImageWidth or MAX_IMAGE_WIDTH # Style used, in case the current text box does not have them. self.styles = styles # Stack of graphic state as cascading styles. Last is template for the # next. 
self.gState = [] # Sequential list of all tags that passed parsing. self.tagHistory = [] # Stack of currently active tag names. self.tagStack = [] # Save some flags in case the typesetter is running in Python # try-except mode. self.tryExcept = tryExcept # If True add tags to the output, otherwise ignore. Can be overwritten # by calling function for debugging. self.writeTags = context.useTags # Will contain the root node after executing typesetFile. self.root = None # Some MarkDown generated tags need to be skipped on output, while # their content still is processed. if skipTags is None: skipTags = self.SKIP_TAGS self.skipTags = skipTags # Flags how to filter white space self.return2Space = return2Space # If True (default), then all \r will be replaced by ' ' self.tabs2Space = tabs2Space # If False, then \t is preserved into <tab/> and later converted back into '\t self.br2Return = br2Return # If True, the <br/> will be replaced by '\r' self.stripHead = stripHead # Strip trailing white space. self.stripTail = stripTail # Strip tail white space. self.currentImage = None # Keep the last processed image, in case there are captions to add. def node_tab(self, node, e): """Non-HTML tag, substituted from \t, now convert back into \t.""" self.append('\t') def dropcap(self, node, e): context = self.context style = self.styles.get('dropcap') def getStyleValue(self, name, e=None, style=None, default=None): """Answers the best style value match for *name*, depending on the status of *style*, *e* and *default*, in that order. 
Answer None if everything fails.""" value = None if style is not None: value = style.get(name) if value is None and e is not None: value = e.css(name) if value is None: value = default return value # Solve <br/> best by simple style with: doc.newStyle(name='br', postfix='\n') def node_br(self, node, e): """Add newline instance to the Galley.""" if self.br2Return: context = self.context style = self.styles.get('br') or self.styles.get('p') bs = context.newString('\r', style=style) self.append(bs) else: self.typesetNode(node, e) def node_a(self, node, e): """Ignore links, but process the block""" # Typeset the block of the tag. self.typesetNode(node, e) def node_sup(self, node, e): """Collect footnote references on their page number. And typeset the superior footnote index reference.""" nodeId = node.attrib.get('id') # Check if this is a footnote reference if nodeId is not None and nodeId.startswith( 'fnref'): # This is a footnote reference. footnotes = self.getFootnotes(e) if footnotes is not None: nodeId = nodeId.split(':')[1] index = len(footnotes) + 1 # Footnode['p'] content node will be added if <div class="footnote">...</div> is detected. footnotes[index] = dict(nodeId=nodeId, index=index, node=node, e=e, p=None) # Add invisible mark, so we can scan the text after page composition to find # on which page it ended up. #self.append(getMarker('footnote', index)) # Typeset the block of the tag. self.typesetNode(node, e) def node_literatureref(self, node, e): """Collect literature references.""" # Typeset the block of the tag. # Check if this is a literature reference nodeId = node.attrib.get('id') if nodeId.startswith('litref:'): # It is a literature reference. literatureRefs = self.getLiteratureRefs(e) if literatureRefs: nodeId = nodeId.split(':')[1] index = len(literatureRefs) + 1 # Warning if the reference id is already used. assert not nodeId in literatureRefs # Make literature reference entry. Content <p> and split fields will be added later. 
literatureRefs[index] = dict(nodeId=nodeId, node=node, e=e, p=None, pageIds=[]) #self.append(getMarker('literature', index)) # Typeset the block of the tag. self.typesetNode(node, e) def node_div(self, node, e): """MarkDown generates <div class="footnote">...</div> and <div class="literature">...</div> as output at the end of the HTML export. We will handle them separately by looking them up in the XML-tree. So we'll skip them in the regular flow process.""" if node.attrib.get('class') == 'footnote': # Find the content of the footnotes. Store the content and add marker. footnotes = self.getFootnotes(e) if footnotes is not None: for index, p in enumerate(node.findall('./ol/li/p')): if index + 1 in footnotes: # Store the content as node, so we can process it with a Typesetter in case of child nodes. footnotes[index + 1]['p'] = p else: print( '### Warning: %d footnote reference not found. %s' % (index + 1, footnotes.keys())) result = None # Nothing to return, we handled the references elif node.attrib.get('class') == 'literature': literatureRefs = self.getLiteratureRefs(e) if literatureRefs: for index, p in enumerate(node.findall('./ol/li/p')): if index + 1 in literatureRefs: # Store the content as node, so we can process it with # a Typesetter in case of child nodes. Splitting fields # inside the `p` content will be done by the calling # application or Composer. literatureRefs[index + 1]['p'] = p else: print( '### Warning: %d literature reference not found. 
%s' % (index + 1, literatureRefs.keys())) else: self.typesetNode(node, e) def node_ul(self, node, e): context = self.galley.context style = self.styles.get('ul') if style is not None: s = context.newString('\n', style=style) self.append(s) self.typesetNode(node, e) if style is not None: self.append(s) def node_li(self, node, e): """Generate bullet/Numbered list item.""" context = self.galley.context bulletStyle = self.styles.get('bullet') or self.styles.get( 'li') or self.styles.get('p') bullet = bulletStyle.get('listBullet', self.DEFAULT_BULLET) # Only defined for non-HTML. Get styled string with bullet. bulletString = context.newBulletString(bullet + '\t', style=bulletStyle) if bulletString is not None: self.append( bulletString) # Append the bullet as defined in the style. # Typeset the block of the tag. self.typesetNode(node, e) # For now, using the full figure tag as HTML in Markdown does not work, as # <figure> and <figcaption> are not supported by all browsers. # <figure class="inlineImage"> # <img src="images/myImage.png" alt="Alt text here"/> # <figcaption>Caption here</figcaption> # </figure> IMAGE_CACHE_WIDTHI = re.compile('wi=([px0123456789]*)') # 200, 200px IMAGE_CACHE_HEIGHTI = re.compile('hi=([px0123456789]*)') # 200, 200px IMAGE_CACHE_WIDTH = re.compile('w=([px0123456789\\%]*)') # 200px, 100% IMAGE_CACHE_HEIGHT = re.compile('h=([px0123456789\\%]*)') # 200px, 100% IMAGE_CACHE_XALIGN = re.compile('x=([a-z]*)') # left, center, right IMAGE_CACHE_YALIGN = re.compile('y=([a-z]*)') # top, middle, bottom IMAGE_CACHE_NOSCALE = re.compile( '(noscale|noScale)') # noscale does not create a scaled/ image. IMAGE_CACHE_SIZE = re.compile('(cover|contain|initial|inherit)') IMAGE_CACHE_REPEAT = re.compile('(%s)' % '|'.join(CSS_BACKGROUND_REPEAT)) def node_img(self, node, e): """Process the image. adding the img tag or a new image element to the galley. The alt attribute can contain additional information for the Image element. 
Keep the Image element in self.currentImage, in case we need to add captions. If a "w=<number>" pattern is present in the alt-attribute, then use it as width measurement for creating a cached image. This way an author can control the required size from within the content. Markdown could use code such as ![MyImage w=450](images/myImage.jpg) If one or both if (w, h) are defined, then set the imageScale flag accordingly. >>> from pagebot.contexts.htmlcontext.htmlcontext import HtmlContext >>> from pagebot.toolbox.units import units >>> context = HtmlContext() >>> ts = Typesetter(context=context) >>> ts.IMAGE_CACHE_NOSCALE.findall('aaa bbb') [] >>> ts.IMAGE_CACHE_NOSCALE.findall('w=100 noscale') ['noscale'] >>> units(ts.IMAGE_CACHE_WIDTH.findall('w=100 noscale')) (100pt,) >>> units(ts.IMAGE_CACHE_WIDTH.findall('w=50% wi=800 noscale')) (50%,) >>> units(ts.IMAGE_CACHE_WIDTHI.findall('w=50% wi=800 noscale')) (800pt,) >>> ts.IMAGE_CACHE_SIZE.findall('w=50% contain') ['contain'] """ w = wi = ww = h = hi = hh = xAlign = yAlign = None # Values are optional set by alt content. cover = contain = initial = inherit = False path = node.attrib.get('src') doScale = not path.endswith('.' + FILETYPE_SVG) and not path.endswith( '.' 
+ FILETYPE_GIF) alt = node.attrib.get('alt') cssSize = cssRepeat = None if alt: xAlign = (self.IMAGE_CACHE_XALIGN.findall(alt) or [None])[0] # x=center yAlign = (self.IMAGE_CACHE_YALIGN.findall(alt) or [None])[0] # y=top wi = units( self.IMAGE_CACHE_WIDTHI.findall(alt) or [None])[0] # wi=800, wi=100% Defines the scaled cache size hi = units(self.IMAGE_CACHE_HEIGHTI.findall(alt) or [None])[0] # hi=800 w = units(self.IMAGE_CACHE_WIDTH.findall(alt) or [None])[0] # w=800, w=100% h = units(self.IMAGE_CACHE_HEIGHT.findall(alt) or [None])[0] # h=800 doScale = doScale and not self.IMAGE_CACHE_NOSCALE.findall(alt) cssSize = (self.IMAGE_CACHE_SIZE.findall(alt) or [None])[0] cssRepeat = (self.IMAGE_CACHE_REPEAT.findall(alt) or [None])[0] # doScale = doScale or w is not None or h is not None proportional = not (w is not None and h is not None ) # Not proportional if both are defined. # auto Default value. The background image is displayed in its original size # (iw, ih) Sets the width and height of the scaled cached image file. # (w, h) Sets the width and height of the background image for CSS. # The first value sets the width, the second value sets the height. # If only one value is given, the second is set to "auto". Read about length units # w and h can be fixed units or pecentage. # A percentage sets the width and height of the background image in percent of the parent element. # The first value sets the width, the second value sets the height. # If only one value is given, the second is set to "auto" Play it » # cover Resize the background image to cover the entire container, # even if it has to stretch the image or cut a little bit off one of the edges Play it » # contain Resize the background image to make sure the image is fully visible Play it » # initial Sets this property to its default value. Read about initial Play it » # inherit Inherits this property from its parent element. 
Read about inherit if not cssSize in ('cover', 'contain', 'initial', 'inherit'): if h is not None and w is not None: cssSize = '%s %s' % (w, h) elif w is not None: cssSize = '%s auto' % w elif h is not None: cssSize = 'auto %s' % h else: cssSize = 'auto' self.currentImage = self.IMAGE_CLASS( path=path, parent=self.galley, scaleImage= doScale, # Scale the image if one or both (w, h) is defined. cssSize= cssSize, # Examples "auto 100%" "100% auto" "cover" "contain" "initial" "inherit" cssRepeat=cssRepeat, xAlign=xAlign, yAlign=yAlign, w=wi or self.maxImageWidth, h=hi, # To alter the scaled image file from source level. alt=alt, proportional=proportional, maxImageWidth=self.maxImageWidth, index=node.attrib.get('index', 0), context=self.context) def node_caption(self, node, e): """If self.currentImage is set, then redirect output of the caption nodes into the image, instead of the self.galley. Otherwise just output as plain text, ignoring the caption tag. Multiple captions are added to the the current image until it is changed. The caption tag is triggered by *[[...]]* in MarkDown. """ if self.currentImage is not None: # In a current image exists, attach caption to it. savedGalley = self.galley # Temporary redirect node parent self.galley = self.currentImage self.typesetNode(node, e) self.galley = savedGalley else: self.typesetNode(node, e) def node_code(self, node, e): """Creates a NodeBlock element that contains the code source, to be executed by the Composer in sequence of composition.""" self.CODEBLOCK_CLASS(node.text, parent=self.galley) def pushStyle(self, style): """Pushes the cascaded style on the gState stack. Makes sure that the style is not None and that it is a cascaded style, otherwise it cannot be used as source for child styles. Answers the cascaded style.""" self.gState.append(style) def popStyle(self): """Pop the cascaded style from the gState stack and answer the next style that is on top. 
Make sure that there still is a style to pop, otherwise raise an error. """ assert self.gState self.gState.pop() return self.peekStyle() def peekStyle(self): """Answers the top cascaded style, without changing the stack.""" if not self.gState: # It's empty, answer None return None return self.gState[-1] def addHistory(self, tag): """Add the *tag* to the history.""" if not self.tagHistory or tag != self.tagHistory[-1]: self.tagHistory.append(tag) def getHistory(self): return self.tagHistory def getFootnotes(self, e): """Answers the footnotes dictionary from the e.lib (derived from the root document)""" if self.doc is not None: lib = self.doc.lib if lib is not None: if not 'footnotes' in lib: lib['footnotes'] = {} return lib['footnotes'] return None def getLiteratureRefs(self, e): """Answers the literature reference dictionary from the e.lib (derived from the root document)""" if self.doc is not None: lib = self.doc.lib if lib is not None: if not 'literatureRefs' in lib: lib['literatureRefs'] = {} return lib['literatureRefs'] return None def getImageRefs(self, e): """Answers the image reference dictionary from the e.lib (derived from the root document) if it exists. Otherwise create an empty e.lib['imageRefs'] and answer it as empty dictionary. Answer None if e.lib does not exist.""" lib = e.lib if lib is not None: if not 'imageRefs' in lib: lib['imageRefs'] = {} return lib['imageRefs'] return None def _strip(self, s, prefix=None, postfix=None): """Strip the white space from string *s* if *prefix* and/or *postfix* are not None. Otherwise answer the untouched *s*.""" s = s or '' if self.stripHead: s = s.lstrip() if self.stripTail: s = s.rstrip() if prefix is not None: # Strip if prefix is not None. Otherwise don't touch. 
s = str(prefix or '') + s if postfix is not None: s += str(postfix or '') return s def getMatchingStyleNames(self, tag): """Answers the list of matching style, with decreasing relevance.""" revHistory = self.tagHistory[:] revHistory.reverse() matches = [] for n in range(len(revHistory)): styleName = revHistory[:n + 1] styleName.reverse() styleName = ' '.join(styleName) style = self.getNamedStyle(styleName) if style: matches.append(styleName) matches.reverse() return matches def getNamedStyle(self, styleName): """Answers the named style and otherwise an empty style dict if the named style does not exist.""" return self.styles.get(styleName, {}) def getRootStyle(self): for name in ('root', 'document', 'body', 'p'): if name in self.styles: return self.getNamedStyle(name) return {} def getNodeStyle(self, tag): """Makes a copy of the top of the style graphics state and mew *style* into it. Answer the new style. This can be used to match custom tag names (such as <dropcap>...</dropcap> to a style with the same name. """ if self.peekStyle( ) is None: # Not an initialized stack, use doc.rootStyle as default. # Happens if calling directly, without check on e or non-existing style for a node. self.pushStyle(self.getRootStyle()) mergedStyle = copy.copy(self.peekStyle()) # Find the best matching style for tag on order of relevance, # considering the possible HTML tag parents and the history. for styleName in self.getMatchingStyleNames(tag): nodeStyle = self.getNamedStyle(styleName) if nodeStyle: # Not None and not empty for name, value in nodeStyle.items(): mergedStyle[name] = value break return mergedStyle def append(self, bs): """Append the string (or BabelString instance) to the last textbox in galley, if it exists. 
Otherwise create a new Text and add it to self.galley.""" if self.galley.elements and self.galley.elements[-1].isText: box = self.galley.elements[-1] if box.bs is None: box.bs = bs else: box.bs += bs elif hasattr(bs, 's'): s = bs.s while s and s[0] in ' \t\n\r': s = s[1:] bs.s = s self.TEXTBOX_CLASS(bs, parent=self.galley) else: self.TEXTBOX_CLASS(bs, parent=self.galley) def htmlNode(self, node, end=False): """Open the tag in HTML output and copy the node attributes if there are any.""" htmlTag = u'<%s' % node.tag attrs = [] for name, value in node.items(): if name == 'src' and value.startswith('docs/'): # Exception hack to bridge the .md --> img URL. value = value[5:] attrs.append('%s="%s"' % (name, value)) if attrs: htmlTag += u' ' + u' '.join(attrs) if end: htmlTag += '/' htmlTag += '>' self.append(htmlTag) def _htmlNode(self, node): """Close the html tag of node.""" self.append('</%s>' % node.tag) def htmlNode_(self, node): """Opem+close the html tag of node.""" self.htmlNode(node, end=True) def typesetString(self, sOrBs, e=None, style=None): """If s is a formatted string, then it is placed untouched. If it is a plain string, then use the optional *style* or element *e* (using *e.css(name)*) for searching style parameters. Answer the new formatted string. Used e.g. to measure its size.""" # Only convert if not yet BabelString instance. bs = self.context.newString(sOrBs, e=e, style=style) self.append(bs) return bs def typesetNode(self, node, e=None): """Recursively typeset the etree *node*, using a reference to element *e* or the cascading *style*. If *e* is None, then the tag style is merged on top of the doc.rootStyle. If *e* is defined, then rootstyle of the stack starts with an empty dictionary, leaving root searching for the e.parent path.""" # Ignore <pre> tag output, as it is part of a ~~~Pyhton ... 
~~~ code block if self.writeTags and not node.tag in self.skipTags: # Open the node in HTML export for this node self.htmlNode(node) # Add this tag to the tag-history line. It is used to connect to the # right style in case we are rendering towards a FormattedString or # another context-equivalent. self.addHistory(node.tag) # If e is undefined, then we make sure that the stack contains the doc.rootStyle on top. # If e is defined then root queries for style should follow the e.parent path. if self.peekStyle() is None and e is not None: # Root of stack is empty style, to force searching on the e.parent line. self.pushStyle({}) # Define top level for styles. nodeStyle = self.getNodeStyle( node.tag) # Merge found tag style with current top of stack self.pushStyle(nodeStyle) # Push this merged style on the stack # XML-nodes are organized as: node - node.text - node.children - # node.tail If no text exists or if the node does not have tail text, # these are None. Still we want to be able to add the prefix to the # node.text, so then the text is changed to an empty string. nodeText = self._strip(node.text) if nodeText: # Not None and still has content after stripping? # Don't cache the context from self.galley as variable, as it may # become dynamically updated by code blocks. The galley context # will define the type of BabelStrings generated by the Typesetter. firstTagIndent = nodeStyle.get('firstTagIndent') if firstTagIndent is not None and len( self.tagHistory ) > 2 and self.tagHistory[-2] != '_' + node.tag: nodeStyle['firstLineIndent'] = firstTagIndent bs = self.context.newString(nodeText, nodeStyle) self.append(bs) self.tagStack.append(node.tag) # Add current node to the stack # Type set all child node in the current node, by recursive call. for child in node: hook = 'node_' + child.tag # Method will handle the styled body of the element, but not the # tail. if hasattr(self, hook): # There is a hook for this node, let this method do the work. 
getattr(self, hook)(child, e) # Hook must be able to derive styles from e. # We are in tail mode now, but we don't know what happened in the child block. else: # If no method hook defined, then just solve recursively. Child node will get the style. self.typesetNode(child, e) # XML-nodes are organized as: node - node.text - node.children - # node.tail If no text exists or if the node does not have tail # text, these are None. Still we want to be able to add the # postfix to the tail, so then the tail is changed to empty string? childTail = self._strip(child.tail) #childTail = child.tail #self._strip(child.tail, postfix=self.getStyleValue('postfix', e, nodeStyle, '')) if childTail: # Any tail left after stripping, then append to the galley. # Don't cache the context from self.galley as variable, as it may become dynamically updated by code blocks. bs = self.context.newString(childTail, nodeStyle) self.append(bs) self.tagStack.pop() # Pop current node from the list. # Add this closing tag to the tag-history line. It is used to test on # difference between the last closed tag and the current tag, e.g. to # control the firstTagIndent --> firtLineIndent. self.addHistory('_' + node.tag) # Ignore </pre> tag output, as it is part of a ~~~Pyhton code block if self.writeTags and not node.tag in self.skipTags: # Close the HTML tag of this node. self._htmlNode(node) # Now restore the graphic state at the end of the element content # processing to the style of the parent in order to process the tail # text. Back to the style of the parent, which was in nodeStyle. self.popStyle() def markDown2XmlFile(self, fileName, mdText, mdExtensions=None): """Take the markdown source, convert to HTML/XML and save in the file called fileName. If the fileName does not end with ".xml" extension, then add it. Answer the (new) fileName. 
>>> import os >>> from pagebot.contexts.htmlcontext.htmlcontext import HtmlContext >>> md = '''## Subtitle at start\\n\\n~~~\\npage = page.next\\n~~~\\n\\n# Title\\n\\n##Subtitle\\n\\nPlain text''' >>> context = HtmlContext() >>> t = Typesetter(context) >>> fileName = t.markDown2XmlFile('/tmp/PageBot_Typesetter_test.xml', md) >>> #os.remove(fileName) """ if mdExtensions is None: mdExtensions = self.MARKDOWN_EXTENSIONS # Otherwise MarkDown will auto-convert. if not self.tabs2Space: # Keep the tabs, as they get replaced into spaced by MarkDown. mdText = mdText.replace('\t', '<tab/>') xmlBody = markdown.markdown(mdText, extensions=mdExtensions) xml = u'<?xml version="1.0" encoding="utf-8"?>\n<document>%s</document>' % xmlBody if self.return2Space: for c1, c2 in (('\r', ' '), ('\n', ' '), (' ', ' ')): # Replace all returns by tabs. Paragraphs should be made with <p> and <br/>. xml = xml.replace(c1, c2) xml += '\r' if not fileName.endswith('.xml'): # Make sure file name has xml extension. fileName = fileName + '.xml' # Save the XML as unicode. f = codecs.open(fileName, mode="w", encoding="utf-8") f.write(xml) f.close() return fileName def typesetMarkdown(self, mdText, mdExtensions=None, e=None, xPath=None): tmpPath = '/tmp/PageBot_Typesetter.xml' fileName = self.markDown2XmlFile(tmpPath, mdText, mdExtensions) self.typesetFile(fileName, e=e, xPath=xPath) #os.remove(tmpPath) return self.galley def typesetFile(self, fileName, e=None, xPath=None, patterns=None): """Read the XML document and parse it into a tree of document-chapter nodes. Make the typesetter start at page pageNumber and find the name of the flow in the page template. The optional filter can be a list of tag names that need to be included in the composition, ignoring the rest. The optional rootStyle can be defined as style for the root tag, cascading force all child elements. 
Answer the root node.""" fileExtension = fileName.split('.')[-1] if fileExtension.lower() == 'md': # If we have MarkDown content, convert to XML (XHTML) f = codecs.open(fileName, mode="r", encoding="utf-8") mdText = f.read() # Read the raw MarkDown source f.close() # Pre-filtering, to replace easier (for authors) patterns by ~~~...~~~ # Python instructions. if patterns is not None: for pattern, pythonCode in patterns: mdText = mdText.replace(pattern, pythonCode) fileName = self.markDown2XmlFile( fileName, mdText) # Translate MarkDown to HTML and save in file. tree = ET.parse(fileName) self.root = tree.getroot( ) # Get the root element of the tree and store for later retrieval. # If XSL filtering is defined, they get the filtered nodes. if xPath is not None: filteredNodes = self.root.findall(xPath) if filteredNodes: # How to handle if we got multiple result nodes? self.typesetNode(filteredNodes[0], e) else: # Collect all flowing text in one formatted string, while # simulating the page/flow, because we need to keep track on which # page/flow nodes results get positioned (e.g. for toc-head # reference, image index and footnote placement. self.typesetNode(self.root, e) # Remember this galley where it came from. self.galley.name = fileName # Answer the self.galley. return self.galley
def md_factory(allow_links=True, allow_images=True, allow_blocks=True):
    """Build and configure the Markdown converter.

    Raw-HTML handling and reference-style patterns are always removed, the
    ``bb_b``/``bb_i``/``bb_u`` inline patterns and the strikethrough extension
    are always added. The three flags then decide what else is registered:

    * ``allow_links``: when true, add the ``bb_url`` inline pattern; otherwise
      remove the ``link``, ``autolink`` and ``automail`` inline patterns.
    * ``allow_images``: when true, add the ``bb_img`` inline pattern and the
      short-images extension; otherwise remove the ``image_link`` pattern.
    * ``allow_blocks``: when true, add the ``bb_hr`` block processor and the
      fenced-code, code-block, quote and spoiler extensions; otherwise remove
      the header, code, quote, hr and list block processors.

    Returns whatever ``pipeline.extend_markdown`` answers for the converter.
    """
    converter = markdown.Markdown(extensions=["markdown.extensions.nl2br"])

    # Entries that are dropped unconditionally: first the HTML allowances,
    # then everything related to reference-style syntax.
    unconditional_removals = (
        (converter.preprocessors, "html_block"),
        (converter.inlinePatterns, "html"),
        (converter.preprocessors, "reference"),
        (converter.inlinePatterns, "reference"),
        (converter.inlinePatterns, "image_reference"),
        (converter.inlinePatterns, "short_reference"),
    )
    for registry, key in unconditional_removals:
        del registry[key]

    # [b], [i] and [u] inline tags.
    for name, pattern, position in (
        ("bb_b", bold, "<strong"),
        ("bb_i", italics, "<emphasis"),
        ("bb_u", underline, "<emphasis2"),
    ):
        converter.inlinePatterns.add(name, pattern, position)

    # ~~deleted~~
    StrikethroughExtension().extendMarkdown(converter)

    if not allow_links:
        # No links at all: drop every link-producing inline pattern.
        for key in ("link", "autolink", "automail"):
            del converter.inlinePatterns[key]
    else:
        # [url]
        converter.inlinePatterns.add("bb_url", url(converter), "<link")

    if not allow_images:
        del converter.inlinePatterns["image_link"]
    else:
        # [img]
        converter.inlinePatterns.add("bb_img", image(converter), "<image_link")
        ShortImagesExtension().extendMarkdown(converter)

    if not allow_blocks:
        # Block-level syntax is disabled: remove the stock block processors.
        for key in (
            "hashheader",
            "setextheader",
            "code",
            "quote",
            "hr",
            "olist",
            "ulist",
        ):
            del converter.parser.blockprocessors[key]
    else:
        # [hr] block, registered right after the stock "hr" processor.
        converter.parser.blockprocessors.add(
            "bb_hr", BBCodeHRProcessor(converter.parser), ">hr"
        )
        # The fenced-code extension is called with an explicit second
        # argument, unlike the BBCode extensions below.
        FencedCodeExtension().extendMarkdown(converter, None)
        for extension_class in (
            CodeBlockExtension,
            QuoteExtension,
            SpoilerExtension,
        ):
            extension_class().extendMarkdown(converter)

    return pipeline.extend_markdown(converter)
class Typesetter: u"""Mostly used by the Composer, fhe Typesetter takes one or more markdown files or a sequence of markdown strings and builds a galley, using a dictionary of styles for the formatted string attributes. The result of the typesetting is a self.galley, that contains a sequence of Element instances, such as formatted images, textboxes (with BabelStrings), ruler elements and other nested galleys. """ IMAGE_CLASS = Image TEXTBOX_CLASS = TextBox RULER_CLASS = Ruler GALLEY_CLASS = Galley CODEBLOCK_CLASS = CodeBlock DEFAULT_BULLET = u'•' # Used if no valid bullet string can be found in styles. SKIP_TAGS = ('document', 'pre') # Default styles for Typesetter, based on the standard markdown HTML-tags # Some ugly colors to show that we're in default mode here, for the user to # supply a better set. DEFAULT_STYLES = dict( body=dict(font='Georgia', fontSize=pt(10), leading=em(1.2), textFill=blackColor), h1=dict(font='Verdana', fontSize=pt(18), leading=em(1.2), textFill=color(1, 0, 0)), h2=dict(font='Verdana', fontSize=pt(16), leading=em(1.2), textFill=color(1, 0, 0.5)), h3=dict(font='Georgia', fontSize=pt(14), leading=em(1.2), textFill=color(1, 0.5, 0.5)), h4=dict(font='Georgia', fontSize=pt(12), leading=em(1.2), textFill=color(0, 1, 1)), h5=dict(font='Georgia-Bold', fontSize=pt(10), leading=em(1.2), textFill=(1, 0, 1)), p=dict(font='Georgia', fontSize=pt(10), leading=em(1.2), textFill=(0.5, 1, 0.5)), li=dict(font='Verdana', fontSize=pt(10), leading=em(1.2), textFill=color(0.5)), em=dict(font='Georgia-Bold'), ) MARKDOWN_EXTENSIONS = [ FencedCodeExtension(), FootnoteExtension(), LiteratureExtension(), Nl2BrExtension() ] def __init__(self, context, styles=None, galley=None, skipTags=None, tryExcept=True): u""" The Typesetter instance interprets an XML or Markdown file (.md) and converts it into a Galley instance, with formatted string depending on the current context. 
>>> from pagebot import getResourcesPath >>> from pagebot.toolbox.units import em, pt >>> from pagebot.toolbox.color import color, blackColor >>> from pagebot.contexts.platform import getContext >>> context = getContext() >>> path = getResourcesPath() + '/texts/TEST.md' # Get the path to the text markdown. >>> h1Style = dict(font='Verdana', fontSize=pt(24), textFill=color(1, 0, 0)) >>> h2Style = dict(font='Georgia', fontSize=pt(18), textFill=color(1, 0, 0.5)) >>> pStyle = dict(font='Verdana', fontSize=pt(10), leading=em(1.4), textFill=blackColor) >>> styles = dict(h1=h1Style, h2=h2Style, p=pStyle) >>> t = Typesetter(context, styles=styles) # Create a new typesetter for this context >>> galley = t.typesetFile(path) # Context indicates hiding of the tags in the output >>> len(galley.elements) 6 >>> galley.elements[0].__class__.__name__ # First element of test-markdown is a code block 'CodeBlock' >>> galley.elements[1].__class__.__name__ # Second element is a text box 'TextBox' >>> from pagebot.contexts.htmlcontext import HtmlContext >>> context = HtmlContext() >>> t = Typesetter(context, styles=styles) # Create a new typesetter with a HTML context >>> galley = t.typesetFile(path) # Context indicates is to include the HTML tags in output. >>> len(galley.elements) 6 >>> 'What is PageBot' in galley.elements[1].bs # Skip first code block in test-markdown True """ self.context = context # Find the context, in case no doc has be defined yet. if galley is None: galley = self.GALLEY_CLASS(context=context) self.galley = galley if styles is None: styles = self.DEFAULT_STYLES self.styles = styles # Stack of graphic state as cascading styles. Last is template for the next. self.gState = [] self.tagHistory = [] # Save some flags in case the typesetter is running in Python try-except mode. self.tryExcept = tryExcept # If True add tags to the output, otherwise ignore. Can be overwritten by caller for debugging. 
self.writeTags = context.useTags self.root = None # Will contain the root node after executing typesetFile. # Some MarkDown generated tags need to be skipped on output, while their content still is processed. if skipTags is None: skipTags = self.SKIP_TAGS self.skipTags = skipTags def node_h1(self, node, e): u"""Handle the <h1> tag.""" # Add line break to whatever style/content there was before. # Add invisible h1-marker in the string, to be retrieved by the composer. #headerId = self.document.addTocNode(node) # Store the node in the self.document.toc for later TOC composition. #self.append(getMarker(node.tag, headerId)) # Link the node tag with the TOC headerId. # Typeset the block of the tag. self.typesetNode(node, e) def node_h2(self, node, e): u"""Handle the <h2> tag.""" # Add line break to whatever style/content there was before. # Add invisible h2-marker in the string, to be retrieved by the composer. #headerId = self.document.addTocNode(node) # Store the node in the self.document.toc for later TOC composition. #self.append(getMarker(node.tag, headerId)) # Link the node tag with the TOC headerId. # Typeset the block of the tag. self.typesetNode(node, e) def node_h3(self, node, e): u"""Handle the <h3> tag.""" # Add line break to whatever style/content there was before. # Add invisible h3-marker in the string, to be retrieved by the composer. #headerId = self.document.addTocNode(node) # Store the node in the self.document.toc for later TOC composition. #self.append(getMarker(node.tag, headerId)) # Link the node tag with the TOC headerId. # Typeset the block of the tag. self.typesetNode(node, e) def node_h4(self, node, e): u"""Handle the <h4> tag.""" # Add line break to whatever style/content there was before. # Add invisible h4-marker in the string, to be retrieved by the composer. #headerId = self.document.addTocNode(node) # Store the node in the self.document.toc for later TOC composition. 
#self.append(getMarker(node.tag, headerId)) # Link the node tag with the TOC headerId. # Typeset the block of the tag. self.typesetNode(node, e) def node_h5(self, node, e): u"""Handle the <h5> tag.""" # Add line break to whatever style/content there was before. # Add invisible h4-marker in the string, to be retrieved by the composer. #headerId = self.document.addTocNode(node) # Store the node in the self.document.toc for later TOC composition. #self.append(getMarker(node.tag, headerId)) # Link the node tag with the TOC headerId. # Typeset the block of the tag. self.typesetNode(node, e) def node_em(self, node, e): u"""Handle the <em> tag""" self.typesetNode(node, e) def node_p(self, node, e): u"""Handle the <p> tag.""" self.typesetNode(node, e) def node_hr(self, node, e): u"""Add Ruler instance to the Galley.""" if self.peekStyle() is None and e is not None: # Root of stack is empty style, to force searching on the e.parent line. self.pushStyle({}) # Define top level for styles. hrStyle = self.getNodeStyle( node.tag) # Merge found tag style with current top of stack self.RULER_CLASS( e, style=hrStyle, parent=self.galley) # Make a new Ruler instance in the Galley def getStyleValue(self, name, e=None, style=None, default=None): u"""Answer the best style value match for *name*, depending on the status of *style*, *e* and *default*, on that order. Answer None if everything failes.""" value = None if style is not None: value = style.get(name) if value is None and e is not None: value = e.css(name) if value is None: value = default return value # Solve <br/> best by simple style with: doc.newStyle(name='br', postfix='\n') def node_br(self, node, e): u"""Add newline instance to the Galley.""" # For now, just ignore, as <br/> already get a break in MarkDown, as part of the exclosing tag. # TODO: now <br/> makes the same vertical spacing as <p> """ if self.peekStyle() is None and e is not None: # Root of stack is empty style, to force searching on the e.parent line. 
self.pushStyle({}) # Define top level for styles. brStyle = self.getNodeStyle(node.tag) # Merge found tag style with current top of stack s = self.getStyleValue('prefix', e, brStyle, default='') + '\n' + self.getStyleValue('postfix', e, brStyle, default='') bs = self.context.newString(s, e=e, style=brStyle) self.append(bs) # Add newline in the current setting of FormattedString """ def node_a(self, node, e): u"""Ignore links, but process the block""" # Typeset the block of the tag. self.typesetNode(node, e) def node_sup(self, node, e): u"""Collect footnote references on their page number. And typeset the superior footnote index reference.""" nodeId = node.attrib.get('id') # Check if this is a footnote reference if nodeId.startswith('fnref'): # This is a footnote reference. footnotes = self.getFootnotes(e) if footnotes is not None: nodeId = nodeId.split(':')[1] index = len(footnotes) + 1 # Footnode['p'] content node will be added if <div class="footnote">...</div> is detected. footnotes[index] = dict(nodeId=nodeId, index=index, node=node, e=e, p=None) # Add invisible mark, so we can scan the text after page composition to find # on which page it ended up. #self.append(getMarker('footnote', index)) # Typeset the block of the tag. self.typesetNode(node, e) def node_literatureref(self, node, e): u"""Collect literature references.""" # Typeset the block of the tag. # Check if this is a literature reference nodeId = node.attrib.get('id') if nodeId.startswith('litref:'): # It is a literature reference. literatureRefs = self.getLiteratureRefs(e) if literatureRefs: nodeId = nodeId.split(':')[1] index = len(literatureRefs) + 1 # Warning if the reference id is already used. assert not nodeId in literatureRefs # Make literature reference entry. Content <p> and split fields will be added later. literatureRefs[index] = dict(nodeId=nodeId, node=node, e=e, p=None, pageIds=[]) #self.append(getMarker('literature', index)) # Typeset the block of the tag. 
self.typesetNode(node, e) def node_div(self, node, e): u"""MarkDown generates <div class="footnote">...</div> and <div class="literature">...</div> as output at the end of the HTML export. We will handle them separately by looking them up in the XML-tree. So we'll skip them in the regular flow process.""" if node.attrib.get('class') == 'footnote': # Find the content of the footnotes. Store the content and add marker. footnotes = self.getFootnotes(e) if footnotes is not None: for index, p in enumerate(node.findall('./ol/li/p')): if index + 1 in footnotes: # Store the content as node, so we can process it with a Typesetter in case of child nodes. footnotes[index + 1]['p'] = p else: print( '### Warning: %d footnote reference not found. %s' % (index + 1, footnotes.keys())) result = None # Nothing to return, we handled the references elif node.attrib.get('class') == 'literature': literatureRefs = self.getLiteratureRefs(e) if literatureRefs: for index, p in enumerate(node.findall('./ol/li/p')): if index + 1 in literatureRefs: # Store the content as node, so we can process it with a Typesetter in case of child nodes. # Spltting fields inside the p content will be done by the calling application or Composer. literatureRefs[index + 1]['p'] = p else: print( '### Warning: %d literature reference not found. %s' % (index + 1, literatureRefs.keys())) else: self.typesetNode(node, e) def node_li(self, node, e): u"""Generate bullet/Numbered list item.""" context = self.galley.context bullet = self.DEFAULT_BULLET # Default, in case doc or css does not exist. style = self.styles.get('bullet') or self.styles.get( 'li') or self.styles.get('p') bulletString = context.newBulletString( bullet, e=e, style=style) # Get styled string with bullet. if bulletString is not None: # HtmlContext does not want a bullet character. self.append( bulletString) # Append the bullet as defined in the style. # Typeset the block of the tag. 
self.typesetNode(node, e) def node_img(self, node, e): u"""Process the image. adding the img tag or a new image element to the gally.""" # Typeset the empty block of the img, which creates the HTML tag. self.htmlNode_(node) def node_code(self, node, e): self.CODEBLOCK_CLASS(node.text, parent=self.galley) def pushStyle(self, tag): u"""Push the cascaded style on the gState stack. Make sure that the style is not None and that it is a cascaded style, otherwise it cannot be used as source for child styles. Answer the cascaded style as convenience for the caller. """ self.gState.append(tag) def popStyle(self): u"""Pop the cascaded style from the gState stack and answer the next style that is on top. Make sure that there still is a style to pop, otherwise raise an error. """ assert self.gState self.gState.pop() return self.peekStyle() def peekStyle(self): u"""Answer the top cascaded style, without changing the stack.""" if not self.gState: # It's empty, answer None return None return self.gState[-1] def addHistory(self, tag): u"""Add the *tag* to the history.""" if not self.tagHistory or tag != self.tagHistory[-1]: self.tagHistory.append(tag) def getHistory(self): return self.tagHistory def getFootnotes(self, e): u"""Answer the footnotes dictionary from the e.lib (derived from the root document)""" if self.doc is not None: lib = self.doc.lib if lib is not None: if not 'footnotes' in lib: lib['footnotes'] = {} return lib['footnotes'] return None def getLiteratureRefs(self, e): u"""Answer the literature reference dictionary from the e.lib (derived from the root document)""" if self.doc is not None: lib = self.doc.lib if lib is not None: if not 'literatureRefs' in lib: lib['literatureRefs'] = {} return lib['literatureRefs'] return None def getImageRefs(self, e): u"""Answer the image reference dictionary from the e.lib (derived from the root document) if it exists. Otherwise create an empty e.lib['imageRefs'] and answer it as empty dictionary. 
Answer None if e.lib does not exist.""" lib = e.lib if lib is not None: if not 'imageRefs' in lib: lib['imageRefs'] = {} return lib['imageRefs'] return None def _strip(self, s, prefix=None, postfix=None, forceRightStrip=False): u"""Strip the white space from string *s* if *prefix* and/or *postfix* are not None. Otherwise answer the untouched *s*.""" if prefix is not None: # Strip if prefix is not None. Otherwise don't touch. s = prefix + (s or '').lstrip( ) # Force s to empty string in case it is None, to add prefix. elif forceRightStrip: s = (s or '').rstrip() # Force s to empty string in case it is None. elif postfix is not None: # Strip if postfix is not None. Otherwise don't touch. s = (s or '').rstrip( ) + postfix # Force s to empty string in case it is None, to add postfix. return s def getMatchingStyleNames(self, tag): u"""Answer the list of matching style, with decreasing relevance.""" revHistory = self.tagHistory[:] revHistory.reverse() matches = [] for n in range(len(revHistory)): styleName = revHistory[:n + 1] styleName.reverse() styleName = ' '.join(styleName) style = self.getNamedStyle(styleName) if style: matches.append(styleName) matches.reverse() return matches def getNamedStyle(self, styleName): u"""Answer the named style and otherwise an empty style dict if the named style does not exist.""" return self.styles.get(styleName, {}) def getNodeStyle(self, tag): u"""Make a copy of the top of the style graphics state and mew *style* into it. Answer the new style.""" if self.peekStyle( ) is None: # Not an initialized stack, use doc.rootStyle as default. self.pushStyle(self.getNamedStyle( 'root')) # Happens if calling directly, without check on e mergedStyle = copy.copy(self.peekStyle()) # Find the best matching style for tag on order of relevance, # considering the possible HTML tag parents and the history. 
for styleName in self.getMatchingStyleNames(tag): nodeStyle = self.getNamedStyle(styleName) if nodeStyle: # Not None and not empty for name, value in nodeStyle.items(): mergedStyle[name] = value break return mergedStyle def append(self, bs): u"""Append the string (or BabelString instance) to the last textbox in galley, if it exists. Otherwise create a new TextBox and add it to self.galley.""" if self.galley.elements and self.galley.elements[-1].isTextBox: self.galley.elements[-1].bs += bs else: self.TEXTBOX_CLASS(bs, parent=self.galley) def htmlNode(self, node, end=False): u"""Open the tag in HTML output and copy the node attributes if there are any.""" htmlTag = u'<%s' % node.tag attrs = [] for name, value in node.items(): if name == 'src' and value.startswith( 'docs/'): # Exception hack to bridge the .md --> img url. value = value[5:] attrs.append('%s="%s"' % (name, value)) if attrs: htmlTag += u' ' + u' '.join(attrs) if end: htmlTag += '/' htmlTag += '>' self.append(htmlTag) def _htmlNode(self, node): u"""Close the html tag of node.""" self.append('</%s>' % node.tag) def htmlNode_(self, node): u"""Opem+close the html tag of node.""" self.htmlNode(node, end=True) def typesetString(self, sOrBs, e=None, style=None): u"""If s is a formatted string, then it is placed untouched. If it is a plain string, then use the optional *style* or element *e* (using *e.css(name)*) for searching style parameters. Answer the new formatted string for convenience of the caller. e.g. to measure its size.""" # Only convert if not yet BabelString instance. bs = self.context.newString(sOrBs, e=e, style=style) self.append(bs) return bs def typesetNode(self, node, e=None): u"""Recursively typeset the etree *node*, using a reference to element *e* or the cascading *style*. If *e* is None, then the tag style is merged on top of the doc.rootStyle. 
If *e* is defined, then rootstyle of the stack starts with an empty dictionary, leaving root searching for the e.parent path.""" # Ignore <pre> tag output, as it is part of a ~~~Pyhton code block if self.writeTags and not node.tag in self.skipTags: # Open the node in HTML export for this node self.htmlNode(node) # Add this tag to the tag-history line. It is used to connect to the right style in case # we are rendering towards a FormattedString or another context-equivalent. self.addHistory(node.tag) # If e is undefined, then we make sure that the stack contains the doc.rootStyle on top. # If e is defined then root queries for style should follow the e.parent path. if self.peekStyle() is None and e is not None: # Root of stack is empty style, to force searching on the e.parent line. self.pushStyle({}) # Define top level for styles. nodeStyle = self.getNodeStyle( node.tag) # Merge found tag style with current top of stack self.pushStyle(nodeStyle) # Push this merged style on the stack # XML-nodes are organized as: node - node.text - node.children - node.tail # If there is no text or if the node does not have tail text, these are None. # Still we want to be able to add the prefix to the node.text, so then the text is changed to an empty string. nodeText = self._strip(node.text) if nodeText: # Not None and still has content after stripping? # Don't cache the context from self.galley as variable, as it may become dynamically updated by code blocks. # The galley context will define the type of BabelStrings generated by the Typesetter. bs = self.context.newString(nodeText, e=e, style=nodeStyle) self.append(bs) # Type set all child node in the current node, by recursive call. for child in node: hook = 'node_' + child.tag # Method will handle the styled body of the element, but not the tail. if hasattr(self, hook): # There is a hook for this node, let this method do the work. getattr(self, hook)(child, e) # Hook must be able to derive styles from e. 
# We are in tail mode now, but we don't know what happened in the child block. else: # If no method hook defined, then just solve recursively. Child node will get the style. self.typesetNode(child, e) # XML-nodes are organized as: node - node.text - node.children - node.tail # If there is no text or if the node does not have tail text, these are None. # Still we want to be able to add the postfix to the tail, so then the tail is changed # to empty string? childTail = child.tail #self._strip(child.tail, postfix=self.getStyleValue('postfix', e, nodeStyle, '')) if childTail: # Any tail left after stripping, then append to the galley. # Don't cache the context from self.galley as variable, as it may become dynamically updated by code blocks. bs = self.context.newString(childTail, e=e, style=nodeStyle) self.append(bs) # Ignore </pre> tag output, as it is part of a ~~~Pyhton code block if self.writeTags and not node.tag in self.skipTags: # Close the HTML tag of this node. self._htmlNode(node) # Now restore the graphic state at the end of the element content processing to the # style of the parent in order to process the tail text. Back to the style of the parent, # which was in nodeStyle. self.popStyle() def markDown2XmlFile(self, fileName, mdText, mdExtensions=None): u"""Take the markdown source, convert to HTML/XML and save in the file called fileName. If the fileName does not end with ".xml" extension, then add it. Answer the (new) fileName. 
>>> from pagebot.contexts.htmlcontext import HtmlContext >>> md = '''## Subtitle at start\\n\\n~~~\\npage = page.next\\n~~~\\n\\n# Title\\n\\n##Subtitle\\n\\nPlain text''' >>> context = HtmlContext() >>> t = Typesetter(context) >>> fileName = t.markDown2XmlFile('/tmp/PageBot_Typesetter_test.xml', md) >>> os.remove(fileName) """ if mdExtensions is None: mdExtensions = self.MARKDOWN_EXTENSIONS xmlBody = markdown.markdown(mdText, extensions=mdExtensions) xml = u'<?xml version="1.0" encoding="utf-8"?>\n<document>%s</document>' % xmlBody xml = xml.replace(' ', ' ') if not fileName.endswith('.xml'): fileName = fileName + '.xml' # Make sure file name has xml extension. f = codecs.open(fileName, mode="w", encoding="utf-8") # Save the XML as unicode. f.write(xml) f.close() return fileName def typesetMarkdown(self, mdText, mdExtensions=None, e=None, xPath=None): tmpPath = '/tmp/PageBot_Typesetter.xml' fileName = self.markDown2XmlFile(tmpPath, mdText, mdExtensions) self.typesetFile(fileName, e=e, xPath=xPath) os.remove(tmpPath) return self.galley def typesetFile(self, fileName, e=None, xPath=None): u"""Read the XML document and parse it into a tree of document-chapter nodes. Make the typesetter start at page pageNumber and find the name of the flow in the page template. The optional filter can be a list of tag names that need to be included in the composition, ignoring the rest. The optional rootStyle can be defined as style for the root tag, cascading force all child elements. Answer the root node for convenience of the caller.""" fileExtension = fileName.split('.')[-1] if fileExtension == 'md': # If we have MarkDown content, convert to XML (XHTML) f = codecs.open(fileName, mode="r", encoding="utf-8") mdText = f.read() # Read the raw MarkDown source f.close() fileName = self.markDown2XmlFile( fileName, mdText) # Translate MarkDown to HTML and save in file. tree = ET.parse(fileName) self.root = tree.getroot( ) # Get the root element of the tree and store for later retrieval. 
# If there is XSL filtering defined, they get the filtered nodes. if xPath is not None: filteredNodes = self.root.findall(xPath) if filteredNodes: # How to handle if we got multiple result nodes? self.typesetNode(filteredNodes[0], e) else: # Collect all flowing text in one formatted string, while simulating the page/flow, because # we need to keep track on which page/flow nodes results get positioned (e.g. for toc-head # reference, image index and footnote placement. self.typesetNode(self.root, e) # Answer the self.galley as convenience for the caller. return self.galley
def main(opts):  #{
    """Convert a Markdown file to an HTML page, optionally with a table of contents.

    The page title, TOC title and TOC location are first looked up in
    "md2html.cfg" (in the directory of the input file, or in the current
    directory when reading stdin), in the section named after the base name
    of the input file.  Values given on the command line override them.
    Giving a TOC title or a TOC location switches the TOC on.

    The input is read and converted completely before the output file is
    opened, so a failing conversion does not truncate an existing output
    file and a file can be converted in place.

    Exits with status 1 if the config file cannot be parsed, the input
    cannot be decoded, the TOC location is not a valid regular expression
    or the output cannot be encoded.

    @param opts: list of command line args-- sys.argv[1:].
    @type opts: list of string"""
    parser = argparse.ArgumentParser(description=desc,
                                     argument_default="",
                                     fromfile_prefix_chars="@")
    parser.add_argument("-V", "--version",
                        action="version",
                        version="%(prog)s v{}".format(__VERSION__),
                        help="print program version and exit")
    parser.add_argument("-q", "--quiet",
                        dest="verbose",
                        action="store_false",
                        default=True,
                        help="suppress messages")
    parser.add_argument(
        "input", help="input file name, - reads from stdin (default stdin)")
    parser.add_argument("output", help="output file")
    parser.add_argument("-t", "--title", dest="title", help="page title")
    parser.add_argument("-c", "--toc",
                        dest="toc",
                        action="store_true",
                        help="insert a table of contents")
    parser.add_argument(
        "-l", "--toclocation",
        dest="toclocation",
        help="a Python regular expression that matches the text before which the TOC is to be placed. If the first character is '+', it is removed and the TOC is placed after the first newline following the start of the matched text. Implies -c"
    )
    parser.add_argument(
        "-T", "--toctitle",
        dest="toctitle",
        help="title text shown (in a span) before the TOC, default ''. Implies -c")
    args = parser.parse_args(opts)
    cfg = ConfigParser.SafeConfigParser()

    if args.verbose:  #{
        msg("md2html v{}: converting {} to {}".format(__VERSION__, args.input,
                                                      args.output))
    #} # if verbose

    # True when the input is a real file rather than stdin ("-").
    has_input_file = bool(args.input) and args.input != "-"

    toc_title = ""
    page_title = ""
    toclocation = ""

    # The config file lives next to the input file, or in the current
    # directory when reading from stdin.
    if has_input_file:  #{
        cfgdir = os.path.dirname(args.input)
    #} # if input
    else:  #{
        cfgdir = os.getcwd()
    #} # no input file
    cfgfile = os.path.join(cfgdir, "md2html.cfg")

    if os.path.exists(cfgfile):  #{
        with io.open(cfgfile, mode='rt', encoding='utf-8-sig') as cfp:  #{
            try:  #{
                cfg.readfp(cfp)
            #}
            except ConfigParser.Error as e:  #{
                msg("md2html: Error reading config file: {}".format(str(e)))
                sys.exit(1)
            #} # except
        #} # with
    #} # if cfgfile exists

    # Per-file settings are stored in the section named after the input file.
    cfgsection = os.path.basename(args.input) if has_input_file else ""
    if cfgsection and cfg.has_section(cfgsection):  #{
        if cfg.has_option(cfgsection, "toctitle"):
            toc_title = cfg.get(cfgsection, "toctitle")
        if cfg.has_option(cfgsection, "toclocation"):
            toclocation = cfg.get(cfgsection, "toclocation")
        if cfg.has_option(cfgsection, "title"):
            page_title = cfg.get(cfgsection, "title")
    #} # if cfgsection

    # Command line options override the config file.
    if args.toctitle: toc_title = args.toctitle
    if args.title: page_title = args.title
    toc = args.toc
    if args.toclocation: toclocation = args.toclocation
    if toclocation or toc_title: toc = True

    # input file
    if has_input_file:  #{
        f = io.open(args.input, mode="rt", encoding="utf-8-sig")
    #}
    else:  #{
        f = io.open(sys.stdin.fileno(), mode="rt", encoding="utf-8")
    #}
    input_name = f.name  # kept for error messages after f is closed
    try:  #{
        s = f.read()
    #} # try
    except UnicodeDecodeError as e:  #{
        msg("md2html: UnicodeDecodeError in {}: {}".format(input_name, str(e)))
        sys.exit(1)
    #} # except
    finally:  #{
        f.close()
    #} # finally

    s2 = s
    if toc:  #{
        # A leading '+' means: place the TOC after the matched line
        # instead of before the match.
        aftertoc = False
        if toclocation.startswith("+"):  #{
            aftertoc = True
            toclocation = toclocation[1:]
        #} # if
        if not toclocation: toclocation = "^# "
        try:  #{
            m = re.search(toclocation, s, re.M)
        #} # try
        except re.error as e:  #{
            msg("md2html: invalid TOC location regular expression: {}".format(str(e)))
            sys.exit(1)
        #} # except
        if not m:  #{
            msg("md2html: TOC location not found, disabling toc option. Do your headings start in column 1?"
                )
            toc = False
        #}
        else:  #{
            # toclocation found.
            tocstart = m.start()
            if aftertoc:  #{
                i = s.find("\n", tocstart)
                if i > -1: tocstart = i + 1
            #} # if tocstart
            s2 = s[:tocstart] + "[TOC]\n" + s[tocstart:]
        #} # else toclocation found
    #} # if toc

    # convert; toc may have been cleared if toclocation not found.
    extensions = [FencedCodeExtension()]
    if toc:  #{
        extensions.append(TocExtension(title=toc_title))
    #}
    html = markdown.markdown(s2, extensions=extensions)

    # output file
    # I don't know why, but if I write this encoded I get an extra CR. I would think writing in binary mode would produce UNIX-style line endings, but on my Windows machine it doesn't.
    with io.open(args.output, mode="wt", encoding="utf-8") as fout:  #{
        try:  #{
            fout.write(page_template.format(page_title, html))
        #} # try
        except UnicodeEncodeError as e:  #{
            msg("md2html: UnicodeEncodeError writing output for {}: {} (mode for output file is {})"
                .format(input_name, str(e), fout.mode))
            sys.exit(1)
        #} except
    #} # with
# Put the local qtile widget directory on the module search path. This has to
# happen before the ``checkclock`` import below.
checkclock_path = Path("~/.config/qtile/widgets").expanduser().absolute()
sys.path.insert(1, str(checkclock_path))
from checkclock import ReadOnlyCheckclock, as_time, as_hours_and_minutes

# ReadOnlyCheckclock instance for the database file in the user's qtile
# config directory, with Monday to Friday passed as working days.
checkclock = ReadOnlyCheckclock(
    Path("~/.config/qtile/checkclock.sqlite").expanduser(), working_days="Mon-Fri"
)

# Matches a one- or two-digit number directly followed by an English ordinal
# suffix (e.g. "1st", "22nd"); group 1 is the number, group 2 the suffix.
ordinal_pattern = re.compile(r"\b([0-9]{1,2})(st|nd|rd|th)\b")

# Module-level Markdown converter that emits HTML5, with the extensions
# listed below enabled.
md = markdown.Markdown(
    output_format="html5",
    extensions=[
        FencedCodeExtension(),
        CodeHiliteExtension(css_class="highlight", guess_lang=False),
        DefListExtension(),
        FootnoteExtension(),
        MetaExtension(),
        Nl2BrExtension(),
        SaneListExtension(),
        TocExtension(),
        StrikethroughExtension(),
        TableExtension(),
        AttrListExtension(),
    ],
)

# NOTE(review): presumably the file extensions treated as downloads; the
# code using this list is not visible here -- confirm.
DOWNLOAD_EXTENSIONS = [".ods", ".odt"]
# NOTE(review): looks like a mapping from a code-block language name to the
# lexer name to use instead -- confirm against the code that reads it.
LEXER_MAP = {"pgsql": "sql"}
def md_factory(allow_links=True, allow_images=True, allow_blocks=True):
    """Build the Markdown parser used for user content.

    Raw HTML and reference-style links/images are always disabled, while
    BBCode ``[b]``, ``[i]``, ``[u]`` and ``~~deleted~~`` are always enabled.
    The three flags select whether links, images and block-level markup are
    extended with their BBCode variants (True) or stripped from the parser
    (False).  The configured parser is passed through
    ``pipeline.extend_markdown`` and its result is returned.
    """
    md = markdown.Markdown(extensions=['markdown.extensions.nl2br'])
    inline_patterns = md.inlinePatterns
    block_processors = md.parser.blockprocessors

    # Raw HTML is never allowed.
    del md.preprocessors['html_block']
    del inline_patterns['html']

    # Neither are reference-style links and images.
    del md.preprocessors['reference']
    for pattern_name in ('reference', 'image_reference', 'short_reference'):
        del inline_patterns[pattern_name]

    # BBCode [b], [i] and [u], each registered relative to its Markdown twin.
    bbcode_inline = (
        ('bb_b', inline.bold, '<strong'),
        ('bb_i', inline.italics, '<emphasis'),
        ('bb_u', inline.underline, '<emphasis2'),
    )
    for key, pattern, location in bbcode_inline:
        inline_patterns.add(key, pattern, location)

    # ~~deleted~~
    StriketroughExtension().extendMarkdown(md)

    if allow_links:
        # BBCode [url]
        inline_patterns.add('bb_url', inline.url(md), '<link')
    else:
        for pattern_name in ('link', 'autolink', 'automail'):
            del inline_patterns[pattern_name]

    if allow_images:
        # BBCode [img] plus the short image syntax
        inline_patterns.add('bb_img', inline.image(md), '<image_link')
        ShortImagesExtension().extendMarkdown(md)
    else:
        del inline_patterns['image_link']

    if allow_blocks:
        # BBCode [hr], fenced code, [code] and [quote] blocks
        block_processors.add(
            'bb_hr', blocks.BBCodeHRProcessor(md.parser), '>hr')
        FencedCodeExtension().extendMarkdown(md, None)
        blocks.CodeBlockExtension().extendMarkdown(md)
        blocks.QuoteExtension().extendMarkdown(md)
    else:
        # No headers, code, quotes, rulers or lists.
        unwanted_blocks = (
            'hashheader',
            'setextheader',
            'code',
            'quote',
            'hr',
            'olist',
            'ulist',
        )
        for processor_name in unwanted_blocks:
            del block_processors[processor_name]

    return pipeline.extend_markdown(md)