def make_node(self, cls, element):
    """Create a ``cls`` node filled from ``element``'s text and children.

    The leading text of ``element`` is unescaped with ``self.unescape_char``
    and added as a raw HTML node when it still contains a stashed-HTML
    placeholder, as a paragraph when ``cls`` is listed in
    ``HAVING_BLOCK_NODE``, and as a plain text node otherwise.  Every child
    is converted with ``self.visit``; the text following a child (its
    ``tail``) is handled the same way, except that it joins the first child
    of the new node when the child itself was placed there.

    :param cls: zero-argument callable producing the container node.  The
        result must support ``+=``, ``len()`` and indexing
        (presumably a ``docutils.nodes`` element class -- confirm).
    :param element: iterable of child elements exposing ``.text``; each
        child exposes ``.tail`` (presumably an ElementTree element --
        confirm against the caller).
    :returns: the populated node.
    """
    node = cls()
    having_block_node = cls in HAVING_BLOCK_NODE

    if element.text and element.text != "\n":
        text = self.unescape_char(element.text)
        if HTML_PLACEHOLDER_RE.search(text):
            # A placeholder survived plain unescaping, so it stands for raw
            # HTML: expand it and pass it through untouched.
            node += nodes.raw(format='html',
                              text=self.unescape_char(text, rawHtml=True))
        elif having_block_node:
            node += nodes.paragraph(text=text)
        else:
            node += nodes.Text(text)

    for child in element:
        subnode = self.visit(child)
        in_paragraph = having_block_node and isinstance(subnode, INLINE_NODES)
        if in_paragraph:
            # Inline content may not sit directly inside a block container,
            # so it is collected in the container's first child.
            # NOTE(review): the explicit length test is kept on purpose;
            # docutils nodes are presumably always truthy -- confirm before
            # replacing it with ``not node``.
            if len(node) == 0:
                node += nodes.paragraph()
            node[0] += subnode
        else:
            node += subnode

        if child.tail and child.tail != "\n":
            tail = self.unescape_char(child.tail)
            if HTML_PLACEHOLDER_RE.search(tail):
                # Expand the stashed HTML exactly as is done for the leading
                # text; otherwise the raw node would carry the placeholder
                # itself instead of the HTML it stands for.
                node += nodes.raw(format='html',
                                  text=self.unescape_char(tail, rawHtml=True))
            elif in_paragraph:
                node[0] += nodes.Text(tail)
            elif having_block_node:
                node += nodes.paragraph(text=tail)
            else:
                node += nodes.Text(tail)

    return node
def make_node(self, cls, element):
    """Create a ``cls`` node filled from ``element``'s text and children.

    The leading text of ``element`` is unescaped with ``self.unescape_char``.
    When it still contains a stashed-HTML placeholder the stash is expanded:
    an HTML comment of the form ``<!--math ... -->`` becomes a math node,
    any other HTML becomes a raw HTML node.  Otherwise the text becomes a
    paragraph when ``cls`` is listed in ``HAVING_BLOCK_NODE`` and a plain
    text node when it is not.  Every child is converted with ``self.visit``;
    the text following a child (its ``tail``) is handled like the leading
    text (without the math special case), except that it joins the first
    child of the new node when the child itself was placed there.

    :param cls: zero-argument callable producing the container node.  The
        result must support ``+=``, ``len()`` and indexing
        (presumably a ``docutils.nodes`` element class -- confirm).
    :param element: iterable of child elements exposing ``.text``; each
        child exposes ``.tail`` (presumably an ElementTree element --
        confirm against the caller).
    :returns: the populated node.
    """
    node = cls()
    having_block_node = cls in HAVING_BLOCK_NODE

    if element.text and element.text != "\n":
        text = self.unescape_char(element.text)
        if HTML_PLACEHOLDER_RE.search(text):
            html_text = self.unescape_char(text, rawHtml=True)
            # ``re.match`` anchors at the start, so it already implies the
            # "<!--math" prefix test.  Anything that is not a complete math
            # comment falls back to raw HTML, so no content is ever dropped.
            math_match = re.match(r"<!--math(.*?)-->", html_text, re.DOTALL)
            if math_match:
                latex = math_match.group(1).strip()
                node += nodes.math(text=latex, latex=latex)
            else:
                node += nodes.raw(format='html', text=html_text)
        elif having_block_node:
            node += nodes.paragraph(text=text)
        else:
            node += nodes.Text(text)

    for child in element:
        subnode = self.visit(child)
        in_paragraph = having_block_node and isinstance(subnode, INLINE_NODES)
        if in_paragraph:
            # Inline content may not sit directly inside a block container,
            # so it is collected in the container's first child.
            # NOTE(review): the explicit length test is kept on purpose;
            # docutils nodes are presumably always truthy -- confirm before
            # replacing it with ``not node``.
            if len(node) == 0:
                node += nodes.paragraph()
            node[0] += subnode
        else:
            node += subnode

        if child.tail and child.tail != "\n":
            tail = self.unescape_char(child.tail)
            if HTML_PLACEHOLDER_RE.search(tail):
                # Expand the stashed HTML exactly as is done for the leading
                # text; otherwise the raw node would carry the placeholder
                # itself instead of the HTML it stands for.
                node += nodes.raw(format='html',
                                  text=self.unescape_char(tail, rawHtml=True))
            elif in_paragraph:
                node[0] += nodes.Text(tail)
            elif having_block_node:
                node += nodes.paragraph(text=tail)
            else:
                node += nodes.Text(tail)

    return node
def stashedHTML2text(text, md):
    """Swap each raw-HTML placeholder in ``text`` for the stashed block's plain text.

    ``md`` supplies the stash (``md.htmlStash.rawHtmlBlocks``, indexed by the
    number captured from the placeholder) and the ``safeMode`` flag.
    """

    def _html_sub(m):
        """Return the replacement text for a single placeholder match."""
        try:
            stashed_html, is_safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
        except (IndexError, TypeError):
            # Nothing usable in the stash: keep the placeholder as it is.
            return m.group(0)

        if not md.safeMode or is_safe:
            # Drop tags and entities so that only the text remains.
            return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', stashed_html)

        # Unsafe HTML is removed entirely while safe mode is active.
        return ''

    return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
def unescape_char(self, text, rawHtml=False):
    """Resolve escape markers and stashed-HTML placeholders in ``text``.

    Two substitutions are applied in order:

    1. ``STX <digits> ETX`` markers (``\\x02`` ... ``\\x03``) are replaced by
       the character whose code point is ``<digits>``.
    2. Placeholders matched by ``HTML_PLACEHOLDER_RE`` are replaced by the
       stashed HTML when ``rawHtml`` is true or when the stashed block starts
       with an HTML entity; any other placeholder is left in place.

    :param text: string to process.
    :param rawHtml: when true, expand every placeholder, not only those
        whose stashed block starts with an entity.
    :returns: the processed string.
    :raises IndexError: if a placeholder refers to an index missing from
        ``self.markdown.htmlStash.rawHtmlBlocks``.
    """

    def unescape(matched):
        return chr(int(matched.group(1)))

    def expand_rawhtml(matched):
        html_id = int(matched.group(1))
        # Each stash entry is unpacked as a pair; only the HTML is needed.
        html, _safe = self.markdown.htmlStash.rawHtmlBlocks[html_id]
        if rawHtml or re.match(r'(&[\#a-zA-Z0-9]*;)', html):
            return html  # unescape HTML entities only
        return matched.group(0)

    # ``\\d`` is written with an escaped backslash because a bare ``\d`` in a
    # non-raw literal is an invalid escape sequence.  ``\\d+`` rather than two
    # fixed digits, so that code points with three digits (e.g. "{" = 123,
    # "}" = 125) are restored as well.
    # NOTE(review): presumably these markers come from Python-Markdown's
    # backslash-escape handling, which writes the ordinal in decimal --
    # confirm.
    text = re.sub('\x02(\\d+)\x03', unescape, text)
    return HTML_PLACEHOLDER_RE.sub(expand_rawhtml, text)
def stashedHTML2text(text, md, strip_entities=True):
    """Swap each raw-HTML placeholder in ``text`` for the stashed block's plain text.

    Tags are always removed from the stashed HTML; entities are removed as
    well unless ``strip_entities`` is false.  The stash is read from
    ``md.htmlStash.rawHtmlBlocks`` using the number captured from the
    placeholder.
    """

    def _html_sub(m):
        """Return the replacement text for a single placeholder match."""
        try:
            stashed_html = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
        except (IndexError, TypeError):  # pragma: no cover
            # Nothing usable in the stash: keep the placeholder as it is.
            return m.group(0)
        else:
            # Remove the tags first, then (optionally) the entities.
            without_tags = re.sub(r'(<[^>]+>)', '', stashed_html)
            if not strip_entities:
                return without_tags
            return re.sub(r'(&[\#a-zA-Z0-9]+;)', '', without_tags)

    return HTML_PLACEHOLDER_RE.sub(_html_sub, text)