def stashedHTML2text(text, md):
    """
    Replace stashed-HTML placeholders in `text` with the plain text they hide.

    Every match of `HTML_PLACEHOLDER_RE` is resolved by index against
    `md.htmlStash.rawHtmlBlocks`, whose entries are unpacked as
    `(raw_html, safe_flag)` pairs. Tags and character entities are removed
    from the raw HTML and the remaining text takes the placeholder's place.
    """
    # Matches either a complete tag or a complete character entity.
    markup_pattern = r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)'

    def _html_sub(match):
        """ Return the replacement text for a single placeholder match. """
        try:
            html, is_safe = md.htmlStash.rawHtmlBlocks[int(match.group(1))]
        except (IndexError, TypeError):
            # Unresolvable placeholder: leave it in the text as it was.
            return match.group(0)
        if not md.safeMode or is_safe:
            # Drop the markup and keep only the text between it.
            return re.sub(markup_pattern, '', html)
        # Safe mode is on and the block is not flagged safe: emit nothing.
        return ''

    return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
def unescape_char(self, text, rawHtml=False):
    """
    Restore escaped characters and selected stashed HTML in `text`.

    Two substitutions are applied, in this order:

    1. A two-digit decimal number enclosed between the control characters
       U+0002 and U+0003 is replaced by the character with that code point.
    2. Each match of `HTML_PLACEHOLDER_RE` is looked up by index in
       `self.markdown.htmlStash.rawHtmlBlocks`. The stashed HTML replaces
       the placeholder when `rawHtml` is true, or when the stashed HTML
       starts with an entity-like token (`&...;`). Otherwise the
       placeholder is left in place.

    A placeholder whose index is not present in the stash is left untouched
    instead of raising `IndexError`.

    Returns the processed text.
    """
    def unescape(matched):
        # The captured digits are a decimal code point.
        return chr(int(matched.group(1)))

    def expand_rawhtml(matched):
        html_id = int(matched.group(1))
        try:
            entry = self.markdown.htmlStash.rawHtmlBlocks[html_id]
        except IndexError:
            # Stale or foreign placeholder: keep it verbatim.
            return matched.group(0)
        # Stash entries are (html, safe) pairs; the safe flag is not needed.
        html, _safe = entry
        if rawHtml or re.match(r'(&[\#a-zA-Z0-9]*;)', html):
            # Either everything is expanded, or only HTML entities are.
            return html
        return matched.group(0)

    # Raw string: '\d' is an invalid escape sequence in a normal literal.
    # The re module itself understands the \x02 / \x03 escapes.
    text = re.sub(r'\x02(\d\d)\x03', unescape, text)
    return HTML_PLACEHOLDER_RE.sub(expand_rawhtml, text)
def stashedHTML2text(text, md, strip_entities=True):
    """
    Replace stashed-HTML placeholders in `text` with tag-free plain text.

    Every match of `HTML_PLACEHOLDER_RE` is resolved by index against
    `md.htmlStash.rawHtmlBlocks`. Tags are always removed from the stashed
    HTML; character entities are removed as well unless `strip_entities`
    is false.
    """
    tag_pattern = r'(<[^>]+>)'
    entity_pattern = r'(&[\#a-zA-Z0-9]+;)'

    def _html_sub(match):
        """ Return the replacement text for a single placeholder match. """
        try:
            html = md.htmlStash.rawHtmlBlocks[int(match.group(1))]
        except (IndexError, TypeError):  # pragma: no cover
            # Unresolvable placeholder: leave it in the text as it was.
            return match.group(0)
        without_tags = re.sub(tag_pattern, '', html)
        if not strip_entities:
            return without_tags
        return re.sub(entity_pattern, '', without_tags)

    return HTML_PLACEHOLDER_RE.sub(_html_sub, text)