def stashedHTML2text(text, md):
    """Replace stashed raw-HTML placeholders in *text* with plain text.

    Every match of ``HTML_PLACEHOLDER_RE`` is looked up by its numeric
    index in ``md.htmlStash.rawHtmlBlocks``; each entry is unpacked as a
    ``(raw, safe)`` pair.

    Args:
        text: String that may contain raw-HTML placeholders.
        md: Object exposing ``htmlStash.rawHtmlBlocks`` and ``safeMode``.

    Returns:
        *text* where each resolvable placeholder is replaced by its
        stashed HTML with tags and entities removed, or by an empty
        string when ``md.safeMode`` is truthy and the block is not
        flagged safe. Placeholders whose lookup raises ``IndexError`` or
        ``TypeError`` are left unchanged.
    """
    def _html_sub(m):
        """Return the plain-text replacement for one placeholder match."""
        try:
            raw, safe = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
        except (IndexError, TypeError):
            # Lookup failed: keep the placeholder text exactly as matched.
            return m.group(0)
        if md.safeMode and not safe:
            return ''
        # Strip out tags and entities - leaving text
        return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)

    return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
Пример #2
0
def stashedHTML2text(text, md):
    """ Swap each raw-HTML placeholder in `text` for the plain text of its stashed block. """
    markup_pattern = r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)'

    def _plain_text_for(match):
        """ Resolve a single placeholder match to its replacement string. """
        try:
            entry = md.htmlStash.rawHtmlBlocks[int(match.group(1))]
            html, is_safe = entry
        except (IndexError, TypeError):
            # Lookup failed - hand the placeholder back untouched
            return match.group(0)
        if not md.safeMode or is_safe:
            # Drop tags and entities so only the text content remains
            return re.sub(markup_pattern, '', html)
        # Safe mode is on and the block is not marked safe: emit nothing
        return ''

    return HTML_PLACEHOLDER_RE.sub(_plain_text_for, text)
Пример #3
0
    def unescape_char(self, text, rawHtml=False):
        """Restore escaped characters and selected stashed HTML in *text*.

        Two substitutions run in order:

        1. Every two-digit decimal number enclosed between the control
           characters U+0002 and U+0003 is replaced by ``chr()`` of that
           number.
        2. Every ``HTML_PLACEHOLDER_RE`` match is looked up in
           ``self.markdown.htmlStash.rawHtmlBlocks`` and replaced by the
           stashed HTML when *rawHtml* is truthy or the stashed HTML
           starts with an entity-like ``&...;`` token; otherwise the
           placeholder is left in place.

        Args:
            text: String to process.
            rawHtml: When truthy, expand every stashed HTML block rather
                than only those beginning with an entity.

        Returns:
            The processed string.

        Raises:
            IndexError: If a placeholder refers to an index that is not
                present in the stash (the lookup is not guarded).
        """
        def unescape(matched):
            return chr(int(matched.group(1)))

        def expand_rawhtml(matched):
            html_id = int(matched.group(1))
            # Entries are unpacked as pairs; the second item is unused here.
            html, _safe = self.markdown.htmlStash.rawHtmlBlocks[html_id]
            if rawHtml or re.match(r'(&[\#a-zA-Z0-9]*;)', html):
                return html  # unescape HTML entities only
            return matched.group(0)

        # Raw string: the regex engine interprets \x02, \x03 and \d itself,
        # which avoids the invalid "\d" escape in a normal string literal.
        # NOTE(review): only exactly two digits are matched, so code points
        # of 100 or above are not unescaped - confirm that is intended.
        text = re.sub(r'\x02(\d\d)\x03', unescape, text)
        text = HTML_PLACEHOLDER_RE.sub(expand_rawhtml, text)
        return text
Пример #4
0
def stashedHTML2text(text, md, strip_entities=True):
    """ Replace raw-HTML placeholders in `text` with the tag-free text of the stashed HTML. """
    def _to_text(match):
        """ Produce the replacement string for one placeholder match. """
        try:
            stashed = md.htmlStash.rawHtmlBlocks[int(match.group(1))]
        except (IndexError, TypeError):  # pragma: no cover
            return match.group(0)
        # Tags are always removed; entities only when requested
        tagless = re.sub(r'(<[^>]+>)', '', stashed)
        if not strip_entities:
            return tagless
        return re.sub(r'(&[\#a-zA-Z0-9]+;)', '', tagless)

    return HTML_PLACEHOLDER_RE.sub(_to_text, text)
    def unescape_char(self, text, rawHtml=False):
        """Restore escaped characters and selected stashed HTML in *text*.

        First, every two-digit decimal number enclosed between the control
        characters U+0002 and U+0003 is replaced by ``chr()`` of that
        number. Then every ``HTML_PLACEHOLDER_RE`` match is looked up in
        ``self.markdown.htmlStash.rawHtmlBlocks``; it is replaced by the
        stashed HTML when *rawHtml* is truthy or the stashed HTML starts
        with an entity-like ``&...;`` token, and left unchanged otherwise.

        Args:
            text: String to process.
            rawHtml: When truthy, expand every stashed HTML block rather
                than only those beginning with an entity.

        Returns:
            The processed string.

        Raises:
            IndexError: If a placeholder refers to an index that is not
                present in the stash (the lookup is not guarded).
        """
        def unescape(matched):
            return chr(int(matched.group(1)))

        def expand_rawhtml(matched):
            html_id = int(matched.group(1))
            # Entries are unpacked as pairs; the second item is unused here.
            html, _safe = self.markdown.htmlStash.rawHtmlBlocks[html_id]
            if rawHtml or re.match(r'(&[\#a-zA-Z0-9]*;)', html):
                return html  # unescape HTML entities only
            return matched.group(0)

        # Raw string: the regex engine interprets \x02, \x03 and \d itself,
        # which avoids the invalid "\d" escape in a normal string literal.
        # NOTE(review): only exactly two digits are matched, so code points
        # of 100 or above are not unescaped - confirm that is intended.
        text = re.sub(r'\x02(\d\d)\x03', unescape, text)
        text = HTML_PLACEHOLDER_RE.sub(expand_rawhtml, text)
        return text