示例#1
0
文件: mdx.py 项目: obensonne/markowik
    def preprocess(self, node, nextnode):
        """
        Preprocess the XHTML tree generated by `markdown.convert()`.

        The element `node` is modified in place and the method then recurses
        into all of its children. The steps, in order, are:

        1. insert ``<br/>`` elements between subsequent ``<p>`` children of
           ``<li>`` and ``<blockquote>`` elements,
        2. rename ``<abbr>`` elements to ``<span>``,
        3. collapse ``<pre><code>...</code></pre>`` to ``<pre>...</pre>``,
        4. clean up whitespace in the text and tail of `node`,
        5. prefix ``<img>`` sources not matching `RXABSURL` with
           `self.mdx.imagebaseurl` and append an artificial ``x=x.png``
           query parameter to sources not matching `RXIMGEXT`,
        6. check the ``href`` of ``<a>`` elements.

        :param node: element to process (modified in place)
        :param nextnode: the sibling following `node`, or `None` if `node`
            is the last child of its parent
        :raises BadURL: if an image source or a link target does not pass
            the `RXABSURLX` / `RXPAGENAME` checks

        """
        # --- inject linebreaks between subsequent nested paragraphs ----------

        if node.tag in ('li', 'blockquote'):
            prev = None
            index = 0  # position of `child` in `node`, counting inserted <br/>
            minindex = 1 if node.tag == 'li' else 0
            for child in list(node):
                # For whatever reason, the first linebreak in a list item
                # introduces a new paragraph in GCW while subsequent linebreaks
                # are handled as whitespace, i.e. in these cases an explicit
                # <br/> is needed.
                if index > minindex and prev.tag == 'p' and child.tag == 'p':
                    log("using <br/> to fake nested paragraph '%s'" %
                        truncate(child.text or "...", 15))
                    # Use a fresh element for every insertion so that no
                    # element object is shared between several positions in
                    # the tree.
                    node.insert(index, etree.Element('br'))
                    index += 1
                index += 1
                prev = child

        # --- replace <abbr> by <span> ----------------------------------------

        if node.tag == 'abbr':
            # `node.text` is None for an empty element, hence the fallback.
            log("replacing <abbr> by <span> ('%s')" %
                truncate(node.text or "...", 15))
            node.tag = 'span'

        # --- collapse <pre><code> to <pre> -----------------------------------

        precodeblock = (node.tag == 'pre' and len(node) == 1 and not node.text
                        and node[0].tag == 'code' and not node[0].tail)
        if precodeblock:
            code = node[0]
            # Element.clear() drops children and attributes but also resets
            # the tail, so the tail is saved and restored to keep any text
            # following the closing </pre>.
            tail = node.tail
            node.clear()
            node.text = code.text
            node.tail = tail

        # --- whitespace cleanup ----------------------------------------------

        node.text = node.text or ""
        node.tail = node.tail or ""
        # Elements are tested with len() / `is not None` explicitly: the truth
        # value of an element without children is False, which would make a
        # childless span-level sibling look like a missing one.
        if not (len(node) > 0 and node[0].tag in SPANLEVELTAGS):
            node.text = node.text.strip("\n")
        if (not (node.tag in SPANLEVELTAGS and node.tail) and
            not (nextnode is not None and nextnode.tag in SPANLEVELTAGS)):
            node.tail = node.tail.strip("\n")
        if node.tag != 'pre':
            node.text = re.sub(r'\s+', ' ', node.text)
        else:
            # Whitespace inside <pre> is kept, only common indentation goes.
            node.text = textwrap.dedent(node.text)
            assert len(node) == 0, "unexpected child elements in <pre>"
        node.tail = re.sub(r'\s+', ' ', node.tail)

        # --- prefix image urls -----------------------------------------------

        if node.tag == 'img':
            isrc = node.attrib['src']
            if not RXABSURL.search(isrc):
                isrc = "%s%s" % (self.mdx.imagebaseurl, isrc)
            if not RXABSURLX.search(isrc):
                raise BadURL(isrc)
            if not RXIMGEXT.search(isrc):
                # Add the parameter to an existing query string if there is
                # one, otherwise start a new query string.
                conn = "&" if "?" in isrc else "?"
                isrc = "%s%sx=x.png" % (isrc, conn)
                log("appending artificial image file extension (%s)" % isrc)
            node.attrib['src'] = isrc

        # --- check link URLs -------------------------------------------------

        if node.tag == 'a':
            url = node.attrib['href']
            if RXABSURL.search(url):
                if not RXABSURLX.search(url):
                    raise BadURL(url)
            elif not RXPAGENAME.search(url):
                raise BadURL(url)

        # --- traverse child nodes --------------------------------------------

        # Pair every child with its following sibling (None for the last one).
        children = list(node)
        for child, following in zip(children, children[1:] + [None]):
            self.preprocess(child, following)
示例#2
0
文件: mdx.py 项目: obensonne/markowik
 def a(self, _front, text, attrib):
     """
     Render a link.

     Returns the bracketed ``[href text]`` form built from `attrib['href']`
     and `text`, unless `attrib['html']` is true. In that case the choice is
     logged and the result of `self.element('a', text, attrib)` is returned
     instead. `_front` is not used.

     """
     wants_html = attrib['html']
     if not wants_html:
         target = attrib['href']
         return "[%s %s]" % (target, text)

     log("using an HTML link for '%s'" % text)
     return self.element('a', text, attrib)