Exemplo n.º 1
1
def clean(soup, toc, ref):
    # Rewrite links
    for link in soup.findAll('a'):
        href = link.get('href')
        if href is None:
            print >> sys.stderr, "WARNING: Link with no href:", link
            continue
        if href.startswith('#') and href != '#':
            href = href[1:]
            if soup.find(attrs={'id': href}) is not None or soup.find(
                    "a", attrs={'name': href}) is not None:
                # Link to an element in the page
                continue
            target = toc.walk_id(href)
            if target is None:
                print >> sys.stderr, "WARNING: Link to an unknown ToC entry \"%s\"" % href
                continue
            link['href'] = target.link(ref)
    # Access elements by id to keep a reference before removing their id attribute
    headerElmt = soup.find("div", attrs={'id': 'header'})
    tocElmt = soup.find("div", attrs={'id': 'toc'})
    footerElmt = soup.find("div", attrs={'id': 'footer'})
    # Prefer class to id
    for id in [
            'header', 'toc', 'toctitle', 'preamble', 'content', 'footer',
            'footer-text'
    ]:
        elmt = soup.find(attrs={'id': id})
        if elmt is not None:
            elmt['class'] = (elmt.get('class', '') + ' ' + elmt['id']).strip()
            del elmt['id']
    # Add icon in header
    if headerElmt is not None:
        iconElmt = BeautifulSoup.Tag(soup,
                                     'div',
                                     attrs={'class': 'page-badge'})
        headerElmt.insert(0, iconElmt)
    # Add breadcrumb in header
    if headerElmt is not None:
        breadcrumbElmt = BeautifulSoup.Tag(soup,
                                           'div',
                                           attrs={'class': 'breadcrumb'})
        for i, entry in enumerate(ref.get_ancestry()[:-1]):
            if i > 0:
                breadcrumbElmt.append(BeautifulSoup.NavigableString(u' » '))
            linkElmt = BeautifulSoup.Tag(soup,
                                         'a',
                                         attrs={'href': entry.link(ref)})
            linkElmt.append(
                BeautifulSoup.NavigableString(
                    entry.title if not entry.is_root() else 'Docs'))
            breadcrumbElmt.append(linkElmt)
        headerElmt.insert(1, breadcrumbElmt)
    # Add ToC in header
    if tocElmt is not None:
        # Remove toc's noscript
        noscript = tocElmt.find("noscript")
        if noscript is not None:
            noscript.decompose()  # causes problems with subsequent soup.find()
        # Inject ToC
        tocHTMLBuffer = cStringIO.StringIO()
        ref.write_html(tocHTMLBuffer, open=ref)
        tocHTML = tocHTMLBuffer.getvalue().decode('utf-8')
        tocHTMLBuffer.close()
        if tocHTML == u'':
            # Remove ToC if empty
            tocElmt.decompose()
        else:
            tocTags = BeautifulSoup.BeautifulSoup(tocHTML)
            tocElmt.append(tocTags)
            # Use a wrapper div
            wrapper = BeautifulSoup.Tag(soup,
                                        'div',
                                        attrs={'class': 'tocwrapper'})
            tocElmt.replaceWith(wrapper)
            wrapper.append(tocElmt)
    # Add license in footer
    if footerElmt is not None:
        footerLicense = BeautifulSoup.BeautifulSoup("""
<div class="footer-license">
  Except as otherwise noted, <span xmlns:dct="http://purl.org/dc/terms/" property="dct:title">WonderPush Documentation</span> by <a xmlns:cc="http://creativecommons.org/ns#" href="http://www.wonderpush.com/docs" property="cc:attributionName" rel="cc:attributionURL">WonderPush</a> is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">Creative Commons Attribution-ShareAlike 4.0 International License</a>,
  and code samples are licensed under the <a rel="license" href="http://www.apache.org/licenses/LICENSE-2.0">Apache 2.0 License</a>.
</div>""")
        footerElmt.insert(0, footerLicense)
    # Return just the interesting html, not the boilerplate
    rtn = soup.body.extract()
    rtn.name = 'div'
    return rtn
Exemplo n.º 2
0
def main(args):
    if len(args) != 3:
        usage(args)
        return 1
    if args[1] == '-h' or args[1] == '--help':
        usage(args)
        return 0
    tocFile = sys.argv[1]
    htmlFile = sys.argv[2]

    id = os.path.relpath(htmlFile)
    while True:
        id, ext = os.path.splitext(id)
        if len(ext) == 0: break
    id = id.replace(os.path.sep, '-')

    toc = tocModule.tocFromFile(tocFile, False, True)
    ref = toc.walk_id(id)
    if ref is None:
        print >> sys.stderr, "ERROR: The file id \"%s\" was not found in the ToC!" % id
        return 1

    soup = BeautifulSoup.BeautifulSoup(sys.stdin)
    out = clean(soup, toc, ref)
    print out

    return 0
Exemplo n.º 3
0
def main(args):
    if len(args) != 3:
        usage(args)
        return 1
    if args[1] == '-h' or args[1] == '--help':
        usage(args)
        return 0
    tocFile = sys.argv[1]
    htmlFile = sys.argv[2]

    id = os.path.relpath(htmlFile)
    while True:
        id, ext = os.path.splitext(id)
        if len(ext) == 0: break
    id = id.replace(os.path.sep, '-')

    toc = tocModule.tocFromFile(tocFile, False, True)
    ref = toc.walk_id(id)
    if ref is None:
        print >> sys.stderr, "ERROR: The file id \"%s\" was not found in the ToC!" % id
        return 1

    soup = BeautifulSoup.BeautifulSoup(sys.stdin)
    out = clean(soup, toc, ref)
    print out

    return 0
Exemplo n.º 4
0
def clean(soup, toc, ref):
    # Rewrite links
    for link in soup.findAll('a'):
        href = link.get('href')
        if href is None:
            print >> sys.stderr, "WARNING: Link with no href:", link
            continue
        if href.startswith('#') and href != '#':
            href = href[1:]
            if soup.find(attrs={'id': href}) is not None or soup.find("a", attrs={'name': href}) is not None:
                # Link to an element in the page
                continue
            target = toc.walk_id(href)
            if target is None:
                print >> sys.stderr, "WARNING: Link to an unknown ToC entry \"%s\"" % href
                continue
            link['href'] = target.link(ref)
    # Access elements by id to keep a reference before removing their id attribute
    headerElmt = soup.find("div", attrs={'id': 'header'})
    tocElmt = soup.find("div", attrs={'id': 'toc'})
    footerElmt = soup.find("div", attrs={'id': 'footer'})
    # Prefer class to id
    for id in ['header', 'toc', 'toctitle', 'preamble', 'content', 'footer', 'footer-text']:
        elmt = soup.find(attrs={'id': id})
        if elmt is not None:
            elmt['class'] = (elmt.get('class', '') + ' ' + elmt['id']).strip()
            del elmt['id']
    # Add icon in header
    if headerElmt is not None:
        iconElmt = BeautifulSoup.Tag(soup, 'div', attrs={'class': 'page-badge'})
        headerElmt.insert(0, iconElmt)
    # Add breadcrumb in header
    if headerElmt is not None:
        breadcrumbElmt = BeautifulSoup.Tag(soup, 'div', attrs={'class': 'breadcrumb'})
        for i, entry in enumerate(ref.get_ancestry()[:-1]):
            if i > 0:
                breadcrumbElmt.append(BeautifulSoup.NavigableString(u' » '))
            linkElmt = BeautifulSoup.Tag(soup, 'a', attrs={'href': entry.link(ref)})
            linkElmt.append(BeautifulSoup.NavigableString(entry.title if not entry.is_root() else 'Docs'))
            breadcrumbElmt.append(linkElmt)
        headerElmt.insert(1, breadcrumbElmt)
    # Add ToC in header
    if tocElmt is not None:
        # Remove toc's noscript
        noscript = tocElmt.find("noscript")
        if noscript is not None:
            noscript.decompose() # causes problems with subsequent soup.find()
        # Inject ToC
        tocHTMLBuffer = cStringIO.StringIO()
        ref.write_html(tocHTMLBuffer, open=ref)
        tocHTML = tocHTMLBuffer.getvalue().decode('utf-8')
        tocHTMLBuffer.close()
        if tocHTML == u'':
            # Remove ToC if empty
            tocElmt.decompose()
        else:
            tocTags = BeautifulSoup.BeautifulSoup(tocHTML)
            tocElmt.append(tocTags)
            # Use a wrapper div
            wrapper = BeautifulSoup.Tag(soup, 'div', attrs={'class':'tocwrapper'})
            tocElmt.replaceWith(wrapper)
            wrapper.append(tocElmt)
    # Add license in footer
    if footerElmt is not None:
        footerLicense = BeautifulSoup.BeautifulSoup("""
<div class="footer-license">
  Except as otherwise noted, <span xmlns:dct="http://purl.org/dc/terms/" property="dct:title">Scoreflex Documentation</span> by <a xmlns:cc="http://creativecommons.org/ns#" href="http://developer.scoreflex.com/docs" property="cc:attributionName" rel="cc:attributionURL">Scoreflex</a> is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">Creative Commons Attribution-ShareAlike 4.0 International License</a>,
  and code samples are licensed under the <a rel="license" href="http://www.apache.org/licenses/LICENSE-2.0">Apache 2.0 License</a>.
</div>""")
        footerElmt.insert(0, footerLicense)
    # Return just the interesting html, not the boilerplate
    rtn = soup.body.extract()
    rtn.name = 'div'
    return rtn