示例#1
0
def parse_mkdocs(content, section, url):  # pylint: disable=unused-argument
    """Collect the embeddable HTML for one section of an MkDocs page.

    :param content: mapping whose ``'content'`` key holds the page HTML.
    :param section: title text of the wanted section; when falsy, the first
        ``h2`` header found in the page is used instead.
    :param url: unused here; accepted so the signature matches the other
        parsers.
    :returns: a ``(fragments, headers, section)`` tuple. ``fragments`` is a
        list with one HTML string per matching header, ``headers`` is the
        list built by ``recurse_while_none`` for every ``h2``, and
        ``section`` is the (possibly defaulted) section name. When there is
        no page HTML the result is ``(None, None, section)``.
    """
    if not (content and content.get('content')):
        return (None, None, section)

    html = content['content']
    headers = [recurse_while_none(h2) for h2 in PQ(html)('h2')]

    if headers and not section:
        # No section requested: fall back to the first header, lower-cased.
        section = list(headers[0].keys())[0].lower()

    fragments = []
    if not section:
        return (fragments, headers, section)

    document = PQ(html)
    selector = ':header:contains("{title}")'.format(
        title=str(escape_selector(section)))
    matches = document(selector)

    for index in range(len(matches)):
        # Walk the siblings that follow the matched header and stop at the
        # next ``h2`` (or when there are no more siblings).
        sibling = matches.eq(index).next()
        pieces = []
        while sibling and sibling[0].tag != 'h2':
            sibling_html = sibling.outerHtml()
            if sibling_html:
                pieces.append("\n%s\n" % sibling_html)
            sibling = sibling.next()
        if pieces:
            fragments.append("".join(pieces))

    return (fragments, headers, section)
示例#2
0
def parse_sphinx(content, section, url):
    """Get the embed content for a section of a Sphinx page.

    :param content: mapping with the page HTML under ``'body'`` and the
        table-of-contents HTML under ``'toc'``. May be ``None`` or empty.
    :param section: id or title of the wanted section; when falsy, the first
        entry of the table of contents is used instead.
    :param url: passed through to ``clean_links`` for every returned node.
    :returns: a ``(ret, headers, section)`` tuple. ``ret`` is a list of HTML
        strings, ``headers`` is the list built by ``recurse_while_none`` for
        every ``a`` element of the TOC, and ``section`` is the (possibly
        defaulted) section name. When ``content``, its body or its TOC is
        missing the result is ``(None, None, section)``; when no section can
        be determined it is ``([], headers, None)``.
    """
    # Guard before touching ``content``: calling ``.get`` on ``None`` would
    # raise instead of producing the "nothing to embed" result.
    if not content:
        return (None, None, section)

    body = content.get('body')
    toc = content.get('toc')
    if not body or not toc:
        return (None, None, section)

    headers = [recurse_while_none(element) for element in PQ(toc)('a')]

    if not section and headers:
        # If no section is sent, return the content of the first one
        # TODO: This will always be the full page content,
        # lets do something smarter here
        section = list(headers[0].keys())[0].lower()

    if not section:
        return [], headers, None

    body_obj = PQ(body)
    escaped_section = escape_selector(section)

    # Candidate element ids, tried in order until one matches.
    elements_id = [
        escaped_section,
        slugify(escaped_section),
        make_id(escaped_section),
        f'module-{escaped_section}',
    ]
    query_result = []
    for element_id in elements_id:
        if not element_id:
            continue
        query_result = body_obj(f'#{element_id}')
        if query_result:
            break

    if not query_result:
        # No id matched: fall back to a header containing the section text
        # and take its parent element.
        selector = f':header:contains("{escaped_section}")'
        query_result = body_obj(selector).parent()

    # Handle ``dt`` special cases
    if len(query_result) == 1 and query_result[0].tag == 'dt':
        parent = query_result.parent()
        # ``attr('class')`` is ``None`` when the parent has no ``class``
        # attribute; normalize so the membership tests below cannot fail.
        parent_classes = parent.attr('class') or ''
        if 'glossary' in parent_classes or 'citation' in parent_classes:
            # Sphinx HTML structure for term glossary (and
            # sphinxcontrib-bibtex citations) puts the ``id`` in the ``dt``
            # element with the title of the term/cite. In this case, we
            # need to return the next sibling which contains the definition
            # of the term (or the cite) itself.

            # Structure (glossary):
            # <dl class="glossary docutils">
            # <dt id="term-definition">definition</dt>
            # <dd>Text definition for the term</dd>
            # ...
            # </dl>

            # Structure (citation):
            # <dl class="citation">
            # <dt id="cite-id"><span><a>Title of the cite</a></span></dt>
            # <dd>Content of the cite</dd>
            # ...
            # </dl>
            query_result = query_result.next()
        else:
            # Sphinx HTML structure for definition list puts the ``id``
            # the ``dt`` element, instead of the ``dl``. This makes
            # the backend to return just the title of the definition. If we
            # detect this case, we return the parent (the whole ``dl``)

            # Structure:
            # <dl class="confval">
            # <dt id="confval-config">
            # <code class="descname">config</code>
            # <a class="headerlink" href="#confval-config">¶</a></dt>
            # <dd><p>Text with a description</p></dd>
            # </dl>
            query_result = parent

    def dump(obj):
        """Handle API-based doc HTML."""
        # For ``span``/``h2`` matches the enclosing element is returned
        # rather than the bare node.
        if obj[0].tag in ['span', 'h2']:
            return obj.parent().outerHtml()
        return obj.outerHtml()

    ret = [dump(clean_links(PQ(obj), url)) for obj in query_result]
    return ret, headers, section