Example #1
def run_bibtex2html(contents):
    res = AugmentedResult()
    erase = True
    with tmpdir(prefix='bibtex', erase=erase, keep_on_exception=True) as d:
        fn = os.path.join(d, 'input.bib')
        fno = os.path.join(d, 'out')
        fno1 = fno + '.html'
        # fno2 = fno + '_bib.html'
        with open(fn, 'w') as f:
            f.write(contents)

        cmd = ['bibtex2html', '-unicode', '--dl', '-o', fno, fn]

        system_cmd_result(
            '.',
            cmd,
            display_stdout=False,
            display_stderr=False,
            raise_on_error=True,
            display_prefix=None,  # leave it there
            env=None)

        with open(fno1) as f:
            bibtex2html_output = f.read()

        fixed = bibtex2html_output.replace('<p>\n</dd>', '</dd><!--fix-->')

        with open(os.path.join(d, 'fixed.html'), 'w') as f:
            f.write(fixed)

        out = process_bibtex2html_output(fixed, d)

        write_data_to_file(out, os.path.join(d, 'processed.html'))

        res.set_result(out)
        return res
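A minimal usage sketch (the BibTeX entry is invented for illustration; assumes the bibtex2html binary is on the PATH):

contents = """@article{doe2020,
  author = {Jane Doe},
  title  = {An Example Article},
  year   = {2020},
}
"""
res = run_bibtex2html(contents)
print(res.get_result())  # the processed HTML fragment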
Example #2
def prerender(joined_aug, symbols):
    joined = joined_aug.get_result()
    soup = bs_entire_document(joined)
    for details in soup.select('details'):
        details.name = 'div'
        add_class(details, 'transmuted-details')
        # details.attrs['open'] = 1

    joined = to_html_entire_document(soup)
    res = AugmentedResult()
    result = prerender_mathjax(joined, symbols=symbols, res=res)
    res.set_result(result)
    return res
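The <details> transmutation above, as a self-contained bs4 sketch (a plain class-list append stands in for the add_class helper used in the example):

from bs4 import BeautifulSoup

html = '<details><summary>More</summary><p>Body</p></details>'
soup = BeautifulSoup(html, 'lxml')
for details in soup.select('details'):
    details.name = 'div'  # keep the children, change only the tag name
    details['class'] = details.get('class', []) + ['transmuted-details']
print(soup.body.decode_contents())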
Example #3
def make_last_modified(files_contents, nmax=100):
    res = AugmentedResult()
    files_contents = [DocToJoin(*x) for x in files_contents]
    files_contents = [_ for _ in files_contents if _.source_info]

    files_contents = list(
        sorted(files_contents,
               key=lambda x: x.source_info.last_modified,
               reverse=True))

    r = Tag(name='fragment')
    r.append('\n')
    h = Tag(name='h1')
    h.append('Last modified')
    h.attrs['id'] = 'sec:last-modified'
    r.append(h)
    r.append('\n')

    ul = Tag(name='ul')
    ul.append('\n')
    for d in files_contents[:nmax]:
        li = Tag(name='li')
        when = d.source_info.last_modified
        when_s = time.strftime("%a, %b %d", when)
        #          %H:%M
        li.append(when_s)
        li.append(': ')

        hid = get_main_header(bs(d.contents))
        if hid is None:
            what = "File %s" % d.docname
        else:
            what = Tag(name='a')
            what.attrs['href'] = '#' + hid
            what.attrs['class'] = MCDPManualConstants.CLASS_NUMBER_NAME

        li.append(what)
        li.append(' (')
        name = d.source_info.author.name
        li.append(name)
        li.append(')')

        ul.append(li)
        ul.append('\n')

    r.append(ul)
    s = to_html_stripping_fragment(r)
    #     print s

    res.set_result(s)
    return res
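The sorting and date formatting above, reduced to a standalone sketch (invented data; source_info.last_modified is assumed to be a time.struct_time, which strftime accepts):

import time

docs = [('intro', time.localtime()), ('setup', time.gmtime(0))]  # (name, struct_time) pairs
docs.sort(key=lambda d: d[1], reverse=True)  # newest first; struct_time compares chronologically
for name, when in docs:
    print('%s: %s' % (time.strftime("%a, %b %d", when), name))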
Example #4
def document_final_pass_after_toc(soup,
                                  crossrefs=None,
                                  resolve_references=True,
                                  res=None,
                                  location=LocationUnknown()):
    """ This is done to a final document """
    if res is None:
        res = AugmentedResult()

    logger.info('checking errors')
    check_various_errors(soup)

    from .check_missing_links import check_if_any_href_is_invalid
    logger.info('checking hrefs')
    check_if_any_href_is_invalid(soup, res, location, extra_refs=crossrefs)

    # Note that this should be done *after* check_if_any_href_is_invalid()
    # because that one might fix some references
    if resolve_references:
        logger.info('substituting empty links')

        substituting_empty_links(soup,
                                 raise_errors=False,
                                 res=res,
                                 extra_refs=crossrefs)

    for a in soup.select('a[href_external]'):
        a.attrs['href'] = a.attrs['href_external']
        add_class(a, 'interdoc')

    detect_duplicate_IDs(soup, res)
Example #5
def figures_new1():
    s = r"""

<figure>  
    <figcaption>Main caption</figcaption>
    <figure>
        <figcaption>Hello</figcaption>
        <img style='width:8em' src="duckietown-logo-transparent.png"/>
    </figure>
    <figure>  
        <figcaption>second</figcaption>
        <img style='width:8em' src="duckietown-logo-transparent.png"/>
    </figure>
</figure>

"""
    soup = bs(s)

    res = AugmentedResult()
    location = LocationUnknown()
    make_figure_from_figureid_attr(soup, res, location)

    # nfigs = len(list(soup.select('figure')))
    o = to_html_stripping_fragment(soup)
    print(o)
Example #6
def another2():
    # four spaces in the first line
    s = r"""

(if it exists) of the set of fixed points of~$f$:
\begin{equation}
x = y .\label{eq:lfp-one}
\end{equation}
The equality in \eqref{lfp-one} can be relaxed to ``$xxx$''.

The equality in \ref{eq:lfp-one} can be relaxed to ``$xxx$''.


The least fixed point need not exist. Monotonicity of the map~$f$
plus completeness is sufficient to ensure existence.
"""
    res = AugmentedResult()
    location = LocationUnknown()
    s2 = censor_markdown_code_blocks(s, res, location)

    print('original:')
    print(indent_plus_invisibles(s))
    print('later:')
    print(indent_plus_invisibles(s2))

    assert 'censored-code' not in s
Example #7
def generate_and_add_toc(soup, raise_error=False, res=None):
    if res is None:
        res = AugmentedResult()
    logger.info('adding toc')
    body = soup.find('body')
    toc = generate_toc(body, res)

    # logger.info('TOC:\n' + str(toc))
    toc_ul = bs(toc).ul
    if toc_ul is None:
        # empty TOC
        msg = 'Could not find toc.'
        # logger.warning(msg)
        res.note_error(msg)
        # XXX
    else:
        toc_ul.extract()
        assert toc_ul.name == 'ul'
        toc_ul['class'] = 'toc'  # XXX: see XXX13
        toc_ul['id'] = MCDPManualConstants.MAIN_TOC_ID

        toc_selector = MCDPManualConstants.TOC_PLACEHOLDER_SELECTOR
        tocs = list(body.select(toc_selector))
        if not tocs:
            msg = 'Cannot find any element of type %r to put TOC inside.' % toc_selector
            if raise_error:
                raise NoTocPlaceholder(msg)
            logger.warning(msg)
            res.note_error(msg)
        else:
            toc_place = tocs[0]
            toc_place.replaceWith(toc_ul)
Example #8
def generate_toc(soup, max_depth=None, max_levels=2, res=None):
    if res is None:  # avoid sharing one default AugmentedResult across calls
        res = AugmentedResult()
    max_levels += 1  # since we added "book"
    stack = [Item(None, -1, 'root', 'root', [])]

    headers_depths = list(get_things_to_index(soup))

    for header, depth, using in headers_depths:
        if max_depth is not None:
            if depth > max_depth:
                continue

        item = Item(header, depth, using, header['id'], [])

        while stack[-1].depth >= depth:
            stack.pop()

        stack[-1].items.append(item)
        stack.append(item)

    root = stack[0]

    number_items2(root, res)

    without_levels = root.copy_excluding_levels(
        MCDPManualConstants.exclude_from_toc)
    result = without_levels.to_html(root=True, max_levels=max_levels)

    if ZERO in result:
        res.note_error("Some counters had zero values")
    return result
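The while/pop loop above is the standard stack technique for nesting a flat (header, depth) stream into a tree; a self-contained sketch with plain dicts:

def nest(headers):
    root = {'title': 'root', 'depth': 0, 'items': []}
    stack = [root]
    for title, depth in headers:
        item = {'title': title, 'depth': depth, 'items': []}
        while stack[-1]['depth'] >= depth:  # close deeper or same-level items
            stack.pop()
        stack[-1]['items'].append(item)     # attach under the nearest shallower item
        stack.append(item)
    return root

# nest([('One', 1), ('Two', 2), ('Three', 3), ('Four', 2)])
# -> One contains Two (which contains Three) and Four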
Example #9
def document_final_pass_before_toc(soup,
                                   remove,
                                   remove_selectors,
                                   res=None,
                                   location=None):
    if res is None:
        logger.warning('no res passed')
        res = AugmentedResult()
    if location is None:
        location = LocationUnknown()

    logger.info('reorganizing contents in <sections>')

    with timeit('find body'):
        body = soup.find('body')
        if body is None:
            msg = 'Cannot find <body>:\n%s' % indent(str(soup)[:1000], '|')
            raise ValueError(msg)

    with timeit('reorganize_contents'):
        body2 = reorganize_contents(body)

    process_assignment(body2, res, location)

    body.replace_with(body2)

    # Removing stuff
    with timeit('remove stuff'):
        do_remove_stuff(body2, remove_selectors, remove)

    with timeit('move_things_around'):
        move_things_around(soup=soup, res=res)
Example #10
def download_reveal(output_dir):
    res = AugmentedResult()
    url = "https://github.com/hakimel/reveal.js/archive/3.6.0.zip"
    target = os.path.join(output_dir, 'revealjs')

    if os.path.exists(target):
        logger.debug('skipping downloading because target exists: %s' % target)
    else:
        dest = os.path.join(output_dir, 'reveal-3.6.0.zip')
        if not os.path.exists(dest):
            logger.info('Downloading %s' % url)
            # ctx = ssl.create_default_context()
            # ctx.check_hostname = False
            # ctx.verify_mode = ssl.CERT_NONE

            response = requests.get(
                url, stream=True)  # context=ssl._create_unverified_context())
            # data = response.raw.read() # read()
            with open(dest, 'wb') as f:
                shutil.copyfileobj(response.raw, f)

            # logger.info('downloaded %1.fMB' % (len(data) / (1000.0 * 1000)))
            # write_data_to_file(data, dest)
        logger.info(dest)

        target_tmp = target + '.tmp'
        import zipfile
        zip_ref = zipfile.ZipFile(dest, 'r')
        zip_ref.extractall(target_tmp)
        zip_ref.close()

        actual = os.path.join(target_tmp, 'reveal.js-3.6.0')
        os.rename(actual, target)
        logger.debug('extracted to %r' % target)

    check = [
        "plugin/notes/notes.js",
        "plugin/math/math.js",
        "lib/js/head.min.js",
        "js/reveal.js",
    ]
    for c in check:
        fn = os.path.join(target, c)
        if not os.path.exists(fn):
            msg = 'Incomplete reveal download, not found: %s' % fn
            res.note_error(msg)
    return res
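A more compact variant of the download-and-extract step, assuming the archive is small enough to hold in memory (requests and zipfile only):

import io
import zipfile

import requests

def download_zip(url, target_dir):
    resp = requests.get(url)
    resp.raise_for_status()  # fail early instead of extracting an error page
    with zipfile.ZipFile(io.BytesIO(resp.content)) as z:
        z.extractall(target_dir)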
Example #11
def render(library, docname, data, realpath, out_dir, generate_pdf, stylesheet,
           symbols, raise_errors, use_mathjax, do_slides):
    res = AugmentedResult()
    if MCDPConstants.pdf_to_png_dpi < 300:
        msg = (
            'Note that pdf_to_png_dpi is set to %d, which is not suitable for printing'
            % MCDPConstants.pdf_to_png_dpi)
        mcdp_dev_warning(msg)

    from mcdp_docs.pipeline import render_complete

    out = os.path.join(out_dir, docname + '.html')

    html_contents = render_complete(library=library,
                                    s=data,
                                    raise_errors=raise_errors,
                                    realpath=realpath,
                                    generate_pdf=generate_pdf,
                                    symbols=symbols,
                                    use_mathjax=use_mathjax)

    title = docname

    doc = get_minimal_document(html_contents,
                               title=title,
                               stylesheet=stylesheet,
                               add_markdown_css=True,
                               add_manual_css=True)

    soup = bs_entire_document(doc)

    document_final_pass_before_toc(soup,
                                   remove=None,
                                   remove_selectors=[],
                                   res=res)
    generate_and_add_toc(soup, res=res)
    document_final_pass_after_toc(soup, res=res)

    if use_mathjax and symbols:
        add_mathjax_preamble(soup, symbols)

    if do_slides:
        create_reveal(soup, res)

    doc = to_html_entire_document(soup)

    d = os.path.dirname(out)
    if not os.path.exists(d):
        os.makedirs(d)
    with open(out, 'w') as f:
        f.write(doc)

    logger.info('Written %s ' % out)
    return out
Example #12
def move_things_around(soup, raise_if_errors=False, res=None):
    """
        Looks for tags like:

            <move-here src="#line_detector2-line_detector_node2-autogenerated"/>

    """
    if res is None:
        res = AugmentedResult()
    from mcdp_docs.check_missing_links import get_id2element

    with timeit_wall('getting all IDs'):
        id2element, duplicates = get_id2element(soup, 'id')

    for e in soup.find_all('move-here'):

        if 'src' not in e.attrs:
            msg = 'Expected attribute "src" for element %s' % str(e)
            raise ValueError(msg)

        src = e.attrs['src']

        if not src.startswith('#'):
            msg = 'Expected that attribute "src" started with "#" for element %s.' % str(e)
            raise ValueError(msg)
        nid = src[1:]

        # O(n^2)
        # el = soup.find(id=nid)
        el = id2element.get(nid, None)
        if not el:
            msg = 'move-here: Could not find ID %r.' % nid
            e.name = 'span'
            # note_error2(e, "invalid move-here reference", msg)
            res.note_error(msg, HTMLIDLocation.for_element(e))
            if raise_if_errors:
                raise ValueError(msg)
            else:
                continue
        el.extract()
        e.replace_with(el)
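The placeholder mechanics from the docstring, as a self-contained bs4 sketch (the example above precomputes id2element instead of calling soup.find in the loop):

from bs4 import BeautifulSoup

html = '<p id="note1">Hello</p><section><move-here src="#note1"></move-here></section>'
soup = BeautifulSoup(html, 'html.parser')
for e in soup.find_all('move-here'):
    nid = e.attrs['src'][1:]  # strip the leading '#'
    el = soup.find(id=nid)
    el.extract()              # detach from the old position
    e.replace_with(el)        # and drop it where the placeholder was
print(soup)  # <section><p id="note1">Hello</p></section>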
Example #13
def elements_abbrevs_test2():
    s = "<p>TODO: paragraph <strong>Strong</strong></p>"
    e = """<div class="todo-wrap"><p class="todo">TODO: paragraph <strong>Strong</strong></p></div>"""
    soup = bs(s.strip())

    res = AugmentedResult()
    location = LocationUnknown()
    substitute_special_paragraphs(soup, res, location)

    o = to_html_stripping_fragment(soup)
    #print o
    assert_equal(o, e)
Example #14
def add_likebtn(joined_aug, likebtn):
    res = AugmentedResult()
    res.merge(joined_aug)
    soup = bs_entire_document(joined_aug.get_result())
    add_likebtn_(soup, likebtn)
    res.set_result(to_html_entire_document(soup))
    return res
Example #15
def add_related(joined_aug):
    res = AugmentedResult()
    res.merge(joined_aug)
    soup = bs_entire_document(joined_aug.get_result())
    add_related_(soup, res)
    res.set_result(to_html_entire_document(soup))
    return res
Example #16
def mark_errors_and_rest(joined_aug):
    soup = bs_entire_document(joined_aug.get_result())
    mark_in_html(joined_aug, soup)
    res = AugmentedResult()
    res.merge(joined_aug)
    res.set_result(to_html_entire_document(soup))
    return res
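Examples #14, #15, and #16 repeat the same merge / parse / transform / serialize pattern; a hypothetical helper that factors it out (transforms that also need the result object would take res as an extra argument):

def html_pass(transform):
    def stage(joined_aug, *args):
        res = AugmentedResult()
        res.merge(joined_aug)
        soup = bs_entire_document(joined_aug.get_result())
        transform(soup, *args)
        res.set_result(to_html_entire_document(soup))
        return res
    return stage

# e.g.: add_likebtn = html_pass(add_likebtn_)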
Example #17
def prerender_main():
    f0 = sys.argv[1]
    f1 = sys.argv[2]
    with open(f0) as f:
        html = f.read()
    parsed = bs_entire_document(html)
    body = parsed.html.body
    body_string = str(body)
    res = AugmentedResult()
    body2_string = prerender_mathjax_(body_string, res)
    body2 = bs(body2_string)
    parsed.html.body.replace_with(body2)
    html2 = str(parsed)
    write_data_to_file(html2, f1)
Example #18
def make_composite(compose_config, joined_aug):
    data = joined_aug.get_result()
    soup = bs_entire_document(data)
    recipe = compose_config.recipe
    remove_status = compose_config.remove_status
    show_removed = compose_config.show_removed
    permalink_prefix = compose_config.purl_prefix
    aug = compose_go2(soup, recipe, permalink_prefix, remove_status,
                      show_removed)
    soup = aug.get_result()
    results = str(soup)
    res = AugmentedResult()
    res.merge(joined_aug)
    res.merge(aug)
    res.set_result(results)
    return res
Example #19
def create_slides(soup):
    res = AugmentedResult()

    header = soup.find('h1', attrs=dict(type='slides'))

    if header is None:
        # logger.debug('No slides here')
        return

    _id = header.attrs['id'].replace('sec:', '')
    _id_section = (_id + ':section')
    section = soup.find(id=_id_section)
    if section is None:
        msg = 'Could not find section by ID %r' % _id_section
        logger.error(msg)
        return

    section.extract()

    body = soup.find('body')
    body.attrs['type'] = 'slides'
    container = soup.find('div', attrs={'class': 'super'})

    div = Tag(name='div')
    div.attrs['class'] = 'reveal'
    container.append(div)

    div_slides = Tag(name='div')
    div_slides.attrs['class'] = 'slides'

    div.append(div_slides)

    for subsection in section.select('section[level=sub]'):
        if 'without-header-inside' in subsection.attrs.get('class', []):
            continue

        # print 'extracting', subsection.attrs
        subsection.extract()
        div_slides.append(subsection)

    div_slides.insert(0, section)

    sub_notes(div_slides)
    sub_markers(div_slides)

    stylesheet = "v_manual_reveal"
    add_stylesheet(soup, stylesheet)
    embed_css_files(soup)
    create_reveal(soup, res)
Example #20
def test_toc():
    s = """
<html>
<head></head>
<body>
<h1 id='one'>One</h1>

<p>a</p>

<h2 id='two'>Two</h2>

<p>a</p>

<h3 id='three'>Three</h3>

<h2 id='four'>Four</h2>

<p>a</p>
</body>
</html>
    """
    soup = bs(s)
    #     print(soup)
    #     body = soup.find('body')

    # first time it should fail
    try:
        _toc = generate_toc(soup)
    except InvalidHeaders:
        #         > InvalidHeaders: I expected that this header would start with either part:,app:,sec:.
        #         > <h1 id="one">One</h1>
        pass
    else:
        raise Exception('expected InvalidHeaders to be raised')

    soup = bs(s)
    fix_ids_and_add_missing(soup, 'prefix-', AugmentedResult(),
                            LocationUnknown())
    generate_toc(soup)

    s = str(soup)
    expected = ['sec:one', 'sub:two']
    #     print(indent(s, 'transformed > '))
    for e in expected:
        assert e in s
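The try/except/else construction above is the hand-rolled way to assert that an exception is raised; with pytest the same check is one statement (a sketch, reusing the s defined in the test):

import pytest

with pytest.raises(InvalidHeaders):
    generate_toc(bs(s))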
Example #21
def test_toc2():
    s = """
<html>
<head></head>
<body>
<h1>One</h1>
<h1>Two</h1>
<h1>Three</h1>
<p></p>

<h2>A</h2>

<h2>B</h2>

<h2>C</h2>

<h3>a</h3>
<h3>b</h3>
<h3>c</h3>

</body>
</html>
    """
    soup = bs(s)
    #     print(soup)
    #     body = soup.find('body')
    fix_ids_and_add_missing(soup, 'prefix', AugmentedResult(),
                            LocationUnknown())
    assert soup.find(id='sub:prefix-5') is not None
    #     <fragment>
    # <h1 id="sec:prefix--1">One</h1>
    # <h1 id="sec:prefix--2">Two</h1>
    # <h1 id="sec:prefix--3">Three</h1>
    # <p></p>
    # <h2 id="sub:prefix--4">A</h2>
    # <h2 id="sub:prefix--5">B</h2>
    # <h2 id="sub:prefix--6">C</h2>
    # <h3 id="subsub:prefix--7">a</h3>
    # <h3 id="subsub:prefix--8">b</h3>
    # <h3 id="subsub:prefix--9">c</h3>
    # </fragment>
    print(soup)

    _toc = generate_toc(soup)
    s = str(soup)
Example #22
def sub1():
    defaults = {'org': 'AndreaCensi', 'repo': 'mcdp', 'branch': 'duckuments'}

    s = """
<a href="github:path=context_eval_as_constant.py"></a> 
"""
    soup = bs(s)
    location = LocationUnknown()
    res = AugmentedResult()

    n = substitute_github_refs(soup, defaults, res=res, location=location)
    assert n == 1

    s2 = str(soup)
    logger.debug(indent(s2, '  '))

    expect = '<code class="github-resource-link">context_eval_as_constant.py</code>'
    if expect not in s2:
        raise Exception(s2)
Example #23
def displayfile1():
    defaults = {'org': 'AndreaCensi', 'repo': 'mcdp', 'branch': 'duckuments'}

    s = """
<display-file src="github:path=context_eval_as_constant.py,from_text=get_connections_for,to_text=return"></display-file>
"""
    soup = bs(s)
    res = AugmentedResult()
    location = LocationUnknown()

    n = display_files(soup,
                      defaults,
                      raise_errors=True,
                      res=res,
                      location=location)
    assert n == 1

    s2 = str(soup)
    logger.debug('\n' + indent(s2, '  '))
Example #24
def sub2():
    defaults = {'org': 'AndreaCensi', 'repo': 'mcdp', 'branch': 'duckuments'}

    s = """
<a href="github:path=context_eval_as_constant.py,from_text=get_connections_for,to_text=return"></a> 
"""
    soup = bs(s)
    location = LocationUnknown()
    res = AugmentedResult()
    n = substitute_github_refs(soup, defaults, res=res, location=location)
    assert n == 1

    s2 = str(soup)
    logger.debug('\n' + indent(s2, '  '))

    expect = 'context_eval_as_constant.py#L7-L12'

    if expect not in s2:
        raise Exception('No %s in %s' % (expect, s2))
Example #25
def add_style(data_aug, stylesheet):
    soup = bs_entire_document(data_aug.get_result())
    head = soup.find('head')
    assert head is not None
    link = Tag(name='link')
    link['rel'] = 'stylesheet'
    link['type'] = 'text/css'
    from mcdp_report.html import get_css_filename
    link['href'] = get_css_filename('compiled/%s' % stylesheet)
    head.append(link)
    html = to_html_entire_document(soup)
    res = AugmentedResult()
    res.merge(data_aug)
    res.set_result(html)
    return res
Example #26
def manual_join(template,
                files_contents,
                stylesheet,
                remove=None,
                extra_css=None,
                remove_selectors=None,
                hook_before_toc=None,
                references=None,
                resolve_references=True,
                hook_before_final_pass=None,
                require_toc_placeholder=False,
                permalink_prefix=None,
                crossrefs_aug=None,
                aug0=None):
    """
        files_contents: a list of tuples that can be cast to DocToJoin:
        where the string is a unique one to be used for job naming.

        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc
    """
    result = AugmentedResult()

    if references is None:
        references = {}
    check_isinstance(files_contents, list)

    if crossrefs_aug is None:
        crossrefs = Tag(name='no-cross-refs')
    else:
        crossrefs = bs(crossrefs_aug.get_result())
        result.merge(crossrefs_aug)
    if aug0 is not None:
        result.merge(aug0)

    @contextmanager
    def timeit(_):
        yield

    with timeit('manual_join'):

        files_contents = [DocToJoin(*_) for _ in files_contents]

        # cannot use bs because entire document
        with timeit('parsing template'):
            template0 = template
            template = replace_macros(template)
            template_soup = BeautifulSoup(template,
                                          'lxml',
                                          from_encoding='utf-8')
            d = template_soup
            if d.html is None:
                s = "Invalid template"
                raise_desc(ValueError, s, template0=template0)

        with timeit('adding head'):
            assert d.html is not None
            assert '<html' in str(d)
            head = d.find('head')
            if head is None:
                msg = 'Could not find <head> in template:'
                logger.error(msg)
                logger.error(str(d))
                raise Exception(msg)
            assert head is not None
            for x in get_manual_css_frag().contents:
                head.append(x.__copy__())

        with timeit('adding stylesheet'):
            if stylesheet is not None:
                link = Tag(name='link')
                link['rel'] = 'stylesheet'
                link['type'] = 'text/css'
                from mcdp_report.html import get_css_filename
                link['href'] = get_css_filename('compiled/%s' % stylesheet)
                head.append(link)

        with timeit('making basename2soup'):
            basename2soup = OrderedDict()
            for doc_to_join in files_contents:
                if doc_to_join.docname in basename2soup:
                    msg = 'Repeated docname %r' % doc_to_join.docname
                    raise ValueError(msg)
                from .latex.latex_preprocess import assert_not_inside
                if isinstance(doc_to_join.contents, AugmentedResult):
                    result.merge(doc_to_join.contents)
                    contents = doc_to_join.contents.get_result()
                else:
                    contents = doc_to_join.contents
                assert_not_inside(contents, '<fragment')
                assert_not_inside(contents, 'DOCTYPE')

                frag = bs(contents)
                basename2soup[doc_to_join.docname] = frag

        # with timeit('fix_duplicate_ids'):
        # XXX
        # fix_duplicated_ids(basename2soup)

        with timeit('copy contents'):
            body = d.find('body')
            add_comments = False

            for docname, content in basename2soup.items():
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(
                        Comment('Beginning of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))

                try_faster = True
                if try_faster:
                    for e in list(content.children):
                        body.append(e.extract())
                else:
                    copy_contents_into(content, body)

                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('End of document dump of %r' %
                                        docname))
                    body.append(NavigableString('\n\n'))

        with timeit('extract_bibtex_blocks'):
            extract_bibtex_blocks(d)

        with timeit('ID_PUT_BIB_HERE'):

            ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE

            bibhere = d.find('div', id=ID_PUT_BIB_HERE)
            if bibhere is None:
                msg = ('Could not find #%s in document. '
                       'Adding one at end of document.') % ID_PUT_BIB_HERE
                result.note_warning(msg)
                bibhere = Tag(name='div')
                bibhere.attrs['id'] = ID_PUT_BIB_HERE
                d.find('body').append(bibhere)

            do_bib(d, bibhere)

        with timeit('hook_before_final_pass'):
            if hook_before_final_pass is not None:
                hook_before_final_pass(soup=d)

        with timeit('document_final_pass_before_toc'):
            location = LocationUnknown()
            document_final_pass_before_toc(d, remove, remove_selectors, result,
                                           location)

        with timeit('hook_before_toc'):
            if hook_before_toc is not None:
                hook_before_toc(soup=d)

        with timeit('generate_and_add_toc'):
            try:
                generate_and_add_toc(d, raise_error=True, res=result)
            except NoTocPlaceholder as e:
                if require_toc_placeholder:
                    msg = 'Could not find toc placeholder: %s' % e
                    # logger.error(msg)
                    if aug0 is not None:
                        result.note_error(msg)
                    else:
                        raise Exception(msg)

        with timeit('document_final_pass_after_toc'):
            document_final_pass_after_toc(
                soup=d,
                crossrefs=crossrefs,
                resolve_references=resolve_references,
                res=result)

        if extra_css is not None:
            logger.info('adding extra CSS')
            add_extra_css(d, extra_css)

        with timeit('document_only_once'):
            document_only_once(d)

        location = LocationUnknown()
        substitute_github_refs(d, defaults={}, res=result, location=location)

        with timeit('another A pass'):
            for a in d.select('a[href]'):
                href = a.attrs['href']
                if href in references:
                    r = references[href]
                    a.attrs['href'] = r.url
                    if not a.contents:  # empty link text
                        a.append(r.title)

        # do not use to_html_stripping_fragment - this is a complete doc
        # mark_in_html(result, soup=d)

        add_github_links_if_edit_url(soup=d, permalink_prefix=permalink_prefix)

        with timeit('converting to string'):
            res = unicode(d)

        with timeit('encoding'):
            res = res.encode('utf8')

        logger.info('done - %.1f MB' % (len(res) / (1024 * 1024.0)))

        result.set_result(res)
        return result
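A minimal call sketch for manual_join (all values invented; the tuples are assumed to be (docname, contents, source_info), matching the DocToJoin usage in Example #3):

template = "<html><head></head><body></body></html>"
files_contents = [
    ('chapter1', '<h1 id="sec:one">One</h1><p>...</p>', None),
    ('chapter2', '<h1 id="sec:two">Two</h1><p>...</p>', None),
]
result = manual_join(template=template,
                     files_contents=files_contents,
                     stylesheet=None,
                     remove_selectors=['.draft'])
html = result.get_result()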
Example #27
def render_book(
    src_dirs,
    generate_pdf,
    data,
    realpath,
    use_mathjax,
    raise_errors,
    filter_soup=None,
    symbols=None,
    ignore_ref_errors=False,
):
    """ Returns an AugmentedResult(str) """
    res = AugmentedResult()
    from mcdp_docs.pipeline import render_complete

    librarian = get_test_librarian()
    # XXX: these might need to be changed
    if not MCDPConstants.softy_mode:
        for src_dir in src_dirs:
            librarian.find_libraries(src_dir)

    load_library_hooks = [librarian.load_library]
    library_ = MCDPLibrary(load_library_hooks=load_library_hooks)

    for src_dir in src_dirs:
        library_.add_search_dir(src_dir)

    d = tempfile.mkdtemp()
    library_.use_cache_dir(d)

    location = LocalFile(realpath)

    # print('location:\n%s' % location)

    def filter_soup0(soup, library):
        if filter_soup is not None:
            filter_soup(soup=soup, library=library)
        add_edit_links2(soup, location)
        add_last_modified_info(soup, location)

    try:
        html_contents = render_complete(library=library_,
                                        s=data,
                                        raise_errors=raise_errors,
                                        realpath=realpath,
                                        use_mathjax=use_mathjax,
                                        symbols=symbols,
                                        generate_pdf=generate_pdf,
                                        filter_soup=filter_soup0,
                                        location=location,
                                        res=res,
                                        ignore_ref_errors=ignore_ref_errors)
    except DPSyntaxError as e:
        msg = 'Could not compile %s' % realpath
        location0 = LocationInString(e.where, location)
        res.note_error(msg, locations=location0)
        fail = "<p>This file could not be compiled</p>"
        res.set_result(fail)
        return res
        # raise_wrapped(DPSyntaxError, e, msg, compact=True)

    if False:  # write minimal doc
        doc = get_minimal_document(html_contents,
                                   add_markdown_css=True,
                                   extra_css=extra_css)
        dirname = main_file + '.parts'
        if dirname and not os.path.exists(dirname):
            try:
                os.makedirs(dirname)
            except:
                pass
        fn = os.path.join(dirname, '%s.html' % out_part_basename)
        write_data_to_file(doc, fn)

    res.set_result(html_contents)
    return res
Example #28
def get_cross_refs(src_dirs, permalink_prefix, extra_crossrefs, ignore=()):
    res = AugmentedResult()
    files = look_for_files(src_dirs, "crossref.html")
    id2file = {}
    soup = Tag(name='div')

    def add_from_soup(s, f, ignore_already_present, ignore_if_conflict):
        for img in list(s.find_all('img')):
            img.extract()

        for e in s.select('[base_url]'):
            e['external_crossref_file'] = f

        # Remove the ones with the same base_url
        for e in list(s.select('[base_url]')):
            if e.attrs['base_url'] == permalink_prefix:
                e.extract()

        for e in s.select('[id]'):
            id_ = e.attrs['id']
            if id_ == 'container': continue  # XXX:

            if id_ in id2file:
                if not ignore_already_present:
                    msg = 'Found two elements with same ID "%s":' % id_
                    msg += '\n %s' % id2file[id_]
                    msg += '\n %s' % f
                    res.note_error(msg)
            else:
                id2file[id_] = f
                e2 = e.__copy__()
                if ignore_if_conflict:
                    e2.attrs['ignore_if_conflict'] = '1'
                soup.append(e2)
                soup.append('\n')

    ignore = [os.path.realpath(_) for _ in ignore]
    for _f in files:
        if os.path.realpath(_f) in ignore:
            msg = 'Ignoring file %r' % _f
            logger.info(msg)
            continue
        logger.info('cross ref file %s' % _f)
        with open(_f) as fh:
            data = fh.read()
        if permalink_prefix in data:
            msg = 'skipping own file'
            logger.debug(msg)
            continue
        s = bs(data)
        add_from_soup(s,
                      _f,
                      ignore_already_present=False,
                      ignore_if_conflict=False)

    if extra_crossrefs is not None:
        logger.info('Reading external refs\n%s' % extra_crossrefs)
        try:
            r = requests.get(extra_crossrefs)
        except Exception as ex:
            msg = 'Could not read external cross reference links'
            msg += '\n  %s' % extra_crossrefs
            msg += '\n\n' + indent(str(ex), ' > ')
            res.note_error(msg)
        else:
            logger.debug('%s %s' % (r.status_code, extra_crossrefs))
            if r.status_code == 404:
                msg = 'Could not read external cross refs: %s' % r.status_code
                msg += '\n url: ' + extra_crossrefs
                msg += '\n This is normal if you have not pushed this branch yet.'
                res.note_warning(msg)
                # logger.error(msg)
            s = bs(r.text)
            add_from_soup(s,
                          extra_crossrefs,
                          ignore_already_present=True,
                          ignore_if_conflict=True)

    # print soup
    res.set_result(str(soup))
    return res
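The duplicate-ID bookkeeping inside add_from_soup, reduced to its core (invented data):

id2file = {}
for f, ids in [('a.html', ['x', 'y']), ('b.html', ['y'])]:
    for id_ in ids:
        if id_ in id2file:
            print('Found two elements with same ID "%s": %s and %s' % (id_, id2file[id_], f))
        else:
            id2file[id_] = f
# reports the clash on 'y'; the first file wins, as in the example above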
Example #29
def compose_go2(soup, recipe, permalink_prefix, remove_status, show_removed):
    res = AugmentedResult()

    # Create context
    doc = soup.__copy__()
    body = Tag(name='body')
    doc.body.replace_with(body)
    elements = recipe.make(RecipeContext(soup=soup))
    check_isinstance(elements, list)
    append_all(body, elements)

    # Now remove stuff
    for status in remove_status:
        removed = []
        for section in list(body.select('section[status=%s]' % status)):
            level = section.attrs['level']
            if level not in ['sec', 'part']:
                continue

            section_id = section.attrs['id']
            pure_id = section_id.replace(':section', '')
            removed.append(section.attrs['id'])

            if show_removed:
                # remove everything that is not a header
                keep = ['h1', 'h2', 'h3', 'h4', 'h5']
                for e in list(section.children):
                    if e.name not in keep:
                        e.extract()
                    else:
                        e.append(' [%s]' % status)

                p = Tag(name='p')
                p.append(
                    "This section has been removed because it is in status %r. "
                    % status)
                a = Tag(name='a')
                a.attrs['href'] = 'http://purl.org/dt/master/%s' % pure_id
                a.append(
                    "If you are feeling adventurous, you can read it on master."
                )
                p.append(a)

                section.append(p)

                p = Tag(name='p')
                p.append(
                    "To disable this behavior, and completely hide the sections, "
                )
                p.append(
                    "set the parameter show_removed to false in fall2017.version.yaml."
                )
                section.append(p)
            else:
                section.extract()

        #             section.replace_with(div)

        if not removed:
            logger.info('Found no section with status = %r to remove.' %
                        status)
        else:
            logger.info('I removed %d sections with status %r.' %
                        (len(removed), status))
            logger.debug('Removed: %s' % ", ".join(removed))

    add_github_links_if_edit_url(doc, permalink_prefix=permalink_prefix)

    generate_and_add_toc(doc)
    doc = doc.__copy__()

    #     generate_and_add_toc(soup)
    #     substituting_empty_links(soup)
    raise_errors = False
    find_links_from_master(master_soup=soup,
                           version_soup=doc,
                           raise_errors=raise_errors,
                           res=res)

    document_final_pass_after_toc(doc)

    res.set_result(doc)
    return res
Example #30
def substituting_empty_links(soup,
                             raise_errors=False,
                             res=None,
                             extra_refs=None):
    """
        soup: where to look for references
        elemtn_to_modify: what to modify (if None, it is equal to soup)


        default style is [](#sec:systems)  "Chapter 10"

        You can also use "class":

            <a href='#sec:name' class='only_number'></a>

    """
    if extra_refs is None:
        extra_refs = Tag(name='div')
    if res is None:
        res = AugmentedResult()

    for le in get_empty_links_to_fragment(soup, extra_refs=extra_refs,
                                          res=res):
        a = le.linker
        element_id = le.eid
        element = le.linked

        if not element:
            msg = ('Cannot find %s' % element_id)
            res.note_error(msg, HTMLIDLocation.for_element(a))

            if raise_errors:
                raise ValueError(msg)
            continue

        sub_link(a, element_id, element, res)

    for a in get_empty_links(soup):
        href = a.attrs.get('href', '(not present)')
        if not href:
            href = '""'
        if href.startswith('python:'):
            continue

        if href.startswith('http:') or href.startswith('https:'):
            msg = """
This link text is empty:

    ELEMENT

Note that the syntax for links in Markdown is

    [link text](URL)

For the internal links (where URL starts with "#"), then the documentation
system can fill in the title automatically, leading to the format:

    [](#other-section)

However, this does not work for external sites, such as:

    [](MYURL)

So, you need to provide some text, such as:

    [this useful website](MYURL)

"""
            msg = msg.replace('ELEMENT', str(a))
            msg = msg.replace('MYURL', href)
            # note_error2(a, 'syntax error', msg.strip())

            res.note_error(msg, HTMLIDLocation.for_element(a))

        else:
            msg = """
This link is empty:

    ELEMENT

It might be that the writer intended for this
link to point to something, but they got the syntax wrong.

    href = %s

As a reminder, to refer to other parts of the document, use
the syntax "#ID", such as:

    See [](#fig:my-figure).

    See [](#section-name).

""" % href
            msg = msg.replace('ELEMENT', str(a))
            # note_error2(a, 'syntax error', msg.strip())
            res.note_error(msg, HTMLIDLocation.for_element(a))
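What counts as an empty link above, concretely (a standalone bs4 check):

from bs4 import BeautifulSoup

soup = BeautifulSoup('See <a href="#sec:systems"></a>.', 'html.parser')
a = soup.a
print(a.contents)  # [] -- no link text, so the title must be filled in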