Example #1
def run_bibtex2html(contents):
    res = AugmentedResult()
    erase = True
    with tmpdir(prefix='bibtex', erase=erase, keep_on_exception=True) as d:
        fn = os.path.join(d, 'input.bib')
        fno = os.path.join(d, 'out')
        fno1 = fno + '.html'
        with open(fn, 'w') as f:
            f.write(contents)

        cmd = ['bibtex2html', '-unicode', '--dl', '-o', fno, fn]

        system_cmd_result(
            '.',
            cmd,
            display_stdout=False,
            display_stderr=False,
            raise_on_error=True,
            display_prefix=None,  # leave it there
            env=None)

        with open(fno1) as f:
            bibtex2html_output = f.read()

        fixed = bibtex2html_output.replace('<p>\n</dd>', '</dd><!--fix-->')

        with open(os.path.join(d, 'fixed.html'), 'w') as f:
            f.write(fixed)

        out = process_bibtex2html_output(fixed, d)

        write_data_to_file(out, os.path.join(d, 'processed.html'))

        res.set_result(out)
        return res
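
A minimal usage sketch, assuming the bibtex2html command-line tool is on the PATH and the surrounding helpers (AugmentedResult, tmpdir, system_cmd_result) are importable from the mcdp codebase; the BibTeX entry is made up for illustration:

contents = """
@article{doe2017,
  author = {Jane Doe},
  title  = {An Example Entry},
  year   = {2017},
}
"""
aug = run_bibtex2html(contents)
html = aug.get_result()  # the HTML rendering produced by bibtex2html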
Example #2
def add_likebtn(joined_aug, likebtn):
    res = AugmentedResult()
    res.merge(joined_aug)
    soup = bs_entire_document(joined_aug.get_result())
    add_likebtn_(soup, likebtn)
    res.set_result(to_html_entire_document(soup))
    return res
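
Examples #2 through #4 rely on the same AugmentedResult contract: merge() carries notes (warnings and errors) over from an upstream result, while set_result()/get_result() hold the payload. A minimal stub consistent with the usage in these examples; it is inferred from the calls above, not the real mcdp implementation:

class AugmentedResultSketch(object):
    # Hypothetical stand-in for mcdp's AugmentedResult, inferred from usage.

    def __init__(self):
        self.notes = []      # accumulated (level, message) pairs
        self._result = None

    def merge(self, other):
        # carry over the notes collected by an upstream result
        self.notes.extend(other.notes)

    def note_warning(self, msg):
        self.notes.append(('warning', msg))

    def note_error(self, msg):
        self.notes.append(('error', msg))

    def set_result(self, r):
        self._result = r

    def get_result(self):
        return self._result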
Example #3
def mark_errors_and_rest(joined_aug):
    soup = bs_entire_document(joined_aug.get_result())
    mark_in_html(joined_aug, soup)
    res = AugmentedResult()
    res.merge(joined_aug)
    res.set_result(to_html_entire_document(soup))
    return res
Example #4
def add_related(joined_aug):
    res = AugmentedResult()
    res.merge(joined_aug)
    soup = bs_entire_document(joined_aug.get_result())
    add_related_(soup, res)
    res.set_result(to_html_entire_document(soup))
    return res
Example #5
def prerender(joined_aug, symbols):
    joined = joined_aug.get_result()
    soup = bs_entire_document(joined)
    for details in soup.select('details'):
        details.name = 'div'
        add_class(details, 'transmuted-details')

    joined = to_html_entire_document(soup)
    res = AugmentedResult()
    result = prerender_mathjax(joined, symbols=symbols, res=res)
    res.set_result(result)
    return res
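
The <details> transmutation in prerender can be exercised on its own with BeautifulSoup; a self-contained sketch, with a one-line stand-in for the add_class helper used above:

from bs4 import BeautifulSoup

html = '<body><details><summary>More</summary><p>Hidden</p></details></body>'
soup = BeautifulSoup(html, 'html.parser')
for details in soup.select('details'):
    details.name = 'div'
    classes = details.get('class', [])
    classes.append('transmuted-details')  # stand-in for add_class()
    details['class'] = classes

assert soup.find('details') is None
assert 'transmuted-details' in soup.find('div')['class']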
Example #6
def make_last_modified(files_contents, nmax=100):
    res = AugmentedResult()
    files_contents = [DocToJoin(*x) for x in files_contents]
    files_contents = [_ for _ in files_contents if _.source_info]

    files_contents = list(
        sorted(files_contents,
               key=lambda x: x.source_info.last_modified,
               reverse=True))

    r = Tag(name='fragment')
    r.append('\n')
    h = Tag(name='h1')
    h.append('Last modified')
    h.attrs['id'] = 'sec:last-modified'
    r.append(h)
    r.append('\n')

    ul = Tag(name='ul')
    ul.append('\n')
    for d in files_contents[:nmax]:
        li = Tag(name='li')
        when = d.source_info.last_modified
        when_s = time.strftime("%a, %b %d", when)
        li.append(when_s)
        li.append(': ')

        hid = get_main_header(bs(d.contents))
        if hid is None:
            what = "File %s" % d.docname
        else:
            what = Tag(name='a')
            what.attrs['href'] = '#' + hid
            # left empty on purpose: a later pass substitutes the anchor text
            # based on this class
            what.attrs['class'] = MCDPManualConstants.CLASS_NUMBER_NAME

        li.append(what)
        li.append(' (')
        name = d.source_info.author.name
        li.append(name)
        li.append(')')

        ul.append(li)
        ul.append('\n')

    r.append(ul)
    s = to_html_stripping_fragment(r)

    res.set_result(s)
    return res
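
make_last_modified passes source_info.last_modified straight to time.strftime, so it must be a time.struct_time; a quick, deterministic check of the format string used above:

import time

when = time.gmtime(1500000000)  # fixed timestamp: 2017-07-14 UTC
print(time.strftime("%a, %b %d", when))  # -> 'Fri, Jul 14'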
Example #7
def add_style(data_aug, stylesheet):
    soup = bs_entire_document(data_aug.get_result())
    head = soup.find('head')
    assert head is not None
    link = Tag(name='link')
    link['rel'] = 'stylesheet'
    link['type'] = 'text/css'
    from mcdp_report.html import get_css_filename
    link['href'] = get_css_filename('compiled/%s' % stylesheet)
    head.append(link)
    html = to_html_entire_document(soup)
    res = AugmentedResult()
    res.merge(data_aug)
    res.set_result(html)
    return res
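
The stylesheet injection in add_style is plain BeautifulSoup; a self-contained equivalent with a hard-coded href, since get_css_filename is specific to the mcdp codebase:

from bs4 import BeautifulSoup

soup = BeautifulSoup('<html><head></head><body></body></html>', 'html.parser')
link = soup.new_tag('link', rel='stylesheet', type='text/css',
                    href='compiled/manual.css')  # hypothetical path
soup.head.append(link)
print(soup)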
Example #8
def make_composite(compose_config, joined_aug):
    data = joined_aug.get_result()
    soup = bs_entire_document(data)
    recipe = compose_config.recipe
    remove_status = compose_config.remove_status
    show_removed = compose_config.show_removed
    permalink_prefix = compose_config.purl_prefix
    aug = compose_go2(soup, recipe, permalink_prefix, remove_status,
                      show_removed)
    soup = aug.get_result()
    results = str(soup)
    res = AugmentedResult()
    res.merge(joined_aug)
    res.merge(aug)
    res.set_result(results)
    return res
Example #9
def manual_join(template,
                files_contents,
                stylesheet,
                remove=None,
                extra_css=None,
                remove_selectors=None,
                hook_before_toc=None,
                references=None,
                resolve_references=True,
                hook_before_final_pass=None,
                require_toc_placeholder=False,
                permalink_prefix=None,
                crossrefs_aug=None,
                aug0=None):
    """
        files_contents: a list of tuples that can be cast to DocToJoin:
        where the string is a unique one to be used for job naming.

        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc
    """
    result = AugmentedResult()

    if references is None:
        references = {}
    check_isinstance(files_contents, list)

    if crossrefs_aug is None:
        crossrefs = Tag(name='no-cross-refs')
    else:
        crossrefs = bs(crossrefs_aug.get_result())
        result.merge(crossrefs_aug)
    if aug0 is not None:
        result.merge(aug0)

    @contextmanager
    def timeit(_):
        # no-op placeholder: keeps the timing structure below without measuring
        yield

    with timeit('manual_join'):

        files_contents = [DocToJoin(*_) for _ in files_contents]

        # cannot use bs() here: this is an entire document, not a fragment
        with timeit('parsing template'):
            template0 = template
            template = replace_macros(template)
            template_soup = BeautifulSoup(template,
                                          'lxml',
                                          from_encoding='utf-8')
            d = template_soup
            if d.html is None:
                s = "Invalid template"
                raise_desc(ValueError, s, template0=template0)

        with timeit('adding head'):
            assert d.html is not None
            assert '<html' in str(d)
            head = d.find('head')
            if head is None:
                msg = 'Could not find <head> in template:'
                logger.error(msg)
                logger.error(str(d))
                raise Exception(msg)
            assert head is not None
            for x in get_manual_css_frag().contents:
                head.append(x.__copy__())

        with timeit('adding stylesheet'):
            if stylesheet is not None:
                link = Tag(name='link')
                link['rel'] = 'stylesheet'
                link['type'] = 'text/css'
                from mcdp_report.html import get_css_filename
                link['href'] = get_css_filename('compiled/%s' % stylesheet)
                head.append(link)

        with timeit('making basename2soup'):
            basename2soup = OrderedDict()
            for doc_to_join in files_contents:
                if doc_to_join.docname in basename2soup:
                    msg = 'Repeated docname %r' % doc_to_join.docname
                    raise ValueError(msg)
                from .latex.latex_preprocess import assert_not_inside
                if isinstance(doc_to_join.contents, AugmentedResult):
                    result.merge(doc_to_join.contents)
                    contents = doc_to_join.contents.get_result()
                else:
                    contents = doc_to_join.contents
                assert_not_inside(contents, '<fragment')
                assert_not_inside(contents, 'DOCTYPE')

                frag = bs(contents)
                basename2soup[doc_to_join.docname] = frag

        with timeit('copy contents'):
            body = d.find('body')
            add_comments = False

            for docname, content in basename2soup.items():
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(
                        Comment('Beginning of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))

                try_faster = True
                if try_faster:
                    for e in list(content.children):
                        body.append(e.extract())
                else:
                    copy_contents_into(content, body)

                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('End of document dump of %r' %
                                        docname))
                    body.append(NavigableString('\n\n'))

        with timeit('extract_bibtex_blocks'):
            extract_bibtex_blocks(d)

        with timeit('ID_PUT_BIB_HERE'):

            ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE

            bibhere = d.find('div', id=ID_PUT_BIB_HERE)
            if bibhere is None:
                msg = ('Could not find #%s in document. '
                       'Adding one at end of document.') % ID_PUT_BIB_HERE
                result.note_warning(msg)
                bibhere = Tag(name='div')
                bibhere.attrs['id'] = ID_PUT_BIB_HERE
                d.find('body').append(bibhere)

            do_bib(d, bibhere)

        with timeit('hook_before_final_pass'):
            if hook_before_final_pass is not None:
                hook_before_final_pass(soup=d)

        with timeit('document_final_pass_before_toc'):
            location = LocationUnknown()
            document_final_pass_before_toc(d, remove, remove_selectors, result,
                                           location)

        with timeit('hook_before_toc'):
            if hook_before_toc is not None:
                hook_before_toc(soup=d)

        with timeit('generate_and_add_toc'):
            try:
                generate_and_add_toc(d, raise_error=True, res=result)
            except NoTocPlaceholder as e:
                if require_toc_placeholder:
                    msg = 'Could not find toc placeholder: %s' % e
                    if aug0 is not None:
                        result.note_error(msg)
                    else:
                        raise Exception(msg)

        with timeit('document_final_pass_after_toc'):
            document_final_pass_after_toc(
                soup=d,
                crossrefs=crossrefs,
                resolve_references=resolve_references,
                res=result)

        if extra_css is not None:
            logger.info('adding extra CSS')
            add_extra_css(d, extra_css)

        with timeit('document_only_once'):
            document_only_once(d)

        location = LocationUnknown()
        substitute_github_refs(d, defaults={}, res=result, location=location)

        with timeit('another A pass'):
            for a in d.select('a[href]'):
                href = a.attrs['href']
                if href in references:
                    r = references[href]
                    a.attrs['href'] = r.url
                    if not a.contents:  # empty; a.children is an iterator, always truthy
                        a.append(r.title)

        # do not use to_html_stripping_fragment - this is a complete doc

        add_github_links_if_edit_url(soup=d, permalink_prefix=permalink_prefix)

        with timeit('converting to string'):
            res = unicode(d)  # Python 2: unicode first, encoded to bytes below

        with timeit('encoding'):
            res = res.encode('utf8')

        logger.info('done - %.1f MB' % (len(res) / (1024 * 1024.0)))

        result.set_result(res)
        return result
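
The reference-substitution pass near the end of manual_join (the 'another A pass' block) can be isolated; a sketch using a plain dict whose values carry url and title attributes, as the loop above assumes (Ref is a hypothetical stand-in for whatever the references dict actually holds):

from collections import namedtuple
from bs4 import BeautifulSoup

Ref = namedtuple('Ref', ['url', 'title'])
references = {'#python': Ref('https://www.python.org/', 'Python')}

soup = BeautifulSoup('<p><a href="#python"></a></p>', 'html.parser')
for a in soup.select('a[href]'):
    href = a.attrs['href']
    if href in references:
        r = references[href]
        a.attrs['href'] = r.url
        if not a.contents:  # empty anchor: fill in the title
            a.append(r.title)

print(soup)  # <p><a href="https://www.python.org/">Python</a></p>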
Example #10
def compose_go2(soup, recipe, permalink_prefix, remove_status, show_removed):
    res = AugmentedResult()

    # Create context
    doc = soup.__copy__()
    body = Tag(name='body')
    doc.body.replace_with(body)
    elements = recipe.make(RecipeContext(soup=soup))
    check_isinstance(elements, list)
    append_all(body, elements)

    # Now remove stuff
    for status in remove_status:
        removed = []
        for section in list(body.select('section[status=%s]' % status)):
            level = section.attrs['level']
            if level not in ['sec', 'part']:
                continue

            section_id = section.attrs['id']
            pure_id = section_id.replace(':section', '')
            removed.append(section.attrs['id'])

            if show_removed:
                # remove everything that is not a header
                keep = ['h1', 'h2', 'h3', 'h4', 'h5']
                for e in list(section.children):
                    if e.name not in keep:
                        e.extract()
                    else:
                        e.append(' [%s]' % status)

                p = Tag(name='p')
                p.append(
                    "This section has been removed because it is in status %r. "
                    % status)
                a = Tag(name='a')
                a.attrs['href'] = 'http://purl.org/dt/master/%s' % pure_id
                a.append(
                    "If you are feeling adventurous, you can read it on master."
                )
                p.append(a)

                section.append(p)

                p = Tag(name='p')
                p.append(
                    "To disable this behavior, and completely hide the sections, "
                )
                p.append(
                    "set the parameter show_removed to false in fall2017.version.yaml."
                )
                section.append(p)
            else:
                section.extract()

        if not removed:
            logger.info('Found no section with status = %r to remove.' %
                        status)
        else:
            logger.info('I removed %d sections with status %r.' %
                        (len(removed), status))
            logger.debug('Removed: %s' % ", ".join(removed))

    add_github_links_if_edit_url(doc, permalink_prefix=permalink_prefix)

    generate_and_add_toc(doc)
    doc = doc.__copy__()

    raise_errors = False
    find_links_from_master(master_soup=soup,
                           version_soup=doc,
                           raise_errors=raise_errors,
                           res=res)

    document_final_pass_after_toc(doc)

    res.set_result(doc)
    return res
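
The status-based removal in compose_go2 reduces to a CSS attribute selector; a self-contained sketch on a toy document:

from bs4 import BeautifulSoup

html = ('<body>'
        '<section status="draft" level="sec" id="a:section">draft</section>'
        '<section status="ready" level="sec" id="b:section">ready</section>'
        '</body>')
soup = BeautifulSoup(html, 'html.parser')
for section in list(soup.select('section[status=draft]')):
    section.extract()

assert soup.find('section', id='a:section') is None
assert soup.find('section', id='b:section') is not None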
Example #11
def get_cross_refs(src_dirs, permalink_prefix, extra_crossrefs, ignore=None):
    res = AugmentedResult()
    files = look_for_files(src_dirs, "crossref.html")
    id2file = {}
    soup = Tag(name='div')

    def add_from_soup(s, f, ignore_already_present, ignore_if_conflict):
        for img in list(s.find_all('img')):
            img.extract()

        for e in s.select('[base_url]'):
            e['external_crossref_file'] = f

        # Remove the ones with the same base_url
        for e in list(s.select('[base_url]')):
            if e.attrs['base_url'] == permalink_prefix:
                e.extract()

        for e in s.select('[id]'):
            id_ = e.attrs['id']
            if id_ == 'container': continue  # XXX:

            if id_ in id2file:
                if not ignore_already_present:
                    msg = 'Found two elements with same ID "%s":' % id_
                    msg += '\n %s' % id2file[id_]
                    msg += '\n %s' % f
                    res.note_error(msg)
            else:
                id2file[id_] = f
                e2 = e.__copy__()
                if ignore_if_conflict:
                    e2.attrs['ignore_if_conflict'] = '1'
                soup.append(e2)
                soup.append('\n')

    ignore = [os.path.realpath(_) for _ in (ignore or [])]
    for _f in files:
        if os.path.realpath(_f) in ignore:
            msg = 'Ignoring file %r' % _f
            logger.info(msg)
            continue
        logger.info('cross ref file %s' % _f)
        with open(_f) as fh:
            data = fh.read()
        if permalink_prefix in data:
            msg = 'skipping own file'
            logger.debug(msg)
            continue
        s = bs(data)
        add_from_soup(s,
                      _f,
                      ignore_already_present=False,
                      ignore_if_conflict=False)

    if extra_crossrefs is not None:
        logger.info('Reading external refs\n%s' % extra_crossrefs)
        try:
            r = requests.get(extra_crossrefs)
        except Exception as ex:
            msg = 'Could not read external cross reference links'
            msg += '\n  %s' % extra_crossrefs
            msg += '\n\n' + indent(str(ex), ' > ')
            res.note_error(msg)
        else:
            logger.debug('%s %s' % (r.status_code, extra_crossrefs))
            if r.status_code == 404:
                msg = 'Could not read external cross refs: %s' % r.status_code
                msg += '\n url: ' + extra_crossrefs
                msg += '\n This is normal if you have not pushed this branch yet.'
                res.note_warning(msg)
            s = bs(r.text)
            add_from_soup(s,
                          extra_crossrefs,
                          ignore_already_present=True,
                          ignore_if_conflict=True)

    res.set_result(str(soup))
    return res
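
The duplicate-id detection inside add_from_soup is independent of the crossref machinery; a minimal sketch of the same bookkeeping on two in-memory fragments:

from bs4 import BeautifulSoup

id2file = {}
for fname, fragment in [('a.html', '<p id="x">one</p>'),
                        ('b.html', '<p id="x">two</p>')]:
    s = BeautifulSoup(fragment, 'html.parser')
    for e in s.select('[id]'):
        id_ = e.attrs['id']
        if id_ in id2file:
            print('Found two elements with same ID "%s": %s, %s'
                  % (id_, id2file[id_], fname))
        else:
            id2file[id_] = fname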
Example #12
def render_book(
    src_dirs,
    generate_pdf,
    data,
    realpath,
    use_mathjax,
    raise_errors,
    filter_soup=None,
    symbols=None,
    ignore_ref_errors=False,
):
    """ Returns an AugmentedResult(str) """
    res = AugmentedResult()
    from mcdp_docs.pipeline import render_complete

    librarian = get_test_librarian()
    # XXX: these might need to be changed
    if not MCDPConstants.softy_mode:
        for src_dir in src_dirs:
            librarian.find_libraries(src_dir)

    load_library_hooks = [librarian.load_library]
    library_ = MCDPLibrary(load_library_hooks=load_library_hooks)

    for src_dir in src_dirs:
        library_.add_search_dir(src_dir)

    d = tempfile.mkdtemp()
    library_.use_cache_dir(d)

    location = LocalFile(realpath)

    def filter_soup0(soup, library):
        if filter_soup is not None:
            filter_soup(soup=soup, library=library)
        add_edit_links2(soup, location)
        add_last_modified_info(soup, location)

    try:
        html_contents = render_complete(library=library_,
                                        s=data,
                                        raise_errors=raise_errors,
                                        realpath=realpath,
                                        use_mathjax=use_mathjax,
                                        symbols=symbols,
                                        generate_pdf=generate_pdf,
                                        filter_soup=filter_soup0,
                                        location=location,
                                        res=res,
                                        ignore_ref_errors=ignore_ref_errors)
    except DPSyntaxError as e:
        msg = 'Could not compile %s' % realpath
        location0 = LocationInString(e.where, location)
        res.note_error(msg, locations=location0)
        fail = "<p>This file could not be compiled</p>"
        res.set_result(fail)
        return res

    res.set_result(html_contents)
    return res
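
A hedged invocation sketch for render_book, assuming an mcdp checkout whose test librarian can resolve the sources; every argument value here is illustrative only:

aug = render_book(
    src_dirs=['.'],
    generate_pdf=False,
    data='Some MCDP-flavored markdown source.',
    realpath='docs/example.md',
    use_mathjax=False,
    raise_errors=False,
)
html_or_error_page = aug.get_result()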