Exemplo n.º 1
0
def reorganize_by_parts(body):
    """Group the top-level contents of *body* into <section level='part'>
    elements, splitting at part markers, and recursively reorganize the
    contents of each part by chapters.

    Returns a new tag shaped like *body* (via tag_like) containing the
    part sections separated by blank lines.
    """
    elements = list(body.contents)
    with timeit('reorganize_by_parts:make_sections'):
        sections = make_sections2(elements,
                                  is_part_marker,
                                  attrs={'level': 'part-down'})

    with timeit('reorganize_by_parts:copying'):
        res = tag_like(body)

        for header, section in sections:
            # The two cases (with/without header) differ only in whether
            # the header is inserted and its attributes copied.
            S = Tag(name='section')
            S.attrs[ATTR_LEVEL] = 'part'
            # Content found before the first part marker gets a section
            # with no header; it is marked so it can be dissolved later.
            if not header:
                S.attrs['class'] = CLASS_WITHOUT_HEADER
            else:
                S.attrs['class'] = CLASS_WITH_HEADER
            S.append('\n')
            if header:
                S.append(header)
            section2 = reorganize_by_chapters(section)
            S.append(section2)
            if header:
                copy_attributes_from_header(S, header)
            res.append('\n\n')
            res.append(S)
            res.append('\n\n')
        return res
Exemplo n.º 2
0
def reorganize_contents(body0, add_debug_comments=False):
    """Reorganize a flat sequence of headers into nested sections:

        h1
        h2
        h1

    becomes

        section
            h1
            h2
        section
            h1

    """
    with timeit('reorganize_by_parts'):
        result = reorganize_by_books(body0)

    with timeit('dissolving'):
        # Dissolve the wrapper <div>s introduced while sectioning,
        # e.g. <div class='without-header-inside'>.
        wrapper_classes = ['without-header-inside', 'with-header-inside']

        def is_wrapper(class_value):
            return class_value is not None and class_value in wrapper_classes

        for wrapper in result.find_all('div', attrs={'class': is_wrapper}):
            dissolve(wrapper)

    return result
Exemplo n.º 3
0
def document_final_pass_before_toc(soup,
                                   remove,
                                   remove_selectors,
                                   res=None,
                                   location=None):
    """Run the document passes that must happen before TOC generation.

    Reorganizes the <body> contents into <section>s, processes
    assignments, removes the requested elements, and moves things
    around in the document.

    soup: the whole parsed document; must contain a <body>.
    remove, remove_selectors: forwarded to do_remove_stuff().
    res: AugmentedResult to collect notes; created (with a warning) if None.
    location: error-reporting location; LocationUnknown() if None.

    Raises ValueError if no <body> is found.
    """
    if res is None:
        # logger.warn() is deprecated; logging documents warning() as the API.
        logger.warning('no res passed')
        res = AugmentedResult()
    if location is None:
        location = LocationUnknown()

    logger.info('reorganizing contents in <sections>')

    with timeit('find body'):
        body = soup.find('body')
        if body is None:
            msg = 'Cannot find <body>:\n%s' % indent(str(soup)[:1000], '|')
            raise ValueError(msg)

    with timeit('reorganize_contents'):
        body2 = reorganize_contents(body)

    process_assignment(body2, res, location)

    # Swap the reorganized body into the original document.
    body.replace_with(body2)

    # Removing stuff
    with timeit('remove stuff'):
        do_remove_stuff(body2, remove_selectors, remove)

    with timeit('move_things_around'):
        move_things_around(soup=soup, res=res)
Exemplo n.º 4
0
def poset_minima(elements, leq):
    """Return the set of minimal elements of a poset, using the given
    comparison function leq(a, b) <=> "a is below b".
    Quadratic in len(elements); intended for small sets only."""
    n = len(elements)

    with timeit('poset_minima with n = %d' % n, minimum=0.5):
        if n == 1:
            return set(elements)

        minima = []
        for candidate in elements:
            # keep the candidate only if no current minimum is below it
            dominated = any(leq(m, candidate) for m in minima)
            if not dominated:
                # discard the minima that the candidate is below
                minima = [m for m in minima if not leq(candidate, m)]
                minima.append(candidate)
        return set(minima)
Exemplo n.º 5
0
def manual_join(template,
                files_contents,
                stylesheet,
                remove=None,
                extra_css=None,
                remove_selectors=None,
                hook_before_toc=None,
                references=None,
                resolve_references=True,
                hook_before_final_pass=None,
                require_toc_placeholder=False,
                permalink_prefix=None,
                crossrefs_aug=None,
                aug0=None):
    """
        Joins the given documents into one HTML document based on template.

        files_contents: a list of tuples that can be cast to DocToJoin:
        where the string is a unique one to be used for job naming.

        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc

        Returns an AugmentedResult whose result is the utf-8 encoded HTML.
    """
    result = AugmentedResult()

    if references is None:
        references = {}
    check_isinstance(files_contents, list)

    if crossrefs_aug is None:
        crossrefs = Tag(name='no-cross-refs')
    else:
        crossrefs = bs(crossrefs_aug.get_result())
        result.merge(crossrefs_aug)
    if aug0 is not None:
        result.merge(aug0)

    # Local no-op stub that shadows the module-level timeit: it disables
    # the timing instrumentation in the body below.
    @contextmanager
    def timeit(_):
        yield

    with timeit('manual_join'):

        files_contents = [DocToJoin(*_) for _ in files_contents]

        # cannot use bs because entire document
        with timeit('parsing template'):
            template0 = template
            template = replace_macros(template)
            template_soup = BeautifulSoup(template,
                                          'lxml',
                                          from_encoding='utf-8')
            d = template_soup
            if d.html is None:
                s = "Invalid template"
                raise_desc(ValueError, s, template0=template0)

        with timeit('adding head'):
            assert d.html is not None
            assert '<html' in str(d)
            head = d.find('head')
            if head is None:
                msg = 'Could not find <head> in template:'
                logger.error(msg)
                logger.error(str(d))
                raise Exception(msg)
            assert head is not None
            # Copy (not move) the CSS fragment contents into the head.
            for x in get_manual_css_frag().contents:
                head.append(x.__copy__())

        with timeit('adding stylesheet'):
            if stylesheet is not None:
                link = Tag(name='link')
                link['rel'] = 'stylesheet'
                link['type'] = 'text/css'
                from mcdp_report.html import get_css_filename
                link['href'] = get_css_filename('compiled/%s' % stylesheet)
                head.append(link)

        with timeit('making basename2soup'):
            # Map docname -> parsed fragment, preserving input order.
            basename2soup = OrderedDict()
            for doc_to_join in files_contents:
                if doc_to_join.docname in basename2soup:
                    msg = 'Repeated docname %r' % doc_to_join.docname
                    raise ValueError(msg)
                from .latex.latex_preprocess import assert_not_inside
                if isinstance(doc_to_join.contents, AugmentedResult):
                    result.merge(doc_to_join.contents)
                    contents = doc_to_join.contents.get_result()
                else:
                    contents = doc_to_join.contents
                assert_not_inside(contents, '<fragment')
                assert_not_inside(contents, 'DOCTYPE')

                frag = bs(contents)
                basename2soup[doc_to_join.docname] = frag

        # with timeit('fix_duplicate_ids'):
        # XXX
        # fix_duplicated_ids(basename2soup)

        with timeit('copy contents'):
            body = d.find('body')
            add_comments = False

            for docname, content in basename2soup.items():
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(
                        Comment('Beginning of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))

                try_faster = True
                if try_faster:
                    # Moving nodes with extract() avoids re-parsing/copying.
                    for e in list(content.children):
                        body.append(e.extract())
                else:
                    copy_contents_into(content, body)

                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('End of document dump of %r' %
                                        docname))
                    body.append(NavigableString('\n\n'))

        with timeit('extract_bibtex_blocks'):
            extract_bibtex_blocks(d)

        with timeit('ID_PUT_BIB_HERE'):

            ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE

            bibhere = d.find('div', id=ID_PUT_BIB_HERE)
            if bibhere is None:
                msg = ('Could not find #%s in document. '
                       'Adding one at end of document.') % ID_PUT_BIB_HERE
                result.note_warning(msg)
                bibhere = Tag(name='div')
                bibhere.attrs['id'] = ID_PUT_BIB_HERE
                d.find('body').append(bibhere)

            do_bib(d, bibhere)

        with timeit('hook_before_final_pass'):
            if hook_before_final_pass is not None:
                hook_before_final_pass(soup=d)

        with timeit('document_final_pass_before_toc'):
            location = LocationUnknown()
            document_final_pass_before_toc(d, remove, remove_selectors, result,
                                           location)

        with timeit('hook_before_toc'):
            if hook_before_toc is not None:
                hook_before_toc(soup=d)

        with timeit('generate_and_add_toc'):
            try:
                generate_and_add_toc(d, raise_error=True, res=result)
            except NoTocPlaceholder as e:
                if require_toc_placeholder:
                    msg = 'Could not find toc placeholder: %s' % e
                    # logger.error(msg)
                    if aug0 is not None:
                        result.note_error(msg)
                    else:
                        raise Exception(msg)

        with timeit('document_final_pass_after_toc'):
            document_final_pass_after_toc(
                soup=d,
                crossrefs=crossrefs,
                resolve_references=resolve_references,
                res=result)

        if extra_css is not None:
            logger.info('adding extra CSS')
            add_extra_css(d, extra_css)

        with timeit('document_only_once'):
            document_only_once(d)

        location = LocationUnknown()
        substitute_github_refs(d, defaults={}, res=result, location=location)

        with timeit('another A pass'):
            # Resolve known references: fix the href and, for anchors with
            # no content, fill in the reference title.
            for a in d.select('a[href]'):
                href = a.attrs['href']
                if href in references:
                    r = references[href]
                    a.attrs['href'] = r.url
                    # BUG FIX: a.children is an iterator, which is always
                    # truthy, so `not a.children` never fired; use the
                    # .contents list to test for emptiness.
                    if not a.contents:  # empty
                        a.append(r.title)

        # do not use to_html_stripping_fragment - this is a complete doc
        # mark_in_html(result, soup=d)

        add_github_links_if_edit_url(soup=d, permalink_prefix=permalink_prefix)

        with timeit('converting to string'):
            res = unicode(d)

        with timeit('encoding'):
            res = res.encode('utf8')

        logger.info('done - %.1f MB' % (len(res) / (1024 * 1024.0)))

        result.set_result(res)
        return result
Exemplo n.º 6
0
def parse_wrap(expr, string):
    """ Parses *string* (a utf-8 encoded str, not unicode) with the
        pyparsing element *expr* and returns a one-element list with the
        parse tree, whose nodes carry 'where' locations translated back
        to the original (comment-including) string.

        Syntax errors are re-raised as DPSyntaxError; unexpected errors
        are wrapped in DPInternalError.

        transparent to MemoryError
    """

    from .refinement import namedtuple_visitor_ext

    # Enforce byte-string input: locations below are computed on bytes.
    if isinstance(string, unicode):
        msg = 'The string is unicode. It should be a str with utf-8 encoding.'
        msg += '\n' + string.encode('utf-8').__repr__()
        raise ValueError(msg)

    check_isinstance(string, bytes)

    # Nice trick: the remove_comments doesn't change the number of lines
    # it only truncates them...

    string0 = remove_comments(string)

    if not string0.strip():
        msg = 'Nothing to parse.'
        where = Where(string, character=len(string))
        raise DPSyntaxError(msg, where=where)

    try:
        # Best-effort name for the element, used only for timing output.
        try:
            w = str(find_parsing_element(expr))
        except ValueError:
            w = '(unknown)'

        with timeit(w, MCDPConstants.parsing_too_slow_threshold):
            expr.parseWithTabs()

            parsed = expr.parseString(string0, parseAll=True)  # [0]

            def transform(x, parents):  # @UnusedVariable
                # Re-anchor each node's 'where' from the comment-stripped
                # string0 back to the original string.
                if x.where is None:  # pragma: no cover
                    msg = 'Where is None for this element'
                    raise_desc(DPInternalError,
                               msg,
                               x=recursive_print(x),
                               all=recursive_print(parsed[0]))

                where = translate_where(x.where, string)
                return get_copy_with_where(x, where)

            parsed_transformed = namedtuple_visitor_ext(parsed[0], transform)

            if hasattr(parsed_transformed, 'where'):
                # could be an int, str
                assert_equal(parsed_transformed.where.string, string)

            res = fix_whitespace(parsed_transformed)
            return [res]

    except (ParseException, ParseFatalException) as e:
        # Translate pyparsing's location (in string0) to the original string.
        where1 = Where(string0, e.loc)
        where2 = translate_where(where1, string)
        s0 = e.__str__()
        check_isinstance(s0, bytes)
        s = s0
        e2 = DPSyntaxError(s, where=where2)
        # Python-2 three-argument raise: re-raise as DPSyntaxError while
        # preserving the original traceback.
        raise DPSyntaxError, e2.args, sys.exc_info()[2]

    except DPSemanticError as e:
        msg = 'This should not throw a DPSemanticError'
        raise_wrapped(DPInternalError, e, msg, exc=sys.exc_info())
    except MemoryError as e:
        # Transparent to MemoryError, as documented.
        raise
    except RuntimeError as e:
        msg = 'RuntimeError %s while parsing string.' % (type(e).__name__)
        msg += '\n' + indent(string, 'string: ')
        # Recursion-depth failures produce huge chained messages; compact them.
        compact = 'maximum recursion depth' in str(e)
        #         compact = False # XXX
        raise_wrapped(DPInternalError, e, msg, compact=compact)
    except BaseException as e:
        msg = 'Unexpected exception %s while parsing string.' % (
            type(e).__name__)
        msg += '\n' + indent(string, 'string: ')
        raise_wrapped(DPInternalError, e, msg)