示例#1
0
文件: pipeline.py 项目: afcarl/mcdp
def get_document_properties(soup):
    """ Collects the name/content pairs of the document's <meta> tags.

        Returns a dict that maps the "name" attribute of each <meta> tag
        to its "content" attribute; when a name is repeated, the last
        tag wins.

        For a <meta> tag that lacks either attribute, raise_desc() is
        called with ValueError and a description of the tag.
    """
    name_attr = 'name'
    content_attr = 'content'
    found = {}
    for meta in soup.select('meta'):
        attributes = meta.attrs
        if not (name_attr in attributes and content_attr in attributes):
            msg = 'Expected "%s" and "%s" attribute for meta tag.' % (
                name_attr, content_attr)
            raise_desc(ValueError, msg, tag=describe_tag(meta))

        found[meta[name_attr]] = meta[content_attr]
    return found
示例#2
0
文件: macro_col2.py 项目: rusi/mcdp
def col_macro_(e, ncols):
    """
        Rewrites the element ``e`` -- a <div> carrying the attribute
        ``make-col<ncols>`` -- in place as a <table> with ``ncols``
        columns, and puts that table inside a new wrapper <div> that
        takes the position ``e`` had in its parent.

        The non-string children of ``e`` become the table cells, filled
        row by row; cells left over in the last row hold a comment.
        Strings found between the children must be whitespace-only and
        are discarded; otherwise raise_desc() is called with ValueError.
        The same happens if there are fewer children than columns.

        If 'labels-row1' appears in the class attribute of ``e``, the
        first row is placed in a <thead> instead of the <tbody>.

        Known problem: for some reason bs4 removes the whitespace that
        is used here for indentation.
    """
    assert e.name == 'div'
    assert e.has_attr('make-col%d' % ncols)

    # Partition the direct children into text nodes and everything else.
    text_nodes = []
    cells = []
    for node in list(e.children):
        if isinstance(node, NavigableString):
            text_nodes.append(node)
        else:
            cells.append(node)

    ncells = len(cells)
    if ncells < ncols:
        msg = ('Cannot create table with %r cols with only %d children' %
               (ncols, ncells))
        raise_desc(ValueError, msg, tag=describe_tag(e))

    for cell in cells:
        cell.extract()

    for text in text_nodes:
        content = str(text)
        if content.strip():
            msg = 'Found nonempty string %r between children.' % content
            raise_desc(ValueError, msg, tag=describe_tag(e))
        # whitespace only: drop it
        text.extract()

    nrows = int(math.ceil(ncells / float(ncols)))

    container = e.parent
    position = container.index(e)
    e.extract()
    # The element itself is recycled as the table.
    e.name = 'table'
    table = e
    add_class(table, 'col%d' % ncols)
    add_class(table, 'colN')

    wrapper = Tag(name='div')
    add_class(wrapper, 'col%d-wrap' % ncols)
    add_class(wrapper, 'colN-wrap')

    newline = '\n'
    # XXX: use '-' * 4 instead to see the problem with indentation
    pad = ' ' * 4
    pad2 = pad + pad
    pad3 = pad2 + pad

    tbody = Tag(name='tbody')
    for r in range(nrows):
        tbody.append(NavigableString(newline))
        tbody.append(NavigableString(pad2))

        tr = Tag(name='tr')
        tr.append(NavigableString(newline))
        for c in range(ncols):
            td = Tag(name='td')
            k = r * ncols + c
            if k >= ncells:
                # no child left for this cell
                td.append(Comment('empty row %d col %d' % (r, c)))
            else:
                td.append(cells[k])
            tr.append(NavigableString(pad3))
            tr.append(td)
            tr.append(NavigableString(newline))
        tr.append(pad2)

        is_header = r == 0 and ('labels-row1' in e.attrs.get('class', ''))
        if is_header:
            # the header row goes directly in the table, not in tbody
            thead = Tag(name='thead')
            thead.append(tr)
            table.append(thead)
        else:
            tbody.append(tr)
        tbody.append(NavigableString(newline + pad))
    table.append(tbody)

    wrapper.append(NavigableString(newline + pad))
    wrapper.append(table)
    wrapper.append(NavigableString(newline))

    container.insert(position, wrapper)
    
    
示例#3
0
def highlight_mcdp_code(library,
                        soup,
                        realpath,
                        res,
                        location,
                        generate_pdf=False,
                        raise_errors=False):
    """ Looks for codes like:

    <pre class="mcdp">mcdp {
        # empty model
    }
    </pre>

        and does syntax highlighting.

        The soup is modified in place; nothing is returned.

        Steps, in order:

        1. abbreviation tags (<fname>, <k>, <f>, ...) are renamed to
           <code>/<span> and given the corresponding classes;
        2. classes such as 'mcdp-poset' (dash instead of underscore) are
           rejected, and tags named like <mcdp-poset> become
           <code class="mcdp_poset">;
        3. <pre> and <code> elements with the mcdp* classes are replaced
           by their highlighted rendering;
        4. a <br class="pre_after_pre"> is inserted before every <pre>
           that directly follows another <pre>.

        Parameters:
            library: used to load the source of elements that have no
                text but an id; for ids of the form "libname.name" the
                object returned by library.load_library(libname) is used.
            soup: the element to process; its name must be 'fragment'.
            realpath: file name of the document; its base name (without
                extension) is used to name the PDF downloads.
            res: its note_error() method receives syntax/semantic errors
                when raise_errors is false.
            location: not used in this function.
            generate_pdf: if true, each rendered <pre> is accompanied by
                a link to a PDF version of the code.
            raise_errors: if true, DPSyntaxError and DPSemanticError are
                re-raised instead of being passed to res.note_error().
    """

    assert soup.name == 'fragment'

    def go(selector, parse_expr, extension, use_pre=True, refine=None):
        """ Replaces each element matching ``selector`` with its
            highlighted version.

            parse_expr is the syntax element used for parsing; extension
            is the file extension used when the source must be loaded
            from the library; use_pre selects between the <pre> rendering
            (wrapped in a div.rendered) and the inline <code> rendering;
            refine, if given, is called as refine(x, context=context)
            on the parsed expression during rendering.
        """
        for tag in soup.select(selector):
            source_code = '<unset>'  # XXX
            try:
                if tag.string is None:  # or not tag.string.strip():
                    if not tag.has_attr('id'):
                        msg = "If <pre> is empty then it needs to have an id."
                        raise_desc(ValueError, msg, tag=describe_tag(tag))

                    # load it: the id is either "name" or "libname.name"
                    tag_id = tag['id'].encode('utf-8')
                    if '.' in tag_id:
                        i = tag_id.index('.')
                        libname, name = tag_id[:i], tag_id[i + 1:]
                        use_library = library.load_library(libname)
                    else:
                        name = tag_id
                        use_library = library
                    basename = '%s.%s' % (name, extension)
                    data = use_library._get_file_data(basename)
                    source_code = data['data']
                else:
                    source_code = get_source_code(tag)

                # prettify.
                # remove spurious indentation
                source_code = source_code.strip()

                do_apply_suggestions = (not tag.has_attr('noprettify')
                                        and not tag.has_attr('np'))
                # then apply suggestions
                try:
                    if do_apply_suggestions:
                        x = parse_wrap(parse_expr, source_code)[0]
                        # NOTE(review): parse_ndp_refine is used here
                        # whatever the value of `refine` -- confirm that
                        # this is intended for posets, templates, etc.
                        xr = parse_ndp_refine(x, Context())
                        suggestions = get_suggestions(xr)
                        source_code = apply_suggestions(
                            source_code, suggestions)
                except DPSyntaxError as e:
                    if raise_errors:
                        raise
                    else:
                        # the tag is left untouched
                        res.note_error(str(e), HTMLIDLocation.for_element(tag))
                        continue
                # we don't want the browser to choose different tab size
                # source_code = source_code.replace('\t', ' ' * 4)

                # collects the warnings generated by refine()
                context = Context()

                def postprocess(x):
                    if refine is not None:
                        return refine(x, context=context)
                    else:
                        return x

                html = ast_to_html(source_code,
                                   parse_expr=parse_expr,
                                   add_line_gutter=False,
                                   postprocess=postprocess)

                for w in context.warnings:
                    if w.where is not None:
                        from mcdp_web.editor_fancy.app_editor_fancy_generic import html_mark
                        html = html_mark(html, w.where, "language_warning")

                frag2 = BeautifulSoup(html, 'lxml', from_encoding='utf-8')

                if use_pre:
                    rendered = Tag(name='div', attrs={'class': 'rendered'})
                    pre = frag2.pre
                    pre.extract()
                    rendered.append(pre)
                    if not rendered.has_attr('class'):
                        rendered['class'] = ""
                    if tag.has_attr('label'):
                        # the label is added twice: inside the <pre> and
                        # before it, with different classes
                        text = tag['label']
                        tag_label = Tag(name='span')
                        add_class(tag_label, 'label')
                        add_class(tag_label, 'label_inside')
                        tag_label.append(NavigableString(text))

                        pre.insert(0, tag_label)

                        tag_label_outside = Tag(name='span')
                        add_class(tag_label_outside, 'label')
                        add_class(tag_label_outside, 'label_outside')
                        tag_label_outside.append(NavigableString(text))
                        rendered.insert(0, tag_label_outside)

                    # NOTE(review): max_len is computed but not used below.
                    max_len = max_len_of_pre_html(html)

                    if tag.has_attr('label'):
                        add_class(rendered, 'has_label')
                        max_len = max(max_len, len(tag['label']) + 6)

                    style = ''
                else:
                    # using <code>
                    rendered = frag2.pre.code
                    rendered.extract()
                    if not rendered.has_attr('class'):
                        rendered['class'] = ""

                    style = ''

                # carry over style, class and id of the original element
                if tag.has_attr('style'):
                    style = style + tag['style']

                if style:
                    rendered['style'] = style

                if tag.has_attr('class'):
                    add_class(rendered, tag['class'])

                if tag.has_attr('id'):
                    rendered['id'] = tag['id']

                if use_pre:
                    if generate_pdf:
                        pdf = get_ast_as_pdf(source_code, parse_expr)
                        if tag.has_attr('id'):
                            basename = tag['id']
                        else:
                            # no id: name the download after a hash of
                            # the source
                            hashcode = hashlib.sha224(
                                source_code).hexdigest()[-8:]
                            basename = 'code-%s' % hashcode

                        docname = os.path.splitext(
                            os.path.basename(realpath))[0]
                        download = docname + '.' + basename + '.source_code.pdf'
                        a = create_a_to_data(download=download,
                                             data_format='pdf',
                                             data=pdf)
                        a['class'] = 'pdf_data'
                        a.append(NavigableString(download))
                        div = Tag(name='div')
                        div.append(rendered)
                        div.append(a)
                        tag.replaceWith(div)
                    else:
                        tag.replaceWith(rendered)
                else:
                    tag.replaceWith(rendered)

            except DPSyntaxError as e:
                if raise_errors:
                    raise
                else:
                    res.note_error(str(e), HTMLIDLocation.for_element(tag))
                    # leave a placeholder in elements that had no text
                    if tag.string is None:
                        tag.string = "`%s" % tag['id']
                    continue

            except DPSemanticError as e:
                if raise_errors:
                    raise
                else:
                    res.note_error(str(e), HTMLIDLocation.for_element(tag))
                    # leave a placeholder in elements that had no text
                    if tag.string is None:
                        tag.string = "`%s" % tag['id']
                    continue

            except DPInternalError as ex:
                msg = 'Error while interpreting the code:\n\n'
                msg += indent(source_code, '  | ')
                raise_wrapped(DPInternalError, ex, msg, exc=sys.exc_info())

    abbrevs = {
        # tag name:  (new name, classes to add)
        'fname': ('code', ['FName']),
        'rname': ('code', ['RName']),
        'poset': ('code', ['mcdp_poset']),
        'value': ('code', ['mcdp_value']),
        'fvalue': ('code', ['mcdp_value', 'fvalue']),
        'rvalue': ('code', ['mcdp_value', 'rvalue']),
        'impname': ('code', ['impname']),
        'k': ('code', ['keyword']),
        'program': ('code', ['program']),
        'f': ('span', ['f']),
        'r': ('span', ['r']),
        'imp': ('span', ['imp']),
        'kf': ('code', ['f', 'keyword']),
        'kr': ('code', ['r', 'keyword']),
        'cf': ('code', ['f']),
        'cr': ('code', ['r']),
    }
    for original_tag_name, (new_tag_name, classes_to_add) in abbrevs.items():
        for e in soup.select(original_tag_name):
            e.name = new_tag_name
            for c in classes_to_add:
                add_class(e, c)

    # warn not to get confused with '_' and '-'
    special_classes = [
        'mcdp_poset', 'mcdp_fvalue', 'mcdp_rvalue', 'mcdp_value'
    ]
    for x in special_classes:
        # we do not expect to see an element that has class with '-' instead of '_'
        erroring = x.replace('_', '-')
        mistakes = list(soup.select('.%s' % erroring))
        if mistakes:
            # point the user to the spelling with the underscore
            msg = 'You cannot use %r as a class; use %r instead.' % (
                erroring, x)
            tags = "\n\n".join(
                indent(describe_tag(_), ' | ') for _ in mistakes)
            raise_desc(ValueError, msg, tags=tags)

    for x in special_classes:
        # mcdp_poset -> mcdp-poset
        corresponding = x.replace('_', '-')

        # here the dashed name is used as a *tag name*, e.g. <mcdp-poset>
        for e in soup.select(corresponding):
            e.name = 'code'
            # THEN add class
            add_class(e, x)

    # NOTE(review): the <fname>/<rname> elements were renamed to <code> by
    # the abbreviation loop above, so this selection looks like it cannot
    # match anything at this point -- confirm whether it should run earlier.
    prettify = list(soup.select('fname')) + list(soup.select('rname'))
    for e in prettify:
        # elements marked np/noprettify opted out, as for the code blocks
        if e.has_attr('np') or e.has_attr('noprettify'):
            continue
        x0 = e.text.encode('utf-8')
        x1 = get_suggested_identifier(x0)
        # Tag.text is read-only; the content is replaced through .string
        e.string = unicode(x1, 'utf-8')

    mcdp_dev_warning('lets try if this goes away')  # XXX
    # this is a bug with bs4. The replace_with above only adds an escaped
    # text rather than the actual tag (!).
    # soup = bs(to_html_stripping_fragment(soup))
    # assert soup.name == 'fragment'

    go('pre.mcdp',
       Syntax.ndpt_dp_rvalue,
       "mcdp",
       use_pre=True,
       refine=parse_ndp_refine)
    go('pre.mcdp_poset',
       Syntax.space,
       "mcdp_poset",
       use_pre=True,
       refine=parse_poset_refine)
    go('pre.mcdp_template',
       Syntax.template,
       "mcdp_template",
       use_pre=True,
       refine=parse_template_refine)

    go('pre.mcdp_statements',
       Syntax.dp_model_statements,
       "mcdp_statements",
       use_pre=True)
    go('pre.mcdp_fvalue', Syntax.fvalue, "mcdp_fvalue", use_pre=True)
    go('pre.mcdp_rvalue', Syntax.rvalue, "mcdp_rvalue", use_pre=True)
    # todo: add deprecation
    go('pre.mcdp_value', Syntax.rvalue, "mcdp_value", use_pre=True)

    go('code.mcdp', Syntax.ndpt_dp_rvalue, "mcdp", use_pre=False)
    go('code.mcdp_poset', Syntax.space, "mcdp_poset", use_pre=False)
    go('code.mcdp_value', Syntax.rvalue, "mcdp_value", use_pre=False)
    go('code.mcdp_template', Syntax.template, "mcdp_template", use_pre=False)

    # this is a bug with bs4...
    # separate consecutive <pre> elements with an explicit <br>
    for pre in soup.select('pre + pre'):
        br_ = br()
        br_['class'] = 'pre_after_pre'
        pre.parent.insert(pre.parent.index(pre), br_)
示例#4
0
    def go(selector, parse_expr, extension, use_pre=True, refine=None):
        """ Replaces each element matching ``selector`` with its
            syntax-highlighted rendering.

            This is a nested function: ``soup``, ``library``, ``res``,
            ``realpath``, ``generate_pdf`` and ``raise_errors`` are taken
            from the enclosing scope, which is not visible here.

            selector: CSS selector passed to soup.select().
            parse_expr: syntax element passed to parse_wrap(),
                ast_to_html() and get_ast_as_pdf().
            extension: file extension used to build the file name when
                the source is loaded from the library.
            use_pre: if true the rendering is a <pre> wrapped in a
                div.rendered; otherwise it is the inline <code> element.
            refine: optional callable, invoked as
                refine(x, context=context) while rendering.

            DPSyntaxError and DPSemanticError are re-raised if
            raise_errors is true, otherwise passed to res.note_error();
            DPInternalError is wrapped with the offending source code.
        """
        for tag in soup.select(selector):
            source_code = '<unset>'  # XXX
            try:
                # tag.string is None: there is no text to use as source,
                # so the source is looked up through the element's id
                if tag.string is None:  # or not tag.string.strip():
                    if not tag.has_attr('id'):
                        msg = "If <pre> is empty then it needs to have an id."
                        raise_desc(ValueError, msg, tag=describe_tag(tag))

                    # load it
                    # the id is either "name" or "libname.name"
                    tag_id = tag['id'].encode('utf-8')
                    if '.' in tag_id:
                        i = tag_id.index('.')
                        libname, name = tag_id[:i], tag_id[i + 1:]
                        use_library = library.load_library(libname)
                    else:
                        name = tag_id
                        use_library = library
                    basename = '%s.%s' % (name, extension)
                    data = use_library._get_file_data(basename)
                    source_code = data['data']
                else:
                    source_code = get_source_code(tag)

                # prettify.
                # remove spurious indentation
                source_code = source_code.strip()

                # the attributes 'noprettify' and 'np' both disable the
                # suggestions
                do_apply_suggestions = (not tag.has_attr('noprettify')
                                        and not tag.has_attr('np'))
                # then apply suggestions
                try:
                    if do_apply_suggestions:
                        x = parse_wrap(parse_expr, source_code)[0]
                        # NOTE(review): parse_ndp_refine is used here
                        # whatever the value of `refine` -- confirm that
                        # this is intended.
                        xr = parse_ndp_refine(x, Context())
                        suggestions = get_suggestions(xr)
                        source_code = apply_suggestions(
                            source_code, suggestions)
                except DPSyntaxError as e:
                    if raise_errors:
                        raise
                    else:
                        # note the error and leave this tag untouched
                        res.note_error(str(e), HTMLIDLocation.for_element(tag))
                        continue
                # we don't want the browser to choose different tab size
                # source_code = source_code.replace('\t', ' ' * 4)

                # we are not using it
                _realpath = realpath
                # fresh context; its warnings are read after rendering
                context = Context()

                def postprocess(x):
                    """ Applies refine() to x, if a refine function was given. """
                    if refine is not None:
                        return refine(x, context=context)
                    else:
                        return x

                #                 print('rendering source code %r' % source_code)
                html = ast_to_html(source_code,
                                   parse_expr=parse_expr,
                                   add_line_gutter=False,
                                   postprocess=postprocess)

                # mark in the HTML the warnings that have a position
                for w in context.warnings:
                    if w.where is not None:
                        from mcdp_web.editor_fancy.app_editor_fancy_generic import html_mark
                        html = html_mark(html, w.where, "language_warning")

                frag2 = BeautifulSoup(html, 'lxml', from_encoding='utf-8')

                if use_pre:
                    # the generated <pre> is moved inside a new div.rendered
                    rendered = Tag(name='div', attrs={'class': 'rendered'})
                    pre = frag2.pre
                    pre.extract()
                    rendered.append(pre)
                    # NOTE(review): rendered was just created with a class
                    # attribute, so this condition looks always false.
                    if not rendered.has_attr('class'):
                        rendered['class'] = ""
                    if tag.has_attr('label'):
                        # the label is added twice: as first child of the
                        # <pre> and as first child of the wrapper
                        text = tag['label']
                        tag_label = Tag(name='span')
                        add_class(tag_label, 'label')
                        add_class(tag_label, 'label_inside')
                        tag_label.append(NavigableString(text))

                        pre.insert(0, tag_label)

                        tag_label_outside = Tag(name='span')
                        add_class(tag_label_outside, 'label')
                        add_class(tag_label_outside, 'label_outside')
                        tag_label_outside.append(NavigableString(text))
                        rendered.insert(0, tag_label_outside)

                    # NOTE(review): max_len is not used after being computed
                    max_len = max_len_of_pre_html(html)

                    if tag.has_attr('label'):
                        add_class(rendered, 'has_label')
                        max_len = max(max_len, len(tag['label']) + 6)

                    style = ''
                else:
                    # using <code>
                    rendered = frag2.pre.code
                    rendered.extract()
                    if not rendered.has_attr('class'):
                        rendered['class'] = ""

                    style = ''

                # carry over style, class and id from the original element
                if tag.has_attr('style'):
                    style = style + tag['style']

                if style:
                    rendered['style'] = style

                if tag.has_attr('class'):
                    add_class(rendered, tag['class'])

                if tag.has_attr('id'):
                    rendered['id'] = tag['id']

                if use_pre:
                    if generate_pdf:
                        pdf = get_ast_as_pdf(source_code, parse_expr)
                        if tag.has_attr('id'):
                            basename = tag['id']
                        else:
                            # no id: use the last 8 hex digits of the
                            # SHA-224 of the source to name the download
                            hashcode = hashlib.sha224(
                                source_code).hexdigest()[-8:]
                            basename = 'code-%s' % hashcode

                        docname = os.path.splitext(
                            os.path.basename(realpath))[0]
                        download = docname + '.' + basename + '.source_code.pdf'
                        a = create_a_to_data(download=download,
                                             data_format='pdf',
                                             data=pdf)
                        a['class'] = 'pdf_data'
                        a.append(NavigableString(download))
                        # the original element is replaced by a <div>
                        # holding the rendering and the PDF link
                        div = Tag(name='div')
                        div.append(rendered)
                        div.append(a)
                        tag.replaceWith(div)
                    else:
                        tag.replaceWith(rendered)
                else:
                    tag.replaceWith(rendered)

            except DPSyntaxError as e:
                if raise_errors:
                    raise
                else:
                    res.note_error(str(e), HTMLIDLocation.for_element(tag))
                    # note_error(tag, e)
                    # elements without text get a placeholder with their id
                    if tag.string is None:
                        tag.string = "`%s" % tag['id']
                    continue

            except DPSemanticError as e:
                if raise_errors:
                    raise
                else:
                    res.note_error(str(e), HTMLIDLocation.for_element(tag))
                    # note_error(tag, e)
                    # elements without text get a placeholder with their id
                    if tag.string is None:
                        tag.string = "`%s" % tag['id']
                    continue

            except DPInternalError as ex:
                # internal errors are always raised, with the source attached
                msg = 'Error while interpreting the code:\n\n'
                msg += indent(source_code, '  | ')
                raise_wrapped(DPInternalError, ex, msg, exc=sys.exc_info())
示例#5
0
文件: make_figures.py 项目: rusi/mcdp
def make_figure_from_figureid_attr(soup):
    """
        Wraps every element that has a 'figure-id' attribute in a <figure>.

        For example, this element:

            <e figure-id='fig:ure' figure-caption='ciao'/>

        is replaced, at the same position in its parent, by:

            <div id="fig:ure-wrap">
                <figure id="fig:ure">
                    <div class="figcontent">
                        <e figure-id='fig:ure' figure-caption='ciao'/>
                    </div>
                    <figcaption>ciao</figcaption>
                </figure>
            </div>

        (classes omitted; the <div> gets 'generated-figure-wrap' and the
        <figure> gets 'generated-figure' plus a class chosen by the ID.)

        The prefix of the ID selects the class of the <figure>:

            fig:     'figure'
            subfig:  'subfloat'
            tab:     'table'    (caption placed before the content)
            code:    'code'

        Any other prefix is reported through raise_desc() as ValueError.

        Attributes read from the element:

            figure-id       ID of the <figure>.
            figure-caption  Text of the caption. Not allowed if an
                            external caption is found.
            figure-class    Space-separated classes, added both to the
                            element and to the outer <div>. If it
                            contains 'caption-left' the caption is
                            placed before the content.
            figure-style    Copied to the 'style' of the outer <div>.

        An external caption is the element with id '<ID>:caption'
        anywhere in the soup or, failing that, a <figcaption> inside the
        element. It is moved into the <figure> and renamed to
        <figcaption> if needed (logging an error).

        The soup is modified in place; nothing is returned.
    """
    from mcdp_docs.highlight import add_class

    for towrap in soup.select('[figure-id]'):
        ID = towrap['figure-id']
        parent = towrap.parent
        fig = Tag(name='figure')
        fig['id'] = ID
        caption_below = True
        if ID.startswith('fig:'):
            add_class(fig, 'figure')
        elif ID.startswith('subfig:'):
            add_class(fig, 'subfloat')
        elif ID.startswith('tab:'):
            add_class(fig, 'table')
            # for tables the caption goes before the content
            caption_below = False
        elif ID.startswith('code:'):
            add_class(fig, 'code')
        else:
            msg = ('The ID %r should start with fig: or subfig: or tab: '
                   'or code:' % ID)
            raise_desc(ValueError, msg, tag=describe_tag(towrap))

        if 'caption-left' in towrap.attrs.get('figure-class', ''):
            caption_below = False

        # Look for an external caption: first by id, then a <figcaption>
        # contained in the element itself.
        external_caption_id = '%s:caption' % ID
        external_caption = soup.find(id=external_caption_id)
        if external_caption is None:
            external_caption = towrap.find(name='figcaption')

        if external_caption is not None:
            external_caption.extract()
            if external_caption.name != 'figcaption':
                logger.error('Element %s#%r should have name figcaption.' %
                             (external_caption.name, external_caption_id))
                external_caption.name = 'figcaption'
            figcaption = external_caption

            if towrap.has_attr('figure-caption'):
                msg = 'Already using external caption for %s' % ID
                # describe the offending element through the tag= keyword
                raise_desc(ValueError, msg, tag=describe_tag(towrap))
        else:
            if towrap.has_attr('figure-caption'):
                caption = towrap['figure-caption']
            else:
                caption = ''
            figcaption = Tag(name='figcaption')
            figcaption.append(NavigableString(caption))

        outside = Tag(name='div')
        outside['id'] = ID + '-wrap'
        if towrap.has_attr('figure-style'):
            outside['style'] = towrap['figure-style']
        if towrap.has_attr('figure-class'):
            # split() rather than split(' '): repeated spaces must not
            # produce empty class names
            for k in towrap['figure-class'].split():
                add_class(towrap, k)
                add_class(outside, k)

        # Remember the position before detaching the element.
        i = parent.index(towrap)
        towrap.extract()
        figcontent = Tag(name='div', attrs={'class': 'figcontent'})
        figcontent.append(towrap)
        fig.append(figcontent)

        if caption_below:
            fig.append(figcaption)
        else:
            fig.insert(0, figcaption)

        add_class(outside, 'generated-figure-wrap')
        add_class(fig, 'generated-figure')
        outside.append(fig)
        parent.insert(i, outside)