Пример #1
0
def links(key, value, format, meta):
    """Rewrite Link elements as quoted text or as a citation.

    A link whose target is absolute (as decided by ``is_absolute``) becomes
    a Span holding the link text in curly quotes followed by the URL in
    parentheses.  Every other link becomes a Cite keyed on the part of the
    target that follows the last ``#``.

    Returns None for any element that is not a Link, which leaves the
    element unchanged.
    """
    if key != 'Link':
        return None

    _, title, target = value
    url = target[0]

    if is_absolute(url):
        # External link: render as “title” (url).
        rendered = [
            Str(u'\u201c'),
            Span(attributes({}), title),
            Str(u'\u201d'),
            Space(),
            Str('('),
            Str(url),
            Str(')'),
        ]
        return Span(attributes({}), rendered)

    # Internal link: cite the fragment identifier (text after the last '#').
    target_internal = url.rpartition('#')[2]
    citation = {
        "citationSuffix": [],
        "citationNoteNum": 0,
        "citationMode": {"t": "NormalCitation"},
        "citationPrefix": [],
        "citationId": target_internal,
        "citationHash": 0,
    }
    return Cite([citation], [Str("[@{0}]".format(target_internal))])
Пример #2
0
def fix_underline(key, value, format, meta):
    r"""Wrap an underline element in a raw LaTeX ``\uline{...}`` command.

    When ``isUnderline(key, value)`` holds, the element is replaced by three
    inlines: a raw LaTeX opener, a Span rebuilt from ``value[0]`` and
    ``value[1]``, and a raw LaTeX closing brace.  Otherwise returns None so
    the element is left unchanged.
    """
    if not isUnderline(key, value):
        return None

    opener = RawInline('latex', '\\uline{')
    body = Span(value[0], value[1])
    closer = RawInline('latex', '}')
    return [opener, body, closer]
Пример #3
0
def inline_footnotes(key, val, fmt, meta):
    """
    Turn each Pandoc Note into a Span with class ``engrafo-footnote`` whose
    content is the flattened note body, so it can be post-processed into a
    Distill footnote later. Other elements are left alone (returns None).
    """
    if key != 'Note':
        return None

    footnote_attrs = ['', ['engrafo-footnote'], []]
    return Span(footnote_attrs, flatten_blocks(val))
Пример #4
0
def metavars(key, value, format, meta):
    """Replace a metadata placeholder string with the metadata value.

    A Str element whose text matches the module-level ``pattern`` is looked
    up in ``meta`` using the first regex group as the field name:

    * a ``MetaInlines`` field becomes a Span (class ``interpolated``, with a
      ``field`` attribute) wrapping the field's inlines;
    * a ``MetaString`` field becomes a plain Str.

    Returns None (element left unchanged) for non-Str elements, for text
    that does not match, for fields missing from the metadata, and for
    fields of any other metadata type.
    """
    if key != 'Str':
        return None

    m = pattern.match(value)
    if not m:
        return None

    field = m.group(1)
    result = meta.get(field)
    # An unknown field used to default to {} and then fail on result['t']
    # with a KeyError; treat it as "nothing to substitute" instead.
    if not isinstance(result, dict):
        return None

    kind = result.get('t')
    if kind == 'MetaInlines':
        return Span(attributes({'class': 'interpolated',
                                'field': field}),
                    result['c'])
    if kind == 'MetaString':
        return Str(result['c'])
    return None
Пример #5
0
def replace_cite_references(key, val, fmt, meta):
    """Replace a Cite element with a link to its entry in ``label_map``.

    The label is the ``citationId`` of the first citation.  If it is a key
    of ``label_map``, the result is a one-element list holding a Link with
    class ``engrafo-cite`` that shows the reference string and points at
    ``#<ref_id>``.  Otherwise a Span marked ``engrafo-missing-cite``
    containing ``[?]`` is returned.  Non-Cite elements yield None.
    """
    if key != 'Cite':
        return None

    label = val[0][0]['citationId']
    known = bool(label) and label in label_map

    if not known:
        # TODO: this fallback doesn't work yet
        missing_attrs = ['', ['engrafo-cite', 'engrafo-missing-cite'], []]
        return Span(missing_attrs, [Str('[?]')])

    ref_string, ref_id, prev_strings = label_map[label]
    cite_link = Link(['', ['engrafo-cite'], []], [Str(ref_string)],
                     ['#%s' % ref_id, ''])
    return [cite_link]
Пример #6
0
def interpretManLinks(key, value, fmt, meta):
    """Render links whose URL matches ``_man_link_re`` as man-page references.

    For a matching Link:

    * if the stringified link text starts with ``lenskit``, the link is
      replaced by its bare text inlines;
    * otherwise it is replaced by a Span holding the link text followed by
      `` (`` + bold regex group 1 + ``(<group 2>))``.

    Returns None for non-Link elements and for links that do not match.
    """
    if key != 'Link':
        return None

    text, link = value
    url, title = link

    match = _man_link_re.match(url)
    if match is None:
        return None

    if stringify(text).startswith("lenskit"):
        return text

    suffix = [
        Str(" ("),
        Strong([Str(match.group(1))]),
        Str("(%s))" % (match.group(2), )),
    ]
    return Span(attributes(None), text + suffix)
Пример #7
0
def make_figures(key, val, fmt, meta):
    """
    Turn <p><img alt="some caption"><img></p> into
    <div class="engrafo-figure"><img><img>
    <span class="engrafo-figcaption">some caption</span></div>

    Only the Image children of the paragraph are kept. Paragraphs that are
    empty or contain no Image, and all non-Para elements, yield None.
    """
    if not (key == 'Para' and val):
        return None

    figure_items = []
    for node in val:
        if node['t'] == 'Image':
            figure_items.append(node)
    if not figure_items:
        return None

    # The first image's caption stands for the whole figure, because pandoc
    # gives each image in a figure the same caption.
    caption = figure_items[0]['c'][1]

    # Pandoc sets the alt text to the single word "image" if there is none.
    placeholder = [{u'c': u'image', u't': u'Str'}]
    if caption and caption != placeholder:
        figure_items.append(Span(['', ['engrafo-figcaption'], []], caption))

    # Pandoc requires that a Div has a Para in it, so a single Para wraps
    # all the children.
    return Div(['', ['engrafo-figure'], []], [Para(figure_items)])
Пример #8
0
def label_to_mathjax(env_label, label_postfix='_span', env_tag=None):
    r""" Build a hidden, MathJax-labelled Span that stands in for a label.

    The Span contains a dummy, labelled MathJax equation environment, so
    that references such as `\ref{env_label}` can be resolved by MathJax's
    own numbering machinery.

    Arguments
    =========
    env_label: str
        The label token (i.e. `\label{env_label}`).
    label_postfix: str (Optional)
        String appended to `env_label` to form the generated Span's id.
    env_tag: int (Optional)
        Tag for the labeled content (e.g. equation number); when given it
        is emitted as `\tag{env_tag}` inside the dummy equation.

    Returns
    =======
    The Span AST object (id `env_label + label_postfix`, styled so it is not
    displayed) that wraps the raw LaTeX equation carrying the label.
    """
    span_id = env_label + label_postfix

    # Assemble the dummy equation that hijacks MathJax's numbering system.
    pieces = [r'$$\begin{equation}']
    if env_tag is not None:
        pieces.append(r'\tag{{{}}}'.format(env_tag))
    pieces.append(r'\label{{{}}}'.format(env_label))
    pieces.append(r'\end{equation}$$')
    equation = ''.join(pieces)

    # Keep the equation out of sight by hiding the Span that carries it.
    hidden_attrs = [span_id, [],
                    [["style", "display:none;visibility:hidden"]]]
    return Span(hidden_attrs, [RawInline('latex', equation)])
Пример #9
0
def textohtml(key, value, format, meta):
    """Translate selected raw TeX inlines into HTML-oriented elements.

    Only RawInline elements of format ``tex`` are considered, in this order:

    1. the first entry of ``trans`` whose ``re`` matches yields a Span with
       that entry's class followed by a Space; the Span text is the entry's
       ``cont`` itself when its ``key`` is ``'Str'``, otherwise the regex
       group selected by ``cont``;
    2. a ``cboxStart`` match yields a raw HTML opening ``span.cbox`` tag;
    3. a ``cboxEnd`` match yields the raw HTML closing tag;
    4. an ``image`` match yields an Image whose target is regex group 1.

    Anything else returns None and is left unchanged.
    """
    if key != 'RawInline':
        return None

    fmt, s = value
    if fmt != "tex":
        return None

    for rule in trans:
        hit = rule['re'].match(s)
        if not hit:
            continue
        span_attrs = attributes({'class': rule['class']})
        if rule['key'] == 'Str':
            content = rule['cont']
        else:
            content = hit.group(rule['cont'])
        return [Span(span_attrs, [Str(content)]), Space()]

    if cboxStart.match(s):
        return RawInline("html", "<span class='cbox'>")
    if cboxEnd.match(s):
        return RawInline("html", "</span>")

    img = image.match(s)
    if img:
        # Three-argument Image (attr, alt, target): pandocfilters >= 1.3.0.
        return Image(['', [], []], [Str("description")], [img.group(1), ""])
    return None
Пример #10
0
def process_image(key, value, oformat, meta):
    r''' Rewrite filename in Image AST object--adding paths from the
    meta information and/or LaTeX `\graphicspaths` directive.

    This can be used to reassign paths to image file names when the
    meta information has only one entry.  It will also wrap
    LaTeX-labeled Image objects in a Span--for later
    referencing/linking, say.

    The module-level `figure_dirs` and `fig_fname_ext` are passed to
    `rename_find_fig` to compute the new file name, and the module-level
    `processed_figures` mapping records, per new file name, a
    `[label, number]` pair.

    Returns None for non-Image elements and for images whose new file
    name has already been recorded; otherwise a one-element list holding
    the rewritten Image, wrapped in a labelled Span when a `data-label`
    attribute is found at the end of the caption.
    '''
    if key != "Image":
        return None

    # TODO: Find and use labels.
    # TODO: Perhaps check that it's a valid file?
    new_value = copy(value[2])

    new_fig_fname = rename_find_fig(new_value[0],
                                    figure_dirs,
                                    fig_fname_ext)

    pandoc_logger.debug("figure_dirs: {}\tfig_fname_ext: {}\n".format(
        figure_dirs, fig_fname_ext))
    pandoc_logger.debug("new_value: {}\tnew_fig_fname: {}\n".format(
        new_value, new_fig_fname))

    # XXX: Avoid an endless loop of Image replacements.
    if new_fig_fname in processed_figures:
        return None

    processed_figures[new_fig_fname] = [None, None]

    new_value[0] = new_fig_fname
    new_image = Image(value[0], value[1], new_value)

    # Best-effort search for a `data-label` attribute on the last caption
    # element.  Only the structural lookup is guarded: a caption that does
    # not have the expected shape simply means "no label", while errors
    # raised while building the replacement are no longer swallowed.
    fig_label = None
    try:
        fig_label_obj = value[1][-1]['c'][0][-1][0]

        pandoc_logger.debug("fig_label_obj: {}\n".format(fig_label_obj))

        if fig_label_obj[0] == 'data-label':
            fig_label = fig_label_obj[1]
    except (IndexError, KeyError, TypeError):
        fig_label = None

    wrapped_image = new_image
    if fig_label is not None:
        processed_figures[new_fig_fname][0] = fig_label
        # The figure number is the count of figures recorded so far.
        env_num = len(processed_figures)
        processed_figures[new_fig_fname][1] = env_num

        hack_span = label_to_mathjax(fig_label, env_tag=env_num)

        # Wrap the image in a Span with an `id`, so that we can
        # reference it in HTML.
        wrapped_image = Span([copy(fig_label), [], []],
                             [hack_span, new_image])

    pandoc_logger.debug("wrapped_image: {}\n".format(wrapped_image))

    return [wrapped_image]
Пример #11
0
    def _cite_replacement(key, value, fmt, meta):
        """Returns context-dependent content to replace a Cite element.

        Besides its arguments, this function reads several names that are
        not defined in it (`allow_implicit_refs`, `references`,
        `use_cleveref_default`, `use_eqref`, `plusname`, `starname`);
        presumably they come from the enclosing scope -- confirm there.

        For `fmt == 'latex'` the replacement is a single RawInline holding
        a `\\cref`, `\\Cref`, `\\eqref` or `\\ref` macro for the label.  For
        any other format it is a list of inlines: an optional reference
        name followed by the reference number (or '??' when the label is
        unknown), linked to the target unless `nolink` is set.

        If the stringified last item of `value` is enclosed in square
        brackets, the replacement is returned inside a Span together with
        the citation prefix and suffix; the Span's attributes are None.
        """

        assert key == 'Cite'

        # Extract the attributes
        attrs = PandocAttributes(value[0], 'pandoc')

        # Check if the nolink attribute is set
        nolink = attrs['nolink'].capitalize() == 'True' if 'nolink' in attrs \
          else False

        # Extract the label
        label = value[-2][0]['citationId']
        # Implicit references: if the full label is unknown, retry with the
        # part after the last ':'
        if allow_implicit_refs and not label in references and ':' in label:
            testlabel = label.split(':')[-1]
            if testlabel in references:
                label = testlabel

        # Get the target metadata; typecast it as a Target for easier access
        target = references[label] if label in references else None
        if target and not isinstance(target, Target):
            target = Target(*target)

        # Issue a warning for duplicate targets
        if _WARNINGLEVEL and target and target.has_duplicate:
            msg = textwrap.dedent("""
                %s: Referenced label has duplicate: %s
            """ % (_FILTERNAME, label))
            STDERR.write(msg)
            STDERR.flush()

        # Get the replacement value
        text = str(target.num) if target else '??'

        # Choose between \Cref, \cref and \ref
        use_cleveref = attrs['modifier'] in ['*', '+'] \
          if 'modifier' in attrs else use_cleveref_default
        # NOTE(review): without a modifier, is_plus_ref also falls back to
        # use_cleveref_default -- confirm this is the intended default.
        is_plus_ref = attrs['modifier'] == '+' if 'modifier' in attrs \
          else use_cleveref_default
        refname = plusname[0] if is_plus_ref else starname[0]  # Reference name

        # The replacement content depends on the output format
        if fmt == 'latex':
            if use_cleveref:
                macro = r'\cref' if is_plus_ref else r'\Cref'
                ret = RawInline('tex', r'%s{%s}' % (macro, label))
            elif use_eqref:
                ret = RawInline('tex', r'\eqref{%s}' % label)
            else:
                ret = RawInline('tex', r'\ref{%s}' % label)
            if nolink:  # https://tex.stackexchange.com/a/323919
                ret['c'][1] = \
                  r'{\protect\NoHyper' + ret['c'][1] + r'\protect\endNoHyper}'
        else:
            if use_eqref:
                text = '(' + text + ')'

            # Text delimited by '$' is emitted as inline math without the
            # delimiters; anything else becomes a plain Str
            elem = Math({"t":"InlineMath", "c":[]}, text[1:-1]) \
              if text.startswith('$') and text.endswith('$') \
              else Str(text)

            if not nolink and target:
                # For epub formats with a known section number, prefix the
                # link with the chapter file name
                prefix = 'ch%03d.xhtml' % target.secno \
                  if fmt in ['epub', 'epub2', 'epub3'] and \
                  target.secno else ''

                # Pandoc before 1.16 gets a two-argument Link; later versions
                # get the three-argument form with an attribute list
                elem = elt('Link', 2)([elem],
                                      ['%s#%s' % (prefix, label), '']) \
                  if version(_PANDOCVERSION) < version('1.16') else \
                  Link(['', [], []], [elem], ['%s#%s' % (prefix, label), ''])

            ret = ([Str(refname + NBSP)] if use_cleveref else []) + [elem]

        # If the Cite was square-bracketed then wrap everything in a span
        s = stringify(value[-1])

        # pandoc strips off intervening space between the prefix and the Cite;
        # we may have to add it back in
        prefix = value[-2][0]['citationPrefix']
        spacer = [Space()] \
          if prefix and not stringify(prefix).endswith(('{', '+', '*', '!')) \
          else []
        if s.startswith('[') and s.endswith(']'):
            # `ret` is a single element for latex and a list otherwise
            els = value[-2][0]['citationPrefix'] + \
              spacer + ([ret] if fmt == 'latex' else ret) + \
              value[-2][0]['citationSuffix']
            # We don't yet know if there will be attributes, so leave them
            # as None.  This is fixed later when attributes are processed.
            ret = Span(None, els)

        return ret
Пример #12
0
def replace_references(key, val, fmt, meta):
    '''
    Replace

    [Str("Foo"), Space(), RawInLine("latex", "figref")]
    with
    [Str("Foo"), Space(), Link([Str("Figure"), Space(), Str("7")])]

    and

    [Str("Figure"), Space(), RawInLine("latex", "figref")]
    with
    [Link([Str("Figure"), Space(), Str("7")])]

    also works with abbreviations.

    Only list-valued elements are processed; for anything else None is
    returned and the element is left unchanged.  For a list, a new element
    ``{'t': key, 'c': [...]}`` is returned in which every raw LaTeX inline
    recognised by ``match_ref`` has been replaced by a Link to its
    ``label_map`` entry, or followed by a "?" marker Span when the label is
    unknown.
    '''

    def has_tag(node, tag):
        # Neighbours may be missing (None) or not be inline dicts at all.
        return isinstance(node, dict) and node.get('t') == tag

    if not isinstance(val, list):
        return None

    altered = []
    for i, obj in enumerate(val):
        new_objs = [obj]
        if (isinstance(obj, dict) and obj['t'] == 'RawInline'
                and obj['c'][0] == 'latex'):

            label = match_ref(obj['c'][1])
            if not label:
                # NOTE(review): raw LaTeX that is not a reference is dropped
                # from the output here -- presumably intentional; confirm.
                continue
            if label in label_map:
                ref_string, ref_id, prev_strings = label_map[label]
                prev = val[i - 1] if i > 0 else None
                prevprev = val[i - 2] if i > 1 else None

                new_objs = []

                # handle "Table ", "(Table" etc.
                if (prev_strings and has_tag(prev, 'Space')
                        and has_tag(prevprev, 'Str') and 'c' in prevprev):
                    prevprev_lower = prevprev['c'].lower()
                    for needle in prev_strings:
                        if prevprev_lower.endswith(needle):
                            altered = altered[:-2]
                            prefix = prevprev_lower[:-len(needle)]
                            if prefix:
                                new_objs.append(Str(prefix))
                            # The preceding word can only be removed once,
                            # even if several needles would match.
                            break

                # hack around bug in pandoc where non-breaking space
                # doesn't tokenize properly.  A reference at the very start
                # of the list has no predecessor, so this must tolerate a
                # missing `prev`.
                if (prev_strings and has_tag(prev, 'Str')
                        and prev['c'].replace(
                            u'\xa0', ' ').strip().lower() in prev_strings):
                    altered = altered[:-1]

                new_objs += [
                    Link(['', [], []], [Str(ref_string)],
                         ['#%s' % ref_id, ''])
                ]
            else:
                # Unknown label: keep the raw inline and flag it.
                new_objs += [
                    Space(),
                    Span(['', ['engrafo-missing-ref'], []], [Str('?')])
                ]
        altered += new_objs

    return {'t': key, 'c': altered}
Пример #13
0
def insert_section_labels(key, val, fmt, meta):
    '''
    Insert section labels for headings like
    1 This is a top level heading
    1.1 This is a subsection
    A This is a top-level appendix
    A.1 This is an appendix subheader
    etc.

    Also replaces the raw ``\\appendix`` block with an empty div whose id is
    ``engrafo-appendix-below`` and switches to appendix numbering.

    Level 1 and 2 headers are numbered using the module-level
    ``sec_lengths`` counters, receive the id ``section-<number>`` (lower
    case), and, when they carry a label not yet in ``label_map``, are
    registered there under that same id so that generated ``#<id>`` links
    resolve to the header.  Every header is returned one level deeper than
    pandoc produced it (<h1> -> <h2>).  Other elements yield None.
    '''

    global is_appendix

    if key == 'RawBlock' and val[1] == r'\appendix':
        is_appendix = True
        sec_lengths[0] = 0

        return Div(['engrafo-appendix-below', [], []], [])

    if key != 'Header':
        return None

    level, attrs, children = val

    # Ignore \subsubsection{}, \paragraph{} and smaller
    if level >= 3:
        return Header(level + 1, attrs, children)

    unnumbered = 'unnumbered' in attrs[1]

    label = attrs[0]
    # Bump this level's counter and reset every deeper level.
    sec_lengths[level - 1] += 1
    sec_lengths[level:] = [0] * (len(sec_lengths) - level)

    if is_appendix:
        # appendix: h1 is alpha
        sec_number = '.'.join([
            chr(x + ord('A') - 1) if i == 0 else str(x)
            for i, x in enumerate(sec_lengths[:level])
        ])
    else:
        sec_number = '.'.join([str(x) for x in sec_lengths[:level]])

    # The id given to the header below.  The label is registered with this
    # very id: appendix labels used to be registered as 'appendix-<number>',
    # an anchor no header ever carried, so links to appendices dangled.
    header_id = 'section-%s' % sec_number.lower()

    if label and label not in label_map:
        if is_appendix:
            ref_string = 'Appendix %s' % sec_number
            prev_strings = ['appendix', 'app.']
        else:
            ref_string = 'Section %s' % sec_number
            prev_strings = ['section', 'sec.']

        label_map[label] = Label(
            ref_string=ref_string,
            ref_index=header_id,
            prev_strings=prev_strings,
        )

    if not unnumbered:
        span = Span(['', ['section-number'], []], [Str(sec_number)])
        children = [span] + children
    attrs[0] = header_id

    # Decrease levels one more than Pandoc outputs (<h1> -> <h2>)
    return Header(level + 1, attrs, children)
Пример #14
0
def makeSpan(contents, classes="", author="", date=""):
    """Build a Span around ``contents`` carrying class/author/date attributes.

    ``classes`` is a whitespace-separated string that is split into a list;
    ``author`` and ``date`` are passed through as given.
    """
    span_attrs = attributes({
        'classes': classes.split(),
        'author': author,
        'date': date,
    })
    return Span(span_attrs, contents)
Пример #15
0
def filter_main(key, value, format, meta):
    """Pandoc filter action that applies several element rewrites.

    * CodeBlock whose text starts with the ``%%%%lyxblog-raw`` marker line:
      the remainder becomes a raw HTML block.
    * Display Math not starting with ``\\begin{``: wrapped in ``align`` (when
      it contains a ``\\label{``) or ``align*`` (when it does not).
    * Span whose only attribute is ``label``: its content is removed.
    * Header whose last inline is such a label Span: the label becomes the
      header id and the Span is dropped.
    * Inline Math of the form ``\\ref{name}``: replaced by a raw HTML link
      to ``#name``, using text embedded in the name or the number found in
      ``sec_name_to_num`` / ``img_name_to_num``.
    * Para whose first inline is an Image: rebuilt as a single centred,
      numbered figure whose source is the next entry of ``image_info``.

    Returns None whenever no rewrite applies, leaving the element unchanged.
    Headers without content, images without a caption and empty paragraphs
    are tolerated rather than raising IndexError.
    """
    global image_idx

    if key == 'CodeBlock':
        text = value[1]
        m = re.match(r'%%%%lyxblog-raw\n(.*)', text, flags=re.DOTALL | re.I)
        if m:
            return RawBlock('html', m.group(1))
    elif key == 'Math' and value[0]['t'] == 'DisplayMath':  # i.e. not inline
        # MathJax supports labels and eq. numbering only for AMS envs, so we
        # convert non-AMS envs into AMS envs.
        latex = value[1]
        if not latex.startswith(r'\begin{'):  # not AMS env
            # We assume there are no comments inside math blocks (if the file
            # is produced by LyX, there shouldn't be any).
            if latex.find(r'\label{') == -1:  # no labels => no numbering
                fixed = r'\begin{align*}' + latex + r'\end{align*}'
            else:
                fixed = r'\begin{align}' + latex + r'\end{align}'
            return Math(value[0], fixed)
    elif key == 'Span':
        # This supports general labels (i.e. labels not in equations, captions
        # or section headers).
        _, _, key_values = value[0]
        if len(key_values) == 1 and key_values[0][0] == 'label':
            # we remove the text from the label.
            return Span(value[0], [])
    elif key == 'Header':
        content = value[2]
        # A header may have no inlines at all; there is no label to move then.
        if content and content[-1]['t'] == 'Span':
            (_, _, key_values), _ = content[-1]['c']
            if len(key_values) == 1 and key_values[0][0] == 'label':
                # we label the header itself (id) and delete the label-span
                label_name = key_values[0][1]
                value[1][0] = label_name
                return Header(value[0], value[1], content[:-1])
    elif key == 'Math' and value[0]['t'] == 'InlineMath':
        if value[1].startswith('\\ref{') and value[1][-1] == '}':
            name = value[1][len('\\ref{'):-1]

            # We try to extract the text from the label itself.
            # (=00007B and =00007D represent '{' and '}' and are in the TeX
            # file produced by LyX.)
            m = re.match(r'.*=00007B([^}]+)=00007D$', name)
            if m:
                return RawInline('html',
                                 '<a href="#{}">{}</a>'.format(name,
                                                               m.group(1)))

            # We only handle references to sections and images here.
            # (Mathjax already handles the equations.)
            num = sec_name_to_num.get(name, img_name_to_num.get(name, None))
            if num:
                return RawInline('html',
                                 '<a href="#{}">{}</a>'.format(name, num))

    elif key == 'Para' and value and value[0]['t'] == 'Image':
        # NOTE:
        #   In pandoc 2, a Para[Image] where Image.title is 'fig:' becomes
        #   a <figure> with a <figcaption>.

        (_, _, style), alt, (_, title) = value[0]['c']
        style = dict(style)
        # The width is expected to be a percentage such as '80%'; the
        # trailing character is stripped before conversion.
        # NOTE(review): a width in other units (e.g. 'px') would fail in
        # float() -- confirm whether such input can occur.
        width = float(style.get('width', '100.0%')[:-1])
        margin = (100 - width) / 2

        # The image source is replaced by the next entry of image_info.
        src = image_info[image_idx]
        image_idx += 1

        label = ''
        # An image without caption has an empty alt list; nothing to strip.
        if alt and alt[-1]['t'] == 'Span':
            key_values = dict(alt[-1]['c'][0][2])  # attr key/value pairs
            if 'label' in key_values:
                # remove the label from the caption (it'll be put right before
                # the image).
                alt = alt[:-1]
                label = key_values['label']

        # The margin is passed along encoded in a class name built from UID2.
        fake_class = '{}:{:.5}%'.format(UID2, margin)
        img_attrs = make_attrs(label, [fake_class], {'width': '100%'})
        caption = [Emph([Str('Figure {}.'.format(image_idx))])]
        if title == 'fig:':
            caption += [Space()] + alt

        return Para([Image(img_attrs, caption, (src, 'fig:'))])

    return None