Exemplo n.º 1
0
 def test_combined_0(self):
     r"""
     Check the argspec ``{*[{*`` on the input ``\cmd{ab}c*``.

     Parsing starts right after ``\cmd``.  The expected argument list is the
     braced group ``{ab}``, ``None`` for the second and third entries, then
     chars nodes for ``c`` and for the trailing ``*``.
     """
     spec = '{*[{*'
     walker = latexwalker.LatexWalker(r'\cmd{ab}c*')
     parser = MacroStandardArgsParser(spec)
     state = walker.make_parsing_state()

     parsed, _, _ = parser.parse_args(walker,
                                      len(r'\cmd'),
                                      parsing_state=state)

     # Expected nodes, in the order they appear in the input.
     group_ab = LatexGroupNode(
         parsing_state=state,
         delimiters=('{', '}'),
         nodelist=[
             LatexCharsNode(parsing_state=state, chars='ab', pos=5, len=2),
         ],
         pos=4,
         len=4,
     )
     char_c = LatexCharsNode(parsing_state=state, chars='c', pos=8, len=1)
     star = LatexCharsNode(parsing_state=state, chars='*', pos=9, len=1)

     self.assertPMAEqual(
         parsed,
         ParsedMacroArgs(argspec=spec,
                         argnlist=[group_ab, None, None, char_c, star]),
     )
Exemplo n.º 2
0
 def test_star_0(self):
     r"""
     Check the argspec ``*`` on the input ``\cmd xyz``.

     The expected argument list holds a single ``None`` entry.
     """
     walker = latexwalker.LatexWalker(r'\cmd xyz')
     parser = MacroStandardArgsParser('*')
     parsed, _, _ = parser.parse_args(walker, len(r'\cmd'))

     expected = ParsedMacroArgs(argspec='*', argnlist=[None])
     self.assertPMAEqual(parsed, expected)
Exemplo n.º 3
0
    def latex_to_text(self, latex, **parse_flags):
        """
        Return a text approximation of the LaTeX code `latex`.

        A :py:class:`~pylatexenc.latexwalker.LatexWalker` is constructed from
        `latex`, with `parse_flags` forwarded to its constructor as keyword
        arguments.  The node list it produces is then converted with
        :py:meth:`nodelist_to_text` and that result is returned.
        """
        walker = latexwalker.LatexWalker(latex, **parse_flags)
        # get_latex_nodes() returns a tuple whose first item is the node list
        nodes = walker.get_latex_nodes()[0]
        return self.nodelist_to_text(nodes)
Exemplo n.º 4
0
def custom_latex_to_text(input_latex):
    """
    Convert `input_latex` to text using the custom latex context databases.

    The input is parsed by a ``LatexWalker`` configured with
    ``lw_context_db``; the resulting node list is converted to text by a
    ``LatexNodes2Text`` configured with ``l2t_context_db``.
    """
    walker = latexwalker.LatexWalker(input_latex, latex_context=lw_context_db)

    # Only the node list is needed; the position and length are discarded.
    nodes, _, _ = walker.get_latex_nodes()

    converter = latex2text.LatexNodes2Text(latex_context=l2t_context_db)
    return converter.nodelist_to_text(nodes)
Exemplo n.º 5
0
 def test_marg_1(self):
     r"""
     Check the argspec ``{`` on the input ``\cmd ab`` (no braces present).

     The expected argument is a chars node for the single character ``a``,
     located one position past the end of ``\cmd``.
     """
     macro_len = len(r'\cmd')
     walker = latexwalker.LatexWalker(r'\cmd ab')
     parser = MacroStandardArgsParser('{')
     state = walker.make_parsing_state()
     parsed, _, _ = parser.parse_args(walker, macro_len, parsing_state=state)

     first_char = LatexCharsNode(parsing_state=state,
                                 chars='a',
                                 pos=macro_len + 1,
                                 len=1)
     self.assertPMAEqual(
         parsed,
         ParsedMacroArgs(argspec='{', argnlist=[first_char]))
Exemplo n.º 6
0
    def _callback_input(self, n):
        """
        Recurse into the file referenced by an 'input' macro node `n`.

        The first entry of ``n.nodeargs`` is converted to text and stripped
        to obtain the file name, which is handed to
        ``self.read_input_file()``.  The LaTeX code returned is parsed with a
        new ``LatexWalker`` (constructed with
        ``self.latex_walker_init_args``) and the resulting node list is
        converted with ``self.nodelist_to_text()``.

        A warning is logged when `n` does not have exactly one argument.  If
        it has no argument at all there is no file name to read, so an empty
        string is returned instead of failing with an ``IndexError``.  Extra
        arguments are ignored and the first one is used.
        """
        if len(n.nodeargs) != 1:
            logger.warning(
                u"Expected exactly one argument for '\\input' ! Got = %r",
                n.nodeargs)
            if not n.nodeargs:
                # Nothing to index below; treat the macro as producing no text.
                return u''

        filename = self.nodelist_to_text([n.nodeargs[0]]).strip()
        inputtex = self.read_input_file(filename)

        walker = latexwalker.LatexWalker(inputtex,
                                         **self.latex_walker_init_args)
        return self.nodelist_to_text(walker.get_latex_nodes()[0])
Exemplo n.º 7
0
 def test_oarg_0(self):
     r"""
     Check the argspec ``[`` on the input ``\cmd[ab] xyz``.

     The expected single argument is a group node delimited by ``[`` and
     ``]`` at position 4 (length 4) containing a chars node for ``ab``.
     """
     walker = latexwalker.LatexWalker(r'\cmd[ab] xyz')
     parser = MacroStandardArgsParser('[')
     state = walker.make_parsing_state()
     parsed, _, _ = parser.parse_args(walker, len(r'\cmd'), parsing_state=state)

     inner = LatexCharsNode(parsing_state=state, chars='ab', pos=5, len=2)
     bracket_group = LatexGroupNode(parsing_state=state,
                                    delimiters=('[', ']'),
                                    nodelist=[inner],
                                    pos=4,
                                    len=4)
     self.assertPMAEqual(
         parsed,
         ParsedMacroArgs(argspec='[', argnlist=[bracket_group]))
Exemplo n.º 8
0
 def test_marg_0(self):
     """
     Check the argspec ``{`` on the input ``{ab}``, parsing from position 0.

     The expected single argument is a group node delimited by ``{`` and
     ``}`` at position 0 (length 4) containing a chars node for ``ab``.
     """
     walker = latexwalker.LatexWalker(r'{ab}')
     parser = MacroStandardArgsParser('{')
     state = walker.make_parsing_state()
     parsed, _, _ = parser.parse_args(walker, 0, parsing_state=state)

     inner = LatexCharsNode(parsing_state=state, chars='ab', pos=1, len=2)
     brace_group = LatexGroupNode(parsing_state=state,
                                  delimiters=('{', '}'),
                                  nodelist=[inner],
                                  pos=0,
                                  len=4)
     self.assertPMAEqual(
         parsed,
         ParsedMacroArgs(argspec='{', argnlist=[brace_group]))
Exemplo n.º 9
0
    def test_preprocess_00(self):
        class MyFix(BaseFix):
            def fix_node(self, n, **kwargs):
                if n.isNodeType(latexwalker.LatexMacroNode
                                ) and n.macroname == 'testmacro':
                    return latexwalker.LatexMacroNode(
                        macroname=r'replacemacro',
                        nodeargd=None,
                        pos=0,
                        len=len(r'\replacemacro'))
                return None

        latex = r"""Test: \testmacro% a comment
Text and \`accent and \textbf{bold text} and $\vec b$ more stuff for Fran\c cois
\begin{enumerate}[(i)]
\item Hi there!  % here goes a comment
 \item[a] Hello!  @@@
     \end{enumerate}
Indeed thanks to \cite[Lemma 3]{Author}, we know that...
Also: {\itshape some italic text}."""

        nodelist = latexwalker.LatexWalker(
            latex, tolerant_parsing=False).get_latex_nodes()[0]

        myfix = MyFix()

        testnodelist = nodelist[0:1] + nodelist[
            2:4]  # not \testmacro, all fix_node()'s return None
        newnodes = myfix.preprocess(testnodelist)
        self.assertEqual(newnodes, testnodelist)

        testnodelist = nodelist[0:3]  # with \testmacro
        newnodes = myfix.preprocess(testnodelist)
        self.assertEqual(
            newnodes, testnodelist[0:1] + [
                latexwalker.LatexMacroNode(macroname=r'replacemacro',
                                           nodeargd=None,
                                           pos=0,
                                           len=len(r'\replacemacro'))
            ] + testnodelist[2:3])
Exemplo n.º 10
0
import os
import os.path

import logging
import fileinput
import json

from pylatexenc import latexwalker

from helpers import nodelist_to_d

if __name__ == '__main__':

    # Gather all input lines (fileinput reads the files named on the command
    # line, or standard input when none are given) into one LaTeX string.
    in_latex = ''.join(fileinput.input())

    # Strict parsing: tolerant_parsing is disabled.
    walker = latexwalker.LatexWalker(in_latex, tolerant_parsing=False)
    nodelist = walker.get_latex_nodes()[0]

    d = nodelist_to_d(nodelist)

    # Emit the structure twice: as a Python repr, then as indented JSON.
    print(repr(d))

    print(json.dumps(d, indent=4))
Exemplo n.º 11
0
        return

    # got a symbol macro, go for it:
    print("    MacroTextSpec(%r, u'\\N{%s}'), # ‘%s’" %
          (thenode.macroname, unicodedata.name(chr(uni)), chr(uni)))


# For each built-in latexencode rule set, parse the LaTeX replacement of every
# dict-type rule and hand the resulting nodes to extract_symbol_node().
for builtin_name in ('defaults', 'unicode-xml'):

    rules = latexencode.get_builtin_conversion_rules(builtin_name)

    logger.info("Reader latexencode defaults %r", builtin_name)
    print("    # Rules from latexencode defaults '%s'" % (builtin_name))

    for rule in rules:

        if rule.rule_type == latexencode.RULE_DICT:
            # inspect rules for symbols that latex2text might not already be
            # aware of
            for uni, latex in rule.rule.items():
                try:
                    nodelist, _, _ = latexwalker.LatexWalker(
                        latex, tolerant_parsing=False).get_latex_nodes()
                except latexwalker.LatexWalkerError as err:
                    # Unparseable replacement text: report it and move on.
                    logger.warning("Error parsing %r (%s): %s",
                                   latex, chr(uni), err)
                else:
                    extract_symbol_node(nodelist, uni, latex)
        else:
            # Only dict rules map code points to LaTeX strings.
            logger.warning("Ignoring non-dict rule type %d", rule.rule_type)