示例#1
0
    def printLatexNodes(self, text):
        """Echo ``text``, parse it with ``LatexWalker`` and print its nodes.

        The third value returned by ``get_latex_nodes`` is printed as well,
        and the parsed node list is handed to ``self._printLatexNodes``.
        """
        print("print " + text)
        nodelist, _pos, len_ = LatexWalker(text).get_latex_nodes(pos=0)
        print("len_ " + str(len_))
        self._printLatexNodes(nodelist)
示例#2
0
    def test_input(self):
        latex = r'''ABCDEF fdksanfkld safnkd anfklsa

\input{test_input_1.tex}

MORENKFDNSN'''
        correct_text = r'''ABCDEF fdksanfkld safnkd anfklsa

hi there! This is an equation:

    x + y i = 0

where i is the imaginary unit.

MORENKFDNSN'''

        testdir = os.path.realpath(os.path.abspath(os.path.dirname(__file__)))

        l2t = LatexNodes2Text()
        l2t.set_tex_input_directory(testdir)

        output = l2t.nodelist_to_text(LatexWalker(latex).get_latex_nodes()[0])

        self.assertEqualUpToWhitespace(output, correct_text)

        latex = r'''ABCDEF fdksanfkld safnkd anfklsa

\input{test_input_1}

MORENKFDNSN'''

        self.assertEqualUpToWhitespace(
            l2t.nodelist_to_text(LatexWalker(latex).get_latex_nodes()[0]),
            correct_text)

        latex = r'''ABCDEF fdksanfkld safnkd anfklsa

\input{../test_input_1}

MORENKFDNSN'''

        correct_text_unsafe = correct_text  # as before
        correct_text_safe = r'''ABCDEF fdksanfkld safnkd anfklsa

MORENKFDNSN'''

        # make sure that the \input{} directive failed to include the file.
        l2t = LatexNodes2Text()
        l2t.set_tex_input_directory(os.path.join(testdir, 'dummy'))
        self.assertEqualUpToWhitespace(
            l2t.nodelist_to_text(LatexWalker(latex).get_latex_nodes()[0]),
            correct_text_safe)
        # but without the strict_input flag, it can access it.
        l2t.set_tex_input_directory(os.path.join(testdir, 'dummy'),
                                    strict_input=False)
        self.assertEqualUpToWhitespace(
            l2t.nodelist_to_text(LatexWalker(latex).get_latex_nodes()[0]),
            correct_text_unsafe)
示例#3
0
 def test_accents(self):
     """Accent macros must come out as the corresponding accented letters."""
     # (LaTeX source, expected plain text)
     cases = (
         (r"Fran\c cais",
          '''Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais'''),
         (r"Fr\'en{\'{e}}tique",
          '''Fr\N{LATIN SMALL LETTER E WITH ACUTE}n\N{LATIN SMALL LETTER E WITH ACUTE}tique'''),
     )
     for source, expected in cases:
         converter = LatexNodes2Text()
         nodes = LatexWalker(source).get_latex_nodes()[0]
         self.assertEqual(converter.nodelist_to_text(nodes), expected)
示例#4
0
 def test_accents(self):
     """Accent macros and a negated relation must map to the expected text."""
     # Cedilla given as a macro followed by a space-separated letter.
     converter = LatexNodes2Text()
     nodes = LatexWalker(r"Fran\c cais").get_latex_nodes()[0]
     self.assertEqual(converter.nodelist_to_text(nodes),
                      '''Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais''')

     # Acute accents, both bare and wrapped in brace groups.
     converter = LatexNodes2Text()
     nodes = LatexWalker(r"Fr\'en{\'{e}}tique").get_latex_nodes()[0]
     self.assertEqual(
         converter.nodelist_to_text(nodes),
         '''Fr\N{LATIN SMALL LETTER E WITH ACUTE}n\N{LATIN SMALL LETTER E WITH ACUTE}tique''')

     # Inside kept math delimiters, the expected text is the NFC-normalised
     # form of '=' followed by a combining long solidus overlay.
     converter = LatexNodes2Text(math_mode='with-delimiters')
     nodes = LatexWalker(r"$1 \not= 2$").get_latex_nodes()[0]
     negated_equals = unicodedata.normalize(
         'NFC', "=\N{COMBINING LONG SOLIDUS OVERLAY}")
     self.assertEqual(converter.nodelist_to_text(nodes),
                      '''$1 {} 2$'''.format(negated_equals))
示例#5
0
    def test_keep_braced_groups(self):
        """Check the ``keep_braced_groups`` options of ``LatexNodes2Text``.

        Each case lists the converter options, the LaTeX input and the text
        the conversion is expected to produce.
        """
        cases = [
            (
                dict(keep_braced_groups=True),
                r"\textit{Voil\`a du texte}. Il est \'{e}crit {en fran{\c{c}}ais}",
                '''Voil\N{LATIN SMALL LETTER A WITH GRAVE} du texte. Il est \N{LATIN SMALL LETTER E WITH ACUTE}crit {en fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais}''',
            ),
            (
                dict(keep_braced_groups=True, keep_braced_groups_minlen=4),
                r"A{XYZ}{ABCD}",
                '''AXYZ{ABCD}''',
            ),
            (
                dict(keep_braced_groups=True, keep_braced_groups_minlen=0),
                r"{A}{XYZ}{ABCD}",
                '''{A}{XYZ}{ABCD}''',
            ),
        ]
        for options, source, expected in cases:
            converter = LatexNodes2Text(**options)
            nodes = LatexWalker(source).get_latex_nodes()[0]
            self.assertEqual(converter.nodelist_to_text(nodes), expected)
示例#6
0
    def test_get_latex_environment(self):

        latextext = r'''Text and \`accent and \textbf{bold text} and $\vec b$ more stuff for Fran\c cois
\begin{enumerate}[(i)]
\item Hi there!  % here goes a comment
\item[a] Hello!  @@@
     \end{enumerate}
Indeed thanks to \cite[Lemma 3]{Author}, we know that...
Also: {\itshape some italic text}.
'''
        lw = LatexWalker(latextext, tolerant_parsing=False)

        p = latextext.find(r'\begin{enumerate}')
        self.assertEqual(
            lw.get_latex_environment(pos=p, environmentname='enumerate'), (
                LatexEnvironmentNode('enumerate', [
                    LatexCharsNode('\n'),
                    LatexMacroNode('item', None, [], macro_post_space=' '),
                    LatexCharsNode('Hi there!  '),
                    LatexCommentNode(' here goes a comment'),
                    LatexMacroNode('item', LatexGroupNode(
                        [LatexCharsNode('a')]), []),
                    LatexCharsNode(' Hello!  @@@\n     ')
                ], [LatexGroupNode([LatexCharsNode('(i)')])], []),
                p,
                latextext.find(r'\end{enumerate}') + len(r'\end{enumerate}') -
                p,
            ))
        self.assertEqual(lw.get_latex_environment(pos=p), (
            LatexEnvironmentNode('enumerate', [
                LatexCharsNode('\n'),
                LatexMacroNode('item', None, [], macro_post_space=' '),
                LatexCharsNode('Hi there!  '),
                LatexCommentNode(' here goes a comment'),
                LatexMacroNode('item', LatexGroupNode([LatexCharsNode('a')]),
                               []),
                LatexCharsNode(' Hello!  @@@\n     ')
            ], [LatexGroupNode([LatexCharsNode('(i)')])], []),
            p,
            latextext.find(r'\end{enumerate}') + len(r'\end{enumerate}') - p,
        ))
        with self.assertRaises(LatexWalkerParseError):
            dummy = lw.get_latex_environment(pos=p,
                                             environmentname='XYZNFKLD-WRONG')
示例#7
0
    def test_get_latex_maybe_optional_arg(self):

        latextext = r'''Text and \`accent and \textbf{bold text} and $\vec b$ more stuff for Fran\c cois
\begin{enumerate}[(i)]
\item Hi there!  % here goes a comment
\item[a] Hello!  @@@
     \end{enumerate}
Indeed thanks to \cite[Lemma 3]{Author}, we know that...
'''
        lw = LatexWalker(latextext, tolerant_parsing=False)

        p = latextext.find(r'\textbf') + len(r'\textbf')
        self.assertEqual(lw.get_latex_maybe_optional_arg(pos=p), None)
        p = latextext.find(r'\cite') + len(r'\cite')
        self.assertEqual(lw.get_latex_maybe_optional_arg(pos=p), (
            LatexGroupNode([LatexCharsNode('Lemma 3')]),
            p,
            9,
        ))
示例#8
0
    def test_get_latex_braced_group(self):

        latextext = r'''Text and \`accent and \textbf{bold text} and $\vec b$ more stuff for Fran\c cois
\begin{enumerate}[(i)]
\item Hi there!  % here goes a comment
\item[a] Hello!  @@@
     \end{enumerate}
Indeed thanks to \cite[Lemma 3]{Author}, we know that...
Also: {\itshape some italic text}.
'''
        lw = LatexWalker(latextext, tolerant_parsing=False)

        p = latextext.find(r'Also: {') + len(
            'Also:')  # points on space after 'Also:'
        self.assertEqual(lw.get_latex_braced_group(pos=p, brace_type='{'), (
            LatexGroupNode([
                LatexMacroNode('itshape', None, [], macro_post_space=' '),
                LatexCharsNode('some italic text')
            ]),
            p + 1,
            len('{\itshape some italic text}'),
        ))
        self.assertEqual(lw.get_latex_braced_group(
            pos=p + 1, brace_type='{'), (
                LatexGroupNode([
                    LatexMacroNode('itshape', None, [], macro_post_space=' '),
                    LatexCharsNode('some italic text')
                ]),
                p + 1,
                len('{\itshape some italic text}'),
            ))
        p = latextext.find(r'[(i)]')
        self.assertEqual(lw.get_latex_braced_group(pos=p, brace_type='['), (
            LatexGroupNode([LatexCharsNode('(i)')]),
            p,
            5,
        ))
示例#9
0
    def test_basic(self):

        self.assertEqual(
            LatexNodes2Text().nodelist_to_text(LatexWalker(r'\textbf{A}').get_latex_nodes()[0]),
            'A'
        )

        latex = r'''\textit{hi there!} This is {\em an equation}:
\begin{equation}
    x + y i = 0
\end{equation}

where $i$ is the imaginary unit.
'''
        self.assertEqualUpToWhitespace(
            LatexNodes2Text().nodelist_to_text(LatexWalker(latex).get_latex_nodes()[0]),
            r'''hi there! This is an equation:

    x + y i = 0

where i is the imaginary unit.
'''
        )
        self.assertEqualUpToWhitespace(
            LatexNodes2Text(keep_inline_math=True).nodelist_to_text(LatexWalker(latex).get_latex_nodes()[0]),
            r'''hi there! This is an equation:

    x + y i = 0

where $i$ is the imaginary unit.
'''
        )

        self.assertEqual(
            LatexNodes2Text().nodelist_to_text(LatexWalker(latex).get_latex_nodes()[0]),
            LatexNodes2Text().latex_to_text(latex)
        )
示例#10
0
    def test_basic(self):

        self.assertEqual(
            LatexNodes2Text().nodelist_to_text(
                LatexWalker(r'\textbf{A}').get_latex_nodes()[0]), 'A')

        latex = r'''\textit{hi there!} This is {\em an equation}:
\begin{equation}
    x + y i = 0
\end{equation}

where $i$ is the ``imaginary unit.''
'''
        self.assertEqualUpToWhitespace(
            LatexNodes2Text().nodelist_to_text(
                LatexWalker(latex).get_latex_nodes()[0]),
            u'''hi there! This is an equation:

    x + y i = 0

where i is the “imaginary unit.”
''')
        self.assertEqualUpToWhitespace(
            LatexNodes2Text(math_mode='with-delimiters').nodelist_to_text(
                LatexWalker(latex).get_latex_nodes()[0]),
            u'''hi there! This is an equation:
\\begin{equation}
    x + y i = 0
\\end{equation}
where $i$ is the “imaginary unit.”
''')

        self.assertEqual(
            LatexNodes2Text().nodelist_to_text(
                LatexWalker(latex).get_latex_nodes()[0]),
            LatexNodes2Text().latex_to_text(latex))
示例#11
0
    def test_get_latex_nodes(self):

        latextext = r'''Text and \`accent and \textbf{bold text} and $\vec b$ more stuff for Fran\c cois
\begin{enumerate}[(i)]
\item Hi there!  % here goes a comment
\item[a] Hello!  @@@
     \end{enumerate}
Indeed thanks to \cite[Lemma 3]{Author}, we know that...
Also: {\itshape some italic text}.
'''
        lw = LatexWalker(latextext, tolerant_parsing=False)

        #lw.get_latex_nodes(pos=0,stop_upon_closing_brace=None,stop_upon_end_environment=None,
        #                   stop_upon_closing_mathmode=None)

        p = latextext.find('Also: {')
        self.assertEqual(lw.get_latex_nodes(pos=p), ([
            LatexCharsNode('Also: '),
            LatexGroupNode([
                LatexMacroNode('itshape', None, [], macro_post_space=' '),
                LatexCharsNode('some italic text')
            ]),
            LatexCharsNode('.')
        ], p, len(latextext) - p - 1))  # trailing '\n' is not included

        p = latextext.find('Also: {') + len(
            'Also: {')  # points inside right after open brace
        self.assertEqual(
            lw.get_latex_nodes(pos=p, stop_upon_closing_brace='}'), ([
                LatexMacroNode('itshape', None, [], macro_post_space=' '),
                LatexCharsNode('some italic text')
            ], p, len('\itshape some italic text}')))

        # test our own macro lists etc.
        pindeed = latextext.find('Indeed thanks to')
        lineindeed = latextext[pindeed:latextext.find('\n', pindeed)]
        lw2 = LatexWalker(lineindeed,
                          tolerant_parsing=False,
                          macro_dict={'cite': MacrosDef('cite', False, 4)})
        self.assertEqual(lw2.get_latex_nodes(pos=0), ([
            LatexCharsNode('Indeed thanks to '),
            LatexMacroNode('cite', None, [
                LatexCharsNode('['),
                LatexCharsNode('L'),
                LatexCharsNode('e'),
                LatexCharsNode('m'),
            ]),
            LatexCharsNode('ma 3]'),
            LatexGroupNode([LatexCharsNode('Author')]),
            LatexCharsNode(', we know that...'),
        ], 0, len(lineindeed)))
示例#12
0
    def test_errors(self):
        """Strict parsing must raise on the test data; tolerant must not."""
        latextext = get_test_latex_data_with_possible_inconsistencies()

        strict_walker = LatexWalker(latextext, tolerant_parsing=False)
        with self.assertRaises(LatexWalkerParseError):
            strict_walker.get_latex_nodes()

        tolerant_walker = LatexWalker(latextext, tolerant_parsing=True)
        # The same input has to go through without raising in tolerant mode.
        try:
            tolerant_walker.get_latex_nodes()
        except LatexWalkerParseError as e:
            # Turn the unexpected parse error into a test failure.
            message = (
                u"get_latex_nodes() raised LatexWalkerParseError, but it shouldn't have in "
                u"tolerant parsing mode!\n" + unicode(e))
            self.fail(message)
示例#13
0
    def test_get_token(self):
        """Check ``LatexWalker.get_token`` at selected positions of a sample.

        Each assertion positions ``p`` on (or just before) a construct in
        ``latextext`` and compares the returned token with the expected
        ``LatexToken``: plain characters, macros with and without
        surrounding whitespace, environment begin/end, comments, braces,
        brackets and the math-mode dollar sign.
        """

        latextext = r'''Text \`accent and \textbf{bold text} and $\vec b$ vector \& also Fran\c cois
\begin{enumerate}[(i)]
\item Hi there!  % here goes a comment
\item[a] Hello!  @@@
     \end{enumerate}
'''
        lw = LatexWalker(latextext)

        # Plain characters.
        self.assertEqual(
            lw.get_token(pos=0),
            LatexToken(tok='char', arg='T', pos=0, len=1, pre_space=''))
        self.assertEqual(
            lw.get_token(pos=1),
            LatexToken(tok='char', arg='e', pos=1, len=1, pre_space=''))
        # Single-character macro.
        p = latextext.find(r'\`')
        self.assertEqual(
            lw.get_token(pos=p),
            LatexToken(tok='macro', arg='`', pos=p, len=2, pre_space=''))
        p = latextext.find(r'\textbf') - 1  # pre space
        self.assertEqual(
            lw.get_token(pos=p),
            LatexToken(tok='macro',
                       arg='textbf',
                       pos=p + 1,
                       len=7,
                       pre_space=' '))
        p = latextext.find(r'\vec')  # post-space
        self.assertEqual(
            lw.get_token(pos=p),
            LatexToken(tok='macro',
                       arg='vec',
                       pos=p,
                       len=5,
                       pre_space='',
                       post_space=' '))
        p = latextext.find(r'\&') - 1  # pre-space and *no* post-space
        self.assertEqual(
            lw.get_token(pos=p),
            LatexToken(tok='macro',
                       arg='&',
                       pos=p + 1,
                       len=2,
                       pre_space=' ',
                       post_space=''))
        # With environments=False the begin macro is reported as a macro.
        p = latextext.find(r'\begin')
        self.assertEqual(
            lw.get_token(pos=p, environments=False),
            LatexToken(tok='macro', arg='begin', pos=p, len=6, pre_space=''))
        # By default the whole environment opening is one token.
        p = latextext.find(r'\begin')
        self.assertEqual(
            lw.get_token(pos=p),
            LatexToken(tok='begin_environment',
                       arg='enumerate',
                       pos=p,
                       len=len(r'\begin{enumerate}'),
                       pre_space=''))
        p = latextext.find(r'@@@') + 3  # pre space to \end
        self.assertEqual(
            lw.get_token(pos=p),
            LatexToken(tok='end_environment',
                       arg='enumerate',
                       pos=p + 6,
                       len=len(r'\end{enumerate}'),
                       pre_space='\n     '))
        # Comment, starting from the space before the percent sign.
        p = latextext.find(r'%') - 1
        self.assertEqual(
            lw.get_token(pos=p),
            LatexToken(tok='comment',
                       arg=' here goes a comment',
                       pos=p + 1,
                       len=len('% here goes a comment\n'),
                       pre_space=' ',
                       post_space='\n'))
        # Curly braces.
        p = latextext.find(r'{')
        self.assertEqual(
            lw.get_token(pos=p),
            LatexToken(tok='brace_open', arg='{', pos=p, len=1, pre_space=''))
        p = latextext.find(r'}')
        self.assertEqual(
            lw.get_token(pos=p),
            LatexToken(tok='brace_close', arg='}', pos=p, len=1, pre_space=''))
        # Square brackets count as braces when brackets_are_chars=False.
        p = latextext.find(r'[')
        self.assertEqual(
            lw.get_token(pos=p, brackets_are_chars=False),
            LatexToken(tok='brace_open', arg='[', pos=p, len=1, pre_space=''))
        p = latextext.find(r']')
        self.assertEqual(
            lw.get_token(pos=p, brackets_are_chars=False),
            LatexToken(tok='brace_close', arg=']', pos=p, len=1, pre_space=''))
        # With this walker the dollar sign is expected as a plain character.
        p = latextext.find(r'$')
        self.assertEqual(
            lw.get_token(pos=p),
            LatexToken(tok='char', arg='$', pos=p, len=1, pre_space=''))

        # With keep_inline_math=True it is expected as a math-mode token.
        lw2 = LatexWalker(latextext, keep_inline_math=True)
        p = latextext.find(r'$')
        self.assertEqual(
            lw2.get_token(pos=p),
            LatexToken(tok='mathmode_inline',
                       arg='$',
                       pos=p,
                       len=1,
                       pre_space=''))
示例#14
0
# Pattern matching one backtick or one apostrophe character.
re_single_quote = re.compile(r"`|'")


def is_symbol(node, LatexMacroNode):
    """Return True when ``node.macroname`` is exactly one character long.

    The second parameter is not used by the body; it is kept so that the
    call signature stays the same.
    """
    # TODO: improve this function
    macro_name = node.macroname
    return len(macro_name) == 1


if __name__ == "__main__":

    # Read latex file
    with open(input_tex, "r") as fp:
        text = fp.read()

    # Parse latex
    walker = LatexWalker(text)

    # Parse nodes
    nodes, pos, length = walker.get_latex_nodes()

    # Write output to a file
    with open(output_txt, "w") as fp:
        # Number of titles
        num_titles = 0

        # If previous node is a citation
        no_new_paragraph = False

        # Loop over all nodes
        for node in nodes:
            # TODO: Implement more rules and process environments recursively
示例#15
0
    def test2(self):
        """Parse a display-math integral and print the first node's children."""
        w = LatexWalker(r"""\[\int_{a}^{b} x^2 \,dx \]""")
        nodelist, _pos, _len = w.get_latex_nodes(pos=0)

        # Walk the child nodes of the first top-level node directly.
        for child in nodelist[0].nodelist:
            print(child)
示例#16
0
    def traverseLatex(self, text):
        """Parse ``text`` with ``LatexWalker`` and traverse the node list.

        The parsed top-level nodes are handed to ``self._traverseLatex``.
        """
        parsed = LatexWalker(text).get_latex_nodes(pos=0)
        nodelist = parsed[0]

        self._traverseLatex(nodelist)
示例#17
0
 def findMathNode(self):
     """Parse ``self.equation`` and search its node list for a math node.

     Returns whatever ``self._findMathNodeInList`` returns for the parsed
     top-level nodes.
     """
     top_nodes, _pos, _len = LatexWalker(self.equation).get_latex_nodes(pos=0)
     return self._findMathNodeInList(top_nodes)
示例#18
0
    def test_get_latex_expression(self):
        """Check ``LatexWalker.get_latex_expression`` at several positions.

        Covers a plain character, a macro, a brace group, skipping over a
        comment, and the behaviour when positioned directly on a closing
        brace in tolerant and non-tolerant parsing modes.
        """

        latextext = r'''Text and \`accent and \textbf{bold text} and $\vec b$ more stuff for Fran\c cois
\begin{enumerate}[(i)]
\item Hi there!  % here goes a comment
\item[a] Hello!  @@@
     \end{enumerate}
'''
        lw = LatexWalker(latextext, tolerant_parsing=True)

        # A single character is an expression on its own.
        self.assertEqual(lw.get_latex_expression(pos=0), (
            LatexCharsNode('T'),
            0,
            1,
        ))
        # A macro is returned without arguments.
        p = latextext.find(r'\`')
        self.assertEqual(lw.get_latex_expression(pos=p), (
            LatexMacroNode('`', None, []),
            p,
            2,
        ))
        # A brace group is returned as a whole.
        p = latextext.find(r'{')
        self.assertEqual(lw.get_latex_expression(pos=p), (
            LatexGroupNode([LatexCharsNode('bold text')]),
            p,
            11,
        ))
        p = latextext.find(r'%')  # check: correctly skips comments
        self.assertEqual(lw.get_latex_expression(pos=p), (
            LatexMacroNode('item', None, []),
            p + len('% here goes a comment\n'),
            5,
        ))
        # NOTE(review): identical to the previous check; possibly an
        # unintended duplicate.
        p = latextext.find(r'%')  # check: correctly skips comments
        self.assertEqual(lw.get_latex_expression(pos=p), (
            LatexMacroNode('item', None, []),
            p + len('% here goes a comment\n'),
            5,
        ))
        # check correct behavior if directly on brace close
        p = latextext.find(r'}')
        # Tolerant walker: an empty chars node is expected even with
        # strict_braces=True.
        self.assertEqual(lw.get_latex_expression(pos=p, strict_braces=True), (
            LatexCharsNode(''),
            p,
            0,
        ))
        # Non-tolerant walker: the same empty result is expected only when
        # strict_braces=False; strict_braces=True must raise.
        lw2 = LatexWalker(latextext, tolerant_parsing=False)
        self.assertEqual(lw2.get_latex_expression(pos=p, strict_braces=False),
                         (
                             LatexCharsNode(''),
                             p,
                             0,
                         ))
        with self.assertRaises(LatexWalkerParseError):
            dummy = lw2.get_latex_expression(pos=p, strict_braces=True)
示例#19
0
from pylatexenc.latexwalker import LatexWalker, LatexEnvironmentNode
import pylatexenc
import sys

# Read the LaTeX file named by the first command-line argument.
fname = sys.argv[1]
with open(fname, "r") as f:
    tex = f.read()
    print(fname)

# Parse the whole document into top-level nodes.
walker = LatexWalker(tex)
nodelist, pos, leen = walker.get_latex_nodes()

# Collect the stripped text of every chars node found directly inside a
# top-level environment node.
charslist = []
for node in nodelist:
    if not node.isNodeType(pylatexenc.latexwalker.LatexEnvironmentNode):
        continue
    charslist.extend(
        str(subnode.chars).strip()
        for subnode in node.nodelist
        if subnode.isNodeType(pylatexenc.latexwalker.LatexCharsNode)
    )

# Join the pieces on spaces and flatten the remaining newlines.
fullst = " ".join(charslist).replace("\n", " ")

print(fullst)