def test_latex_comments():
    """Check how LaTeX comments are removed, including the position map."""
    # each case: (LaTeX input, expected plain text, expected position list)
    cases = [
        # "normal" comment
        ('a %x\nb\n', 'a b\n', [1, 2, 6, 7]),
        # join lines
        # NOTE(review): the expected position 7 for 'b' suggests two blanks
        # in front of it in the input -- confirm against the upstream test
        ('a%x\n b\n', 'ab\n', [1, 7, 8]),
        # join lines: protect macro name
        ('a\\aa%x\nb\n', 'aåb\n', [1, 2, 8, 9]),
        # do not join lines, if next line empty
        ('a%x\n\nb\n', 'a\n\nb\n', [1, 4, 5, 6, 7]),
        # remove pure comment lines
        ('a %x\n %x\nb\n', 'a b\n', [1, 2, 10, 11]),
    ]
    for source, want_plain, want_nums in cases:
        plain, nums = tex2txt.tex2txt(source, options)
        assert plain == want_plain
        assert nums == want_nums
def test_remove_blank_lines_left_by_macros():
    """Check that lines emptied by macro removal do not remain as blanks."""
    # each case: (LaTeX input, expected plain text, expected position list)
    cases = [
        # a normal macro: \label
        ('a\n\\label{x}\nb\n', 'a\nb\n', [1, 2, 13, 14]),
        # macro plus comment
        ('a\n\\label{x} %x\nb\n', 'a\n b\n', [1, 2, 12, 16, 17]),
        # \begin and \end
        ('a\n\\begin{x}\nb\n\\end{x}\nc\n', 'a\nb\nc\n',
         [1, 2, 13, 14, 23, 24]),
        # (actually no blank lines)
        ('a\n\\begin{x}b\n\\end{x}c\n', 'a\nb\nc\n',
         [1, 2, 12, 13, 21, 22]),
    ]
    for source, want_plain, want_nums in cases:
        plain, nums = tex2txt.tex2txt(source, options)
        assert plain == want_plain
        assert nums == want_nums
def test_cite():
    """Check the plain text produced for a citation, without and with option."""
    # each case: (LaTeX input, expected plain text)
    cases = (
        ('\\cite{x}', '[0]'),
        ('\\cite[y]{x}', '[0, y]'),
    )
    for source, want_plain in cases:
        plain, _ = tex2txt.tex2txt(source, options)
        assert plain == want_plain
def test_proof():
    """Check the text generated for the start of a proof environment."""
    preamble = '\\usepackage{amsthm}\n'

    # without optional argument: only the plain text is checked
    plain, _ = tex2txt.tex2txt(preamble + '\\begin{proof}', options)
    assert plain == '\n\nProof.\n'

    # with optional argument: the position list is checked, too
    source = preamble + 'A \\begin{proof}[Test] B'
    want_nums = [21, 22, 23, 23, 37, 38, 39, 40, 40, 40, 42, 43]
    plain, nums = tex2txt.tex2txt(source, options)
    assert plain == 'A \n\nTest.\n B'
    assert nums == want_nums
def test_phantom():
    """Check the replacements for the three phantom macro variants."""
    # each case: (LaTeX input, expected plain text)
    cases = (
        (r'A\phantom{XYZ}B\phantom{\label{lab}}C', 'A BC'),
        (r'A\hphantom{XYZ}B\hphantom{\label{lab}}C', 'A BC'),
        (r'A\vphantom{XYZ}B\vphantom{\label{lab}}C', 'ABC'),
    )
    for source, want_plain in cases:
        plain, _ = tex2txt.tex2txt(source, options)
        assert plain == want_plain
def test_unknown_macro():
    """Check the output for an undefined macro followed by space or braces."""
    # each case: (LaTeX input, expected plain text)
    cases = (
        ('a\\xxx b', 'ab'),
        ('a\\xxx \n{b} c', 'ab c'),
        ('a\\xxx\n\n{b} c', 'a\n\nb c'),
    )
    for source, want_plain in cases:
        plain, _ = tex2txt.tex2txt(source, options)
        assert plain == want_plain
def addpacks(cmdline):
    """Run the LaTeX filter on the file given by cmdline.add_modules.

    The package '.yalafi.shell.addpacks' is always appended to the package
    list taken from cmdline.packages. The converted text itself is discarded.

    Reads cmdline.packages, .define, .language, .documentclass, .add_modules
    and .encoding.

    Returns the tuple (documentclass[0], packages).
    NOTE(review): 'documentclass' and 'packages' are not assigned in this
    function; presumably they are module-level containers filled while the
    file is processed -- confirm.
    """
    packs = '.yalafi.shell.addpacks'
    user_packs = cmdline.packages.strip(',')
    if user_packs:
        packs = user_packs + ',' + packs
    opts = tex2txt.Options(defs=cmdline.define, lang=cmdline.language[:2],
                           dcls=cmdline.documentclass, pack=packs)

    f = tex2txt.myopen(cmdline.add_modules, encoding=cmdline.encoding)
    try:
        latex = f.read()
    finally:
        # release the file handle even if reading fails, e.g. on a
        # decoding error
        f.close()

    # called only for its side effects, the result is not used
    tex2txt.tex2txt(latex, opts)
    return documentclass[0], packages
def test_renewcommand():
    """Check that macro definitions vanish, unless split by a blank line."""
    # each case: (LaTeX input, expected plain text)
    cases = (
        # complete definitions leave no trace
        ('a\\newcommand\n{\\x}\n{y}b', 'ab'),
        ('a\\newcommand{\\x}[1][z]{y}b', 'ab'),
        ('a\\newcommand*{\\x}[1][z]{y}b', 'ab'),
        ('a\\renewcommand{\\x}[1][z]{y}b', 'ab'),
        # a blank line inside the definition: the remainder stays in the text
        ('a\\newcommand\n\n{\\x}[1]{y}b', 'a\n\n[1]yb'),
        ('a\\newcommand{\\x}\n\n[1]{y}b', 'a\n\n[1]yb'),
        ('a\\newcommand{\\x}[1]\n\n{y}b', 'a\n\nyb'),
    )
    for source, want_plain in cases:
        plain, _ = tex2txt.tex2txt(source, options)
        assert plain == want_plain
def test_footnotemark():
    """Check the output for a footnote mark with and without option."""
    # each case: (LaTeX input, expected plain text)
    cases = (
        ('a\\footnotemark b', 'ab'),
        ('a\\footnotemark[1] b', 'a b'),
        ('a\\footnotemark\n[1]b', 'ab'),
        # do not cross paragraph border
        ('\\footnotemark\n\n[1]', '\n[1]'),
    )
    for source, want_plain in cases:
        plain, _ = tex2txt.tex2txt(source, options)
        assert plain == want_plain
def test_macro_arguments():
    """Check when macro arguments on following lines are still collected."""
    # each case: (LaTeX input, expected plain text, expected position list);
    # a position list of None means that it is not checked
    cases = (
        # normal expansion
        ('\\usepackage{xcolor}\n\\textcolor\n{red}\n{blue}',
         'blue', [39, 40, 41, 42]),
        # no expansion: argument in next paragraph
        ('\\usepackage{xcolor}\\textcolor\n{red}\n \n{blue}',
         ' \nblue', None),
        # no expansion: argument in next paragraph
        ('\\usepackage{xcolor}\\textcolor\n\n{red}\n{blue}',
         '\nred\nblue', None),
        # expansion: comment line
        ('\\usepackage{xcolor}\\textcolor\n %x\n{red}\n{blue}',
         'blue', None),
    )
    for source, want_plain, want_nums in cases:
        plain, nums = tex2txt.tex2txt(source, options)
        assert plain == want_plain
        if want_nums is not None:
            assert nums == want_nums
def test_3():
    """Convert latex_3 with Options(pack='*') and compare with plain_3."""
    opts = tex2txt.Options(pack='*')
    text, _ = tex2txt.tex2txt(latex_3, opts)
    assert text == plain_3
def test_verb():
    """Check that the content of an inline verbatim macro is kept literally."""
    source = '\\verb?%x\\y?\\label{z}?'
    want_plain = '%x\\y?'
    text, _ = tex2txt.tex2txt(source, options)
    assert text == want_plain
def test_verbatim():
    """Check that the body of a verbatim environment is kept literally."""
    # extra blank lines: see LAB:VERBATIM in tex2txt.py
    source = 'A\\begin{verbatim}\\verb?%\\x?\n\\end{verbatim}B'
    want_plain = 'A\n\n\\verb?%\\x?\n\n\nB'
    text, _ = tex2txt.tex2txt(source, options)
    assert text == want_plain
def test_newthm():
    """Convert latex_newthm and compare the plain text with plain_newthm."""
    text, _ = tex2txt.tex2txt(latex_newthm, options)
    assert text == plain_newthm
def test_delimiters(latex, plain_expected):
    """Convert 'latex' behind a fixed package line and check the plain text.

    latex:          LaTeX snippet under test
    plain_expected: plain text that the conversion has to yield
    """
    source = '\\usepackage{.tests.defs}\n' + latex
    text, _ = tex2txt.tex2txt(source, options)
    assert text == plain_expected
def run_proofreader_options(tex, source, source_defs, language,
                            disable, enable, disablecategories,
                            enablecategories, lt_options):
    """Convert 'tex' to plain text, run the proofreader and merge the results.

    Unless cmdline.plain_input is set, the text is first passed through
    tex2txt.tex2txt(). Every non-blank plain-text part is then checked with
    run_textgears() or run_languagetool(), and the matches from the 'checks'
    module are added. All parts are concatenated, separated by two newlines,
    and the match offsets are shifted accordingly.

    Returns the tuple (tex, plain text, position map, matches), with the
    matches sorted by the absolute value of the mapped position. With
    cmdline.list_unknown, the converter output is returned together with an
    empty match list, and no proofreader is started.
    """
    def append_csv(base, extra):
        # append 'extra' to the comma-separated list 'base', if it is given
        if not extra:
            return base
        if base:
            base += ','
        return base + extra

    t2t_options = tex2txt.Options(
        char=True,
        repl=cmdline.replace,
        defs=cmdline.define,
        lang=language,
        extr=cmdline.extract,
        unkn=cmdline.list_unknown,
        seqs=cmdline.simple_equations,
        dcls=cmdline.documentclass,
        pack=cmdline.packages,
        nosp=cmdline.no_specials)

    if cmdline.plain_input:
        # input is already plain text: identity map with positions from 1
        plain_map = {language: [(tex, list(range(1, len(tex) + 1)))]}
    elif cmdline.list_unknown:
        # only look for unknown macros and environments
        plain, charmap = tex2txt.tex2txt(tex, t2t_options, source=source,
                                         source_defs=source_defs)
        return (tex, plain, charmap, [])
    elif cmdline.multi_language:
        def mod_parms(parms):
            parms.ml_continue_thresh = cmdline.ml_continue_threshold
        plain_map = tex2txt.tex2txt(tex, t2t_options, multi_language=True,
                                    modify_parms=mod_parms, source=source,
                                    source_defs=source_defs)
    else:
        plain, charmap = tex2txt.tex2txt(tex, t2t_options, source=source,
                                         source_defs=source_defs)
        plain_map = {language: [(plain, charmap)]}

    # extended rule and category exclusions, used for short text parts
    # in multi-language mode
    disa_thresh = append_csv(disable, cmdline.ml_disable)
    disacat_thresh = append_csv(disablecategories,
                                cmdline.ml_disablecategories)

    delim = '\n\n'      # NB: issue #6
    matches_tot = []
    plain_tot = ''
    charmap_tot = []
    for lang in plain_map:
        for part, part_map in plain_map[lang]:
            if not part.strip():
                continue

            # here, we could dispatch to other tools, see for instance
            #   - https://textgears.com/api
            #   - Python package prowritingaid.python
            #
            if cmdline.textgears:
                matches = run_textgears(part)
            else:
                short_part = (cmdline.multi_language and
                              len(part.split()) <= cmdline.ml_rule_threshold)
                if short_part:
                    dis, dis_cat = disa_thresh, disacat_thresh
                else:
                    dis, dis_cat = disable, disablecategories
                matches = run_languagetool(part, lang, dis, enable,
                                           dis_cat, enablecategories,
                                           lt_options)
            matches += checks.create_single_letter_matches(part, cmdline)
            matches += checks.create_equation_punct_messages(
                part, cmdline,
                equation_replacements_display,
                equation_replacements_inline,
                equation_replacements)

            # make the offsets relative to the concatenated text
            shift = len(plain_tot)
            for match in matches:
                match['offset'] = json_get(match, 'offset', int) + shift
            matches_tot.extend(matches)

            plain_tot += part
            charmap_tot.extend(part_map)
            # the delimiter inherits the position of the preceding character
            plain_tot += delim
            charmap_tot.extend([charmap_tot[-1]] * len(delim))

    # sort matches according to position in LaTeX text
    #
    def latex_position(match):
        beg = json_get(match, 'offset', int)
        if not 0 <= beg < len(charmap_tot):
            tex2txt.fatal('run_proofreader():'
                          + ' bad message read from proofreader')
        return abs(charmap_tot[beg])
    matches_tot.sort(key=latex_position)

    return (tex, plain_tot, charmap_tot, matches_tot)
# does file name match regex from option --skip? return cmdline.skip and re.search(r'\A' + cmdline.skip + r'\Z', fn) todo = cmdline.file done = [] while todo: f = todo.pop(0) if f in done or skip_file(f): continue done.append(f) if not cmdline.include: continue fp = tex2txt.myopen(f, encoding=cmdline.encoding) tex = fp.read() fp.close() (plain, _) = tex2txt.tex2txt(tex, opts, source=f, source_defs=source_defs) for f in plain.split(): if not f.endswith('.tex'): f += '.tex' if f not in done + todo and not skip_file(f): todo.append(f) cmdline.file = done if cmdline.include: sys.stderr.write(', '.join(cmdline.file) + '\n') sys.stderr.flush() # helpers for robust JSON evaluation # json_decoder = json.JSONDecoder()
def test_6():
    """Convert latex_6 with default Options and compare with plain_6."""
    opts = tex2txt.Options()
    text, _ = tex2txt.tex2txt(latex_6, opts)
    assert text == plain_6
def test_1():
    """Convert latex_1 and compare the plain text with plain_1."""
    text, _ = tex2txt.tex2txt(latex_1, options)
    assert plain_1 == text
def test_system_macros():
    """Check that footnote text is moved behind the surrounding text."""
    source = 'a\\footnote[2]{x}b'
    want_plain = 'ab\n\n\nx\n'
    text, _ = tex2txt.tex2txt(source, options)
    assert text == want_plain
return cmdline.skip and re.search(r'\A' + cmdline.skip + r'\Z', fn) todo = cmdline.file done = [] while todo: f = todo.pop(0) if f in done or skip_file(f): continue done.append(f) if not cmdline.include: continue fp = tex2txt.myopen(f, encoding=cmdline.encoding) tex = fp.read() fp.close() (plain, _) = tex2txt.tex2txt(tex, opts) for f in plain.split(): if not f.endswith('.tex'): f += '.tex' if f not in done + todo and not skip_file(f): todo.append(f) cmdline.file = done if cmdline.include: sys.stderr.write(', '.join(cmdline.file) + '\n') sys.stderr.flush() # helpers for robust JSON evaluation # json_decoder = json.JSONDecoder()
def test_2():
    """Convert latex_2 and compare the plain text with plain_2."""
    text, _ = tex2txt.tex2txt(latex_2, options)
    assert plain_2 == text
U_\epsilon(x) &\subset M \quad\text{for all } x \in \Omega, \notag \\ f(x) % LINE 11 &> 0 \quad\text{for all}\ x \in \Omega \label{l1} \\ f(x) &= 0 \quad\text{for all} x \in M \setminus \Omega. \label{l2} \end{align} """ plain_t = r""" We consider a set C-C-C, a domain D-D-D, andx a function E-E-E. With a constant F-F-F, we require V-V-V equal W-W-W for all X-X-X, Y-Y-Y equal Z-Z-Z for all U-U-U U-U-U equal V-V-V for allW-W-W. Thix is a footnote. """ options = tex2txt.Options(lang='en', char=True) plain, nums = tex2txt.tex2txt(latex, options) def test_text(): assert plain == plain_t
def test_4():
    """Convert latex_4 with package '.tests.defs' and compare with plain_4."""
    opts = tex2txt.Options(pack='.tests.defs')
    text, _ = tex2txt.tex2txt(latex_4, opts)
    assert text == plain_4