def test_extract_macro():
    """A macro defined without parameters is reported once, spanning its control sequence."""
    source = "\\macro"
    found = list(MacroExtractor().parse(source, MacroDefinition("macro", "")))

    assert len(found) == 1
    match = found[0]
    # The span covers the backslash plus the five letters of the macro name.
    assert match.start == 0
    assert match.end == 6
def test_extract_macro_balance_nested_braces_for_argument():
    """A braced argument containing a nested brace group is consumed up to the outer brace."""
    source = "\\macro{{nested}}"
    found = list(MacroExtractor().parse(source, MacroDefinition("macro", "#1")))

    assert len(found) == 1
    match = found[0]
    # The match must run through the outermost closing brace, not stop at the inner one.
    assert match.start == 0
    assert match.end == 16
    assert match.tex == "\\macro{{nested}}"
def test_extract_macro_with_delimited_parameter():
    """A parameter delimited by '.' extends the match through the delimiter itself."""
    source = "\\macro arg."
    found = list(MacroExtractor().parse(source, MacroDefinition("macro", "#1.")))

    assert len(found) == 1
    match = found[0]
    # The reported span includes the trailing '.' that terminates the parameter.
    assert match.start == 0
    assert match.end == 11
    assert match.tex == "\\macro arg."
def test_extract_macro_with_undelimited_parameter():
    """An undelimited parameter takes the first non-blank token after the macro name."""
    # The scanner for undelimited parameter '#1' should match the first non-blank token 'a'.
    # Two blanks precede the argument so that skipping of blanks is actually exercised;
    # this also makes the input 9 characters long, consistent with the asserted end offset.
    tex = "\\macro  a"
    extractor = MacroExtractor()
    macros = list(extractor.parse(tex, MacroDefinition("macro", "#1")))
    assert len(macros) == 1
    assert macros[0].start == 0
    # 6 characters for '\macro' + 2 blanks + 1 argument character.
    assert macros[0].end == 9
    assert macros[0].tex == "\\macro  a"
def _replace_unwanted_commands_with_spaces(tex: str) -> str:
    """
    Blank out commands in equation TeX that should not be handed to KaTeX.

    KaTeX does not support the entire vocabulary of LaTeX equation markup (though it
    supports a lot, see https://katex.org/docs/support_table.html). Commands that do not
    need to be parsed (e.g., 'label') are removed here so that they do not cause KaTeX to
    crash or behave unexpectedly. 'label', for example, if not removed, will have its
    argument parsed as an equation and be identified as consisting of many symbols.

    Three passes run in order, each scanning the result of the previous one: unwanted
    macros, equation length assignments, and miscellaneous patterns ('&' and the
    begin / end markers of the 'split' environment). Every matched span is passed to
    '_replace_substring_with_space', and the final string is returned.
    """

    # Pass 1: macros, matched together with whatever their parameter spec consumes.
    macros_to_blank = [
        MacroDefinition("ref", "#1"),
        MacroDefinition("label", "#1"),
        MacroDefinition("nonumber", ""),
    ]
    extractor = MacroExtractor()
    for definition in macros_to_blank:
        # NOTE(review): 'tex' is rebound while the parse result is still being consumed;
        # the offsets stay usable only if the replacement keeps the string length
        # unchanged -- confirm against '_replace_substring_with_space'.
        for found in extractor.parse(tex, definition):
            tex = _replace_substring_with_space(tex, found.start, found.end)

    # Pass 2: length assignments found in the already-cleaned text.
    for length_assignment in EquationLengthAssignmentExtractor().parse(tex):
        tex = _replace_substring_with_space(
            tex, length_assignment.start, length_assignment.end
        )

    # Pass 3: alignment ampersands and the delimiters of the 'split' environment.
    patterns_to_blank = [
        Pattern("ampersand", "&"),
        Pattern("split_start", begin_environment_regex("split")),
        Pattern("split_end", end_environment_regex("split")),
    ]
    for hit in scan_tex(tex, patterns_to_blank):
        tex = _replace_substring_with_space(tex, hit.start, hit.end)

    return tex