def test_handle_unclosed_environments():
    """An environment that is never closed must not break extraction of the outer equation."""
    tex = "$x + \\hbox{\\begin{equation}y}$"
    found = list(EquationExtractor().parse("main.tex", tex))

    # Only the dollar-delimited equation is reported, and it spans the whole input.
    assert len(found) == 1
    only = found[0]
    assert only.start == 0
    assert only.end == 30
def test_extract_equation_environment_with_argument():
    """The content of an environment with an argument starts after that argument."""
    tex = "\\begin{array}{c}x\\end{array}"
    found = list(EquationExtractor().parse("main.tex", tex))

    assert len(found) == 1
    only = found[0]
    # 16 characters precede the content: the '\begin{array}' opener plus the '{c}' argument.
    assert only.content_start == 16
def annotate_symbols_and_equations_for_file(
    tex: str,
    tex_path: RelativePath,
    symbols: SymbolDict,
    characters: CharacterDict,
) -> Tuple[str, Set[str]]:
    """
    Insert annotation text into 'tex' for every equation found in it.

    Equations are extracted from 'tex', the symbols are grouped per equation, and the
    annotations produced for each equation are spliced into a copy of the TeX.

    Returns a tuple of the annotated TeX and the set of symbol TeX strings collected
    while creating the annotations.
    """
    # Find every equation in the file before doing anything else.
    found_equations = list(EquationExtractor().parse(tex_path, tex))

    # Look-up table from equation ID to the symbols belonging to that equation.
    grouped_symbols = _group_by_equation(symbols)

    # Collect annotations (and the symbol TeX they refer to) across all equations.
    all_annotations: List[Annotation] = []
    collected_symbol_tex: Set[str] = set()
    for eq in found_equations:
        eq_id = EquationId(tex_path, eq.i)
        result = _create_annotations_for_equation(
            tex, eq, eq_id, grouped_symbols.get(eq_id, []), characters
        )
        all_annotations.extend(result.annotations)
        collected_symbol_tex.update(result.symbol_tex)

    # Splice annotations in from the highest position to the lowest, so that an insertion
    # never shifts the position of an annotation that is still waiting to be inserted.
    all_annotations.sort(key=lambda item: item.position, reverse=True)
    result_tex = tex
    for item in all_annotations:
        cut = item.position
        head, tail = result_tex[:cut], result_tex[cut:]
        result_tex = head + item.text + tail

    return result_tex, collected_symbol_tex
def adjust_color_positions(entity: SerializableEntity) -> CharacterRange:
    """
    Color commands sometimes introduce unwanted space when added right before or after
    an equation. One solution is to put color commands right inside the equation.

    The entity is treated as a Term. Equations are extracted from the term's TeX; if the
    first non-space character of the term is the start of the earliest equation, the
    returned range starts at that equation's content start instead of the term's start.
    Likewise, if the last non-space character of the term is the end of the latest-ending
    equation, the returned range ends at that equation's content end. When the term
    contains no equations, the term's own start and end are returned unchanged.
    """
    term = cast(Term, entity)

    equations = list(EquationExtractor().parse(entity.tex_path, term.tex))
    if not equations:
        return CharacterRange(term.start, term.end)

    # If the term starts with an equation, move the coloring command inside the equation.
    # Equation offsets are relative to 'term.tex', so they are shifted by 'term.start'.
    adjusted_start = term.start
    first_equation = min(equations, key=lambda e: e.start)
    # Raw strings are used for the patterns: '\S' and '\s' are not valid string escapes.
    first_nonspace = re.search(r"\S", term.tex)
    if (
        first_nonspace is not None
        and first_nonspace.start(0) == first_equation.start
    ):
        adjusted_start = term.start + first_equation.content_start

    # If the term ends with an equation, move the coloring command inside the equation.
    adjusted_end = term.end
    last_equation = max(equations, key=lambda e: e.end)
    # Matches the final non-space character, i.e. one followed only by trailing whitespace.
    last_nonspace = re.search(r"\S(?=\s*$)", term.tex)
    if last_nonspace is not None and last_nonspace.end(0) == last_equation.end:
        adjusted_end = term.start + last_equation.content_end

    return CharacterRange(adjusted_start, adjusted_end)
def test_extract_equation_from_double_dollar_signs():
    """A '$$...$$' pair is extracted as a single equation covering both delimiters."""
    found = list(EquationExtractor().parse("main.tex", "$$x$$"))

    assert len(found) == 1
    only = found[0]
    assert only.start == 0
    assert only.end == 5
def test_extract_equation_from_star_environment():
    """The starred 'equation*' environment is extracted as one equation."""
    tex = "\\begin{equation*}x\\end{equation*}"
    found = list(EquationExtractor().parse("main.tex", tex))

    assert len(found) == 1
    only = found[0]
    # The equation spans the entire input, from the opener through the closer.
    assert only.start == 0
    assert only.end == 33
def test_extract_equation_from_brackets():
    """An equation delimited by escaped square brackets is extracted."""
    tex = "\\[x + y\\]"
    found = list(EquationExtractor().parse("main.tex", tex))

    assert len(found) == 1
    only = found[0]
    assert only.start == 0
    # The content begins right after the two-character opening delimiter.
    assert only.content_start == 2
    assert only.end == 9
def test_extract_nested_equations():
    """Both the outer equation and the environment nested inside it are reported."""
    tex = "$x + \\hbox{\\begin{equation}y\\end{equation}}$"
    found = list(EquationExtractor().parse("main.tex", tex))
    assert len(found) == 2

    # The outer, dollar-delimited equation starts at the first character.
    outer = next(eq for eq in found if eq.start == 0)
    assert outer.end == 44

    # The inner equation environment starts at offset 11.
    inner = next(eq for eq in found if eq.start == 11)
    assert inner.end == 42
def test_extract_equation_from_dollar_sign():
    """A '$...$' equation exposes its offsets, its content, and its full TeX."""
    found = list(EquationExtractor().parse("main.tex", "$x + y$"))

    assert len(found) == 1
    only = found[0]

    # Character offsets within the parsed string.
    assert only.start == 0
    assert only.content_start == 1
    assert only.end == 7

    # Extracted text without and with the delimiters.
    assert only.content_tex == "x + y"
    assert only.tex == "$x + y$"
def test_extract_equation_from_equation_environment():
    """An 'equation' environment exposes its offsets, its content, and its full TeX."""
    tex = "\\begin{equation}x\\end{equation}"
    found = list(EquationExtractor().parse("main.tex", tex))

    assert len(found) == 1
    only = found[0]

    # Character offsets within the parsed string.
    assert only.start == 0
    assert only.content_start == 16
    assert only.end == 31

    # Extracted text without and with the environment commands.
    assert only.content_tex == "x"
    assert only.tex == "\\begin{equation}x\\end{equation}"
def test_ignore_escaped_dollar_sign():
    """Dollar signs escaped with a backslash do not delimit an equation."""
    found = list(EquationExtractor().parse("main.tex", "\\$\\$"))
    assert len(found) == 0
def test_dont_extract_equation_from_command_argument_brackets():
    """Square brackets holding a command's optional argument are not an equation."""
    tex = "\\documentclass[11pt]{article}"
    found = list(EquationExtractor().parse("main.tex", tex))
    assert len(found) == 0
def test_extract_equation_from_dollar_sign_after_newline():
    """A dollar sign that directly follows a double backslash still opens an equation."""
    # This pattern was observed in arXiv paper 1703.00102.
    tex = r"\\$x$"
    found = list(EquationExtractor().parse("main.tex", tex))
    assert len(found) == 1