def test_issue_55():
    """Check the token stream for an ``array`` environment with an ``rcl`` column spec."""
    source = r'\begin{array}{rcl}ABC&=&a\\A&=&abc\end{array}'
    first_row = ['A', 'B', 'C', '&', '=', '&', 'a']
    second_row = ['A', '&', '=', '&', 'a', 'b', 'c']
    wanted = (
        [r'\begin{array}', '{', 'r', 'c', 'l', '}']
        + first_row
        + [r'\\']
        + second_row
        + [r'\end{array}']
    )
    assert wanted == list(tokenize(source))
def test_simple_array(self):
    """A two-column ``array`` environment is expected to tokenize to the ``\\array`` form.

    The expected stream carries the column spec ``cc`` as a single token and
    wraps the cells in ``{`` ... ``}``.
    """
    latex = r'\begin{array}{cc} 1 & 2 \\ 3 & 4 \end{array}'
    expected = [r'\array', 'cc', '{', '1', '&', '2', r'\\', '3', '&', '4', '}']
    self.assertListEqual(expected, list(tokenizer.tokenize(latex)))
def test_issue_33():
    """Check the token stream for an m-by-n ``bmatrix`` with subscripted entries and dots."""
    source = r'''\begin{bmatrix} a_{1,1} & a_{1,2} & \cdots & a_{1,n} \\ a_{2,1} & a_{2,2} & \cdots & a_{2,n} \\ \vdots & \vdots & \ddots & \vdots \\ a_{m,1} & a_{m,2} & \cdots & a_{m,n} \end{bmatrix}'''

    def entry(row, col):
        # Tokens for a single ``a_{row,col}`` cell.
        return ['a', '_', '{', row, ',', col, '}']

    def entry_row(row):
        # Tokens for ``a_{row,1} & a_{row,2} & \cdots & a_{row,n}``.
        return (
            entry(row, '1')
            + ['&']
            + entry(row, '2')
            + ['&', r'\cdots', '&']
            + entry(row, 'n')
        )

    dots_row = [r'\vdots', '&', r'\vdots', '&', r'\ddots', '&', r'\vdots']
    row_break = [r'\\']
    wanted = (
        [r'\begin{bmatrix}']
        + entry_row('1') + row_break
        + entry_row('2') + row_break
        + dots_row + row_break
        + entry_row('m')
        + [r'\end{bmatrix}']
    )
    assert wanted == list(tokenize(source))
def test_numbers_with_decimals():
    """A number containing a decimal point is expected to come back as one token."""
    text = '12.56'
    wanted = [text]
    assert wanted == list(tokenize(text))
def test_decimals_and_alphabets(self):
    """A decimal followed by a letter is expected to split into the number and the letter."""
    latex = '5.8x'
    expected = ['5.8', 'x']
    self.assertListEqual(expected, list(tokenizer.tokenize(latex)))
def test_single_backslash():
    """A lone backslash is expected to be returned as a single token."""
    backslash = '\\'
    assert [backslash] == list(tokenize(backslash))
def test_numbers():
    """A run of digits is expected to come back as one token."""
    digits = '1234567890'
    wanted = [digits]
    assert wanted == list(tokenize(digits))
def test_numbers(self):
    """A run of digits is expected to come back as one token."""
    digits = '1234567890'
    expected = [digits]
    self.assertListEqual(expected, list(tokenizer.tokenize(digits)))
def test_tokenize(name: str, latex: str, expected: list):
    """Assert that tokenizing ``latex`` yields exactly ``expected``.

    ``name`` is not read by the body.
    NOTE(review): presumably a label for a parametrized case; confirm against the decorator.
    """
    produced = list(tokenize(latex))
    assert produced == expected
def test_matrix(self):
    """A 2x2 ``matrix`` environment is expected to tokenize to the ``\\matrix { ... }`` form."""
    latex = r'\begin{matrix}a & b \\ c & d \end{matrix}'
    expected = [r'\matrix', '{', 'a', '&', 'b', r'\\', 'c', '&', 'd', '}']
    self.assertListEqual(expected, list(tokenizer.tokenize(latex)))
def test_matrix_with_alignment():
    """A ``matrix*`` environment with an ``[r]`` option keeps the brackets as separate tokens."""
    source = r'\begin{matrix*}[r]a & b \\ c & d \end{matrix*}'
    wanted = [
        r'\begin{matrix*}',
        '[', 'r', ']',
        'a', '&', 'b',
        r'\\',
        'c', '&', 'd',
        r'\end{matrix*}',
    ]
    assert wanted == list(tokenize(source))
def test_symbols(self):
    """Two adjacent commands are expected to be returned as two separate tokens."""
    latex = r'\alpha\beta'
    expected = [r'\alpha', r'\beta']
    self.assertListEqual(expected, list(tokenizer.tokenize(latex)))
def test_symbols_appended_number(self):
    """A command directly followed by a digit and a letter is expected to split into three tokens."""
    latex = r'\frac2x'
    expected = [r'\frac', '2', 'x']
    self.assertListEqual(expected, list(tokenizer.tokenize(latex)))
def test_numbers_alphabets_and_operators(self):
    """Digits, letters and operators separated by spaces are expected as individual tokens."""
    latex = '3 + 5x - 5y = 7'
    expected = ['3', '+', '5', 'x', '-', '5', 'y', '=', '7']
    self.assertListEqual(expected, list(tokenizer.tokenize(latex)))
def test_operators(self):
    """Each operator or bracket character is expected to be its own token."""
    operators = '+-*/=()[]_^{}'
    expected = list(operators)
    self.assertListEqual(expected, list(tokenizer.tokenize(operators)))
def test_string_with_spaces(self):
    """The space between two symbols is not expected to appear in the token stream."""
    latex = '3 x'
    expected = ['3', 'x']
    self.assertListEqual(expected, list(tokenizer.tokenize(latex)))
def test_numbers_alphabets_and_operators():
    """Digits, letters and operators separated by spaces are expected as individual tokens."""
    source = '3 + 5x - 5y = 7'
    wanted = ['3', '+', '5', 'x', '-', '5', 'y', '=', '7']
    assert wanted == list(tokenize(source))
def test_matrix_with_alignment(self):
    """A ``matrix*`` environment with ``[r]`` is expected as ``\\matrix*``, ``r``, then braces."""
    latex = r'\begin{matrix*}[r]a & b \\ c & d \end{matrix*}'
    expected = [r'\matrix*', 'r', '{', 'a', '&', 'b', r'\\', 'c', '&', 'd', '}']
    self.assertListEqual(expected, list(tokenizer.tokenize(latex)))
def test_symbols_appended_number():
    """A command directly followed by a digit and a letter is expected to split into three tokens."""
    source = r'\frac2x'
    wanted = [r'\frac', '2', 'x']
    assert wanted == list(tokenize(source))
def test_matrix_with_negative_sign(self):
    """A leading minus sign in a matrix cell is expected as its own token."""
    latex = r'\begin{matrix}-a & b \\ c & d \end{matrix}'
    expected = [r'\matrix', '{', '-', 'a', '&', 'b', r'\\', 'c', '&', 'd', '}']
    self.assertListEqual(expected, list(tokenizer.tokenize(latex)))
def test_simple_array():
    """A two-column ``array`` environment keeps its begin/end markers and splits the column spec.

    The expected stream has ``{``, ``c``, ``c``, ``}`` as separate tokens
    between ``\\begin{array}`` and the first cell.
    """
    source = r'\begin{array}{cc} 1 & 2 \\ 3 & 4 \end{array}'
    wanted = [
        r'\begin{array}',
        '{', 'c', 'c', '}',
        '1', '&', '2',
        r'\\',
        '3', '&', '4',
        r'\end{array}',
    ]
    assert wanted == list(tokenize(source))
def test_simple_array(self):
    """A two-column ``array`` environment is expected to tokenize to the ``\\array`` form.

    The expected stream carries the column spec ``cc`` as a single token and
    wraps the cells in ``{`` ... ``}``.
    """
    latex = r'\begin{array}{cc} 1 & 2 \\ 3 & 4 \end{array}'
    expected = [r'\array', 'cc', '{', '1', '&', '2', r'\\', '3', '&', '4', '}']
    self.assertListEqual(expected, list(tokenizer.tokenize(latex)))
def aggregate(latex):
    """Fold the tokens of ``latex`` into a nested list and return it.

    ``subgroups`` is used as a stack of the lists currently being filled; its
    bottom element is the returned ``aggregation`` list. Opening brackets push
    a new list, closing brackets pop one, and ``_``/``^`` are moved in front of
    the item they follow.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed listing; confirm indentation against the canonical
    file before relying on it.
    NOTE(review): ``_insert_before_last_item`` and ``_add_new_subgroup`` are
    defined elsewhere; their exact effect on ``subgroups`` is assumed from
    their names and call sites, so verify.
    """
    aggregation = []
    subgroups = [aggregation]  # stack of open lists; the last entry is the one being filled
    insert_before_last_item = False
    environment = None  # last token seen that is a member of MATRICES
    has_negative_sign = False  # set after '-' opened an extra subgroup inside a matrix
    for token in tokenize(latex):
        if token in MATRICES:
            environment = token
            _insert_before_last_item(insert_before_last_item, token, subgroups)
        elif token in '{([':
            try:
                a = subgroups[-1][-1]
                # Anything other than a preceding \left is routed to the
                # handler below, exactly like an empty current group.
                if a != r'\left':
                    raise IndexError
            except IndexError:
                # NOTE(review): when subgroups[-1] is empty, `a` is not
                # assigned here; the `if a and ...` below then reads a value
                # left over from an earlier iteration, or raises
                # UnboundLocalError if there was none - confirm.
                n = []
                _insert_before_last_item(insert_before_last_item, n, subgroups)
                subgroups.append(n)
            if environment and environment in MATRICES:
                if a and a in MATRICES:
                    _add_new_subgroup(subgroups)
                else:
                    try:
                        b = subgroups[-2][-3]
                        if b in MATRICES:
                            _add_new_subgroup(subgroups)
                    except IndexError:
                        pass
            elif token == '[' and subgroups[-2][-2] == r'\sqrt':
                subgroups[-2][-2] = r'\root'  # rename \sqrt to \root; \root is an internal marker, not a LaTeX command
            elif token != '{':
                # '(' and '[' are kept as items; '{' only opens a group.
                subgroups[-1].append(token)
        elif token in '})]':
            try:
                a = subgroups[-1][-1]
            except IndexError:
                # NOTE(review): `a` keeps its previous value (or stays unbound)
                # when the current group is empty - confirm this is intended.
                pass
            if token == ']' and subgroups[-2][-2] == r'\root':
                insert_before_last_item = True
            elif token != '}':
                # ')' and ']' are kept as items; '}' only closes a group.
                subgroups[-1].append(token)
            if a and a == r'\right':
                pass
            else:
                subgroups.pop()
        elif token in '_^':
            try:
                a = subgroups[-1][-3]
                # A '_' and a '^' applied to the same base are merged into a
                # single '_^' marker, whichever order they arrive in.
                if a == '_' and token == '^':
                    subgroups[-1][-3] = '_^'
                elif a == '^' and token == '_':
                    subgroups[-1][-3] = '_^'
                    insert_before_last_item = True
                else:
                    subgroups[-1].insert(-1, token)
            except IndexError:
                # Fewer than three items in the current group: place the
                # operator before the last item.
                subgroups[-1].insert(-1, token)
        elif token == '-' and environment and environment in MATRICES:
            _add_new_subgroup(subgroups)
            _insert_before_last_item(insert_before_last_item, token, subgroups)
            has_negative_sign = True
        elif token == '&' and environment and environment in MATRICES:
            # '&' itself is not stored; it only closes the subgroup opened for '-'.
            if has_negative_sign:
                subgroups.pop()
                has_negative_sign = False
        elif (token == r'\\' or token == r'\cr') and environment and environment in MATRICES:
            if has_negative_sign:
                subgroups.pop()
                has_negative_sign = False
            # Close the current subgroup and open a fresh one.
            subgroups.pop()
            _add_new_subgroup(subgroups)
        else:
            _insert_before_last_item(insert_before_last_item, token, subgroups)
    return aggregation
def test_alphabets(self):
    """Each lowercase and uppercase ASCII letter is expected to be its own token."""
    letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    expected = list(letters)
    self.assertListEqual(expected, list(tokenizer.tokenize(letters)))
def test_superscript_with_curly_braces():
    """A braced superscript followed by a subscript is expected character by character."""
    source = 'a^{i+1}_3'
    wanted = ['a', '^', '{', 'i', '+', '1', '}', '_', '3']
    assert wanted == list(tokenize(source))
def test_numbers_with_decimals(self):
    """A number containing a decimal point is expected to come back as one token."""
    number = '12.56'
    expected = [number]
    self.assertListEqual(expected, list(tokenizer.tokenize(number)))
def test_alphabets():
    """Each letter of ``string.ascii_letters`` is expected to be its own token."""
    letters = string.ascii_letters
    wanted = list(letters)
    assert wanted == list(tokenize(letters))
def test_numbers_and_alphabets():
    """A digit immediately followed by a letter is expected to split into two tokens."""
    source = '5x'
    wanted = list(source)
    assert wanted == list(tokenize(source))
def test_double_backslash_after_number():
    """A double backslash right after a number is expected as a separate token."""
    number = '123'
    line_break = '\\\\'
    wanted = [number, line_break]
    assert wanted == list(tokenize(number + line_break))
def test_string_with_spaces():
    """The space between two symbols is not expected to appear in the token stream."""
    source = '3 x'
    wanted = ['3', 'x']
    assert wanted == list(tokenize(source))
def test_incomplete_decimal():
    """A trailing dot with no digits after it is expected as its own token."""
    source = '12.\\\\'
    wanted = ['12', '.', '\\\\']
    assert wanted == list(tokenize(source))
def test_numbers_and_alphabets(self):
    """A digit immediately followed by a letter is expected to split into two tokens."""
    latex = '5x'
    expected = list(latex)
    self.assertListEqual(expected, list(tokenizer.tokenize(latex)))
def test_decimals_and_alphabets():
    """A decimal followed by a letter is expected to split into the number and the letter."""
    source = '5.8x'
    wanted = ['5.8', 'x']
    assert wanted == list(tokenize(source))
def test_text():
    """The argument of ``\\text{...}`` is expected as one token, without its braces."""
    source = r'\text{if} a=b \text{then} b = a'
    wanted = [
        r'\text', 'if',
        'a', '=', 'b',
        r'\text', 'then',
        'b', '=', 'a',
    ]
    assert wanted == list(tokenize(source))
def test_operators():
    """Each operator or bracket character is expected to be its own token."""
    operators = '+-*/=()[]_^{}'
    wanted = list(operators)
    assert wanted == list(tokenize(operators))
def test_double_backslash1():
    """A double backslash on its own is expected to come back as one token."""
    line_break = r'\\'
    wanted = [line_break]
    assert wanted == list(tokenize(line_break))
def test_symbols():
    """Two adjacent commands are expected to be returned as two separate tokens."""
    source = r'\alpha\beta'
    wanted = [r'\alpha', r'\beta']
    assert wanted == list(tokenize(source))
def test_spaces():
    """Spacing commands are expected as tokens; plain space characters are not."""
    source = r'a\, b\; c\quad d \qquad e'
    wanted = ['a', r'\,', 'b', r'\;', 'c', r'\quad', 'd', r'\qquad', 'e']
    assert wanted == list(tokenize(source))
def test_matrix():
    """A 2x2 ``matrix`` environment is expected to keep its begin/end markers as tokens."""
    source = r'\begin{matrix}a & b \\ c & d \end{matrix}'
    wanted = [
        r'\begin{matrix}',
        'a', '&', 'b',
        r'\\',
        'c', '&', 'd',
        r'\end{matrix}',
    ]
    assert wanted == list(tokenize(source))
def test_numbers_with_comma():
    """A comma between digits is expected to split the input into number, comma, number."""
    source = '12,56'
    wanted = ['12', ',', '56']
    assert wanted == list(tokenize(source))
def test_matrix_with_negative_sign():
    """A leading minus sign in a matrix cell is expected as its own token."""
    source = r'\begin{matrix}-a & b \\ c & d \end{matrix}'
    wanted = [
        r'\begin{matrix}',
        '-', 'a', '&', 'b',
        r'\\',
        'c', '&', 'd',
        r'\end{matrix}',
    ]
    assert wanted == list(tokenize(source))
def walk(data: str) -> List[Node]:
    """Tokenize ``data`` and return whatever ``_walk`` produces from the tokens."""
    return _walk(tokenize(data))
def test_subscript():
    """A braced subscript is expected character by character, braces and comma included."""
    source = 'a_{2,n}'
    wanted = ['a', '_', '{', '2', ',', 'n', '}']
    assert wanted == list(tokenize(source))
def aggregate(data):
    """Tokenize ``data`` and return whatever ``_aggregate`` produces from the tokens."""
    return _aggregate(tokenize(data))