def test_tknz(parameters, test_input: str, expected: List[str]):
    """Check that ``CharTknzr.tknz`` tokenizes text into the expected tokens.

    Parameters
    ----------
    parameters
        Mapping read with the keys ``'is_uncased'``, ``'max_vocab'`` and
        ``'min_count'``; each value is forwarded to the ``CharTknzr``
        constructor under the same keyword name.
    test_input: str
        Text handed to ``tknz``.
    expected: List[str]
        Token list the tokenizer output is compared against.
    """
    # Pull the constructor arguments out first so the construction reads flat.
    is_uncased = parameters['is_uncased']
    max_vocab = parameters['max_vocab']
    min_count = parameters['min_count']

    tokenizer = CharTknzr(
        is_uncased=is_uncased,
        max_vocab=max_vocab,
        min_count=min_count,
    )

    actual = tokenizer.tknz(test_input)
    assert actual == expected
def test_char_tknzr(capsys, char_tknzr: CharTknzr, exp_name: str, seed: int) -> None:
    """Ensure the tokenize script prints what :py:class:`lmp.tknzr.CharTknzr` produces.

    The script entry point ``lmp.script.tknz_txt.main`` is invoked with the
    experiment name, the seed and a short sample text.  The test passes when
    the string form of ``char_tknzr.tknz`` applied to the same text appears in
    the captured standard output.

    Parameters
    ----------
    capsys
        Object whose ``readouterr()`` result exposes the captured ``out`` text.
    char_tknzr: CharTknzr
        Tokenizer used to compute the reference tokenization.
    exp_name: str
        Value passed to the script's ``--exp_name`` option.
    seed: int
        Value passed (as a string) to the script's ``--seed`` option.
    """
    txt = 'abc'

    # Command-line arguments are all strings, hence the explicit str(seed).
    cli_args = [
        '--exp_name',
        exp_name,
        '--seed',
        str(seed),
        '--txt',
        txt,
    ]
    lmp.script.tknz_txt.main(argv=cli_args)

    # Read the captured output before computing the reference, as the
    # original test did, so nothing emitted afterwards is included.
    stdout_text = capsys.readouterr().out
    reference = str(char_tknzr.tknz(txt=txt))
    assert reference in stdout_text