Example #1
import pytest

from spacy_pytorch_transformers.util import get_pytt_tokenizer


# `name` is a pretrained model identifier; parametrized inline here so the
# snippet runs standalone (the original suite likely supplies it via a fixture).
@pytest.mark.parametrize("name", ["bert-base-uncased"])
def test_tokenizers_to_from_bytes(name):
    text = "hello world"
    tokenizer_cls = get_pytt_tokenizer(name)
    tokenizer = tokenizer_cls.from_pretrained(name)
    doc = tokenizer.tokenize(text)
    assert isinstance(doc, list) and len(doc)
    # Round-trip: serialize the loaded tokenizer, then restore it into a
    # blank instance of the same class.
    bytes_data = tokenizer.to_bytes(name)
    new_tokenizer = tokenizer_cls.blank().from_bytes(bytes_data)
    new_doc = new_tokenizer.tokenize(text)
    assert isinstance(new_doc, list) and len(new_doc)
    # The restored tokenizer must tokenize identically.
    assert doc == new_doc
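
The same round trip can be used to persist a tokenizer. A minimal sketch, assuming spacy-pytorch-transformers is installed, the pretrained files can be downloaded, and to_bytes returns a bytes object as the test's usage suggests; the file path is illustrative:

from pathlib import Path

from spacy_pytorch_transformers.util import get_pytt_tokenizer

name = "bert-base-uncased"  # one example model name
tokenizer_cls = get_pytt_tokenizer(name)
tokenizer = tokenizer_cls.from_pretrained(name)

# Save the serialized tokenizer to disk (illustrative path).
path = Path("tokenizer.bytes")
path.write_bytes(tokenizer.to_bytes(name))

# Rebuild a tokenizer later from the saved bytes.
restored = tokenizer_cls.blank().from_bytes(path.read_bytes())
assert restored.tokenize("hello world") == tokenizer.tokenize("hello world")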
Example #2
import pytest

from spacy_pytorch_transformers.util import get_pytt_tokenizer

@pytest.fixture
def tokenizer(name):
    # Load the pretrained tokenizer for the current `name` parameter.
    return get_pytt_tokenizer(name).from_pretrained(name)
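
Because this fixture takes `name` as an argument, pytest needs a `name` fixture in scope. A minimal sketch of such a companion fixture; the parametrized model names are illustrative assumptions, not taken from this page:

import pytest

# Hypothetical companion fixture: every test or fixture that requests `name`
# runs once per model identifier listed in `params`.
@pytest.fixture(params=["bert-base-uncased", "gpt2"])
def name(request):
    return request.param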