def test_mitie_add_cls_token():
    """Check MITIE tokenization with the ``use_cls_token`` option enabled.

    For "Forecast for lunch" the expected tokens are the three words
    followed by ``CLS_TOKEN``, and the expected offsets are 0, 9, 13 and 19.
    """
    from rasa.nlu.tokenizers.mitie_tokenizer import MitieTokenizer

    tokenizer = MitieTokenizer({"use_cls_token": True})
    sentence = "Forecast for lunch"

    # The sentence is tokenized afresh for each assertion.
    observed_texts = [token.text for token in tokenizer.tokenize(sentence)]
    assert observed_texts == ["Forecast", "for", "lunch", CLS_TOKEN]

    observed_offsets = [token.offset for token in tokenizer.tokenize(sentence)]
    assert observed_offsets == [0, 9, 13, 19]
def test_mitie():
    """Check the token texts and offsets MITIE produces for sample sentences.

    Covers a plain ASCII sentence and one containing accented characters,
    a contraction and trailing punctuation.
    """
    from rasa.nlu.tokenizers.mitie_tokenizer import MitieTokenizer

    tokenizer = MitieTokenizer()

    # Each entry: (input sentence, expected token texts, expected offsets).
    cases = [
        (
            "Forecast for lunch",
            ["Forecast", "for", "lunch"],
            [0, 9, 13],
        ),
        (
            "hey ńöñàśçií how're you?",
            ["hey", "ńöñàśçií", "how", "'re", "you", "?"],
            [0, 4, 13, 16, 20, 23],
        ),
    ]

    for sentence, wanted_texts, wanted_offsets in cases:
        # The sentence is tokenized once per assertion.
        assert [tok.text for tok in tokenizer.tokenize(sentence)] == wanted_texts
        assert [tok.offset for tok in tokenizer.tokenize(sentence)] == wanted_offsets
def test_mitie(text, expected_tokens, expected_indices):
    """Tokenize ``text`` with MITIE and compare token texts and spans.

    Args:
        text: Raw input; it is wrapped in a ``Message`` before tokenizing.
        expected_tokens: Expected token texts, in order.
        expected_indices: One indexable entry per token, where element 0 is
            the expected ``start`` and element 1 the expected ``end``.
    """
    tokenizer = MitieTokenizer()
    produced = tokenizer.tokenize(Message(text), attribute=TEXT)

    produced_texts = [token.text for token in produced]
    assert produced_texts == expected_tokens

    produced_starts = [token.start for token in produced]
    wanted_starts = [span[0] for span in expected_indices]
    assert produced_starts == wanted_starts

    produced_ends = [token.end for token in produced]
    wanted_ends = [span[1] for span in expected_indices]
    assert produced_ends == wanted_ends
def test_mitie():
    """Verify MITIE token texts and offsets for two sample sentences.

    The first sentence is plain ASCII; the second contains accented
    characters, a contraction and a question mark.
    """
    from rasa.nlu.tokenizers.mitie_tokenizer import MitieTokenizer

    tokenizer = MitieTokenizer()

    # Plain ASCII sentence.
    plain = "Forecast for lunch"
    plain_texts = [token.text for token in tokenizer.tokenize(plain)]
    assert plain_texts == ["Forecast", "for", "lunch"]
    plain_offsets = [token.offset for token in tokenizer.tokenize(plain)]
    assert plain_offsets == [0, 9, 13]

    # Sentence with accented characters, a contraction and punctuation.
    accented = "hey ńöñàśçií how're you?"
    accented_texts = [token.text for token in tokenizer.tokenize(accented)]
    assert accented_texts == ["hey", "ńöñàśçií", "how", "'re", "you", "?"]
    accented_offsets = [token.offset for token in tokenizer.tokenize(accented)]
    assert accented_offsets == [0, 4, 13, 16, 20, 23]