def test_tokenize_should_raise_on_string(self):
    """Passing ``bytes`` instead of ``str`` must raise ``TypeError``."""
    # Given: both arguments deliberately supplied as bytes, not str
    text = b"let's eat food tonight"
    language = b"en"

    # When / Then
    with self.assertRaises(TypeError):
        tokenize(text, language)
def test_should_tokenize_into_dicts(self):
    """Tokenizing a non-empty string yields a non-empty list of dict tokens.

    Renamed from ``test_should_tokenize``: that name was also used by the
    exact-offsets test in this class, so this definition was silently
    shadowed and never executed by the test runner.
    """
    # Given
    u = "let's eat food tonight"
    language = "en"

    # When
    tokens = tokenize(u, language)

    # Then
    self.assertGreater(len(tokens), 0)
    self.assertTrue(all(isinstance(t, dict) for t in tokens))
def test_should_tokenize(self):
    """Tokens carry both byte offsets (``range``) and character offsets
    (``char_range``), which diverge past a multi-byte character."""
    # Given
    text = "foo bär baz"

    # When
    tokens = tokenize(text, "en")

    # Then: "ä" is two bytes in UTF-8, so byte and char offsets differ
    # for every token after it.
    expected_tokens = [
        {
            "value": value,
            "range": {"start": b_start, "end": b_end},
            "char_range": {"start": c_start, "end": c_end},
        }
        for value, b_start, b_end, c_start, c_end in [
            ("foo", 0, 3, 0, 3),
            ("bär", 4, 8, 4, 7),
            ("baz", 9, 12, 8, 11),
        ]
    ]
    self.assertListEqual(expected_tokens, tokens)
def test_should_tokenize_empty_string(self):
    """An empty input string yields an empty token list."""
    # When
    tokens = tokenize("", "en")

    # Then
    self.assertListEqual([], tokens)