Пример #1
0
  def test_split_token_to_subtokens(self):
    """Splitting "abc" should yield the subtokens "ab" and "c"."""
    # Vocabulary: each single character plus the two-character piece "ab".
    vocab = {
        "a": 0,
        "b": 1,
        "c": 2,
        "ab": 3,
    }
    # Passed as the maximum subtoken length; matches the longest vocab entry.
    longest_piece = 2

    pieces = tokenizer._split_token_to_subtokens("abc", vocab, longest_piece)

    expected = ["ab", "c"]
    self.assertEqual(expected, pieces)
Пример #2
0
  def test_split_token_to_subtokens(self):
    """Verifies the subtoken split of the token "abc".

    With a vocabulary containing "a", "b", "c" and "ab", and 2 passed as the
    maximum subtoken length, the expected result is ["ab", "c"].
    """
    vocabulary = dict(a=0, b=1, c=2, ab=3)
    result = tokenizer._split_token_to_subtokens("abc", vocabulary, 2)
    self.assertEqual(["ab", "c"], result)