Example #1
def test_encode_decode_from_loaded_tokenizer_with_1_maxlen(self, temp_dir):
    tokenizer = Tokenizer(self.captions, 1)
    tokenizer.save_dictionaries(temp_dir.path)
    loading_tokenizer = Tokenizer()
    loading_tokenizer.load_dictionaries(temp_dir.path)
    phrase_encoded = loading_tokenizer.encode_caption(self.phrase)
    # Decoding with the original tokenizer checks that both instances share
    # the same dictionaries after the save/load round trip.
    phrase_decoded = tokenizer.decode_caption(phrase_encoded)
    # With maxlen=1 the encoding only has room for the END token.
    self.assertEqual(phrase_decoded, [Tokenizer.END])
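All of these examples lean on fixtures the listing does not show: self.captions, self.phrase, self.max_phraselen, and a temp_dir argument that appears to be injected by a decorator such as testfixtures' @tempdir(), which passes in a TempDirectory whose .path points at a real directory. A minimal sketch of what that setup might look like; every concrete value here is an assumption:

import unittest

from testfixtures import tempdir  # assumed source of the injected temp_dir


class TokenizerTest(unittest.TestCase):
    def setUp(self):
        # Captions used to build the vocabulary (assumed values).
        self.captions = ["THERE IS ONE HAND", "GIVE ME YOUR HAND"]
        # "XYZ" is assumed to be out of vocabulary, so it decodes to UNK.
        self.phrase = "THERE XYZ ONE HAND"
        # Longest caption length plus one slot for the END token (assumed).
        self.max_phraselen = 5

    @tempdir()
    def test_encode_decode_from_loaded_tokenizer_with_1_maxlen(self, temp_dir):
        ...  # body as in Example #1 above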
Example #2
def test_encode_decode(self):
    for value in [None, 5, 10]:
        with self.subTest(captions=self.captions, user_maxlen=value):
            tokenizer = Tokenizer(self.captions, value)
            phrase_encoded = tokenizer.encode_caption(self.phrase)
            phrase_decoded = tokenizer.decode_caption(phrase_encoded)
            expected_string = ["THERE", Tokenizer.UNK, "ONE", "HAND"]
            # The checked prefix is capped at the longest caption length
            # when user_maxlen is absent or larger than it.
            pad_length = (self.max_phraselen
                          if value is None or value >= self.max_phraselen
                          else value)
            # Everything after the phrase itself should be END padding.
            self.assertEqual(
                phrase_decoded[:pad_length],
                expected_string
                + [Tokenizer.END] * (pad_length - len(expected_string)))
Example #3
def test_get_string(self):
    tokenizer = Tokenizer(self.captions, 5)
    first_chunk = ["THERE", tokenizer.UNK, "ONE", "HAND"]
    for remove_end in [True, False]:
        phrase_encoded = tokenizer.encode_caption(self.phrase)
        if remove_end:
            # Overwrite the trailing END token with the first token, so
            # get_string has nothing to stop at and emits the full sequence.
            phrase_encoded[-1] = phrase_encoded[0]
            expected = " ".join(first_chunk + [first_chunk[0]])
        else:
            # With END intact, get_string should stop before it.
            expected = " ".join(first_chunk)
        with self.subTest(remove_end=remove_end):
            string = tokenizer.get_string(phrase_encoded)
            self.assertEqual(expected, string)
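The overwrite of the last slot is what exercises both branches of get_string. A standalone sketch of the stop-at-END join this behavior implies; the sentinel string, the token IDs, and the vocabulary are assumptions for illustration:

END = "<end>"  # assumed sentinel; the real Tokenizer.END may differ
idx2word = {0: END, 1: "<unk>", 2: "THERE", 3: "ONE", 4: "HAND"}

def get_string(encoded):
    # Join decoded words, stopping at the first END token.
    words = []
    for idx in encoded:
        word = idx2word[idx]
        if word == END:
            break
        words.append(word)
    return " ".join(words)

encoded = [2, 1, 3, 4, 0]
print(get_string(encoded))   # THERE <unk> ONE HAND
encoded[-1] = encoded[0]     # overwrite END with the first token
print(get_string(encoded))   # THERE <unk> ONE HAND THERE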
Example #4
def test_encode_decode_from_loaded_tokenizer(self, temp_dir):
    for value in [None, 5, 10]:
        with self.subTest(captions=self.captions, user_maxlen=value):
            tokenizer = Tokenizer(self.captions, value)
            tokenizer.save_dictionaries(temp_dir.path)
            loading_tokenizer = Tokenizer()
            loading_tokenizer.load_dictionaries(temp_dir.path)
            phrase_encoded = loading_tokenizer.encode_caption(self.phrase)
            # Decode with the original tokenizer to confirm the loaded
            # dictionaries match the saved ones.
            phrase_decoded = tokenizer.decode_caption(phrase_encoded)
            expected_string = ["THERE", Tokenizer.UNK, "ONE", "HAND"]
            pad_length = (self.max_phraselen
                          if value is None or value >= self.max_phraselen
                          else value)
            self.assertEqual(
                phrase_decoded[:pad_length],
                expected_string
                + [Tokenizer.END] * (pad_length - len(expected_string)))
Example #5
def test_encoding_length_equal_max_len(self):
    for value in [None, 1, 5, 10]:
        with self.subTest(captions=self.captions, user_maxlen=value):
            tokenizer = Tokenizer(self.captions, value)
            phrase_encoded = tokenizer.encode_caption(self.phrase)
            # Encodings are always padded or truncated to exactly maxlen.
            self.assertEqual(len(phrase_encoded), tokenizer.maxlen)
Example #6
def test_encode_decode_with_1_maxlen(self):
    tokenizer = Tokenizer(self.captions, 1)
    phrase_encoded = tokenizer.encode_caption(self.phrase)
    phrase_decoded = tokenizer.decode_caption(phrase_encoded)
    # With maxlen=1 the whole phrase is truncated away and only END remains.
    self.assertEqual(phrase_decoded, [Tokenizer.END])
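Taken together, the assertions above pin down most of the Tokenizer API these tests exercise: a vocabulary built from captions, END/UNK sentinels, encodings padded or truncated to maxlen, and dictionaries that survive a save/load round trip. The following is a minimal sketch reconstructed from that behavior, not the project's actual implementation; the sentinel strings, vocabulary ordering, and file format are all assumptions:

import json
import os


class Tokenizer:
    """Minimal sketch inferred from the test assertions above."""

    END = "<end>"  # assumed sentinel strings
    UNK = "<unk>"

    def __init__(self, captions=None, user_maxlen=None):
        self.word2idx = {}
        self.idx2word = {}
        self.maxlen = 0
        if captions is not None:
            words = sorted({w for caption in captions for w in caption.split()})
            for idx, word in enumerate([self.END, self.UNK] + words):
                self.word2idx[word] = idx
                self.idx2word[idx] = word
            longest = max(len(c.split()) for c in captions) + 1  # +1 for END
            self.maxlen = longest if user_maxlen is None else user_maxlen

    def encode_caption(self, caption):
        # Keep at most maxlen - 1 word slots, then pad with END up to maxlen.
        ids = [self.word2idx.get(w, self.word2idx[self.UNK])
               for w in caption.split()][:self.maxlen - 1]
        return ids + [self.word2idx[self.END]] * (self.maxlen - len(ids))

    def decode_caption(self, encoded):
        return [self.idx2word[idx] for idx in encoded]

    def get_string(self, encoded):
        # Join decoded words, stopping at the first END token.
        words = []
        for idx in encoded:
            word = self.idx2word[idx]
            if word == self.END:
                break
            words.append(word)
        return " ".join(words)

    def save_dictionaries(self, dirpath):
        with open(os.path.join(dirpath, "dictionaries.json"), "w") as fh:
            json.dump({"word2idx": self.word2idx, "maxlen": self.maxlen}, fh)

    def load_dictionaries(self, dirpath):
        with open(os.path.join(dirpath, "dictionaries.json")) as fh:
            data = json.load(fh)
        self.word2idx = data["word2idx"]
        self.idx2word = {idx: word for word, idx in self.word2idx.items()}
        self.maxlen = data["maxlen"]

A quick check of this sketch against the expectations in the tests (data assumed, as above):

captions = ["THERE IS ONE HAND"]
tok = Tokenizer(captions, 5)
enc = tok.encode_caption("THERE XYZ ONE HAND")  # "XYZ" falls back to UNK
print(tok.decode_caption(enc))  # ['THERE', '<unk>', 'ONE', 'HAND', '<end>']
print(tok.get_string(enc))      # THERE <unk> ONE HAND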