Python: примеры использования UnicodeCharsVocabulary.encode_chars

Язык программирования: Python

Пространство имен/Пакет: data

Метод/Функция: encode_chars

Примеров на hotexamples.com: 2

Python UnicodeCharsVocabulary.encode_chars - 2 примера найдено. Это лучшие примеры Python кода для data.UnicodeCharsVocabulary.encode_chars, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

UnicodeCharsVocabulary(6)

encode_chars(2)

id_to_word(1)

word_to_char_ids(1)

word_to_id(1)

Пример #1

Показать файл

class TestUnicodeCharsVocabulary(unittest.TestCase):
    """Tests for the character-id encoding of ``UnicodeCharsVocabulary``.

    Each test compares ids returned by the vocabulary with hand-written
    arrays.  In those arrays 116 and 104 are the code points of ``'t'`` and
    ``'h'``, and 196, 128 are the UTF-8 bytes of ``chr(256)``.  The ids
    256-260 are special values produced by the vocabulary; judging by the
    expected rows, 258/259 bracket every word and 260 fills the remainder
    (presumably begin-of-word, end-of-word and padding -- confirm against
    the vocabulary implementation).
    """

    def setUp(self):
        """Write a small vocabulary file and load it into ``self.vocab``."""
        words = ['the', '.', chr(256) + 't', '<S>', '</S>', '<UNK>']
        fd, tmp = tempfile.mkstemp()
        # Register the removal immediately: cleanups run even when the rest
        # of setUp raises, whereas tearDown would be skipped in that case.
        self.addCleanup(os.remove, tmp)
        # mkstemp returns an already-open descriptor; wrapping it ensures it
        # is closed instead of leaking (an open handle also blocks deletion
        # of the file on Windows).
        # NOTE(review): the file is written with the platform default
        # encoding, as before -- confirm UnicodeCharsVocabulary reads it the
        # same way before pinning an explicit encoding here.
        with os.fdopen(fd, 'w') as fout:
            fout.write('\n'.join(words))
        # The second argument is 5 and every expected row below has five
        # entries (presumably the maximum number of ids per word).
        self.vocab = UnicodeCharsVocabulary(tmp, 5)
        self._tmp = tmp

    @staticmethod
    def _expected_sentence_char_ids():
        """Return the expected ids for the sentence ``'th thhhhh <chr(256)>t'``.

        The first and last rows are extra rows surrounding the three words
        (presumably the sentence begin/end markers -- confirm).
        """
        return np.array(
            [[258, 256, 259, 260, 260], [258, 116, 104, 259, 260],
             [258, 116, 104, 104, 259], [258, 196, 128, 116, 259],
             [258, 257, 259, 260, 260]],
            dtype=np.int32)

    def test_vocab_word_to_char_ids(self):
        """Check ``word_to_char_ids`` for short, over-long and non-ASCII words."""
        # A word shorter than the row: one trailing 260 fills the row.
        char_ids = self.vocab.word_to_char_ids('th')
        expected = np.array([258, 116, 104, 259, 260], dtype=np.int32)
        np.testing.assert_array_equal(char_ids, expected)

        # A word longer than the row: only the first three characters are
        # kept between the 258/259 markers.
        char_ids = self.vocab.word_to_char_ids('thhhhh')
        expected = np.array([258, 116, 104, 104, 259], dtype=np.int32)
        np.testing.assert_array_equal(char_ids, expected)

        # chr(256) contributes two ids: its UTF-8 bytes 196 and 128.
        char_ids = self.vocab.word_to_char_ids(chr(256) + 't')
        expected = np.array([258, 196, 128, 116, 259], dtype=np.int32)
        np.testing.assert_array_equal(char_ids, expected)

    def test_bos_eos(self):
        """Check that ``<S>`` and ``</S>`` map to ``bos_chars``/``eos_chars``.

        Both lookup paths are exercised: encoding the token string directly
        and indexing ``word_char_ids`` by the token's word id.
        """
        bos_ids = self.vocab.word_to_char_ids('<S>')
        np.testing.assert_array_equal(bos_ids, self.vocab.bos_chars)

        bos_ids = self.vocab.word_char_ids[self.vocab.word_to_id('<S>')]
        np.testing.assert_array_equal(bos_ids, self.vocab.bos_chars)

        eos_ids = self.vocab.word_to_char_ids('</S>')
        np.testing.assert_array_equal(eos_ids, self.vocab.eos_chars)

        eos_ids = self.vocab.word_char_ids[self.vocab.word_to_id('</S>')]
        np.testing.assert_array_equal(eos_ids, self.vocab.eos_chars)

    def test_vocab_encode_chars(self):
        """Check ``encode_chars`` on a three-word sentence."""
        sentence = ' '.join(['th', 'thhhhh', chr(256) + 't'])
        char_ids = self.vocab.encode_chars(sentence)
        np.testing.assert_array_equal(
            char_ids, self._expected_sentence_char_ids())

    def test_vocab_encode_chars_reverse(self):
        """Check ``encode_chars(..., reverse=True)`` on the reversed sentence.

        The words are passed in reverse order, so the expected result is the
        forward matrix with its rows flipped.
        """
        sentence = ' '.join(reversed(['th', 'thhhhh', chr(256) + 't']))
        vocab = UnicodeCharsVocabulary(self._tmp, 5)
        char_ids = vocab.encode_chars(sentence, reverse=True)
        expected = self._expected_sentence_char_ids()[::-1, :]
        np.testing.assert_array_equal(char_ids, expected)

Пример #2

Показать файл

 def test_vocab_encode_chars_reverse(self):
     """Check ``encode_chars(..., reverse=True)`` on a reversed sentence.

     The three words are joined in reverse order and encoded with
     ``reverse=True``; the result must equal the forward-order id matrix
     with its rows flipped.
     """
     sentence = ' '.join(reversed(['th', 'thhhhh', chr(256) + 't']))
     # A fresh vocabulary is loaded from the file stored in self._tmp.
     vocab = UnicodeCharsVocabulary(self._tmp, 5)
     char_ids = vocab.encode_chars(sentence, reverse=True)
     # Forward-order rows: 116/104 are the code points of 't'/'h', and
     # 196, 128 are the UTF-8 bytes of chr(256).  [::-1, :] flips row order.
     expected = np.array(
         [[258, 256, 259, 260, 260], [258, 116, 104, 259, 260],
          [258, 116, 104, 104, 259], [258, 196, 128, 116, 259],
          [258, 257, 259, 260, 260]],
         dtype=np.int32)[::-1, :]
     # assert_array_equal rejects mismatched shapes (which a bare
     # ``(a == b).all()`` can let through via broadcasting) and prints the
     # differing elements on failure.
     np.testing.assert_array_equal(char_ids, expected)