def test_make_bytes_input(self):
    s1 = "I want some coffee today"
    s2 = "Turn it up"
    max_char_length = 5

    batch = [s1.split(), s2.split()]
    bytes, seq_lens = make_byte_inputs(batch, max_char_length)

    def to_bytes(word, pad_to):
        return list(word.encode()) + [0] * (pad_to - len(word))

    # "coffee" is truncated to the 5-byte budget; the shorter sentence is
    # padded out with empty words to match the longest sentence.
    expected_bytes = [
        [
            to_bytes("I", 5),
            to_bytes("want", 5),
            to_bytes("some", 5),
            to_bytes("coffe", 5),
            to_bytes("today", 5),
        ],
        [
            to_bytes("Turn", 5),
            to_bytes("it", 5),
            to_bytes("up", 5),
            to_bytes("", 5),
            to_bytes("", 5),
        ],
    ]
    expected_seq_lens = [5, 3]

    self.assertIsInstance(bytes, torch.LongTensor)
    self.assertIsInstance(seq_lens, torch.LongTensor)
    self.assertEqual(bytes.tolist(), expected_bytes)
    self.assertEqual(seq_lens.tolist(), expected_seq_lens)
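
# The test above exercises make_byte_inputs, whose real implementation is not
# shown in this section. The following is a hypothetical minimal sketch of the
# behavior the assertions assume: UTF-8 bytes per word, truncated/zero-padded
# to max_byte_len, with sentences padded by empty words up to the longest
# sentence in the batch. The name make_byte_inputs_sketch and the optional
# offset argument are illustrative only.
def make_byte_inputs_sketch(batch, max_byte_len, offset=0):
    seq_lens = torch.tensor([len(sentence) for sentence in batch], dtype=torch.long)
    max_seq_len = int(seq_lens.max())
    byte_ids = torch.zeros(len(batch), max_seq_len, max_byte_len, dtype=torch.long)
    for i, sentence in enumerate(batch):
        for j, word in enumerate(sentence):
            # Encode to UTF-8, truncate to the byte budget, and shift by
            # `offset` so that 0 stays reserved for padding.
            for k, b in enumerate(word.encode()[:max_byte_len]):
                byte_ids[i, j, k] = b + offset
    return byte_ids, seq_lens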
def forward(self, tokens: List[List[str]]):
    # Per-sentence lengths, then word ids padded to a rectangular batch.
    seq_lens = make_sequence_lengths(tokens)
    word_ids = self.vocab.lookup_indices_2d(tokens)
    word_ids = pad_2d(word_ids, seq_lens, self.pad_idx)
    # Byte-level inputs for the byte/character embedding path.
    token_bytes, _ = make_byte_inputs(
        tokens, self.max_byte_len, self.byte_offset_for_non_padding
    )
    logits = self.model(
        torch.tensor(word_ids), token_bytes, torch.tensor(seq_lens)
    )
    return self.output_layer(logits)
def forward(self, tokens: List[List[str]], dense_feat: List[List[float]]):
    seq_lens = make_sequence_lengths(tokens)
    word_ids = self.vocab.lookup_indices_2d(tokens)
    word_ids = pad_2d(word_ids, seq_lens, self.pad_idx)
    token_bytes, _ = make_byte_inputs(
        tokens, self.max_byte_len, self.byte_offset_for_non_padding
    )
    # Dense features are normalized before being passed to the model.
    dense_feat = self.normalizer.normalize(dense_feat)
    logits = self.model(
        torch.tensor(word_ids),
        token_bytes,
        torch.tensor(seq_lens),
        torch.tensor(dense_feat, dtype=torch.float),
    )
    return self.output_layer(logits)
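
# Hypothetical usage of the dense-feature forward above, assuming `module`
# has been constructed elsewhere with a vocab, normalizer, model, and output
# layer as implied by the method body; the token and feature values below are
# illustrative only.
tokens = [["turn", "it", "up"], ["play", "some", "music"]]
dense_feat = [[0.5, 1.0], [0.2, 0.0]]
scores = module.forward(tokens, dense_feat)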