Exemplo n.º 1
0
 def setUp(self):
     """Create the two language-pair dataset fixtures.

     ``self.dataset_1`` is built from the odd integers in ``[1, 5000)`` and
     ``self.dataset_2`` from the even integers in ``[0, 5000)``. Both share
     a single ``mock_dict()`` instance and are created with
     ``shuffle=False``.
     """
     shared_dict = mock_dict()

     def make_pair_dataset(first_token):
         # Every second integer from ``first_token`` up to (excluding) 5000,
         # reshaped into a single row.
         token_row = torch.LongTensor(
             list(range(first_token, 5000, 2))
         ).view(1, -1)
         block_ds = TokenBlockDataset(
             token_row,
             sizes=[token_row.size(-1)],
             block_size=1,
             pad=0,
             eos=1,
             include_targets=False,
         )
         return LanguagePairDataset(
             block_ds,
             block_ds.sizes,
             shared_dict,
             shuffle=False,
         )

     self.dataset_1 = make_pair_dataset(1)
     self.dataset_2 = make_pair_dataset(0)
Exemplo n.º 2
0
def lang_pair_dataset(lengths: Sequence[int]) -> LanguagePairDataset:
    """Build a ``LanguagePairDataset`` with samples of the given lengths.

    The sample at position ``i`` is the integer ``i`` repeated
    ``lengths[i]`` times. The samples are wrapped in a ``ListDataset`` and
    paired with ``lengths`` and a fresh ``mock_dict()``.
    """
    samples = [[index] * length for index, length in enumerate(lengths)]
    source = ListDataset(samples)
    dictionary = mock_dict()
    return LanguagePairDataset(source, lengths, dictionary)