Example #1
0
    def __call__(self, batch):
        """Collate a batch of (id, tokens, humor_rating, humor_controversy)
        samples into padded, device-resident tensors.

        Returns a tuple of (ids, padded_inputs, pad_mask, ratings,
        controversies) in the same order as the incoming samples.
        """
        ids, token_seqs, ratings, controversies = map(list, zip(*batch))

        # Per-sample sequence lengths, needed to build the attention mask.
        lengths = torch.tensor([len(seq) for seq in token_seqs],
                               device=self.device)

        # Attention mask over the padded batch (pad_mask is a project
        # helper; presumably marks real vs. padded positions — confirm).
        mask = pad_mask(lengths, max_length=max(lengths), device=self.device)

        # Right-pad all token sequences to a common length.
        padded = (pad_sequence(token_seqs,
                               batch_first=True,
                               padding_value=self.pad_indx)
                  .to(self.device))

        ratings = mktensor(ratings, dtype=torch.float)
        controversies = mktensor(controversies, dtype=torch.long)
        return ids, padded, mask, ratings, controversies
Example #2
0
    def __getitem__(self, index):
        """Return the (text, is_humor) pair stored at *index*.

        The raw text is either run through the configured transform
        pipeline or, when no transforms are set, tokenized and converted
        to a LongTensor of input ids.
        """
        raw_text, humor = self.data[index]
        label = self.label2idx(humor)

        if self.transforms:
            text = raw_text
            for transform in self.transforms:
                text = transform(text)
        else:
            encoded = self.tokenizer(raw_text)
            text = mktensor(encoded['input_ids'], dtype=torch.long)

        return text, int(label)
Example #3
0
    def __getitem__(self, index):
        """Return (myid, text, humor_rating, humor_controversy) at *index*.

        Train-split rows carry full annotations; other splits only hold
        (id, text), so the rating and controversy fields come back as None.
        Note: is_humor and offense_rating are unpacked from train rows but
        intentionally not returned.
        """
        is_train = self.splitname == 'train'

        if is_train:
            (myid, text, is_humor, humor_rating,
             humor_controversy, offense_rating) = self.data[index]
        else:
            myid, text = self.data[index]
            humor_rating, humor_controversy = None, None

        if self.transforms:
            for transform in self.transforms:
                text = transform(text)
        else:
            # No transform pipeline: tokenize and tensorize the input ids.
            encoded = self.tokenizer(text)
            text = mktensor(encoded['input_ids'], dtype=torch.long)

        myid = int(myid)
        if is_train:
            humor_rating = float(humor_rating)
            humor_controversy = int(humor_controversy)
        return myid, text, humor_rating, humor_controversy