Пример #1
0
    def process_x_pad_batch(self, x_a_pad_batch, x_t_pad_batch):
        """Mask an acoustic/text batch pair and keep the rows valid for both.

        The acoustic batch is passed through ``process_train_MAM_data`` and
        the text batch through ``self.tokenizer.pad`` followed by
        ``self.mlm_collater.mask_tokens``.  A row is kept only when it is
        listed in the acoustic valid ids AND its text labels contain at
        least one entry different from -100.

        Args:
            x_a_pad_batch: acoustic batch, forwarded as the 1-tuple ``spec``.
            x_t_pad_batch: text features accepted by ``self.tokenizer.pad``.

        Returns:
            A triple ``(batch_is_valid, acoustic, semantic)``:
            ``batch_is_valid`` is True when at least one row was kept,
            ``acoustic`` is ``(inputs, mask_labels, attention_mask, labels)``
            and ``semantic`` is ``(inputs, attention_mask, labels)``, each
            restricted to the kept rows.
        """
        # Acoustic side first, text side second (same call order as before,
        # in case either step draws random numbers).
        (a_keep_ids, a_inputs, a_mask_labels, a_attention_mask,
         a_labels) = process_train_MAM_data(spec=(x_a_pad_batch, ),
                                            config=self.acoustic_config)

        padded_text = self.tokenizer.pad(x_t_pad_batch, return_tensors="pt")
        s_inputs, s_labels = self.mlm_collater.mask_tokens(
            padded_text['input_ids'])
        s_attention_mask = padded_text['attention_mask']

        # Rows whose labels hold at least one value other than -100.
        non_sentinel_counts = torch.sum(s_labels != -100, dim=1)
        s_keep_ids = torch.nonzero(non_sentinel_counts,
                                   as_tuple=False).view(-1)

        # One flag per row and per modality; a row survives only if both
        # modalities flagged it.
        a_flags = torch.zeros(a_labels.size(0), dtype=torch.bool)
        a_flags[a_keep_ids] = True
        s_flags = torch.zeros(s_labels.size(0), dtype=torch.bool)
        s_flags[s_keep_ids] = True
        rows = torch.nonzero(a_flags & s_flags, as_tuple=False).view(-1)

        acoustic = tuple(
            feature[rows]
            for feature in (a_inputs, a_mask_labels, a_attention_mask,
                            a_labels))
        semantic = tuple(
            feature[rows]
            for feature in (s_inputs, s_attention_mask, s_labels))

        return len(rows) > 0, acoustic, semantic
Пример #2
0
    def process_x_pad_batch(self, x_a_pad_batch, x_t_pad_batch):
        """Mask an acoustic/text batch pair and keep the rows valid for both.

        The acoustic batch is passed through ``process_train_MAM_data`` and
        the padded text ids through ``mask_tokens``; both receive
        ``self.mask_proportion`` and ``self.main_random_noise``.  A row is
        kept only when it is listed in the acoustic valid ids AND its text
        labels contain at least one entry different from -100.

        Args:
            x_a_pad_batch: acoustic batch, forwarded as the 1-tuple ``spec``.
            x_t_pad_batch: text features accepted by ``self.tokenizer.pad``.

        Returns:
            A triple ``(batch_is_valid, acoustic, semantic)``:
            ``batch_is_valid`` is True when at least one row was kept,
            ``acoustic`` is ``(inputs, mask_labels, attention_mask, labels)``
            and ``semantic`` is ``(inputs, attention_mask, labels,
            text_raw)``, each restricted to the kept rows.
        """
        # Acoustic side first, text side second (same call order as before,
        # in case either step draws random numbers).
        (a_keep_ids, a_inputs, a_mask_labels, a_attention_mask,
         a_labels) = process_train_MAM_data(
             spec=(x_a_pad_batch, ),
             mask_proportion=self.mask_proportion,
             config=self.acoustic_config,
             tail_masking=False,
             main_random=self.main_random_noise,
             do_downsampling=False)

        padded_text = self.tokenizer.pad(x_t_pad_batch, return_tensors="pt")
        s_inputs, s_labels = mask_tokens(
            inputs=padded_text['input_ids'],
            mlm_probability=self.mask_proportion,
            tokenizer=self.tokenizer,
            tail_masking=False,
            main_random=self.main_random_noise)
        s_attention_mask = padded_text['attention_mask']

        # Rows whose labels hold at least one value other than -100.
        non_sentinel_counts = torch.sum(s_labels != -100, dim=1)
        s_keep_ids = torch.nonzero(non_sentinel_counts,
                                   as_tuple=False).view(-1)

        # One flag per row and per modality; a row survives only if both
        # modalities flagged it.
        a_flags = torch.zeros(a_labels.size(0), dtype=torch.bool)
        a_flags[a_keep_ids] = True
        s_flags = torch.zeros(s_labels.size(0), dtype=torch.bool)
        s_flags[s_keep_ids] = True
        rows = torch.nonzero(a_flags & s_flags, as_tuple=False).view(-1)

        acoustic = tuple(
            feature[rows]
            for feature in (a_inputs, a_mask_labels, a_attention_mask,
                            a_labels))
        # NOTE(review): the last entry (text_raw) is read from 'input_ids'
        # after mask_tokens already ran on that tensor. If mask_tokens edits
        # its `inputs` argument in place, these ids are masked rather than
        # raw -- confirm against mask_tokens.
        semantic = tuple(
            feature[rows]
            for feature in (s_inputs, s_attention_mask, s_labels,
                            padded_text['input_ids']))

        return len(rows) > 0, acoustic, semantic
Пример #3
0
    def process_x_pad_batch(self, x_a_pad_batch, x_t_pad_batch):
        """Mask the acoustic batch and keep only the rows it reports as valid.

        The acoustic batch is passed through ``process_train_MAM_data``.
        The text batch is not masked here: it is used unchanged as both the
        semantic inputs and the semantic labels, with an all-ones float
        attention mask shaped like ``x_t_pad_batch[:, :, 0]``.

        Args:
            x_a_pad_batch: acoustic batch, forwarded as the 1-tuple ``spec``.
            x_t_pad_batch: text tensor indexable as ``[:, :, 0]`` (so at
                least three dimensions).

        Returns:
            A triple ``(batch_is_valid, acoustic, semantic)``:
            ``batch_is_valid`` is True when at least one row was kept,
            ``acoustic`` is ``(inputs, mask_labels, attention_mask, labels)``
            and ``semantic`` is ``(inputs, attention_mask, labels,
            x_t_pad_batch)``, each restricted to the kept rows.
        """
        (a_keep_ids, a_inputs, a_mask_labels, a_attention_mask,
         a_labels) = process_train_MAM_data(
             spec=(x_a_pad_batch, ),
             mask_proportion=self.mask_proportion,
             config=self.acoustic_config,
             tail_masking=False,
             main_random=self.main_random_noise,
             do_downsampling=False)

        # Every text position is attended to.
        full_attention = torch.ones_like(x_t_pad_batch[:, :, 0],
                                         dtype=torch.float)

        # Flag the rows reported valid by the acoustic preprocessing, then
        # turn the flags back into a 1-D index tensor.
        a_flags = torch.zeros(a_labels.size(0), dtype=torch.bool)
        a_flags[a_keep_ids] = True
        rows = torch.nonzero(a_flags, as_tuple=False).view(-1)

        acoustic = tuple(
            feature[rows]
            for feature in (a_inputs, a_mask_labels, a_attention_mask,
                            a_labels))
        # The text tensor is indexed once per output slot so that inputs,
        # labels and the trailing copy stay three separate tensors.
        semantic = (x_t_pad_batch[rows], full_attention[rows],
                    x_t_pad_batch[rows], x_t_pad_batch[rows])

        return len(rows) > 0, acoustic, semantic