    def prepare_config_and_inputs_for_decoder(self):
        (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
        ) = self.prepare_config_and_inputs()

        config.is_decoder = True
        encoder_hidden_states = floats_tensor(
            [self.batch_size, self.seq_length, self.hidden_size])
        encoder_attention_mask = ids_tensor([self.batch_size, self.seq_length],
                                            vocab_size=2)

        return (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
            encoder_hidden_states,
            encoder_attention_mask,
        )
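
These tester snippets all lean on a handful of tensor factories from the transformers test suite (ids_tensor, floats_tensor, random_attention_mask). A minimal sketch of their behavior, for reading the examples outside the repo (simplified stand-ins, not the library's exact definitions):

import torch

def ids_tensor(shape, vocab_size):
    # Random integer tensor with values drawn from [0, vocab_size).
    return torch.randint(0, vocab_size, tuple(shape), dtype=torch.long)

def floats_tensor(shape, scale=1.0):
    # Random float tensor; the second positional argument acts as a scale.
    return torch.rand(tuple(shape)) * scale

def random_attention_mask(shape):
    # Random 0/1 mask; force the last position to 1 so no row is fully masked.
    mask = ids_tensor(shape, vocab_size=2)
    mask[:, -1] = 1
    return mask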
    def create_and_check_reader(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        scorer_encoder_inputs,
        reader_inputs,
        sequence_labels,
        token_labels,
        choice_labels,
    ):
        model = RealmReader(config=config)
        model.to(torch_device)
        model.eval()
        relevance_score = floats_tensor([self.reader_beam_size])
        result = model(
            reader_inputs[0],
            attention_mask=reader_inputs[1],
            token_type_ids=reader_inputs[2],
            relevance_score=relevance_score,
        )
        # the reader returns scalar tensors: chosen block, chosen candidate,
        # and the start/end positions of the answer span
        self.parent.assertEqual(result.block_idx.shape, ())
        self.parent.assertEqual(result.candidate.shape, ())
        self.parent.assertEqual(result.start_pos.shape, ())
        self.parent.assertEqual(result.end_pos.shape, ())
    def prepare_config_and_inputs(self):
        input_values = floats_tensor([self.batch_size, self.seq_length], self.vocab_size)
        attention_mask = random_attention_mask([self.batch_size, self.seq_length])

        config = self.get_config()

        return config, input_values, attention_mask
    def prepare_config_and_inputs(self):
        input_values = floats_tensor([self.batch_size, self.seq_length],
                                     self.vocab_size)
        attention_mask = random_attention_mask(
            [self.batch_size, self.seq_length])

        config = HubertConfig(
            hidden_size=self.hidden_size,
            feat_extract_norm=self.feat_extract_norm,
            feat_extract_dropout=self.feat_extract_dropout,
            feat_extract_activation=self.feat_extract_activation,
            conv_dim=self.conv_dim,
            conv_stride=self.conv_stride,
            conv_kernel=self.conv_kernel,
            conv_bias=self.conv_bias,
            num_conv_pos_embeddings=self.num_conv_pos_embeddings,
            num_conv_pos_embedding_groups=self.num_conv_pos_embedding_groups,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            hidden_dropout_prob=self.hidden_dropout_prob,
            intermediate_size=self.intermediate_size,
            layer_norm_eps=self.layer_norm_eps,
            hidden_act=self.hidden_act,
            initializer_range=self.initializer_range,
            vocab_size=self.vocab_size,
        )

        return config, input_values, attention_mask
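
A typical consumer of the Hubert inputs above is a create_and_check_model-style check that runs a forward pass and verifies the hidden-state shape. A hypothetical sketch (HubertModel is the real transformers class; output_seq_length is an assumed tester attribute accounting for the conv feature extractor's downsampling):

    def create_and_check_model(self, config, input_values, attention_mask):
        model = HubertModel(config=config)
        model.to(torch_device)
        model.eval()
        result = model(input_values, attention_mask=attention_mask)
        # the conv feature extractor downsamples the raw waveform, so the
        # output length differs from seq_length
        self.parent.assertEqual(
            result.last_hidden_state.shape,
            (self.batch_size, self.output_seq_length, self.hidden_size),
        )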
    def create_and_check_encoder(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        scorer_encoder_inputs,
        reader_inputs,
        sequence_labels,
        token_labels,
        choice_labels,
    ):
        model = RealmKnowledgeAugEncoder(config=config)
        model.to(torch_device)
        model.eval()
        relevance_score = floats_tensor([self.batch_size, self.num_candidates])
        result = model(
            scorer_encoder_inputs[0],
            attention_mask=scorer_encoder_inputs[1],
            token_type_ids=scorer_encoder_inputs[2],
            relevance_score=relevance_score,
            labels=token_labels,
        )
        # the num_candidates retrieved blocks are flattened into the batch
        # dimension, hence batch_size * num_candidates rows of logits
        self.parent.assertEqual(
            result.logits.shape,
            (self.batch_size * self.num_candidates, self.seq_length, self.vocab_size),
        )
# Example 6
    def prepare_config_and_inputs_for_common(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
        visual_embeds = floats_tensor([self.batch_size, self.visual_seq_length, self.visual_embedding_dim])

        attention_mask = None
        if self.use_attention_mask:
            attention_mask = torch.ones((self.batch_size, self.seq_length), dtype=torch.long, device=torch_device)

        visual_attention_mask = None
        if self.use_visual_attention_mask:
            visual_attention_mask = torch.ones(
                (self.batch_size, self.visual_seq_length), dtype=torch.long, device=torch_device
            )

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        visual_token_type_ids = None
        if self.use_visual_token_type_ids:
            visual_token_type_ids = ids_tensor([self.batch_size, self.visual_seq_length], self.type_vocab_size)

        config = self.prepare_config()
        return config, {
            "input_ids": input_ids,
            "token_type_ids": token_type_ids,
            "attention_mask": attention_mask,
            "visual_embeds": visual_embeds,
            "visual_token_type_ids": visual_token_type_ids,
            "visual_attention_mask": visual_attention_mask,
        }
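
A hypothetical forward-pass check built on the dict above (VisualBertModel is the real transformers class; the method name is assumed). The text and visual streams are concatenated, so the output sequence covers seq_length + visual_seq_length positions:

    def create_and_check_model(self, config, inputs_dict):
        model = VisualBertModel(config=config)
        model.to(torch_device)
        model.eval()
        result = model(**inputs_dict)
        self.parent.assertEqual(
            result.last_hidden_state.shape,
            (self.batch_size, self.seq_length + self.visual_seq_length, self.hidden_size),
        )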
# Example 7
    def check_xvector_training(self, config, *args):
        config.ctc_zero_infinity = True
        model = UniSpeechSatForXVector(config=config)
        model.to(torch_device)
        model.train()

        # freeze everything but the classification head
        model.freeze_base_model()

        # use a longer sequence length to account for TDNN temporal downsampling
        input_values = floats_tensor([self.batch_size, self.seq_length * 2],
                                     self.vocab_size)

        input_lengths = [input_values.shape[-1] // i for i in [4, 2, 1]]
        labels = ids_tensor((input_values.shape[0], 1),
                            len(model.config.id2label))

        # pad input
        for i in range(len(input_lengths)):
            input_values[i, input_lengths[i]:] = 0.0

        loss = model(input_values, labels=labels).loss
        self.parent.assertFalse(torch.isinf(loss).item())

        loss.backward()
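
The xvector check above is typically invoked from the aggregated test case, roughly as follows (the test method name is assumed):

    def test_xvector_train(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.check_xvector_training(*config_and_inputs)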
# Example 8
    def prepare_config_and_inputs_for_flickr(self):
        region_to_phrase_position = torch.cat(
            (
                ids_tensor([self.batch_size, self.seq_length],
                           self.visual_seq_length),
                torch.ones(self.batch_size,
                           self.visual_seq_length,
                           dtype=torch.long,
                           device=torch_device) * -1,
            ),
            dim=-1,
        )
        flickr_labels = None
        if self.use_labels:
            flickr_labels = floats_tensor([
                self.batch_size, self.seq_length + self.visual_seq_length,
                self.visual_seq_length
            ])

        config, input_dict = self.prepare_config_and_inputs_for_common()

        input_dict.update({
            "region_to_phrase_position": region_to_phrase_position,
            "labels": flickr_labels
        })
        return config, input_dict
# Example 9
    def test_training(self):
        if not self.model_tester.is_training:
            return

        config, *inputs = self.model_tester.prepare_config_and_inputs()
        input_ids, token_type_ids, input_mask, scorer_encoder_inputs = inputs[0:4]
        config.return_dict = True

        tokenizer = RealmTokenizer.from_pretrained("google/realm-orqa-nq-openqa")

        # RealmKnowledgeAugEncoder training
        model = RealmKnowledgeAugEncoder(config)
        model.to(torch_device)
        model.train()

        inputs_dict = {
            "input_ids": scorer_encoder_inputs[0].to(torch_device),
            "attention_mask": scorer_encoder_inputs[1].to(torch_device),
            "token_type_ids": scorer_encoder_inputs[2].to(torch_device),
            "relevance_score": floats_tensor([self.model_tester.batch_size, self.model_tester.num_candidates]),
        }
        inputs_dict["labels"] = torch.zeros(
            (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
        )
        loss = model(**inputs_dict).loss
        loss.backward()

        # RealmForOpenQA training
        openqa_config = copy.deepcopy(config)
        openqa_config.vocab_size = 30522  # the retrieved texts inevitably contain token ids beyond the tester's tiny 99-token vocab
        openqa_config.num_block_records = 5
        openqa_config.searcher_beam_size = 2

        block_records = np.array(
            [
                b"This is the first record.",
                b"This is the second record.",
                b"This is the third record.",
                b"This is the fourth record.",
                b"This is the fifth record.",
            ],
            dtype=object,  # np.object was removed in NumPy 1.24; the builtin object is equivalent
        )
        retriever = RealmRetriever(block_records, tokenizer)
        model = RealmForOpenQA(openqa_config, retriever)
        model.to(torch_device)
        model.train()

        inputs_dict = {
            "input_ids": input_ids[:1].to(torch_device),
            "attention_mask": input_mask[:1].to(torch_device),
            "token_type_ids": token_type_ids[:1].to(torch_device),
            "answer_ids": input_ids[:1].tolist(),
        }
        inputs = self._prepare_for_class(inputs_dict, RealmForOpenQA)
        loss = model(**inputs).reader_output.loss
        loss.backward()
# Example 10
    def prepare_config_and_inputs_for_vqa(self):
        vqa_labels = None

        if self.use_labels:
            vqa_labels = floats_tensor([self.batch_size, self.num_labels])

        config, input_dict = self.prepare_config_and_inputs_for_common()

        input_dict.update({"labels": vqa_labels})
        return config, input_dict
# Example 11
    def prepare_config_and_inputs(self):
        pixel_values = floats_tensor([
            self.batch_size, self.num_channels, self.image_size,
            self.image_size
        ])

        labels = None
        if self.use_labels:
            labels = ids_tensor([self.batch_size],
                                self.type_sequence_label_size)

        config = self.get_config()

        return config, pixel_values, labels
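
These pixel_values/labels pairs usually feed an image-classification check. A minimal sketch, assuming a ViT-style head (ViTForImageClassification is the real transformers class; the surrounding method is hypothetical):

    def create_and_check_for_image_classification(self, config, pixel_values, labels):
        config.num_labels = self.type_sequence_label_size
        model = ViTForImageClassification(config)
        model.to(torch_device)
        model.eval()
        result = model(pixel_values, labels=labels)
        self.parent.assertEqual(
            result.logits.shape, (self.batch_size, self.type_sequence_label_size)
        )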
# Example 12
    def prepare_config_and_inputs(self):
        pixel_values = floats_tensor([self.batch_size, self.num_channels, self.min_size, self.max_size]).to(
            torch_device
        )

        pixel_mask = torch.ones([self.batch_size, self.min_size, self.max_size], device=torch_device)

        mask_labels = (
            torch.rand([self.batch_size, self.num_labels, self.min_size, self.max_size], device=torch_device) > 0.5
        ).float()
        class_labels = (torch.rand((self.batch_size, self.num_labels), device=torch_device) > 0.5).long()

        config = self.get_config()
        return config, pixel_values, pixel_mask, mask_labels, class_labels
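
The mask/class label tensors above match the signature of MaskFormerForInstanceSegmentation, so a loss check can be sketched as follows (check_loss is a hypothetical name; the forward arguments are the real ones):

    def check_loss(self, config, pixel_values, pixel_mask, mask_labels, class_labels):
        model = MaskFormerForInstanceSegmentation(config=config)
        model.to(torch_device)
        model.train()
        loss = model(
            pixel_values=pixel_values,
            pixel_mask=pixel_mask,
            mask_labels=mask_labels,
            class_labels=class_labels,
        ).loss
        self.parent.assertFalse(torch.isnan(loss).item())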