Example #1
    def test_inference_no_head(self):
        model = OPTModel.from_pretrained("facebook/opt-350m").to(torch_device)
        input_ids = _long_tensor(
            [[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])

        with torch.no_grad():
            output = model(input_ids=input_ids).last_hidden_state

        expected_shape = torch.Size((1, 11, 512))
        self.assertEqual(output.shape, expected_shape)
        expected_slice = torch.tensor(
            [[-0.2867, -1.9256, -0.3062], [-1.2711, -0.1337, -0.1897],
             [0.4109, 0.1187, -1.3142]],
            device=torch_device)
        self.assertTrue(
            torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
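The helpers `_long_tensor` and `torch_device` come from the test suite. As a point of reference, here is a minimal standalone sketch of the same inference check, assuming only `torch` and `transformers` are installed (the token ids are copied verbatim from the test):

import torch
from transformers import OPTModel

device = "cuda" if torch.cuda.is_available() else "cpu"
model = OPTModel.from_pretrained("facebook/opt-350m").to(device).eval()

# Pre-tokenized prompt copied from the test above.
input_ids = torch.tensor(
    [[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]],
    dtype=torch.long, device=device)

with torch.no_grad():
    hidden = model(input_ids=input_ids).last_hidden_state

print(hidden.shape)  # expected: torch.Size([1, 11, 512])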
Example #2
    def create_and_check_decoder_model_past_large_inputs(
            self, config, inputs_dict):
        model = OPTModel(config=config).to(torch_device).eval()

        input_ids = inputs_dict["input_ids"]
        attention_mask = inputs_dict["attention_mask"]
        head_mask = inputs_dict["head_mask"]

        # first forward pass
        outputs = model(input_ids,
                        attention_mask=attention_mask,
                        head_mask=head_mask,
                        use_cache=True)

        output, past_key_values = outputs.to_tuple()

        # create hypothetical next tokens and extend next_input_ids
        next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
        next_attn_mask = ids_tensor((self.batch_size, 3), 2)

        # append the new tokens to input_ids and attention_mask
        next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
        next_attention_mask = torch.cat([attention_mask, next_attn_mask],
                                        dim=-1)

        output_from_no_past = model(
            next_input_ids,
            attention_mask=next_attention_mask)["last_hidden_state"]
        output_from_past = model(
            next_tokens,
            attention_mask=next_attention_mask,
            past_key_values=past_key_values)["last_hidden_state"]

        # select random slice
        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
        output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx].detach()
        output_from_past_slice = output_from_past[:, :, random_slice_idx].detach()

        self.parent.assertTrue(
            output_from_past_slice.shape[1] == next_tokens.shape[1])

        # test that outputs are equal for slice
        self.parent.assertTrue(
            torch.allclose(output_from_past_slice,
                           output_from_no_past_slice,
                           atol=1e-3))
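The check above verifies that incremental decoding with `past_key_values` matches a full forward pass over the concatenated sequence. For context, here is a minimal sketch of the same caching pattern in a greedy decoding loop; it uses `OPTForCausalLM` (the LM-head variant) rather than the bare `OPTModel`, and the prompt ids are hypothetical:

import torch
from transformers import OPTForCausalLM

model = OPTForCausalLM.from_pretrained("facebook/opt-350m").eval()
input_ids = torch.tensor([[2, 100, 200]], dtype=torch.long)  # hypothetical prompt ids

past_key_values = None
with torch.no_grad():
    for _ in range(5):
        if past_key_values is None:
            # First step: run the full prompt and prime the cache.
            out = model(input_ids, use_cache=True)
        else:
            # Later steps: feed only the newest token plus the cache.
            out = model(input_ids[:, -1:], use_cache=True,
                        past_key_values=past_key_values)
        past_key_values = out.past_key_values
        next_token = out.logits[:, -1, :].argmax(dim=-1, keepdim=True)
        input_ids = torch.cat([input_ids, next_token], dim=-1)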
Example #3
    def test_inference_no_head(self):
        model = OPTModel.from_pretrained("facebook/opt-350m").to(torch_device)
        input_ids = _long_tensor(
            [[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])

        with torch.no_grad():
            output = model(input_ids=input_ids).last_hidden_state

        expected_shape = torch.Size((1, 11, 512))
        self.assertEqual(output.shape, expected_shape)
        # expected value works for CPU, as well as GPU (with TF32 disabled)
        expected_slice = torch.tensor(
            [
                [-0.28726277, -1.9241608, -0.3058734],
                [-1.2737825, -0.13332152, -0.18766522],
                [0.41159445, 0.1191957, -1.3107123],
            ],
            device=torch_device,
        )
        assert_tensors_close(output[0, :3, :3], expected_slice, atol=5e-5)
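The TF32 caveat in the comment matters for the tight 5e-5 tolerance: on NVIDIA Ampere and newer GPUs, TF32 matmuls are enabled by default and can perturb activations by more than that. The standard PyTorch flags to disable it:

import torch

# Disable TF32 so GPU matmul results match CPU within tight tolerances.
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False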
Example #4
from pathlib import Path

from transformers import OPTConfig, OPTModel


def convert_opt_checkpoint(checkpoint_path, pytorch_dump_folder_path, config=None):
    """
    Copy/paste/tweak model's weights to our BERT structure.
    """
    state_dict = load_checkpoint(checkpoint_path)

    if config is not None:
        config = OPTConfig.from_pretrained(config)
    else:
        config = OPTConfig()

    model = OPTModel(config).half().eval()
    model.load_state_dict(state_dict)

    # Save the converted model
    Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
    model.save_pretrained(pytorch_dump_folder_path)
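Conversion scripts like this are usually invoked from the command line. A sketch of such an entry point follows; the flag names are illustrative, not the script's actual CLI:

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    # Illustrative flag names; the real script's arguments may differ.
    parser.add_argument("--checkpoint_path", required=True,
                        help="Path to the original OPT checkpoint.")
    parser.add_argument("--pytorch_dump_folder_path", required=True,
                        help="Folder where the converted model is saved.")
    parser.add_argument("--config", default=None,
                        help="Optional OPTConfig identifier or path.")
    args = parser.parse_args()

    convert_opt_checkpoint(args.checkpoint_path,
                           args.pytorch_dump_folder_path,
                           config=args.config)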