def test_inference_no_head(self):
    model = OPTModel.from_pretrained("facebook/opt-350m").to(torch_device)
    input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])

    with torch.no_grad():
        output = model(input_ids=input_ids).last_hidden_state

    expected_shape = torch.Size((1, 11, 512))
    self.assertEqual(output.shape, expected_shape)
    expected_slice = torch.tensor(
        [[-0.2867, -1.9256, -0.3062], [-1.2711, -0.1337, -0.1897], [0.4109, 0.1187, -1.3142]],
        device=torch_device,
    )
    self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
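
# The test above relies on a `_long_tensor` helper defined elsewhere in the
# test module. A minimal sketch of that helper, assuming it simply wraps
# `torch.tensor` with a long dtype on the test device (hedged reconstruction,
# not necessarily the module's exact definition):
def _long_tensor(tok_lst):
    # Build an integer tensor of token ids on the device used by the tests.
    return torch.tensor(tok_lst, dtype=torch.long, device=torch_device)
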
def create_and_check_decoder_model_past_large_inputs(self, config, inputs_dict):
    model = OPTModel(config=config).to(torch_device).eval()

    input_ids = inputs_dict["input_ids"]
    attention_mask = inputs_dict["attention_mask"]
    head_mask = inputs_dict["head_mask"]

    # first forward pass
    outputs = model(input_ids, attention_mask=attention_mask, head_mask=head_mask, use_cache=True)
    output, past_key_values = outputs.to_tuple()

    # create hypothetical multiple next tokens and extend to next_input_ids
    next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
    next_attn_mask = ids_tensor((self.batch_size, 3), 2)

    # append to next input_ids and attn_mask
    next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
    next_attention_mask = torch.cat([attention_mask, next_attn_mask], dim=-1)

    output_from_no_past = model(next_input_ids, attention_mask=next_attention_mask)["last_hidden_state"]
    output_from_past = model(
        next_tokens, attention_mask=next_attention_mask, past_key_values=past_key_values
    )["last_hidden_state"]

    # select random slice
    random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
    output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx].detach()
    output_from_past_slice = output_from_past[:, :, random_slice_idx].detach()

    self.parent.assertTrue(output_from_past_slice.shape[1] == next_tokens.shape[1])

    # test that outputs are equal for slice
    self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))
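
# The equivalence checked above is what makes incremental decoding work:
# feeding only the new tokens plus the cached key/value states must produce
# the same hidden states as a full forward pass over the whole sequence.
# A minimal sketch of that pattern outside the test harness (the prompt and
# the appended token id are illustrative):
import torch
from transformers import AutoTokenizer, OPTModel

def incremental_decode_sketch():
    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
    model = OPTModel.from_pretrained("facebook/opt-350m").eval()

    inputs = tokenizer("Hello world", return_tensors="pt")
    with torch.no_grad():
        # First pass: cache the per-layer key/value states.
        out = model(**inputs, use_cache=True)
        cache = out.past_key_values

        # Second pass: feed only the new token; the cache supplies the history.
        # The attention mask still covers the full (old + new) sequence.
        next_token = torch.tensor([[42]])
        next_mask = torch.cat(
            [inputs["attention_mask"], torch.ones(1, 1, dtype=torch.long)], dim=-1
        )
        out2 = model(next_token, attention_mask=next_mask, past_key_values=cache)
    return out2.last_hidden_state
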
def test_inference_no_head(self):
    model = OPTModel.from_pretrained("facebook/opt-350m").to(torch_device)
    input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])

    with torch.no_grad():
        output = model(input_ids=input_ids).last_hidden_state

    expected_shape = torch.Size((1, 11, 512))
    self.assertEqual(output.shape, expected_shape)
    # expected value works for CPU, as well as GPU (with TF32 disabled)
    expected_slice = torch.tensor(
        [
            [-0.28726277, -1.9241608, -0.3058734],
            [-1.2737825, -0.13332152, -0.18766522],
            [0.41159445, 0.1191957, -1.3107123],
        ],
        device=torch_device,
    )
    assert_tensors_close(output[0, :3, :3], expected_slice, atol=5e-5)
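
# `assert_tensors_close` is defined elsewhere in the test module. A minimal
# sketch of the behavior this test needs, assuming it wraps `torch.allclose`
# with a more readable failure message (hedged reconstruction, not the
# suite's exact helper):
def assert_tensors_close(a, b, atol=1e-12, prefix=""):
    # Pass when all elements match within `atol`; otherwise report the
    # fraction of elements that differ by more than the tolerance.
    if torch.allclose(a, b, atol=atol):
        return True
    pct_different = torch.gt((a - b).abs(), atol).float().mean().item()
    raise AssertionError(f"{prefix}{100 * pct_different:.3f}% of elements differ by more than atol={atol}")
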
def convert_opt_checkpoint(checkpoint_path, pytorch_dump_folder_path, config=None):
    """
    Copy/paste/tweak model's weights to our OPT structure.
    """
    state_dict = load_checkpoint(checkpoint_path)

    if config is not None:
        config = OPTConfig.from_pretrained(config)
    else:
        config = OPTConfig()

    model = OPTModel(config).half().eval()
    model.load_state_dict(state_dict)

    # Check results
    Path(pytorch_dump_folder_path).mkdir(exist_ok=True)
    model.save_pretrained(pytorch_dump_folder_path)
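
# Conversion scripts like this one are typically invoked from the command
# line. A minimal argparse wrapper under that assumption (the flag names are
# illustrative, not necessarily the actual script's):
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--fairseq_path", type=str, help="Path to the original OPT checkpoint.")
    parser.add_argument("--pytorch_dump_folder_path", type=str, help="Where to write the converted model.")
    parser.add_argument("--hf_config", default=None, type=str, help="Optional OPTConfig identifier to load.")
    args = parser.parse_args()
    convert_opt_checkpoint(args.fairseq_path, args.pytorch_dump_folder_path, config=args.hf_config)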