    def test_batched_nan_fp16(self):
        # a bug manifested starting with models facebook/opt-1.3b and larger when running batched generations,
        # therefore we do not use a tiny model here, but the smallest model the problem was seen with, which is opt-1.3b.
        # please refer to this GitHub thread for more details: https://github.com/huggingface/transformers/pull/17437
        model_name = "facebook/opt-1.3b"
        tokenizer = GPT2Tokenizer.from_pretrained(model_name,
                                                  use_fast=False,
                                                  padding_side="left")

        model = OPTForCausalLM.from_pretrained(model_name,
                                               torch_dtype=torch.float16,
                                               use_cache=True).cuda()
        model = model.eval()

        batch = tokenizer(["Who are you?", "Joe Biden is the president of"],
                          padding=True,
                          return_tensors="pt")

        input_ids = batch["input_ids"].cuda()
        attention_mask = batch["attention_mask"].cuda()

        with torch.no_grad():
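            # a single batched forward pass is enough to reproduce the bug: the logits of the
            # first (left-padded) sequence would contain NaNs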
            outputs = model(input_ids, attention_mask=attention_mask)
            self.assertFalse(
                torch.isnan(outputs.logits[0]).any().item()
            )  # the first logits could contain NaNs if it fails
Example #2
    def test_batch_generation(self):
        model_id = "facebook/opt-350m"

        tokenizer = GPT2Tokenizer.from_pretrained(model_id)
        model = OPTForCausalLM.from_pretrained(model_id)
        model.to(torch_device)

        tokenizer.padding_side = "left"

        # use different length sentences to test batching
        sentences = [
            "Hello, my dog is a little",
            "Today, I",
        ]

        inputs = tokenizer(sentences, return_tensors="pt", padding=True)
        input_ids = inputs["input_ids"].to(torch_device)

        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"].to(torch_device),
        )

        inputs_non_padded = tokenizer(
            sentences[0], return_tensors="pt").input_ids.to(torch_device)
        output_non_padded = model.generate(input_ids=inputs_non_padded)

        num_paddings = (inputs_non_padded.shape[-1] -
                        inputs["attention_mask"][-1].long().sum().cpu().item())
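        # num_paddings = pad tokens added to the shorter sentence in the batched input; reducing
        # max_length by it makes the standalone generation stop at the same point as the batched one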
        inputs_padded = tokenizer(
            sentences[1], return_tensors="pt").input_ids.to(torch_device)
        output_padded = model.generate(input_ids=inputs_padded,
                                       max_length=model.config.max_length -
                                       num_paddings)

        batch_out_sentence = tokenizer.batch_decode(outputs,
                                                    skip_special_tokens=True)
        non_padded_sentence = tokenizer.decode(output_non_padded[0],
                                               skip_special_tokens=True)
        padded_sentence = tokenizer.decode(output_padded[0],
                                           skip_special_tokens=True)

        expected_output_sentence = [
            "Hello, my dog is a little bit of a dork.\nI'm a little bit",
            "Today, I was in the middle of a conversation with a friend about the",
        ]
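        # the batched generation must match both the reference strings and the per-sentence generations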
        self.assertListEqual(expected_output_sentence, batch_out_sentence)
        self.assertListEqual(batch_out_sentence,
                             [non_padded_sentence, padded_sentence])
Example #3
    def test_logits(self):
        model = OPTForCausalLM.from_pretrained(self.path_model)
        model = model.eval()
        tokenizer = GPT2Tokenizer.from_pretrained(self.path_model)

        prompts = [
            "Today is a beautiful day and I want to",
            "In the city of",
            "Paris is the capital of France and",
            "Computers and mobile phones have taken",
        ]
        # to match Metaseq, the prompts must be tokenized without a BOS token -> add_special_tokens=False
        inputs = tokenizer(prompts,
                           return_tensors="pt",
                           padding=True,
                           add_special_tokens=False)
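        # reduce the (batch, seq_len, vocab) logits to per-position means so they can be compared
        # against the small reference tensor below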
        logits = model(inputs.input_ids,
                       attention_mask=inputs.attention_mask)[0].mean(dim=-1)
        # logits_meta = torch.load(self.path_logits_meta)
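        # hard-coded per-position logit means computed with the original Metaseq checkpoint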
        logits_meta = torch.Tensor([
            [
                1.3851, -13.8923, -10.5229, -10.7533, -0.2309, -10.2384,
                -0.5365, -9.0947, -5.1670
            ],
            [
                -4.7073, -10.6276, -3.9415, -21.5242, -0.2822, -0.2822,
                -0.2822, -0.2822, -0.2822
            ],
            [
                0.6247, -3.4229, -8.9179, -1.4297, -14.1650, 1.4146, -9.0218,
                -0.2703, -0.2703
            ],
            [
                6.4783, -1.9913, -10.7926, -2.3336, 1.5092, -0.9974, -6.8213,
                1.3477, 1.3477
            ],
        ])
        assert torch.allclose(logits, logits_meta, atol=1e-4)
Example #4
    def test_generation_post_attn_layer_norm(self):
        model_id = "facebook/opt-350m"

        EXPECTED_OUTPUTS = [
            "Today is a beautiful day and I want to",
            "In the city of San Francisco, the city",
            "Paris is the capital of France and the capital",
            "Computers and mobile phones have taken over the",
        ]

        predicted_outputs = []
        tokenizer = GPT2Tokenizer.from_pretrained(model_id)
        model = OPTForCausalLM.from_pretrained(model_id)

        for prompt in self.prompts:
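            # self.prompts is assumed to be defined on the test class (the same four prompts as in test_logits above)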
            input_ids = tokenizer(prompt, return_tensors="pt").input_ids

            generated_ids = model.generate(input_ids, max_length=10)

            generated_string = tokenizer.batch_decode(generated_ids,
                                                      skip_special_tokens=True)
            predicted_outputs += generated_string

        self.assertListEqual(predicted_outputs, EXPECTED_OUTPUTS)
Example #5
    def test_load_model(self):
        try:
            _ = OPTForCausalLM.from_pretrained(self.path_model)
        except BaseException:
            self.fail("Failed loading model")