# Example 1
import torch
from argparse import Namespace

from transformers import XGLMConfig, XGLMForCausalLM


# `remove_ignore_keys_` and `make_linear_from_emb` are small helpers defined
# alongside this function in the original fairseq-to-HF conversion script.
def convert_fairseq_xglm_checkpoint_from_disk(checkpoint_path):
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    args = Namespace(**checkpoint["cfg"]["model"])
    state_dict = checkpoint["model"]
    remove_ignore_keys_(state_dict)
    vocab_size = state_dict["decoder.embed_tokens.weight"].shape[0]

    # fairseq names everything under "decoder"; the HF XGLM modules live under "model"
    state_dict = {
        key.replace("decoder", "model"): val
        for key, val in state_dict.items()
    }

    config = XGLMConfig(
        vocab_size=vocab_size,
        max_position_embeddings=args.max_target_positions,
        num_layers=args.decoder_layers,
        attention_heads=args.decoder_attention_heads,
        ffn_dim=args.decoder_ffn_embed_dim,
        d_model=args.decoder_embed_dim,
        layerdrop=args.decoder_layerdrop,
        dropout=args.dropout,
        attention_dropout=args.attention_dropout,
        activation_dropout=args.activation_dropout,
        activation_function="gelu",
        scale_embedding=not args.no_scale_embedding,
        tie_word_embeddings=args.share_decoder_input_output_embed,
    )

    model = XGLMForCausalLM(config)
    # strict=False: lm_head.weight is absent from the fairseq state dict and is rebuilt below
    missing = model.load_state_dict(state_dict, strict=False)
    print(missing)  # surfaces any missing/unexpected keys from the conversion
    model.lm_head = make_linear_from_emb(model.model.embed_tokens)

    return model
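
A minimal usage sketch for the converter above; the checkpoint path and output directory are placeholders, not values from the original script:

    model = convert_fairseq_xglm_checkpoint_from_disk("/path/to/fairseq_xglm/checkpoint.pt")  # hypothetical path
    model.save_pretrained("xglm-converted")  # standard save_pretrained; directory name is illustrative
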
# Example 2
    def test_xglm_sample_max_time(self):
        tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
        model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M")
        model.to(torch_device)

        torch.manual_seed(0)
        tokenized = tokenizer("Today is a nice day and", return_tensors="pt")
        input_ids = tokenized.input_ids.to(torch_device)

        # every budgeted generate() call below should stop shortly after MAX_TIME seconds
        MAX_TIME = 0.15

        start = datetime.datetime.now()
        model.generate(input_ids,
                       do_sample=True,
                       max_time=MAX_TIME,
                       max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
        self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))

        start = datetime.datetime.now()
        model.generate(input_ids,
                       do_sample=False,
                       max_time=MAX_TIME,
                       max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
        self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))

        start = datetime.datetime.now()
        model.generate(input_ids,
                       do_sample=False,
                       num_beams=2,
                       max_time=MAX_TIME,
                       max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
        self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))

        start = datetime.datetime.now()
        model.generate(input_ids,
                       do_sample=True,
                       num_beams=2,
                       max_time=MAX_TIME,
                       max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
        self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))

        # with max_time=None there is no budget, so generation should run well past MAX_TIME
        start = datetime.datetime.now()
        model.generate(input_ids,
                       do_sample=False,
                       max_time=None,
                       max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration,
                           datetime.timedelta(seconds=1.25 * MAX_TIME))
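
In transformers, max_time is implemented as a stopping criterion; an equivalent explicit form using the public API (a sketch, not part of the original test) is:

    from transformers import MaxTimeCriteria, StoppingCriteriaList

    criteria = StoppingCriteriaList([MaxTimeCriteria(max_time=MAX_TIME)])
    model.generate(input_ids, do_sample=True, max_length=256, stopping_criteria=criteria)
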
# Example 3
    def test_xglm_sample(self):
        tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
        model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M")

        torch.manual_seed(0)  # fix the RNG so the sampled continuation is reproducible
        tokenized = tokenizer("Today is a nice day and", return_tensors="pt")
        input_ids = tokenized.input_ids
        output_ids = model.generate(input_ids, do_sample=True, num_beams=1)
        output_str = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        EXPECTED_OUTPUT_STR = "Today is a nice day and the sun is shining. A nice day with warm rainy"
        self.assertEqual(output_str, EXPECTED_OUTPUT_STR)
# Example 4
    def test_xglm_sample(self):
        tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
        model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M")
        model.to(torch_device)

        torch.manual_seed(0)
        tokenized = tokenizer("Today is a nice day and", return_tensors="pt")
        input_ids = tokenized.input_ids.to(torch_device)
        output_ids = model.generate(input_ids, do_sample=True, num_beams=1)
        output_str = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        EXPECTED_OUTPUT_STR = "Today is a nice day and I am happy to show you all about a recent project for my"
        self.assertEqual(output_str, EXPECTED_OUTPUT_STR)
# Example 5
    def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, *args):
        model = XGLMForCausalLM(config)
        model.to(torch_device)
        model.eval()

        result = model(input_ids, labels=input_ids)
        self.parent.assertEqual(result.loss.shape, ())
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
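
A self-contained sketch of the same shape checks with a tiny hypothetical config (the real test draws config and inputs from the model tester fixtures):

    import torch
    from transformers import XGLMConfig, XGLMForCausalLM

    config = XGLMConfig(vocab_size=128, d_model=32, num_layers=2, attention_heads=4, ffn_dim=64)
    model = XGLMForCausalLM(config).eval()
    input_ids = torch.randint(0, 128, (2, 8))
    out = model(input_ids, labels=input_ids)
    assert out.loss.shape == ()              # scalar LM loss
    assert out.logits.shape == (2, 8, 128)   # (batch, seq_len, vocab_size)
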
# Example 6
    def test_batch_generation(self):
        model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M")
        model.to(torch_device)
        tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")

        tokenizer.padding_side = "left"

        # use different length sentences to test batching
        sentences = [
            "Hello, my dog is a little",
            "Today, I",
        ]

        inputs = tokenizer(sentences, return_tensors="pt", padding=True)
        input_ids = inputs["input_ids"].to(torch_device)

        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"].to(torch_device),
        )

        inputs_non_padded = tokenizer(
            sentences[0], return_tensors="pt").input_ids.to(torch_device)
        output_non_padded = model.generate(input_ids=inputs_non_padded)

        # how many pad tokens the shorter sentence received inside the batch
        num_paddings = inputs_non_padded.shape[-1] - inputs["attention_mask"][-1].long().sum().cpu().item()
        inputs_padded = tokenizer(sentences[1], return_tensors="pt").input_ids.to(torch_device)
        # shorten max_length so the unbatched short sentence generates the same
        # number of new tokens as it did inside the padded batch
        output_padded = model.generate(input_ids=inputs_padded, max_length=model.config.max_length - num_paddings)

        batch_out_sentence = tokenizer.batch_decode(outputs,
                                                    skip_special_tokens=True)
        non_padded_sentence = tokenizer.decode(output_non_padded[0],
                                               skip_special_tokens=True)
        padded_sentence = tokenizer.decode(output_padded[0],
                                           skip_special_tokens=True)

        expected_output_sentence = [
            "Hello, my dog is a little bit of a shy one, but he is very friendly",
            "Today, I am going to share with you a few of my favorite things",
        ]
        self.assertListEqual(expected_output_sentence, batch_out_sentence)
        self.assertListEqual(expected_output_sentence,
                             [non_padded_sentence, padded_sentence])
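
The tokenizer.padding_side = "left" line is what makes the batched run line up with the per-sentence runs; a quick sketch of what left padding produces (the sentences are the ones from the test):

    from transformers import XGLMTokenizer

    tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M")
    tokenizer.padding_side = "left"
    batch = tokenizer(["Hello, my dog is a little", "Today, I"], return_tensors="pt", padding=True)
    # pads sit on the left, so each row ends in real prompt tokens and generate()
    # appends new tokens right after the prompt rather than after padding
    print(batch["attention_mask"])  # 0s mark the left padding of the shorter row
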
# Example 7
    def create_and_check_forward_and_backwards(
        self, config, input_ids, input_mask, head_mask, *args, gradient_checkpointing=False
    ):
        model = XGLMForCausalLM(config)
        model.to(torch_device)
        if gradient_checkpointing:
            model.gradient_checkpointing_enable()

        result = model(input_ids, labels=input_ids)
        self.parent.assertEqual(result.loss.shape, ())
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
        result.loss.backward()
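
Outside the tester, the gradient_checkpointing branch is just the standard PreTrainedModel toggle; a brief sketch with the inputs assumed as elsewhere in these examples:

    model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M")
    model.gradient_checkpointing_enable()  # trade extra forward recompute for lower activation memory
    out = model(input_ids, labels=input_ids)
    out.loss.backward()
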
# Example 8
    def _test_lm_generate_xglm_helper(
        self,
        gradient_checkpointing=False,
        verify_outputs=True,
    ):
        model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M")
        if gradient_checkpointing:
            model.gradient_checkpointing_enable()
        else:
            model.gradient_checkpointing_disable()
        model.to(torch_device)
        input_ids = torch.tensor([[2, 268, 9865]], dtype=torch.long, device=torch_device)  # The dog
        # </s> The dog is a very friendly dog. He is very affectionate and loves to play with other
        # fmt: off
        expected_output_ids = [2, 268, 9865, 67, 11, 1988, 57252, 9865, 5, 984, 67, 1988, 213838, 1658, 53, 70446, 33, 6657, 278, 1581]
        # fmt: on
        output_ids = model.generate(input_ids, do_sample=False, num_beams=1)
        if verify_outputs:
            self.assertListEqual(output_ids[0].tolist(), expected_output_ids)
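
Hypothetical callers for this helper; the real test suite wires it into separate test methods along these lines:

    def test_lm_generate_xglm(self):
        self._test_lm_generate_xglm_helper()

    def test_lm_generate_xglm_with_gradient_checkpointing(self):
        self._test_lm_generate_xglm_helper(gradient_checkpointing=True)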