def convert_fairseq_xglm_checkpoint_from_disk(checkpoint_path): checkpoint = torch.load(checkpoint_path, map_location="cpu") args = Namespace(**checkpoint["cfg"]["model"]) state_dict = checkpoint["model"] remove_ignore_keys_(state_dict) vocab_size = state_dict["decoder.embed_tokens.weight"].shape[0] state_dict = { key.replace("decoder", "model"): val for key, val in state_dict.items() } config = XGLMConfig( vocab_size=vocab_size, max_position_embeddings=args.max_target_positions, num_layers=args.decoder_layers, attention_heads=args.decoder_attention_heads, ffn_dim=args.decoder_ffn_embed_dim, d_model=args.decoder_embed_dim, layerdrop=args.decoder_layerdrop, dropout=args.dropout, attention_dropout=args.attention_dropout, activation_dropout=args.activation_dropout, activation_function="gelu", scale_embedding=not args.no_scale_embedding, tie_word_embeddings=args.share_decoder_input_output_embed, ) model = XGLMForCausalLM(config) missing = model.load_state_dict(state_dict, strict=False) print(missing) model.lm_head = make_linear_from_emb(model.model.embed_tokens) return model
def test_xglm_sample_max_time(self): tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M") model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M") model.to(torch_device) torch.manual_seed(0) tokenized = tokenizer("Today is a nice day and", return_tensors="pt") input_ids = tokenized.input_ids.to(torch_device) MAX_TIME = 0.15 start = datetime.datetime.now() model.generate(input_ids, do_sample=True, max_time=MAX_TIME, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME)) self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME)) start = datetime.datetime.now() model.generate(input_ids, do_sample=False, max_time=MAX_TIME, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME)) self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME)) start = datetime.datetime.now() model.generate(input_ids, do_sample=False, num_beams=2, max_time=MAX_TIME, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME)) self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME)) start = datetime.datetime.now() model.generate(input_ids, do_sample=True, num_beams=2, max_time=MAX_TIME, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME)) self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME)) start = datetime.datetime.now() model.generate(input_ids, do_sample=False, max_time=None, max_length=256) duration = datetime.datetime.now() - start self.assertGreater(duration, datetime.timedelta(seconds=1.25 * MAX_TIME))
def test_xglm_sample(self): tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M") model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M") torch.manual_seed(0) tokenized = tokenizer("Today is a nice day and", return_tensors="pt") input_ids = tokenized.input_ids output_ids = model.generate(input_ids, do_sample=True, num_beams=1) output_str = tokenizer.decode(output_ids[0], skip_special_tokens=True) EXPECTED_OUTPUT_STR = "Today is a nice day and the sun is shining. A nice day with warm rainy" self.assertEqual(output_str, EXPECTED_OUTPUT_STR)
def test_xglm_sample(self): tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M") model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M") model.to(torch_device) torch.manual_seed(0) tokenized = tokenizer("Today is a nice day and", return_tensors="pt") input_ids = tokenized.input_ids.to(torch_device) output_ids = model.generate(input_ids, do_sample=True, num_beams=1) output_str = tokenizer.decode(output_ids[0], skip_special_tokens=True) EXPECTED_OUTPUT_STR = "Today is a nice day and I am happy to show you all about a recent project for my" self.assertEqual(output_str, EXPECTED_OUTPUT_STR)
def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, *args): model = XGLMForCausalLM(config) model.to(torch_device) model.eval() result = model(input_ids, labels=input_ids) self.parent.assertEqual(result.loss.shape, ()) self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
def test_batch_generation(self): model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M") model.to(torch_device) tokenizer = XGLMTokenizer.from_pretrained("facebook/xglm-564M") tokenizer.padding_side = "left" # use different length sentences to test batching sentences = [ "Hello, my dog is a little", "Today, I", ] inputs = tokenizer(sentences, return_tensors="pt", padding=True) input_ids = inputs["input_ids"].to(torch_device) outputs = model.generate( input_ids=input_ids, attention_mask=inputs["attention_mask"].to(torch_device), ) inputs_non_padded = tokenizer( sentences[0], return_tensors="pt").input_ids.to(torch_device) output_non_padded = model.generate(input_ids=inputs_non_padded) num_paddings = inputs_non_padded.shape[-1] - inputs["attention_mask"][ -1].long().sum().cpu().item() inputs_padded = tokenizer( sentences[1], return_tensors="pt").input_ids.to(torch_device) output_padded = model.generate(input_ids=inputs_padded, max_length=model.config.max_length - num_paddings) batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True) non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True) padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True) expected_output_sentence = [ "Hello, my dog is a little bit of a shy one, but he is very friendly", "Today, I am going to share with you a few of my favorite things", ] self.assertListEqual(expected_output_sentence, batch_out_sentence) self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])
def create_and_check_forward_and_backwards( self, config, input_ids, input_mask, head_mask, *args, gradient_checkpointing=False ): model = XGLMForCausalLM(config) model.to(torch_device) if gradient_checkpointing: model.gradient_checkpointing_enable() result = model(input_ids, labels=input_ids) self.parent.assertEqual(result.loss.shape, ()) self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) result.loss.backward()
def _test_lm_generate_xglm_helper( self, gradient_checkpointing=False, verify_outputs=True, ): model = XGLMForCausalLM.from_pretrained("facebook/xglm-564M") if gradient_checkpointing: model.gradient_checkpointing_enable() else: model.gradient_checkpointing_disable() model.to(torch_device) input_ids = torch.tensor([[2, 268, 9865]], dtype=torch.long, device=torch_device) # The dog # </s> The dog is a very friendly dog. He is very affectionate and loves to play with other # fmt: off expected_output_ids = [2, 268, 9865, 67, 11, 1988, 57252, 9865, 5, 984, 67, 1988, 213838, 1658, 53, 70446, 33, 6657, 278, 1581] # fmt: on output_ids = model.generate(input_ids, do_sample=False, num_beams=1) if verify_outputs: self.assertListEqual(output_ids[0].tolist(), expected_output_ids)