def test_question_answering_forward(self):
    """Forward pass of BartForQuestionAnswering yields per-token start/end
    logits shaped like the input ids and a scalar float loss."""
    cfg, token_ids, n_examples = self._get_config_and_data()
    # Random start/end positions in {0, 1}, one per example in the batch.
    span_labels = ids_tensor([n_examples], 2).to(torch_device)
    qa_model = BartForQuestionAnswering(cfg)
    qa_model.to(torch_device)
    result = qa_model(
        input_ids=token_ids,
        start_positions=span_labels,
        end_positions=span_labels,
    )
    # One logit per input token, for both span boundaries.
    self.assertEqual(result["start_logits"].shape, token_ids.shape)
    self.assertEqual(result["end_logits"].shape, token_ids.shape)
    # Supplying positions makes the model return a scalar loss.
    self.assertIsInstance(result["loss"].item(), float)
def training(config: Dict) -> None:
    """Fine-tune BART for question answering on a debug slice of SQuAD.

    config: dict - Contains arguments in the values whose keys are the
    arguments specified in interface/cli.py
    """
    # Load data: a small debug split, reused as the evaluation set.
    dataset = SquadDataset('data/squad/sq_dev.json', debug=True)
    eval_split = dataset

    # Pretrained model to fine-tune.
    bart = BartForQuestionAnswering.from_pretrained('facebook/bart-base')

    # Run directory is keyed on the learning rate so runs are easy to compare.
    run_dir = f"/runs/results_{config['learning_rate']:.2e}"
    wandb.init(project='autoregkd', name=run_dir)

    run_args = CustomArguments(
        output_dir=run_dir,                    # where checkpoints/results go
        num_train_epochs=config['epochs'],     # total training epochs
        # Batch sizes are given as log2 on the CLI; expand them here.
        per_device_train_batch_size=2 ** config['log_batch_size'],
        per_device_eval_batch_size=2 ** config['log_eval_batch_size'],
        warmup_steps=500,                      # LR-scheduler warmup
        weight_decay=0.01,
        learning_rate=config['learning_rate'],
        evaluation_strategy='steps',
        logging_dir='./logs',
        logging_steps=10,
    )

    trainer = CustomTrainer(
        model=bart,
        args=run_args,
        train_dataset=dataset,
        eval_dataset=eval_split,
    )
    trainer.train()
    trainer.evaluate()
def create_model(self, transformer="longformer"):
    """Load a pretrained extractive-QA model into ``self.model``.

    ``transformer`` selects the architecture/checkpoint; on an unknown key
    ``self.model`` is left untouched and a help message is printed.
    """
    # Supported key -> (transformers class name, pretrained checkpoint id).
    registry = {
        "distilbert": ("DistilBertForQuestionAnswering",
                       "distilbert-base-uncased"),
        "bert": ("BertForQuestionAnswering", "bert-base-uncased"),
        "roberta": ("RobertaForQuestionAnswering", "roberta-base"),
        "roberta_squad": ("RobertaForQuestionAnswering",
                          "deepset/roberta-base-squad2"),
        "longformer": ("LongformerForQuestionAnswering",
                       "allenai/longformer-base-4096"),
        "bart": ("BartForQuestionAnswering", "facebook/bart-base"),
        "electra": ("ElectraForQuestionAnswering",
                    "google/electra-small-discriminator"),
    }
    entry = registry.get(transformer)
    if entry is None:
        print(
            "The model you chose is not available in this version. You can try to manually change the code or manually overwrite the variable self.model"
        )
        print(
            "The available choices are 'distilbert' , 'bert' , 'roberta' , 'longformer' , 'bart' , 'electra' "
        )
        return
    class_name, checkpoint = entry
    # Deferred import: transformers is only loaded when a model is created.
    import transformers
    self.model = getattr(transformers, class_name).from_pretrained(checkpoint)
''' load data '''
# Read SQuAD v2.0 contexts, questions and answers from local JSON dumps.
train_contexts, train_questions, train_answers = read_squad(
    'data/train-v2.0.json')
val_contexts, val_questions, val_answers = read_squad('data/dev-v2.0.json')

''' generate answer end indices '''
# read_squad presumably yields only answer start offsets; these calls add the
# matching end offsets in place — TODO confirm against read_squad's output.
add_end_idx(train_answers, train_contexts)
add_end_idx(val_answers, val_contexts)

''' tokenizers and models '''
# Tokenizer and model share the same BART base checkpoint.
tokenizer = BartTokenizerFast.from_pretrained('facebook/bart-base')
model = BartForQuestionAnswering.from_pretrained('facebook/bart-base')

''' tokenize '''
# Encode context/question pairs, padding and truncating to a common length.
train_encodings = tokenizer(train_contexts, train_questions, truncation=True,
                            padding=True)
val_encodings = tokenizer(val_contexts, val_questions, truncation=True,
                          padding=True)

''' last step preparing model inputs '''
# Map character-level answer spans onto token positions.
# NOTE(review): only train_encodings gets token positions in this span;
# val_encodings is never passed to add_token_positions here — confirm it is
# handled later in the file, otherwise evaluation labels are missing.
add_token_positions(train_encodings, train_answers)
from transformers import BartTokenizer, BartForQuestionAnswering
import torch

# Try fine-tuning on top of this model.
# The needed files can be downloaded manually from
# https://huggingface.co/valhalla/bart-large-finetuned-squadv1

# Just a plain English-only vocabulary.
tokenizer = BartTokenizer.from_pretrained('/mnt/qa_data')
model = BartForQuestionAnswering.from_pretrained('/mnt/qa_data')

question, text = "Who am I ?", "he a nice dog in China, I am Zhangbo, and i love it very much, i deadly wanna to eat it"

# Encode the (question, context) pair as PyTorch tensors.
encoding = tokenizer(question, text, return_tensors='pt')
input_ids = encoding['input_ids']
attention_mask = encoding['attention_mask']

# NOTE(review): slicing the output as a tuple assumes the legacy
# return_dict=False layout where the first two items are the start/end
# logits — confirm against the installed transformers version.
start_scores, end_scores = model(input_ids, attention_mask=attention_mask, output_attentions=False)[:2]

all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
# Take the tokens between the argmax start and end positions (inclusive).
answer = ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores)+1])
# Round-trip through ids so decode() strips BPE markers into plain text.
answer = tokenizer.convert_tokens_to_ids(answer.split())
answer = tokenizer.decode(answer)
print(answer)
# answer => 'a nice puppet'