import argparse
import glob
import os
import time

# SummarizationTrainer, GLUETransformer, generic_train and add_generic_args
# are assumed to be defined earlier in these scripts / in lightning_base; only
# the stdlib imports the snippets below need are added here.


def main(args):
    # If output_dir not provided, a folder will be generated in pwd
    if not args.output_dir:
        args.output_dir = os.path.join(
            "./results",
            f"{args.task}_{time.strftime('%Y%m%d_%H%M%S')}",
        )
        os.makedirs(args.output_dir)

    model = SummarizationTrainer(args)
    trainer = generic_train(model, args)

    # Optionally, predict on dev set and write to output_dir
    if args.do_predict:
        # See https://github.com/huggingface/transformers/issues/3159
        # PL uses this format to create a checkpoint:
        # https://github.com/PyTorchLightning/pytorch-lightning/blob/master/pytorch_lightning/callbacks/model_checkpoint.py#L169
        checkpoints = list(
            sorted(glob.glob(os.path.join(args.output_dir, "checkpointepoch=*.ckpt"), recursive=True))
        )
        model = model.load_from_checkpoint(checkpoints[-1])
        trainer.test(model)
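# A minimal sketch (not part of the original script; the helper name
# `latest_checkpoint` is hypothetical): sorted() compares the checkpoint paths
# lexicographically, so "checkpointepoch=10.ckpt" sorts before
# "checkpointepoch=2.ckpt" and checkpoints[-1] can pick a stale file once
# training runs past nine epochs. Sorting on the parsed epoch number avoids that:
import re


def latest_checkpoint(output_dir):
    ckpts = glob.glob(os.path.join(output_dir, "checkpointepoch=*.ckpt"))
    # Take the checkpoint with the largest integer epoch in its filename.
    return max(ckpts, key=lambda p: int(re.search(r"epoch=(\d+)", p).group(1)))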
# Variant of main() whose default output_dir also records the model type.
def main(args):
    # If output_dir not provided, a folder will be generated in pwd
    if not args.output_dir:
        args.output_dir = os.path.join(
            "./results",
            f"{args.task}_{args.model_type}_{time.strftime('%Y%m%d_%H%M%S')}",
        )
        os.makedirs(args.output_dir)

    model = SummarizationTrainer(args)
    trainer = generic_train(model, args)

    # Optionally, predict on dev set and write to output_dir
    if args.do_predict:
        checkpoints = list(
            sorted(glob.glob(os.path.join(args.output_dir, "checkpointepoch=*.ckpt"), recursive=True))
        )
        # load_from_checkpoint is a classmethod that returns a new instance;
        # bind the result so trainer.test runs on the restored weights rather
        # than silently discarding them.
        model = SummarizationTrainer.load_from_checkpoint(checkpoints[-1])
        trainer.test(model)
    # (Tail of GLUETransformer.add_model_specific_args.)
    parser.add_argument(
        "--overwrite_cache", action="store_true", help="Overwrite the cached training and evaluation sets"
    )
    parser.add_argument(
        "--tags", nargs="+", type=str, help="Experiment tags for neptune.ai", default=["FT", "last-layer"]
    )
    return parser


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    add_generic_args(parser, os.getcwd())
    parser = GLUETransformer.add_model_specific_args(parser, os.getcwd())
    args = parser.parse_args()

    # If output_dir not provided, a folder will be generated in pwd
    if args.output_dir is None:
        args.output_dir = os.path.join(
            "./results",
            f"{args.task}_{time.strftime('%Y%m%d_%H%M%S')}",
        )
        os.makedirs(args.output_dir)

    model = GLUETransformer(args)
    trainer = generic_train(model, args)

    # Optionally, predict on dev set and write to output_dir
    if args.do_predict:
        checkpoints = list(
            sorted(glob.glob(os.path.join(args.output_dir, "checkpointepoch=*.ckpt"), recursive=True))
        )
        model = model.load_from_checkpoint(checkpoints[-1])
        trainer.test(model)
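# Example invocation (a sketch: --overwrite_cache and --tags come from the
# parser above, and --do_predict is read by the block above; the script
# filename and any other flags from add_generic_args are assumptions):
#
#   python run_pl_glue.py --overwrite_cache --tags FT last-layer --do_predict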
import torch
from tqdm import tqdm
from transformers import T5Tokenizer


# Variant of main() that skips trainer.test and instead decodes the
# csqa.*.qac.src files with the restored model.
def main(args):
    # If output_dir not provided, a folder will be generated in pwd
    if not args.output_dir:
        args.output_dir = os.path.join(
            "./results",
            f"{args.task}_{time.strftime('%Y%m%d_%H%M%S')}",
        )
        os.makedirs(args.output_dir)

    model = SummarizationTrainer(args)
    trainer = generic_train(model, args)

    # Optionally, predict on dev set and write to output_dir
    if args.do_predict:
        # See https://github.com/huggingface/transformers/issues/3159
        # PL uses this format to create a checkpoint:
        # https://github.com/PyTorchLightning/pytorch-lightning/blob/master/pytorch_lightning/callbacks/model_checkpoint.py#L169
        checkpoints = list(
            sorted(glob.glob(os.path.join(args.output_dir, "checkpointepoch=*.ckpt"), recursive=True))
        )
        print(checkpoints)
        model = model.load_from_checkpoint(checkpoints[-1])
        # trainer.test(model)

        tokenizer = T5Tokenizer.from_pretrained(args.model_name_or_path)

        # Note: despite the variable names, the "test" pass decodes the train
        # split (csqa.train.qac.src -> train_csqa.txt).
        test_examples = [x.rstrip() for x in open("./csqa.train.qac.src").readlines()]
        test_fout = open("train_csqa.txt", "w")
        val_examples = [x.rstrip() for x in open("./csqa.dev.qac.src").readlines()]
        val_fout = open("val_csqa.txt", "w")

        max_length = 24
        min_length = 1

        def chunks(lst, n):
            # Yield successive n-sized chunks from lst.
            for i in range(0, len(lst), n):
                yield lst[i : i + n]

        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(device)
        model.to(device)

        for batch in tqdm(list(chunks(test_examples, 8))):
            dct = tokenizer.batch_encode_plus(batch, max_length=64, return_tensors="pt", pad_to_max_length=True)
            summaries = model.model.generate(
                input_ids=dct["input_ids"].to(device),
                attention_mask=dct["attention_mask"].to(device),
                num_beams=5,
                length_penalty=0.6,
                max_length=max_length + 2,  # +2 from original because we start at step=1 and stop before max_length
                min_length=min_length + 1,  # +1 from original because we start at step=1
                no_repeat_ngram_size=3,
                early_stopping=True,
                decoder_start_token_id=model.config.eos_token_id,
            )
            dec = [
                tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summaries
            ]
            for hypothesis in dec:
                test_fout.write(hypothesis + "\n")
                test_fout.flush()

        for batch in tqdm(list(chunks(val_examples, 8))):
            dct = tokenizer.batch_encode_plus(batch, max_length=64, return_tensors="pt", pad_to_max_length=True)
            summaries = model.model.generate(
                input_ids=dct["input_ids"].to(device),
                attention_mask=dct["attention_mask"].to(device),
                num_beams=5,
                length_penalty=0.6,
                max_length=max_length + 2,  # +2 from original because we start at step=1 and stop before max_length
                min_length=min_length + 1,  # +1 from original because we start at step=1
                no_repeat_ngram_size=3,
                early_stopping=True,
                decoder_start_token_id=model.config.eos_token_id,
            )
            dec = [
                tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summaries
            ]
            for hypothesis in dec:
                val_fout.write(hypothesis + "\n")
                val_fout.flush()
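# A possible refactor (a sketch, not in the original): the two decoding loops
# above are identical apart from their input examples and output file, so they
# could share one helper. `write_generations` is a hypothetical name; it
# assumes the same model/tokenizer objects and decoding settings as above.
def write_generations(model, tokenizer, examples, fout, device, batch_size=8, max_length=24, min_length=1):
    for i in tqdm(range(0, len(examples), batch_size)):
        batch = examples[i : i + batch_size]
        dct = tokenizer.batch_encode_plus(batch, max_length=64, return_tensors="pt", pad_to_max_length=True)
        summaries = model.model.generate(
            input_ids=dct["input_ids"].to(device),
            attention_mask=dct["attention_mask"].to(device),
            num_beams=5,
            length_penalty=0.6,
            max_length=max_length + 2,
            min_length=min_length + 1,
            no_repeat_ngram_size=3,
            early_stopping=True,
            decoder_start_token_id=model.config.eos_token_id,
        )
        for g in summaries:
            fout.write(tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) + "\n")
        fout.flush()


# The two loops would then collapse to:
#   write_generations(model, tokenizer, test_examples, test_fout, device)
#   write_generations(model, tokenizer, val_examples, val_fout, device)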