Example #1
import glob
import os
import time

# SummarizationTrainer and generic_train are defined elsewhere in the
# surrounding HuggingFace transformers example script.


def main(args):

    # If output_dir is not provided, create a timestamped folder under ./results
    if not args.output_dir:
        args.output_dir = os.path.join(
            "./results",
            f"{args.task}_{time.strftime('%Y%m%d_%H%M%S')}",
        )
        os.makedirs(args.output_dir)
    model = SummarizationTrainer(args)
    trainer = generic_train(model, args)

    # Optionally, predict on dev set and write to output_dir
    if args.do_predict:
        # See https://github.com/huggingface/transformers/issues/3159
        # PyTorch Lightning uses this format when creating a checkpoint:
        # https://github.com/PyTorchLightning/pytorch-lightning/blob/master\
        # /pytorch_lightning/callbacks/model_checkpoint.py#L169
        checkpoints = sorted(
            glob.glob(os.path.join(args.output_dir, "checkpointepoch=*.ckpt"),
                      recursive=True))
        model = SummarizationTrainer.load_from_checkpoint(checkpoints[-1])
        trainer.test(model)
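Note that sorted() orders these paths lexicographically, so checkpointepoch=10.ckpt sorts before checkpointepoch=2.ckpt; once training runs for ten or more epochs, checkpoints[-1] is no longer guaranteed to be the latest checkpoint. A minimal sketch of a numeric sort key (the epoch_number helper and its regular expression are illustrative, not part of the original example):

import re

def epoch_number(path):
    # Pull the integer epoch out of names like "checkpointepoch=12.ckpt";
    # names without a match sort first.
    match = re.search(r"epoch=(\d+)", path)
    return int(match.group(1)) if match else -1

checkpoints = sorted(
    glob.glob(os.path.join(args.output_dir, "checkpointepoch=*.ckpt")),
    key=epoch_number)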
Example #2
import glob
import os
import time


def main(args):

    # If output_dir is not provided, create a timestamped folder under ./results
    if not args.output_dir:
        args.output_dir = os.path.join(
            "./results",
            f"{args.task}_{args.model_type}_{time.strftime('%Y%m%d_%H%M%S')}",
        )
        os.makedirs(args.output_dir)
    model = SummarizationTrainer(args)
    trainer = generic_train(model, args)

    # Optionally, predict on dev set and write to output_dir
    if args.do_predict:
        checkpoints = sorted(
            glob.glob(os.path.join(args.output_dir, "checkpointepoch=*.ckpt"),
                      recursive=True))
        # Keep the returned model; the original snippet discarded it, so
        # trainer.test ran on the un-restored model.
        model = SummarizationTrainer.load_from_checkpoint(checkpoints[-1])
        trainer.test(model)
Example #3
        # Tail of an add_model_specific_args-style helper; the earlier
        # arguments are omitted in the original snippet.
        parser.add_argument(
            "--overwrite_cache", action="store_true", help="Overwrite the cached training and evaluation sets"
        )

        parser.add_argument(
            "--tags", nargs='+', type=str, help="experiment tags for neptune.ai", default=['FT', 'last-layer']
        )

        return parser


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    add_generic_args(parser, os.getcwd())
    parser = GLUETransformer.add_model_specific_args(parser, os.getcwd())
    args = parser.parse_args()

    # If output_dir is not provided, create a timestamped folder under ./results
    if args.output_dir is None:
        args.output_dir = os.path.join(
            "./results",
            f"{args.task}_{time.strftime('%Y%m%d_%H%M%S')}",
        )
        os.makedirs(args.output_dir)

    model = GLUETransformer(args)
    trainer = generic_train(model, args)

    # Optionally, predict on dev set and write to output_dir
    if args.do_predict:
        checkpoints = sorted(
            glob.glob(os.path.join(args.output_dir, "checkpointepoch=*.ckpt"),
                      recursive=True))
        model = GLUETransformer.load_from_checkpoint(checkpoints[-1])
        trainer.test(model)
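If the glob matches nothing, checkpoints[-1] raises an IndexError before trainer.test runs. A small defensive check that could precede the load; the error message is illustrative, not part of the original script:

if not checkpoints:
    raise FileNotFoundError(
        f"No 'checkpointepoch=*.ckpt' files found in {args.output_dir}; "
        "check the checkpoint callback's filename template.")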
Example #4
import glob
import os
import time

import torch
from tqdm import tqdm
from transformers import T5Tokenizer


def main(args):

    # If output_dir is not provided, create a timestamped folder under ./results
    if not args.output_dir:
        args.output_dir = os.path.join(
            "./results",
            f"{args.task}_{time.strftime('%Y%m%d_%H%M%S')}",
        )
        os.makedirs(args.output_dir)
    model = SummarizationTrainer(args)
    trainer = generic_train(model, args)

    # Optionally, predict on dev set and write to output_dir
    if args.do_predict:
        # See https://github.com/huggingface/transformers/issues/3159
        # PyTorch Lightning uses this format when creating a checkpoint:
        # https://github.com/PyTorchLightning/pytorch-lightning/blob/master\
        # /pytorch_lightning/callbacks/model_checkpoint.py#L169
        checkpoints = sorted(
            glob.glob(os.path.join(args.output_dir, "checkpointepoch=*.ckpt"),
                      recursive=True))
        print(checkpoints)
        model = SummarizationTrainer.load_from_checkpoint(checkpoints[-1])
        # trainer.test(model)

        tokenizer = T5Tokenizer.from_pretrained(args.model_name_or_path)
        # Hard-coded source files, one example per line; hypotheses are
        # streamed to the matching output file as they are generated.
        test_examples = [
            x.rstrip() for x in open('./csqa.train.qac.src').readlines()
        ]
        test_fout = open('train_csqa.txt', 'w')
        val_examples = [
            x.rstrip() for x in open('./csqa.dev.qac.src').readlines()
        ]
        val_fout = open('val_csqa.txt', 'w')

        max_length = 24
        min_length = 1

        def chunks(lst, n):
            """Yield successive n-sized chunks from lst."""
            for i in range(0, len(lst), n):
                yield lst[i:i + n]

        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(device)
        model.to(device)
        for batch in tqdm(list(chunks(test_examples, 8))):
            dct = tokenizer.batch_encode_plus(batch,
                                              max_length=64,
                                              return_tensors="pt",
                                              pad_to_max_length=True)
            summaries = model.model.generate(
                input_ids=dct["input_ids"].to(device),
                attention_mask=dct["attention_mask"].to(device),
                num_beams=5,
                length_penalty=0.6,
                # +2 from original because we start at step=1 and stop before max_length
                max_length=max_length + 2,
                # +1 from original because we start at step=1
                min_length=min_length + 1,
                no_repeat_ngram_size=3,
                early_stopping=True,
                decoder_start_token_id=model.config.eos_token_id,
            )
            dec = [
                tokenizer.decode(g,
                                 skip_special_tokens=True,
                                 clean_up_tokenization_spaces=False)
                for g in summaries
            ]
            for hypothesis in dec:
                test_fout.write(hypothesis + "\n")
                test_fout.flush()
        for batch in tqdm(list(chunks(val_examples, 8))):
            dct = tokenizer.batch_encode_plus(batch,
                                              max_length=64,
                                              return_tensors="pt",
                                              pad_to_max_length=True)
            summaries = model.model.generate(
                input_ids=dct["input_ids"].to(device),
                attention_mask=dct["attention_mask"].to(device),
                num_beams=5,
                length_penalty=0.6,
                # +2 from original because we start at step=1 and stop before max_length
                max_length=max_length + 2,
                # +1 from original because we start at step=1
                min_length=min_length + 1,
                no_repeat_ngram_size=3,
                early_stopping=True,
                decoder_start_token_id=model.config.eos_token_id,
            )
            dec = [
                tokenizer.decode(g,
                                 skip_special_tokens=True,
                                 clean_up_tokenization_spaces=False)
                for g in summaries
            ]
            for hypothesis in dec:
                val_fout.write(hypothesis + "\n")
                val_fout.flush()
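The test and validation loops above are identical apart from their input and output files. A hedged sketch of how the duplication could be factored out; the generate_to_file helper is illustrative and not part of the original example (note that newer transformers releases replace pad_to_max_length=True with padding="max_length"):

from tqdm import tqdm

def generate_to_file(model, tokenizer, examples, out_path, device,
                     batch_size=8, max_length=24, min_length=1):
    """Batch-generate with beam search and write one hypothesis per line."""
    def chunks(lst, n):
        for i in range(0, len(lst), n):
            yield lst[i:i + n]

    with open(out_path, 'w') as fout:
        for batch in tqdm(list(chunks(examples, batch_size))):
            dct = tokenizer.batch_encode_plus(batch, max_length=64,
                                              return_tensors="pt",
                                              pad_to_max_length=True)
            summaries = model.model.generate(
                input_ids=dct["input_ids"].to(device),
                attention_mask=dct["attention_mask"].to(device),
                num_beams=5,
                length_penalty=0.6,
                max_length=max_length + 2,
                min_length=min_length + 1,
                no_repeat_ngram_size=3,
                early_stopping=True,
                decoder_start_token_id=model.config.eos_token_id,
            )
            for g in summaries:
                fout.write(tokenizer.decode(
                    g, skip_special_tokens=True,
                    clean_up_tokenization_spaces=False) + "\n")
                fout.flush()

The two loops then collapse to:

generate_to_file(model, tokenizer, test_examples, 'train_csqa.txt', device)
generate_to_file(model, tokenizer, val_examples, 'val_csqa.txt', device)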