Exemplo n.º 1
0
 # Model, evaluation and data
 parser.add_argument(
     "--model_dir",
     required=True,
     type=str,
     help="Path to save the models, logs, data and other project files")
 parser.add_argument(
     "--data_dir",
     default="../data",
     type=str,
     help="The input data directory (should live outside the project dir)")
 parser.add_argument("--model_type",
                     required=True,
                     type=str,
                     help="Model type selected from the following list: " +
                     ", ".join(MODEL_CLASSES.keys()))
 parser.add_argument(
     "--num_pair_labels",
     default=2,
     type=int,
     help="How many classes are required for the Pair Classification task?")
 parser.add_argument(
     "--load_train_model",
     default=None,
     type=str,
     help="Load a specific model for **training**, instead of from: %s" %
     str(MODEL_PATH_MAP))
 parser.add_argument(
     "--load_eval_model",
     default=None,
     type=str,
Exemplo n.º 2
0
    if args.do_eval:
        trainer.load_model()
        trainer.evaluate("test")


if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument("--task", default=None, required=True, type=str, help="The name of the task to train")
    parser.add_argument("--model_dir", default=None, required=True, type=str, help="Path to save, load model")
    parser.add_argument("--data_dir", default="./data", type=str, help="The input data dir")
    parser.add_argument("--intent_label_file", default="intent_label.txt", type=str, help="Intent Label file")
    parser.add_argument("--slot_label_file", default="slot_label.txt", type=str, help="Slot Label file")

    parser.add_argument("--model_type", default="bert", type=str, help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))

    parser.add_argument('--seed', type=int, default=1234, help="random seed for initialization")
    parser.add_argument("--train_batch_size", default=32, type=int, help="Batch size for training.")
    parser.add_argument("--eval_batch_size", default=64, type=int, help="Batch size for evaluation.")
    parser.add_argument("--max_seq_len", default=50, type=int, help="The maximum total input sequence length after tokenization.")
    parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs", default=10.0, type=float, help="Total number of training epochs to perform.")
    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
    parser.add_argument("--max_steps", default=-1, type=int, help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
    parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")
    parser.add_argument("--dropout_rate", default=0.1, type=float, help="Dropout for fully-connected layers")
Exemplo n.º 3
0
        write_csvFile(os.path.join(args.data_dir, "result.csv"), results)



if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument("--task", default="nsmc", type=str, help="The name of the task to train")
    parser.add_argument("--model_dir", default="./model", type=str, help="Path to save, load model")
    parser.add_argument("--data_dir", default="./data", type=str, help="The input data dir")
    parser.add_argument("--train_file", default="news_train.tsv", type=str, help="Train file")
    parser.add_argument("--dev_file", default="news_test_temp.tsv", type=str, help="Test file")
    parser.add_argument("--test_file", default="news_test.tsv", type=str, help="Test file")

    parser.add_argument("--model_type", default="hanbert", type=str,
                        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument("--model_name_or_path", default="HanBert-54kN-torch", type=str,
                        help="Path to pre-trained model or shortcut name")

    parser.add_argument('--seed', type=int, default=42, help="random seed for initialization")
    parser.add_argument("--train_batch_size", default=32, type=int, help="Batch size for training.")
    parser.add_argument("--eval_batch_size", default=64, type=int, help="Batch size for evaluation.")
    parser.add_argument("--max_title_len", default=50, type=int,
                        help="The maximum title input sequence length after tokenization.")
    parser.add_argument("--max_sentence_len", default=100, type=int,
                        help="The maximum sentence of contents input sequence length after tokenization.")
    parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs", default=5.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
Exemplo n.º 4
0
                        type=str,
                        help='File path for loading intent_label vocab')
    parser.add_argument('--slot_label_file',
                        default='slot_label.txt',
                        type=str,
                        help='File path for loading slot_label vocab ')
    parser.add_argument('--word_vocab_file',
                        default='word_vocab.txt',
                        type=str,
                        help='File path for loading word vocab ')

    parser.add_argument('--model_type',
                        default='joint_bert',
                        type=str,
                        required=True,
                        choices=MODEL_CLASSES.keys(),
                        help='Model type selected in the list:' +
                        ','.join(MODEL_CLASSES.keys()))

    parser.add_argument('--random_seed',
                        type=int,
                        default=1234,
                        help='set random seed')
    parser.add_argument(
        '--max_seqLen',
        type=int,
        default=50,
        help='Set max sequence len After tokenize text.Default is 50')

    parser.add_argument(
        '--num_train_epochs',
def main():
    """Parse command-line options, load a pre-trained model and launch training.

    The function builds the argument parser, resolves the compute device,
    configures logging, seeds the RNGs through ``set_seed``, instantiates the
    config/tokenizer/model triple registered in ``MODEL_CLASSES`` for the
    requested ``--model_type``, and finally hands the dataset returned by
    ``load_and_cache_datasets`` to ``train``.

    The parsed namespace is enriched with two attributes before it is passed
    on: ``args.device`` (a ``torch.device``) and ``args.n_gpu`` (number of
    GPUs that may be used; 0 when running on CPU).
    """
    parser = argparse.ArgumentParser()

    # Batch sizes and device selection
    parser.add_argument("--per_gpu_eval_batch_size",
                        default=20,
                        type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    # Help text previously said "evaluation" (copy-paste slip).
    parser.add_argument("--per_gpu_train_batch_size",
                        default=20,
                        type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--no_cuda",
                        action="store_true",
                        help="Avoid using CUDA when available")

    # Model selection
    parser.add_argument("--model_type",
                        required=True,
                        type=str,
                        choices=list(MODEL_CLASSES.keys()),
                        help="The model architecture to be fine-tuned.")
    parser.add_argument(
        "--model_name_or_path",
        required=True,
        type=str,
        help="The model checkpoint for weights initialization.",
    )

    # Data handling
    parser.add_argument(
        "--overwrite_cache",
        action="store_true",
        help="Overwrite the cached training and evaluation sets")
    parser.add_argument("--sequence_length",
                        default=128,
                        type=int,
                        help="Sequence length for language model.")
    parser.add_argument(
        "--mlm_probability",
        type=float,
        default=0.15,
        help="Ratio of tokens to mask for masked language modeling loss")

    # Training schedule
    parser.add_argument("--num_train_epochs",
                        default=5,
                        type=int,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help=
        "If > 0: set total number of training steps to perform. Override num_train_epochs.",
    )
    parser.add_argument("--warmup_steps",
                        default=0,
                        type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--logging_steps",
                        type=int,
                        default=20,
                        help="Log every X updates steps.")
    parser.add_argument("--save_steps",
                        type=int,
                        default=1000,
                        help="Save checkpoint every X updates steps.")
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass.",
    )

    # Optimizer hyper-parameters
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay",
                        default=0.0,
                        type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")

    # Checkpointing and directories
    parser.add_argument(
        "--save_total_limit",
        type=int,
        default=None,
        help=
        "Limit the total amount of checkpoints, delete the older checkpoints in the output_dir, does not delete by default",
    )
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help=
        "Optional directory to store the pre-trained models downloaded from s3 (instead of the default one)",
    )
    # Help text previously duplicated the --cache_dir description.
    parser.add_argument(
        "--output_dir",
        default="",
        type=str,
        help="The output directory where the model checkpoints will be written.",
    )

    # Task-specific options
    parser.add_argument("--training_type",
                        required=True,
                        choices=["comment", "post"],
                        help="Choose between a comment/post fine-tuned model.")
    parser.add_argument("--train_files", nargs='+', help='Training file(s)')
    parser.add_argument("--seed",
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        "--legacy",
        action="store_true",
        help="Legacy code for compatibility with older pytorch versions.")

    args = parser.parse_args()

    # Use CUDA only when it is available and the user did not opt out.
    use_cuda = torch.cuda.is_available() and not args.no_cuda
    device = torch.device("cuda" if use_cuda else "cpu")
    # Report zero GPUs when running on CPU so that downstream code does not
    # see a GPU count that contradicts args.device (e.g. under --no_cuda).
    args.n_gpu = torch.cuda.device_count() if use_cuda else 0

    args.device = device

    # Setup logging
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    # Rank, distributed and 16-bit fields are fixed values: this script does
    # not expose options for them.
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        -1, device, args.n_gpu, False, False)

    # Set seed
    set_seed(args)

    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]

    # An empty --cache_dir means "use the library default", signalled by None.
    cache_dir = args.cache_dir if args.cache_dir else None

    config = config_class.from_pretrained(args.model_name_or_path,
                                          cache_dir=cache_dir)
    config.output_hidden_states = True
    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path,
                                                cache_dir=cache_dir)

    # from_tf is switched on when the checkpoint path contains '.ckpt'.
    model = model_class.from_pretrained(
        args.model_name_or_path,
        from_tf='.ckpt' in args.model_name_or_path,
        config=config,
        cache_dir=cache_dir)

    model.to(args.device)
    logger.info("Training/evaluation parameters %s", args)

    # Training
    dataset = load_and_cache_datasets(args, tokenizer)
    train(args, dataset, model, tokenizer)