# Model, evaluation and data
parser.add_argument(
    "--model_dir", required=True, type=str,
    help="Path to save the models, logs, data and other project files")
parser.add_argument(
    "--data_dir", default="../data", type=str,
    help="The input data directory (should live outside the project dir)")
parser.add_argument(
    "--model_type", required=True, type=str,
    help="Model type selected from the following list: " + ", ".join(MODEL_CLASSES.keys()))
parser.add_argument(
    "--num_pair_labels", default=2, type=int,
    help="Number of classes for the Pair Classification task")
parser.add_argument(
    "--load_train_model", default=None, type=str,
    help="Load a specific model for **training**, instead of from: %s" % str(MODEL_PATH_MAP))
parser.add_argument(
    "--load_eval_model", default=None, type=str,
    # The excerpt breaks off here; the help string below is inferred from
    # the parallel --load_train_model argument above.
    help="Load a specific model for **evaluation**, instead of from: %s" % str(MODEL_PATH_MAP))
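# Several of these snippets reference MODEL_CLASSES and MODEL_PATH_MAP
# without defining them. A minimal sketch of the structure they assume;
# the concrete classes and checkpoint names here are illustrative, not
# the project's actual values:
from transformers import BertConfig, BertForSequenceClassification, BertTokenizer

MODEL_CLASSES = {
    # model_type -> (config class, model class, tokenizer class),
    # matching the 3-way unpacking done in main() further below
    "bert": (BertConfig, BertForSequenceClassification, BertTokenizer),
}

MODEL_PATH_MAP = {
    # model_type -> default pre-trained checkpoint to load
    "bert": "bert-base-uncased",
}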
    if args.do_eval:
        trainer.load_model()
        trainer.evaluate("test")


if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument("--task", default=None, required=True, type=str,
                        help="The name of the task to train")
    parser.add_argument("--model_dir", default=None, required=True, type=str,
                        help="Path to save and load the model")
    parser.add_argument("--data_dir", default="./data", type=str,
                        help="The input data directory")
    parser.add_argument("--intent_label_file", default="intent_label.txt", type=str,
                        help="Intent label file")
    parser.add_argument("--slot_label_file", default="slot_label.txt", type=str,
                        help="Slot label file")
    parser.add_argument("--model_type", default="bert", type=str,
                        help="Model type selected from the list: " + ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument("--seed", type=int, default=1234,
                        help="Random seed for initialization")
    parser.add_argument("--train_batch_size", default=32, type=int,
                        help="Batch size for training.")
    parser.add_argument("--eval_batch_size", default=64, type=int,
                        help="Batch size for evaluation.")
    parser.add_argument("--max_seq_len", default=50, type=int,
                        help="The maximum total input sequence length after tokenization.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs", default=10.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay to apply, if any.")
    parser.add_argument("--gradient_accumulation_steps", type=int, default=1,
                        help="Number of update steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for the Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: total number of training steps to perform; overrides num_train_epochs.")
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--dropout_rate", default=0.1, type=float,
                        help="Dropout rate for fully-connected layers")
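# A hypothetical invocation of the script above (assuming it is saved as
# main.py; the --do_train/--do_eval flags are implied by the args.do_eval
# check but not shown in this excerpt, and "atis" is only an example task):
#
#   python main.py --task atis \
#                  --model_type bert \
#                  --model_dir atis_model \
#                  --do_train --do_eval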
    write_csvFile(os.path.join(args.data_dir, "result.csv"), results)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument("--task", default="nsmc", type=str,
                        help="The name of the task to train")
    parser.add_argument("--model_dir", default="./model", type=str,
                        help="Path to save and load the model")
    parser.add_argument("--data_dir", default="./data", type=str,
                        help="The input data directory")
    parser.add_argument("--train_file", default="news_train.tsv", type=str,
                        help="Train file")
    parser.add_argument("--dev_file", default="news_test_temp.tsv", type=str,
                        help="Dev file")  # help fixed: was a copy of the --test_file help
    parser.add_argument("--test_file", default="news_test.tsv", type=str,
                        help="Test file")
    parser.add_argument("--model_type", default="hanbert", type=str,
                        help="Model type selected from the list: " + ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument("--model_name_or_path", default="HanBert-54kN-torch", type=str,
                        help="Path to pre-trained model or shortcut name")
    parser.add_argument("--seed", type=int, default=42,
                        help="Random seed for initialization")
    parser.add_argument("--train_batch_size", default=32, type=int,
                        help="Batch size for training.")
    parser.add_argument("--eval_batch_size", default=64, type=int,
                        help="Batch size for evaluation.")
    parser.add_argument("--max_title_len", default=50, type=int,
                        help="The maximum title input sequence length after tokenization.")
    parser.add_argument("--max_sentence_len", default=100, type=int,
                        help="The maximum content-sentence input sequence length after tokenization.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs", default=5.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay to apply, if any.")
    parser.add_argument("--gradient_accumulation_steps", type=int, default=1,
                        # The excerpt breaks off here; help completed to match
                        # the same argument in the previous snippet.
                        help="Number of update steps to accumulate before performing a backward/update pass.")
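# write_csvFile is a project helper that is not shown in this excerpt. A
# minimal sketch of what it plausibly does, assuming `results` is an
# iterable of rows (lists or tuples); the real helper may differ:
import csv

def write_csvFile(path, rows):
    # Write an iterable of rows to `path` as a CSV file.
    with open(path, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(rows)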
parser.add_argument(
    # The excerpt opens mid-call; the argument name and default are inferred
    # from the help string and the parallel --slot_label_file argument below.
    '--intent_label_file', default='intent_label.txt',
    type=str,
    help='File path for loading the intent_label vocab')
parser.add_argument(
    '--slot_label_file', default='slot_label.txt', type=str,
    help='File path for loading the slot_label vocab')
parser.add_argument(
    '--word_vocab_file', default='word_vocab.txt', type=str,
    help='File path for loading the word vocab')
parser.add_argument(
    '--model_type', default='joint_bert', type=str, required=True,
    choices=MODEL_CLASSES.keys(),
    help='Model type selected from the list: ' + ', '.join(MODEL_CLASSES.keys()))
parser.add_argument('--random_seed', type=int, default=1234,
                    help='Random seed for initialization')
parser.add_argument(
    '--max_seqLen', type=int, default=50,
    help='Maximum sequence length after tokenizing the text. Default is 50')
parser.add_argument(
    '--num_train_epochs',
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("--per_gpu_eval_batch_size", default=20, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument("--per_gpu_train_batch_size", default=20, type=int,
                        help="Batch size per GPU/CPU for training.")  # help fixed: was a copy of the eval help
    parser.add_argument("--no_cuda", action="store_true",
                        help="Avoid using CUDA when available")
    parser.add_argument("--model_type", required=True, type=str,
                        choices=list(MODEL_CLASSES.keys()),
                        help="The model architecture to be fine-tuned.")
    parser.add_argument("--model_name_or_path", required=True, type=str,
                        help="The model checkpoint for weights initialization.")
    parser.add_argument("--overwrite_cache", action="store_true",
                        help="Overwrite the cached training and evaluation sets")
    parser.add_argument("--sequence_length", default=128, type=int,
                        help="Sequence length for the language model.")
    parser.add_argument("--mlm_probability", type=float, default=0.15,
                        help="Ratio of tokens to mask for the masked-language-modeling loss")
    parser.add_argument("--num_train_epochs", default=5, type=int,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: total number of training steps to perform; overrides num_train_epochs.")
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--logging_steps", type=int, default=20,
                        help="Log every X update steps.")
    parser.add_argument("--save_steps", type=int, default=1000,
                        help="Save a checkpoint every X update steps.")
    parser.add_argument("--gradient_accumulation_steps", type=int, default=1,
                        help="Number of update steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay to apply, if any.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for the Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--save_total_limit", type=int, default=None,
                        help="Limit the total number of checkpoints; deletes the older checkpoints in output_dir. No deletion by default.")
    parser.add_argument("--cache_dir", default="", type=str,
                        help="Optional directory to store pre-trained models downloaded from S3 (instead of the default one)")
    parser.add_argument("--output_dir", default="", type=str,
                        # help fixed: the original duplicated the --cache_dir help
                        help="The output directory where checkpoints will be written")
    parser.add_argument("--training_type", required=True, choices=["comment", "post"],
                        help="Choose between a comment- or post-fine-tuned model.")
    parser.add_argument("--train_files", nargs='+',
                        help="Training file(s)")
    parser.add_argument("--seed", type=int, default=42,
                        help="Random seed for initialization")
    parser.add_argument("--legacy", action="store_true",
                        help="Legacy code for compatibility with older PyTorch versions.")
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    args.n_gpu = torch.cuda.device_count()
    args.device = device

    # Setup logging
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        -1, device, args.n_gpu, False, False)

    # Set seed
    set_seed(args)

    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(
        args.model_name_or_path,
        cache_dir=args.cache_dir if args.cache_dir else None)
    config.output_hidden_states = True
    tokenizer = tokenizer_class.from_pretrained(
        args.model_name_or_path,
        cache_dir=args.cache_dir if args.cache_dir else None)
    model = model_class.from_pretrained(
        args.model_name_or_path,
        from_tf=bool('.ckpt' in args.model_name_or_path),
        config=config,
        cache_dir=args.cache_dir if args.cache_dir else None)
    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Training
    dataset = load_and_cache_datasets(args, tokenizer)
    train(args, dataset, model, tokenizer)
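# set_seed is called in main() above but not defined in this excerpt. A
# minimal sketch of the usual implementation:
import random

import numpy as np
import torch

def set_seed(args):
    # Seed every RNG the training loop can touch so runs are reproducible.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)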