def main(): parser = argparse.ArgumentParser() add_encoder_params(parser) add_training_params(parser) add_tokenizer_params(parser) add_reader_preprocessing_params(parser) # reader specific params parser.add_argument("--max_n_answers", default=10, type=int, help="Max amount of answer spans to marginalize per singe passage") parser.add_argument('--passages_per_question', type=int, default=2, help="Total amount of positive and negative passages per question") parser.add_argument('--passages_per_question_predict', type=int, default=50, help="Total amount of positive and negative passages per question for evaluation") parser.add_argument("--max_answer_length", default=10, type=int, help="The maximum length of an answer that can be generated. This is needed because the start " "and end predictions are not conditioned on one another.") parser.add_argument('--eval_top_docs', nargs='+', type=int, help="top retrival passages thresholds to analyze prediction results for") parser.add_argument('--checkpoint_file_name', type=str, default='dpr_reader') parser.add_argument('--prediction_results_file', type=str, help='path to a file to write prediction results to') # training parameters parser.add_argument("--eval_step", default=2000, type=int, help="batch steps to run validation and save checkpoint") parser.add_argument("--output_dir", default=None, type=str, help="The output directory where the model checkpoints will be written to") parser.add_argument('--fully_resumable', action='store_true', help="Enables resumable mode by specifying global step dependent random seed before shuffling " "in-batch data") args = parser.parse_args() if args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) setup_args_gpu(args) set_seed(args) print_args(args) trainer = ReaderTrainer(args) if args.train_file is not None: trainer.run_train() elif args.dev_file: logger.info("No train files are specified. Run validation.") trainer.validate() else: logger.warning("Neither train_file or (model_file & dev_file) parameters are specified. Nothing to do.")
def setup_reader(model_file): global reader parser = argparse.ArgumentParser() add_encoder_params(parser) add_training_params(parser) add_tokenizer_params(parser) add_reader_preprocessing_params(parser) args = parser.parse_args() setup_args_gpu(args) set_seed(args) print_args(args) reader = Reader(args, model_file)
# retrieval specific params parser.add_argument('--dense_index_path', type=str, default="") parser.add_argument('--tfidf_path', type=str, default="/checkpoint/sewonmin/dpr/drqa_retrieval_seen_only/db-tfidf-ngram=2-hash=16777216-tokenizer=simple.npz") parser.add_argument('--match', type=str, default='string', choices=['regex', 'string']) parser.add_argument('--n-docs', type=int, default=100) #parser.add_argument('--batch_size', type=int, default=32, help="Batch size for question encoder forward pass") parser.add_argument('--index_buffer', type=int, default=50000, help="Temporal memory data buffer size (in samples) for indexer") parser.add_argument("--hnsw_index", action='store_true', help='If enabled, use inference time efficient HNSW index') parser.add_argument("--save_or_load_index", action='store_true', default=True, help='If enabled, save index') # reader specific params add_encoder_params(parser) add_training_params(parser) add_tokenizer_params(parser) add_reader_preprocessing_params(parser) parser.add_argument("--max_n_answers", default=10, type=int, help="Max amount of answer spans to marginalize per singe passage") parser.add_argument('--passages_per_question', type=int, default=2, help="Total amount of positive and negative passages per question") parser.add_argument('--passages_per_question_predict', type=int, default=50, help="Total amount of positive and negative passages per question for evaluation") parser.add_argument("--max_answer_length", default=10, type=int, help="The maximum length of an answer that can be generated. This is needed because the start " "and end predictions are not conditioned on one another.") parser.add_argument('--eval_top_docs', nargs='+', type=int, help="top retrival passages thresholds to analyze prediction results for") parser.add_argument('--checkpoint_file_name', type=str, default='dpr_reader') parser.add_argument('--prediction_results_file', type=str, help='path to a file to write prediction results to')