# --- load the BERT/ALBERT setting ---
# The config class is chosen from the config-file path; 'google'
# presumably marks Google-released ALBERT checkpoints vs. the local
# ALBert variant -- TODO confirm against the config files in use.
if 'albert' not in args.bert_config_file:
    bert_config = BertConfig.from_json_file(args.bert_config_file)
else:
    if 'google' in args.bert_config_file:
        bert_config = AlbertConfig.from_json_file(args.bert_config_file)
    else:
        bert_config = ALBertConfig.from_json_file(args.bert_config_file)

# --- load data (test split) ---
# Build the tokenizer and check that the configured vocab size matches
# the vocabulary file actually loaded.
print('loading data...')
tokenizer = tokenization.BertTokenizer(vocab_file=args.vocab_file,
                                       do_lower_case=True)
assert args.vocab_size == len(tokenizer.vocab)

# Convert raw test JSON to example/feature files only when the cached
# files are not already on disk.
if not os.path.exists(args.test_dir1) or not os.path.exists(args.test_dir2):
    json2features(args.test_file, [args.test_dir1, args.test_dir2], tokenizer,
                  is_training=False, max_seq_length=args.max_seq_length)
# NOTE(review): this second conversion looks redundant -- the call above
# already produces args.test_dir1 whenever it is missing, so this branch
# should never fire. Kept as-is for safety; confirm and remove.
if not os.path.exists(args.test_dir1):
    json2features(input_file=args.test_file,
                  output_files=[args.test_dir1, args.test_dir2],
                  tokenizer=tokenizer, is_training=False, repeat_limit=3,
                  max_query_length=96, max_seq_length=args.max_seq_length,
                  doc_stride=128)

# Use context managers so the JSON files are closed deterministically
# (the original `json.load(open(...))` leaked the file handles).
with open(args.test_dir1, 'r') as f:
    test_examples = json.load(f)
with open(args.test_dir2, 'r') as f:
    test_features = json.load(f)

# Ceil-divide the feature count by the batch size: one extra eval step
# covers the final partial batch.
dev_steps_per_epoch = len(test_features) // args.n_batch
if len(test_features) % args.n_batch != 0:
    dev_steps_per_epoch += 1

# --- init model / reload config ---
# NOTE(review): the outer test keys on init_restore_dir while the nested
# test and the file reads key on bert_config_file -- this looks like a
# copy-paste seam between two scripts; confirm the intended condition.
print('init model...')
if 'albert' not in args.init_restore_dir:
    bert_config = BertConfig.from_json_file(args.bert_config_file)
else:
    if 'google' in args.bert_config_file:
        bert_config = AlbertConfig.from_json_file(args.bert_config_file)
    else:
        bert_config = ALBertConfig.from_json_file(args.bert_config_file)

# --- load data (train/dev splits) ---
print('loading data...')
tokenizer = tokenization.BertTokenizer(vocab_file=args.vocab_file,
                                       do_lower_case=True)
assert args.vocab_size == len(tokenizer.vocab)

# Cache train features (the examples path is derived from the features
# path) and the dev example/feature files, skipping work already done.
if not os.path.exists(args.train_dir):
    json2features(args.train_file,
                  [args.train_dir.replace('_features_', '_examples_'),
                   args.train_dir],
                  tokenizer, is_training=True,
                  max_seq_length=args.max_seq_length)
if not os.path.exists(args.dev_dir1) or not os.path.exists(args.dev_dir2):
    json2features(args.dev_file, [args.dev_dir1, args.dev_dir2], tokenizer,
                  is_training=False, max_seq_length=args.max_seq_length)

with open(args.train_dir, 'r') as f:
    train_features = json.load(f)
with open(args.dev_dir1, 'r') as f:
    dev_examples = json.load(f)
with open(args.dev_dir2, 'r') as f:
    dev_features = json.load(f)

# Start each run with a fresh log file.
if os.path.exists(args.log_file):
    os.remove(args.log_file)
# --- load the BERT/ALBERT setting ---
# Plain BERT vs. ALBERT config class, chosen from the config-file path.
if 'albert' not in args.bert_config_file:
    bert_config = BertConfig.from_json_file(args.bert_config_file)
else:
    bert_config = ALBertConfig.from_json_file(args.bert_config_file)

# --- load data ---
# Build the tokenizer and check that the configured vocab size matches
# the vocabulary file actually loaded.
print('loading data...')
tokenizer = tokenization.BertTokenizer(vocab_file=args.vocab_file,
                                       do_lower_case=True)
assert args.vocab_size == len(tokenizer.vocab)

# Cache train features (the examples path is derived from the features
# path) and the dev example/feature files, skipping work already done.
# max_seq_length is tied to the model's position-embedding table here,
# unlike the variants that use args.max_seq_length.
if not os.path.exists(args.train_dir):
    json2features(args.train_file,
                  [args.train_dir.replace('_features_', '_examples_'),
                   args.train_dir],
                  tokenizer, is_training=True,
                  max_seq_length=bert_config.max_position_embeddings)
if not os.path.exists(args.dev_dir1) or not os.path.exists(args.dev_dir2):
    json2features(args.dev_file, [args.dev_dir1, args.dev_dir2], tokenizer,
                  is_training=False,
                  max_seq_length=bert_config.max_position_embeddings)

# Use context managers so the JSON files are closed deterministically
# (the original `json.load(open(...))` leaked the file handles).
with open(args.train_dir, 'r') as f:
    train_features = json.load(f)
with open(args.dev_dir1, 'r') as f:
    dev_examples = json.load(f)
with open(args.dev_dir2, 'r') as f:
    dev_features = json.load(f)

# Start each run with a fresh log file.
if os.path.exists(args.log_file):
    os.remove(args.log_file)
# --- load the BERT/ALBERT setting ---
# Plain BERT vs. ALBERT config class, chosen from the config-file path.
if 'albert' not in args.bert_config_file:
    bert_config = BertConfig.from_json_file(args.bert_config_file)
else:
    bert_config = ALBertConfig.from_json_file(args.bert_config_file)

# --- load data (test split) ---
# Build the tokenizer and check that the configured vocab size matches
# the vocabulary file actually loaded.
print('loading data...')
tokenizer = tokenization.BertTokenizer(vocab_file=args.vocab_file,
                                       do_lower_case=True)
assert args.vocab_size == len(tokenizer.vocab)

# Convert raw test JSON to example/feature files only when the cached
# files are not already on disk.
if not os.path.exists(args.test_dir1) or not os.path.exists(args.test_dir2):
    json2features(args.test_file, [args.test_dir1, args.test_dir2], tokenizer,
                  is_training=False,
                  max_seq_length=bert_config.max_position_embeddings)

# Use context managers so the JSON files are closed deterministically
# (the original `json.load(open(...))` leaked the file handles).
with open(args.test_dir1, 'r') as f:
    test_examples = json.load(f)
with open(args.test_dir2, 'r') as f:
    test_features = json.load(f)

# Ceil-divide the feature count by the batch size: one extra eval step
# covers the final partial batch.
dev_steps_per_epoch = len(test_features) // args.n_batch
if len(test_features) % args.n_batch != 0:
    dev_steps_per_epoch += 1

# --- init model ---
# The model class is keyed on the checkpoint path to be restored.
print('init model...')
if 'albert' not in args.init_restore_dir:
    model = BertForQuestionAnswering(bert_config)
else:
    model = ALBertForQA(bert_config, dropout_rate=args.dropout)