def _validate_paths(self, data_path):
    """Resolve every dataset file name against *data_path* and validate it.

    Checks that *data_path* is an existing directory, then rewrites each
    entry of ``self.dataset_files`` in place from a bare file name to its
    absolute path under *data_path*, validating that each file exists.

    Args:
        data_path (str): directory expected to contain all dataset files.
    """
    validate_existing_directory(data_path)
    # Replacing values for existing keys while iterating items() is safe —
    # the dict's key set never changes.
    for key, file_name in self.dataset_files.items():
        resolved = path.join(data_path, file_name)
        validate_existing_filepath(resolved)
        self.dataset_files[key] = resolved
# Model hyper-parameters for the SQuAD reading-comprehension training run.
params_dict = {
    'batch_size': args.batch_size_squad,
    'embed_size': 300,        # word-embedding dimensionality
    'pad_idx': 0,             # token index used for sequence padding
    'hs': hidden_size,
    'glove_dim': 300,         # GloVe vector dimensionality
    'iter_interval': 8000,    # iterations between validation runs
    'num_iterations': 500000,
    'ax': ax,
}
# Initializer
init = GlorotInit()
params_dict['init'] = init

validate_existing_directory(args.data_path)
path_gen = sanitize_path(args.data_path)
# Guarantee a trailing separator; os.path.join(dir, '') is the idiomatic
# form (the original concatenated "/" by hand inside os.path.join).
path_gen = os.path.join(path_gen, '')

# Map dataset roles to their preprocessed file names.
file_name_dict = {
    'train_para_ids': 'train.ids.context',
    'train_ques_ids': 'train.ids.question',
    'train_answer': 'train.span',
    'val_para_ids': 'dev.ids.context',
    'val_ques_ids': 'dev.ids.question',
    'val_ans': 'dev.span',
    'vocab_file': 'vocab.dat',
}

# Join directory and file name properly instead of string concatenation.
train_para_ids = os.path.join(path_gen, file_name_dict['train_para_ids'])
train_ques_ids = os.path.join(path_gen, file_name_dict['train_ques_ids'])
elmo_ecb_embeddings = load_elmo_for_vocab(mentions) with open(out_file, 'wb') as f: pickle.dump(elmo_ecb_embeddings, f) logger.info('Saving dump to file-%s', out_file) if __name__ == '__main__': parser = argparse.ArgumentParser( description='Create Elmo Embedding dataset only dump') parser.add_argument('--mentions', type=str, help='mentions_file file', required=True) parser.add_argument('--output', type=str, help='location were to create dump file', required=True) args = parser.parse_args() if os.path.isdir(args.mentions): io.validate_existing_directory(args.mentions) else: io.validate_existing_filepath(args.mentions) elmo_dump() print('Done!')
# Fixed: the original embedded a backslash line-continuation inside the help
# string, injecting stray whitespace into the displayed text.
parser.add_argument('--data_path', type=str,
                    help='enter path where training data and the glove '
                         'embeddings were downloaded')
# Fixed help-text typo: "Chose" -> "Choose".
parser.add_argument('--no_preprocess_glove', action='store_true',
                    help='Choose whether or not to preprocess glove '
                         'embeddings')
# Removed no-op parser.set_defaults() call (it had no arguments).
args = parser.parse_args()
glove_flag = not args.no_preprocess_glove

validate_existing_directory(args.data_path)
data_path = sanitize_path(args.data_path)
# Guarantee a trailing separator the idiomatic way instead of concatenating
# "/" by hand inside os.path.join.
data_path = os.path.join(data_path, '')

# Load Train and Dev Data — join directory and file name properly rather
# than via string concatenation.
train_filename = os.path.join(data_path, 'train-v1.1.json')
dev_filename = os.path.join(data_path, 'dev-v1.1.json')

with open(train_filename) as train_file:
    train_data = json.load(train_file)

with open(dev_filename) as dev_file:
    dev_data = json.load(dev_file)

print('Extracting data from json files')

# Extract training data from raw files
train_para, train_question, train_ans = extract_data_from_files(train_data)

# Extract dev data from raw dataset