def save_data_graph_feat(out_folder, features, examples, graphs, f_type, config):
    """Persist features, examples, and graphs as gzip-compressed pickles.

    Each collection is written to ``out_folder`` under a name produced by
    ``get_cached_filename('{f_type}_<kind>', config)``, where ``<kind>`` is
    one of ``features``, ``examples``, ``graphs``.

    Args:
        out_folder: Directory that receives the cached files.
        features: Sequence of feature objects to pickle.
        examples: Sequence of example objects to pickle.
        graphs: Sequence/collection of graph objects to pickle.
        f_type: Tag prepended to each cached filename (e.g. data split/ranker).
        config: Passed through to ``get_cached_filename``.
    """
    # The original code repeated the same save sequence three times; a single
    # loop keeps the console output and on-disk artifacts identical while
    # removing the duplication.
    collections = (('features', features), ('examples', examples), ('graphs', graphs))
    for step, (kind, data) in enumerate(collections, start=1):
        cached_file = os.path.join(
            out_folder, get_cached_filename('{}_{}'.format(f_type, kind), config))
        print('({}). Start saving {} {}'.format(step, len(data), kind))
        with gzip.open(cached_file, 'wb') as fout:
            pickle.dump(data, fout)
        print('Save {} {} into {}'.format(len(data), kind, cached_file))
def get_graph_file(self, tag, f_type=None):
    """Return the path of the cached graph file for the given *tag*.

    NOTE(review): when ``f_type`` is left as ``None`` the cached name is
    built from the literal string ``'None_graphs'`` — confirm callers
    always supply ``f_type``.
    """
    graph_name = '{}_graphs'.format(f_type)
    cached_filename = get_cached_filename(graph_name, self.config)
    return join(self.data_dir, tag, cached_filename)
def get_graph_file(data_dir, tag, f_type, config):
    """Return the path of the cached graph file under ``data_dir/tag``."""
    graph_name = '{}_graphs'.format(f_type)
    return join(data_dir, tag, get_cached_filename(graph_name, config))
def get_example_file(data_dir, tag, f_type, config):
    """Return the path of the cached example file under ``data_dir/tag``."""
    example_name = '{}_examples'.format(f_type)
    return join(data_dir, tag, get_cached_filename(example_name, config))
# Derive the data-source tag from the ranker; only training splits carry a
# concrete source type (evaluation splits pass None downstream).
data_source_name = "{}".format(ranker)
data_source_type = data_source_name if "train" in data_type else None
print('data type = {} \n data source type = {} \n data source name = {}'.
      format(data_type, data_source_type, data_source_name))

# Read raw HotpotQA examples with paragraph / NER / document-link inputs.
examples = read_hotpot_examples(para_file=args.para_path,
                                full_file=args.full_data,
                                ner_file=args.ner_path,
                                doc_link_file=args.doc_link_ner,
                                data_source_type=data_source_type)

# Cache the raw examples as a gzip-compressed pickle.
cached_examples_file = os.path.join(
    args.output_dir,
    get_cached_filename('{}_examples'.format(data_source_name), args))
with gzip.open(cached_examples_file, 'wb') as fout:
    pickle.dump(examples, fout)

# Tokenize the examples into model-ready features.
features = convert_examples_to_features(
    examples,
    tokenizer,
    max_seq_length=args.max_seq_length,
    max_query_length=args.max_query_length,
    max_entity_num=args.max_entity_num,
    cls_token=tokenizer.cls_token,
    sep_token=tokenizer.sep_token,
    is_roberta=bool(args.model_type in ['roberta']),
    filter_no_ans=args.filter_no_ans)
def get_example_file(self, tag, f_type):
    """Return the path of the cached example file for the given *tag*."""
    example_name = '{}_examples'.format(f_type)
    cached_filename = get_cached_filename(example_name, self.config)
    return join(self.data_dir, tag, cached_filename)