def __init__(self, mode=None, task_id=None, shard_id=None, question_id=None):
  self.data = CsrData()
  self.data.load_csr_data(
      full_wiki=FLAGS.full_wiki,
      files_dir=FLAGS.apr_files_dir,
      mode=mode,
      task_id=task_id,
      shard_id=shard_id,
      question_id=question_id)
  self.high_freq_relations = {
      'P31': 'instance of',
      'P17': 'country',
      'P131': 'located in the administrative territorial entity',
      'P106': 'occupation',
      'P21': 'sex or gender',
      'P735': 'given name',
      'P27': 'country of citizenship',
      'P19': 'place of birth'
  }
max_tasks = {"train": 50, "dev": 5}
max_shards = {"train": 7, "dev": 17}
apr = ApproximatePageRank()
for mode in [FLAGS.split]:
  # Parse all shards in each mode.
  # Currently sequential; can be parallelized later (see the sketch below).
  for task_id in [FLAGS.task_id]:  # range(0, max_tasks[mode])
    for shard_id in [FLAGS.shard_split_id]:  # range(0, max_shards[mode])
      # if task_id == 0 and shard_id in range(0, 16):
      #   print("skipping finished job")
      #   continue
      nq_data, entities = get_examples(FLAGS.nq_dir, mode, task_id, shard_id)
      if nq_data is None:
        print("No examples here")
        continue
      print("Size of all entities: %d" % len(entities))
      two_hop_entities = apr.get_khop_entities(entities, FLAGS.csr_num_hops)
      print("Size of two hop entities: %d" % len(two_hop_entities))
      csr_data = CsrData()
      csr_data.create_and_save_csr_data(
          full_wiki=FLAGS.full_wiki,
          decompose_ppv=FLAGS.decompose_ppv,
          files_dir=FLAGS.apr_files_dir,
          sub_entities=two_hop_entities,
          mode=mode,
          task_id=task_id,
          shard_id=shard_id)
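# The loop above is sequential and notes it "can be parallelized later". Below
# is a minimal sketch of one way to do that with the standard library. It is
# an illustration, not part of the original code: process_shard is a
# hypothetical helper wrapping the per-shard body, it assumes FLAGS have
# already been parsed, and it reuses the max_tasks / max_shards dicts defined
# above. Each worker builds its own ApproximatePageRank, which reloads the CSR
# graph, so the pool size should stay small.
import itertools
import multiprocessing


def process_shard(task_and_shard):
  """Builds and saves the CSR subgraph for one (task_id, shard_id) pair."""
  task_id, shard_id = task_and_shard
  apr = ApproximatePageRank()
  nq_data, entities = get_examples(FLAGS.nq_dir, FLAGS.split, task_id, shard_id)
  if nq_data is None:
    return
  two_hop_entities = apr.get_khop_entities(entities, FLAGS.csr_num_hops)
  csr_data = CsrData()
  csr_data.create_and_save_csr_data(
      full_wiki=FLAGS.full_wiki,
      decompose_ppv=FLAGS.decompose_ppv,
      files_dir=FLAGS.apr_files_dir,
      sub_entities=two_hop_entities,
      mode=FLAGS.split,
      task_id=task_id,
      shard_id=shard_id)


all_pairs = list(
    itertools.product(range(max_tasks[FLAGS.split]),
                      range(max_shards[FLAGS.split])))
with multiprocessing.Pool(processes=4) as pool:
  pool.map(process_shard, all_pairs)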
def __init__(self):
  self.data = CsrData()
  self.data.load_csr_data(
      full_wiki=FLAGS.full_wiki, files_dir=FLAGS.apr_files_dir)