def train_or_dev_or_test_2_eachworld_splitter(self, train_dev_testflag):
    assert train_dev_testflag in ["train", "dev", "test"]
    # The dev split is stored on disk as "val.json".
    mention_path = (self.args.mentions_dir + train_dev_testflag + '.json'
                    if train_dev_testflag != "dev"
                    else self.args.mentions_dir + "val" + ".json")
    mentions = mentions_in_train_dev_test_loader(mention_path)
    print('\n{0} mentions are now preprocessed...\n'.format(train_dev_testflag))

    world_2_idx2mention = {}
    skipped = 0
    for mention_data in tqdm(mentions):
        try:
            mention_json = self.mentionConverter(one_line_mention=mention_data)
        except Exception:
            # The gold entity for this mention could not be resolved.
            skipped += 1
            print("mention id", mention_data["mention_id"],
                  "is skipped because gold cannot be found")
            continue
        world_belongingto = mention_json["gold_world"]
        if world_belongingto not in world_2_idx2mention:
            world_2_idx2mention[world_belongingto] = {}
        # Key each mention by its insertion order within its world.
        world_2_idx2mention[world_belongingto][
            len(world_2_idx2mention[world_belongingto])] = mention_json

    for world, its_preprocesseddata in world_2_idx2mention.items():
        jdump(its_preprocesseddata,
              self.args.mentions_splitbyworld_dir + world + "/mentions.json")
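# A minimal, self-contained sketch (not from the original source) of the
# per-world grouping performed above: mentions are bucketed by their gold
# world and keyed by insertion order. The example mention dicts in the
# commented call are illustrative assumptions, not real data.
def _group_mentions_by_world(mention_jsons):
    world_2_idx2mention = {}
    for mention_json in mention_jsons:
        bucket = world_2_idx2mention.setdefault(mention_json["gold_world"], {})
        bucket[len(bucket)] = mention_json
    return world_2_idx2mention

# _group_mentions_by_world([{"gold_world": "doctor_who", "mention_id": "m0"},
#                           {"gold_world": "doctor_who", "mention_id": "m1"},
#                           {"gold_world": "star_trek", "mention_id": "m2"}])
# -> {"doctor_who": {0: {...m0...}, 1: {...m1...}}, "star_trek": {0: {...m2...}}}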
def devEvalExperimentEntireDevWorldLog(experiment_logdir, t_entire_h1c,
                                       t_entire_h10c, t_entire_h50c,
                                       t_entire_h64c, t_entire_h100c,
                                       t_entire_h500c, t_entire_datapoints,
                                       epoch=0):
    hits_counts = [t_entire_h1c, t_entire_h10c, t_entire_h50c,
                   t_entire_h64c, t_entire_h100c, t_entire_h500c]
    # Convert raw hits@k counts into percentages over the entire dev set.
    devEvalResultWithPercent = [
        round(hits_c / t_entire_datapoints * 100, 4) for hits_c in hits_counts
    ]
    print('\nt_h1c, h10c, h50c, h64c, h100c, h500c @ Percent:\n',
          devEvalResultWithPercent)
    dump_dir = experiment_logdir + DevEvalDuringTrainDirForEachExperiment + '/'
    if not os.path.exists(dump_dir):
        os.mkdir(dump_dir)
    jpath = dump_dir + 'ep' + str(epoch) + 'devEntireEvalResult.json'
    j = {
        't_h1c_Dev': devEvalResultWithPercent[0],
        'h10c_Dev': devEvalResultWithPercent[1],
        'h50c_Dev': devEvalResultWithPercent[2],
        'h64c_Dev': devEvalResultWithPercent[3],
        'h100c_Dev': devEvalResultWithPercent[4],
        'h500c_Dev': devEvalResultWithPercent[5]
    }
    jdump(j=j, path=jpath)
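# A hedged, self-contained sketch of the hits@k -> percent conversion used by
# the logging helpers here. The counts in the commented example are made up
# purely for illustration, not measured results.
def _hits_counts_to_percent(hits_counts, total_datapoints):
    return [round(c / total_datapoints * 100, 4) for c in hits_counts]

# _hits_counts_to_percent([412, 873, 945, 951, 978, 996], 1000)
# -> [41.2, 87.3, 94.5, 95.1, 97.8, 99.6]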
def logDevEvaluationOfOneWorldDuringTrain(self, h1count, h10count, h50count,
                                          h64count, h100count, h500count,
                                          data_points, trainEpoch):
    dev_eval_dir = (self.experiment_logdir +
                    DevEvalDuringTrainDirForEachExperiment + '/')
    if not os.path.exists(dev_eval_dir):
        os.mkdir(dev_eval_dir)
    world_dir = dev_eval_dir + self.world_name + '/'
    if not os.path.exists(world_dir):
        os.mkdir(world_dir)
    dumped_jsonpath = world_dir + 'devEval_ep' + str(trainEpoch) + '.json'
    jdump(j={
        'h1_percent': h1count / data_points * 100,
        'h10_percent': h10count / data_points * 100,
        'h50_percent': h50count / data_points * 100,
        'h64_percent': h64count / data_points * 100,
        'h100_percent': h100count / data_points * 100,
        'h500_percent': h500count / data_points * 100,
        'data_points': data_points,
        'world_name': self.world_name,
        'train_ep': trainEpoch
    }, path=dumped_jsonpath)
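# Optional alternative sketch: the two exists/mkdir checks above could be
# collapsed with os.makedirs, which creates intermediate directories and
# tolerates pre-existing ones. Shown as an option, not as the original
# author's code.
#
# os.makedirs(os.path.join(self.experiment_logdir,
#                          DevEvalDuringTrainDirForEachExperiment,
#                          self.world_name),
#             exist_ok=True)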
def log_one_world(self, h1count, h10count, h50count, h64count, h100count,
                  h500count, data_points):
    final_dir = self.experiment_logdir + 'final_' + self.dev_or_test
    if not os.path.exists(final_dir):
        os.mkdir(final_dir)
    dumped_jsonpath = final_dir + '/' + self.world_name + '_eval.json'
    jdump(j={
        'h1_percent': h1count / data_points * 100,
        'h10_percent': h10count / data_points * 100,
        'h50_percent': h50count / data_points * 100,
        'h64_percent': h64count / data_points * 100,
        'h100_percent': h100count / data_points * 100,
        'h500_percent': h500count / data_points * 100,
        'data_points': data_points
    }, path=dumped_jsonpath)
def from_oneworld_dump_preprocessed_world(self, world_name):
    '''
    :param world_name: one world (domain) of the dataset
    :return: dump dui2title, dui2desc, dui2idx, idx2dui into ./data/worlds/$world_name/
    '''
    # NOTE: the original try/except constructed an AssertionError without
    # raising it, which silently swallowed the check; assert with a message instead.
    assert world_name in ALL_WORLDS, world_name + " is not included in datasets"

    each_world_dir = self.args.dataset_dir + 'worlds/' + world_name + '/'
    if not os.path.exists(each_world_dir):
        os.mkdir(each_world_dir)

    one_world_json_path = self.args.documents_dir + world_name + '.json'
    one_world_json = oneworld_opener(one_world_json_path)

    # Map document IDs (duis) to titles and contiguous indices.
    dui2title, idx2dui, dui2idx = self.oneworld_json_2_docid2title(
        one_world_json=one_world_json)
    dui2title = self.dui2tokens_tokentrimmer(
        dui2tokens=dui2title,
        max_token=self.args.extracted_first_token_for_title)

    # Raw descriptions, plus a version trimmed to the first N tokens.
    dui2desc_raw = self.oneworld_json_2_dui2desc_raw(
        one_world_json=one_world_json)
    dui2desc = self.dui2tokens_tokentrimmer(
        dui2tokens=dui2desc_raw,
        max_token=self.args.extracted_first_token_for_description)

    jdump(j=dui2title, path=each_world_dir + 'dui2title.json')
    jdump(j=idx2dui, path=each_world_dir + 'idx2dui.json')
    jdump(j=dui2idx, path=each_world_dir + 'dui2idx.json')
    jdump(j=dui2desc, path=each_world_dir + 'dui2desc.json')
    jdump(j=dui2desc_raw, path=each_world_dir + 'dui2desc_raw.json')

    print('\n==={0} was preprocessed==='.format(world_name))
    print('total entities in {0}:'.format(world_name), len(dui2title))
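# Hedged usage sketch: preprocessing every world in the dataset. Assumes the
# method above lives on a preprocessor object that exposes it; `preprocessor`
# is an illustrative name, not taken from the original source.
#
# for world_name in ALL_WORLDS:
#     preprocessor.from_oneworld_dump_preprocessed_world(world_name=world_name)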
def on_get(self, req, resp, page_id):
    """Get request

    Keyword arguments:
    req - HTTP request
    resp - HTTP response
    page_id - Facebook page ID
    """
    if self._check_last_access(page_id):
        # Refresh events and record the access time when the last-access check passes.
        self._request_events(page_id)
        self._save_last_access(page_id)
    else:
        data = self.db.get_items(page_id, self.table)
        # Remove pagination dict
        del data['pages']
        # Add expiration cache time in seconds
        data['cache_expiration_time'] = self.cache_expiration_time
        resp.body = u.jdump(data)
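# Hedged usage sketch: how a Falcon application might route requests to the
# resource that owns on_get above. The resource class name `PageEvents` and
# the URI template are illustrative assumptions; only the
# on_get(req, resp, page_id) responder signature comes from the code itself.
#
# import falcon
# app = falcon.App()
# app.add_route('/events/{page_id}', PageEvents())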
def dump(self):
    print(utils.jdump(self._data))