try:
    # Python 2: both byte strings and unicode strings denote a file path.
    _PATH_TYPES = (str, unicode)
except NameError:
    # Python 3: `unicode` is gone and `str` covers every text string.
    _PATH_TYPES = (str,)


def _is_path(value):
    """Return True when `value` is a string, i.e. should be treated as a path."""
    return isinstance(value, _PATH_TYPES)


def dynamic_load(trec, qrel, q_info, doc_info):
    """
    resolve each input to loaded data
    every argument is either a path (a string) or the already loaded object:
        a string is handed to the matching loader,
        anything else is returned untouched
    :param trec: path for load_trec_ranking_with_score, or loaded rankings
    :param qrel: path for load_trec_labels_dict, or loaded qrel
    :param q_info: path for load_json_info (keyed by 'qid'), or loaded q info
    :param doc_info: path for load_json_info (keyed by 'docno'), or loaded doc info
    :return: l_q_rank, h_qrel, h_q_info, h_doc_info
    """
    # The loaders are only referenced inside the taken branch, so passing
    # pre-loaded objects never touches them.
    l_q_rank = load_trec_ranking_with_score(trec) if _is_path(trec) else trec
    h_qrel = load_trec_labels_dict(qrel) if _is_path(qrel) else qrel
    h_q_info = load_json_info(q_info, 'qid') if _is_path(q_info) else q_info
    h_doc_info = (load_json_info(doc_info, 'docno')
                  if _is_path(doc_info) else doc_info)
    return l_q_rank, h_qrel, h_q_info, h_doc_info
def __init__(self, **kwargs):
    """
    build the model selected by self.model_name and pre-load raw inputs
    steps:
        instantiate the model class from self.h_model and the hyper parameters
        set the embedding matrix if self.embedding_npy_in is given
        build the ranker and the learner, and call summary() on both
        for io_format 'raw': load q info, doc info, and qrel
    :param kwargs: forwarded to the parent class, the model, and HyperParameter
    """
    super(KNRMCenter, self).__init__(**kwargs)
    model_class = self.h_model[self.model_name]
    self.k_nrm = model_class(**kwargs)
    self.hyper_para = HyperParameter(**kwargs)

    if not self.embedding_npy_in:
        logging.info('model [%s] not using embedding', self.model_name)
    else:
        logging.info('loading embedding for model [%s]', self.model_name)
        self.k_nrm.set_embedding(np.load(self.embedding_npy_in))

    ranker, learner = self.k_nrm.build()
    self.ranker, self.learner = ranker, learner
    logging.info('built ranking model:')
    self.ranker.summary()
    logging.info('pairwise training model:')
    self.learner.summary()

    # NOTE(review): the qrel load is assumed to belong to the 'raw' branch
    # together with the q/doc info loads -- confirm against the original
    # indentation.
    if self.io_format == 'raw':
        self.h_q_info = load_json_info(self.q_info_in, 'qid')
        self.h_doc_info = load_json_info(self.doc_info_in, 'docno')
        self.h_qrel = load_trec_labels_dict(self.qrel_in)
def _load_data(self):
    """
    load qrel, q info, and doc info from the configured paths
    fills self.h_qrel, self.h_q_info, and self.h_doc_info
    :return:
    """
    qrel_path = self.qrel_in
    self.h_qrel = load_trec_labels_dict(qrel_path)
    logging.info('loaded qrel [%s]', qrel_path)

    logging.info('loading q info')
    q_info_path = self.q_info_in
    # Supersedes an inline loader that mapped each line's 'qid' to its
    # parsed json dict.
    self.h_q_info = load_json_info(q_info_path, 'qid', unpack=True)
    logging.info('loaded [%d] q info [%s]',
                 len(self.h_q_info), q_info_path)

    logging.info('loading doc info')
    doc_info_path = self.doc_info_in
    # Supersedes an inline loader that kept the stripped raw line per
    # 'docno'; presumably unpack=False keeps that unparsed form -- verify
    # against load_json_info.
    self.h_doc_info = load_json_info(doc_info_path, 'docno', unpack=False)
    logging.info('loaded [%d] doc info [%s]',
                 len(self.h_doc_info), doc_info_path)
def _load_data(self):
    """
    pre-load everything feature extraction reads, from the configured paths
    fills:
        _h_qrel: from qrel_in
        _h_qid_q_info: q info from q_info_in, keyed by 'qid'
        _h_q_doc_score: qid -> dict built from the first rank_top_k
            entries of that query's candidate ranking
    :return:
    """
    self._h_qrel = load_trec_labels_dict(self.qrel_in)
    self._h_qid_q_info = load_json_info(self.q_info_in, 'qid')

    for qid, ranking_score in load_trec_ranking_with_score(
            self.q_doc_candidate_in):
        # Only the first rank_top_k candidates of each query are kept.
        top_docs = dict(ranking_score[:self.rank_top_k])
        self._h_q_doc_score[qid] = top_docs
        logging.debug('q [%s] [%d] candidate docs', qid, len(top_docs))

    logging.info('feature extraction data pre loaded')
def _load_data(self):
    """
    pre-load everything feature extraction reads, from the configured paths
    fills:
        _h_qrel: from qrel_in
        _h_qid_q_info: q info from q_info_in, keyed by 'qid'
        h_ext_base: 'qid<TAB>docno' -> score, only if ext_base_rank is set
        _h_q_doc_score: qid -> dict built from the first rank_top_k
            entries of that query's candidate ranking
    :return:
    """
    self._h_qrel = load_trec_labels_dict(self.qrel_in)
    self._h_qid_q_info = load_json_info(self.q_info_in, key_field='qid')
    l_q_ranking_score = load_trec_ranking_with_score(
        self.q_doc_candidate_in)

    if self.ext_base_rank:
        # Flatten the external ranking into one lookup keyed by the
        # tab-joined query and document ids.
        for q, l_rank in load_trec_ranking_with_score(self.ext_base_rank):
            for doc, score in l_rank:
                self.h_ext_base[q + '\t' + doc] = score
        logging.info('external base ranking scores loaded [%s]',
                     self.ext_base_rank)

    for qid, ranking_score in l_q_ranking_score:
        # Only the first rank_top_k candidates of each query are kept.
        top_docs = dict(ranking_score[:self.rank_top_k])
        self._h_q_doc_score[qid] = top_docs
        logging.debug('q [%s] [%d] candidate docs', qid, len(top_docs))

    logging.info('feature extraction data pre loaded')
"""
combine two info files that share the same key (!)
input:
    doc info 1 + 2 (both keyed by 'docno')
output:
    doc info 1, each entry updated with the fields of the same docno in
    doc info 2, dumped as one json object per line
"""

import json
import sys

from knowledge4ir.utils import load_json_info

# Single-argument parenthesized print produces identical output under the
# Python 2 print statement and the Python 3 print function.
if 4 != len(sys.argv):
    print("merge two info file")
    print("3 para: json info 1 + 2 + out name")
    sys.exit()

h_info_a = load_json_info(sys.argv[1], 'docno')
print("[%d] in [%s]" % (len(h_info_a), sys.argv[1]))
h_info_b = load_json_info(sys.argv[2], 'docno')
print("[%d] in [%s]" % (len(h_info_b), sys.argv[2]))

# Entries that exist only in file 2 are ignored; entries that exist only in
# file 1 are kept unchanged.
for key, info in h_info_a.items():
    info.update(h_info_b.get(key, {}))

print("combined, dumping...")
# The context manager closes the output even if serialization or a write
# fails part-way through.
with open(sys.argv[3], 'w') as out:
    for value in h_info_a.values():
        out.write(json.dumps(value) + '\n')
print("finished")
def __init__(self, **kwargs):
    """
    initialize the parent class, then load the q meta keyed by 'qid'
    self.q_meta_in must be set (asserted)
    :param kwargs: forwarded to the parent class
    """
    super(RankEvaAtQMeta, self).__init__(**kwargs)
    q_meta_path = self.q_meta_in
    assert q_meta_path
    self.h_q_meta = load_json_info(q_meta_path, key_field='qid')
def _load_edge(self):
    """
    load entity edges, keyed by 'id', into self.h_e_edge
    does nothing when self.entity_edge_path is not set
    :return:
    """
    edge_path = self.entity_edge_path
    if edge_path:
        logging.info('loading entity edges from [%s]', edge_path)
        self.h_e_edge = load_json_info(edge_path, 'id')
        logging.info('[%d] entities\'s edge loaded', len(self.h_e_edge))