Пример #1
0
    def __init__(self, **kwargs):
        """
        Build the converter: load the relevance labels and create the
        empty lookup tables that the rest of the class fills in.

        All keyword arguments are passed through to the parent constructor.
        ``self._load_svm_ltr_feature()`` is called only when ``self.ltr_f_in``
        is truthy.
        :param kwargs: forwarded unchanged to the parent ``__init__``
        """
        super(ModelInputConvert, self).__init__(**kwargs)
        # presumably qrel_in / ltr_f_in are set by the parent constructor
        # from kwargs or configuration -- confirm against the class body
        self.h_qrel = load_trec_labels_dict(self.qrel_in)

        # qid -> grounding info mtx
        self.h_q_grounding_info_mtx = {}
        self.h_sf_grounding_feature_id = {}
        self.h_e_grounding_feature_id = {}
        self.h_e_matching_feature_id = {}
        self.h_qid_docno_ltr_feature = {}

        if not self.ltr_f_in:
            # no LTR feature file configured, nothing more to load
            return
        self._load_svm_ltr_feature()
Пример #2
0
    def __init__(self, **kwargs):
        """
        Build the analyser: create the shared external-info object, expose
        some of its members directly on ``self``, load the query info, the
        ranking and the relevance labels, and make sure ``out_dir`` exists.

        :param kwargs: forwarded unchanged to the parent ``__init__`` and to
            ``LeToRFeatureExternalInfo``
        """
        super(RankComponentAna, self).__init__(**kwargs)

        ext_info = LeToRFeatureExternalInfo(**kwargs)
        self.external_info = ext_info
        # only the first entry of l_embedding is used here
        self.embedding = ext_info.l_embedding[0]
        self.h_entity_texts = ext_info.h_entity_texts
        self.h_field_h_df = ext_info.h_field_h_df
        self.h_corpus_stat = ext_info.h_corpus_stat

        self.h_q_info = load_query_info(self.q_info_in)
        self.ll_qid_ranked_doc = load_trec_ranking_with_info(
            self.trec_with_info_in
        )
        self.h_qrel = load_trec_labels_dict(self.qrel_in)

        # create the output directory on first use
        out_dir = self.out_dir
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
Пример #3
0
def dynamic_load(trec, qrel, q_info, doc_info):
    """
    Resolve each argument to loaded data, calling a loader only for strings.

    Every argument may be either a string (handed to the matching loader)
    or an already-loaded object (returned unchanged).

    :param trec: string for load_trec_ranking_with_score, or the loaded value
    :param qrel: string for load_trec_labels_dict, or the loaded value
    :param q_info: string for load_json_info(..., 'qid'), or the loaded value
    :param doc_info: string for load_json_info(..., 'docno'), or the loaded
        value
    :return: tuple (l_q_rank, h_qrel, h_q_info, h_doc_info)
    """
    # `basestring` (str + unicode) only exists on Python 2; on Python 3 all
    # text is `str`. Referencing `unicode` directly would raise NameError
    # on Python 3 for every call, even when nothing needs loading.
    try:
        string_types = (basestring,)
    except NameError:
        string_types = (str,)

    def _is_path(value):
        # isinstance also accepts subclasses of the string types
        return isinstance(value, string_types)

    # The loader names are only looked up in the branch that needs them.
    l_q_rank = load_trec_ranking_with_score(trec) if _is_path(trec) else trec
    h_qrel = load_trec_labels_dict(qrel) if _is_path(qrel) else qrel
    h_q_info = load_json_info(q_info, 'qid') if _is_path(q_info) else q_info
    if _is_path(doc_info):
        h_doc_info = load_json_info(doc_info, 'docno')
    else:
        h_doc_info = doc_info
    return l_q_rank, h_qrel, h_q_info, h_doc_info
Пример #4
0
    def _load_data(self):
        """
        Fill the cached inputs from the configured paths.

        Sets ``_h_qrel`` (from ``qrel_in``) and ``_h_qid_q_info`` (from
        ``q_info_in``, keyed by 'qid'), then stores, per query, a dict built
        from the first ``rank_top_k`` entries of its candidate ranking in
        ``_h_q_doc_score``.
        :return: None
        """
        self._h_qrel = load_trec_labels_dict(self.qrel_in)
        self._h_qid_q_info = load_json_info(self.q_info_in, 'qid')

        top_k = self.rank_top_k
        l_candidate = load_trec_ranking_with_score(self.q_doc_candidate_in)
        for qid, l_doc_score in l_candidate:
            # keep only the head of the ranking for this query
            h_doc_score = dict(l_doc_score[:top_k])
            self._h_q_doc_score[qid] = h_doc_score
            logging.debug(
                'q [%s] [%d] candidate docs', qid, len(h_doc_score)
            )
        logging.info('feature extraction data pre loaded')
Пример #5
0
 def __init__(self, **kwargs):
     """
     Build the model named by ``model_name``, optionally give it a
     pre-trained embedding matrix, and build its ranker / learner pair.

     When ``io_format`` is 'raw', the query info, doc info and relevance
     labels are also loaded.
     :param kwargs: forwarded unchanged to the parent ``__init__``, the
         model class and ``HyperParameter``
     """
     super(KNRMCenter, self).__init__(**kwargs)
     model_cls = self.h_model[self.model_name]
     self.k_nrm = model_cls(**kwargs)
     self.hyper_para = HyperParameter(**kwargs)

     if not self.embedding_npy_in:
         logging.info('model [%s] not using embedding', self.model_name)
     else:
         logging.info('loading embedding for model [%s]', self.model_name)
         self.k_nrm.set_embedding(np.load(self.embedding_npy_in))

     ranker, learner = self.k_nrm.build()
     self.ranker = ranker
     self.learner = learner
     logging.info('built ranking model:')
     ranker.summary()
     logging.info('pairwise training model:')
     learner.summary()

     if self.io_format != 'raw':
         # the tables below are only loaded for the 'raw' io format
         return
     self.h_q_info = load_json_info(self.q_info_in, 'qid')
     self.h_doc_info = load_json_info(self.doc_info_in, 'docno')
     self.h_qrel = load_trec_labels_dict(self.qrel_in)
Пример #6
0
    def _load_data(self):
        """
        Load the relevance labels, the query info and the doc info from
        the configured paths, logging progress along the way.

        :return: None
        """
        qrel_path = self.qrel_in
        self.h_qrel = load_trec_labels_dict(qrel_path)
        logging.info('loaded qrel [%s]', qrel_path)

        logging.info('loading q info')
        h_q_info = load_json_info(self.q_info_in, 'qid', unpack=True)
        self.h_q_info = h_q_info
        logging.info(
            'loaded [%d] q info [%s]', len(h_q_info), self.q_info_in
        )

        logging.info('loading doc info')
        # NOTE(review): an earlier inline version stored the raw stripped
        # line per docno; presumably unpack=False does the same -- confirm
        # against load_json_info
        h_doc_info = load_json_info(self.doc_info_in, 'docno', unpack=False)
        self.h_doc_info = h_doc_info
        logging.info(
            'loaded [%d] doc info [%s]', len(h_doc_info), self.doc_info_in
        )
Пример #7
0
    def _load_data(self):
        """
        Fill the cached inputs from the configured paths.

        Sets ``_h_qrel`` and ``_h_qid_q_info``, optionally records external
        base ranking scores in ``h_ext_base`` when ``ext_base_rank`` is
        truthy, and stores, per query, a dict built from the first
        ``rank_top_k`` entries of its candidate ranking in
        ``_h_q_doc_score``.
        :return: None
        """
        self._h_qrel = load_trec_labels_dict(self.qrel_in)
        self._h_qid_q_info = load_json_info(self.q_info_in, key_field='qid')

        l_candidate = load_trec_ranking_with_score(self.q_doc_candidate_in)

        ext_path = self.ext_base_rank
        if ext_path:
            for q, l_doc_score in load_trec_ranking_with_score(ext_path):
                for doc, score in l_doc_score:
                    # key is "<query>\t<doc>"
                    pair_key = q + '\t' + doc
                    self.h_ext_base[pair_key] = score
            logging.info(
                'external base ranking scores loaded [%s]', ext_path
            )

        top_k = self.rank_top_k
        for qid, l_doc_score in l_candidate:
            # keep only the head of the ranking for this query
            h_doc_score = dict(l_doc_score[:top_k])
            self._h_q_doc_score[qid] = h_doc_score
            logging.debug(
                'q [%s] [%d] candidate docs', qid, len(h_doc_score)
            )
        logging.info('feature extraction data pre loaded')
Пример #8
0
 def _load_data(self):
     """
     Load the relevance labels, the per-query ranking and the doc info
     from the configured paths.

     :return: None
     """
     logging.info('start loading data')
     self.h_qrel = load_trec_labels_dict(self.qrel_in)
     # the loader's result is turned into a dict via dict(...)
     l_q_rank = load_trec_ranking_with_score(self.q_rank_in)
     self.h_q_rank = dict(l_q_rank)
     self.h_doc_info = load_doc_info_json(self.doc_info_in)
     logging.info('data loaded')