Exemplo n.º 1
0
 def __init__(self,
              p,
              d,
              q,
              qrels,
              out_dir,
              rank_dir,
              method,
              is_binary=True):
     """
     Set up the hybrid optimisation thread for a single query.

     Loads the first-round estimates and the rank matrix of query ``q`` and
     pre-computes, for every run, the discounted gain ``p ** rank`` of each
     relevant document ranked within the evaluation depth.

     :param p: rbp parameter; base of the ``p ** rank`` discount
     :param d: evaluation depth, e.g. 1000
     :param q: query id
     :param qrels: judgments for this query; must support ``doc in qrels``
         and ``qrels[doc]`` (a value > 0 is treated as relevant)
     :param out_dir: output dir holding the first-round ``<q>-prob.txt``
     :param rank_dir: rank mat dir holding ``<q>-rank.txt``
     :param method: forwarded to ``HybridOpt.get_dir_str`` to locate the
         estimates (originally described as "(opt_score or opt_doc, depth)")
     :param is_binary: if True every relevant document gets gain 1,
         otherwise the value stored in ``qrels`` is used as the gain
     """
     threading.Thread.__init__(self)
     self._method = method
     self._k = d
     self._q = q
     self._qrel = qrels
     self._p = p
     self._binary = is_binary
     # Get the first round evaluation and the number of fitting method considered
     self._estimate, self._mnum = futils.read_csv_to_dict(
         HybridOpt.get_dir_str(out_dir, method) + str(q) + "-prob.txt",
         is_prob=True)
     self._rank_mat, self._runnum = futils.read_csv_to_dict(
         rank_dir + str(q) + "-rank.txt", is_prob=False)
     self.res = None  # optimization results, weighting parameters
     self._mnum -= 1  # remove constant method
     self._doc_rel = np.zeros((self._k, self._runnum, self._mnum))
     self._rbp = np.zeros(self._runnum)
     self._bg_rbp = np.zeros((self._k, self._runnum))
     # Load the rank matrix. ``items()`` (not the Python-2-only
     # ``iteritems()``) keeps this working on both Python 2 and 3.
     for doc_id, rank_row in self._rank_mat.items():
         ranks = np.asarray(rank_row)
         # Runs that retrieved this document within the evaluation depth.
         # NOTE(review): assumes ranks are 0-based integers with a negative
         # value meaning "not retrieved" - confirm against the rank files.
         in_depth = np.flatnonzero((ranks >= 0) & (ranks < self._k))
         if in_depth.size == 0:
             continue
         is_judged = doc_id in self._qrel
         curr_rel = 0
         if is_judged and self._qrel[doc_id] > 0:
             curr_rel = 1 if self._binary else self._qrel[doc_id]
         estimate = None
         if is_judged:
             # Column 0 of the estimate row is skipped (the "constant
             # method" also removed from ``self._mnum`` above).
             estimate = np.array(self._estimate[doc_id][1:])
         for run in in_depth:
             rank = ranks[run]
             gain = curr_rel * np.power(self._p, rank)
             self._rbp[run] += gain
             self._bg_rbp[rank, run] = gain
             if is_judged:
                 self._doc_rel[rank, run, :] = estimate
Exemplo n.º 2
0
def get_doc_prob(out_dir, res, q, depth):
    """
    Append the optimisation results of query ``q`` to the merged output files.

    Three files under ``HybridOpt.get_dir_str(out_dir, (depth, -1))`` are
    opened in append mode:

    * ``rmse.txt``: ``q`` followed by ``res[i].fun`` of every result;
    * ``param.txt``: ``q`` followed by every weight ``res[i].x[j]``;
    * ``<q>-prob.txt``: one line per document holding, for every result
      ``i``, the dot product of ``res[i].x`` with that document's estimate
      row (first column skipped) read from
      ``HybridOpt.get_dir_str(out_dir, (depth, i)) + "<q>-prob.txt"``.

    :param out_dir: base output dir
    :param res: sequence of optimisation results exposing ``.fun`` and ``.x``
    :param q: query id
    :param depth: depth component of the directory key
    :return: None
    """
    # NOTE(review): get_dir_str is assumed to return the same path for the
    # same arguments, so it is resolved once instead of once per file.
    merged_dir = HybridOpt.get_dir_str(out_dir, (depth, -1))

    with open(merged_dir + "rmse.txt", 'a') as fout:
        line = str(q) + "".join(",{:.4f}".format(r.fun) for r in res)
        fout.write(line.strip() + "\n")

    with open(merged_dir + "param.txt", 'a') as fout:
        line = str(q) + "".join(
            ",{:.4f}".format(w) for r in res for w in r.x)
        fout.write(line.strip() + "\n")

    doc_prob = defaultdict(list)
    for i, r in enumerate(res):
        # The second return value (number of columns) is not needed here.
        curr_dict, _ = futils.read_csv_to_dict(
            HybridOpt.get_dir_str(out_dir, (depth, i)) + str(q) + "-prob.txt",
            is_prob=True)
        # items() instead of the Python-2-only iteritems().
        for doc, row in curr_dict.items():
            doc_prob[doc].append(np.dot(r.x, np.array(row[1:])))

    with open(merged_dir + str(q) + "-prob.txt", "a") as fout:
        for doc, probs in doc_prob.items():
            line = str(doc) + "".join(", {:.4f}".format(v) for v in probs)
            fout.write(line.strip() + "\n")
Exemplo n.º 3
0
 def __init__(self,
              p,
              d,
              q,
              qrelname,
              fitted_vec,
              rank_dir,
              method,
              out_dir,
              is_binary=True):
     """
     Set up the optimisation thread for one query and one fitting method.

     Loads the judgments and the rank matrix of query ``q`` and pre-computes,
     for every run, the discounted gain ``p ** rank`` of each relevant
     document ranked within the pooling depth, together with the fitted
     background vector of every judged document.

     :param p: persistence value; base of the ``p ** rank`` discount
     :param d: considered pooling depth
     :param q: qid
     :param qrelname: qrel name, passed to ``Qrel``
     :param fitted_vec: fitted vector for the method; it is indexed with each
         document's rank array, so it must support NumPy array indexing
     :param rank_dir: dir of rank mat (``<q>-rank.txt``)
     :param method: method idx, used in the output file names
     :param out_dir: output dir
     :param is_binary: if True every relevant document gets gain 1,
         otherwise the value stored in the qrels is used as the gain
     """
     threading.Thread.__init__(self)
     self._outname = out_dir + "opt-weight-" + str(method) + ".txt"
     self._rmse = out_dir + "opt-rmse-" + str(method) + ".txt"
     self._k = d
     self._q = q
     self._qrel = Qrel(qrelname).get_rel_by_qid(q)
     self._p = p
     tmp_rank_mat, self._runnum = futils.read_csv_to_dict(
         rank_dir + str(q) + "-rank.txt", is_prob=False)
     self._rank_bg = fitted_vec
     self._rbp = np.zeros(self._runnum)
     self._bg_vectors = np.zeros((self._k, self._runnum, self._runnum))
     self._bg_rbp = np.zeros((self._k, self._runnum))
     self._binary = is_binary
     # Load the rank matrix. ``items()`` (not the Python-2-only
     # ``iteritems()``) keeps this working on both Python 2 and 3.
     for doc_id, rank_row in tmp_rank_mat.items():
         ranks = np.asarray(rank_row)
         # Runs that retrieved this document within the pooling depth.
         # NOTE(review): assumes ranks are 0-based integers with a negative
         # value meaning "not retrieved" - confirm against the rank files.
         in_depth = np.flatnonzero((ranks >= 0) & (ranks < self._k))
         if in_depth.size == 0:
             continue
         is_judged = doc_id in self._qrel
         curr_rel = 0
         if is_judged and self._qrel[doc_id] > 0:
             curr_rel = 1 if self._binary else self._qrel[doc_id]
         bg_vector = None
         if is_judged:
             # Fitted value at the rank this document has in every run.
             bg_vector = self._rank_bg[ranks]
         for run in in_depth:
             rank = ranks[run]
             gain = curr_rel * np.power(self._p, rank)
             self._rbp[run] += gain
             self._bg_rbp[rank, run] = gain
             if is_judged:
                 # set the fitted vector to judged documents
                 self._bg_vectors[rank, run, :] = bg_vector
Exemplo n.º 4
0
def get_doc_prob(qid, out_dir, rank_dir, fitted_dir, m=4):
    """
    Output the final estimation based on the weighting params.

    For every method ``i`` the weights are read from
    ``out_dir + "opt-weight-<i+1>.txt"`` (one comma-separated row per query,
    first column skipped). For every query and document the estimate of
    method ``i`` is ``exp(dot(weights, log(gain)))``, where ``gain`` is
    column ``i + 1`` of the query's fitted matrix looked up at the document's
    rank in every run (zeros are replaced by ``1e-6`` before the log). The
    results are appended to ``out_dir + "<qid>-prob.txt"``.

    :param qid: sequence of query ids, in the same order as the rows of the
        weight files
    :param out_dir: output dir, same as the previous used one
    :param rank_dir: rank-mat dir
    :param fitted_dir: fitted vector dir
    :param m: number of method
    :return: None
    """
    # ndmin=2 keeps a single-row file (only one query) two-dimensional;
    # without it loadtxt returns a 1-D array and the column slicing fails.
    weights = [
        np.loadtxt(out_dir + "opt-weight-" + str(i + 1) + ".txt",
                   delimiter=",",
                   dtype=float,
                   ndmin=2) for i in range(m)
    ]
    # Number of runs = columns minus the leading id column. It is taken from
    # the files (smallest across methods) rather than capped at a fixed 100.
    runnum = (min(w.shape[1] for w in weights) - 1) if weights else 0
    param_mat = np.zeros((m, len(qid), runnum))
    for i, w in enumerate(weights):
        param_mat[i, :, :] = w[:, 1:runnum + 1]

    for q_idx, q in enumerate(qid):
        rank_mat, _ = futils.read_csv_to_dict(rank_dir + str(q) +
                                              "-rank.txt",
                                              is_prob=False)
        fit_mat = np.loadtxt(fitted_dir + str(q) + ".txt",
                             delimiter=" ",
                             dtype=float,
                             ndmin=2)
        doc_prob = {}
        # items() instead of the Python-2-only iteritems().
        for doc, rank in rank_mat.items():
            rank_idx = np.array(rank)
            probs = [0] * m
            for i in range(m):
                # Array indexing returns a copy, so fit_mat is not modified.
                gain = fit_mat[:, i + 1][rank_idx]
                gain[gain == 0] = 10**-6  # avoid log(0)
                probs[i] = np.exp(
                    np.dot(param_mat[i, q_idx, :], np.log(gain)))
            doc_prob[doc] = probs
        with open(out_dir + str(q) + "-prob.txt", "a") as fout:
            for doc, probs in doc_prob.items():
                line = str(doc) + "".join(
                    ", {:.4f}".format(p) for p in probs)
                fout.write(line.strip() + "\n")