예제 #1
0
  def _create_train_ranking(self, query_id, query_feat, inverted):
    """Build a multileaved result list for one training query.

    Excludes the gradients with the ``GRAD_SIZE - EXP_SIZE`` highest click
    values, samples candidate rankers in the null space of the remaining
    (worst-performing) gradients, ranks the query's documents with each
    candidate, and multileaves those rankings.

    :param query_id: id of the query being trained on (stored on ``self``).
    :param query_feat: feature matrix for the query's documents.
    :param inverted: must be False; inverted rankings are not supported here.
    :return: the multileaved document list.
    """
    self.query_id = query_id
    assert inverted == False
    # Number of gradients to exclude.
    dif = self.GRAD_SIZE - self.EXP_SIZE
    # Indices of the `dif` highest click values. Sorting by descending
    # click value with the index as tie-breaker reproduces the original
    # repeated first-occurrence-of-max selection, without the Python-2-only
    # `maxint` sentinel or shadowing the builtin `max`.
    excluded = set(sorted(range(self.GRAD_SIZE),
                          key=lambda j: (-self.clicklist[j], j))[:dif])

    # Subset of the gradient matrix: the EXP_SIZE gradients that were not
    # excluded, in their original order.
    grad_temp = np.zeros([self.EXP_SIZE, self.n_features], dtype=float)
    c = 0
    for i in range(self.GRAD_SIZE):
        if i not in excluded:
            # Assigning into a float ndarray row copies the values, so no
            # explicit deepcopy is needed.
            grad_temp[c] = self.grad[i]
            c += 1

    self.model.sample_candidates_null_space(grad_temp, query_feat, self.sample_basis)
    scores = self.model.candidate_score(query_feat)
    rankings = rnk.rank_single_query(scores, inverted=False, n_results=self.n_results)
    multileaved_list = self.multileaving.make_multileaving(rankings)
    return multileaved_list
예제 #2
0
  def tieBreak_difficultQuery(self, winners):
      """Break a tie between candidate rankers using stored difficult queries.

      Each tied candidate is scored over the stored difficult queries: for
      every difficult document, the candidate earns the reciprocal of the
      rank it assigns that document (a reciprocal-rank sum; the original
      comments called this "NDCG"). The candidate with the HIGHEST summed
      score wins — the original "least sum" comment contradicted the
      ``np.max`` the code actually takes.

      :param winners: indices of the tied candidate rankers.
      :return: single-element list with the index of the winning candidate.
      """
      # scoreList accumulates each tied candidate's reciprocal-rank sum.
      scoreList = np.zeros(self.model.n_models)
      # Iterate through the stored difficult queries.
      for count_q, diff_query in enumerate(self.difficult_queries):
          query_feat = self.get_query_features(diff_query,
                                               self._train_features,
                                               self._train_query_ranges)
          scores = self.model.candidate_score(query_feat)
          rankings = rnk.rank_single_query(scores, inverted=False,
                                           n_results=self.n_results)

          # Score each tied candidate on this query.
          for winner in winners:
              candidate_score = 0.0
              for doc in self.difficult_document[count_q]:
                  # Position of the difficult document in this candidate's
                  # ranking; earlier positions earn a larger reciprocal.
                  diff_doc_rank = np.where(rankings[winner] == doc)[0][0]
                  candidate_score += 1.0 / (diff_doc_rank + 1.0)

              scoreList[winner] += candidate_score
      # If no candidate accumulated any score, np.max over the empty
      # nonzero selection would raise — fall back to the first tied winner.
      if not np.any(scoreList):
          return [winners[0]]
      # Candidate with the highest summed score is the winner.
      maxRank_score = np.max(scoreList[np.nonzero(scoreList)])
      winner = scoreList.tolist().index(maxRank_score)
      return [winner]
예제 #3
0
 def _create_train_ranking(self, query_id, query_feat, inverted):
   """Sample candidate rankers and multileave their rankings for one query.

   :param query_id: id of the query (unused here beyond the signature).
   :param query_feat: feature matrix for the query's documents.
   :param inverted: must be False; this method builds its own inverted
       rankings internally for the multileaving method.
   :return: the multileaved document list.
   """
   assert inverted == False
   # Draw fresh candidate rankers around the current model.
   self.model.sample_candidates()
   candidate_scores = self.model.candidate_score(query_feat)
   # Full inverted rankings (n_results=None) are what the multileaving
   # method consumes.
   ranked = rnk.rank_single_query(candidate_scores,
                                  inverted=True,
                                  n_results=None)
   return self.multileaving.make_multileaving(ranked)
예제 #4
0
 def _create_train_ranking(self, query_id, query_feat, inverted):
     """Rank the query's documents with all-zero scores (baseline ordering).

     With every score equal, the resulting order is whatever the ranking
     helper's tie handling produces — presumably a random or fixed
     tie-break; confirm against ``rnk.rank_single_query``.

     :param query_id: id of the query to rank.
     :param query_feat: unused; kept for interface compatibility.
     :param inverted: forwarded to the ranking helper.
     :return: the first ``n_results`` entries of the resulting ranking.
     """
     doc_count = self.get_query_size(query_id, self._train_query_ranges)
     dummy_scores = np.zeros(doc_count)
     ranking = rnk.rank_single_query(dummy_scores,
                                     inverted=inverted,
                                     n_results=self.n_results)
     return ranking[:self.n_results]