def ws_gpr():
    """Rank each query's documents by a weighted sum of Indri and PageRank.

    For every entry returned by ``file_scanner()`` (iterated in sorted key
    order) the Indri scores of the query's result file are exponentiated and
    normalised to sum to 1, the global PageRank values of the same documents
    are normalised to sum to 1, and the two are combined as
    ``0.95 * indri + 0.05 * pagerank``.  Documents are written to
    ``rank/ws_gpr_rank.txt`` in descending order of the combined score, one
    line per document::

        <query_id> Q0 <doc_id + 1> <rank> <score> run-1

    Returns:
        None.  The ranking is written to disk and a completion message is
        printed.
    """
    # Fixed mixing weights of the two normalised score distributions.
    indri_weight = 0.95
    pagerank_weight = 0.05

    # get the global pagerank result
    # NOTE(review): assumed to be a 1-D NumPy array indexable by a list of
    # document ids -- confirm against global_pagerank.gpr().
    gpr_mtx = global_pagerank.gpr()
    # get the indri file names; each value is indexed as
    # (query_id, file_name) below
    indri_names = file_scanner()

    # ``with`` guarantees the ranking file is closed even if a helper raises.
    with open('rank/ws_gpr_rank.txt', 'w') as f:
        for cur_num in sorted(indri_names):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]

            # doc id in the current indri file
            doc_id = doc_extracter(file_name)
            doc_id_arr = np.array(doc_id)

            # exponentiate the indri scores so every value is positive,
            # then normalise; the total is computed once, not per element
            indri_score_pos = np.power(math.e, score_extracter(file_name))
            indri_total = float(sum(indri_score_pos))
            indri_norm = np.asarray(indri_score_pos, dtype=float) / indri_total

            # normalise the pagerank values of the same documents
            gpr_value = gpr_mtx[doc_id]
            gpr_total = float(sum(gpr_value))
            gpr_norm = np.asarray(gpr_value, dtype=float) / gpr_total

            # combine indri and pagerank score
            ws_score = indri_norm * indri_weight + gpr_norm * pagerank_weight

            # positions of the documents, best combined score first
            order = np.argsort(ws_score)[::-1]

            # Index by position rather than doc_id.index(...): this is linear
            # and stays correct when a document id occurs more than once.
            for rank_num, pos in enumerate(order, 1):
                f.write("{} Q0 {} {} {} run-1\n".format(
                    query_id, doc_id_arr[pos] + 1, rank_num, ws_score[pos]))

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("Weighted Sum GPR ranking finished." + '\n')
def ns_gpr():
    """Rank each query's documents by global PageRank value alone.

    For every entry returned by ``file_scanner()`` (iterated in sorted key
    order) the documents listed in the query's Indri result file are ordered
    by their global PageRank value, highest first, and written to
    ``rank/ns_gpr_rank.txt``, one line per document::

        <query_id> Q0 <doc_id + 1> <rank> <pagerank> run-1

    The Indri scores themselves are not used.

    Returns:
        None.  The ranking is written to disk and a completion message is
        printed.
    """
    # get the global pagerank result
    # NOTE(review): assumed to be a 1-D NumPy array indexable by a list of
    # document ids -- confirm against global_pagerank.gpr().
    gpr_mtx = global_pagerank.gpr()
    # get the indri file names; each value is indexed as
    # (query_id, file_name) below
    indri_names = file_scanner()

    # ``with`` guarantees the ranking file is closed even if a helper raises.
    with open('rank/ns_gpr_rank.txt', 'w') as f:
        for cur_num in sorted(indri_names):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]

            # doc id in the current indri file
            doc_id = doc_extracter(file_name)
            doc_id_arr = np.array(doc_id)

            # documents ordered by pagerank value, descending
            order = np.argsort(gpr_mtx[doc_id])[::-1].tolist()
            gpr_rank = doc_id_arr[order]

            for rank_num, idx in enumerate(gpr_rank, 1):
                f.write("{} Q0 {} {} {} run-1\n".format(
                    query_id, idx + 1, rank_num, gpr_mtx[idx]))

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("No-search GPR ranking finished." + '\n')
def cm_gpr():
    """Rank each query's documents with the custom position-dependent mix.

    For every entry returned by ``file_scanner()`` (iterated in sorted key
    order) the Indri scores are exponentiated and normalised to sum to 1 and
    the global PageRank values of the same documents are normalised to sum
    to 1.  Each document position ``i`` then gets its own pair of weights:

    * ``mu``    -- ``doc_num`` evenly spaced values from 0.85 (inclusive)
      towards 0.95 (exclusive), reversed, then passed through
      ``cosine_inter``;
    * ``decay`` -- ``doc_num`` evenly spaced values from 0.65 (inclusive)
      towards 1.0 (exclusive).

    The final score is ``indri * mu - pagerank * (1 - mu) * decay``.
    Documents are written to ``rank/cm_gpr_rank.txt`` in descending score
    order, one line per document::

        <query_id> Q0 <doc_id + 1> <rank> <score> run-1

    Queries whose result file lists no documents are skipped.

    Returns:
        None.  The ranking is written to disk and a completion message is
        printed.
    """
    # get the global pagerank result
    # NOTE(review): assumed to be a 1-D NumPy array indexable by a list of
    # document ids -- confirm against global_pagerank.gpr().
    gpr_mtx = global_pagerank.gpr()
    # get the indri file names; each value is indexed as
    # (query_id, file_name) below
    indri_names = file_scanner()

    # ``with`` guarantees the ranking file is closed even if a helper raises.
    with open('rank/cm_gpr_rank.txt', 'w') as f:
        for cur_num in sorted(indri_names):
            query_id = indri_names[cur_num][0]
            file_name = indri_names[cur_num][1]

            # doc id in the current indri file
            doc_id = doc_extracter(file_name)
            doc_num = len(doc_id)
            if doc_num == 0:
                # Nothing to rank; also avoids dividing the step by zero.
                continue
            doc_id_arr = np.array(doc_id)

            # exponentiate the indri scores so every value is positive,
            # then normalise; the total is computed once, not per element
            indri_score_pos = np.power(math.e, score_extracter(file_name))
            indri_total = float(sum(indri_score_pos))
            indri_norm = np.asarray(indri_score_pos, dtype=float) / indri_total

            # normalise the pagerank values of the same documents
            gpr_value = gpr_mtx[doc_id]
            gpr_total = float(sum(gpr_value))
            gpr_norm = np.asarray(gpr_value, dtype=float) / gpr_total

            # linspace(..., endpoint=False) yields exactly doc_num points on
            # the same grid as arange(start, stop, (stop - start) / doc_num);
            # arange with a fractional step can return doc_num + 1 points
            # through rounding and break the element-wise products below.
            mu_list = np.linspace(0.85, 0.95, doc_num, endpoint=False)[::-1]
            decay_list = np.linspace(0.65, 1.0, doc_num, endpoint=False)

            # NOTE(review): cosine_inter is defined elsewhere; it is assumed
            # to return one weight per input element -- confirm.
            mu2_list = cosine_inter(mu_list)
            mu3_list = np.multiply(np.subtract(1.0, mu2_list), decay_list)

            # NOTE(review): the pagerank term is subtracted here; an earlier
            # variant of this method added it with weight (1 - mu).  Confirm
            # that the subtraction is the intended scheme.
            ws_score = np.subtract(np.multiply(indri_norm, mu2_list),
                                   np.multiply(gpr_norm, mu3_list))

            # positions of the documents, best combined score first
            order = np.argsort(ws_score)[::-1]

            # Index by position rather than doc_id.index(...): this is linear
            # and stays correct when a document id occurs more than once.
            for rank_num, pos in enumerate(order, 1):
                f.write("{} Q0 {} {} {} run-1\n".format(
                    query_id, doc_id_arr[pos] + 1, rank_num, ws_score[pos]))

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("Custom method GPR ranking finished." + '\n')