def xQuAD(basepath, topicID, filename, cutoff=50, method="mine"):
    """Build an xQuAD ranking for one topic and write it to a run file.

    Steps, in order:

    1. Truncate/create the result file ``{basepath}rank/{topicID}/{filename}``
       and write the four known run names to
       ``{basepath}rank/{topicID}/runlist``.
    2. Load the topic data with ``data_preprocessing_xquad``.
    3. For the "less" runs (``xquad_mine_less`` / ``xquad_standard_less``)
       drop the last two subtopic ids (in sorted order) from the subtopic
       set, zero their query probabilities (the second-to-last one only
       when ``query_prob`` has more than three entries) and renormalise
       ``query_prob`` in place.
    4. Iteratively pick documents with ``select_best_document`` (at most
       ``cutoff`` iterations) and write one line per picked document:
       ``topicID Q0 docID rank score TESTRUN``.

    Args:
        basepath: Path prefix; it is concatenated directly with ``"rank/"``,
            so it must already end with a path separator.
        topicID: Topic identifier as a string (used in paths and output).
        filename: Name of the run file to write; also selects "less" mode.
        cutoff: Maximum number of selection iterations.
        method: Forwarded unchanged to ``data_preprocessing_xquad``.

    Returns:
        None. The ranking is only written to disk.

    Raises:
        ZeroDivisionError: In "less" mode, when ``query_prob`` is non-empty
            and the probabilities that were not zeroed sum to zero (for
            plain Python numbers).
    """
    rank_dir = basepath + "rank/" + topicID + "/"
    run_names = (
        "xquad_mine_less",
        "xquad_mine_all",
        "xquad_standard_less",
        "xquad_standard_all",
    )
    less_run = filename in ("xquad_mine_less", "xquad_standard_less")
    document_selected = []

    # The result file is still opened (and truncated) before any work is
    # done, but the context manager now closes it on every exit path.
    with open(rank_dir + filename, "w") as out_file:
        with open(rank_dir + "runlist", "w") as runlist_file:
            for run_name in run_names:
                runlist_file.write(run_name + "\n")

        # word2id_weight is returned by the loader but not needed here.
        (documents, subtopicID_word, query_prob, document_term_frequency,
         average_document_len, _word2id_weight,
         topic_words) = data_preprocessing_xquad(basepath, topicID, method)

        keys = sorted(subtopicID_word.keys())

        # Slices instead of keys[-1] / keys[-2] so that topics with fewer
        # than two subtopics do not raise IndexError.
        dropped = set(keys[-2:]) if less_run else set()
        subtopic_word = dict(
            (key, subtopicID_word[key]) for key in keys if key not in dropped)

        if less_run:
            zeroed = set(keys[-1:])
            if len(query_prob) > 3:
                zeroed.update(keys[-2:-1])

            prob_sum = 0.0
            for key in query_prob:
                if key in zeroed:
                    query_prob[key] = 0
                else:
                    prob_sum += query_prob[key]
            # Renormalise in place: the same mapping is handed to
            # select_best_document below.
            for key in query_prob:
                query_prob[key] /= prob_sum

        document_subtopic_rel, rel_subtopic_ave = document_subtopic_relevance(
            subtopic_word, documents, average_document_len, topic_words,
            document_term_frequency)

        # This handle is passed to select_best_document; what is written
        # to it is decided by that helper. It used to be left open.
        with open("middle/" + topicID + "div", "w") as filem:
            # iteratively select the best document to build the ranking list
            for i in range(cutoff):
                # A single parenthesised argument prints the same text under
                # the Python 2 print statement and the Python 3 function.
                print("%s %2d-th best document." % (topicID, i + 1))
                best_document = select_best_document(
                    document_subtopic_rel, rel_subtopic_ave, query_prob,
                    subtopic_word, document_selected, filem)
                document_selected.append(best_document)
                # NOTE(review): stops once all but one document is ranked;
                # confirm that the "- 1" is intentional.
                if len(document_selected) == len(documents) - 1:
                    break

        # write the ranking result to the evaluation file
        for document_rank, document in enumerate(document_selected, 1):
            documentID = document.get_id()
            document_score = document.get_ranking_score()
            out_file.write(" ".join([
                topicID, "Q0", documentID, str(document_rank),
                str(document_score), "TESTRUN"]) + "\n")
def xQuAD(basepath, topicID, cutoff=50):
    """Build an xQuAD ranking for one topic, write it out and evaluate it.

    Writes the run name ``xquad_result`` to
    ``{basepath}rank/{topicID}/runlist``, loads the topic data with
    ``data_preprocessing_xquad``, then iteratively picks documents with
    ``best_document_select`` (at most ``cutoff`` iterations, stopping early
    once every document has been picked). Each picked document is written
    to ``{basepath}rank/{topicID}/xquad_result`` as
    ``topicID Q0 docID rank score TESTRUN``. Finally
    ``call_eval_for_result(basepath, topicID, 10)`` is invoked.

    Args:
        basepath: Path prefix; it is concatenated directly with ``"rank/"``,
            so it must already end with a path separator.
        topicID: Topic identifier as a string (used in paths and output).
        cutoff: Maximum number of selection iterations.

    Returns:
        None. The ranking is only written to disk.
    """
    rank_dir = basepath + "rank/" + topicID + "/"
    document_selected = []  # docs ranking list, in selection order

    # The result file is still opened (and truncated) before any work is
    # done, but the context manager now closes it on every exit path.
    with open(rank_dir + "xquad_result", "w") as out_file:
        with open(rank_dir + "runlist", "w") as runlist_file:
            runlist_file.write("xquad_result\n")

        (documents, subtopicID_word, query, document_term_frequency,
         average_document_len, word2id_weight) = data_preprocessing_xquad(
             basepath, topicID)
        sid_doc_related_probility = similarity_computing(
            documents, subtopicID_word, average_document_len, query,
            document_term_frequency)

        # iteratively select the best document to build the ranking list
        for i in range(cutoff):
            # A single parenthesised argument prints the same text under
            # the Python 2 print statement and the Python 3 function.
            print("%s-th best document." % i)
            best_document = best_document_select(
                documents, subtopicID_word, query, document_term_frequency,
                average_document_len, word2id_weight, document_selected,
                sid_doc_related_probility)
            document_selected.append(best_document)
            if len(document_selected) == len(documents):
                # every document is already in the ranking list
                break

        # write the ranking result to the evaluation file
        for document_rank, document in enumerate(document_selected, 1):
            documentID = document.get_id()
            document_score = document.get_ranking_score()
            out_file.write(" ".join([
                topicID, "Q0", documentID, str(document_rank),
                str(document_score), "TESTRUN"]) + "\n")

    # Called only after the result file has been closed, as in the original
    # statement order. Its return value was never used, so it is not bound.
    call_eval_for_result(basepath, topicID, 10)
def xQuAD(basepath, topicID, filename, cutoff=50, method="mine"):
    """Build an xQuAD ranking for one topic and write it to a run file.

    Steps, in order:

    1. Truncate/create the result file ``{basepath}rank/{topicID}/{filename}``
       and write the four known run names to
       ``{basepath}rank/{topicID}/runlist``.
    2. Load the topic data with ``data_preprocessing_xquad``.
    3. For the "less" runs (``xquad_mine_less`` / ``xquad_standard_less``)
       drop the last two subtopic ids (in sorted order) from the subtopic
       set, zero their query probabilities (the second-to-last one only
       when ``query_prob`` has more than three entries) and renormalise
       ``query_prob`` in place.
    4. Iteratively pick documents with ``select_best_document`` (at most
       ``cutoff`` iterations) and write one line per picked document:
       ``topicID Q0 docID rank score TESTRUN``.

    Args:
        basepath: Path prefix; it is concatenated directly with ``"rank/"``,
            so it must already end with a path separator.
        topicID: Topic identifier as a string (used in paths and output).
        filename: Name of the run file to write; also selects "less" mode.
        cutoff: Maximum number of selection iterations.
        method: Forwarded unchanged to ``data_preprocessing_xquad``.

    Returns:
        None. The ranking is only written to disk.

    Raises:
        ZeroDivisionError: In "less" mode, when ``query_prob`` is non-empty
            and the probabilities that were not zeroed sum to zero (for
            plain Python numbers).
    """
    rank_dir = basepath + "rank/" + topicID + "/"
    run_names = (
        "xquad_mine_less",
        "xquad_mine_all",
        "xquad_standard_less",
        "xquad_standard_all",
    )
    less_run = filename in ("xquad_mine_less", "xquad_standard_less")
    document_selected = []

    # The result file is still opened (and truncated) before any work is
    # done, but the context manager now closes it on every exit path.
    with open(rank_dir + filename, "w") as out_file:
        with open(rank_dir + "runlist", "w") as runlist_file:
            for run_name in run_names:
                runlist_file.write(run_name + "\n")

        # word2id_weight is returned by the loader but not needed here.
        (documents, subtopicID_word, query_prob, document_term_frequency,
         average_document_len, _word2id_weight,
         topic_words) = data_preprocessing_xquad(basepath, topicID, method)

        keys = sorted(subtopicID_word.keys())

        # Slices instead of keys[-1] / keys[-2] so that topics with fewer
        # than two subtopics do not raise IndexError.
        dropped = set(keys[-2:]) if less_run else set()
        subtopic_word = dict(
            (key, subtopicID_word[key]) for key in keys if key not in dropped)

        if less_run:
            zeroed = set(keys[-1:])
            if len(query_prob) > 3:
                zeroed.update(keys[-2:-1])

            prob_sum = 0.0
            for key in query_prob:
                if key in zeroed:
                    query_prob[key] = 0
                else:
                    prob_sum += query_prob[key]
            # Renormalise in place: the same mapping is handed to
            # select_best_document below.
            for key in query_prob:
                query_prob[key] /= prob_sum

        document_subtopic_rel, rel_subtopic_ave = document_subtopic_relevance(
            subtopic_word, documents, average_document_len, topic_words,
            document_term_frequency)

        # This handle is passed to select_best_document; what is written
        # to it is decided by that helper. It used to be left open.
        with open("middle/" + topicID + "div", "w") as filem:
            # iteratively select the best document to build the ranking list
            for i in range(cutoff):
                # A single parenthesised argument prints the same text under
                # the Python 2 print statement and the Python 3 function.
                print("%s %2d-th best document." % (topicID, i + 1))
                best_document = select_best_document(
                    document_subtopic_rel, rel_subtopic_ave, query_prob,
                    subtopic_word, document_selected, filem)
                document_selected.append(best_document)
                # NOTE(review): stops once all but one document is ranked;
                # confirm that the "- 1" is intentional.
                if len(document_selected) == len(documents) - 1:
                    break

        # write the ranking result to the evaluation file
        for document_rank, document in enumerate(document_selected, 1):
            documentID = document.get_id()
            document_score = document.get_ranking_score()
            out_file.write(" ".join([
                topicID, "Q0", documentID, str(document_rank),
                str(document_score), "TESTRUN"]) + "\n")
def xQuAD(basepath, topicID, cutoff=50):
    """Build an xQuAD ranking for one topic, write it out and evaluate it.

    Writes the run name ``xquad_result`` to
    ``{basepath}rank/{topicID}/runlist``, loads the topic data with
    ``data_preprocessing_xquad``, then iteratively picks documents with
    ``best_document_select`` (at most ``cutoff`` iterations, stopping early
    once every document has been picked). Each picked document is written
    to ``{basepath}rank/{topicID}/xquad_result`` as
    ``topicID Q0 docID rank score TESTRUN``. Finally
    ``call_eval_for_result(basepath, topicID, 10)`` is invoked.

    Args:
        basepath: Path prefix; it is concatenated directly with ``"rank/"``,
            so it must already end with a path separator.
        topicID: Topic identifier as a string (used in paths and output).
        cutoff: Maximum number of selection iterations.

    Returns:
        None. The ranking is only written to disk.
    """
    rank_dir = basepath + "rank/" + topicID + "/"
    document_selected = []  # docs ranking list, in selection order

    # The result file is still opened (and truncated) before any work is
    # done, but the context manager now closes it on every exit path.
    with open(rank_dir + "xquad_result", "w") as out_file:
        with open(rank_dir + "runlist", "w") as runlist_file:
            runlist_file.write("xquad_result\n")

        (documents, subtopicID_word, query, document_term_frequency,
         average_document_len, word2id_weight) = data_preprocessing_xquad(
             basepath, topicID)
        sid_doc_related_probility = similarity_computing(
            documents, subtopicID_word, average_document_len, query,
            document_term_frequency)

        # iteratively select the best document to build the ranking list
        for i in range(cutoff):
            # A single parenthesised argument prints the same text under
            # the Python 2 print statement and the Python 3 function.
            print("%s-th best document." % i)
            best_document = best_document_select(
                documents, subtopicID_word, query, document_term_frequency,
                average_document_len, word2id_weight, document_selected,
                sid_doc_related_probility)
            document_selected.append(best_document)
            if len(document_selected) == len(documents):
                # every document is already in the ranking list
                break

        # write the ranking result to the evaluation file
        for document_rank, document in enumerate(document_selected, 1):
            documentID = document.get_id()
            document_score = document.get_ranking_score()
            out_file.write(" ".join([
                topicID, "Q0", documentID, str(document_rank),
                str(document_score), "TESTRUN"]) + "\n")

    # Called only after the result file has been closed, as in the original
    # statement order. Its return value was never used, so it is not bound.
    call_eval_for_result(basepath, topicID, 10)