def eval(testOn):
    """Compare Boolean and vector retrieval with NDCG over random Cranfield queries.

    Command-line contract (read from sys.argv):
        argv[1] -- path to the inverted-index file
        argv[2] -- path to query.text
        argv[3] -- path to qrels.text
        argv[4] -- number of random queries to sample per round

    Runs ``numberOfTimeToLoop`` rounds; in each round samples random queries,
    scores both models with NDCG@k, then prints averages plus t-test and
    Wilcoxon p-values over all collected scores.

    testOn -- when truthy, print per-step timings and sanity-check
              intermediate collection sizes with asserts.
    """
    k = 10  # number of top (docID, similarity) pairs to request from vectorQuery
    indexFile = sys.argv[1]       # e.g. "src/Data/tempFile"
    queryText = sys.argv[2]       # path to query.text
    qrelsText = sys.argv[3]       # path to qrels.text
    numberOfQueries = int(sys.argv[4])
    docCollection = CranFile('./CranfieldDataset/cran.all')
    NDCGScoreBool = []
    NDCGScoreVector = []
    numberOfTimeToLoop = 5
    dictQ_ID = []

    # Load the qrels mapping and the query collection once up front.
    listOfQueryRelsMaping = readFile(qrelsText)
    queryFile = loadCranQry(queryText)

    for i in range(numberOfTimeToLoop):
        # Fresh random sample of queries for this round.
        dictOfQuery = getRandomQuery(queryFile, numberOfQueries)
        if testOn:
            assert len(dictOfQuery) == numberOfQueries, "Error are getting random query"

        # Ground-truth relevant docIDs for the sampled queries (from qrels.text).
        dictQrelsText = getResultsFrom_QrelsFile(listOfQueryRelsMaping, dictOfQuery)
        if testOn:
            assert len(dictQrelsText) == numberOfQueries, "Error number Of Queries to large"

        start = timer()
        # Building the QueryProcessor is extremely expensive; do it once per round.
        queryProcessor = QueryProcessor("", indexFile, docCollection.docs)
        end = timer()
        if testOn:
            print("Time for creating QueryProcessor:", end - start)

        countDoc = 0
        # BUG FIX: the original reused `start` for this whole-round timer, but
        # `start` is clobbered by every per-step timer inside the loop, so the
        # final "Time for running N queries" reported only the last sub-step.
        allQueriesStart = timer()
        dictQ_ID = []
        # BUG FIX: loop variable renamed from `queryText` -- it used to shadow
        # the query-file path read from sys.argv[2].
        for qid, queryString in dictOfQuery.items():
            countDoc += 1
            dictQ_ID.append(qid)
            if testOn:
                print("QID:", qid)

            start = timer()
            queryProcessor.loadQuery(queryString)
            end = timer()
            if testOn:
                print("Time for Load:", end - start)
                print("qrels: ", dictQrelsText[qid])

            start = timer()
            # Expected shape: a flat list of docIDs, e.g. [12, 14, 78, ...]
            docIDs = queryProcessor.booleanQuery()
            end = timer()
            if testOn:
                print("Time for booleanQuery:", end - start)

            start = timer()
            # Expected shape for k=3: [[625, 0.8737...], [401, 0.8697...], [943, 0.8424...]]
            listOfDocIDAndSimilarity = queryProcessor.vectorQuery(k)
            end = timer()
            if testOn:
                print("Time for vectorQuery:", end - start)
                print("booleanQuery:", docIDs)

            # --- Boolean model NDCG ---
            start = timer()
            yTrue = []
            yScore = []
            for docID in docIDs:
                # Boolean retrieval is unweighted, so every hit scores 1.
                yScore.append(1)
                if docID in dictQrelsText[qid]:
                    yTrue.append(1)
                else:
                    yTrue.append(0)
            # Ideal ordering for the ground truth: relevant docs first.
            yTrue.sort(reverse=True)
            score = metrics.ndcg_score(yTrue[:k], yScore[:k], k, "exponential")
            if math.isnan(score):
                NDCGScoreBool.append(0)
            else:
                NDCGScoreBool.append(score)
            end = timer()
            if testOn:
                print("Time for Boolean ndcg:", end - start)

            # --- Vector model NDCG ---
            start = timer()
            yTrue = []
            yScore = []
            if testOn:
                print("vectorQuery:", listOfDocIDAndSimilarity)
            for docID_Score in listOfDocIDAndSimilarity:
                yScore.append(float(docID_Score[1]))
                if docID_Score[0] in dictQrelsText[qid]:
                    yTrue.append(1)
                else:
                    yTrue.append(0)
            yTrue.sort(reverse=True)
            score = metrics.ndcg_score(yTrue[:k], yScore[:k], k, "exponential")
            if math.isnan(score):
                NDCGScoreVector.append(0)
            else:
                NDCGScoreVector.append(score)
            end = timer()
            if testOn:
                print("Time for Vector ndcg:", end - start)

        print("\nRunning Querys iteration:(", str(i + 1), ")\n", dictQ_ID)

    if testOn:
        for QID, boolScore, vectorScore in zip(dictQ_ID, NDCGScoreBool, NDCGScoreVector):
            print("QID", QID, "Boolean Model:", boolScore, "Vector Model", vectorScore)
    print("\nThe Length Of Both NDCG Score is: ", len(NDCGScoreBool), "==", len(NDCGScoreVector))
    print('\nThe Avg NDCG Score')
    vectorAvg = avg(NDCGScoreVector)
    BoolAvg = avg(NDCGScoreBool)
    print("Avg NDCG Score for Bool:", BoolAvg, "\nAvg NDCG Score for Vector:", vectorAvg)
    end = timer()
    if testOn:
        # NOTE: countDoc/allQueriesStart cover only the last round, as before.
        print("\n\nTime for running ", countDoc, " queries:", end - allQueriesStart)
    print('\nThe P-Value')
    p_va_ttest = stats.ttest_ind(NDCGScoreBool, NDCGScoreVector)
    p_va_wilcoxon = stats.wilcoxon(NDCGScoreBool, NDCGScoreVector)
    print("T-Test P-value: ", p_va_ttest)
    print("Wilcoxon P-value: ", p_va_wilcoxon)
    print('Done')
def eval():
    """Evaluate Boolean vs. vector retrieval with NDCG on random queries.

    Algorithm:
      - Pick N random samples from query.txt
      - Get top 10 results from bool query for each rnd query
      - Get top 10 results from vector query for each rnd query
      - Compute NDCG btn bool query results and qrels.txt
      - Compute NDCG btn vector query results and qrels.txt
      - Get p-value btn bool and vector

    NOTE(review): relies on module-level names `query_path`, `index_file`,
    `qrels_path`, and `n` defined elsewhere in the file -- confirm they exist.
    """
    # Get the query collection
    qc = loadCranQry(query_path)
    poss_queries = list(qc)

    # Load up the inverted index
    ii = InvertedIndex()
    ii.load(index_file)

    # Load up the document collection
    cf = CranFile("cran.all")

    # Get ground-truth results from qrels.txt
    with open(qrels_path) as f:
        qrels = f.readlines()

    # Index qrels into a dict: query number -> list of relevant doc IDs
    qrel_dict = {}
    for qrel in qrels:
        qrel_split = qrel.split()
        if int(qrel_split[0]) in qrel_dict:
            qrel_dict[int(qrel_split[0])].append(int(qrel_split[1]))
        else:
            qrel_dict[int(qrel_split[0])] = [int(qrel_split[1])]

    # Run over N random queries, collecting NDCGs
    bool_ndcgs = []
    vector_ndcgs = []
    for _ in range(n):
        # Get random query ID and zero-pad it to the 3-digit key format
        query_id = choice(poss_queries)
        if 0 < int(query_id) < 10:
            query_id = '00' + str(int(query_id))
        elif 9 < int(query_id) < 100:
            query_id = '0' + str(int(query_id))

        # Get the query text; abort on an ID that is not in the collection
        try:
            query = qc[query_id].text
        except KeyError:
            print("Invalid query id", query_id)
            return

        # Initialize the query processor
        qp = QueryProcessor(query, ii, cf)

        # Run bool query
        bool_result = qp.booleanQuery()[:10]

        # Run vector query
        vector_result = qp.vectorQuery(10)

        # Pull top 10 ground-truth results from qrels dict
        gt_results = qrel_dict[poss_queries.index(query_id) + 1][:10]

        # Compute NDCG for bool query
        # NOTE: There is no weighting on the bool query, so give all an even 1
        truth_vector = list(map(lambda x: x in gt_results, bool_result))
        bool_ndcg = ndcg_score(truth_vector, [1] * len(truth_vector),
                               k=len(truth_vector))

        # Compute NDCG for vector query
        vector_docs = []
        vector_scores = []
        for v in vector_result:
            vector_docs.append(v[0])
            vector_scores.append(v[1])
        truth_vector = list(map(lambda x: x in gt_results, vector_docs))
        vector_ndcg = ndcg_score(truth_vector, vector_scores,
                                 k=len(truth_vector))

        # Accumulate NDCGs
        bool_ndcgs.append(bool_ndcg)
        vector_ndcgs.append(vector_ndcg)

    # Average out score lists.
    # BUG FIX: the original accumulated with `for bool in bool_ndcgs`,
    # shadowing the builtin `bool`; sum()/len() avoids that entirely.
    bool_avg = sum(bool_ndcgs) / len(bool_ndcgs)
    vector_avg = sum(vector_ndcgs) / len(vector_ndcgs)

    # Present averages and p-values
    print("Boolean NDCG average:", bool_avg)
    print("Vector NDCG average:", vector_avg)
    if n > 19:
        print("Wilcoxon p-value:", wilcoxon(bool_ndcgs, vector_ndcgs).pvalue)
    else:
        print("Wilcoxon p-value: Sample size too small to be significant")
    print("T-Test p-value:", ttest_ind(bool_ndcgs, vector_ndcgs).pvalue)
def eval(index_file, query_text, qrels, n):
    """Score Boolean and vector retrieval models with NDCG on n random queries.

    index_file -- path to the saved inverted index
    query_text -- path to query.text (Cranfield query collection)
    qrels      -- NOTE(review): this parameter is shadowed below and the code
                  hard-codes "qrels.text" instead; confirm intent with callers.
    n          -- number of distinct random queries to evaluate

    Prints per-query results and the Wilcoxon / Welch t-test p-values over the
    two models' NDCG score lists.
    """
    qrys = cranqry.loadCranQry(query_text)
    queries = {q: qrys[q].text for q in qrys}
    query_ids = sorted(queries.keys())
    query_ids_ints = [int(qid) for qid in query_ids]

    # Draw n distinct random query IDs.
    set1 = set()
    while len(set1) != n:
        set1.add(random.choice(query_ids_ints))
    selected_queries = list(set1)

    # Parse relevance judgements (qrels.text) into: query ID -> relevant doc IDs.
    # BUG FIX: the file handle was never closed; `with` guarantees cleanup.
    qrels = {}
    with open("qrels.text", "r") as f:
        for line in f:
            j = line.split(" ")
            # qrels numbers queries 1..N in sorted order; map back to real IDs.
            qid_key = query_ids_ints[int(j[0]) - 1]
            if qid_key in qrels:
                qrels[qid_key].append(int(j[1]))
            else:
                qrels[qid_key] = [int(j[1])]

    # Matching queries in query.text and qrels.text.
    cranqryobj = cranqry.loadCranQry(query_text)
    dict_query = {int(q): cranqryobj[q].text for q in cranqryobj}

    indexObject = index.InvertedIndex()
    items = indexObject.load(index_file)

    vector_ndcg_score = {}
    vector_score_dict = {}
    for q in selected_queries:
        print(q)
        query_raw = dict_query[q]
        QPobj = QueryProcessor(query_raw, items, index_file)
        QPobj.preprocessing()
        # Fetch first 10 documents for a query using the vector model.
        result_list = QPobj.vectorQuery(10)
        # Fetch documents for a query using the Boolean model.
        boolean_result_list = QPobj.booleanQuery()
        print("Boolean query result : ", boolean_result_list)

        truth_list = qrels[q]
        rank_doc_list = list(map(lambda x: int(x[0]), result_list))
        print("Relavant documents for this query : ", truth_list)
        print("Vector model result : ", rank_doc_list)

        # Predicted relevance (0/1) for each Boolean hit.
        boolean_output_list = []
        for doc_id in boolean_result_list:
            if int(doc_id) in truth_list:
                boolean_output_list.append(1)
            else:
                boolean_output_list.append(0)

        # BUG FIX: the original aliased boolean_output_list and padded it with
        # a `while len(...) != 10` loop, which never terminates when the
        # Boolean query returns more than 10 documents. Pad with zeros and
        # truncate to exactly 10 in one step instead.
        boolean_score_list = (boolean_output_list + [0] * 10)[:10]

        # Predicted relevance (0/1) for each vector-model hit.
        vector_score_list = []
        for doc_id in rank_doc_list:
            if doc_id in truth_list:
                vector_score_list.append(1)
            else:
                vector_score_list.append(0)
        vector_score_dict[q] = vector_score_list

        # Ground-truth (ideal) orderings: same relevance values, sorted descending.
        truth_score_list = sorted(vector_score_list, reverse=True)
        boolean_truth_score_list = sorted(boolean_score_list, reverse=True)

        print("Vector model ground_truth list is:\n", truth_score_list)
        print("Vector ranking score list is:\n", vector_score_list)
        print("Boolean model ground_truth list is:\n", boolean_truth_score_list)
        print("Boolean model score list is:\n", boolean_score_list)

        # [boolean NDCG, vector NDCG] per query.
        vector_ndcg_score[q] = [
            ndcg_score(np.array(boolean_truth_score_list),
                       np.array(boolean_score_list)),
            ndcg_score(np.array(truth_score_list),
                       np.array(vector_score_list))
        ]

    # Collect NDCG scores for both models over all randomly generated queries.
    vector_list = []
    boolean_list = []
    for qu in vector_ndcg_score:
        vector_list.append(vector_ndcg_score[qu][1])
        boolean_list.append(vector_ndcg_score[qu][0])
    print("ndcg score of boolean and vector models for all the queries:\n",
          vector_ndcg_score)
    print("ndcg scores list for boolean model for all the queries:\n",
          boolean_list)
    print("ndcg scores list for vector model for all the queries:\n",
          vector_list)

    # Significance tests between the two models' score lists.
    p_value_wilcoxon = stats.wilcoxon(np.array(boolean_list),
                                      np.array(vector_list))
    p_value_ttest = stats.ttest_ind(np.array(boolean_list),
                                    np.array(vector_list),
                                    equal_var=False)
    print("wilcoxon test p value is:", p_value_wilcoxon[1])
    print("ttest p value is :", p_value_ttest[1])