def eval(testOn):
    """Evaluate Boolean vs. vector retrieval models over random Cranfield queries.

    Configuration is read from the command line:
        sys.argv[1] -- path to the saved inverted-index file
        sys.argv[2] -- path to query.text
        sys.argv[3] -- path to qrels.text
        sys.argv[4] -- number of random queries to sample per iteration

    Runs ``numberOfTimeToLoop`` iterations; each iteration samples random
    queries, runs both ``booleanQuery()`` and ``vectorQuery(k)`` through a
    ``QueryProcessor``, and accumulates NDCG scores for each model.  Finally
    prints the average NDCG per model plus t-test and Wilcoxon p-values
    comparing the two score lists.

    Args:
        testOn: when truthy, print timing/debug output and run sanity asserts.
    """
    k = 10  # k the number of top k pairs of (docID, similarity) to get from vectorQuery
    dictQ_ID = []
    indexFile = sys.argv[1]  #v "src/Data/tempFile"
    queryText = sys.argv[2]
    qrelsText = sys.argv[3]
    dictOfQuery = {}
    dictQrelsText = {}
    docCollection = CranFile('./CranfieldDataset/cran.all')
    NDCGScoreBool = []
    numberOfQueries = int(sys.argv[4])
    NDCGScoreVector = []
    #indexFile           = "src/Data/tempFile"
    #queryText           = 'src/CranfieldDataset/query.text'
    #qrelsText           = 'src/CranfieldDataset/qrels.text'
    #numberOfQueries     = 50
    numberOfTimeToLoop = 5

    #Loads Files
    listOfQueryRelsMaping = readFile(qrelsText)
    queryFile = loadCranQry(queryText)

    #Data Need
    for i in range(numberOfTimeToLoop):

        #Get random query sample for this iteration
        dictOfQuery = getRandomQuery(queryFile, numberOfQueries)
        if testOn:
            assert len(dictOfQuery
                       ) == numberOfQueries, "Error are getting random query"

        # Return all query
        # dictOfQuery = getAllDataItems(queryFile)
        # if testOn:
        #     assert len(dictOfQuery) == 225, "Error are getting random query"

        #get list of Query result from qrel.txt
        dictQrelsText = getResultsFrom_QrelsFile(listOfQueryRelsMaping,
                                                 dictOfQuery)
        if testOn:
            assert len(dictQrelsText
                       ) == numberOfQueries, "Error number Of Queries to large"

        start = timer()
        queryProcessor = QueryProcessor(
            "", indexFile,
            docCollection.docs)  # This is an extremely expensive process\
        end = timer()

        if testOn:
            print("Time for creating QueryProcessor:", end - start)
        countDoc = 0
        start = timer()

        dictQ_ID = []
        # NOTE(review): the loop variable `queryText` shadows the argv[2]
        # path read above; harmless here because the path is no longer used.
        for qid, queryText in dictOfQuery.items():
            countDoc += 1

            dictQ_ID.append(qid)

            if testOn:
                print("QID:", qid)
            start = timer()
            queryProcessor.loadQuery(queryText)
            end = timer()
            if testOn:
                print("Time for Load:", end - start)
                print("qrels: ", dictQrelsText[qid])

            start = timer()
            docIDs = queryProcessor.booleanQuery(
            )  # data would need to be like this [12, 14, 78, 141, 486, 746, 172, 573, 1003]
            #docIDs_1 = queryProcessor.booleanQuery_1()
            end = timer()
            if testOn:
                print("Time for booleanQuery:", end - start)

            start = timer()
            listOfDocIDAndSimilarity = queryProcessor.vectorQuery(
                k
            )  # data need to look like k=3 [[625,0.8737006126353902],[401,0.8697643788341478],[943,0.8424991316663082]]
            #vectorQueryDict[qid] = dictOfDocIDAndSimilarity
            end = timer()
            if testOn:
                print("Time for vectorQuery:", end - start)
                print("booleanQuery:", docIDs)

            #For Boolean part: every returned doc gets score 1 (no ranking weight)
            start = timer()
            yTrue = []
            yScore = []
            for docID in docIDs:
                yScore.append(1)
                if docID in dictQrelsText[qid]:
                    yTrue.append(1)
                else:
                    yTrue.append(0)
            # Sorting the relevance vector descending makes it the ideal
            # ordering expected by ndcg_score as the ground-truth argument.
            yTrue.sort(reverse=True)
            score = metrics.ndcg_score(yTrue[:k], yScore[:k], k, "exponential")
            if math.isnan(score):
                NDCGScoreBool.append(0)  # treat undefined NDCG (no relevant docs) as 0
            else:
                NDCGScoreBool.append(score)
            end = timer()
            if testOn:
                print("Time for  Boolean ndcg:", end - start)

            #For Vector part: use the model's similarity values as predicted scores
            start = timer()
            yTrue = []
            yScore = []
            if testOn:
                print("vectorQuery:", listOfDocIDAndSimilarity)
            for docID_Score in listOfDocIDAndSimilarity:
                yScore.append(float(docID_Score[1]))
                if docID_Score[0] in dictQrelsText[qid]:
                    yTrue.append(1)
                else:
                    yTrue.append(0)
            yTrue.sort(reverse=True)
            score = metrics.ndcg_score(yTrue[:k], yScore[:k], k, "exponential")
            if math.isnan(score):
                NDCGScoreVector.append(0)
            else:
                NDCGScoreVector.append(score)
            end = timer()
            if testOn:
                print("Time for  Vector ndcg:", end - start)
        print("\nRunning Querys iteration:(", str(i + 1), ")\n", dictQ_ID)

        if testOn:
            for QID, boolScore, vectorScore in zip(dictQ_ID, NDCGScoreBool,
                                                   NDCGScoreVector):
                print("QID", QID, "Boolean Model:", boolScore, "Vector Model",
                      vectorScore)

    print("\nThe Length Of Both NDCG Score is: ", len(NDCGScoreBool), "==",
          len(NDCGScoreVector))

    print('\nThe Avg NDCG Score')
    vectorAvg = avg(NDCGScoreVector)
    BoolAvg = avg(NDCGScoreBool)
    print("Avg NDCG Score for Bool:", BoolAvg, "\nAvg NDCG Score for Vector:",
          vectorAvg)
    # NOTE(review): `start` and `countDoc` still hold values from the last
    # loop iteration, so this reports the time since the last query's NDCG
    # step, not the total run time — looks like a bug; confirm intent.
    end = timer()
    if testOn:
        print("\n\nTime for running ", countDoc, " queries:", end - start)

    # Significance tests between the two models' per-query NDCG score lists
    print('\nThe P-Value')
    p_va_ttest = stats.ttest_ind(NDCGScoreBool, NDCGScoreVector)
    p_va_wilcoxon = stats.wilcoxon(NDCGScoreBool, NDCGScoreVector)
    print("T-Test P-value: ", p_va_ttest)
    print("Wilcoxon P-value: ", p_va_wilcoxon)
    print('Done')
# Example #2
def eval():
    """Compare Boolean and vector retrieval with NDCG over n random queries.

    Relies on module-level globals (``query_path``, ``index_file``,
    ``qrels_path``, ``n``) and project helpers (``loadCranQry``,
    ``InvertedIndex``, ``CranFile``, ``QueryProcessor``, ``ndcg_score``,
    ``wilcoxon``, ``ttest_ind``, ``choice``).

    Algorithm:
      1. Pick n random queries from query.text.
      2. Get top-10 Boolean and vector results for each.
      3. Compute NDCG of each result list against qrels.txt ground truth.
      4. Print average NDCGs and the Wilcoxon / t-test p-values.
    """
    # Get the query collection
    qc = loadCranQry(query_path)
    poss_queries = list(qc)

    # Load up the inverted index
    ii = InvertedIndex()
    ii.load(index_file)

    # Load up the document collection
    cf = CranFile("cran.all")

    # Get ground-truth results from qrels.txt
    with open(qrels_path) as f:
        qrels = f.readlines()

    # Index qrels into a dict: query number -> list of relevant doc IDs
    qrel_dict = {}
    for qrel in qrels:
        qrel_split = qrel.split()
        qrel_dict.setdefault(int(qrel_split[0]), []).append(int(qrel_split[1]))

    # Run over N random queries, collecting NDCGs
    bool_ndcgs = []
    vector_ndcgs = []
    for _ in range(n):
        # Get random query ID
        query_id = choice(poss_queries)

        # Zero-pad numeric IDs to the 3-character key format of the query file
        if 0 < int(query_id) < 10:
            query_id = '00' + str(int(query_id))
        elif 9 < int(query_id) < 100:
            query_id = '0' + str(int(query_id))
        try:
            query = qc[query_id].text
        except KeyError:
            print("Invalid query id", query_id)
            return

        # Initialize the query processor
        qp = QueryProcessor(query, ii, cf)

        # Run bool query
        bool_result = qp.booleanQuery()[:10]

        # Run vector query
        vector_result = qp.vectorQuery(10)

        # Pull top 10 ground-truth results from qrels dict
        gt_results = qrel_dict[poss_queries.index(query_id) + 1][:10]

        # Compute NDCG for bool query
        # NOTE: There is no weighting on the bool query, so give all an even 1
        truth_vector = [doc in gt_results for doc in bool_result]
        bool_ndcg = ndcg_score(truth_vector, [1] * len(truth_vector),
                               k=len(truth_vector))

        # Compute NDCG for vector query
        vector_docs = [v[0] for v in vector_result]
        vector_scores = [v[1] for v in vector_result]
        truth_vector = [doc in gt_results for doc in vector_docs]
        vector_ndcg = ndcg_score(truth_vector,
                                 vector_scores,
                                 k=len(truth_vector))

        # Accumulate NDCGs
        bool_ndcgs.append(bool_ndcg)
        vector_ndcgs.append(vector_ndcg)

    # Average the score lists (the original looped with a variable named
    # `bool`, shadowing the builtin; sum/len is both safer and clearer)
    bool_avg = sum(bool_ndcgs) / len(bool_ndcgs)
    vector_avg = sum(vector_ndcgs) / len(vector_ndcgs)

    # Present averages and p-values
    print("Boolean NDCG average:", bool_avg)
    print("Vector NDCG average:", vector_avg)
    # Wilcoxon needs a reasonable sample size to be meaningful
    if n > 19:
        print("Wilcoxon p-value:", wilcoxon(bool_ndcgs, vector_ndcgs).pvalue)
    else:
        print("Wilcoxon p-value: Sample size too small to be significant")
    print("T-Test p-value:", ttest_ind(bool_ndcgs, vector_ndcgs).pvalue)
def eval(index_file, query_text, qrels, n):
    """Compare Boolean and vector retrieval models using NDCG on n random queries.

    Args:
        index_file: path to the saved inverted index.
        query_text: path to query.text (Cranfield queries).
        qrels: unused — relevance judgments are read from the hard-coded
            "qrels.text" file instead (parameter kept for interface
            compatibility; NOTE(review): likely intended to be the file path).
        n: number of distinct random queries to evaluate.

    Prints per-query details, the per-model NDCG score lists, and the
    Wilcoxon and t-test p-values comparing the two models.
    """
    qrys = cranqry.loadCranQry(query_text)
    queries = {q: qrys[q].text for q in qrys}
    query_ids = sorted(queries.keys())
    query_ids_ints = [int(qid) for qid in query_ids]

    # Sample n distinct random query IDs
    set1 = set()
    while len(set1) != n:
        set1.add(random.choice(query_ids_ints))
    selected_queries = list(set1)

    # Parse relevant judgments (qrels.text): query number -> relevant doc IDs.
    # The `with` block guarantees the file is closed (the original leaked it).
    qrels = {}
    with open("qrels.text", "r") as f:
        for line in f:
            fields = line.split(" ")
            key = query_ids_ints[int(fields[0]) - 1]
            qrels.setdefault(key, []).append(int(fields[1]))

    # Match queries in query.text with the numbering used by qrels.text
    cranqryobj = cranqry.loadCranQry(query_text)
    dict_query = {int(q): cranqryobj[q].text for q in cranqryobj}

    indexObject = index.InvertedIndex()
    items = indexObject.load(index_file)

    vector_ndcg_score = {}
    vector_score_dict = {}
    for q in selected_queries:
        print(q)
        query_raw = dict_query[q]
        QPobj = QueryProcessor(query_raw, items, index_file)
        QPobj.preprocessing()
        # Fetch first 10 documents for the query using the vector model
        result_list = QPobj.vectorQuery(10)
        boolean_result_list = QPobj.booleanQuery()
        print("Boolean query result : ", boolean_result_list
              )  # fetching documents for a query using booleanQuery
        truth_list = qrels[q]
        rank_doc_list = [int(x[0]) for x in result_list]
        print("Relavant documents for this query : ",
              truth_list)  # relavant documents for the query
        print("Vector model result : ",
              rank_doc_list)  # documents result list for vector model

        # Predicted 0/1 relevance for the Boolean model, padded out to 10
        # slots (the original's >10 branch was unreachable and is dropped)
        boolean_output_list = [
            1 if int(doc) in truth_list else 0 for doc in boolean_result_list
        ]
        boolean_score_list = (boolean_output_list +
                              [0] * max(0, 10 - len(boolean_output_list)))

        # Predicted 0/1 relevance for the vector model
        vector_score_list = [
            1 if doc in truth_list else 0 for doc in rank_doc_list
        ]
        vector_score_dict[q] = vector_score_list

        # Ground-truth (ideal) orderings are the same 0/1 lists sorted descending
        truth_score_list = sorted(vector_score_list, reverse=True)
        boolean_truth_score_list = sorted(boolean_score_list, reverse=True)

        print("Vector model ground_truth list is:\n", truth_score_list)
        print("Vector ranking score list is:\n", vector_score_list)
        print("Boolean model ground_truth list is:\n",
              boolean_truth_score_list)
        print("Boolean model score list is:\n", boolean_score_list)
        vector_ndcg_score[q] = [
            ndcg_score(np.array(boolean_truth_score_list),
                       np.array(boolean_score_list)),
            ndcg_score(np.array(truth_score_list), np.array(vector_score_list))
        ]

    # Collect per-query NDCG scores for each model
    boolean_list = [scores[0] for scores in vector_ndcg_score.values()]
    vector_list = [scores[1] for scores in vector_ndcg_score.values()]

    print("ndcg score of boolean and vector models for all the queries:\n",
          vector_ndcg_score)
    print("ndcg scores list for boolean model for all the queries:\n",
          boolean_list)
    print("ndcg scores list for vector model for all the queries:\n",
          vector_list)

    # Significance tests between the two models' per-query NDCG scores.
    p_value_wilcoxon = stats.wilcoxon(np.array(boolean_list),
                                      np.array(vector_list))
    # BUG FIX: in the original, this assignment had been swallowed into a
    # trailing comment, so printing p_value_ttest below raised NameError.
    p_value_ttest = stats.ttest_ind(np.array(boolean_list),
                                    np.array(vector_list),
                                    equal_var=False)
    print("wilcoxon test p value is:", p_value_wilcoxon[1])
    print("ttest p value is :", p_value_ttest[1])