def retrieve_docs():
    """Rank documents for feedback-expanded queries and write the run to CSV.

    Steps:
      1. Rank every query in the module-level ``query_dict`` with the
         cosine-similarity model.
      2. Rewrite each query via ``relevance_feedback_query`` using that
         query's initial ranking.
      3. Rank again with the rewritten queries and write one row per ranked
         entry to ``query_expansion_table`` in the layout
         ``(query_id, "Q0", score[0], rank, score[1], "query_expansion")``.
      4. For the top-ranked entry of each query, print the original query,
         ``score[0]`` and a snippet generated for that pair.

    Takes no arguments and returns ``None``; all inputs come from
    module-level names defined outside this block.

    NOTE(review): ``score[0]`` looks like a document identifier (it is
    concatenated to a string) and ``score[1]`` like its similarity value;
    confirm against ``RetrievalModel``.
    """
    snippet_generator = Snippet.SnippetGenerator(document_tokens,
                                                 stop_words_path)
    model = RetrievalModel.CosineSimilarity(N, inverted_index, document_tokens)
    ranked_list = model.cosine_similarity_list(query_dict)

    # Finish all ranking work before opening the output file, so that a
    # failure during feedback/re-ranking does not leave a truncated file.
    updated_query_dict = {
        query_id: relevance_feedback_query(query_dict[query_id], scores)
        for query_id, scores in ranked_list.items()
    }
    updated_list = model.cosine_similarity_list(updated_query_dict)

    # newline="" lets the csv module control line endings itself (avoids
    # blank rows on platforms that translate "\n"). The with-statement closes
    # the file, so no explicit close() is needed.
    with open(query_expansion_table, "w", newline="") as file:
        csv_writer = csv.writer(file)
        for query_id, scores in updated_list.items():
            for rank, score in enumerate(scores, start=1):
                csv_writer.writerow(
                    (query_id, "Q0", score[0], rank, score[1],
                     "query_expansion"))
                if rank == 1:
                    # The snippet is built from the original query text,
                    # not the expanded one.
                    query = query_dict[query_id]
                    print("Given Query: " + query)
                    print("Top Document for given query: " + score[0])
                    print("Snippet: \n" +
                          snippet_generator.generate_snippet(score[0], query))
示例#2
0
def retrieve_tf_idf__docs(snippet_generator):
    """Rank documents with the TF-IDF model and write the run to CSV.

    Ranks every query in the module-level ``query_dict`` and writes one row
    per ranked entry to ``tfidf_table`` in the layout
    ``(query_id, "Q0", score[0], rank, score[1], "tf_idf")``. For the
    top-ranked entry of each query, prints the query, ``score[0]`` and a
    generated snippet.

    Args:
        snippet_generator: Object exposing ``generate_snippet(doc, query)``
            whose result is concatenated to a string for printing.

    Returns:
        None.

    NOTE(review): ``score[0]`` looks like a document identifier and
    ``score[1]`` like its score; confirm against ``RetrievalModel.TFIDF``.
    """
    tf_idf = RetrievalModel.TFIDF(N, inverted_index, document_tokens)
    ranked_list = tf_idf.tf_idf_list(query_dict)

    # newline="" lets the csv module control line endings itself; the
    # with-statement closes the file, so no explicit close() is needed.
    with open(tfidf_table, "w", newline="") as file:
        csv_writer = csv.writer(file)
        for query_id, scores in ranked_list.items():
            for rank, score in enumerate(scores, start=1):
                csv_writer.writerow(
                    (query_id, "Q0", score[0], rank, score[1], "tf_idf"))
                if rank == 1:
                    # Only the best-ranked entry is echoed to stdout.
                    query = query_dict[query_id]
                    print("Given Query: " + query)
                    print("Top Document for given query: " + score[0])
                    print("Snippet: \n" +
                          snippet_generator.generate_snippet(score[0], query))
示例#3
0
def retrieve_bm_25_docs(snippet_generator):
    """Rank documents with the BM25 model and write the run to CSV.

    Builds the model from the module-level ``N``, ``inverted_index``,
    ``document_tokens`` and ``relevance_dict``, ranks every query in
    ``query_dict`` and writes one row per ranked entry to ``bm25_table`` in
    the layout ``(query_id, "Q0", score[0], rank, score[1], "bm_25")``. For
    the top-ranked entry of each query, prints the query, ``score[0]`` and a
    generated snippet.

    Args:
        snippet_generator: Object exposing ``generate_snippet(doc, query)``
            whose result is concatenated to a string for printing.

    Returns:
        None.

    NOTE(review): ``score[0]`` looks like a document identifier and
    ``score[1]`` like its score; confirm against ``RetrievalModel.BM25``.
    """
    bm_25 = RetrievalModel.BM25(N, inverted_index, document_tokens,
                                relevance_dict)
    ranked_list = bm_25.bm_25_list(query_dict)

    # newline="" lets the csv module control line endings itself; the
    # with-statement closes the file, so no explicit close() is needed.
    with open(bm25_table, "w", newline="") as file:
        csv_writer = csv.writer(file)
        for query_id, scores in ranked_list.items():
            for rank, score in enumerate(scores, start=1):
                csv_writer.writerow(
                    (query_id, "Q0", score[0], rank, score[1], "bm_25"))
                if rank == 1:
                    # Only the best-ranked entry is echoed to stdout.
                    query = query_dict[query_id]
                    print("Given Query: " + query)
                    print("Top Document for given query: " + score[0])
                    print("Snippet: \n" +
                          snippet_generator.generate_snippet(score[0], query))
示例#4
0
def retrieve_docs():
    """Rank documents after stop-word processing and write the run to CSV.

    Obtains stop words via ``retrieve_stop_words``, derives updated document
    tokens and queries via ``update_docs`` / ``update_queries``, ranks the
    updated queries with the cosine-similarity model and writes one row per
    ranked entry to ``stop_table`` in the layout
    ``(query_id, "Q0", score[0], rank, score[1], "using_stop_words")``. For
    the top-ranked entry of each query, prints the query, ``score[0]`` and a
    generated snippet.

    Takes no arguments and returns ``None``; all inputs come from
    module-level names defined outside this block.

    NOTE(review): ``score[0]`` looks like a document identifier and
    ``score[1]`` like its similarity value; confirm against
    ``RetrievalModel``.
    """
    # The snippet generator is built from the original document tokens and
    # an empty second argument, not from the stop-word-processed data.
    snippet_generator = Snippet.SnippetGenerator(document_tokens, "")
    stop_words = retrieve_stop_words()
    updated_document_tokens = update_docs(stop_words)
    updated_query_dict = update_queries(stop_words)
    model = RetrievalModel.CosineSimilarity(N, inverted_index,
                                            updated_document_tokens)
    ranked_list = model.cosine_similarity_list(updated_query_dict)

    # newline="" lets the csv module control line endings itself; the
    # with-statement closes the file, so no explicit close() is needed.
    with open(stop_table, "w", newline="") as file:
        csv_writer = csv.writer(file)
        for query_id, scores in ranked_list.items():
            for rank, score in enumerate(scores, start=1):
                csv_writer.writerow(
                    (query_id, "Q0", score[0], rank, score[1],
                     "using_stop_words"))
                if rank == 1:
                    # Printed query comes from the original query_dict, not
                    # from the updated queries used for ranking.
                    query = query_dict[query_id]
                    print("Given Query: " + query)
                    print("Top Document for given query: " + score[0])
                    print("Snippet: \n" +
                          snippet_generator.generate_snippet(score[0], query))
示例#5
0
def retrieve_cosine_sim_docs(snippet_generator):
    """Rank documents with the cosine-similarity model and write the run to CSV.

    Ranks every query in the module-level ``query_dict`` and writes one row
    per ranked entry to ``vsm_table`` in the layout
    ``(query_id, "Q0", score[0], rank, score[1], "vector_space_model")``.
    For the top-ranked entry of each query, prints the query, ``score[0]``
    and a generated snippet.

    Args:
        snippet_generator: Object exposing ``generate_snippet(doc, query)``
            whose result is concatenated to a string for printing.

    Returns:
        None.

    NOTE(review): ``score[0]`` looks like a document identifier and
    ``score[1]`` like its similarity value; confirm against
    ``RetrievalModel.CosineSimilarity``.
    """
    cosine_sim = RetrievalModel.CosineSimilarity(N, inverted_index,
                                                 document_tokens)
    ranked_list = cosine_sim.cosine_similarity_list(query_dict)

    # newline="" lets the csv module control line endings itself; the
    # with-statement closes the file, so no explicit close() is needed.
    with open(vsm_table, "w", newline="") as file:
        csv_writer = csv.writer(file)
        for query_id, scores in ranked_list.items():
            for rank, score in enumerate(scores, start=1):
                csv_writer.writerow((query_id, "Q0", score[0], rank, score[1],
                                     "vector_space_model"))
                if rank == 1:
                    # Only the best-ranked entry is echoed to stdout.
                    query = query_dict[query_id]
                    print("Given Query: " + query)
                    print("Top Document for given query: " + score[0])
                    print("Snippet: \n" +
                          snippet_generator.generate_snippet(score[0], query))