def retrieve_docs():
    """Run cosine-similarity retrieval with relevance-feedback query expansion.

    Every query in the module-level ``query_dict`` is ranked once with
    ``RetrievalModel.CosineSimilarity``.  Each query is then rewritten by
    ``relevance_feedback_query`` from its first-pass results, the rewritten
    queries are ranked again, and the second-pass results are written to
    ``query_expansion_table`` as CSV rows of::

        (query_id, "Q0", score[0], rank, score[1], "query_expansion")

    where ``rank`` is the 1-based position of the entry in its result list.
    For the first entry of each query the original query text, ``score[0]``
    (printed as the top document) and a generated snippet are echoed to
    stdout.

    Takes no arguments and returns ``None``; all inputs are module-level
    names.

    NOTE(review): a second ``retrieve_docs`` appears later in this view; if
    both live in the same module the later definition replaces this one --
    confirm they belong to separate files.
    """
    snippet_generator = Snippet.SnippetGenerator(document_tokens, stop_words_path)
    model = RetrievalModel.CosineSimilarity(N, inverted_index, document_tokens)
    first_pass = model.cosine_similarity_list(query_dict)

    # Both ranking passes complete before the output file is opened, so a
    # failure while ranking does not leave a truncated results table behind.
    expanded_queries = {
        query_id: relevance_feedback_query(query_dict[query_id], scores)
        for query_id, scores in first_pass.items()
    }
    second_pass = model.cosine_similarity_list(expanded_queries)

    # newline="" is required by the csv module: without it the writer's own
    # "\r\n" terminators are translated again on Windows, giving blank rows.
    with open(query_expansion_table, "w", newline="") as out_file:
        csv_writer = csv.writer(out_file)
        for query_id, scores in second_pass.items():
            for rank, score in enumerate(scores, start=1):
                csv_writer.writerow(
                    (query_id, "Q0", score[0], rank, score[1], "query_expansion"))
                if rank == 1:
                    # The snippet is built from the original query text, not
                    # the expanded one.
                    query = query_dict[query_id]
                    print("Given Query: " + query)
                    print("Top Document for given query: " + score[0])
                    print("Snippet: \n" + snippet_generator.generate_snippet(score[0], query))
def retrieve_tf_idf__docs(snippet_generator):
    """Rank every query with the TF-IDF model and write the results as CSV.

    Builds ``RetrievalModel.TFIDF`` from the module-level ``N``,
    ``inverted_index`` and ``document_tokens``, ranks the module-level
    ``query_dict`` with ``tf_idf_list`` and writes one row per result to
    ``tfidf_table``::

        (query_id, "Q0", score[0], rank, score[1], "tf_idf")

    where ``rank`` is the 1-based position of the entry in its result list.
    For the first entry of each query the query text, ``score[0]`` (printed
    as the top document) and a generated snippet are echoed to stdout.

    Args:
        snippet_generator: Object providing ``generate_snippet(doc, query)``;
            its result is concatenated onto a string, so it must return
            ``str``.

    Returns:
        None.
    """
    tf_idf = RetrievalModel.TFIDF(N, inverted_index, document_tokens)
    ranked_list = tf_idf.tf_idf_list(query_dict)

    # newline="" is required by the csv module: without it the writer's own
    # "\r\n" terminators are translated again on Windows, giving blank rows.
    with open(tfidf_table, "w", newline="") as out_file:
        csv_writer = csv.writer(out_file)
        for query_id, scores in ranked_list.items():
            for rank, score in enumerate(scores, start=1):
                csv_writer.writerow(
                    (query_id, "Q0", score[0], rank, score[1], "tf_idf"))
                if rank == 1:
                    query = query_dict[query_id]
                    print("Given Query: " + query)
                    print("Top Document for given query: " + score[0])
                    print("Snippet: \n" + snippet_generator.generate_snippet(score[0], query))
def retrieve_bm_25_docs(snippet_generator):
    """Rank every query with the BM25 model and write the results as CSV.

    Builds ``RetrievalModel.BM25`` from the module-level ``N``,
    ``inverted_index``, ``document_tokens`` and ``relevance_dict``, ranks the
    module-level ``query_dict`` with ``bm_25_list`` and writes one row per
    result to ``bm25_table``::

        (query_id, "Q0", score[0], rank, score[1], "bm_25")

    where ``rank`` is the 1-based position of the entry in its result list.
    For the first entry of each query the query text, ``score[0]`` (printed
    as the top document) and a generated snippet are echoed to stdout.

    Args:
        snippet_generator: Object providing ``generate_snippet(doc, query)``;
            its result is concatenated onto a string, so it must return
            ``str``.

    Returns:
        None.
    """
    bm_25 = RetrievalModel.BM25(N, inverted_index, document_tokens, relevance_dict)
    ranked_list = bm_25.bm_25_list(query_dict)

    # newline="" is required by the csv module: without it the writer's own
    # "\r\n" terminators are translated again on Windows, giving blank rows.
    with open(bm25_table, "w", newline="") as out_file:
        csv_writer = csv.writer(out_file)
        for query_id, scores in ranked_list.items():
            for rank, score in enumerate(scores, start=1):
                csv_writer.writerow(
                    (query_id, "Q0", score[0], rank, score[1], "bm_25"))
                if rank == 1:
                    query = query_dict[query_id]
                    print("Given Query: " + query)
                    print("Top Document for given query: " + score[0])
                    print("Snippet: \n" + snippet_generator.generate_snippet(score[0], query))
def retrieve_docs():
    """Run cosine-similarity retrieval on stop-word-processed docs and queries.

    Loads stop words with ``retrieve_stop_words()``, derives processed
    document tokens and queries via ``update_docs`` / ``update_queries``
    (presumably with the stop words removed -- confirm against those
    helpers), ranks the processed queries with
    ``RetrievalModel.CosineSimilarity`` and writes one row per result to
    ``stop_table``::

        (query_id, "Q0", score[0], rank, score[1], "using_stop_words")

    where ``rank`` is the 1-based position of the entry in its result list.
    For the first entry of each query the query text, ``score[0]`` (printed
    as the top document) and a generated snippet are echoed to stdout.

    Takes no arguments and returns ``None``; all inputs are module-level
    names.

    NOTE(review): another ``retrieve_docs`` appears earlier in this view; if
    both live in the same module this definition replaces the earlier one --
    confirm they belong to separate files.
    """
    # The snippet generator works on the unprocessed ``document_tokens`` and
    # is given an empty string as its second argument.
    snippet_generator = Snippet.SnippetGenerator(document_tokens, "")
    stop_words = retrieve_stop_words()
    updated_document_tokens = update_docs(stop_words)
    updated_query_dict = update_queries(stop_words)
    model = RetrievalModel.CosineSimilarity(N, inverted_index, updated_document_tokens)
    ranked_list = model.cosine_similarity_list(updated_query_dict)

    # newline="" is required by the csv module: without it the writer's own
    # "\r\n" terminators are translated again on Windows, giving blank rows.
    with open(stop_table, "w", newline="") as out_file:
        csv_writer = csv.writer(out_file)
        for query_id, scores in ranked_list.items():
            for rank, score in enumerate(scores, start=1):
                csv_writer.writerow(
                    (query_id, "Q0", score[0], rank, score[1], "using_stop_words"))
                if rank == 1:
                    # Echo and snippet use the original query text from
                    # ``query_dict``, not the processed query.
                    query = query_dict[query_id]
                    print("Given Query: " + query)
                    print("Top Document for given query: " + score[0])
                    print("Snippet: \n" + snippet_generator.generate_snippet(score[0], query))
def retrieve_cosine_sim_docs(snippet_generator):
    """Rank every query with cosine similarity and write the results as CSV.

    Builds ``RetrievalModel.CosineSimilarity`` from the module-level ``N``,
    ``inverted_index`` and ``document_tokens``, ranks the module-level
    ``query_dict`` with ``cosine_similarity_list`` and writes one row per
    result to ``vsm_table``::

        (query_id, "Q0", score[0], rank, score[1], "vector_space_model")

    where ``rank`` is the 1-based position of the entry in its result list.
    For the first entry of each query the query text, ``score[0]`` (printed
    as the top document) and a generated snippet are echoed to stdout.

    Args:
        snippet_generator: Object providing ``generate_snippet(doc, query)``;
            its result is concatenated onto a string, so it must return
            ``str``.

    Returns:
        None.
    """
    cosine_sim = RetrievalModel.CosineSimilarity(N, inverted_index, document_tokens)
    ranked_list = cosine_sim.cosine_similarity_list(query_dict)

    # newline="" is required by the csv module: without it the writer's own
    # "\r\n" terminators are translated again on Windows, giving blank rows.
    with open(vsm_table, "w", newline="") as out_file:
        csv_writer = csv.writer(out_file)
        for query_id, scores in ranked_list.items():
            for rank, score in enumerate(scores, start=1):
                csv_writer.writerow(
                    (query_id, "Q0", score[0], rank, score[1], "vector_space_model"))
                if rank == 1:
                    query = query_dict[query_id]
                    print("Given Query: " + query)
                    print("Top Document for given query: " + score[0])
                    print("Snippet: \n" + snippet_generator.generate_snippet(score[0], query))