def viewSearchbyTitle():
    """Handle the "search by title" form and render the ranked results.

    On a POST request the query is read from the ``namesearchbytitle`` form
    field, cleaned with ``clean_query`` and ranked with BM25 over the pickled
    title corpus. The top five hits are rendered through
    ``searchbyText.html`` with, for every hit, its title, summary, up to
    three auto tags, a short text preview and its file extension.

    Returns:
        The rendered template for a POST request; ``None`` for any other
        request method.
    """
    # NOTE(review): non-POST requests still fall through without a response,
    # exactly as before - confirm the route is registered for POST only.
    if request.method != 'POST':
        return None

    def load_pickle(path):
        # Close the handle deterministically instead of leaving it to the GC.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    query = request.form['namesearchbytitle']

    # NOTE(review): the back-slash separators only resolve on Windows.
    data = load_pickle(r"DataBase\data_file.pkl")
    titles = load_pickle(r"DataBase\title_file.pkl")
    auto_tag = load_pickle(r"DataBase\svos_file.pkl")
    summary = load_pickle(r"DataBase\summary_file.pkl")
    corpus = load_pickle(r"DataBase\title_corpus.pkl")
    document_file = load_pickle(r"DataBase\document_file.pkl")

    bm25 = search_by_BM25(corpus)
    tokenized_query, old_query, new_query = clean_query(query.lower())
    indexes, results = bm25.get_top_n(tokenized_query, data, n=5)

    results_titles = []
    results_summaries = []
    results_tags = []
    for i in indexes:
        results_titles.append(titles[i])
        results_summaries.append(summary[i])
        if auto_tag[i] != []:
            # choices() samples with replacement, so de-duplicating can leave
            # fewer than three tags.
            results_tags.append(list(set(random.choices(auto_tag[i], k=3))))
        else:
            results_tags.append(['No Auto tags'])

    # Preview = first two sentences; fall back to the raw text when the
    # tokenizer yields nothing.
    text = []
    for passage in results:
        text_to_show = " ".join(sent_tokenize(passage)[:2])
        text.append(text_to_show + '....' if text_to_show != '' else passage)

    extension_list = [document_file[i]["extension"] for i in indexes]

    return render_template(
        'searchbyText.html',
        text=text,
        tag=query,
        title=results_titles,
        summaries=results_summaries,
        tags=results_tags,
        type="Title",
        title_len=len(results_titles),
        old_query=old_query,
        new_query=new_query,
        extension_list=extension_list,
    )
def viewSearchbyTitle(the_text):
    """Rank documents against ``the_text`` by title and render the results.

    The BM25 ranker is built from ``data_for_title``; ``data``, ``titles``,
    ``summary``, ``auto_tag`` and ``document_file`` are likewise read from
    the enclosing module scope. The five best hits are passed to
    ``searchbyText.html``.

    Args:
        the_text: Raw query string; it is lower-cased before cleaning.

    Returns:
        Whatever ``render_template`` returns for ``searchbyText.html``.
    """
    ranker = search_by_BM25(data_for_title)
    tokenized_query, old_query, new_query = clean_query(the_text.lower())
    indexes, results = ranker.get_top_n(tokenized_query, data, n=5)

    # One pass over the hit indexes collects titles, summaries and tags.
    hit_titles, hit_summaries, hit_tags = [], [], []
    for doc_id in indexes:
        hit_titles.append(titles[doc_id])
        hit_summaries.append(summary[doc_id])
        hit_tags.append(
            list(set(random.choices(auto_tag[doc_id], k=3)))
            if auto_tag[doc_id] != []
            else ['No Auto tags']
        )

    # Preview is the first two sentences, or the raw text if there are none.
    previews = []
    for passage in results:
        lead = " ".join(sent_tokenize(passage)[:2])
        previews.append(passage if lead == '' else lead + '....')

    extensions = [document_file[doc_id]["extension"] for doc_id in indexes]

    template_args = dict(
        text=previews,
        tag=the_text,
        title=hit_titles,
        summaries=hit_summaries,
        tags=hit_tags,
        type="Title",
        title_len=len(hit_titles),
        old_query=old_query,
        new_query=new_query,
        extension_list=extensions,
    )
    return render_template('searchbyText.html', **template_args)
def viewSearchbyText():
    """Handle the "search by text" form and render the ranked results.

    On a POST request the query is read from the ``namesearchbytext`` form
    field, cleaned with ``clean_query`` and ranked with BM25 over the pickled
    ``wiki_corpus_file`` corpus. The top five hits are rendered through
    ``searchbyText.html`` with their titles, a short text preview and a
    source/extension label.

    Returns:
        The rendered template for a POST request; ``None`` for any other
        request method.
    """
    # NOTE(review): non-POST requests still fall through without a response,
    # exactly as before - confirm the route is registered for POST only.
    if request.method != 'POST':
        return None

    def load_pickle(path):
        # Close the handle deterministically instead of leaving it to the GC.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    # Hits with an index below this value are labelled 'wikipedia'; the rest
    # are looked up in document_file after subtracting it.
    # NOTE(review): presumably the number of Wikipedia entries stored ahead of
    # the uploaded documents - confirm and keep in sync with the corpus.
    wikipedia_entry_count = 10909

    query = request.form['namesearchbytext']

    # NOTE(review): the back-slash separators only resolve on Windows.
    data = load_pickle(r"DataBase\data_file.pkl")
    titles = load_pickle(r"DataBase\title_file.pkl")
    corpus = load_pickle(r"DataBase\wiki_corpus_file.pkl")
    document_file = load_pickle(r"DataBase\document_file.pkl")

    bm25 = search_by_BM25(corpus)
    tokenized_query, old_query, new_query = clean_query(query.lower())
    indexes, results = bm25.get_top_n(tokenized_query, data, n=5)

    results_titles = [titles[i] for i in indexes]

    # Preview = first two sentences; fall back to the raw text when the
    # tokenizer yields nothing.
    text = []
    for passage in results:
        text_to_show = " ".join(sent_tokenize(passage)[:2])
        text.append(text_to_show + '....' if text_to_show != '' else passage)

    extension_list = [
        'wikipedia'
        if i < wikipedia_entry_count
        else document_file[i - wikipedia_entry_count]["extension"]
        for i in indexes
    ]

    return render_template(
        'searchbyText.html',
        text=text,
        tag=query,
        title=results_titles,
        type="Text",
        title_len=len(results_titles),
        old_query=old_query,
        new_query=new_query,
        extension_list=extension_list,
    )
return lemmed if __name__ == '__main__': data = pickle.load(open(r"DataBase/data_file.pkl", "rb")) titles = pickle.load(open(r"DataBase/title_file.pkl", "rb")) option = input("Enter option of search") if option == 'default search': corpus = pickle.load(open(r"DataBase/corpus_file.pkl", "rb")) elif option == 'tag search': corpus = pickle.load(open(r"DataBase/tags_pickle.pkl", "rb")) elif option == 'title search': corpus = pickle.load(open(r"DataBase/title_corpus.pkl", "rb")) else: print('Not valid option') bm25 = search_by_BM25(corpus) query = input("Enter query") tokenized_query = clean_query(query.lower()) indexes, results = bm25.get_top_n(tokenized_query, data, n=20) results_titles = [] for i in indexes: results_titles.append(titles[i]) for i in range(10): print(f"Title_{i}: {results_titles[i]}") print(f"\nText_{i}: {results[i]}") print('\n\n')