def calculate_overall_ranking(self, raw_queries, settings):
    """Print the mean average precision of whole-document vs. section search.

    Every query is sent to ``api.get_papers`` twice: once against the
    ``"whole-document"`` field with ``Mode.without_importance_to_sections``
    and once against the field named by ``query["imrad"]`` with
    ``Mode.importance_to_sections``. The average precision of both rankings
    is computed against the papers listed in ``query["references"]`` and the
    two means are printed as ``name & count & mean`` rows (mean rounded to
    four digits).

    NOTE: ``settings["mode"]`` is overwritten in place, so the caller's dict
    is modified. This side effect is kept for backward compatibility.

    :param raw_queries: queries in the form accepted by
        ``self.__raw_queries_to_queries``.
    :param settings: settings dict forwarded to ``api.get_papers``.
    :return: ``None``; the result is only printed.
    """
    api = API()
    queries = self.__raw_queries_to_queries(raw_queries)

    settings["mode"] = Mode.without_importance_to_sections
    settings_sec = copy.deepcopy(settings)
    settings_sec["mode"] = Mode.importance_to_sections

    total = len(queries)
    if total == 0:
        # Without this guard the means below would raise ZeroDivisionError.
        print()
        print("No queries to evaluate.")
        return

    ap_whole_scores = []
    ap_section_scores = []
    for i, query in enumerate(queries):
        progressBar(i, total)

        search_query = query["search_query"]
        ranked_papers_whole = api.get_papers({"whole-document": search_query}, settings)
        ranked_papers_sec = api.get_papers({query["imrad"]: search_query}, settings_sec)
        relevant_paper = [api.get_paper(reference["paper_id"]) for reference in query["references"]]

        ap_whole_scores.append(self.average_precision(ranked_papers_whole, relevant_paper))
        ap_section_scores.append(self.average_precision(ranked_papers_sec, relevant_paper))

    print()
    report = (
        (Mode.without_importance_to_sections, ap_whole_scores),
        (Mode.importance_to_sections, ap_section_scores),
    )
    for mode, scores in report:
        mean_ap = sum(scores) / len(scores)
        print("{} & {} & {}".format(mode.name.replace("_", " "), len(scores), round(mean_ap, 4)))
def paper_info(paper_id):
    """Render the admin info page for one paper.

    The template receives the requested paper and a mapping from the id of
    every stored paper to its filename.

    :param paper_id: id handed to ``api.get_paper``.
    :return: the rendered ``admin/paper_info.html`` template.
    """
    api = API()

    # Build the id -> filename lookup over all stored papers.
    id_to_filename = {}
    for entry in api.get_all_paper():
        id_to_filename[entry.id] = entry.filename

    requested_paper = api.get_paper(paper_id)
    return render_template(
        'admin/paper_info.html',
        paper=requested_paper,
        id_to_filename=id_to_filename,
    )
def print_circles(circles):
    """Print the circles as lists of paper filenames, then as given.

    :param circles: iterable of circles, each an iterable of node values
        that ``api.get_paper`` accepts.
    """
    api = API()
    filenames_per_circle = [
        [api.get_paper(node).filename for node in circle]
        for circle in circles
    ]
    print(filenames_per_circle)
    print(circles)
def remove_link(paper_id):
    """Remove a reference link from a paper and re-render its info page.

    Requests without a truthy ``session['logged_in']`` are redirected to
    ``admin/``. Otherwise the link between ``paper_id`` and the submitted
    form field ``ref_paper_id`` is removed via ``api.remove__link_of_paper``.

    :param paper_id: id of the paper whose link is removed.
    :return: a redirect response, or the rendered
        ``admin/paper_info.html`` template.
    """
    # Guard clause: only logged-in sessions may modify links.
    if 'logged_in' not in session.keys() or not session['logged_in']:
        return redirect('admin/')

    api = API()
    ref_paper_id = request.form['ref_paper_id']
    api.remove__link_of_paper(paper_id, ref_paper_id)

    id_to_filename = {}
    for entry in api.get_all_paper():
        id_to_filename[entry.id] = entry.filename

    updated_paper = api.get_paper(paper_id)
    return render_template(
        'admin/paper_info.html',
        paper=updated_paper,
        id_to_filename=id_to_filename,
    )
def check_references():
    """Check that every linked reference is mirrored in the target's ``cited_by``.

    For each reference of each paper that carries a paper id, the referenced
    paper is loaded. If its ``cited_by`` list does not contain the citing
    paper's id, the reference's ``paper_id`` is reset to an empty list, the
    citing paper is saved and ``repair_corrupt_reference`` is called with
    the reference, the paper, all other papers and the API.
    """
    print("\nCheck References")
    api = API()
    papers = api.get_all_paper()
    total = len(papers)

    for index, paper in enumerate(papers):
        progressBar(index, total)

        # Candidates handed to the repair step: every paper except this one.
        other_papers = [candidate for candidate in papers if candidate.id != paper.id]

        for reference in paper.references:
            if not reference.get_paper_id():
                continue

            ref_paper = api.get_paper(reference.get_paper_id())
            if ref_paper.cited_by.count(paper.id) != 0:
                # Back-link exists, nothing to repair.
                continue

            print()
            reference.paper_id = []
            api.client.update_paper(paper)
            repair_corrupt_reference(reference, paper, other_papers, api)
def check_cited_by():
    """Check that every ``cited_by`` entry is backed by a real reference.

    For each id in a paper's ``cited_by`` list:

    * if ``api.contains_paper`` reports the id as unknown, the entry is
      removed and the paper is saved;
    * otherwise, if none of the citing paper's references carries this
      paper's id, the entry is removed, the paper is saved and
      ``link_references_to_paper(cited_paper, paper, api)`` is called
      (presumably to re-create the link if a matching reference exists;
      confirm against that helper).
    """
    print("\nCheck Cited by")
    api = API()
    papers = api.get_all_paper()
    total = len(papers)

    for i, paper in enumerate(papers):
        progressBar(i, total)

        # Iterate over a snapshot: entries are removed from paper.cited_by
        # inside the loop, and mutating a list while iterating over it
        # skips the element that follows each removed one.
        for cited_paper_id in list(paper.cited_by):
            if not api.contains_paper(cited_paper_id):
                paper.cited_by.remove(cited_paper_id)
                api.client.update_paper(paper)
                continue

            cited_paper = api.get_paper(cited_paper_id)
            cited_paper_refs = [
                ref.get_paper_id()
                for ref in cited_paper.references
                if ref.get_paper_id()
            ]

            if cited_paper_refs.count(paper.id) == 0:
                print()
                paper.cited_by.remove(cited_paper_id)
                api.client.update_paper(paper)
                link_references_to_paper(cited_paper, paper, api)
def calculate_ranking_sections(self, raw_queries, settings):
    """Print the mean average precision of searching each IMRaD section alone.

    For every query the introduction, background, methods, results and
    discussion fields are each searched through ``api.get_papers`` with the
    same ``settings``. The average precision of each ranking is computed
    against the papers listed in ``query["references"]`` and one
    ``name & count & mean`` row per section is printed.

    :param raw_queries: queries in the form accepted by
        ``self.__raw_queries_to_queries``.
    :param settings: settings dict forwarded unchanged to ``api.get_papers``.
    :return: ``None``; the result is only printed.
    """
    api = API()
    queries = self.__raw_queries_to_queries(raw_queries)

    total = len(queries)
    if total == 0:
        # Without this guard the means below would raise ZeroDivisionError.
        print()
        print("No queries to evaluate.")
        return

    # (field searched, mode whose name labels the printed row), in print order.
    sections = (
        (IMRaDType.INTRODUCTION, Mode.only_introduction),
        (IMRaDType.BACKGROUND, Mode.only_background),
        (IMRaDType.METHODS, Mode.only_methods),
        (IMRaDType.RESULTS, Mode.only_results),
        (IMRaDType.DISCUSSION, Mode.only_discussion),
    )
    scores_per_section = [[] for _ in sections]

    for i, query in enumerate(queries):
        progressBar(i, total)

        relevant_paper = [api.get_paper(reference["paper_id"]) for reference in query["references"]]
        search_query = query["search_query"]

        # Run all searches first, then score them, in section order.
        rankings = [
            api.get_papers({imrad_type.name: search_query}, settings)
            for imrad_type, _ in sections
        ]
        for section_scores, ranked_papers in zip(scores_per_section, rankings):
            section_scores.append(self.average_precision(ranked_papers, relevant_paper))

    print()
    for (_, mode), section_scores in zip(sections, scores_per_section):
        mean_ap = sum(section_scores) / len(section_scores)
        print("{} & {} & {}".format(mode.name.replace("_", " "), len(section_scores), mean_ap))
def __link_references_to_paper():
    """Link references between papers, checkpointing progress on disk.

    The ids of papers that are already done are kept as a pickled list in
    ``REQ_DATA_PATH + "finished_papers.txt"``; the file is created with an
    empty list if missing and rewritten after every processed paper, so an
    interrupted run skips the papers it already finished.

    If ``newpapers.txt`` exists in the working directory, only the papers
    whose ids are listed there (one per line) are processed, and each is
    linked in both directions against every other paper. Otherwise all
    papers are processed and only ``link_references_to_paper(paper, other)``
    is called for each pair.
    """
    api = API()
    all_papers = api.get_all_paper()
    progress_path = REQ_DATA_PATH + "finished_papers.txt"

    if not os.path.isfile(progress_path):
        with open(progress_path, 'wb') as fp:
            pickle.dump([], fp)

    # NOTE(review): pickle.load can execute arbitrary code from a crafted
    # file; this file must only ever be written by this function.
    with open(progress_path, 'rb') as fp:
        finished_files = pickle.load(fp)

    # Checked once: the answer decides both which papers are processed and
    # whether the reverse link is created, so it must not change mid-run
    # (and a stat call per paper pair is wasted work).
    only_new_papers = os.path.isfile("newpapers.txt")
    if only_new_papers:
        with open("newpapers.txt", 'r') as fp:
            papers = [api.get_paper(paper_id.rstrip()) for paper_id in fp]
    else:
        papers = api.get_all_paper()

    for i, paper in enumerate(papers, start=1):
        print("(", i, "/", len(papers), ")")
        if paper.id in finished_files:
            continue

        other_papers = [p for p in all_papers if p.id != paper.id]
        for other_paper in other_papers:
            if only_new_papers:
                link_references_to_paper(other_paper, paper, api)

            link_references_to_paper(paper, other_paper, api)

        # Checkpoint after every paper so the run can be resumed.
        finished_files.append(paper.id)
        with open(progress_path, 'wb') as fp:
            pickle.dump(finished_files, fp)