def parse(self, doc, doc_id):
    """Collect the candidate entities for every n-gram of a document.

    Wraps the input in a ``Document``, looks each of its n-grams up in
    ``self.commonness`` and keeps every key found there except ``"_total"``.

    :param doc: document content, handed to ``Document`` unchanged
    :param doc_id: document identifier, handed to ``Document`` unchanged
    :return: ``defaultdict(list)`` mapping each mention (n-gram) that has at
        least one candidate to the list of its candidate entities,
        i.e. {men: [en, ...], ...}
    """
    mentions = defaultdict(list)
    for mention in Document(doc_id, doc).get_ngrams():
        entities = [
            entity
            for entity in self.commonness.get(mention, {})
            if entity != "_total"
        ]
        # Only touch the mapping when there is something to add, so mentions
        # without candidates never get an (empty) entry.
        if entities:
            mentions[mention].extend(entities)
    return mentions
def main():
    """Annotate every test snippet and write the results to ``output_cmn.txt``.

    Loads the snippets via ``Document.load_test_snippets(SNIPPETS)``, runs
    ``ELCmn.annotate`` on each one in ascending numeric order of its id, and
    passes the result to ``ELUtils.write_to_file`` together with the open
    output file. Progress is echoed to stdout.
    """
    snippets = Document.load_test_snippets(SNIPPETS)
    out_file_name = OUTPUT_DIR + "/output_cmn.txt"
    # The context manager guarantees the file is flushed and closed, even if
    # annotating or writing one of the snippets raises.
    with open(out_file_name, "w") as out_file:
        el = ELCmn()
        # Ids are strings; sort on their integer value so "10" follows "9".
        for doc_id, doc in sorted(snippets.items(), key=lambda item: int(item[0])):
            print("[" + doc_id + "]\t" + doc)
            linked_ens = el.annotate(doc, doc_id)
            ELUtils.write_to_file(doc_id, out_file, linked_ens)
            print("------")
    print("output written to", out_file_name)