Exemplo n.º 1
0
 def get_top_news_from_topic(self, topic_csv, topic_id, threshold=0.9):
     """Build a corpus of the documents strongly assigned to one topic.

     Reads the topic-assignment CSV located at
     ``os.path.join(self.models_folder, topic_csv)`` and keeps the rows
     whose ``topic_id`` column equals ``topic_id`` and whose ``topic_prob``
     column is strictly greater than ``threshold``. Every document of
     ``self.corpus`` whose ``document_id`` matches one of those rows is
     then copied into a fresh ``Corpus``.

     Args:
         topic_csv: File name of the CSV, relative to ``self.models_folder``.
             It must provide the columns ``document_id``, ``topic_id`` and
             ``topic_prob``.
         topic_id: Topic to select. It is compared against
             ``int(row["topic_id"])``, so it is presumably an ``int``
             (NOTE(review): confirm against callers).
         threshold: Exclusive lower bound on ``topic_prob``.

     Returns:
         A new ``Corpus`` holding the matching documents, in the order in
         which ``self.corpus.iter_documents()`` yields them. The result is
         not ranked by topic probability.

     Raises:
         OSError: If the CSV file cannot be opened.
         KeyError: If a required column is missing from the CSV.
         ValueError: If ``topic_id`` or ``topic_prob`` cannot be parsed as
             a number.
     """
     csv_path = os.path.join(self.models_folder, topic_csv)
     # newline="" lets the csv module handle line endings itself, which is
     # required for quoted fields that contain embedded newlines.
     with open(csv_path, "r", newline="") as fin:
         # A set gives O(1) membership tests in the corpus scan below; the
         # ids stay strings exactly as read from the CSV.
         valid_ids = {
             row["document_id"]
             for row in csv.DictReader(fin)
             if int(row["topic_id"]) == topic_id
             and float(row["topic_prob"]) > threshold
         }

     new_corpus = Corpus()
     for document in self.corpus.iter_documents():
         if document.document_id in valid_ids:
             new_corpus.add_document_from_element(document)
     return new_corpus