def get_value(args):
    """Log the centroid vector computed for ``args.text``.

    Reads ``args.dbpath`` (a path, or a list whose first element is used)
    and ``args.text``. When ``args.dbpath`` is a list it is overwritten in
    place with its first element. The result is only logged; nothing is
    returned.
    """
    _log.debug("run build with args={}".format(args))

    # Normalize a list-valued path to its first entry and store it back
    # on ``args``, as callers may read the attribute afterwards.
    db_path = args.dbpath
    if isinstance(db_path, list):
        db_path = db_path[0]
        args.dbpath = db_path

    wrapper = wiki_knowledge.load_db_wrapper_from_wdb(db_path)
    centroid = wrapper.get_text_centroid(args.text)
    _log.info("vector = {0}".format(centroid.data))
def test__save_and_load(self):
    """Save a freshly built database wrapper to a .wdb file and reload it.

    The reloaded wrapper must report the same ``words_num`` and
    ``title_index`` as the one that was saved.
    """
    # Build the reference wrapper from the parsed XML dump.
    wiki_knowledge.parse_dump(self.expected_xml_path, self.tmp_parse_file)
    built = wiki_knowledge.build_database_wrapper(
        self.tmp_parse_file,
        StopWordsStemmer([]),
    )

    # Round-trip it through the temporary .wdb file.
    wiki_knowledge.save_db_wrapper_to_wdb(built, self.tmp_wdb_file)
    reloaded = wiki_knowledge.load_db_wrapper_from_wdb(self.tmp_wdb_file)

    self.assertEqual(
        built.words_num,
        reloaded.words_num,
        "Mismatch WikiKnowledges number of words",
    )
    self.assertEqual(
        built.title_index,
        reloaded.title_index,
        "Mismatch WikiKnowledges titles",
    )
def test__many_articles(self):
    """Print the five highest-scoring centroid entries for two licence texts.

    Loads the database wrapper from ``many_articles_dump.wdb`` and, for
    ``ibm_licence_text_full`` and then ``ibm_licence_text``, sorts the
    items of the readable centroid by value in descending order and
    prints the first five.
    """
    wdb_path = getInputFile("many_articles_dump.wdb")
    db_wrapper = wiki_knowledge.load_db_wrapper_from_wdb(wdb_path)

    # NOTE(review): this test only prints its results and asserts nothing,
    # so it can fail only if one of the calls raises.
    for text in (ibm_licence_text_full, ibm_licence_text):
        centroid = db_wrapper.get_readable_centroid(text)
        top_five = sorted(
            centroid.items(), key=lambda item: item[1], reverse=True
        )[:5]
        # Single-argument call form: prints the same under Python 2 and
        # is valid syntax under Python 3, unlike the bare print statement.
        print(top_five)
def test__save_and_load(self):
    """Check that a database wrapper survives a save/load cycle.

    Builds a wrapper from the parsed dump, writes it to ``self.tmp_wdb_file``,
    loads it back, and compares the ``words_num`` and ``title_index``
    attributes of the two wrappers.
    """
    wiki_knowledge.parse_dump(self.expected_xml_path, self.tmp_parse_file)
    original = wiki_knowledge.build_database_wrapper(
        self.tmp_parse_file, StopWordsStemmer([]))
    wiki_knowledge.save_db_wrapper_to_wdb(original, self.tmp_wdb_file)
    restored = wiki_knowledge.load_db_wrapper_from_wdb(self.tmp_wdb_file)

    # Attributes that must be identical after the round trip, checked in
    # this order, each with its own failure message.
    checks = (
        ("words_num", "Mismatch WikiKnowledges number of words"),
        ("title_index", "Mismatch WikiKnowledges titles"),
    )
    for attribute, message in checks:
        self.assertEqual(
            getattr(original, attribute),
            getattr(restored, attribute),
            message,
        )
def test__many_articles(self):
    """Print the top five readable-centroid entries for both licence texts.

    The database wrapper is loaded from ``many_articles_dump.wdb``. For
    each text the centroid items are sorted by their value, largest
    first, and the leading five pairs are printed.
    """
    wdb_path = getInputFile("many_articles_dump.wdb")
    db_wrapper = wiki_knowledge.load_db_wrapper_from_wdb(wdb_path)

    # NOTE(review): nothing is asserted here; the output is for manual
    # inspection only.

    # Full licence text.
    d = db_wrapper.get_readable_centroid(ibm_licence_text_full)
    s = sorted(d.items(), key=lambda x: x[1], reverse=True)[:5]
    # Parenthesized single-argument print behaves identically on
    # Python 2 and also parses on Python 3.
    print(s)

    # Second licence text.
    d = db_wrapper.get_readable_centroid(ibm_licence_text)
    s = sorted(d.items(), key=lambda x: x[1], reverse=True)[:5]
    print(s)
def test__many_articles(self):
    """Assert that "Computer" is among the top five entries for both texts.

    Loads the wrapper from ``many_articles_dump.wdb``, computes the
    readable centroid of ``ibm_licence_text_full`` and then of
    ``ibm_licence_text``, and checks the ``get_top(..., 5)`` result of each.
    """
    wrapper = wiki_knowledge.load_db_wrapper_from_wdb(
        getInputFile("many_articles_dump.wdb")
    )

    # Same check for both texts, full text first.
    for licence_text in (ibm_licence_text_full, ibm_licence_text):
        centroid = wrapper.get_readable_centroid(licence_text)
        best = get_top(centroid, 5)
        self.assertIn("Computer", dict(best))
def test__many_articles(self):
    """Verify "Computer" appears in the top five centroid entries.

    Runs the check against the readable centroid of
    ``ibm_licence_text_full`` and of ``ibm_licence_text``, using the
    database wrapper loaded from ``many_articles_dump.wdb``.
    """
    dump_path = getInputFile("many_articles_dump.wdb")
    knowledge = wiki_knowledge.load_db_wrapper_from_wdb(dump_path)

    # Full licence text.
    full_centroid = knowledge.get_readable_centroid(ibm_licence_text_full)
    full_top = dict(get_top(full_centroid, 5))
    self.assertIn("Computer", full_top)

    # Second licence text.
    short_centroid = knowledge.get_readable_centroid(ibm_licence_text)
    short_top = dict(get_top(short_centroid, 5))
    self.assertIn("Computer", short_top)