def test__same_text_correlation(self):
    """Comparing a text with itself must give a correlation of 1."""
    _log.info('-' * 80)

    # arrange: the second text is the very same string as the first
    first_text = "love is rain as long story short"
    second_text = first_text
    articles = ['Rain', 'Love', 'Tree']
    dump_file = getInputFile("swiki_knowledge_output.xml")
    parsed_file = getOutputFile("swiki_knowledge_output.parsed.xml")

    # act: make the dump, parse it, build the database, then compare
    # NOTE: no cleanup is registered here, the generated files stay on disk.
    wn.make_dump(dump_file, articles, compress=False)
    wn.parse_dump(dump_file, parsed_file)
    db_wrapper = wn.build_database_wrapper(parsed_file, StopWordsStemmer([]))
    correlation = SemanticComparer(db_wrapper).compare(first_text, second_text)
    _log.info(
        test_utils.get_texts_correlation_message(
            first_text, second_text, correlation))

    # assert
    self.assertAlmostEqual(
        correlation,
        1.0,
        msg="for same text correlation should be 1")
def test__same_text_correlation(self):
    """Identical texts are expected to correlate with a score of 1."""
    _log.info('-' * 80)

    # arrange
    text = "love is rain as long story short"
    same_text = text
    article_titles = ['Rain', 'Love', 'Tree']
    dump_path = getInputFile("swiki_knowledge_output.xml")
    parsed_path = getOutputFile("swiki_knowledge_output.parsed.xml")

    # act
    # NOTE: the dump and parsed files are not removed by this test.
    wn.make_dump(dump_path, article_titles, compress=False)
    wn.parse_dump(dump_path, parsed_path)
    database = wn.build_database_wrapper(parsed_path, StopWordsStemmer([]))
    comparer = SemanticComparer(database)
    score = comparer.compare(text, same_text)
    _log.info(test_utils.get_texts_correlation_message(text, same_text, score))

    # assert
    self.assertAlmostEqual(score, 1.0,
                           msg="for same text correlation should be 1")
def test__save_and_load(self):
    """Save a database wrapper to a wdb file, load it back and compare.

    The word count and the title index of the loaded wrapper must equal
    those of the wrapper that was saved.
    """
    # Build the reference wrapper from the parsed dump.
    wiki_knowledge.parse_dump(self.expected_xml_path, self.tmp_parse_file)
    saved = wiki_knowledge.build_database_wrapper(
        self.tmp_parse_file, StopWordsStemmer([]))

    # Round-trip it through the wdb file.
    wiki_knowledge.save_db_wrapper_to_wdb(saved, self.tmp_wdb_file)
    loaded = wiki_knowledge.load_db_wrapper_from_wdb(self.tmp_wdb_file)

    self.assertEqual(
        saved.words_num,
        loaded.words_num,
        "Mismatch WikiKnowledges number of words")
    self.assertEqual(
        saved.title_index,
        loaded.title_index,
        "Mismatch WikiKnowledges titles")
def test__many_articles(self):
    """Build a database from the many-articles dump and print a centroid.

    Parses ``many_articles_dump.xml``, builds a database wrapper with a
    ``PorterStemmer`` and prints the readable centroid computed for
    ``ibm_licence_text``. Nothing is asserted; the output is meant for
    manual inspection.
    """
    wiki_dump_path = getInputFile("many_articles_dump.xml")
    parsed_xml_path = getOutputFile("many_articles_dump.parsed.xml")

    wiki_knowledge.parse_dump(wiki_dump_path, parsed_xml_path)
    db_wrapper = wiki_knowledge.build_database_wrapper(
        parsed_xml_path, PorterStemmer())

    c = db_wrapper.get_readable_centroid(ibm_licence_text)
    # Parenthesised single-argument form prints the same on Python 2 and is
    # valid syntax on Python 3 (the bare ``print c`` statement is not).
    print(c)
def test__save_and_load(self):
    """A wrapper written to a wdb file must load back unchanged.

    Compares ``words_num`` and ``title_index`` of the original wrapper
    with those of the wrapper loaded from the file.
    """
    parse_target = self.tmp_parse_file
    wdb_target = self.tmp_wdb_file

    wiki_knowledge.parse_dump(self.expected_xml_path, parse_target)
    original = wiki_knowledge.build_database_wrapper(
        parse_target, StopWordsStemmer([]))
    wiki_knowledge.save_db_wrapper_to_wdb(original, wdb_target)
    restored = wiki_knowledge.load_db_wrapper_from_wdb(wdb_target)

    self.assertEqual(original.words_num, restored.words_num,
                     "Mismatch WikiKnowledges number of words")
    self.assertEqual(original.title_index, restored.title_index,
                     "Mismatch WikiKnowledges titles")
def test__many_articles(self):
    """Print the readable centroid of a text against the many-articles dump.

    Parses ``many_articles_dump.xml``, builds a database wrapper using a
    ``PorterStemmer`` and prints what ``get_readable_centroid`` returns for
    ``ibm_licence_text``. The test makes no assertions.
    """
    wiki_dump_path = getInputFile("many_articles_dump.xml")
    parsed_xml_path = getOutputFile("many_articles_dump.parsed.xml")

    wiki_knowledge.parse_dump(wiki_dump_path, parsed_xml_path)
    db_wrapper = wiki_knowledge.build_database_wrapper(
        parsed_xml_path, PorterStemmer())

    c = db_wrapper.get_readable_centroid(ibm_licence_text)
    # ``print(c)`` is equivalent to the old ``print c`` statement on
    # Python 2 and, unlike it, also parses on Python 3.
    print(c)
def test__execution(self):
    """Run the whole pipeline end to end; a program execution, not a test.

    Makes a dump of ``self.expected_articles``, parses it, builds a
    database wrapper and logs the correlation of two fixed texts.
    Nothing is asserted.
    """
    text1 = "i love to learn"
    text2 = "the world we know"
    dump_file = self.tmp_dump_file

    wiki_knowledge.make_dump(dump_file, self.expected_articles,
                             compress=False)
    # Register the removal as soon as the dump has been made, so the file
    # is cleaned up even when one of the following steps raises.
    self.addCleanup(os.remove, self.tmp_dump_file)

    wiki_knowledge.parse_dump(dump_file, self.tmp_parse_file)
    db_wrapper = wiki_knowledge.build_database_wrapper(
        self.tmp_parse_file, StopWordsStemmer([]))

    comparer = semantic_interpreter.SemanticComparer(db_wrapper)
    correlation = comparer.compare(text1, text2)
    _log.info(
        test_utils.get_texts_correlation_message(text1, text2, correlation))
def test_number_of_concepts(self):
    """The database builder reads the parsed XML properly.

    Parses ``wikidump_Knowledge_Love_War.xml``, builds a database wrapper
    and checks that both its title index and its concepts index hold
    exactly three entries.
    """
    _log.info('-' * 80)

    # arrange
    dump_file = getInputFile("wikidump_Knowledge_Love_War.xml")
    parsed_file = getOutputFile("wikidump_Knowledge_Love_War.parsed.xml")

    # act
    wn.parse_dump(dump_file, parsed_file)
    db_wrapper = wn.build_database_wrapper(parsed_file, StopWordsStemmer([]))
    titles_count = len(db_wrapper.title_index)
    concepts_count = len(db_wrapper.concepts_index)

    # assert: each message names the index it is actually checking
    self.assertEqual(
        titles_count, 3,
        "number of titles should be 3, got {0}".format(titles_count))
    self.assertEqual(
        concepts_count, 3,
        "number of concepts should be 3, got {0}".format(concepts_count))
def test__execution(self):
    """Exercise the full pipeline; this is an execution rather than a test.

    A dump of ``self.expected_articles`` is made and parsed, a database
    wrapper is built from it, and the correlation between two fixed texts
    is logged. The method asserts nothing.
    """
    text1 = "i love to learn"
    text2 = "the world we know"
    dump_file = self.tmp_dump_file

    wiki_knowledge.make_dump(dump_file, self.expected_articles,
                             compress=False)
    # Schedule the dump removal right after it is made; registering it only
    # at the end would leave the file behind if parsing or building fails.
    self.addCleanup(os.remove, self.tmp_dump_file)

    wiki_knowledge.parse_dump(dump_file, self.tmp_parse_file)
    db_wrapper = wiki_knowledge.build_database_wrapper(
        self.tmp_parse_file, StopWordsStemmer([]))

    comparer = semantic_interpreter.SemanticComparer(db_wrapper)
    correlation = comparer.compare(text1, text2)
    _log.info(
        test_utils.get_texts_correlation_message(text1, text2, correlation))
def test_number_of_concepts(self):
    """The db builder reads the parsed xml properly.

    After parsing ``wikidump_Knowledge_Love_War.xml`` and building the
    database wrapper, the title index and the concepts index are each
    expected to contain three entries.
    """
    _log.info('-' * 80)

    # arrange
    dump_file = getInputFile("wikidump_Knowledge_Love_War.xml")
    parsed_file = getOutputFile("wikidump_Knowledge_Love_War.parsed.xml")

    # act
    wn.parse_dump(dump_file, parsed_file)
    db_wrapper = wn.build_database_wrapper(parsed_file, StopWordsStemmer([]))
    titles_count = len(db_wrapper.title_index)
    concepts_count = len(db_wrapper.concepts_index)

    # assert: the concepts check reports concepts, not titles
    self.assertEqual(
        titles_count, 3,
        "number of titles should be 3, got {0}".format(titles_count))
    self.assertEqual(
        concepts_count, 3,
        "number of concepts should be 3, got {0}".format(concepts_count))
def test__parse_dump(self):
    """Parse the ``test__parse_tools`` input dump into its output file.

    Only checks that ``wn.parse_dump`` runs; nothing is asserted.
    """
    source_path = io_tu.getInputFile(io_tu.FilesList.test__parse_tools)
    target_path = io_tu.getOutputFile(io_tu.FilesList.test__parse_tools)
    wn.parse_dump(source_path, target_path)
def parse(args):
    """Parse the dump at ``args.dump`` and write the result to ``args.output``.

    ``args`` only needs to expose ``dump`` and ``output`` attributes
    (presumably an argparse namespace -- confirm against the caller).
    """
    debug_message = "run parse with args={}".format(args)
    _log.debug(debug_message)

    dump_path = args.dump
    output_path = args.output
    wiki_knowledge.parse_dump(dump_path, output_path)
def test__parse_dump(self):
    """Run ``wn.parse_dump`` on the ``test__parse_tools`` fixture.

    The test has no assertions; it passes when parsing does not raise.
    """
    dump_in = io_tu.getInputFile(io_tu.FilesList.test__parse_tools)
    parsed_out = io_tu.getOutputFile(io_tu.FilesList.test__parse_tools)
    wn.parse_dump(dump_in, parsed_out)