示例#1
0
 def setUp(self):
     """Populate the fixture attributes: output file paths, the expected
     article titles and the paths of the expected-result files."""
     # Paths handed out by getOutputFile (presumably scratch output
     # locations -- confirm against the helper).
     self.tmp_dump_file = getOutputFile("wiki_knowledge_output.xml")
     self.tmp_wdb_file = getOutputFile("wiki_knowledge_output.wdb")
     self.tmp_parse_file = getOutputFile("wiki_knowledge_output.parsed.xml")

     self.expected_articles = ['Knowledge', 'Love', 'War']

     # Expected-result files are resolved relative to this module's directory.
     here = os.path.dirname(__file__)
     self.expected_xml_path = os.path.join(here, "expected_results/expected_xml_Knowledge_Love_War.xml")
     self.expected_wdb_path = os.path.join(here, "expected_results/expected_Knowledge_Love_War.wdb")
示例#2
0
    def setUp(self):
        """Set the output paths, expected article titles and expected-result
        file paths on the instance."""
        module_dir = os.path.dirname(__file__)

        # Output paths come from getOutputFile, in the same order as before.
        self.tmp_dump_file = getOutputFile("wiki_knowledge_output.xml")
        self.tmp_wdb_file = getOutputFile("wiki_knowledge_output.wdb")
        self.tmp_parse_file = getOutputFile(
            "wiki_knowledge_output.parsed.xml")

        self.expected_articles = ['Knowledge', 'Love', 'War']

        # Expected results live next to this module.
        xml_relpath = "expected_results/expected_xml_Knowledge_Love_War.xml"
        wdb_relpath = "expected_results/expected_Knowledge_Love_War.wdb"
        self.expected_xml_path = os.path.join(module_dir, xml_relpath)
        self.expected_wdb_path = os.path.join(module_dir, wdb_relpath)
示例#3
0
    def test__same_text_correlation(self):
        """Comparing a text with itself must give a correlation of 1."""

        _log.info('-' * 80)

        # arrange: one sentence, compared against itself
        sentence = "love is rain as long story short"
        same_sentence = sentence

        raw_dump_path = getInputFile("swiki_knowledge_output.xml")
        parsed_dump_path = getOutputFile("swiki_knowledge_output.parsed.xml")
        article_titles = ['Rain', 'Love', 'Tree']

        # act: make the dump, parse it, wrap it in a database, then compare
        # NOTE: no cleanup is registered here; generated files are left behind.
        wn.make_dump(raw_dump_path, article_titles, compress=False)
        wn.parse_dump(raw_dump_path, parsed_dump_path)
        stemmer = StopWordsStemmer([])
        wrapper = wn.build_database_wrapper(parsed_dump_path, stemmer)

        score = SemanticComparer(wrapper).compare(sentence, same_sentence)
        report = test_utils.get_texts_correlation_message(
            sentence, same_sentence, score)
        _log.info(report)

        # assert
        self.assertAlmostEqual(
            score, 1.0, msg="for same text correlation should be 1")
示例#4
0
    def test__same_text_correlation(self):
        """A text compared with itself should correlate at 1.0."""

        separator = '-'*80
        _log.info(separator)

        # arrange
        text = "love is rain as long story short"
        text_copy = text

        dump_path = getInputFile("swiki_knowledge_output.xml")
        parsed_path = getOutputFile("swiki_knowledge_output.parsed.xml")

        titles = ['Rain', 'Love', 'Tree']

        # act
        # NOTE: this method registers no cleanup for the files it produces.
        wn.make_dump(dump_path, titles, compress=False)
        wn.parse_dump(dump_path, parsed_path)
        no_stop_words = StopWordsStemmer([])
        database = wn.build_database_wrapper(parsed_path, no_stop_words)

        comparer = SemanticComparer(database)
        result = comparer.compare(text, text_copy)
        message = test_utils.get_texts_correlation_message(text, text_copy, result)
        _log.info(message)

        # assert
        self.assertAlmostEqual(result, 1.0, msg="for same text correlation should be 1")
示例#5
0
    def test__many_articles(self):
        """Parse the many-articles dump, build a database wrapper with a
        PorterStemmer and print the readable centroid of ibm_licence_text.

        NOTE(review): there is no assertion; the test only fails if one of
        the calls raises.
        """
        wiki_dump_path = getInputFile("many_articles_dump.xml")
        parsed_xml_path = getOutputFile("many_articles_dump.parsed.xml")

        wiki_knowledge.parse_dump(wiki_dump_path, parsed_xml_path)

        db_wrapper = wiki_knowledge.build_database_wrapper(parsed_xml_path, PorterStemmer())

        c = db_wrapper.get_readable_centroid(ibm_licence_text)
        # Parenthesised form prints the same under Python 2 and is valid
        # syntax under Python 3 (the bare `print c` statement is not).
        print(c)
示例#6
0
    def test__many_articles(self):
        """Run the parse -> build-database pipeline on the many-articles dump
        and print the readable centroid computed for ibm_licence_text.

        NOTE(review): nothing is asserted; this only checks that the calls
        complete without raising.
        """
        wiki_dump_path = getInputFile("many_articles_dump.xml")
        parsed_xml_path = getOutputFile("many_articles_dump.parsed.xml")

        wiki_knowledge.parse_dump(wiki_dump_path, parsed_xml_path)

        db_wrapper = wiki_knowledge.build_database_wrapper(
            parsed_xml_path, PorterStemmer())

        c = db_wrapper.get_readable_centroid(ibm_licence_text)
        # `print(c)` behaves like the old `print c` statement on Python 2
        # and also parses on Python 3.
        print(c)
示例#7
0
 def test_number_of_concepts(self):
     """The database wrapper built from the parsed dump has 3 titles and 3 concepts."""

     _log.info('-'*80)

     # arrange
     dump_file = getInputFile("wikidump_Knowledge_Love_War.xml")
     parsed_file = getOutputFile("wikidump_Knowledge_Love_War.parsed.xml")

     # act
     wn.parse_dump(dump_file, parsed_file)
     db_wrapper = wn.build_database_wrapper(parsed_file, StopWordsStemmer([]))

     titles_count = len(db_wrapper.title_index)
     concepts_count = len(db_wrapper.concepts_index)

     # assert
     # Each message names the quantity it actually checks, so a failure
     # points at the right index (the concepts message used to say "tiltes").
     self.assertEqual(titles_count, 3, "number of titles should be 3, got {0}".format(titles_count))
     self.assertEqual(concepts_count, 3, "number of concepts should be 3, got {0}".format(concepts_count))
示例#8
0
    def test_number_of_concepts(self):
        """Check that the database wrapper built from the parsed dump holds
        exactly 3 titles and 3 concepts."""

        _log.info('-' * 80)

        # arrange
        dump_file = getInputFile("wikidump_Knowledge_Love_War.xml")
        parsed_file = getOutputFile("wikidump_Knowledge_Love_War.parsed.xml")

        # act
        wn.parse_dump(dump_file, parsed_file)
        db_wrapper = wn.build_database_wrapper(parsed_file,
                                               StopWordsStemmer([]))

        titles_count = len(db_wrapper.title_index)
        concepts_count = len(db_wrapper.concepts_index)

        # assert
        # The failure messages name the index being checked; previously both
        # said "tiltes", which misreported a concepts-count failure.
        self.assertEqual(
            titles_count, 3,
            "number of titles should be 3, got {0}".format(titles_count))
        self.assertEqual(
            concepts_count, 3,
            "number of concepts should be 3, got {0}".format(concepts_count))
示例#9
0
    def test__parse_dump(self):
        """Call wn.parse_dump on the test__parse_tools input/output pair.

        Nothing is asserted; the test fails only if a call raises.
        """
        source_path = io_tu.getInputFile(io_tu.FilesList.test__parse_tools)
        target_path = io_tu.getOutputFile(io_tu.FilesList.test__parse_tools)
        wn.parse_dump(source_path, target_path)
示例#10
0
 def test__parse_dump(self):
     """Parse the test__parse_tools dump into its output path.

     No assertion is made; the test passes when parse_dump does not raise.
     """
     dump_in = io_tu.getInputFile(io_tu.FilesList.test__parse_tools)
     parsed_out = io_tu.getOutputFile(io_tu.FilesList.test__parse_tools)

     wn.parse_dump(dump_in, parsed_out)