Exemplo n.º 1
0
class WikiDigesterTest(RequiresApp):
    """Tests for ``WikiDigester``, constructed with ``db='test'``."""

    def setUp(self):
        """Create the WikiDigester over the ``tests/data/article.xml`` file."""
        self.w = WikiDigester('tests/data/article.xml', db='test')

    def tearDown(self):
        """Drop the reference to the digester created in ``setUp``."""
        self.w = None

    def test_instance(self):
        """The object built in ``setUp`` is a ``WikiDigester``."""
        self.assertIsInstance(self.w, WikiDigester)

    # NOTE(review): the tests below are disabled. They call `_digest*`
    # helpers that are not defined in this class (possibly inherited or
    # removed -- confirm before re-enabling).

    #def test_counts_docs(self):
        #self._digest()
        #self.assertEqual(self.w.num_docs, 1)

    #def test_digest(self):
        #self._digest()

    #def test_digest_many(self):
        #self.w = WikiDigester('tests/data/articles.xml', db='test')
        #self._digest_many()

    #def test_digest_updates(self):
        #self._digest_updates()

    def test_tfidf(self):
        """Smoke-test ``_generate_tfidf`` on four fake documents.

        Only verifies that the call completes without raising; the value
        assertions are pending a port of the storage calls (see the TODOs
        in the body).
        """
        # Fake documents as (doc id, [(token id, frequency), ...]); the
        # token ids stand in for hashes.
        docs = [
                ( 0, [(111, 4), (222, 8), (333, 2)] ),
                ( 1, [(111, 8), (333, 4)] ),
                ( 2, [(111, 2)] ),
                ( 3, [(111, 6), (222, 2)] )
        ]

        # Per-document [token id, value] pairs that the stored results are
        # meant to match once the assertions at the end are restored.
        expected = [
                [[111, 0.0], [222, 8.0], [333, 2.0]],
                [[111, 0.0], [333, 4.0]],
                [[111, 0.0]],
                [[111, 0.0], [222, 2.0]]
        ]

        self.w.num_docs = len(docs)

        # _generate_tfidf is handed only the token ids, not the frequencies.
        prepped_docs = [
            (doc_id, [token_id for token_id, _ in freqs])
            for doc_id, freqs in docs
        ]

        # TODO: add each dummy doc before generating. The old call used the
        # Mongo/Adipose syntax and needs to be updated to SQLAlchemy:
        #   for doc in docs:
        #       self.w.db().add({'_id': doc[0], 'freqs': doc[1]})

        self.w._generate_tfidf(prepped_docs)

        # TODO: restore the assertions against `expected` once the lookup
        # is ported as well:
        #   for idx, doc in enumerate(expected):
        #       tfidf = self.w.db().find({'_id': idx})['doc']
        #       self.assertEqual(dict(doc), dict(tfidf))
Exemplo n.º 2
0
class WikiDigesterTest(RequiresApp):
    """Tests for ``WikiDigester``, constructed with ``db='test'``."""

    def setUp(self):
        """Create the WikiDigester over the ``tests/data/article.xml`` file."""
        self.w = WikiDigester('tests/data/article.xml', db='test')

    def tearDown(self):
        """Drop the reference to the digester created in ``setUp``."""
        self.w = None

    def test_instance(self):
        """The object built in ``setUp`` is a ``WikiDigester``."""
        self.assertIsInstance(self.w, WikiDigester)

    # NOTE(review): the tests below are disabled. They call `_digest*`
    # helpers that are not defined in this class (possibly inherited or
    # removed -- confirm before re-enabling).

    #def test_counts_docs(self):
    #self._digest()
    #self.assertEqual(self.w.num_docs, 1)

    #def test_digest(self):
    #self._digest()

    #def test_digest_many(self):
    #self.w = WikiDigester('tests/data/articles.xml', db='test')
    #self._digest_many()

    #def test_digest_updates(self):
    #self._digest_updates()

    def test_tfidf(self):
        """Smoke-test ``_generate_tfidf`` on four fake documents.

        Only verifies that the call completes without raising; the value
        assertions are pending a port of the storage calls (see the TODOs
        in the body).
        """
        # Fake documents as (doc id, [(token id, frequency), ...]); the
        # token ids stand in for hashes.
        docs = [(0, [(111, 4), (222, 8), (333, 2)]), (1, [(111, 8), (333, 4)]),
                (2, [(111, 2)]), (3, [(111, 6), (222, 2)])]

        # Per-document [token id, value] pairs that the stored results are
        # meant to match once the assertions at the end are restored.
        expected = [[[111, 0.0], [222, 8.0], [333, 2.0]],
                    [[111, 0.0], [333, 4.0]], [[111, 0.0]],
                    [[111, 0.0], [222, 2.0]]]

        self.w.num_docs = len(docs)

        # _generate_tfidf is handed only the token ids, not the frequencies.
        prepped_docs = [(doc_id, [token_id for token_id, _ in freqs])
                        for doc_id, freqs in docs]

        # TODO: add each dummy doc before generating. The old call used the
        # Mongo/Adipose syntax and needs to be updated to SQLAlchemy:
        #   for doc in docs:
        #       self.w.db().add({'_id': doc[0], 'freqs': doc[1]})

        self.w._generate_tfidf(prepped_docs)

        # TODO: restore the assertions against `expected` once the lookup
        # is ported as well:
        #   for idx, doc in enumerate(expected):
        #       tfidf = self.w.db().find({'_id': idx})['doc']
        #       self.assertEqual(dict(doc), dict(tfidf))
Exemplo n.º 3
0
 def setUp(self):
     """Build the WikiDigester under test and store it on ``self.w``."""
     fixture_path = 'tests/data/article.xml'
     self.w = WikiDigester(fixture_path, db='test')
Exemplo n.º 4
0
 def setUp(self):
     """Prepare the test fixture: a WikiDigester bound to ``self.w``."""
     # The digester reads the article XML file and is given db='test'.
     digester = WikiDigester('tests/data/article.xml', db='test')
     self.w = digester