class WikiDigesterTest(RequiresApp):
    """Exercises WikiDigester against the single-article XML fixture.

    Several digest tests and the TF-IDF assertions are currently commented
    out; see the notes next to them.
    """

    def setUp(self):
        # Build the WikiDigester under test from the article fixture,
        # passing db='test'.
        fixture_path = 'tests/data/article.xml'
        self.w = WikiDigester(fixture_path, db='test')

    def tearDown(self):
        # Drop the reference to the digester created in setUp.
        self.w = None

    def test_instance(self):
        # The object built in setUp must be a WikiDigester.
        self.assertIsInstance(self.w, WikiDigester)

    # The following tests are disabled; they are kept commented out for
    # reference.
    #def test_counts_docs(self):
        #self._digest()
        #self.assertEqual(self.w.num_docs, 1)

    #def test_digest(self):
        #self._digest()

    #def test_digest_many(self):
        #self.w = WikiDigester('tests/data/articles.xml', db='test')
        #self._digest_many()

    #def test_digest_updates(self):
        #self._digest_updates()

    def test_tfidf(self):
        # With the assertions below disabled, this only verifies that
        # _generate_tfidf runs on the prepared docs without raising.

        # Each entry is (doc id, [(fake token id / hash, frequency), ...]).
        docs = [
            (0, [(111, 4), (222, 8), (333, 2)]),
            (1, [(111, 8), (333, 4)]),
            (2, [(111, 2)]),
            (3, [(111, 6), (222, 2)]),
        ]
        # Per-document [token id, value] pairs that the disabled assertions
        # compare against; unused until those assertions are re-enabled.
        expected = [
            [[111, 0.0], [222, 8.0], [333, 2.0]],
            [[111, 0.0], [333, 4.0]],
            [[111, 0.0]],
            [[111, 0.0], [222, 2.0]],
        ]
        self.w.num_docs = len(docs)

        # _generate_tfidf is given (doc id, [token ids]) pairs: the docs
        # above with the frequencies stripped off.
        prepped_docs = [
            (doc_id, [token_id for token_id, _freq in freqs])
            for doc_id, freqs in docs
        ]

        # TODO: add each dummy doc to the db.
        # THIS IS USING THE OLD MONGO/ADIPOSE SYNTAX, need to update it to sqlalchemy
        #for doc in docs:
            #self.w.db().add({'_id': doc[0], 'freqs': doc[1]})

        self.w._generate_tfidf(prepped_docs)

        # TODO: re-enable once the lookup is ported as well
        # (assertEquals is a deprecated alias of assertEqual).
        #for idx, doc in enumerate(expected):
            #tfidf = self.w.db().find({'_id': idx })['doc']
            #self.assertEqual(dict(doc), dict(tfidf))
class WikiDigesterTest(RequiresApp):
    """Exercises WikiDigester against the single-article XML fixture.

    Several digest tests and the TF-IDF assertions are currently commented
    out; see the notes next to them.
    """

    def setUp(self):
        # Build the WikiDigester under test from the article fixture,
        # passing db='test'.
        fixture_path = 'tests/data/article.xml'
        self.w = WikiDigester(fixture_path, db='test')

    def tearDown(self):
        # Drop the reference to the digester created in setUp.
        self.w = None

    def test_instance(self):
        # The object built in setUp must be a WikiDigester.
        self.assertIsInstance(self.w, WikiDigester)

    # The following tests are disabled; they are kept commented out for
    # reference.
    #def test_counts_docs(self):
        #self._digest()
        #self.assertEqual(self.w.num_docs, 1)

    #def test_digest(self):
        #self._digest()

    #def test_digest_many(self):
        #self.w = WikiDigester('tests/data/articles.xml', db='test')
        #self._digest_many()

    #def test_digest_updates(self):
        #self._digest_updates()

    def test_tfidf(self):
        # With the assertions below disabled, this only verifies that
        # _generate_tfidf runs on the prepared docs without raising.

        # Each entry is (doc id, [(fake token id / hash, frequency), ...]).
        docs = [
            (0, [(111, 4), (222, 8), (333, 2)]),
            (1, [(111, 8), (333, 4)]),
            (2, [(111, 2)]),
            (3, [(111, 6), (222, 2)]),
        ]
        # Per-document [token id, value] pairs that the disabled assertions
        # compare against; unused until those assertions are re-enabled.
        expected = [
            [[111, 0.0], [222, 8.0], [333, 2.0]],
            [[111, 0.0], [333, 4.0]],
            [[111, 0.0]],
            [[111, 0.0], [222, 2.0]],
        ]
        self.w.num_docs = len(docs)

        # _generate_tfidf is given (doc id, [token ids]) pairs: the docs
        # above with the frequencies stripped off.
        prepped_docs = [
            (doc_id, [token_id for token_id, _freq in freqs])
            for doc_id, freqs in docs
        ]

        # TODO: add each dummy doc to the db.
        # THIS IS USING THE OLD MONGO/ADIPOSE SYNTAX, need to update it to sqlalchemy
        #for doc in docs:
            #self.w.db().add({'_id': doc[0], 'freqs': doc[1]})

        self.w._generate_tfidf(prepped_docs)

        # TODO: re-enable once the lookup is ported as well
        # (assertEquals is a deprecated alias of assertEqual).
        #for idx, doc in enumerate(expected):
            #tfidf = self.w.db().find({'_id': idx })['doc']
            #self.assertEqual(dict(doc), dict(tfidf))
def setUp(self):
    # Build the WikiDigester that the tests use from the article fixture,
    # passing db='test', and keep it on the instance as `w`.
    fixture_path = 'tests/data/article.xml'
    self.w = WikiDigester(fixture_path, db='test')