def testLearnStems(self):
    """Check that learning with a stemmer records stems for learned tokens.

    After learning "this is testing" with the "english" stemmer, the
    ``token_stems`` table must hold three rows, and the stems of "test"
    and "testing" must resolve to the same stem id.
    """
    Brain.init(TEST_BRAIN_FILE, order=2)

    brain = Brain(TEST_BRAIN_FILE)
    brain.set_stemmer("english")
    to_stem = brain.stemmer.stem

    brain.learn("this is testing")

    # One row is expected per learned token.
    cursor = brain.graph.cursor()
    row = cursor.execute("SELECT count(*) FROM token_stems").fetchone()
    self.assertEqual(3, row[0])

    # Both word forms should be filed under a single stem id.
    base_id = brain.graph.get_token_stem_id(to_stem("test"))
    inflected_id = brain.graph.get_token_stem_id(to_stem("testing"))
    self.assertEqual(base_id, inflected_id)
def learn(archivepath, brain, **kwargs):
    """Feed every text produced from an archive into a brain.

    Args:
        archivepath: Forwarded unchanged to ``tweet_generator``.
        brain: Argument handed to the ``Brain`` constructor
            (presumably the brain file path -- confirm against callers).
        **kwargs: Forwarded unchanged to ``tweet_generator``. The optional
            ``language`` entry also selects the stemmer and defaults to
            ``'english'``.

    Returns:
        The number of texts passed to ``Brain.learn``.

    Raises:
        Whatever ``tweet_generator`` or ``Brain.learn`` raises; batch mode
        is still closed before the exception propagates.
    """
    # Keep the caller's argument and the Brain instance under separate names.
    learner = Brain(brain)
    learner.set_stemmer(kwargs.get('language', 'english'))

    # Start brain. Batch saves us from lots of I/O.
    learner.start_batch_learning()
    count = 0
    try:
        for count, text in enumerate(tweet_generator(archivepath, **kwargs), 1):
            learner.learn(text)
    finally:
        # Always close the batch so a failure part-way through does not
        # leave the brain stuck in batch-learning mode.
        learner.stop_batch_learning()
    return count