Пример #1
0
    def testLearnStems(self):
        Brain.init(TEST_BRAIN_FILE, order=2)

        brain = Brain(TEST_BRAIN_FILE)
        brain.set_stemmer("english")
        stem = brain.stemmer.stem

        brain.learn("this is testing")

        c = brain.graph.cursor()
        stem_count = c.execute("SELECT count(*) FROM token_stems").fetchone()

        self.assertEqual(3, stem_count[0])
        self.assertEqual(brain.graph.get_token_stem_id(stem("test")),
                          brain.graph.get_token_stem_id(stem("testing")))
Пример #2
0
    def testLearnStems(self):
        Brain.init(TEST_BRAIN_FILE, order=2)

        brain = Brain(TEST_BRAIN_FILE)
        brain.set_stemmer("english")
        stem = brain.stemmer.stem

        brain.learn("this is testing")

        c = brain.graph.cursor()
        stem_count = c.execute("SELECT count(*) FROM token_stems").fetchone()

        self.assertEqual(3, stem_count[0])
        self.assertEqual(brain.graph.get_token_stem_id(stem("test")),
                          brain.graph.get_token_stem_id(stem("testing")))
Пример #3
0
def learn(archivepath, brain, **kwargs):
    # start brain. Batch saves us from lots of I/O
    brain = Brain(brain)
    brain.set_stemmer(kwargs.get('language', 'english'))

    brain.start_batch_learning()

    tweets = tweet_generator(archivepath, **kwargs)
    count = 0

    for text in tweets:
        count = count + 1
        brain.learn(text)

    brain.stop_batch_learning()

    return count