Exemplo n.º 1
0
 def test_vectorize(self):
     """A factory-built CountVectorizer counts each MeCab morpheme once.

     The sentence '今日は晴れます' yields three vocabulary entries, each
     occurring exactly once in the single input document.
     """
     vec = VectorizerFactory.generate(CountVectorizer, MeCabTokenizer())
     matrix = vec.fit_transform(['今日は晴れます'])
     np.testing.assert_array_equal(np.array([[1, 1, 1]]), matrix.toarray())
     # Every expected morpheme must have made it into the learned vocabulary.
     for token in ('今日', '晴れ', 'ます'):
         ok_(token in vec.vocabulary_)
    def test_tokenize(self):
        """NgramTokenizer joins adjacent MeCab tokens into surface n-grams.

        Default n produces bigrams; passing 3 produces trigrams over the
        same four-token sentence.
        """
        base = MeCabTokenizer()

        bigram = NgramTokenizer(base)
        eq_(['今日は', 'は晴れ', '晴れます'], bigram.tokenize('今日は晴れます'))

        trigram = NgramTokenizer(base, 3)
        eq_(['今日は晴れ', 'は晴れます'], trigram.tokenize('今日は晴れます'))
Exemplo n.º 3
0
 def test_generate(self):
     """VectorizerFactory.generate returns a truthy vectorizer instance."""
     vec = VectorizerFactory.generate(CountVectorizer, MeCabTokenizer())
     ok_(vec)
 def test_tokenize(self):
     """MeCabTokenizer splits the sentence into its four morphemes."""
     result = MeCabTokenizer().tokenize('今日は晴れます')
     eq_(['今日', 'は', '晴れ', 'ます'], result)
 def test_init(self):
     """MeCabTokenizer constructs without error and is truthy."""
     ok_(MeCabTokenizer())
 def test_init(self):
     """NgramTokenizer wraps a base tokenizer and constructs without error."""
     ok_(NgramTokenizer(MeCabTokenizer()))