def test_vectorizer6():
    """Check that unseen words do not raise the maximum value of ``X``.

    The vectorizer is fitted on a small corpus with an explicit
    ``sent_start`` marker, then applied both to that corpus and to
    sentences containing words that were absent during fitting.
    """
    train_docs = ['foo bar', 'baz']
    vectorizer = PooledVectorizer(2, sent_start='<s>')
    vectorizer.fit(train_docs)
    train_bounds, train_matrix = vectorizer.transform(train_docs)

    # Each sentence pairs the fitted word 'foo' with a word not seen in fit.
    unseen_docs = ['foo alpha', 'foo bravo', 'foo charlie', 'foo delta']
    unseen_bounds, unseen_matrix = vectorizer.transform(unseen_docs)

    # NOTE(review): presumably unseen words are mapped onto an id that
    # already exists after fitting -- confirm against PooledVectorizer.
    assert train_matrix.max() >= unseen_matrix.max()
def test_vectorizer5():
    """Check ``fit`` followed by ``transform`` on a two-document corpus."""
    docs = ['foo bar baz foo', 'foo baz']
    vectorizer = PooledVectorizer(2)

    # fit() is expected to return an object exposing transform().
    fitted = vectorizer.fit(docs)
    bounds, X = fitted.transform(docs)

    # NOTE(review): bounds look like half-open row ranges into X, one pair
    # per document (three rows, then one row) -- confirm.
    expected_bounds = [[0, 3], [3, 4]]
    expected_rows = [[2, 3], [3, 4], [4, 2], [2, 4]]
    assert_array_equal(bounds, expected_bounds)
    assert_array_equal(X, expected_rows)
def test_vectorizer2():
    """Check ``fit_transform`` output and the resulting feature names."""
    docs = ['foo bar baz foo']
    vectorizer = PooledVectorizer(2)
    bounds, X = vectorizer.fit_transform(docs)

    expected_bounds = [[0, 3]]
    expected_rows = [[2, 3], [3, 4], [4, 2]]
    assert_array_equal(bounds, expected_bounds)
    assert_array_equal(X, expected_rows)

    # Two padding entries come first, followed by the three corpus words.
    # NOTE(review): values in X appear to index into this list -- confirm.
    expected_names = [
        u'__padding-magic-1',
        u'__padding-magic-2',
        u'foo',
        u'bar',
        u'baz',
    ]
    assert_equal(vectorizer.get_feature_names(), expected_names)
def test_vectorizer():
    """Check ``fit_transform`` output when ``min_order=1`` is requested."""
    docs = ['foo bar baz foo']
    vectorizer = PooledVectorizer(2, min_order=1)
    bounds, X = vectorizer.fit_transform(docs)

    # Seven rows are expected for the single four-word document.
    # NOTE(review): rows ending in 1 look like single words padded to
    # width 2, interleaved with word pairs -- confirm.
    expected_bounds = [[0, 7]]
    expected_rows = [
        [2, 1],
        [3, 1],
        [2, 3],
        [4, 1],
        [3, 4],
        [2, 1],
        [4, 2],
    ]
    assert_array_equal(bounds, expected_bounds)
    assert_array_equal(X, expected_rows)
def test_pooled_net():
    """Train ``MyPooledNetwork2`` on two newsgroup categories and print accuracy.

    The vectorizer is fitted on the training split and reused for the test
    split. The test makes no assertion; it only prints the accuracy of the
    predictions on the test split.

    NOTE(review): ``fetch_20newsgroups`` may need to download the dataset on
    first use -- confirm this test is acceptable in offline environments.
    """
    cats = ['alt.atheism', 'sci.space']
    newsgroups_train = fetch_20newsgroups(subset='train', categories=cats)
    newsgroups_test = fetch_20newsgroups(subset='test', categories=cats)

    # Fit on training text only, then apply the same mapping to the test text.
    v = PooledVectorizer(3, 1)
    bounds, X = v.fit_transform(newsgroups_train.data)
    y = newsgroups_train.target
    test_bounds, test_X = v.transform(newsgroups_test.data)
    test_y = newsgroups_test.target

    # The classifier takes the (bounds, X) pair as its input.
    clsf = MyPooledNetwork2(n_epochs=1, learning_rate=0.1)
    clsf.fit((bounds, X), y)
    pred_y = clsf.predict((test_bounds, test_X))

    # Parenthesised single-argument form prints identically on Python 2 and
    # is valid syntax on Python 3, unlike the bare print statement.
    print(accuracy_score(test_y, pred_y))
def test_vectorizer4():
    """Call ``transform`` on a vectorizer that has never been fitted.

    No assertions are made: the test passes as long as the call returns
    something that unpacks into exactly two values without raising.
    """
    docs = ['foo bar baz foo', 'foo baz']
    unfitted = PooledVectorizer(2)
    # The unpacking itself is the check; the values are not inspected.
    row_bounds, matrix = unfitted.transform(docs)