def test_skipgram_vectorizer_basic():
    """Check that fit_transform is sparse and transform reproduces it.

    Fits a default ``SkipgramVectorizer`` on ``token_data``, then
    transforms the same data again and compares the stored values and
    the COO column indices of the two results.
    """
    model = SkipgramVectorizer()

    fitted = model.fit_transform(token_data)
    assert scipy.sparse.issparse(fitted)

    # Re-transforming the training data should give the same entries.
    retransformed = model.transform(token_data)
    assert np.all(retransformed.data == fitted.data)

    retransformed_cols = retransformed.tocoo().col
    fitted_cols = fitted.tocoo().col
    assert np.all(retransformed_cols == fitted_cols)
def test_skipgram_vectorizer_mixed():
    """Check that fitting on ``mixed_token_data`` raises ``ValueError``."""
    model = SkipgramVectorizer()

    # Only the fit_transform call is expected to raise; construction
    # happens outside the context manager on purpose.
    with pytest.raises(ValueError):
        model.fit_transform(mixed_token_data)
def test_skipgram_vectorizer_text():
    """Check the sparse output for ``text_token_data``.

    Verifies that ``fit_transform`` returns a sparse result and that
    row 1 of that result has no stored entries.
    """
    model = SkipgramVectorizer()
    vectorized = model.fit_transform(text_token_data)
    assert scipy.sparse.issparse(vectorized)

    # Ensure that the empty document has an all zero row
    empty_document_row = vectorized[1, :]
    assert len(empty_document_row.data) == 0
def test_skipgram_vectorizer_min_doc():
    """Check the output when ``min_document_occurrences=2`` is set.

    Fits on ``text_token_data_permutation`` and verifies that the result
    has shape ``(3, 2)`` with the exact expected dense counts.
    """
    model = SkipgramVectorizer(min_document_occurrences=2)
    counts = model.fit_transform(text_token_data_permutation)

    assert counts.shape == (3, 2)

    expected_counts = np.array(
        [
            [0, 1],
            [0, 0],
            [1, 0],
        ]
    )
    dense_counts = counts.toarray()
    assert np.all(dense_counts == expected_counts)