def test_to_bag_of_terms(corpus: VectorizedCorpus):
    """Check the token lists produced by ``to_bag_of_terms()`` with no arguments.

    Each item yielded by the call is materialised with ``list`` and the
    resulting list of lists is compared against a fixed table. The table
    matches the content of the ``corpus`` fixture, which is defined outside
    this function.
    """
    actual_docs = []
    for bag in corpus.to_bag_of_terms():
        # Each yielded item is only required to be iterable; normalise to a
        # list so that it compares equal to the literals below.
        actual_docs.append(list(bag))

    assert actual_docs == [
        ['a', 'a', 'b', 'c', 'c', 'c', 'c', 'd'],
        ['a', 'a', 'b', 'b', 'c', 'c', 'c'],
        ['a', 'a', 'b', 'b', 'b', 'c', 'c'],
        ['a', 'a', 'b', 'b', 'b', 'b', 'c', 'd'],
        ['a', 'a', 'c', 'd'],
    ]
def test_bag_term_matrix_to_bag_term_docs(corpus: VectorizedCorpus):
    """Check ``to_bag_of_terms`` both with explicit document ids and without.

    The call is made first with the ids ``(0, 1)`` and then with no
    arguments. The result for ids ``(0, 1)`` is expected to equal the first
    two entries of the result obtained without arguments. Expected values
    match the content of the ``corpus`` fixture, which is defined outside
    this function.
    """
    all_expected = [
        ['a', 'a', 'b', 'c', 'c', 'c', 'c', 'd'],
        ['a', 'a', 'b', 'b', 'c', 'c', 'c'],
        ['a', 'a', 'b', 'b', 'b', 'c', 'c'],
        ['a', 'a', 'b', 'b', 'b', 'b', 'c', 'd'],
        ['a', 'a', 'c', 'd'],
    ]

    # Restricted to an explicit selection of document ids.
    selected_ids = (0, 1)
    selected_bags = corpus.to_bag_of_terms(selected_ids)
    assert all_expected[:2] == [list(bag) for bag in selected_bags]

    # No selection given.
    every_bag = corpus.to_bag_of_terms()
    assert all_expected == [list(bag) for bag in every_bag]