def test_empty_corpus():
    """A `Corpus` built from no documents stays empty, even after an empty update."""

    def check_is_empty(target):
        # Every size/vocabulary statistic must report "nothing here".
        assert len(target) == 0
        assert target.vocab == set()
        assert target.most_common() == []
        assert target.vocab_size == 0
        assert target.num_words == 0

    empty = Corpus([])
    check_is_empty(empty)

    # Updating with an empty batch of documents must not change anything.
    empty.update([])
    check_is_empty(empty)
def test_basic_initialization():
    """Check the statistics of a `Corpus` built from a single sentence."""
    single_doc = Corpus(["I ran to the park with the baseball."])

    # "the" is the only repeated word, so it must top the ranking.
    assert single_doc.most_common(1) == [("the", 2)]

    # The expected vocabulary is lowercase and excludes the trailing period.
    expected_vocab = {"i", "ran", "to", "the", "park", "with", "baseball"}
    assert single_doc.vocab == expected_vocab
    assert single_doc.vocab_size == 7

    # Eight words in total, one document.
    assert single_doc.num_words == 8
    assert len(single_doc) == 1

    # Corpus-wide counts for "the": 2 occurrences out of 8 words -> 0.25.
    assert single_doc.word_count("the") == 2
    assert single_doc.word_frequency("the") == 0.25

    # "the" appears in the only document, so its document frequency is 1.0.
    assert single_doc.document_count("the") == 1
    assert single_doc.document_frequency("the") == 1.0

    # Second argument is presumably the document index -- confirm against Corpus.
    assert single_doc.term_count("the", 0) == 2