Пример #1
0
def test_empty_corpus():
    """A `Corpus` built from no documents is empty, and stays empty after an empty update."""

    def check_is_empty(subject):
        # The same five checks, in the same order, are applied both right
        # after construction and again after the no-op update.
        assert len(subject) == 0
        assert subject.vocab == set()
        assert subject.most_common() == []
        assert subject.vocab_size == 0
        assert subject.num_words == 0

    empty = Corpus([])
    check_is_empty(empty)

    # Updating with an empty document list must leave every statistic untouched.
    empty.update([])
    check_is_empty(empty)
Пример #2
0
def test_basic_initialization():
    """Check the statistics reported by a `Corpus` built from a single sentence."""
    sentence = "I ran to the park with the baseball."
    expected_vocab = {"i", "ran", "to", "the", "park", "with", "baseball"}
    word = "the"

    single_doc = Corpus([sentence])

    # Corpus-wide statistics.
    top_one = single_doc.most_common(1)
    assert top_one == [(word, 2)]
    assert single_doc.vocab == expected_vocab
    assert single_doc.vocab_size == 7
    assert single_doc.num_words == 8
    assert len(single_doc) == 1

    # Per-word statistics: the test expects "the" to be counted twice among
    # eight words, all within the only document (index 0).
    assert single_doc.word_count(word) == 2
    assert single_doc.word_frequency(word) == 0.25
    assert single_doc.document_count(word) == 1
    assert single_doc.document_frequency(word) == 1.0
    assert single_doc.term_count(word, 0) == 2