Пример #1
0
def test_corpus_from_files2():
    """Build a corpus from the two Werther files and check labels and contents."""
    werther_paths = [
        'examples/data/gutenberg/werther/goethe_werther1.txt',
        'examples/data/gutenberg/werther/goethe_werther2.txt'
    ]
    corpus = Corpus.from_files(werther_paths)

    # One document and one recorded path are expected per input file.
    assert len(corpus.docs) == len(corpus.doc_paths) == 2

    for label, content in corpus.docs.items():
        # Drop the last character of the label (the trailing "1" / "2" for
        # these two files) before comparing against the shared stem.
        label_stem = label[:-1]
        assert label_stem.endswith('goethe_werther')
        assert len(content) > 0
Пример #2
0
def test_corpus_from_files():
    """Check that ``Corpus.from_files`` and ``Corpus().add_files`` agree for one file."""
    doc_path = 'examples/data/gutenberg/kafka_verwandlung.txt'
    via_classmethod = Corpus.from_files([doc_path])
    via_add_files = Corpus().add_files([doc_path])

    # Each construction route must yield exactly one document and one path.
    for corpus in (via_classmethod, via_add_files):
        assert len(corpus.docs) == len(corpus.doc_paths) == 1

    # Document labels and path labels have to match within and across corpora.
    assert (
        via_classmethod.docs.keys()
        == via_add_files.docs.keys()
        == via_classmethod.doc_paths.keys()
        == via_add_files.doc_paths.keys()
    )

    label_iter = iter(via_classmethod.docs.keys())
    only_doc_label = next(label_iter)
    assert only_doc_label.endswith('kafka_verwandlung')

    assert len(via_classmethod.docs[only_doc_label]) > 0

    # The recorded path is the one that was passed in.
    assert via_classmethod.doc_paths[only_doc_label] == doc_path
Пример #3
0
def test_corpus_from_files_not_existent():
    """Expect ``IOError`` when one of the listed paths is 'not_existent'."""
    paths_with_bad_entry = [
        'examples/data/gutenberg/werther/goethe_werther1.txt',
        'not_existent'
    ]
    with pytest.raises(IOError):
        Corpus.from_files(paths_with_bad_entry)
Пример #4
0
def test_corpus_from_files_nonlist_arg():
    """Expect ``ValueError`` when ``Corpus.from_files`` gets a plain string."""
    not_a_list = 'wrong'
    with pytest.raises(ValueError):
        Corpus.from_files(not_a_list)
Пример #5
0
def test_empty_corpora():
    """Check that every way of building a corpus without files yields empty ``docs``."""
    default_constructed = Corpus()
    from_empty_list = Corpus.from_files([])
    from_empty_list_then_added = Corpus.from_files([]).add_files([])

    # All three corpora must compare equal to each other and to an empty dict.
    assert (
        default_constructed.docs
        == from_empty_list.docs
        == from_empty_list_then_added.docs
        == {}
    )
Пример #6
0
def test_corpus_from_files_nonlist_arg():
    """Expect ``FileNotFoundError`` when ``Corpus.from_files`` gets a plain string."""
    # NOTE(review): a test with this same name expecting ValueError appears
    # earlier in this listing; if both live in one module, this later
    # definition shadows the earlier one -- confirm which is intended.
    not_a_list = 'wrong'
    with pytest.raises(FileNotFoundError):
        Corpus.from_files(not_a_list)