def test_corpus_apply(texts):
    """Check ``Corpus.apply`` using ``str.upper`` as the applied function.

    A corpus is built from ``texts``, labelling each document with its
    stringified position. After ``apply(str.upper)`` the test expects that:

    * the call returns a ``Corpus`` instance,
    * ``doc_labels`` and ``doc_lengths`` equal the values read before the call,
    * every document stored in the corpus equals the upper-cased text of the
      same label taken from a copy made before the call.
    """
    documents = {}
    for position, text in enumerate(texts):
        documents[str(position)] = text

    corpus = Corpus(documents)

    # Capture the reference state before the corpus is transformed.
    snapshot = corpus.copy()
    labels_before = corpus.doc_labels
    lengths_before = corpus.doc_lengths

    returned = corpus.apply(str.upper)
    assert isinstance(returned, Corpus)

    # Upper-casing is expected to leave labels and document lengths as they were.
    assert corpus.doc_labels == labels_before
    assert corpus.doc_lengths == lengths_before

    # The corpus itself (not only the return value) must hold the new texts.
    for label, transformed in corpus.items():
        expected = snapshot[label].upper()
        assert expected == transformed


# Example #2
# 0
def test_corpus_dict_methods():
    """Exercise the dict-style interface of ``Corpus``.

    Starting from an empty corpus, the test walks through item access,
    assignment, membership, iteration, ``keys()``/``items()`` and deletion,
    checking both the successful paths and the exceptions expected for
    unknown labels, rejected labels and a ``None`` document.
    """
    corpus = Corpus()
    assert len(corpus) == 0

    # Looking up a label that was never stored must raise KeyError.
    with pytest.raises(KeyError):
        _ = corpus['x']

    # The integer label 1 and the empty-string label are both expected to be
    # rejected with KeyError on assignment.
    for rejected_label in (1, ''):
        with pytest.raises(KeyError):
            corpus[rejected_label] = 'abc'

    # A None document under an otherwise usable label must raise ValueError.
    with pytest.raises(ValueError):
        corpus['d1'] = None

    # First successful insertion.
    corpus['d1'] = 'd1 text'
    assert len(corpus) == 1
    assert 'd1' in corpus
    assert set(corpus.keys()) == {'d1'}
    assert corpus['d1'] == 'd1 text'

    # Second insertion; iteration over the corpus yields the labels.
    corpus['d2'] = 'd2 text'
    expected_labels = {'d1', 'd2'}
    assert len(corpus) == 2
    for label in corpus:
        assert label in expected_labels
    assert set(corpus.keys()) == expected_labels

    # items() must pair each label with the text that item access returns.
    for label, text in corpus.items():
        assert label in expected_labels
        assert corpus[label] == text

    # Deleting an unknown label must raise KeyError.
    with pytest.raises(KeyError):
        del corpus['d3']

    # Remove the documents one by one until the corpus is empty again.
    del corpus['d1']
    assert len(corpus) == 1
    assert set(corpus.keys()) == {'d2'}

    del corpus['d2']
    assert len(corpus) == 0
    assert set(corpus.keys()) == set()