示例#1
0
def test_TokenCount_single_co_occurrence():
    """Check the counts produced by ``TokenCount.single_co_ocurrence``.

    A single line is processed and the resulting counter is expected to
    hold a count of one both for the individual token ``"xxx"`` and for
    the pair key ``"dias~xxx"``.
    """
    from text_models.dataset import TokenCount

    tcount = TokenCount.single_co_ocurrence()
    tcount.process_line("buenos xxx dias")
    # NOTE(review): pair keys appear to join two tokens with "~" in sorted
    # order ("dias" before "xxx") -- confirm against TokenCount.co_ocurrence.
    assert tcount.counter["dias~xxx"] == 1
    assert tcount.counter["xxx"] == 1
示例#2
0
def test_TokenCount_clean():
    """Verify that ``clean`` leaves the counter with fewer entries.

    The counter is filled from the tweets stored in ``TWEETS`` and its
    size is measured before and after calling ``clean``.
    """
    from microtc.utils import tweet_iterator
    from text_models.dataset import TokenCount

    counts = TokenCount.single_co_ocurrence()
    counts.process(tweet_iterator(TWEETS))

    size_before = len(counts.counter)
    counts.clean()
    size_after = len(counts.counter)

    # Cleaning must strictly reduce the number of stored keys.
    assert size_before > size_after