def test_TokenCount_single_co_occurrence():
    """Check the counts recorded after processing one line of text.

    Builds a ``TokenCount`` through its ``single_co_ocurrence`` constructor,
    feeds it the line ``"buenos xxx dias"`` and verifies that the counter
    holds a value of 1 for both the key ``"xxx"`` and the key ``"dias~xxx"``.
    """
    from text_models.dataset import TokenCount

    # NOTE(review): "single_co_ocurrence" is spelled as the test originally
    # called it; presumably that is the library's actual method name -- confirm
    # before "fixing" the spelling.
    tcount = TokenCount.single_co_ocurrence()
    tcount.process_line("buenos xxx dias")

    # "dias~xxx" is presumably the key for the (dias, xxx) pair -- TODO confirm
    # the key format against TokenCount.
    assert tcount.counter["dias~xxx"] == 1
    assert tcount.counter["xxx"] == 1
def test_TokenCount_clean():
    """Check that ``clean()`` leaves fewer entries in the counter.

    Processes every item yielded by ``tweet_iterator(TWEETS)``, records the
    number of counter entries, calls ``clean()`` and asserts that the number
    of entries afterwards is strictly smaller.
    """
    from microtc.utils import tweet_iterator
    from text_models.dataset import TokenCount

    tcount = TokenCount.single_co_ocurrence()
    tweets = tweet_iterator(TWEETS)
    tcount.process(tweets)

    size_before = len(tcount.counter)
    tcount.clean()
    size_after = len(tcount.counter)

    assert size_after < size_before