def test_build_word_count_from_corpus_two_lines(self):
    """Check the word counts produced by build_word_count_from_corpus(lines_max=2).

    Every expected word has a count of 1, so the order in which
    ``most_common()`` reports them is only a tie-break that can differ
    between interpreters. Both sides are sorted before comparing so the
    test pins the (word, count) pairs rather than that incidental order.
    """
    expected = [
        ('of', 1),
        ('ebook', 1),
        ('carroll', 1),
        ('alice', 1),
        ('project', 1),
        ('gutenberg', 1),
        ('lewis', 1),
        ('in', 1),
        ('wonderland', 1),
        ('the', 1),
        ('by', 1),
    ]
    word_count = build_word_count_from_corpus(lines_max=2)
    most_common = word_count.most_common()
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(sorted(most_common), sorted(expected))
def get_word_data():
    """
    Build the corpus word counts and a fragment lookup for the frequent words.

    Only words whose count is at least
    ``parameters['min_frequency_word_to_fragment']`` are passed to
    ``word_list_to_fragment_lookup``; the ``word_count`` stored in the result
    is the full, unfiltered value returned by ``build_word_count_from_corpus``.

    :return: dictionary that has word_count and fragment lookup
    """
    word_count = build_word_count_from_corpus()
    frequency_min = parameters['min_frequency_word_to_fragment']
    # items() rather than iteritems(): available on both Python 2 and 3.
    word_count_smaller = {
        word: count
        for word, count in word_count.items()
        if count >= frequency_min
    }
    # list(...) gives the helper a real list of words on either Python
    # version (keys() would be a view object on Python 3).
    fragment_lookup = word_list_to_fragment_lookup(list(word_count_smaller))
    word_data = {'word_count': word_count, 'fragment_lookup': fragment_lookup}
    return word_data