def test_word_list_to_fragment_lookup(self):
    """Check the fragment lookup built from a small, fixed word list.

    Three three-letter words sharing the initial 'a' are used, plus one
    very long word. The expected key set below contains only
    three-character fragments, so the long word is expected to contribute
    nothing (presumably it is filtered by length -- verify against
    word_list_to_fragment_lookup).
    """
    word_list = ['and', 'are', 'any', 'thisisanabsurdlylongwordthatisfake']
    lookup = word_list_to_fragment_lookup(word_list)

    # check explicitly that 'a__' matches three words
    self.assertEqual(lookup['a__'], ['and', 'are', 'any'])

    # check the whole thing is correct
    expected = ['__e', '__d', '_ny', '_n_', 'a__', '_re', 'a_y', 'ar_',
                '_r_', 'a_e', 'a_d', '___', 'an_', '__y', '_nd']
    # Compare sorted keys: dict ordering is an implementation detail, and on
    # Python 3 keys() is a view object that never compares equal to a list.
    self.assertEqual(sorted(lookup.keys()), sorted(expected))
def test_word_list_to_fragment_lookup(self):
    """Verify the keys and one entry of the lookup for a fixed word list.

    The input holds three three-letter words starting with 'a' and one
    very long word. Every expected key is a three-character fragment, so
    the long word should add no keys (presumably excluded by a length
    limit -- verify against word_list_to_fragment_lookup).
    """
    word_list = ['and', 'are', 'any', 'thisisanabsurdlylongwordthatisfake']
    lookup = word_list_to_fragment_lookup(word_list)

    # check explicitly that 'a__' matches three words
    self.assertEqual(lookup['a__'], ['and', 'are', 'any'])

    # check the whole thing is correct
    expected = [
        '__e', '__d', '_ny', '_n_', 'a__', '_re', 'a_y', 'ar_', '_r_',
        'a_e', 'a_d', '___', 'an_', '__y', '_nd',
    ]
    # Key order of a dict is not part of the contract being tested, and a
    # Python 3 keys() view is never equal to a list, so compare sorted lists.
    self.assertEqual(sorted(lookup.keys()), sorted(expected))
def get_word_data():
    """
    Build the word statistics from the corpus.

    The fragment lookup is built only from words whose count is at least
    parameters['min_frequency_word_to_fragment']; the returned word_count
    is the full, unfiltered count.

    :return: dictionary that has word_count and fragment lookup
    """
    word_count = build_word_count_from_corpus()
    frequency_min = parameters['min_frequency_word_to_fragment']

    # items() works on both Python 2 and 3; iteritems() exists only on 2.
    word_count_smaller = {
        word: count
        for word, count in word_count.items()
        if count >= frequency_min
    }

    # list(...) hands the helper a real list of words on either version
    # (on Python 3, keys() would be a view object instead).
    fragment_lookup = word_list_to_fragment_lookup(list(word_count_smaller))

    word_data = {'word_count': word_count, 'fragment_lookup': fragment_lookup}
    return word_data
def get_word_data():
    """
    Collect the corpus word counts and the fragment lookup derived from them.

    Words with a count below parameters['min_frequency_word_to_fragment']
    are left out of the fragment lookup, but remain in word_count.

    :return: dictionary that has word_count and fragment lookup
    """
    word_count = build_word_count_from_corpus()
    frequency_min = parameters['min_frequency_word_to_fragment']

    # Use items() rather than the Python-2-only iteritems() so the function
    # runs unchanged on Python 3.
    word_count_smaller = {
        word: count
        for word, count in word_count.items()
        if count >= frequency_min
    }

    # Materialise the keys as a list: same value Python 2's keys() returned,
    # and not a view object on Python 3.
    frequent_words = list(word_count_smaller)
    fragment_lookup = word_list_to_fragment_lookup(frequent_words)

    return {'word_count': word_count, 'fragment_lookup': fragment_lookup}