def get_top_and_rarest_used_words(self):
    """Return the most frequent and rarest non-stop-words plus the hapax ratio.

    Returns a dict with:
      - "top_words": {word: count} for the 1000 most frequent words,
      - "rarest_words": {word: count} for the last 1000 words in the
        counter's iteration order; counting them also increments
        ``self.hapax_legomenons`` for every word that occurs exactly once,
      - "hapax_legomenon_ratio": hapax count / total word count.

    TODO: dot cannot be a JSON key (original note) — words are used verbatim
    as keys in the returned dicts.
    NOTE(review): "rarest_words" depends on the Counter/OrderedDict iteration
    order (first-encounter order, not ascending frequency) — confirm that is
    the intended notion of "rarest".
    NOTE(review): assumes update_and_return_json mutates the dict it is given
    (its return value is ignored here), as its name indicates — confirm.
    """
    word_counts = Counter(
        word for word in self.get_base_words() if word not in stop_list
    )
    ordered_words = OrderedDict(word_counts)

    # Accumulate the 1000 most frequent words into a plain dict.
    # (Replaces a Python-2-only tuple-unpacking lambda inside reduce(),
    # which is a SyntaxError on Python 3 — see PEP 3113.)
    top_words = {}
    for word, frequency in word_counts.most_common(1000):
        update_and_return_json(top_words, word, frequency)

    # Accumulate the last 1000 words in iteration order, counting
    # hapax legomena (words that occur exactly once) along the way.
    rarest_words = {}
    for word in list(ordered_words)[-1000:]:
        occurrences = ordered_words[word]
        update_and_return_json(rarest_words, word, occurrences)
        if occurrences == 1:
            self.hapax_legomenons += 1

    return {
        "top_words": top_words,
        "rarest_words": rarest_words,
        "hapax_legomenon_ratio": self.hapax_legomenons / float(self.get_number_of_words()),
    }
def count_rarest(result, word):
    """Fold *word* and its occurrence count into *result*.

    Records the word's count via update_and_return_json and bumps the
    hapax-legomenon tally when the word occurs exactly once; the (mutated)
    accumulator is handed back so the function can be used with reduce().

    NOTE(review): this reads ``ordered_dict_words`` and ``self`` as free
    variables, which are unbound at module level — it appears to be a stray
    duplicate of the helper nested in get_top_and_rarest_used_words; confirm
    it is ever reachable from an enclosing scope.
    """
    n = ordered_dict_words[word]
    update_and_return_json(result, word, n)
    if n == 1:
        self.hapax_legomenons += 1
    return result