Example #1
0
  def get_top_and_rarest_used_words(self):
    """Return word-frequency statistics for this text.

    Returns a dict with:
      "top_words": JSON accumulated (via update_and_return_json) from the
        1000 most common non-stop-list base words and their counts.
      "rarest_words": JSON accumulated from the last 1000 keys of the
        counter's iteration order; as a side effect, increments
        self.hapax_legomenons for every word occurring exactly once.
      "hapax_legomenon_ratio": hapax count / total word count (float).

    NOTE(review): "rarest_words" depends on the iteration order of
    OrderedDict(Counter), which is insertion order, not frequency order —
    confirm this matches the intended notion of "rarest".
    """
    # TODO: dot cannot be a JSON key
    base_lowercase_words_counted = Counter(
        filter(lambda word: word not in stop_list, self.get_base_words()))
    ordered_dict_words = OrderedDict(base_lowercase_words_counted)

    def count_rarest(result, word):
      # Reducer: fold one word's count into the accumulated JSON and
      # tally hapax legomena (words that occur exactly once).
      word_occurrences = ordered_dict_words[word]
      update_and_return_json(result, word, word_occurrences)
      if word_occurrences == 1:
        self.hapax_legomenons += 1
      return result

    return {
      # Index into the (word, count) pair instead of the Python-2-only
      # tuple-parameter-unpacking lambda (removed by PEP 3113) — this
      # form is valid on both Python 2 and Python 3.
      "top_words": reduce(
          lambda result, pair: update_and_return_json(result, pair[0], pair[1]),
          base_lowercase_words_counted.most_common(1000), {}),
      "rarest_words": reduce(count_rarest, list(ordered_dict_words)[-1000:], {}),
      "hapax_legomenon_ratio": self.hapax_legomenons / float(self.get_number_of_words())
    }
Example #2
0
 def count_rarest(result, word):
   """Fold *word* and its occurrence count into *result*; tally hapax legomena.

   Looks the count up in the enclosing ordered_dict_words mapping, merges
   it into the accumulated JSON via update_and_return_json, and bumps
   self.hapax_legomenons when the word occurs exactly once.
   """
   occurrences = ordered_dict_words[word]
   update_and_return_json(result, word, occurrences)
   if occurrences == 1:
     self.hapax_legomenons = self.hapax_legomenons + 1
   return result
Example #3
0
 def count_rarest(result, word):
     # Reducer for functools.reduce: merge this word's occurrence count
     # into the accumulated result, then track hapax legomena — words
     # that appear exactly once — on the enclosing instance.
     n_times = ordered_dict_words[word]
     update_and_return_json(result, word, n_times)
     is_hapax = n_times == 1
     if is_hapax:
         self.hapax_legomenons += 1
     return result