示例#1
0
 def _is_word_known(self, word):
     if word is None or word == '':
         return 0
     clean_word = commons.clean_word(word)
     if clean_word in self._words_dict:
         return 1
     return 0
 def _collect_word_vector_per_source(self, words):
     word_vectors = []
     for word in words:
         if word in self._word_vector_dict:
             word_vector = self._word_vector_dict[word]
             word_vectors.append(word_vector)
         else:  # added by Lior
             clean_word = commons.clean_word(word)
             if clean_word in self._word_vector_dict:
                 word_vector = self._word_vector_dict[clean_word]
                 word_vectors.append(word_vector)
     return word_vectors
示例#3
0
 def _load_known_words_to_dict(self, word_list_name):
     self._words_dict = {}
     full_path = self._word_lists_paths + "/" + word_list_name
     file_object = None
     if os.path.isfile(full_path):
         try:
             file_object = open(full_path, "r")
             for line in file_object:
                 clean_word = commons.clean_word(line).lower()
                 self._words_dict[clean_word] = 1
         except Exception as exc:
             logging.error("error in word list - " + word_list_name)
         finally:
             file_object.close()
 def _collect_word_vector(self, words, aggregation_function):
     word_vectors = []
     for word in words:
         if word in self._word_vector_dict:
             word_vector = self._word_vector_dict[word]
             word_vectors.append(word_vector)
         else:
             clean_word = commons.clean_word(word)
             if clean_word in self._word_vector_dict:
                 word_vector = self._word_vector_dict[clean_word]
                 word_vectors.append(word_vector)
     word_vectors = zip(*word_vectors)
     function = eval(aggregation_function)
     result = map(function, word_vectors)
     return result
示例#5
0
 def _clean_words(self, words_list):
     clean_words = []
     for word in words_list:
         clean_words.append(str(commons.clean_word(word)))
     return clean_words