def _is_word_known(self, word): if word is None or word == '': return 0 clean_word = commons.clean_word(word) if clean_word in self._words_dict: return 1 return 0
def _collect_word_vector_per_source(self, words): word_vectors = [] for word in words: if word in self._word_vector_dict: word_vector = self._word_vector_dict[word] word_vectors.append(word_vector) else: # added by Lior clean_word = commons.clean_word(word) if clean_word in self._word_vector_dict: word_vector = self._word_vector_dict[clean_word] word_vectors.append(word_vector) return word_vectors
def _load_known_words_to_dict(self, word_list_name): self._words_dict = {} full_path = self._word_lists_paths + "/" + word_list_name file_object = None if os.path.isfile(full_path): try: file_object = open(full_path, "r") for line in file_object: clean_word = commons.clean_word(line).lower() self._words_dict[clean_word] = 1 except Exception as exc: logging.error("error in word list - " + word_list_name) finally: file_object.close()
def _collect_word_vector(self, words, aggregation_function): word_vectors = [] for word in words: if word in self._word_vector_dict: word_vector = self._word_vector_dict[word] word_vectors.append(word_vector) else: clean_word = commons.clean_word(word) if clean_word in self._word_vector_dict: word_vector = self._word_vector_dict[clean_word] word_vectors.append(word_vector) word_vectors = zip(*word_vectors) function = eval(aggregation_function) result = map(function, word_vectors) return result
def _clean_words(self, words_list): clean_words = [] for word in words_list: clean_words.append(str(commons.clean_word(word))) return clean_words