def _should_replace_with_letter(word, w2v_utilities, i, letter): if i != len(word) and word[i - 1] != letter and word[i+1] != letter: new_word = _place_letter_at_index(word, letter, i) new_word = _remove_puncutation(new_word) heb_string = HebrewString(new_word) retriever = w2v_utilities.build_retriever(heb_string.eng_ltrs()) return retriever.get(1) is not None
def get_gold_synsets(self, heb_word):
    """Map every synset found for ``heb_word`` to an equal weight.

    The word is wrapped in ``HebrewString`` and looked up through
    ``self.wordnet.synsets(..., lang='heb')``.  When that lookup is empty,
    the wrapped word is passed to ``Translator().translate`` and the result
    is looked up through the module-level ``wordnet.synsets``.

    Args:
        heb_word: Value accepted by the ``HebrewString`` constructor.

    Returns:
        dict mapping each synset to ``1 / number_of_synsets``, or ``None``
        (after printing a message) when neither lookup yields a synset.
    """
    hebrew = HebrewString(heb_word)
    synsets = self.wordnet.synsets(hebrew.heb_ltrs(), lang='heb')

    if len(synsets) == 0:
        # Case heb_word does not appear in Hebrew Wordnet:
        # fall back to looking up its translation.
        eng_word = Translator().translate(hebrew)
        synsets = wordnet.synsets(eng_word)  # @UndefinedVariable
        if len(synsets) == 0:
            print("No real Synset has been found")
            return None

    # Every synset receives the same share.
    prob = 1 / float(len(synsets))
    return {synset: prob for synset in synsets}
def get_word2vec_similar_synsets(self, heb_word, number_of_required_synsets):
    """Collect synsets of word2vec suggestions for ``heb_word``.

    A retriever is built via ``self.word2vec.build_retriever`` from the
    ``eng_ltrs()`` form of the word.  Its ``(suggestion, similarity)`` pairs
    are consumed one at a time until ``number_of_required_synsets`` distinct
    synsets have been gathered.  Suggestions whose ``eng_ltrs()`` form
    contains that of ``heb_word`` are skipped (and reported with a print),
    as are suggestions for which ``self._get_suggestion_synsets`` returns
    nothing.

    Args:
        heb_word: Value accepted by the ``HebrewString`` constructor.
        number_of_required_synsets: Number of synsets to gather.

    Returns:
        dict mapping synset to a score, or ``None`` when the retriever's
        initial ``get`` returns ``None``.  The score is the suggestion's
        similarity divided by the number of synsets that suggestion could
        contribute at that moment (capped by how many were still missing).
    """
    target = HebrewString(heb_word)
    retriever = self.word2vec.build_retriever(target.eng_ltrs())
    pending = retriever.get(number_of_required_synsets)
    if pending is None:
        return None

    similar_synsets = {}
    while len(similar_synsets) < number_of_required_synsets:
        if len(pending) == 0:
            # NOTE(review): assumes get_more() always yields a non-empty
            # list; otherwise the pop below fails -- confirm with retriever.
            pending = retriever.get_more()
        raw_suggestion, similarity = pending.pop(0)
        candidate = HebrewString(raw_suggestion)

        if target.eng_ltrs() in candidate.eng_ltrs():
            print("Passed over {0}".format(candidate.heb_ltrs()))
            continue

        candidate_synsets = self._get_suggestion_synsets(candidate)
        if len(candidate_synsets) == 0:
            continue

        # Fixed before consuming the candidate's synsets, so every synset
        # taken from this candidate is scored with the same divisor.
        still_missing = number_of_required_synsets - len(similar_synsets)
        divisor = min(len(candidate_synsets), still_missing)

        while (len(similar_synsets) < number_of_required_synsets
               and len(candidate_synsets) > 0):
            synset = candidate_synsets.pop(0)
            if synset in similar_synsets:
                # Keep the score recorded by the earlier suggestion.
                continue
            similar_synsets[synset] = similarity / divisor

    return similar_synsets