def add(self_node: Synset, parent_list: List[Synset], g: nx.DiGraph, parent: bool = True):
    """
    Add the first synset of every tuple in `parent_list` as a node of `g`
    and connect it to `self_node`.

    :param self_node: The synset that anchors every new edge
    :param parent_list: Iterable of tuples whose first element is a Synset
    :param g: The directed graph to mutate
    :param parent: If True edges run self_node -> other, otherwise other -> self_node
    :return: The same (mutated) graph
    """
    anchor = self_node.name()
    for entry in parent_list:
        other = entry[0].name()
        g.add_node(other)
        # Edge direction depends on whether `self_node` is treated as the parent.
        edge = (anchor, other) if parent else (other, anchor)
        g.add_edge(*edge)
    return g
def simplified_lesk(word: str, sentence: str) -> Synset:
    """
    Simplified Lesk: choose the sense of `word` whose gloss and usage
    examples share the most lemmas with the sentence context.

    :param word: word to disambiguate
    :param sentence: sentence in which word appears
    :return: Synset that maximizes the overlap, or an empty Synset(None)
             when the word has no senses in WordNet
    """
    synsets = wordnet.synsets(word)
    if not synsets:
        # Preserve the original fallback contract instead of a bare except.
        return Synset(None)
    lemmatizer = WordNetLemmatizer()
    best_sense = synsets[0]
    max_overlap = 0
    context = {lemmatizer.lemmatize(token) for token in sentence.split(" ")}
    for sense in synsets:
        # Signature = lemmas of the gloss plus lemmas of every usage example.
        signature = {lemmatizer.lemmatize(token) for token in sense.definition().split(" ")}
        for example in sense.examples():
            # BUG FIX: the original discarded the result of set.union();
            # update() mutates the signature in place so examples count.
            signature.update(lemmatizer.lemmatize(token) for token in example.split(" "))
        overlap = len(signature.intersection(context))
        if overlap > max_overlap:
            max_overlap = overlap
            best_sense = sense
    return best_sense
def get_parent(synset: Synset):
    """
    Return a randomly chosen direct hypernym (parent) of the synset.

    :param synset: The synset to obtain the parent from
    :return: One of the parents of the synset
    """
    parents = synset.hypernyms()
    return random.choice(parents)
def get_parents(synset: Synset):
    """
    Return every direct hypernym (parent) of the synset.

    :param synset: The synset to obtain the parents from
    :return: List of the parents of the synset
    """
    parents = synset.hypernyms()
    return parents
def get_synset_id(synset: Synset):
    """
    Build the canonical synset id: 'n' plus the offset zero-padded to 8 digits.

    :param synset: The synset to extract the id from
    :return: The corresponding synset id, e.g. 'n00012345'
    """
    offset = synset.offset()
    return f"n{offset:08d}"
def get_grandparents(synset: Synset):
    """
    Return all grandparents of the synset (hypernyms of its hypernyms).

    :param synset: The synset to obtain the grandparents from
    :return: Flat list of the grandparents, duplicates preserved
    """
    return [gp for parent in synset.hypernyms() for gp in parent.hypernyms()]
def extended_context_lesk(word: str, sentence: str) -> Synset:
    """
    Extended Lesk: choose the sense of `word` whose gloss, usage examples,
    and hypernym/hyponym glosses share the most lemmas with the sentence
    context (after stopword removal).

    :param word: the word to be disambiguated
    :param sentence: input sentence which contains param 'word'
    :return: best_sense, a WordNet Synset for 'word', or an empty
             Synset(None) when the word has no senses
    """
    stopwords_set = set(stopwords.words('english'))
    synsets = wordnet.synsets(word)
    if not synsets:
        # Preserve the original fallback contract instead of a bare except.
        return Synset(None)
    lemmatizer = WordNetLemmatizer()
    best_sense = synsets[0]
    max_overlap = 0
    context = {lemmatizer.lemmatize(token) for token in sentence.split(" ")}
    for sense in synsets:
        signature = {lemmatizer.lemmatize(token) for token in sense.definition().split(" ")}
        for example in sense.examples():
            # BUG FIX: the original discarded the result of set.union();
            # update() mutates in place so example lemmas count.
            signature.update(lemmatizer.lemmatize(token) for token in example.split(" "))
        # Extend the signature with glosses of neighbouring senses.
        for related in sense.hypernyms() + sense.hyponyms():
            signature.update(lemmatizer.lemmatize(token) for token in related.definition().split(" "))
        # BUG FIX: the original discarded the result of set.difference();
        # stopwords were never actually removed from the signature.
        signature -= stopwords_set
        overlap = len(signature.intersection(context))
        if overlap > max_overlap:
            max_overlap = overlap
            best_sense = sense
    return best_sense
def lin(self, s1: Synset, s2: Synset) -> float:
    """Lin similarity of the two synsets under the Brown information content."""
    ic = self.brown_ic
    return s1.lin_similarity(s2, ic)
def res(self, s1: Synset, s2: Synset) -> float:
    """Resnik similarity of the two synsets under the Brown information content."""
    ic = self.brown_ic
    return s1.res_similarity(s2, ic)
def wup(self, s1: Synset, s2: Synset) -> float:
    """Wu-Palmer similarity between the two synsets."""
    similarity = s1.wup_similarity(s2)
    return similarity
def lch(self, s1: Synset, s2: Synset) -> float:
    """Leacock-Chodorow similarity between the two synsets."""
    similarity = s1.lch_similarity(s2)
    return similarity
def path(self, s1: Synset, s2: Synset) -> float:
    """Shortest-path similarity between the two synsets."""
    similarity = s1.path_similarity(s2)
    return similarity
def get_synset_frequency(synset: Synset):
    """Return the synset's Brown-corpus frequency: the sum of its lemma counts."""
    total = 0
    for lemma in synset.lemmas():
        total += lemma.count()
    return total
def meronym_ancestors(self, i):
    """
    Return the ancestors of synset `i` along the member-meronym relation.

    :param i: identifier accepted by self.synset()
    :return: result of get_ancestors for the member-meronym relation
    """
    # BUG FIX: the original passed Synset.member_meronyms() — calling the
    # unbound method with no instance raises TypeError. get_ancestors
    # expects the relation itself as a callable, not its result.
    return get_ancestors(self.synset(i), Synset.member_meronyms)
def lin(self, s1: Synset, s2: Synset) -> float:
    """
    Lin similarity under the Brown information content, returning NAN when
    WordNet cannot relate the two synsets.
    """
    try:
        similarity = s1.lin_similarity(s2, self.brown_ic)
    except WordNetError:
        return NAN
    return similarity
def lch(self, s1: Synset, s2: Synset) -> float:
    """
    Leacock-Chodorow similarity, returning NAN when WordNet cannot relate
    the two synsets.
    """
    try:
        similarity = s1.lch_similarity(s2)
    except WordNetError:
        return NAN
    return similarity
def get_graph(start: Synset, relation: Callable[[Synset], List[Synset]]):
    """
    Return the graph, as recursive nested lists, produced by a depth-first
    walk that starts at `start` and follows edges given by `relation`.

    :param start: root synset of the walk
    :param relation: callable mapping a synset to its related synsets
    :return: nested-list tree as produced by Synset.tree()
    """
    tree = start.tree(relation)
    return tree
def lowest_common_hypernyms(self, synset, other, simulate_root=False, use_min_depth=False):
    '''
    -- NOTE: THIS CODE IS COPIED FROM NLTK3 --
    Get a list of lowest synset(s) that both synsets have as a hypernym.
    When `use_min_depth == False` this means that the synset which appears
    as a hypernym of both `self` and `other` with the lowest maximum depth
    is returned or if there are multiple such synsets at the same depth
    they are all returned

    However, if `use_min_depth == True` then the synset(s) which has/have
    the lowest minimum depth and appear(s) in both paths is/are returned.

    By setting the use_min_depth flag to True, the behavior of NLTK2 can be
    preserved. This was changed in NLTK3 to give more accurate results in a
    small set of cases, generally with synsets concerning people. (eg:
    'chef.n.01', 'fireman.n.01', etc.)

    This method is an implementation of Ted Pedersen's "Lowest Common
    Subsumer" method from the Perl Wordnet module. It can return either
    "self" or "other" if they are a hypernym of the other.

    :type other: Synset
    :param other: other input synset
    :type simulate_root: bool
    :param simulate_root: The various verb taxonomies do not share a single
        root which disallows this metric from working for synsets that are
        not connected. This flag (False by default) creates a fake root that
        connects all the taxonomies. Set it to True to enable this behavior.
        For the noun taxonomy, there is usually a default root except for
        WordNet version 1.6. If you are using wordnet 1.6, a fake root will
        need to be added for nouns as well.
    :type use_min_depth: bool
    :param use_min_depth: This setting mimics older (v2) behavior of NLTK
        wordnet. If True, will use the min_depth function to calculate the
        lowest common hypernyms. This is known to give strange results for
        some synset pairs (eg: 'chef.n.01', 'fireman.n.01') but is retained
        for backwards compatibility
    :return: The synsets that are the lowest common hypernyms of both synsets
    '''
    # Synthetic root synset used to connect otherwise-disjoint taxonomies.
    fake_synset = Synset(None)
    fake_synset._name = '*ROOT*'
    fake_synset.hypernyms = lambda: []
    fake_synset.instance_hypernyms = lambda: []

    if simulate_root:
        # Append the fake root as a final hypernym level of both synsets so
        # every pair of synsets is guaranteed at least one common ancestor.
        self_hypernyms = chain(synset._iter_hypernym_lists(), [[fake_synset]])
        other_hypernyms = chain(other._iter_hypernym_lists(), [[fake_synset]])
    else:
        self_hypernyms = synset._iter_hypernym_lists()
        other_hypernyms = other._iter_hypernym_lists()

    # Flatten the per-level hypernym lists into one set per input synset.
    synsets = set(s for synsets in self_hypernyms for s in synsets)
    others = set(s for synsets in other_hypernyms for s in synsets)

    if self.core_taxonomy is not None:
        # NOTE(review): presumably this restricts candidate ancestors to the
        # concepts known to this taxonomy (self.known_concepts holds synset
        # names resolved via wordnet.synset) — confirm against the class.
        synsets.intersection_update(
            map(lambda syn: wordnet.synset(syn), self.known_concepts))
        others.intersection_update(
            map(lambda syn: wordnet.synset(syn), self.known_concepts))

    # Keep only the common ancestors of both synsets.
    synsets.intersection_update(others)
    try:
        if use_min_depth:
            # NLTK2-compatible behavior: rank candidates by min_depth.
            max_depth = max(s.min_depth() for s in synsets)
            unsorted_lch = [
                s for s in synsets if s.min_depth() == max_depth
            ]
        else:
            # NLTK3 behavior: rank candidates by max_depth.
            max_depth = max(s.max_depth() for s in synsets)
            unsorted_lch = [
                s for s in synsets if s.max_depth() == max_depth
            ]
        return sorted(unsorted_lch)
    except ValueError:
        # max() raises ValueError when there is no common ancestor at all.
        return []