Example #1
def add(self_node: Synset,
        parent_list: List[Synset],
        g: nx.DiGraph,
        parent: bool = True):
    # Add each related synset as a node and connect it to self_node;
    # the `parent` flag controls the direction of the edge.
    for n in parent_list:
        g.add_node(n[0].name())

        if parent:
            g.add_edge(self_node.name(), n[0].name())
        else:
            g.add_edge(n[0].name(), self_node.name())

    return g
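A minimal usage sketch (an assumption, not from the original source): despite the List[Synset] annotation, the n[0] indexing suggests each element of parent_list is a tuple whose first item is a Synset, and nx is networkx.

import networkx as nx
from nltk.corpus import wordnet

dog = wordnet.synset('dog.n.01')
# wrap each hypernym in a 1-tuple to match the n[0] indexing above
graph = add(dog, [(h,) for h in dog.hypernyms()], nx.DiGraph(), parent=True)
print(graph.edges())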
Example #2
def simplified_lesk(word: str, sentence: str) -> Synset:
    """
    Computes the overlap between the sentence context and each sense's
    signature, returning the sense with the maximum overlap.
    :param word: word to disambiguate
    :param sentence: sentence in which the word appears
    :return: Synset that maximizes the overlap
    """
    synsets = wordnet.synsets(word)

    try:
        lemmatizer = WordNetLemmatizer()

        best_sense = synsets[0]
        max_overlap = 0
        context = set(lemmatizer.lemmatize(token) for token in sentence.split(" "))

        for sense in synsets:
            signature = set(lemmatizer.lemmatize(token) for token in sense.definition().split(" "))
            for example in sense.examples():
                # union() returns a new set, so the result has to be assigned back
                signature = signature.union(set(lemmatizer.lemmatize(token) for token in example.split(" ")))

            overlap = len(signature.intersection(context))
            if overlap > max_overlap:
                max_overlap = overlap
                best_sense = sense

        return best_sense

    except IndexError:
        # the word has no synsets in WordNet
        return Synset(None)
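A possible invocation, assuming the nltk imports used above (wordnet, WordNetLemmatizer) are in scope:

sense = simplified_lesk("bank", "I deposited money at the bank near the river")
print(sense, sense.definition())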
Example #3
    def get_parent(synset: Synset):
        """
        Returns one of the parents of the synset.
        :param synset: The synset to obtain the parent from
        :return: One of the parents of the synset
        """

        return random.choice(synset.hypernyms())
Example #4
    def get_parents(synset: Synset):
        """
        Returns all parents of the synset (hypernyms).
        :param synset: The synset to obtain the parent from
        :return: List of the parents of the synset
        """

        return synset.hypernyms()
Example #5
    def get_synset_id(synset: Synset):
        """
        Get the corresponding synset id of the synset.
        :param synset: The synset to extract the id from
        :return: The corresponding synset id
        """

        sid = "n{}".format(str(synset.offset()).zfill(8))
        return sid
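For example, assuming the method is exposed as a plain function or staticmethod (offsets are version-dependent; the value shown is for WordNet 3.0):

from nltk.corpus import wordnet
print(get_synset_id(wordnet.synset('dog.n.01')))  # 'n02084071'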
Example #6
    def get_grandparents(synset: Synset):
        """
        Returns all grandparents of the synset.
        :param synset: The synset to obtain the grandparents from
        :return: The grandparents of the synset
        """

        grandparents = []

        for parent in synset.hypernyms():
            grandparents.extend(parent.hypernyms())

        return grandparents
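A quick check of the traversal helpers above, again assuming they are callable as plain functions:

from nltk.corpus import wordnet
dog = wordnet.synset('dog.n.01')
print(get_parents(dog))       # direct hypernyms, e.g. canine.n.02 and domestic_animal.n.01
print(get_grandparents(dog))  # hypernyms of those hypernyms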
Example #7
def extended_context_lesk(word: str, sentence: str) -> Synset:
    """
    Computes the overlap between the sentence context and each sense's
    signature, extended with hypernym and hyponym definitions, returning
    the sense with the maximum overlap.
    :param word: the word to be disambiguated
    :param sentence: input sentence which contains param 'word'
    :return: best_sense, a WordNet Synset for param 'word'
    """
    stopwords_set = set(stopwords.words('english'))
    synsets = wordnet.synsets(word)

    try:
        lemmatizer = WordNetLemmatizer()

        best_sense = synsets[0]
        max_overlap = 0
        context = set(lemmatizer.lemmatize(token) for token in sentence.split(" "))

        for sense in synsets:
            signature = set(lemmatizer.lemmatize(token) for token in sense.definition().split(" "))

            for example in sense.examples():
                # union() returns a new set, so the result has to be assigned back
                signature = signature.union(set(lemmatizer.lemmatize(token) for token in example.split(" ")))

            for hypernym in sense.hypernyms():
                signature = signature.union(set(lemmatizer.lemmatize(token) for token in hypernym.definition().split(" ")))

            for hyponym in sense.hyponyms():
                signature = signature.union(set(lemmatizer.lemmatize(token) for token in hyponym.definition().split(" ")))

            # difference() also returns a new set; drop the stopwords from the signature
            signature = signature.difference(stopwords_set)

            overlap = len(signature.intersection(context))
            if overlap > max_overlap:
                max_overlap = overlap
                best_sense = sense

        return best_sense

    except IndexError:
        # the word has no synsets in WordNet
        return Synset(None)
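Both Lesk variants take the same arguments; a side-by-side call might look like this (a sketch, assuming the same nltk imports):

word, sentence = "bass", "He caught a huge bass at the lake"
print(simplified_lesk(word, sentence))
print(extended_context_lesk(word, sentence))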
    def lin(self, s1: Synset, s2: Synset) -> float:
        return s1.lin_similarity(s2, self.brown_ic)

    def res(self, s1: Synset, s2: Synset) -> float:
        return s1.res_similarity(s2, self.brown_ic)

    def wup(self, s1: Synset, s2: Synset) -> float:
        return s1.wup_similarity(s2)

    def lch(self, s1: Synset, s2: Synset) -> float:
        return s1.lch_similarity(s2)

    def path(self, s1: Synset, s2: Synset) -> float:
        return s1.path_similarity(s2)
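These are methods of a wrapper class that is not shown here. A minimal sketch of the missing context, assuming self.brown_ic is the Brown information-content dictionary that ships with nltk (the class name is hypothetical):

from nltk.corpus import wordnet_ic
from nltk.corpus.reader.wordnet import Synset

class SynsetSimilarity:  # hypothetical name
    def __init__(self):
        # information content required by lin_similarity and res_similarity
        self.brown_ic = wordnet_ic.ic('ic-brown.dat')

    def lin(self, s1: Synset, s2: Synset) -> float:
        return s1.lin_similarity(s2, self.brown_ic)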
def get_synset_frequency(synset: Synset):
    """
    Get synset frequency in Brown
    """
    return sum([lemma.count() for lemma in synset.lemmas()])
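For example, summing the tagged-corpus counts over every lemma of a synset:

from nltk.corpus import wordnet
print(get_synset_frequency(wordnet.synset('dog.n.01')))  # sum of lemma.count() over its lemmas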
Example #14
    def meronym_ancestors(self, i):
        # pass the relation method itself (not its result) so the walker can call it per synset
        return get_ancestors(self.synset(i), Synset.member_meronyms)
    def lin(self, s1: Synset, s2: Synset) -> float:
        try:
            return s1.lin_similarity(s2, self.brown_ic)
        except WordNetError:
            return NAN

    def lch(self, s1: Synset, s2: Synset) -> float:
        try:
            return s1.lch_similarity(s2)
        except WordNetError:
            return NAN
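These defensive variants assume WordNetError and NAN are defined elsewhere in the module; one plausible set of imports (an assumption, not shown in the original):

from math import nan as NAN                           # assumed definition of NAN
from nltk.corpus.reader.wordnet import WordNetError   # raised by the similarity methods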
Example #17
def get_graph(start: Synset, relation: Callable[[Synset], List[Synset]]):
    """ Get the graph in the form of recurssive lists formed by doing
    depth-first walk starting from start and following edges relation"""

    return start.tree(relation)
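Synset.tree takes the relation as a callable, so a call might look like this:

from nltk.corpus import wordnet
tree = get_graph(wordnet.synset('dog.n.01'), lambda s: s.hypernyms())
print(tree)  # nested lists: [Synset('dog.n.01'), [Synset('canine.n.02'), ...], ...]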
Example #18
    def lowest_common_hypernyms(self,
                                synset,
                                other,
                                simulate_root=False,
                                use_min_depth=False):
        '''
        -- NOTE: THIS CODE IS COPIED FROM NLTK3 --
        Get a list of the lowest synset(s) that both synsets have as a hypernym.
        When `use_min_depth == False`, this means that the synset which
        appears as a hypernym of both `self` and `other` with the lowest
        maximum depth is returned, or, if there are multiple such synsets
        at the same depth, they are all returned.

        However, if `use_min_depth == True` then the synset(s) which has/have
        the lowest minimum depth and appear(s) in both paths is/are returned.

        By setting the use_min_depth flag to True, the behavior of NLTK2 can
        be preserved. This was changed in NLTK3 to give more accurate results
        in a small set of cases, generally with synsets concerning people.
        (e.g. 'chef.n.01', 'fireman.n.01', etc.)

        This method is an implementation of Ted Pedersen's "Lowest Common
        Subsumer" method from the Perl Wordnet module. It can return either
        "self" or "other" if they are a hypernym of the other.

        :type other: Synset
        :param other: other input synset
        :type simulate_root: bool
        :param simulate_root: The various verb taxonomies do not
            share a single root which disallows this metric from working for
            synsets that are not connected. This flag (False by default)
            creates a fake root that connects all the taxonomies. Set it
            to True to enable this behavior. For the noun taxonomy,
            there is usually a default root except for WordNet version 1.6.
            If you are using WordNet 1.6, a fake root will need to be added
            for nouns as well.
        :type use_min_depth: bool
        :param use_min_depth: This setting mimics older (v2) behavior of NLTK
            wordnet. If True, will use the min_depth function to calculate
            the lowest common hypernyms. This is known to give strange
            results for some synset pairs (e.g. 'chef.n.01', 'fireman.n.01')
            but is retained for backwards compatibility.
        :return: The synsets that are the lowest common hypernyms of both
            synsets
        '''

        fake_synset = Synset(None)
        fake_synset._name = '*ROOT*'
        fake_synset.hypernyms = lambda: []
        fake_synset.instance_hypernyms = lambda: []

        if simulate_root:
            self_hypernyms = chain(synset._iter_hypernym_lists(),
                                   [[fake_synset]])
            other_hypernyms = chain(other._iter_hypernym_lists(),
                                    [[fake_synset]])
        else:
            self_hypernyms = synset._iter_hypernym_lists()
            other_hypernyms = other._iter_hypernym_lists()

        synsets = set(s for synsets in self_hypernyms for s in synsets)
        others = set(s for synsets in other_hypernyms for s in synsets)
        if self.core_taxonomy is not None:
            synsets.intersection_update(
                map(lambda syn: wordnet.synset(syn), self.known_concepts))
            others.intersection_update(
                map(lambda syn: wordnet.synset(syn), self.known_concepts))
        synsets.intersection_update(others)

        try:
            if use_min_depth:
                max_depth = max(s.min_depth() for s in synsets)
                unsorted_lch = [
                    s for s in synsets if s.min_depth() == max_depth
                ]
            else:
                max_depth = max(s.max_depth() for s in synsets)
                unsorted_lch = [
                    s for s in synsets if s.max_depth() == max_depth
                ]
            return sorted(unsorted_lch)
        except ValueError:
            return []
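For comparison, the stock NLTK 3 method of the same name can be called directly on the example pair mentioned in the docstring (a usage sketch; the filtering against self.known_concepts above is not part of it):

from nltk.corpus import wordnet
chef = wordnet.synset('chef.n.01')
fireman = wordnet.synset('fireman.n.01')
print(chef.lowest_common_hypernyms(fireman))  # [Synset('person.n.01')] with NLTK 3 behaviour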