def create_graphs(doc_list):
    documents = doc_list
    if documents is None:
        documents = default_document_list()

    distance_functions = [
        (wn.lch_similarity(SYNSETS[0], SYNSETS[0]), 'lch', lambda sense_1, sense_2: wn.lch_similarity(sense_1, sense_2)),
        (1.0, 'lin', lambda sense_1, sense_2: wn.lin_similarity(sense_1, sense_2, CORPUS)),
        (10.636958516573292, 'res', lambda sense_1, sense_2: wn.res_similarity(sense_1, sense_2, CORPUS)),
        (wn.jcn_similarity(SYNSETS[0], SYNSETS[0], CORPUS), 'jcn', lambda sense_1, sense_2: wn.jcn_similarity(sense_1, sense_2, CORPUS)),
        (1.0, 'path', lambda sense_1, sense_2: wn.path_similarity(sense_1, sense_2)),
    ]
    all_senses = []
    for doc in documents:
        for sense in doc.top_senses():
            all_senses.append((sense, doc.name))
    against_colors = ['r', 'b', 'g']
    against_to = [wn.synset(word) for word in ["economy.n.01", "philosophy.n.02", "politics.n.01"]]
    create_against_graph('phyl_eco_pol', documents, all_senses, against_to, distance_functions, against_colors)

    against_to = SYNSETS

    against_colors = [(random(), random(), random()) for _i in range(0, len(SYNSETS))]
    create_against_graph('handpicked', documents, all_senses, against_to, distance_functions, against_colors)

    create_graph_top_senses(documents, all_senses, distance_functions)
Example #2
    def test_path_similarities(self):
        from nltk.corpus import wordnet as nltk_wn
        nltk_cat = nltk_wn.synset('cat.n.1')
        nltk_dog = nltk_wn.synset('dog.n.1')
        nltk_bus = nltk_wn.synset('bus.n.1')

        our_cat = our_wn.synset('cat.n.1')
        our_dog = our_wn.synset('dog.n.1')
        our_bus = our_wn.synset('bus.n.1')
        assert nltk_wn.path_similarity(nltk_cat,
                                       nltk_dog) == our_wn.path_similarity(
                                           our_cat, our_dog)
        assert nltk_wn.wup_similarity(nltk_cat,
                                      nltk_dog) == our_wn.wup_similarity(
                                          our_cat, our_dog)
        assert nltk_wn.lch_similarity(nltk_cat,
                                      nltk_dog) == our_wn.lch_similarity(
                                          our_cat, our_dog)

        assert nltk_wn.path_similarity(nltk_cat,
                                       nltk_bus) == our_wn.path_similarity(
                                           our_cat, our_bus)
        assert nltk_wn.wup_similarity(nltk_cat,
                                      nltk_bus) == our_wn.wup_similarity(
                                          our_cat, our_bus)
        assert nltk_wn.lch_similarity(nltk_cat,
                                      nltk_bus) == our_wn.lch_similarity(
                                          our_cat, our_bus)
Example #3
def get_synset_similarity(first_sentence: str, second_sentence: str, method: str):
    sentence_tagged_1 = pos_tag(first_sentence)
    sentence_tagged_2 = pos_tag(second_sentence)

    sentence_tagged_wn_1 = get_sentences_tagged_with_wn_and_cleaned(sentence_tagged_1)
    sentence_tagged_wn_2 = get_sentences_tagged_with_wn_and_cleaned(sentence_tagged_2)

    synsets = {}
    synsets, key_list1 = get_synset_tag(sentence_tagged_wn_1, synsets)
    synsets, key_list2 = get_synset_tag(sentence_tagged_wn_2, synsets)

    synsets_combinations = list(product(key_list1, key_list2))
    resulting_similarity = []
    for first_word, second_word in synsets_combinations:
        if first_word == second_word:
            resulting_similarity.append(1)
            continue
        first_synset = synsets[first_word][0]
        second_synset = synsets[second_word][0]
        first_tag = synsets[first_word][1]
        second_tag = synsets[second_word][1]
        if method == "path":
            path_sim = first_synset.path_similarity(second_synset)
            if path_sim is None:
                resulting_similarity.append(0)
            else:
                resulting_similarity.append(path_sim)

        if method == "lch":
            if first_tag == second_tag:
                lch_sim = wn.lch_similarity(first_synset, second_synset)
                if lch_sim is None:
                    resulting_similarity.append(0)
                else:
                    lch_norm = lch_sim / wn.lch_similarity(first_synset, first_synset)
                    resulting_similarity.append(lch_norm)

        if method == "wup":
            wup_sim = first_synset.wup_similarity(second_synset)
            if wup_sim is None:
                resulting_similarity.append(0)
            else:
                resulting_similarity.append(wup_sim)

        if method == "lin":
            if first_tag == second_tag and first_tag in ['n', 'v']:
                lin_sim = first_synset.lin_similarity(second_synset, brown_ic)
                if lin_sim is None:
                    resulting_similarity.append(0)
                else:
                    resulting_similarity.append(lin_sim)

    if not resulting_similarity:
        return 0
    else:
        return sum(resulting_similarity) / len(resulting_similarity)
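
The helper above leans on project-specific utilities (the pos_tag wrapper, get_synset_tag, brown_ic). A minimal, self-contained sketch of the same pairwise-averaging idea, using only NLTK with first-sense lookup (the function name and word lists here are illustrative, not from the original project):

from itertools import product
from nltk.corpus import wordnet as wn

def avg_first_sense_path_similarity(words_1, words_2):
    # Average path similarity over all cross-pairs, first sense of each word.
    scores = []
    for w1, w2 in product(words_1, words_2):
        syns_1 = wn.synsets(w1)
        syns_2 = wn.synsets(w2)
        if not syns_1 or not syns_2:
            continue  # skip out-of-vocabulary words
        sim = syns_1[0].path_similarity(syns_2[0])
        scores.append(sim if sim is not None else 0)  # None means no connecting path
    return sum(scores) / len(scores) if scores else 0

# e.g. avg_first_sense_path_similarity(["dog", "cat"], ["puppy", "kitten"])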
def most_similar_lch(synsets_dict, verb):
    best_similarity = -1
    most_similar = str()
    verb_synset = wn.synsets(verb, pos=wn.VERB)[0]

    # loop variable renamed so it no longer shadows the `verb` parameter
    for candidate, synset in synsets_dict.items():
        similarity = wn.lch_similarity(synset, verb_synset)
        if similarity is not None and similarity > best_similarity:
            best_similarity = similarity
            most_similar = candidate

    return most_similar
Example #5
def compare_allsynsets(method, word1, word2):
    ss1 = wordnet.synsets(word1)
    ss2 = wordnet.synsets(word2)
    simi_value = 0.0
    for (s1, s2) in product(ss1, ss2):
        # if SYNpos and s1.pos() != s2.pos():  # SYN-POS
        #     continue
        # if TWpos and s1.pos() != pos:  # Target word POS
        #     continue
        simi = None  # reset per pair; unmatched branches leave it unset
        if method == "PATH":
            simi = s1.path_similarity(s2)
        elif method == "LCH":
            if s1.pos() == s2.pos():  # lch raises WordNetError across POS
                simi = wordnet.lch_similarity(s1, s2)
        elif method == "WUP":
            simi = wordnet.wup_similarity(s1, s2)
        elif method == "RES":
            if s1.pos() == s2.pos():  # IC-based measures also need matching POS
                simi = wordnet.res_similarity(s1, s2, brown_ic)
        elif method == "JCN":
            if s1.pos() == s2.pos() and s1.pos() in ['n', 'a', 'v']:  # can't do diff POS
                simi = wordnet.jcn_similarity(s1, s2, brown_ic)
        elif method == "LIN":
            if s1.pos() == s2.pos() and s1.pos() in ['n', 'a', 'v']:  # can't do diff POS
                simi = wordnet.lin_similarity(s1, s2, brown_ic)
        else:
            sys.exit("Error! No similarity methods!")

        if simi is not None and simi > simi_value:  # path/wup may return None
            simi_value = simi
    return simi_value
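
A usage sketch for the function above: PATH/LCH/WUP need only WordNet, while RES/JCN/LIN assume an information-content dictionary such as brown_ic (loaded here as an illustration):

from itertools import product
from nltk.corpus import wordnet, wordnet_ic

brown_ic = wordnet_ic.ic('ic-brown.dat')
print(compare_allsynsets("WUP", "dog", "cat"))  # best Wu-Palmer score over all sense pairs
print(compare_allsynsets("RES", "dog", "cat"))  # best Resnik score under brown_ic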
def similarityWordNet(word1, word2):
    """
    Similarity between two words with nltk
    Input: word1, word2 (String)
    Return: True if the first senses are similar, False otherwise
    """
    #print (word1,"-",word2)
    # morphy() returns None when it cannot lemmatize; keep the original word then
    word1 = wn.morphy(word1) or word1
    word2 = wn.morphy(word2) or word2

    palabras = wn.synsets(word1)
    #print (palabras)
    if len(palabras) == 0:
        print("no existe")
        return False
    word1 = palabras[0]  # first sense; no need to slice the Synset repr string
    #print (word1)

    palabras = wn.synsets(word2)
    #print (palabras)
    if len(palabras) == 0:
        print("no existe")
        return False
    word2 = palabras[0]
    #print (word2)
    """
    Return a score denoting how similar two word senses are,
    based on the shortest path that connects the senses in the is-a
    (hypernym/hyponym) taxonomy. The score is in the range 0 to 1.
    """
    #similarity1 = word1.path_similarity(word2)
    #similarity1 = wn.path_similarity(word1, word2)
    similarity = wn.wup_similarity(word1, word2)  # Wu-Palmer Similarity
    print(similarity)
    # wup_similarity may return None when the senses share no taxonomy
    if similarity is not None and similarity > 0.5:
        return True
    """
    Leacock-Chodorow Similarity: Return a score denoting how similar
    two word senses are, based on the shortest path that connects
    the senses (as above) and the maximum depth of the taxonomy in
    which the senses occur. Range: up to about 3.6.
    """
    #similarity2 = wn.lch_similarity(word1, word2)  # requires both senses to share a POS
    """
    Wu-Palmer Similarity: Return a score denoting how similar
    two word senses are, based on the depth of the two senses in
    the taxonomy and that of their Least Common Subsumer (most specific ancestor node).
    Range: up to 0.92.
    """
    #similarity3 = wn.wup_similarity(word1, word2)
    return False
def get_best_synset_pair(word_1, word_2, pos_1=POS_SET, pos_2=POS_SET):
    """ 
    Choose the pair with highest path similarity among all pairs. 
    Mimics pattern-seeking behavior of humans.
    """
    #synsets_1 = wn.synsets(word_1)
    synsets_1 = [s for s in wn.synsets(word_1) if s.pos() in pos_1]
    #synsets_2 = wn.synsets(word_2)
    synsets_2 = [s for s in wn.synsets(word_2) if s.pos() in pos_2]
    max_sim = None
    best_pair = None, None
    for synset_1 in synsets_1:
        for synset_2 in synsets_2:
            if synset_1.pos() == synset_2.pos():
                #sim = wn.path_similarity(synset_1, synset_2)
                sim = wn.lch_similarity(synset_1, synset_2)  # same POS needed
                if sim is not None and (max_sim is None or max_sim < sim):
                    max_sim = sim
                    best_pair = synset_1, synset_2
    #if best_pair!=(None,None): # or max_sim!=None
    if max_sim is not None:
        spd = best_pair[0].shortest_path_distance(best_pair[1])
        lch = best_pair[0].lowest_common_hypernyms(best_pair[1])
        lch_depth = None
        if lch:
            lch_depth = max(s.min_depth() for s in lch)
        return best_pair, max_sim, spd, lch_depth
    return None
 def __init__(self, metric="path", double_aggregator=False):
     """
     :param metric: path lch and wup metric
     :param double_aggregator:
     """
     self.metric = metric
     self.aggregation_mode_prev = ['max', 'mean',
                                   'median']  #["mean", "max", "median"]
     self.aggregation_mode = ["mean", "std", "max", "min", "median"]
     self.aggregator = [
         None if m == "" else getattr(np, m) for m in self.aggregation_mode
     ]
     self.aggregator_prev = [
         None if m == "" else getattr(np, m)
         for m in self.aggregation_mode_prev
     ]
     self.double_aggregator = double_aggregator
     if self.metric == "path":  # sense shortest path
         self.metric_func = lambda syn1, syn2: wn.path_similarity(
             syn1, syn2)
     elif self.metric == "lch":
         self.metric_func = lambda syn1, syn2: wn.lch_similarity(syn1, syn2)
     elif self.metric == "wup":  # words' depth and ancestor depth + shortest path
         self.metric_func = lambda syn1, syn2: wn.wup_similarity(syn1, syn2)
     else:
         raise ValueError(
             "Wrong similarity metric: %s, should be one of path/lch/wup." %
             self.metric)
Example #9
    def get_lch_average(self, sentence1, sentence2):
        sentence1_unique, sentence2_unique = self.sentence_difference(
            sentence1, sentence2)
        avg_similarity = 0
        total_count = 0
        # Measure similarity for each unique word from A to each unique word to B
        for sentence1_word in sentence1_unique:
            for sentence2_word in sentence2_unique:
                sentence1_word_tag = sentence1.get_tag(sentence1_word)
                sentence2_word_tag = sentence2.get_tag(sentence2_word)
                synsets_word1 = wordnet.synsets(sentence1_word,
                                                sentence1_word_tag)
                synsets_word2 = wordnet.synsets(sentence2_word,
                                                sentence2_word_tag)

                if len(synsets_word1) == 0:
                    synsets_word1 = wordnet.synsets(sentence1_word)
                if len(synsets_word2) == 0:
                    synsets_word2 = wordnet.synsets(sentence2_word)
                if len(synsets_word1) > 0 and len(synsets_word2) > 0:
                    # Skip words with different tags
                    if synsets_word1[0].pos() != synsets_word2[0].pos():
                        continue
                    similarity = wordnet.lch_similarity(
                        synsets_word1[0], synsets_word2[0])
                    if similarity is not None:
                        avg_similarity += similarity
                        total_count += 1
        if total_count == 0:
            return 0
        return float(avg_similarity) / float(total_count)
Example #10
    def get_lch_min(self, sentence1, sentence2):
        sentence1_unique, sentence2_unique = self.sentence_difference(
            sentence1, sentence2)
        min_similarity = float("inf")  # sys.maxint is Python 2-only
        # Measure similarity for each unique word from A to each unique word to B
        for sentence1_word in sentence1_unique:
            for sentence2_word in sentence2_unique:
                sentence1_word_tag = sentence1.get_tag(sentence1_word)
                sentence2_word_tag = sentence2.get_tag(sentence2_word)
                synsets_word1 = wordnet.synsets(sentence1_word,
                                                sentence1_word_tag)
                synsets_word2 = wordnet.synsets(sentence2_word,
                                                sentence2_word_tag)

                if len(synsets_word1) == 0:
                    synsets_word1 = wordnet.synsets(sentence1_word)
                if len(synsets_word2) == 0:
                    synsets_word2 = wordnet.synsets(sentence2_word)

                if len(synsets_word1) > 0 and len(synsets_word2) > 0:
                    # Skip words with different tags
                    if synsets_word1[0].pos() != synsets_word2[0].pos():
                        continue
                    similarity = wordnet.lch_similarity(
                        synsets_word1[0], synsets_word2[0])
                    if similarity is not None:
                        min_similarity = min(similarity, min_similarity)
        if min_similarity == float("inf"):
            return 0
        return min_similarity
Example #11
 def _get_simil_term(self, x, y, mode='lch'):
     '''
     Returns the similarity between two terms x and y
     Args:
         x, y (str)
         mode = lch | path | wup
     '''
     w1 = wn.synsets(x)
     w2 = wn.synsets(y)
     if len(w1) == 0 or len(w2) == 0:
         return 0
     if mode == 'lch':
         scores = [
             wn.lch_similarity(e1, e2) for e1 in w1 for e2 in w2
             if e1.pos() == e2.pos()  # was `e1.pos == e2`, which never matched
         ]
     elif mode == 'path':
         scores = [
             wn.path_similarity(e1, e2) for e1 in w1 for e2 in w2
             if e1.pos() == e2.pos()
         ]
     elif mode == 'wup':
         scores = [
             wn.wup_similarity(e1, e2) for e1 in w1 for e2 in w2
             if e1.pos() == e2.pos()
         ]
     else:
         return 0
     scores = [s for s in scores if s is not None]
     return max(scores) if scores else 0
Example #12
def relaxedSimi(syn1, syn2):
    """
    Compute similarity between two synsets
    """
    try:
        return wn.lch_similarity(syn1, syn2) or 0
    except WordNetError:
        return 0
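
The WordNetError branch above matters because lch_similarity raises (rather than returning None) when the two synsets have different parts of speech; a quick demonstration:

from nltk.corpus import wordnet as wn
from nltk.corpus.reader.wordnet import WordNetError

try:
    wn.lch_similarity(wn.synset('dog.n.01'), wn.synset('run.v.01'))
except WordNetError as err:
    print('cross-POS lch fails:', err)  # relaxedSimi() maps this case to 0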
Example #13
 def lch(self, synset_a, synset_b):
     return (
         self.normalize(
             self.MAX_VALUE,
             wordnet.lch_similarity(synset_a, synset_b, verbose=True),
         )
         if synset_a.pos() == synset_b.pos()
         else 0
     )
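
The MAX_VALUE normalizer above is presumably the LCH ceiling, i.e. a synset's self-similarity, which equals log(2*D) for taxonomy depth D (19 for NLTK's noun taxonomy). A quick check under that assumption:

from math import log
from nltk.corpus import wordnet as wn

dog = wn.synset('dog.n.01')
print(wn.lch_similarity(dog, dog))  # ~3.6375861597263857
print(log(2 * 19))                  # the same ceiling: -log(1/(2*19))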
Example #14
    def classify(self, ex):

        word = ex["word"].value
        synset_ex = wn.synsets(word.replace(" ", "_"))[0]
        similarities = [
            wn.lch_similarity(synset_ex, synset_t)
            for synset_t in self.training_synsets
        ]
        cls_i = na.argmax(similarities)
        return self.training_table[cls_i]["class"]
Example #15
File: similarity.py  Project: kmwenja/ftm
def similarity_by_path(sense1, sense2, option="path"):
  if option.lower() in ["path", "path_similarity"]: # Path similarity
    return max(wn.path_similarity(sense1, sense2),
               wn.path_similarity(sense2, sense1))
  elif option.lower() in ["wup", "wupa", "wu-palmer"]: # Wu-Palmer
    return wn.wup_similarity(sense1, sense2)
  elif option.lower() in ['lch', "leacock-chordorow"]: # Leacock-Chodorow
    if sense1.pos() != sense2.pos(): # lch can't do diff POS
      return 0
    return wn.lch_similarity(sense1, sense2)
def wnsensesim(synset1, synset2, metric):

    if metric == 'path_similarity':
        return wn.path_similarity(synset1, synset2)
    elif metric == 'lch_similarity':
        return wn.lch_similarity(synset1, synset2)
    elif metric == 'wup_similarity':
        return wn.wup_similarity(synset1, synset2)
    else:#add more similarity measures e.g., jcn
        print "Unsupported wn similarity measure requested"
Example #17
def similarity_by_path(sense1, sense2, option="path"):
    """ Returns maximum path similarity between two senses. """
    if option.lower() in ["path", "path_similarity"]: # Path similarity
        return max(wn.path_similarity(sense1, sense2),
                   wn.path_similarity(sense2, sense1))
    elif option.lower() in ["wup", "wupa", "wu-palmer"]: # Wu-Palmer
        return wn.wup_similarity(sense1, sense2)
    elif option.lower() in ['lch', "leacock-chordorow"]: # Leacock-Chodorow
        if sense1.pos() != sense2.pos(): # lch can't do diff POS
            return 0
        return wn.lch_similarity(sense1, sense2)
 def __init__(self, obs_corpus, target_corpus, metric="path", aggregation_mode_prev="", aggregation_mode=""):
     super().__init__(obs_corpus, target_corpus, aggregation_mode, None, aggregation_mode_prev)
     self.metric = metric
     if self.metric == "path":
         self.metric_func = lambda syn1, syn2: wn.path_similarity(syn1, syn2)
     elif self.metric == "lch":
         self.metric_func = lambda syn1, syn2: wn.lch_similarity(syn1, syn2)
     elif self.metric == "wup":
         self.metric_func = lambda syn1, syn2: wn.wup_similarity(syn1, syn2)
     else:
         raise ValueError("Wrong similarity metric: %s, should be one of path/lch/wup." % self.metric)
Example #20
def compute_similarities(s1, s2, sim):
    if sim == "path":
        return wn.path_similarity(s1, s2)
    elif sim == "lch":
        return wn.lch_similarity(s1, s2)
    elif sim == "wup":
        return wn.wup_similarity(s1, s2)
    elif sim == "res":
        return wn.res_similarity(s1, s2, genesis_ic)
    elif sim == "jcn":
        return wn.jcn_similarity(s1, s2, genesis_ic)
    elif sim == "lin":
        return wn.lin_similarity(s1, s2, genesis_ic)
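
A usage sketch; the genesis_ic the function expects can be built from the Genesis corpus as in the NLTK WordNet howto:

from nltk.corpus import wordnet as wn
from nltk.corpus import genesis

genesis_ic = wn.ic(genesis, False, 0.0)  # information content from the Genesis corpus
print(compute_similarities(wn.synset('dog.n.01'), wn.synset('cat.n.01'), 'lch'))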
Example #21
def similarity_by_path(sense1, sense2, option="path"):
  """ Returns maximum path similarity between two senses. """
  if option.lower() in ["path", "path_similarity"]: # Path similarity
    return max(wn.path_similarity(sense1, sense2),
               wn.path_similarity(sense2, sense1))
  elif option.lower() in ["wup", "wupa", "wu-palmer"]: # Wu-Palmer
    return wn.wup_similarity(sense1, sense2)
  elif option.lower() in ['lch', "leacock-chordorow"]: # Leacock-Chodorow
    if sense1.pos() != sense2.pos(): # lch can't do diff POS
      return 0
    return wn.lch_similarity(sense1, sense2)

Example #22
def bagSimilarity(s1, s2):
    if len(s1) == 0 or len(s2) == 0:
        return 1

    total = 0

    for a in s1:
        for b in s2:
            if a.pos() == b.pos():
                sim = wn.lch_similarity(a, b)
                if sim is not None:  # guard against missing paths
                    total += sim

    total /= (len(s1) * len(s2))
    return total
Example #23
def dist_all_synsets(first, second):

    f_syns = wn.synsets(first)
    s_syns = wn.synsets(second)

    #Path Similarity
    #A 0-1 similarity score based on the shortest path that connects the senses in the is-a (hypernym/hyponym) taxonomy.
    #A score of 1 represents identity, i.e. comparing a sense with itself will return 1.
    max_path_sim = 0.0
    for f in f_syns:
        for s in s_syns:
            path_sim = wn.path_similarity(f, s)
            # path_similarity returns None when no path exists
            if path_sim is not None and path_sim > max_path_sim:
                max_path_sim = path_sim

    #Leacock-Chodorow Similarity
    #A similarity score of the shortest path connecting the senses & the maximum depth of the taxonomy in which the senses occur.
    #The relationship is given as -log(p/2d) where p is the shortest path length and d the taxonomy depth.

    max_lch = 0.0
    for f in f_syns:

        for s in s_syns:
            lch = 0.0
            try:
                lch = wn.lch_similarity(s, f)
            except WordNetError:
                pass

            if lch > max_lch:
                max_lch = lch
    max_lch = max_lch / 3.6375
    #Wu-Palmer Similarity
    #A similarity score based on the depth of the two senses in the taxonomy and that of their Least Common Subsumer (most specific ancestor node).
    #The LCS does not necessarily feature in the shortest path connecting the two senses, as it is by definition the common ancestor deepest in the taxonomy, not closest to the two senses. Typically, however, it will so feature. Where multiple candidates for the LCS exist, that whose shortest path to the root node is the longest will be selected. Where the LCS has multiple paths to the root, the longer path is used for the purposes of the calculation.
    wup_sim = 0
    if f_syns and s_syns:
        wup_sim = wn.wup_similarity(f_syns[0], s_syns[0])
        if wup_sim is None:
            wup_sim = -1

    return (max_path_sim, max_lch, wup_sim)
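
Usage sketch for the function above: it returns a (best path score, normalized best LCH, Wu-Palmer of the first senses) triple:

path_s, lch_s, wup_s = dist_all_synsets('dog', 'cat')
print(path_s, lch_s, wup_s)  # each roughly in the 0..1 range (wup may be -1 if undefined)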
Example #24
    def checksim(self, synset1, synset2):
        score = 0
        for syn1 in synset1:
            for syn2 in synset2:
                try:
                    ns = wn.lch_similarity(syn1, syn2)
                except:
                    ns = 0

    #            ns = wn.wup_similarity(syn1,syn2)
                if isinstance(ns, float):
                    if ns > score:
                        score = ns
        return (score)
Example #25
def wnsim(synset1, synset2, method='all'):
    synset_patt = re.compile(r'^.+\..+\.\d+$')

    if synset_patt.match(synset1):
        s1 = wn.synset(synset1)
    else:
        s1 = wn_synset(synset1)

    if synset_patt.match(synset2):
        s2 = wn.synset(synset2)
    else:
        s2 = wn_synset(synset2)

    if s1 is None or s2 is None:
        return 0

    if method == 'lin':
        return wn.lin_similarity(s1, s2, wn_ic)
    elif method == 'res':
        return wn.res_similarity(s1, s2, wn_ic)
    elif method == 'jcn':
        return wn.jcn_similarity(s1, s2, wn_ic)
    elif method == 'wup':
        return wn.wup_similarity(s1, s2)
    elif method == 'path':
        return wn.path_similarity(s1, s2)
    elif method == 'lch':
        return wn.lch_similarity(s1, s2)
    elif method == 'all':
        return [
            ('lin', wn.lin_similarity(s1, s2, wn_ic)),
            ('res', wn.res_similarity(s1, s2, wn_ic)),
            ('jcn', wn.jcn_similarity(s1, s2, wn_ic)),
            ('wup', wn.wup_similarity(s1, s2)),
            ('path', wn.path_similarity(s1, s2)),
            ('lch', wn.lch_similarity(s1, s2))
        ]
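
A usage sketch, assuming wn_ic is an information-content dictionary loaded from wordnet_ic (the wn_synset fallback helper is project-specific and not shown):

from nltk.corpus import wordnet_ic
wn_ic = wordnet_ic.ic('ic-brown.dat')

# Both names match the synset pattern, so wn.synset() is used directly:
print(wnsim('dog.n.01', 'cat.n.01', method='lch'))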
def create_graphs(doc_list):
    documents = doc_list
    if documents is None:
        documents = default_document_list()

    distance_functions = [
        (wn.lch_similarity(SYNSETS[0], SYNSETS[0]), 'lch',
         lambda sense_1, sense_2: wn.lch_similarity(sense_1, sense_2)),
        (1.0, 'lin',
         lambda sense_1, sense_2: wn.lin_similarity(sense_1, sense_2, CORPUS)),
        (10.636958516573292, 'res',
         lambda sense_1, sense_2: wn.res_similarity(sense_1, sense_2, CORPUS)),
        (wn.jcn_similarity(SYNSETS[0], SYNSETS[0], CORPUS), 'jcn',
         lambda sense_1, sense_2: wn.jcn_similarity(sense_1, sense_2, CORPUS)),
        (1.0, 'path',
         lambda sense_1, sense_2: wn.path_similarity(sense_1, sense_2)),
    ]
    all_senses = []
    for doc in documents:
        for sense in doc.top_senses():
            all_senses.append((sense, doc.name))
    against_colors = ['r', 'b', 'g']
    against_to = [
        wn.synset(word)
        for word in ["economy.n.01", "philosophy.n.02", "politics.n.01"]
    ]
    create_against_graph('phyl_eco_pol', documents, all_senses, against_to,
                         distance_functions, against_colors)

    against_to = SYNSETS

    against_colors = [(random(), random(), random())
                      for _i in range(0, len(SYNSETS))]
    create_against_graph('handpicked', documents, all_senses, against_to,
                         distance_functions, against_colors)

    create_graph_top_senses(documents, all_senses, distance_functions)
Example #27
 def __word_net_lch_eval(self, hint: str, target: str):
     h_synsets = wn.synsets(hint)
     t_synsets = wn.synsets(target)
     lst = []
     for h in h_synsets:
         for t in t_synsets:
             try:
                 strength = wn.lch_similarity(h, t)
             except WordNetError:
                 strength = -1
             lst.append(strength if strength is not None else -1)
     if all([x == -1 for x in lst]):
         return -9.999
     else:
         return max(lst)  # get strongest hint
Example #28
def wn_similarity(synset_1, synset_2, similarity='Shortest_Path'):
    if similarity == "Shortest_Path":
        sim = wn.path_similarity(synset_1, synset_2)
    elif similarity == "Leacock_Chodorow":
        sim = wn.lch_similarity(synset_1, synset_2)
    elif similarity == "Wu_Palmer":
        sim = wn.wup_similarity(synset_1, synset_2)
    elif similarity == "Resnik":
        sim = synset_1.res_similarity(synset_2, ic)
    elif similarity == "Jiang_Conrath":
        sim = synset_1.jcn_similarity(synset_2, ic)
    elif similarity == "Lin":
        sim = synset_1.lin_similarity(synset_2, ic)
    else:
        sim = 0
    return sim
Example #29
def assignToCategoriesLCH(category_synsets,word_synsets):
    prettyprint("start assigning lch_similarity...")
    assignedDict = {}
    for category in category_synsets:
        assignedDict[category] = []
    
    for word in word_synsets:
        tempValues = []
        for category in category_synsets:
            #similarity = wn.path_similarity(word, category)
            # lch raises WordNetError when word and category differ in POS
            if word.pos() == category.pos():
                similarity = wn.lch_similarity(word, category)
            else:
                similarity = -1  # placeholder keeps indices aligned with category_synsets
            tempValues.append(similarity)
            #print("appended "+str(similarity)+" for "+str(category)+ " and "+str(word))
        #print("__________________________________")
        indexOfMaxValue = tempValues.index(getMaxFromList(tempValues))
        assignedDict[ category_synsets[ indexOfMaxValue ] ].append( word )
    return assignedDict
def lch_sim(word1, word2):
    """
    Leacock-Chodorow Similarity: Return a score denoting how similar
    two word senses are, based on the shortest path that connects
    the senses (as above) and the maximum depth of the taxonomy in
    which the senses occur. Range: 0 to about 3.6.

    The relationship is given as -log(p/2d) where p is the
    shortest path length and d the taxonomy depth.
    """
    try:
        value = wn.lch_similarity(word1, word2)
        if value is None:
            return 0
        return value / 3.6  # scale to roughly the 0..1 range
    except Exception:
        return 0
Example #31
def similarity(words: list)->list:
    '''Calculates similarity based on the given synsets'''
    results = []
    synsets = ask_for_word_defs(words)
    print("\n{}\n".format('*'*80))
    for i in range(int(len(synsets)/2)):
        print("{:30}{}".format(str(synsets[2*i]), str(synsets[2*i + 1])))
    print("\n{}\n".format('*'*80))
    print("Running comparisons...")
    for i in range(int(len(synsets)/2)):
        if synsets[2*i] is None or synsets[2*i + 1] is None:
            results.append(["Undefined", "Undefined", -1, -1, -1, "None", "None"])
            continue
        result = [words[2*i], words[2*i + 1], 0, 0, 0, synsets[2*i].definition(), synsets[2*i + 1].definition()]
        result[2] = wordnet.lch_similarity(synsets[2*i],synsets[2*i + 1])
        result[3] = wordnet.wup_similarity(synsets[2*i],synsets[2*i + 1])
        result[4] = wordnet.path_similarity(synsets[2*i],synsets[2*i + 1])
        results.append(result)
    print("\n{}\n".format('*'*80))
    return results
Example #32
    def word_similarity(self, w1, w2, syns, loc, thr_sim):
        # `wn.NOUN or wn.ADJ` evaluates to just wn.NOUN, so query both POS explicitly
        syn1 = wn.synsets(w1, wn.NOUN) + wn.synsets(w1, wn.ADJ)
        syn2 = wn.synsets(w2, wn.NOUN) + wn.synsets(w2, wn.ADJ)

        if len(syn1) > 0 and len(syn2) > 0:
            score = 0
            max_score = 0
            count = 0
            sns1 = syn1[0]
            sns2 = syn2[0]
            for i in range(0, len(syn1)):
                for j in range(0, len(syn2)):
                    score = None  # reset per pair; unmatched POS leaves it unset
                    if self.wordnet_metric == 'j':  # Jiang-Conrath Similarity
                        if syn1[i].pos() == syn2[j].pos():  # IC measures need matching POS
                            # assumes an IC dictionary, e.g. brown_ic from wordnet_ic
                            score = wn.jcn_similarity(syn1[i], syn2[j], brown_ic)
                    elif self.wordnet_metric == 'le':  # Leacock-Chodorow Similarity
                        if syn1[i].pos() == syn2[j].pos():  # lch raises across POS
                            score = wn.lch_similarity(syn1[i],
                                                      syn2[j],
                                                      simulate_root=False)
                    elif self.wordnet_metric == 'li':  # Lin Similarity
                        if syn1[i].pos() == syn2[j].pos():
                            score = wn.lin_similarity(syn1[i], syn2[j], brown_ic)
                    elif self.wordnet_metric == 'p':  # Path Similarity
                        score = wn.path_similarity(syn1[i], syn2[j])
                    elif self.wordnet_metric == 'w':  # Wu-Palmer Similarity. It can not be '0'. It ranges in (0,1]
                        score = wn.wup_similarity(syn1[i], syn2[j])

                    if score is not None and score > max_score:  # Finding the maximum score
                        max_score = score
                        sns1 = syn1[i]
                        sns2 = syn2[j]
                        if max_score >= thr_sim:  # Storing all the synset pairs that have scores > threshold
                            syns, loc = self.merging_synsets(
                                syns, w1, w2, sns1, sns2, max_score, loc)
                            count = count + 1
            if count == 0:  # Storing the synset that has maximum score but the score < threshold
                syns, loc = self.merging_synsets(syns, w1, w2, sns1, sns2,
                                                 max_score, loc)
        return syns, loc
Example #33
def word_similarity(measure, word1, word2, pos):
    wsim = 0.0
    if pos is "n" or pos is "v":
        if pos is "n":
            word1 = wn.synsets(word1, wn.NOUN)
            word2 = wn.synsets(word2, wn.NOUN)
        else:
            word1 = wn.synsets(word1, wn.VERB)
            word2 = wn.synsets(word2, wn.VERB)

        if word1 != [] and word2 != []:
            word1 = word1[0]
            word2 = word2[0]
            if measure == "path":
                wsim = wn.path_similarity(word1, word2)
            if measure == "lch":
                wsim = wn.lch_similarity(word1, word2)
                wsim = (wsim / 3.63758615973)
            if measure == "wup":
                wsim = wn.wup_similarity(word1, word2)
            if measure == "res":
                wsim = word1.res_similarity(word2, brown_ic)
                wsim = wsim / 9.00601439892
            if measure == "jcn":
                wsim = word1.jcn_similarity(word2, brown_ic)
                wsim = wsim / (1e+300)
            if measure == "lin":
                wsim = word1.lin_similarity(word2, brown_ic)
            return wsim
        else:
            return wsim
    else:
        if pos == "r" or pos == "a":
            if word1 == word2:
                return 1.0
            else:
                return 0.0
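
Usage sketch for the function above; brown_ic for the IC-based measures is assumed to be loaded from wordnet_ic:

from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic

brown_ic = wordnet_ic.ic('ic-brown.dat')
print(word_similarity('lch', 'dog', 'cat', 'n'))  # LCH scaled to roughly 0..1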
Example #34
def lch_similarity(synsets1, synsets2):
    """
        This function returns Leacock Chodorow similarity (LCH)
        between two synsets, based on the shortest path distance
        and the maximum depth of the taxonomy. The equation to
        calculate LCH similarity is shown below:

        .. math::

            lch\\_similarity = -\\log{\\frac{shortest\\_path\\_distance(synsets1,
                               synsets2)}{2 \\times taxonomy\\_depth}}

        :param `Synset` synsets1: first synset supplied to measure
                                  the LCH similarity
        :param `Synset` synsets2: second synset supplied to measure
                                  the LCH similarity

        :return: LCH similarity between two synsets
        :rtype: float

        :Example:

            >>> from pythainlp.corpus.wordnet import lch_similarity, synset
            >>>
            >>> entity = synset('entity.n.01')
            >>> obj = synset('object.n.01')
            >>> cat = synset('cat.n.01')
            >>>
            >>> lch_similarity(entity, obj)
            2.538973871058276
            >>> lch_similarity(entity, cat)
            0.9985288301111273
            >>> lch_similarity(obj, cat)
            1.1526795099383855
    """
    return wordnet.lch_similarity(synsets1, synsets2)
Example #35
def similarity_by_path(sense1, sense2, option="path", no_path_value=0):
    """
    Returns maximum path similarity between two senses.
    If no path is found between the two senses, returns no_path_value.
    """
    if option.lower() in ["path", "path_similarity"]:  # Path similarity
        sim_dir1 = wn.path_similarity(sense1, sense2)
        sim_dir2 = wn.path_similarity(sense2, sense1)
        if sim_dir1 is None and sim_dir2 is None:
            return no_path_value
        elif sim_dir1 is None:
            return sim_dir2
        elif sim_dir2 is None:
            return sim_dir1
        else:
            return max(sim_dir2, sim_dir1)
    elif option.lower() in ["wup", "wupa", "wu-palmer"]:  # Wu-Palmer
        wup_sim = wn.wup_similarity(sense1, sense2)
        return wup_sim if wup_sim is not None else no_path_value
    elif option.lower() in ['lch', "leacock-chordorow"]:  # Leacock-Chodorow
        if sense1.pos() != sense2.pos():  # lch can't do diff POS
            return no_path_value
        return wn.lch_similarity(sense1, sense2)

#Import IC calculation
from nltk.corpus import wordnet_ic
brown_ic = wordnet_ic.ic('ic-brown-resnik-add1.dat')
bnc_ic = wordnet_ic.ic('ic-bnc-resnik-add1.dat')

#For each pair of synsets, compute distance
for s1 in synsets:
  syn1 = wn.of2ss(s1)
  for s2 in synsets:
    syn2 = wn.of2ss(s2)
    distances_path[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.path_similarity(syn1,syn2)
    distances_lch[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.lch_similarity(syn1,syn2)
    distances_wup[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.wup_similarity(syn1,syn2)
    distances_res[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.res_similarity(syn1,syn2,brown_ic)
    distances_jcn[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.jcn_similarity(syn1,syn2,brown_ic)
    distances_lin[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.lin_similarity(syn1,syn2,brown_ic)
    distances_res_bnc[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.res_similarity(syn1,syn2,bnc_ic)
    distances_jcn_bnc[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.jcn_similarity(syn1,syn2,bnc_ic)
    distances_lin_bnc[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.lin_similarity(syn1,syn2,bnc_ic)
    #distances_path[labelsNLTK.index(s1)][labelsNLTK.index(s2)] =1/(labelsNLTK.index(s2)+1) 
    #distances_lch[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)
    #distances_wup[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)  
    #distances_res[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)  
    #distances_jcn[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)  
    #distances_lin[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)  
    #distances_res_bnc[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)  
    #distances_jcn_bnc[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)  
Example #37
def lch_similarity(synset1, synset2):
    return wn.lch_similarity(synset1, synset2)
    #coeffs['drink'] = 1.0-float(sys.argv[4])
    coeffs['eat'] = 1.0
    coeffs['drink'] = 1.0
    for s in sentences:
        s = s.split()
        if s[1] not in verbs_gref:
            verbs_gref[s[1]] = np.zeros((num_basis, num_basis))
        verbs_gref[s[1]] = verbs_gref[s[1]]+np.outer(matrix[s[0]],matrix[s[2]])

        verbs[s[1]] = np.zeros((num_basis, num_basis))
        simweights = {}
        for v in verbs:
            if sys.argv[3]=='wup':
                simweights[v] = wn.wup_similarity(wn.synset(s[1]+'.v.01'), wn.synset(v+'.v.01'))
            elif sys.argv[3]=='lch':
                simweights[v] = wn.lch_similarity(wn.synset(s[1]+'.v.01'), wn.synset(v+'.v.01'))
            elif sys.argv[3]=='path':
                simweights[v] = wn.path_similarity(wn.synset(s[1]+'.v.01'), wn.synset(v+'.v.01'))
            verbs[s[1]] += coeffs[v]*simweights[v]*verbs_gref[v]
        verbs[s[1]] /= float(sum(simweights.values()))
        
    # Learn the reference matrices using Grefenstette for swallow consume and gulp
    gold_verbs = ['swallow','consume','gulp']
    for gv in gold_verbs:
        with open('train/'+gv+'_train') as f:
            sentences = f.readlines()
        for s in sentences:
            s = s.split()
            if s[1] not in verbs_gref:
                verbs_gref[s[1]] = np.zeros((num_basis, num_basis))
            verbs_gref[s[1]] = verbs_gref[s[1]]+np.outer(matrix[s[0]],matrix[s[2]])
Example #39
 def wn_similarity(synset_1, synset_2):
     return wn.lch_similarity(synset_1, synset_2)
Example #40
File: wordnet.py  Project: zkan/pythainlp
def lch_similarity(synsets1,synsets2):
	return wordnet.lch_similarity(synsets1,synsets2)
Example #41
def lch_sim_fun(vq_words=[]):
    l1 = knowledge = [
        'recite', 'review', 'point', 'recognize', 'describe', 'choose',
        'examine', 'identify', 'enumerate', 'find', 'select', 'what',
        'memorize', 'collect', 'sequence', 'when', 'duplicate', 'who', 'label',
        'write', 'indicate', 'state', 'tabulate', 'which', 'relate', 'show',
        'arrange', 'cite', 'match', 'define', 'locate', 'draw', 'repeat',
        'remember', 'trace', 'read', 'quote', 'spell', 'memorise', 'how',
        'observe', 'recognise', 'copy', 'why', 'outline', 'count', 'name',
        'recall', 'study', 'omit', 'list', 'tell', 'reproduce', 'record',
        'retell', 'meet', 'listen', 'where', 'order', 'view'
    ]

    l2 = comprehension = [
        'compare', 'cite', 'give', 'predict', 'recognize', 'describe',
        'articulate', 'detail', 'order', 'characterize', 'generalize',
        'factor', 'summarize', 'select', 'illustrate', 'visualize', 'group',
        'trace', 'purpose', 'defend', 'rewrite', 'relate', 'approximate',
        'demonstrate', 'indicate', 'add', 'interact', 'tell', 'extrapolate',
        'show', 'rephrase', 'paraphrase', 'infer', 'contrast', 'locate',
        'picture', 'extend', 'associate', 'conclude', 'express', 'interpolate',
        'generalise', 'clarify', 'observe', 'understand', 'differentiate',
        'review', 'distinguish', 'estimate', 'subtract', 'discuss',
        'interpret', 'summarise', 'convert', 'translate', 'compute', 'outline',
        'identify', 'elaborate', 'ask', 'example', 'classify', 'report',
        'restate', 'explain', 'match'
    ]

    l3 = application = [
        'represent', 'show', 'identify', 'participate', 'derive', 'group',
        'calculate', 'graph', 'dramatize', 'choose', 'factor', 'include',
        'allocate', 'handle', 'practice', 'relate',
        'schedule', 'report', 'assess', 'collect', 'investigate', 'categorise',
        'ascertain', 'round', 'sketch', 'transcribe', 'sequence', 'imitate',
        'discover', 'connect', 'tabulate', 'employ', 'avoid', 'experiment',
        'manipulate', 'exercise', 'extend', 'associate', 'modify',
        'personalize', 'dramatise', 'explore', 'teach', 'change', 'perform',
        'summarise', 'act', 'implement', 'assign', 'alphabetize', 'relate',
        'articulate', 'administer', 'subscribe', 'instruct', 'determine',
        'apply', 'establish', 'select', 'illustrate', 'plot', 'use', 'prepare',
        'paint', 'transfer', 'construct', 'process', 'interpret', 'translate',
        'depreciate', 'complete', 'expose', 'acquire', 'adapt', 'link',
        'simulate', 'diminish', 'compute', 'project', 'demonstrate', 'control',
        'predict', 'contribute', 'examine', 'attain', 'capture', 'develop',
        'provide', 'utilize', 'write', 'build', 'interview', 'organise',
        'classify', 'draw', 'express', 'customize', 'price', 'chart',
        'produce', 'plan', 'inform', 'solve', 'correlation', 'model',
        'operate', 'convert'
    ]

    l4 = analysis = [
        'find', 'focus', 'identify', 'query', 'debate', 'relationships',
        'derive', 'group', 'calculate', 'explain', 'theme', 'choose', 'reason',
        'proof', 'reorganise', 'point', 'interrupt', 'difference', 'arrange',
        'list', 'investigate', 'classify', 'discover', 'motive', 'deduce',
        'connect', 'advertise', 'detect', 'confirm', 'research', 'experiment',
        'size', 'cause', 'contrast', 'inspect', 'explore', 'distinguish',
        'layout', 'optimize', 'interpret', 'question', 'omit', 'depth',
        'ensure', 'distinction', 'inference', 'divide', 'relate', 'manage',
        'rank', 'maximize', 'categorize', 'establish', 'select', 'illustrate',
        'subdivide', 'transform', 'comparing', 'assumption', 'analyze',
        'function', 'analyse', 'train', 'differentiate', 'breadboard',
        'dissect', 'see', 'limit', 'highlight', 'appraise', 'diagnose',
        'blueprint', 'compare', 'recognize', 'characterize', 'examine', 'file',
        'discriminate', 'discussion', 'isolate', 'inventory', 'test', 'survey',
        'document', 'infer', 'categorise', 'breakdown', 'separate', 'effect',
        'diagram', 'simplify', 'point', 'audit', 'criticize', 'outline',
        'correlate', 'minimize', 'prioritize', 'organise', 'model', 'order',
        'test'
    ]

    l5 = synthesis = [
        'incorporate', 'code', 'reorganize', 'invent', 'generalize', 'compose',
        'overhaul', 'explain', 'hypothesize', 'program', 'combine', 'choose',
        'frame', 'integrate', 'collaborate', 'handle', 'format', 'propose',
        'express', 'progress', 'reconstruct', 'speculate', 'discuss', 'comply',
        'arrange', 'intervene', 'collect', 'hypothesise', 'debug', 'enhance',
        'anticipate', 'originate', 'formulate', 'discover', 'reinforce',
        'design', 'animate', 'substitute', 'network', 'join', 'experiment',
        'adapt', 'lecture', 'contrast', 'extend', 'visualise', 'modify',
        'makeup', 'prescribe', 'imagine', 'interface', 'estimate', 'generate',
        'change', 'improve', 'convert', 'elaborate', 'initiate',
        'individualize', 'think', 'revise', 'organize', 'relate', 'assemble',
        'synthesize', 'categorize', 'summarize', 'prepare', 'create',
        'transform', 'construct', 'predict', 'theorise', 'minimise', 'tell',
        'cope', 'maximise', 'innovate', 'specify', 'communicate', 'setup',
        'pretend', 'budget', 'compile', 'suppose', 'tabulate', 'delete',
        'compare', 'rewrite', 'devise', 'abstract', 'dictate', 'cultivate',
        'happen', 'portray', 'depict', 'develop', 'perform', 'make', 'write',
        'build', 'test', 'negotiate', 'rearrange', 'simplify', 'produce',
        'plan', 'validate', 'structure', 'add', 'outline', 'facilitate',
        'correspond', 'solve', 'model', 'original'
    ]

    l6 = evaluation = [
        'validate', 'compare', 'deduct', 'useful', 'consider', 'conclude',
        'predict', 'relate', 'describe', 'influence', 'rank', 'assess', 'rate',
        'persuade', 'determine', 'measure', 'critique', 'mark', 'summarize',
        'select', 'discuss', 'discriminate', 'prove', 'verify', 'defend',
        'support', 'debate', 'grade', 'argue', 'disprove', 'recommend', 'test',
        'infer', 'contrast', 'choose', 'attach', 'good', 'importance',
        'evaluate', 'criteria', 'prescribe', 'hire', 'award', 'perceive',
        'dispute', 'know', 'decide', 'opinion', 'judge', 'estimate', 'why',
        'interpret', 'counsel', 'criticize', 'effective', 'prioritize',
        'value', 'agree', 'bad', 'convince', 'prioritise', 'release', 'frame',
        'appraise', 'explain', 'criticise', 'justify'
    ]

    cl_listoflist = []
    cl_listoflist.append(l1)
    cl_listoflist.append(l2)
    cl_listoflist.append(l3)
    cl_listoflist.append(l4)
    cl_listoflist.append(l5)
    cl_listoflist.append(l6)

    cnt_log = 0

    final_level_of_ques = -1
    final_sim_of_ques_with_all_levels = [0, 0, 0, 0, 0, 0]
    final_area_sim_of_ques_with_all_levels = [0, 0, 0, 0, 0, 0]
    for vq_word in vq_words:
        # calculating sum and avg of sim of word with each list
        # print("\n\ndoing for word -----" , vq_word)
        sum_of_sim_all_levels = []
        avg_of_sim_all_levels = []
        for i, list_i in enumerate(cl_listoflist):
            # print("list number  : " , i)
            sum_of_sim = 0
            for l_word in list_i:
                # print("two words " , vq_word , l_word)
                if len(wordnet.synsets(vq_word)) == 0:
                    # print vq_word
                    break
                vq_word_syn = wordnet.synsets(vq_word)[0]
                # print("l_word => wordnet.synsets(l_word)",l_word, "=>" ,wordnet.synsets(l_word))
                if len(wordnet.synsets(l_word)) == 0:
                    # print l_word
                    continue
                l_word_syn = wordnet.synsets(l_word)[0]
                try:
                    lch_sim = wordnet.lch_similarity(vq_word_syn, l_word_syn)
                except:
                    # print vq_word_syn,l_word_syn,"->exception"
                    continue
                # lch_sim = (vq_word_syn).jcn_similarity(l_word_syn)
                if lch_sim is not None:
                    sum_of_sim = sum_of_sim + lch_sim
                    # sum_of_sim += 1
                    # print(" counted ",vq_word,l_word , "synset " , vq_word_syn , l_word_syn)
                else:
                    cnt_log = cnt_log + 1
                    # print("Not counted             ",vq_word,l_word , "synset " , vq_word_syn , l_word_syn)
                # input()
            sum_of_sim_all_levels.append(sum_of_sim)
            avg_of_sim_all_levels.append(sum_of_sim / len(list_i))

        # print("\n\n printing all lists")
        # for l in cl_listoflist:
        # 	print(l)

        # QUES WORK BEGIN
        # print ("Sim")
        for i in range(0, 6):
            final_sim_of_ques_with_all_levels[i] += avg_of_sim_all_levels[i]
        # 	print (final_sim_of_ques_with_all_levels[i],",")
        # print("\n")

        # print("area sim")
        for i in range(0, 6):
            final_area_sim_of_ques_with_all_levels[i] += sum_of_sim_all_levels[
                i]
        # 	print (final_area_sim_of_ques_with_all_levels[i],",")
        # print("\n")
        # print ("cnt_log",cnt_log)

    # print ("Final Sim")
    # for i in range(0,6):
    # 	print (final_sim_of_ques_with_all_levels[i],",")
    # print("\n")

    # print ("Final Area Sim")
    # for i in range(0,6):
    # 	print (final_area_sim_of_ques_with_all_levels[i],",")
    # print("\n")

    #	maximum of all similarities values to find cl level
    final_level = 0
    max_sim = final_sim_of_ques_with_all_levels[0]
    for index, sim in enumerate(final_sim_of_ques_with_all_levels):
        if sim > max_sim:
            max_sim = sim
            final_level = index

    # print("\n")
    # print("avg wali list: " , avg_of_sim_all_levels)

    # print( "sum wali list: " , sum_of_sim_all_levels)

    # 	finding if word will be classified in  more than two levels
    count = 0
    indices_of_same_sim = []
    for i, sim in enumerate(final_sim_of_ques_with_all_levels):
        if sim == max_sim:
            count += 1
            indices_of_same_sim.append(i)

    # 	if word is in more than two levels
    if len(indices_of_same_sim) > 1:
        # print ("ques is in more than two levels")
        same_sim_list = []
        for index in indices_of_same_sim:
            same_sim_list.append(final_area_sim_of_ques_with_all_levels[index])

        max_sim_area = same_sim_list[0]
        for sim_area, index_of_max_sim in zip(same_sim_list,
                                              indices_of_same_sim):
            if sim_area > max_sim_area:
                max_sim_area = sim_area
                final_level = index_of_max_sim

    # print("final_level ",final_level)
    return final_level
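
Usage sketch: the function maps a question's words to the index (0-5) of the Bloom's-taxonomy level whose verb list they resemble most:

# e.g. for "Describe and identify the parts of a cell":
level = lch_sim_fun(['describe', 'identify'])
print(level)  # 0..5 -> knowledge, comprehension, application, analysis, synthesis, evaluation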