def create_graphs(doc_list):
    documents = doc_list
    if documents is None:
        documents = default_document_list()

    distance_functions = [
        (wn.lch_similarity(SYNSETS[0], SYNSETS[0]), 'lch', lambda sense_1, sense_2: wn.lch_similarity(sense_1, sense_2)),
        (1.0, 'lin', lambda sense_1, sense_2: wn.lin_similarity(sense_1, sense_2, CORPUS)),
        (10.636958516573292, 'res', lambda sense_1, sense_2: wn.res_similarity(sense_1, sense_2, CORPUS)),
        (wn.jcn_similarity(SYNSETS[0], SYNSETS[0], CORPUS), 'jcn', lambda sense_1, sense_2: wn.jcn_similarity(sense_1, sense_2, CORPUS)),
        (1.0, 'path', lambda sense_1, sense_2: wn.path_similarity(sense_1, sense_2)),
    ]
    all_senses = []
    for doc in documents:
        for sense in doc.top_senses():
            all_senses.append((sense, doc.name))
    against_colors = ['r', 'b', 'g']
    against_to = [wn.synset(word) for word in ["economy.n.01", "philosophy.n.02", "politics.n.01"]]
    create_against_graph('phyl_eco_pol', documents, all_senses, against_to, distance_functions, against_colors)

    against_to = SYNSETS

    against_colors = [(random(), random(), random()) for _i in range(0, len(SYNSETS))]
    create_against_graph('handpicked', documents, all_senses, against_to, distance_functions, against_colors)

    create_graph_top_senses(documents, all_senses, distance_functions)
Example #2
    def test_path_similarities(self):
        from nltk.corpus import wordnet as nltk_wn
        nltk_cat = nltk_wn.synset('cat.n.1')
        nltk_dog = nltk_wn.synset('dog.n.1')
        nltk_bus = nltk_wn.synset('bus.n.1')

        our_cat = our_wn.synset('cat.n.1')
        our_dog = our_wn.synset('dog.n.1')
        our_bus = our_wn.synset('bus.n.1')
        assert nltk_wn.path_similarity(nltk_cat,
                                       nltk_dog) == our_wn.path_similarity(
                                           our_cat, our_dog)
        assert nltk_wn.wup_similarity(nltk_cat,
                                      nltk_dog) == our_wn.wup_similarity(
                                          our_cat, our_dog)
        assert nltk_wn.lch_similarity(nltk_cat,
                                      nltk_dog) == our_wn.lch_similarity(
                                          our_cat, our_dog)

        assert nltk_wn.path_similarity(nltk_cat,
                                       nltk_bus) == our_wn.path_similarity(
                                           our_cat, our_bus)
        assert nltk_wn.wup_similarity(nltk_cat,
                                      nltk_bus) == our_wn.wup_similarity(
                                          our_cat, our_bus)
        assert nltk_wn.lch_similarity(nltk_cat,
                                      nltk_bus) == our_wn.lch_similarity(
                                          our_cat, our_bus)
Example #3
def get_synset_similarity(first_sentence: str, second_sentence: str, method: str):
    sentence_tagged_1 = pos_tag(first_sentence)
    sentence_tagged_2 = pos_tag(second_sentence)

    sentence_tagged_wn_1 = get_sentences_tagged_with_wn_and_cleaned(sentence_tagged_1)
    sentence_tagged_wn_2 = get_sentences_tagged_with_wn_and_cleaned(sentence_tagged_2)

    synsets = {}
    synsets, key_list1 = get_synset_tag(sentence_tagged_wn_1, synsets)
    synsets, key_list2 = get_synset_tag(sentence_tagged_wn_2, synsets)

    synsets_combinations = list(product(key_list1, key_list2))
    resulting_similarity = []
    for first_word, second_word in synsets_combinations:
        if first_word == second_word:
            resulting_similarity.append(1)
            continue
        first_synset = synsets[first_word][0]
        second_synset = synsets[second_word][0]
        first_tag = synsets[first_word][1]
        second_tag = synsets[second_word][1]
        if method == "path":
            path_sim = first_synset.path_similarity(second_synset)
            if path_sim is None:
                resulting_similarity.append(0)
            else:
                resulting_similarity.append(path_sim)

        if method == "lch":
            if first_tag == second_tag:
                lch_sim = wn.lch_similarity(first_synset, second_synset)
                if lch_sim is None:
                    resulting_similarity.append(0)
                else:
                    lch_norm = lch_sim / wn.lch_similarity(first_synset, first_synset)
                    resulting_similarity.append(lch_norm)

        if method == "wup":
            wup_sim = first_synset.wup_similarity(second_synset)
            if wup_sim is None:
                resulting_similarity.append(0)
            else:
                resulting_similarity.append(wup_sim)

        if method == "lin":
            if first_tag == second_tag and first_tag in ['n', 'v']:
                lin_sim = first_synset.lin_similarity(second_synset, brown_ic)
                if lin_sim is None:
                    resulting_similarity.append(0)
                else:
                    resulting_similarity.append(lin_sim)

    if not resulting_similarity:
        return 0
    else:
        return sum(resulting_similarity) / len(resulting_similarity)
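
The helper above leans on project-specific utilities (the pos_tag wrapper, get_synset_tag, brown_ic). A minimal, self-contained sketch of the same pairwise-averaging idea, using only NLTK with first-sense lookup (the function name and word lists here are illustrative, not from the original project):

from itertools import product
from nltk.corpus import wordnet as wn

def avg_first_sense_path_similarity(words_1, words_2):
    # Average path similarity over all cross-pairs, first sense of each word.
    scores = []
    for w1, w2 in product(words_1, words_2):
        syns_1 = wn.synsets(w1)
        syns_2 = wn.synsets(w2)
        if not syns_1 or not syns_2:
            continue  # skip out-of-vocabulary words
        sim = syns_1[0].path_similarity(syns_2[0])
        scores.append(sim if sim is not None else 0)  # None means no connecting path
    return sum(scores) / len(scores) if scores else 0

# e.g. avg_first_sense_path_similarity(["dog", "cat"], ["puppy", "kitten"])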
def most_similar_lch(synsets_dict, verb):
    best_similarity = -1
    most_similar = str()
    verb_synset = wn.synsets(verb, pos=wn.VERB)[0]

    # loop variable renamed so it no longer shadows the `verb` parameter
    for candidate, synset in synsets_dict.items():
        similarity = wn.lch_similarity(synset, verb_synset)
        if similarity is not None and similarity > best_similarity:
            best_similarity = similarity
            most_similar = candidate

    return most_similar
Example #5
def compare_allsynsets(method, word1, word2):
    ss1 = wordnet.synsets(word1)
    ss2 = wordnet.synsets(word2)
    simi_value = 0.0
    for (s1, s2) in product(ss1, ss2):
        # if SYNpos and s1.pos() != s2.pos():  # SYN-POS
        #     continue
        # if TWpos and s1.pos() != pos:  # Target word POS
        #     continue
        simi = None  # reset per pair; unmatched branches leave it unset
        if method == "PATH":
            simi = s1.path_similarity(s2)
        elif method == "LCH":
            if s1.pos() == s2.pos():  # lch raises WordNetError across POS
                simi = wordnet.lch_similarity(s1, s2)
        elif method == "WUP":
            simi = wordnet.wup_similarity(s1, s2)
        elif method == "RES":
            if s1.pos() == s2.pos():  # IC-based measures also need matching POS
                simi = wordnet.res_similarity(s1, s2, brown_ic)
        elif method == "JCN":
            if s1.pos() == s2.pos() and s1.pos() in ['n', 'a', 'v']:  # can't do diff POS
                simi = wordnet.jcn_similarity(s1, s2, brown_ic)
        elif method == "LIN":
            if s1.pos() == s2.pos() and s1.pos() in ['n', 'a', 'v']:  # can't do diff POS
                simi = wordnet.lin_similarity(s1, s2, brown_ic)
        else:
            sys.exit("Error! No similarity methods!")

        if simi is not None and simi > simi_value:  # path/wup may return None
            simi_value = simi
    return simi_value
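
A usage sketch for the function above: PATH/LCH/WUP need only WordNet, while RES/JCN/LIN assume an information-content dictionary such as brown_ic (loaded here as an illustration):

from itertools import product
from nltk.corpus import wordnet, wordnet_ic

brown_ic = wordnet_ic.ic('ic-brown.dat')
print(compare_allsynsets("WUP", "dog", "cat"))  # best Wu-Palmer score over all sense pairs
print(compare_allsynsets("RES", "dog", "cat"))  # best Resnik score under brown_ic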
def similarityWordNet(word1, word2):
    """
    Similarity between two words with nltk
    Input: word1, word2 (String)
    Return: True if the first senses are similar, False otherwise
    """
    #print (word1,"-",word2)
    # morphy() returns None when it cannot lemmatize; keep the original word then
    word1 = wn.morphy(word1) or word1
    word2 = wn.morphy(word2) or word2

    palabras = wn.synsets(word1)
    #print (palabras)
    if len(palabras) == 0:
        print("no existe")
        return False
    word1 = palabras[0]  # first sense; no need to slice the Synset repr string
    #print (word1)

    palabras = wn.synsets(word2)
    #print (palabras)
    if len(palabras) == 0:
        print("no existe")
        return False
    word2 = palabras[0]
    #print (word2)
    """
    Return a score denoting how similar two word senses are,
    based on the shortest path that connects the senses in the is-a
    (hypernym/hyponym) taxonomy. The score is in the range 0 to 1.
    """
    #similarity1 = word1.path_similarity(word2)
    #similarity1 = wn.path_similarity(word1, word2)
    similarity = wn.wup_similarity(word1, word2)  # Wu-Palmer Similarity
    print(similarity)
    # wup_similarity may return None when the senses share no taxonomy
    if similarity is not None and similarity > 0.5:
        return True
    """
    Leacock-Chodorow Similarity: Return a score denoting how similar
    two word senses are, based on the shortest path that connects
    the senses (as above) and the maximum depth of the taxonomy in
    which the senses occur. Range: up to about 3.6.
    """
    #similarity2 = wn.lch_similarity(word1, word2)  # requires both senses to share a POS
    """
    Wu-Palmer Similarity: Return a score denoting how similar
    two word senses are, based on the depth of the two senses in
    the taxonomy and that of their Least Common Subsumer (most specific ancestor node).
    Range: up to 0.92.
    """
    #similarity3 = wn.wup_similarity(word1, word2)
    return False
def get_best_synset_pair(word_1, word_2, pos_1=POS_SET, pos_2=POS_SET):
    """ 
    Choose the pair with highest path similarity among all pairs. 
    Mimics pattern-seeking behavior of humans.
    """
    #synsets_1 = wn.synsets(word_1)
    synsets_1 = [s for s in wn.synsets(word_1) if s.pos() in pos_1]
    #synsets_2 = wn.synsets(word_2)
    synsets_2 = [s for s in wn.synsets(word_2) if s.pos() in pos_2]
    max_sim = None
    best_pair = None, None
    for synset_1 in synsets_1:
        for synset_2 in synsets_2:
            if synset_1.pos() == synset_2.pos():
                #sim = wn.path_similarity(synset_1, synset_2)
                sim = wn.lch_similarity(synset_1, synset_2)  # same POS needed
                if sim is not None and (max_sim is None or max_sim < sim):
                    max_sim = sim
                    best_pair = synset_1, synset_2
    #if best_pair!=(None,None): # or max_sim!=None
    if max_sim is not None:
        spd = best_pair[0].shortest_path_distance(best_pair[1])
        lch = best_pair[0].lowest_common_hypernyms(best_pair[1])
        lch_depth = None
        if lch:
            lch_depth = max(s.min_depth() for s in lch)
        return best_pair, max_sim, spd, lch_depth
    return None
 def __init__(self, metric="path", double_aggregator=False):
     """
     :param metric: path lch and wup metric
     :param double_aggregator:
     """
     self.metric = metric
     self.aggregation_mode_prev = ['max', 'mean',
                                   'median']  #["mean", "max", "median"]
     self.aggregation_mode = ["mean", "std", "max", "min", "median"]
     self.aggregator = [
         None if m == "" else getattr(np, m) for m in self.aggregation_mode
     ]
     self.aggregator_prev = [
         None if m == "" else getattr(np, m)
         for m in self.aggregation_mode_prev
     ]
     self.double_aggregator = double_aggregator
     if self.metric == "path":  # sense shortest path
         self.metric_func = lambda syn1, syn2: wn.path_similarity(
             syn1, syn2)
     elif self.metric == "lch":
         self.metric_func = lambda syn1, syn2: wn.lch_similarity(syn1, syn2)
     elif self.metric == "wup":  # words' depth and ancestor depth + shortest path
         self.metric_func = lambda syn1, syn2: wn.wup_similarity(syn1, syn2)
     else:
         raise ValueError(
             "Wrong similarity metric: %s, should be one of path/lch/wup." %
             self.metric)
Example #9
    def get_lch_average(self, sentence1, sentence2):
        sentence1_unique, sentence2_unique = self.sentence_difference(
            sentence1, sentence2)
        avg_similarity = 0
        total_count = 0
        # Measure similarity for each unique word from A to each unique word to B
        for sentence1_word in sentence1_unique:
            for sentence2_word in sentence2_unique:
                sentence1_word_tag = sentence1.get_tag(sentence1_word)
                sentence2_word_tag = sentence2.get_tag(sentence2_word)
                synsets_word1 = wordnet.synsets(sentence1_word,
                                                sentence1_word_tag)
                synsets_word2 = wordnet.synsets(sentence2_word,
                                                sentence2_word_tag)

                if len(synsets_word1) == 0:
                    synsets_word1 = wordnet.synsets(sentence1_word)
                if len(synsets_word2) == 0:
                    synsets_word2 = wordnet.synsets(sentence2_word)
                if len(synsets_word1) > 0 and len(synsets_word2) > 0:
                    # Skip words with different tags
                    if synsets_word1[0].pos() != synsets_word2[0].pos():
                        continue
                    similarity = wordnet.lch_similarity(
                        synsets_word1[0], synsets_word2[0])
                    if similarity is not None:
                        avg_similarity += similarity
                        total_count += 1
        if total_count == 0:
            return 0
        return float(avg_similarity) / float(total_count)
Example #10
    def get_lch_min(self, sentence1, sentence2):
        sentence1_unique, sentence2_unique = self.sentence_difference(
            sentence1, sentence2)
        min_similarity = float("inf")  # sys.maxint is Python 2-only
        # Measure similarity for each unique word from A to each unique word to B
        for sentence1_word in sentence1_unique:
            for sentence2_word in sentence2_unique:
                sentence1_word_tag = sentence1.get_tag(sentence1_word)
                sentence2_word_tag = sentence2.get_tag(sentence2_word)
                synsets_word1 = wordnet.synsets(sentence1_word,
                                                sentence1_word_tag)
                synsets_word2 = wordnet.synsets(sentence2_word,
                                                sentence2_word_tag)

                if len(synsets_word1) == 0:
                    synsets_word1 = wordnet.synsets(sentence1_word)
                if len(synsets_word2) == 0:
                    synsets_word2 = wordnet.synsets(sentence2_word)

                if len(synsets_word1) > 0 and len(synsets_word2) > 0:
                    # Skip words with different tags
                    if synsets_word1[0].pos() != synsets_word2[0].pos():
                        continue
                    similarity = wordnet.lch_similarity(
                        synsets_word1[0], synsets_word2[0])
                    if similarity is not None:
                        min_similarity = min(similarity, min_similarity)
        if min_similarity == float("inf"):
            return 0
        return min_similarity
Example #11
 def _get_simil_term(self, x, y, mode='lch'):
     '''
     Returns the similarity between two terms x and y
     Args:
         x, y (str)
         mode = lch | path | wup
     '''
     w1 = wn.synsets(x)
     w2 = wn.synsets(y)
     if len(w1) == 0 or len(w2) == 0:
         return 0
     if mode == 'lch':
         scores = [
             wn.lch_similarity(e1, e2) for e1 in w1 for e2 in w2
             if e1.pos() == e2.pos()  # was `e1.pos == e2`, which never matched
         ]
     elif mode == 'path':
         scores = [
             wn.path_similarity(e1, e2) for e1 in w1 for e2 in w2
             if e1.pos() == e2.pos()
         ]
     elif mode == 'wup':
         scores = [
             wn.wup_similarity(e1, e2) for e1 in w1 for e2 in w2
             if e1.pos() == e2.pos()
         ]
     else:
         return 0
     scores = [s for s in scores if s is not None]
     return max(scores) if scores else 0
Example #12
def relaxedSimi(syn1, syn2):
    """
    Compute similarity between two synsets
    """
    try:
        return wn.lch_similarity(syn1, syn2) or 0
    except WordNetError:
        return 0
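
The WordNetError branch above matters because lch_similarity raises (rather than returning None) when the two synsets have different parts of speech; a quick demonstration:

from nltk.corpus import wordnet as wn
from nltk.corpus.reader.wordnet import WordNetError

try:
    wn.lch_similarity(wn.synset('dog.n.01'), wn.synset('run.v.01'))
except WordNetError as err:
    print('cross-POS lch fails:', err)  # relaxedSimi() maps this case to 0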
Example #13
 def lch(self, synset_a, synset_b):
     return (
         self.normalize(
             self.MAX_VALUE,
             wordnet.lch_similarity(synset_a, synset_b, verbose=True),
         )
         if synset_a.pos() == synset_b.pos()
         else 0
     )
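
The MAX_VALUE normalizer above is presumably the LCH ceiling, i.e. a synset's self-similarity, which equals log(2*D) for taxonomy depth D (19 for NLTK's noun taxonomy). A quick check under that assumption:

from math import log
from nltk.corpus import wordnet as wn

dog = wn.synset('dog.n.01')
print(wn.lch_similarity(dog, dog))  # ~3.6375861597263857
print(log(2 * 19))                  # the same ceiling: -log(1/(2*19))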
Example #14
    def classify(self, ex):

        word = ex["word"].value
        synset_ex = wn.synsets(word.replace(" ", "_"))[0]
        similarities = [
            wn.lch_similarity(synset_ex, synset_t)
            for synset_t in self.training_synsets
        ]
        cls_i = na.argmax(similarities)
        return self.training_table[cls_i]["class"]
Example #15
File: similarity.py  Project: kmwenja/ftm
def similarity_by_path(sense1, sense2, option="path"):
  if option.lower() in ["path", "path_similarity"]: # Path similarity
    return max(wn.path_similarity(sense1, sense2),
               wn.path_similarity(sense2, sense1))
  elif option.lower() in ["wup", "wupa", "wu-palmer"]: # Wu-Palmer
    return wn.wup_similarity(sense1, sense2)
  elif option.lower() in ['lch', "leacock-chordorow"]: # Leacock-Chodorow
    if sense1.pos() != sense2.pos(): # lch can't do diff POS
      return 0
    return wn.lch_similarity(sense1, sense2)
def wnsensesim(synset1, synset2, metric):

    if metric == 'path_similarity':
        return wn.path_similarity(synset1, synset2)
    elif metric == 'lch_similarity':
        return wn.lch_similarity(synset1, synset2)
    elif metric == 'wup_similarity':
        return wn.wup_similarity(synset1, synset2)
    else:#add more similarity measures e.g., jcn
        print "Unsupported wn similarity measure requested"
Example #17
def similarity_by_path(sense1, sense2, option="path"):
    """ Returns maximum path similarity between two senses. """
    if option.lower() in ["path", "path_similarity"]: # Path similarity
        return max(wn.path_similarity(sense1, sense2),
                   wn.path_similarity(sense2, sense1))
    elif option.lower() in ["wup", "wupa", "wu-palmer"]: # Wu-Palmer
        return wn.wup_similarity(sense1, sense2)
    elif option.lower() in ['lch', "leacock-chordorow"]: # Leacock-Chodorow
        if sense1.pos() != sense2.pos(): # lch can't do diff POS
            return 0
        return wn.lch_similarity(sense1, sense2)
 def __init__(self, obs_corpus, target_corpus, metric="path", aggregation_mode_prev="", aggregation_mode=""):
     super().__init__(obs_corpus, target_corpus, aggregation_mode, None, aggregation_mode_prev)
     self.metric = metric
     if self.metric == "path":
         self.metric_func = lambda syn1, syn2: wn.path_similarity(syn1, syn2)
     elif self.metric == "lch":
         self.metric_func = lambda syn1, syn2: wn.lch_similarity(syn1, syn2)
     elif self.metric == "wup":
         self.metric_func = lambda syn1, syn2: wn.wup_similarity(syn1, syn2)
     else:
         raise ValueError("Wrong similarity metric: %s, should be one of path/lch/wup." % self.metric)
Example #20
def compute_similarities(s1, s2, sim):
    if sim == "path":
        return wn.path_similarity(s1, s2)
    elif sim == "lch":
        return wn.lch_similarity(s1, s2)
    elif sim == "wup":
        return wn.wup_similarity(s1, s2)
    elif sim == "res":
        return wn.res_similarity(s1, s2, genesis_ic)
    elif sim == "jcn":
        return wn.jcn_similarity(s1, s2, genesis_ic)
    elif sim == "lin":
        return wn.lin_similarity(s1, s2, genesis_ic)
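
A usage sketch; the genesis_ic the function expects can be built from the Genesis corpus as in the NLTK WordNet howto:

from nltk.corpus import wordnet as wn
from nltk.corpus import genesis

genesis_ic = wn.ic(genesis, False, 0.0)  # information content from the Genesis corpus
print(compute_similarities(wn.synset('dog.n.01'), wn.synset('cat.n.01'), 'lch'))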
Example #21
def similarity_by_path(sense1, sense2, option="path"):
  """ Returns maximum path similarity between two senses. """
  if option.lower() in ["path", "path_similarity"]: # Path similarity
    return max(wn.path_similarity(sense1, sense2),
               wn.path_similarity(sense2, sense1))
  elif option.lower() in ["wup", "wupa", "wu-palmer"]: # Wu-Palmer
    return wn.wup_similarity(sense1, sense2)
  elif option.lower() in ['lch', "leacock-chordorow"]: # Leacock-Chodorow
    if sense1.pos() != sense2.pos(): # lch can't do diff POS
      return 0
    return wn.lch_similarity(sense1, sense2)

Example #22
def bagSimilarity(s1, s2):
    if len(s1) == 0 or len(s2) == 0:
        return 1

    total = 0

    for a in s1:
        for b in s2:
            if a.pos() == b.pos():
                sim = wn.lch_similarity(a, b)
                if sim is not None:  # guard against missing paths
                    total += sim

    total /= (len(s1) * len(s2))
    return total
Example #23
def dist_all_synsets(first, second):

    f_syns = wn.synsets(first)
    s_syns = wn.synsets(second)

    #Path Similarity
    #A 0-1 similarity score based on the shortest path that connects the senses in the is-a (hypernym/hyponym) taxonomy.
    #A score of 1 represents identity, i.e. comparing a sense with itself will return 1.
    max_path_sim = 0.0
    for f in f_syns:
        for s in s_syns:
            path_sim = wn.path_similarity(f, s)
            # path_similarity returns None when no path exists
            if path_sim is not None and path_sim > max_path_sim:
                max_path_sim = path_sim

    #Leacock-Chodorow Similarity
    #A similarity score of the shortest path connecting the senses & the maximum depth of the taxonomy in which the senses occur.
    #The relationship is given as -log(p/2d) where p is the shortest path length and d the taxonomy depth.

    max_lch = 0.0
    for f in f_syns:

        for s in s_syns:
            lch = 0.0
            try:
                lch = wn.lch_similarity(s, f)
            except WordNetError:
                pass

            if lch > max_lch:
                max_lch = lch
    max_lch = max_lch / 3.6375
    #Wu-Palmer Similarity
    #A similarity score based on the depth of the two senses in the taxonomy and that of their Least Common Subsumer (most specific ancestor node).
    #The LCS does not necessarily feature in the shortest path connecting the two senses, as it is by definition the common ancestor deepest in the taxonomy, not closest to the two senses. Typically, however, it will so feature. Where multiple candidates for the LCS exist, that whose shortest path to the root node is the longest will be selected. Where the LCS has multiple paths to the root, the longer path is used for the purposes of the calculation.
    wup_sim = 0
    if f_syns and s_syns:
        wup_sim = wn.wup_similarity(f_syns[0], s_syns[0])
        if wup_sim is None:
            wup_sim = -1

    return (max_path_sim, max_lch, wup_sim)
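
Usage sketch for the function above: it returns a (best path score, normalized best LCH, Wu-Palmer of the first senses) triple:

path_s, lch_s, wup_s = dist_all_synsets('dog', 'cat')
print(path_s, lch_s, wup_s)  # each roughly in the 0..1 range (wup may be -1 if undefined)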
Example #24
    def checksim(self, synset1, synset2):
        score = 0
        for syn1 in synset1:
            for syn2 in synset2:
                try:
                    ns = wn.lch_similarity(syn1, syn2)
                except:
                    ns = 0

    #            ns = wn.wup_similarity(syn1,syn2)
                if isinstance(ns, float):
                    if ns > score:
                        score = ns
        return (score)
Example #25
def wnsim(synset1, synset2, method='all'):
    synset_patt = re.compile(r'^.+\..+\.\d+$')

    if synset_patt.match(synset1):
        s1 = wn.synset(synset1)
    else:
        s1 = wn_synset(synset1)

    if synset_patt.match(synset2):
        s2 = wn.synset(synset2)
    else:
        s2 = wn_synset(synset2)

    if s1 is None or s2 is None:
        return 0

    if method == 'lin':
        return wn.lin_similarity(s1, s2, wn_ic)
    elif method == 'res':
        return wn.res_similarity(s1, s2, wn_ic)
    elif method == 'jcn':
        return wn.jcn_similarity(s1, s2, wn_ic)
    elif method == 'wup':
        return wn.wup_similarity(s1, s2)
    elif method == 'path':
        return wn.path_similarity(s1, s2)
    elif method == 'lch':
        return wn.lch_similarity(s1, s2)
    elif method == 'all':
        return [
            ('lin', wn.lin_similarity(s1, s2, wn_ic)),
            ('res', wn.res_similarity(s1, s2, wn_ic)),
            ('jcn', wn.jcn_similarity(s1, s2, wn_ic)),
            ('wup', wn.wup_similarity(s1, s2)),
            ('path', wn.path_similarity(s1, s2)),
            ('lch', wn.lch_similarity(s1, s2))
        ]
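
A usage sketch, assuming wn_ic is an information-content dictionary loaded from wordnet_ic (the wn_synset fallback helper is project-specific and not shown):

from nltk.corpus import wordnet_ic
wn_ic = wordnet_ic.ic('ic-brown.dat')

# Both names match the synset pattern, so wn.synset() is used directly:
print(wnsim('dog.n.01', 'cat.n.01', method='lch'))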
def create_graphs(doc_list):
    documents = doc_list
    if documents is None:
        documents = default_document_list()

    distance_functions = [
        (wn.lch_similarity(SYNSETS[0], SYNSETS[0]), 'lch',
         lambda sense_1, sense_2: wn.lch_similarity(sense_1, sense_2)),
        (1.0, 'lin',
         lambda sense_1, sense_2: wn.lin_similarity(sense_1, sense_2, CORPUS)),
        (10.636958516573292, 'res',
         lambda sense_1, sense_2: wn.res_similarity(sense_1, sense_2, CORPUS)),
        (wn.jcn_similarity(SYNSETS[0], SYNSETS[0], CORPUS), 'jcn',
         lambda sense_1, sense_2: wn.jcn_similarity(sense_1, sense_2, CORPUS)),
        (1.0, 'path',
         lambda sense_1, sense_2: wn.path_similarity(sense_1, sense_2)),
    ]
    all_senses = []
    for doc in documents:
        for sense in doc.top_senses():
            all_senses.append((sense, doc.name))
    against_colors = ['r', 'b', 'g']
    against_to = [
        wn.synset(word)
        for word in ["economy.n.01", "philosophy.n.02", "politics.n.01"]
    ]
    create_against_graph('phyl_eco_pol', documents, all_senses, against_to,
                         distance_functions, against_colors)

    against_to = SYNSETS

    against_colors = [(random(), random(), random())
                      for _i in range(0, len(SYNSETS))]
    create_against_graph('handpicked', documents, all_senses, against_to,
                         distance_functions, against_colors)

    create_graph_top_senses(documents, all_senses, distance_functions)
Example #27
 def __word_net_lch_eval(self, hint: str, target: str):
     h_synsets = wn.synsets(hint)
     t_synsets = wn.synsets(target)
     lst = []
     for h in h_synsets:
         for t in t_synsets:
             try:
                 strength = wn.lch_similarity(h, t)
             except WordNetError:
                 strength = -1
             lst.append(strength if strength is not None else -1)
     if all([x == -1 for x in lst]):
         return -9.999
     else:
         return max(lst)  # get strongest hint
Example #28
def wn_similarity(synset_1, synset_2, similarity='Shortest_Path'):
    if similarity == "Shortest_Path":
        sim = wn.path_similarity(synset_1, synset_2)
    elif similarity == "Leacock_Chodorow":
        sim = wn.lch_similarity(synset_1, synset_2)
    elif similarity == "Wu_Palmer":
        sim = wn.wup_similarity(synset_1, synset_2)
    elif similarity == "Resnik":
        sim = synset_1.res_similarity(synset_2, ic)
    elif similarity == "Jiang_Conrath":
        sim = synset_1.jcn_similarity(synset_2, ic)
    elif similarity == "Lin":
        sim = synset_1.lin_similarity(synset_2, ic)
    else:
        sim = 0
    return sim
Example #29
def assignToCategoriesLCH(category_synsets,word_synsets):
    prettyprint("start assigning lch_similarity...")
    assignedDict = {}
    for category in category_synsets:
        assignedDict[category] = []
    
    for word in word_synsets:
        tempValues = []
        for category in category_synsets:
            #similarity = wn.path_similarity(word, category)
            # lch raises WordNetError when word and category differ in POS
            if word.pos() == category.pos():
                similarity = wn.lch_similarity(word, category)
            else:
                similarity = -1  # placeholder keeps indices aligned with category_synsets
            tempValues.append(similarity)
            #print("appended "+str(similarity)+" for "+str(category)+ " and "+str(word))
        #print("__________________________________")
        indexOfMaxValue = tempValues.index(getMaxFromList(tempValues))
        assignedDict[ category_synsets[ indexOfMaxValue ] ].append( word )
    return assignedDict
def lch_sim(word1, word2):
    """
    Leacock-Chodorow Similarity: Return a score denoting how similar
    two word senses are, based on the shortest path that connects
    the senses (as above) and the maximum depth of the taxonomy in
    which the senses occur. Range: 0 to about 3.6.

    The relationship is given as -log(p/2d) where p is the
    shortest path length and d the taxonomy depth.
    """
    try:
        value = wn.lch_similarity(word1, word2)
        if value is None:
            return 0
        return value / 3.6  # scale to roughly the 0..1 range
    except Exception:
        return 0
Example #31
def similarity(words: list)->list:
    '''Calculates similarity based on the given synsets'''
    results = []
    synsets = ask_for_word_defs(words)
    print("\n{}\n".format('*'*80))
    for i in range(int(len(synsets)/2)):
        print("{:30}{}".format(str(synsets[2*i]), str(synsets[2*i + 1])))
    print("\n{}\n".format('*'*80))
    print("Running comparisons...")
    for i in range(int(len(synsets)/2)):
        if synsets[2*i] is None or synsets[2*i + 1] is None:
            results.append(["Undefined", "Undefined", -1, -1, -1, "None", "None"])
            continue
        result = [words[2*i], words[2*i + 1], 0, 0, 0, synsets[2*i].definition(), synsets[2*i + 1].definition()]
        result[2] = wordnet.lch_similarity(synsets[2*i],synsets[2*i + 1])
        result[3] = wordnet.wup_similarity(synsets[2*i],synsets[2*i + 1])
        result[4] = wordnet.path_similarity(synsets[2*i],synsets[2*i + 1])
        results.append(result)
    print("\n{}\n".format('*'*80))
    return results
Example #32
    def word_similarity(self, w1, w2, syns, loc, thr_sim):
        # `wn.NOUN or wn.ADJ` evaluates to just wn.NOUN, so query both POS explicitly
        syn1 = wn.synsets(w1, wn.NOUN) + wn.synsets(w1, wn.ADJ)
        syn2 = wn.synsets(w2, wn.NOUN) + wn.synsets(w2, wn.ADJ)

        if len(syn1) > 0 and len(syn2) > 0:
            score = 0
            max_score = 0
            count = 0
            sns1 = syn1[0]
            sns2 = syn2[0]
            for i in range(0, len(syn1)):
                for j in range(0, len(syn2)):
                    score = None  # reset per pair; unmatched POS leaves it unset
                    if self.wordnet_metric == 'j':  # Jiang-Conrath Similarity
                        if syn1[i].pos() == syn2[j].pos():  # IC measures need matching POS
                            # assumes an IC dictionary, e.g. brown_ic from wordnet_ic
                            score = wn.jcn_similarity(syn1[i], syn2[j], brown_ic)
                    elif self.wordnet_metric == 'le':  # Leacock-Chodorow Similarity
                        if syn1[i].pos() == syn2[j].pos():  # lch raises across POS
                            score = wn.lch_similarity(syn1[i],
                                                      syn2[j],
                                                      simulate_root=False)
                    elif self.wordnet_metric == 'li':  # Lin Similarity
                        if syn1[i].pos() == syn2[j].pos():
                            score = wn.lin_similarity(syn1[i], syn2[j], brown_ic)
                    elif self.wordnet_metric == 'p':  # Path Similarity
                        score = wn.path_similarity(syn1[i], syn2[j])
                    elif self.wordnet_metric == 'w':  # Wu-Palmer Similarity. It can not be '0'. It ranges in (0,1]
                        score = wn.wup_similarity(syn1[i], syn2[j])

                    if score is not None and score > max_score:  # Finding the maximum score
                        max_score = score
                        sns1 = syn1[i]
                        sns2 = syn2[j]
                        if max_score >= thr_sim:  # Storing all the synset pairs that have scores > threshold
                            syns, loc = self.merging_synsets(
                                syns, w1, w2, sns1, sns2, max_score, loc)
                            count = count + 1
            if count == 0:  # Storing the synset that has maximum score but the score < threshold
                syns, loc = self.merging_synsets(syns, w1, w2, sns1, sns2,
                                                 max_score, loc)
        return syns, loc
Example #33
def word_similarity(measure, word1, word2, pos):
    wsim = 0.0
    if pos is "n" or pos is "v":
        if pos is "n":
            word1 = wn.synsets(word1, wn.NOUN)
            word2 = wn.synsets(word2, wn.NOUN)
        else:
            word1 = wn.synsets(word1, wn.VERB)
            word2 = wn.synsets(word2, wn.VERB)

        if word1 != [] and word2 != []:
            word1 = word1[0]
            word2 = word2[0]
            if measure == "path":
                wsim = wn.path_similarity(word1, word2)
            if measure == "lch":
                wsim = wn.lch_similarity(word1, word2)
                wsim = (wsim / 3.63758615973)
            if measure == "wup":
                wsim = wn.wup_similarity(word1, word2)
            if measure == "res":
                wsim = word1.res_similarity(word2, brown_ic)
                wsim = wsim / 9.00601439892
            if measure == "jcn":
                wsim = word1.jcn_similarity(word2, brown_ic)
                wsim = wsim / (1e+300)
            if measure == "lin":
                wsim = word1.lin_similarity(word2, brown_ic)
            return wsim
        else:
            return wsim
    else:
        if pos == "r" or pos == "a":
            if word1 == word2:
                return 1.0
            else:
                return 0.0
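
Usage sketch for the function above; brown_ic for the IC-based measures is assumed to be loaded from wordnet_ic:

from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic

brown_ic = wordnet_ic.ic('ic-brown.dat')
print(word_similarity('lch', 'dog', 'cat', 'n'))  # LCH scaled to roughly 0..1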
Example #34
def lch_similarity(synsets1, synsets2):
    """
        This function returns Leacock Chodorow similarity (LCH)
        between two synsets, based on the shortest path distance
        and the maximum depth of the taxonomy. The equation to
        calculate LCH similarity is shown below:

        .. math::

            lch\\_similarity = -\\log{\\frac{shortest\\_path\\_distance(synsets1,
                               synsets2)}{2 \\times taxonomy\\_depth}}

        :param `Synset` synsets1: first synset supplied to measure
                                  the LCH similarity
        :param `Synset` synsets2: second synset supplied to measure
                                  the LCH similarity

        :return: LCH similarity between two synsets
        :rtype: float

        :Example:

            >>> from pythainlp.corpus.wordnet import lch_similarity, synset
            >>>
            >>> entity = synset('entity.n.01')
            >>> obj = synset('object.n.01')
            >>> cat = synset('cat.n.01')
            >>>
            >>> lch_similarity(entity, obj)
            2.538973871058276
            >>> lch_similarity(entity, cat)
            0.9985288301111273
            >>> lch_similarity(obj, cat)
            1.1526795099383855
    """
    return wordnet.lch_similarity(synsets1, synsets2)
Example #35
def similarity_by_path(sense1, sense2, option="path", no_path_value=0):
    """
    Returns maximum path similarity between two senses.
    If no path is found between the two senses, returns no_path_value.
    """
    if option.lower() in ["path", "path_similarity"]:  # Path similarity
        sim_dir1 = wn.path_similarity(sense1, sense2)
        sim_dir2 = wn.path_similarity(sense2, sense1)
        if sim_dir1 is None and sim_dir2 is None:
            return no_path_value
        elif sim_dir1 is None:
            return sim_dir2
        elif sim_dir2 is None:
            return sim_dir1
        else:
            return max(sim_dir2, sim_dir1)
    elif option.lower() in ["wup", "wupa", "wu-palmer"]:  # Wu-Palmer
        wup_sim = wn.wup_similarity(sense1, sense2)
        return wup_sim if wup_sim is not None else no_path_value
    elif option.lower() in ['lch', "leacock-chordorow"]:  # Leacock-Chodorow
        if sense1.pos() != sense2.pos():  # lch can't do diff POS
            return no_path_value
        return wn.lch_similarity(sense1, sense2)

#Import IC calculation
from nltk.corpus import wordnet_ic
brown_ic = wordnet_ic.ic('ic-brown-resnik-add1.dat')
bnc_ic = wordnet_ic.ic('ic-bnc-resnik-add1.dat')

#For each pair of synsets, compute distance
for s1 in synsets:
  syn1 = wn.of2ss(s1)
  for s2 in synsets:
    syn2 = wn.of2ss(s2)
    distances_path[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.path_similarity(syn1,syn2)
    distances_lch[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.lch_similarity(syn1,syn2)
    distances_wup[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.wup_similarity(syn1,syn2)
    distances_res[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.res_similarity(syn1,syn2,brown_ic)
    distances_jcn[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.jcn_similarity(syn1,syn2,brown_ic)
    distances_lin[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.lin_similarity(syn1,syn2,brown_ic)
    distances_res_bnc[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.res_similarity(syn1,syn2,bnc_ic)
    distances_jcn_bnc[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.jcn_similarity(syn1,syn2,bnc_ic)
    distances_lin_bnc[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.lin_similarity(syn1,syn2,bnc_ic)
    #distances_path[labelsNLTK.index(s1)][labelsNLTK.index(s2)] =1/(labelsNLTK.index(s2)+1) 
    #distances_lch[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)
    #distances_wup[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)  
    #distances_res[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)  
    #distances_jcn[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)  
    #distances_lin[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)  
    #distances_res_bnc[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)  
    #distances_jcn_bnc[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)  
Example #37
def lch_similarity(synset1, synset2):
    return wn.lch_similarity(synset1, synset2)
    #coeffs['drink'] = 1.0-float(sys.argv[4])
    coeffs['eat'] = 1.0
    coeffs['drink'] = 1.0
    for s in sentences:
        s = s.split()
        if s[1] not in verbs_gref:
            verbs_gref[s[1]] = np.zeros((num_basis, num_basis))
        verbs_gref[s[1]] = verbs_gref[s[1]]+np.outer(matrix[s[0]],matrix[s[2]])

        verbs[s[1]] = np.zeros((num_basis, num_basis))
        simweights = {}
        for v in verbs:
            if sys.argv[3]=='wup':
                simweights[v] = wn.wup_similarity(wn.synset(s[1]+'.v.01'), wn.synset(v+'.v.01'))
            elif sys.argv[3]=='lch':
                simweights[v] = wn.lch_similarity(wn.synset(s[1]+'.v.01'), wn.synset(v+'.v.01'))
            elif sys.argv[3]=='path':
                simweights[v] = wn.path_similarity(wn.synset(s[1]+'.v.01'), wn.synset(v+'.v.01'))
            verbs[s[1]] += coeffs[v]*simweights[v]*verbs_gref[v]
        verbs[s[1]] /= float(sum(simweights.values()))
        
    # Learn the reference matrices using Grefenstette for swallow consume and gulp
    gold_verbs = ['swallow','consume','gulp']
    for gv in gold_verbs:
        with open('train/'+gv+'_train') as f:
            sentences = f.readlines()
        for s in sentences:
            s = s.split()
            if s[1] not in verbs_gref:
                verbs_gref[s[1]] = np.zeros((num_basis, num_basis))
            verbs_gref[s[1]] = verbs_gref[s[1]]+np.outer(matrix[s[0]],matrix[s[2]])
Example #39
 def wn_similarity(synset_1, synset_2):
     return wn.lch_similarity(synset_1, synset_2)
Example #40
File: wordnet.py  Project: zkan/pythainlp
def lch_similarity(synsets1,synsets2):
	return wordnet.lch_similarity(synsets1,synsets2)
Example #41
def lch_sim_fun(vq_words=[]):
    l1 = knowledge = [
        'recite', 'review', 'point', 'recognize', 'describe', 'choose',
        'examine', 'identify', 'enumerate', 'find', 'select', 'what',
        'memorize', 'collect', 'sequence', 'when', 'duplicate', 'who', 'label',
        'write', 'indicate', 'state', 'tabulate', 'which', 'relate', 'show',
        'arrange', 'cite', 'match', 'define', 'locate', 'draw', 'repeat',
        'remember', 'trace', 'read', 'quote', 'spell', 'memorise', 'how',
        'observe', 'recognise', 'copy', 'why', 'outline', 'count', 'name',
        'recall', 'study', 'omit', 'list', 'tell', 'reproduce', 'record',
        'retell', 'meet', 'listen', 'where', 'order', 'view'
    ]

    l2 = comprehension = [
        'compare', 'cite', 'give', 'predict', 'recognize', 'describe',
        'articulate', 'detail', 'order', 'characterize', 'generalize',
        'factor', 'summarize', 'select', 'illustrate', 'visualize', 'group',
        'trace', 'purpose', 'defend', 'rewrite', 'relate', 'approximate',
        'demonstrate', 'indicate', 'add', 'interact', 'tell', 'extrapolate',
        'show', 'rephrase', 'paraphrase', 'infer', 'contrast', 'locate',
        'picture', 'extend', 'associate', 'conclude', 'express', 'interpolate',
        'generalise', 'clarify', 'observe', 'understand', 'differentiate',
        'review', 'distinguish', 'estimate', 'subtract', 'discuss',
        'interpret', 'summarise', 'convert', 'translate', 'compute', 'outline',
        'identify', 'elaborate', 'ask', 'example', 'classify', 'report',
        'restate', 'explain', 'match'
    ]

    l3 = application = [
        'represent', 'show', 'identify', 'participate', 'derive', 'group',
        'calculate', 'graph', 'dramatize', 'choose', 'factor', 'include',
        'allocate', 'handle', 'practice', 'relate',
        'schedule', 'report', 'assess', 'collect', 'investigate', 'categorise',
        'ascertain', 'round', 'sketch', 'transcribe', 'sequence', 'imitate',
        'discover', 'connect', 'tabulate', 'employ', 'avoid', 'experiment',
        'manipulate', 'exercise', 'extend', 'associate', 'modify',
        'personalize', 'dramatise', 'explore', 'teach', 'change', 'perform',
        'summarise', 'act', 'implement', 'assign', 'alphabetize', 'relate',
        'articulate', 'administer', 'subscribe', 'instruct', 'determine',
        'apply', 'establish', 'select', 'illustrate', 'plot', 'use', 'prepare',
        'paint', 'transfer', 'construct', 'process', 'interpret', 'translate',
        'depreciate', 'complete', 'expose', 'acquire', 'adapt', 'link',
        'simulate', 'diminish', 'compute', 'project', 'demonstrate', 'control',
        'predict', 'contribute', 'examine', 'attain', 'capture', 'develop',
        'provide', 'utilize', 'write', 'build', 'interview', 'organise',
        'classify', 'draw', 'express', 'customize', 'price', 'chart',
        'produce', 'plan', 'inform', 'solve', 'correlation', 'model',
        'operate', 'convert'
    ]

    l4 = analysis = [
        'find', 'focus', 'identify', 'query', 'debate', 'relationships',
        'derive', 'group', 'calculate', 'explain', 'theme', 'choose', 'reason',
        'proof', 'reorganise', 'point', 'interrupt', 'difference', 'arrange',
        'list', 'investigate', 'classify', 'discover', 'motive', 'deduce',
        'connect', 'advertise', 'detect', 'confirm', 'research', 'experiment',
        'size', 'cause', 'contrast', 'inspect', 'explore', 'distinguish',
        'layout', 'optimize', 'interpret', 'question', 'omit', 'depth',
        'ensure', 'distinction', 'inference', 'divide', 'relate', 'manage',
        'rank', 'maximize', 'categorize', 'establish', 'select', 'illustrate',
        'subdivide', 'transform', 'comparing', 'assumption', 'analyze',
        'function', 'analyse', 'train', 'differentiate', 'breadboard',
        'dissect', 'see', 'limit', 'highlight', 'appraise', 'diagnose',
        'blueprint', 'compare', 'recognize', 'characterize', 'examine', 'file',
        'discriminate', 'discussion', 'isolate', 'inventory', 'test', 'survey',
        'document', 'infer', 'categorise', 'breakdown', 'separate', 'effect',
        'diagram', 'simplify', 'point', 'audit', 'criticize', 'outline',
        'correlate', 'minimize', 'prioritize', 'organise', 'model', 'order',
        'test'
    ]

    l5 = synthesis = [
        'incorporate', 'code', 'reorganize', 'invent', 'generalize', 'compose',
        'overhaul', 'explain', 'hypothesize', 'program', 'combine', 'choose',
        'frame', 'integrate', 'collaborate', 'handle', 'format', 'propose',
        'express', 'progress', 'reconstruct', 'speculate', 'discuss', 'comply',
        'arrange', 'intervene', 'collect', 'hypothesise', 'debug', 'enhance',
        'anticipate', 'originate', 'formulate', 'discover', 'reinforce',
        'design', 'animate', 'substitute', 'network', 'join', 'experiment',
        'adapt', 'lecture', 'contrast', 'extend', 'visualise', 'modify',
        'makeup', 'prescribe', 'imagine', 'interface', 'estimate', 'generate',
        'change', 'improve', 'convert', 'elaborate', 'initiate',
        'individualize', 'think', 'revise', 'organize', 'relate', 'assemble',
        'synthesize', 'categorize', 'summarize', 'prepare', 'create',
        'transform', 'construct', 'predict', 'theorise', 'minimise', 'tell',
        'cope', 'maximise', 'innovate', 'specify', 'communicate', 'setup',
        'pretend', 'budget', 'compile', 'suppose', 'tabulate', 'delete',
        'compare', 'rewrite', 'devise', 'abstract', 'dictate', 'cultivate',
        'happen', 'portray', 'depict', 'develop', 'perform', 'make', 'write',
        'build', 'test', 'negotiate', 'rearrange', 'simplify', 'produce',
        'plan', 'validate', 'structure', 'add', 'outline', 'facilitate',
        'correspond', 'solve', 'model', 'original'
    ]

    l6 = evaluation = [
        'validate', 'compare', 'deduct', 'useful', 'consider', 'conclude',
        'predict', 'relate', 'describe', 'influence', 'rank', 'assess', 'rate',
        'persuade', 'determine', 'measure', 'critique', 'mark', 'summarize',
        'select', 'discuss', 'discriminate', 'prove', 'verify', 'defend',
        'support', 'debate', 'grade', 'argue', 'disprove', 'recommend', 'test',
        'infer', 'contrast', 'choose', 'attach', 'good', 'importance',
        'evaluate', 'criteria', 'prescribe', 'hire', 'award', 'perceive',
        'dispute', 'know', 'decide', 'opinion', 'judge', 'estimate', 'why',
        'interpret', 'counsel', 'criticize', 'effective', 'prioritize',
        'value', 'agree', 'bad', 'convince', 'prioritise', 'release', 'frame',
        'appraise', 'explain', 'criticise', 'justify'
    ]

    cl_listoflist = []
    cl_listoflist.append(l1)
    cl_listoflist.append(l2)
    cl_listoflist.append(l3)
    cl_listoflist.append(l4)
    cl_listoflist.append(l5)
    cl_listoflist.append(l6)

    cnt_log = 0

    final_level_of_ques = -1
    final_sim_of_ques_with_all_levels = [0, 0, 0, 0, 0, 0]
    final_area_sim_of_ques_with_all_levels = [0, 0, 0, 0, 0, 0]
    for vq_word in vq_words:
        # calculating sum and avg of sim of word with each list
        # print("\n\ndoing for word -----" , vq_word)
        sum_of_sim_all_levels = []
        avg_of_sim_all_levels = []
        for i, list_i in enumerate(cl_listoflist):
            # print("list number  : " , i)
            sum_of_sim = 0
            for l_word in list_i:
                # print("two words " , vq_word , l_word)
                if len(wordnet.synsets(vq_word)) == 0:
                    # print vq_word
                    break
                vq_word_syn = wordnet.synsets(vq_word)[0]
                # print("l_word => wordnet.synsets(l_word)",l_word, "=>" ,wordnet.synsets(l_word))
                if len(wordnet.synsets(l_word)) == 0:
                    # print l_word
                    continue
                l_word_syn = wordnet.synsets(l_word)[0]
                try:
                    lch_sim = wordnet.lch_similarity(vq_word_syn, l_word_syn)
                except:
                    # print vq_word_syn,l_word_syn,"->exception"
                    continue
                # lch_sim = (vq_word_syn).jcn_similarity(l_word_syn)
                if lch_sim is not None:
                    sum_of_sim = sum_of_sim + lch_sim
                    # sum_of_sim += 1
                    # print(" counted ",vq_word,l_word , "synset " , vq_word_syn , l_word_syn)
                else:
                    cnt_log = cnt_log + 1
                    # print("Not counted             ",vq_word,l_word , "synset " , vq_word_syn , l_word_syn)
                # input()
            sum_of_sim_all_levels.append(sum_of_sim)
            avg_of_sim_all_levels.append(sum_of_sim / len(list_i))

        # print("\n\n printing all lists")
        # for l in cl_listoflist:
        # 	print(l)

        # QUES WORK BEGIN
        # print ("Sim")
        for i in range(0, 6):
            final_sim_of_ques_with_all_levels[i] += avg_of_sim_all_levels[i]
        # 	print (final_sim_of_ques_with_all_levels[i],",")
        # print("\n")

        # print("area sim")
        for i in range(0, 6):
            final_area_sim_of_ques_with_all_levels[i] += sum_of_sim_all_levels[
                i]
        # 	print (final_area_sim_of_ques_with_all_levels[i],",")
        # print("\n")
        # print ("cnt_log",cnt_log)

    # print ("Final Sim")
    # for i in range(0,6):
    # 	print (final_sim_of_ques_with_all_levels[i],",")
    # print("\n")

    # print ("Final Area Sim")
    # for i in range(0,6):
    # 	print (final_area_sim_of_ques_with_all_levels[i],",")
    # print("\n")

    #	maximum of all similarities values to find cl level
    final_level = 0
    max_sim = final_sim_of_ques_with_all_levels[0]
    for index, sim in enumerate(final_sim_of_ques_with_all_levels):
        if sim > max_sim:
            max_sim = sim
            final_level = index

    # print("\n")
    # print("avg wali list: " , avg_of_sim_all_levels)

    # print( "sum wali list: " , sum_of_sim_all_levels)

    # 	finding if word will be classified in  more than two levels
    count = 0
    indices_of_same_sim = []
    for i, sim in enumerate(final_sim_of_ques_with_all_levels):
        if sim == max_sim:
            count += 1
            indices_of_same_sim.append(i)

    # 	if word is in more than two levels
    if len(indices_of_same_sim) > 1:
        # print ("ques is in more than two levels")
        same_sim_list = []
        for index in indices_of_same_sim:
            same_sim_list.append(final_area_sim_of_ques_with_all_levels[index])

        max_sim_area = same_sim_list[0]
        for sim_area, index_of_max_sim in zip(same_sim_list,
                                              indices_of_same_sim):
            if sim_area > max_sim_area:
                max_sim_area = sim_area
                final_level = index_of_max_sim

    # print("final_level ",final_level)
    return final_level
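
Usage sketch: the function maps a question's words to the index (0-5) of the Bloom's-taxonomy level whose verb list they resemble most:

# e.g. for "Describe and identify the parts of a cell":
level = lch_sim_fun(['describe', 'identify'])
print(level)  # 0..5 -> knowledge, comprehension, application, analysis, synthesis, evaluation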