# Example #1
# 0
def test_word_similarity():
    """Smoke-test WordNetSimilarity across concept, mono- and cross-lingual APIs.

    Each call should yield a (non-None) similarity score; the expected
    approximate values are noted per case.
    """
    from sematch.semantic.similarity import WordNetSimilarity
    wns = WordNetSimilarity()
    dog = wns.word2synset('dog')
    cat = wns.word2synset('cat')
    # Concept-level similarity using the Path method (~0.2).
    assert wns.similarity(dog[0], cat[0], 'path') is not None
    # English word similarity using the Li method (~0.449327301063).
    assert wns.word_similarity('dog', 'cat', 'li') is not None
    # Monolingual cases: (word1, word2, language, method).
    monolingual = [
        ('perro', 'gato', 'spa', 'lin'),  # Lin, Spanish ~0.876800984373
        ('狗', '猫', 'cmn', 'wup'),        # Wu & Palmer, Chinese ~0.857142857143
    ]
    for w1, w2, lang, metric in monolingual:
        assert wns.monol_word_similarity(w1, w2, lang, metric) is not None
    # Cross-lingual cases: (word1, word2, lang1, lang2, method).
    crosslingual = [
        ('perro', 'cat', 'spa', 'eng', 'res'),  # Resnik ~7.91166650904
        ('perro', '猫', 'spa', 'cmn', 'jcn'),    # Jiang & Conrath ~0.31023804699
        ('狗', 'cat', 'cmn', 'eng', 'wpath'),    # WPath ~0.593666388463
    ]
    for w1, w2, l1, l2, metric in crosslingual:
        assert wns.crossl_word_similarity(w1, w2, l1, l2, metric) is not None
def map_subjects(subjects: list, filter_dis=0.2):
    """Pair up semantically similar words across subject word-lists.

    Compares every word of each inner list against every word of every
    *later* inner list using Chinese WordNet Wu & Palmer similarity,
    and records the index pairs whose similarity exceeds ``filter_dis``.

    :param subjects: list of word lists, e.g. [['中国人', '安乐死'], ['太阳', '很好']]
    :param filter_dis: minimum similarity for a pair to be kept (default 0.2)
    :return: list of ``([row_i, col_i], [row_j, col_j])`` index pairs
    """
    wns = WordNetSimilarity()
    pair_idxs = []
    for i, words in enumerate(subjects):
        # Only compare against later lists so each unordered pair is
        # visited exactly once.
        for j in range(i + 1, len(subjects)):
            other = subjects[j]
            # Use enumerate() rather than list.index(): index() always
            # returns the FIRST occurrence, so duplicate words would be
            # reported at the wrong position (and each lookup is O(n)).
            for vi, v in enumerate(words):
                for ci, cv in enumerate(other):
                    score = wns.monol_word_similarity(v, cv, 'cmn', 'wup')
                    if score > filter_dis:
                        # Pairing index: (row, column) for each side.
                        pair_idxs.append(([i, vi], [j, ci]))
    return pair_idxs
# Example #3
# 0
def test_wordnet_similarity():
    """Exercise WordNetSimilarity's concept, word, mono- and cross-lingual APIs."""
    from sematch.semantic.similarity import WordNetSimilarity
    wns = WordNetSimilarity()
    dog_synsets = wns.word2synset('dog')
    cat_synsets = wns.word2synset('cat')
    # Concept similarity, Path method (~0.2).
    path_score = wns.similarity(dog_synsets[0], cat_synsets[0], 'path')
    assert path_score is not None
    # English word similarity, Li method (~0.449327301063).
    li_score = wns.word_similarity('dog', 'cat', 'li')
    assert li_score is not None
    # Spanish word similarity, Lin method (~0.876800984373).
    lin_score = wns.monol_word_similarity('perro', 'gato', 'spa', 'lin')
    assert lin_score is not None
    # Chinese word similarity, Wu & Palmer method (~0.857142857143).
    wup_score = wns.monol_word_similarity('狗', '猫', 'cmn', 'wup')
    assert wup_score is not None
    # Spanish-English cross-lingual similarity, Resnik method (~7.91166650904).
    res_score = wns.crossl_word_similarity('perro', 'cat', 'spa', 'eng', 'res')
    assert res_score is not None
    # Spanish-Chinese cross-lingual similarity, Jiang & Conrath (~0.31023804699).
    jcn_score = wns.crossl_word_similarity('perro', '猫', 'spa', 'cmn', 'jcn')
    assert jcn_score is not None
    # Chinese-English cross-lingual similarity, WPath method (~0.593666388463).
    wpath_score = wns.crossl_word_similarity('狗', 'cat', 'cmn', 'eng', 'wpath')
    assert wpath_score is not None
# Example #4
# 0
def test_wordsim_evaluation():
    """Run word-similarity benchmark evaluations and a significance test.

    Fix: the original used Python 2 ``print`` statements, which are a
    SyntaxError under Python 3 — the dialect the rest of this file targets.
    """
    from sematch.evaluation import WordSimEvaluation
    from sematch.semantic.similarity import WordNetSimilarity
    evaluation = WordSimEvaluation()
    print(evaluation.dataset_names())
    wns = WordNetSimilarity()
    # Define similarity metrics as two-argument callables.
    wpath = lambda x, y: wns.word_similarity_wpath(x, y, 0.8)
    # Evaluate the metric on the SimLex noun subset.
    print(evaluation.evaluate_metric('wpath', wpath, 'noun_simlex'))
    # Perform Steiger's Z significance test between the two metrics.
    print(evaluation.statistical_test('wpath', 'path', 'noun_simlex'))
    wpath_es = lambda x, y: wns.monol_word_similarity(x, y, 'spa', 'path')
    wpath_en_es = lambda x, y: wns.crossl_word_similarity(
        x, y, 'eng', 'spa', 'wpath')
    print(evaluation.evaluate_metric('wpath_es', wpath_es, 'rg65_spanish'))
    print(evaluation.evaluate_metric('wpath_en_es', wpath_en_es, 'rg65_EN-ES'))
from sematch.semantic.similarity import WordNetSimilarity
# import jieba
# import synonyms
# import jieba.posseg as pseg

# Demo: Chinese word similarity via WordNet, Wu & Palmer method.
wns = WordNetSimilarity()
# NOTE(review): the result of this call is discarded — presumably a
# warm-up call; otherwise it is redundant with the printed call below.
wns.monol_word_similarity('狗', '猫', 'cmn', 'wup')
# print(wns.word_similarity('dog', 'cat', 'li'))
# print(wns.monol_word_similarity('忧患', '安乐', 'cmn', 'wup'))
# Print the score in both argument orders (checking symmetry of the measure).
print(wns.monol_word_similarity('狗', '猫', 'cmn', 'wup'))
print(wns.monol_word_similarity('猫', '狗', 'cmn', 'wup'))
# print(wns.monol_word_similarity('电脑', '键盘', 'cmn', 'wup'))
# print(wns.monol_word_similarity('电脑', '电脑', 'cmn', 'wup'))
# print(wns.monol_word_similarity('国家', '国家', 'cmn', 'wup'))
#
# def parse_token(data):
#     # words = []
#     # for d in data:
#     #     # jieba.enable_paddle()
#     seg_data = pseg.cut(data, use_paddle=True) #default
#     # per_word = [str(word) for word in seg_data if not str(word) in jieba_sp_words]
#     # for word, flag in seg_data:
#     #     print(f'{word}, {flag}')
#     # words.append(seg_data)
#     return seg_data
#
#
# def word_flag(sentence:list):
#     for word,flag in sentence:
#         return word,flag
#
# Example #6
# 0
from sematch.semantic.similarity import WordNetSimilarity
wns = WordNetSimilarity()

# NOTE(review): every return value below is discarded — these lines only
# demonstrate the API; wrap them in print() to actually see the scores.
# Computing English word similarity using Li method
wns.word_similarity('dog', 'cat', 'li')  # 0.449327301063
# Computing Spanish word similarity using Lin method
wns.monol_word_similarity('perro', 'gato', 'spa', 'lin')  #0.876800984373
# Computing Chinese word similarity using Wu & Palmer method
wns.monol_word_similarity('狗', '猫', 'cmn', 'wup')  # 0.857142857143
# Computing Spanish and English word similarity using Resnik method
wns.crossl_word_similarity('perro', 'cat', 'spa', 'eng', 'res')  #7.91166650904
# Computing Spanish and Chinese word similarity using Jiang & Conrad method
wns.crossl_word_similarity('perro', '猫', 'spa', 'cmn', 'jcn')  #0.31023804699
# Computing Chinese and English word similarity using WPath method
wns.crossl_word_similarity('狗', 'cat', 'cmn', 'eng', 'wpath')  #0.593666388463