示例#1
0
class EntitySimilarity:

    """Entity similarity and relatedness using DBpedia links and entity concepts.

    ``similarity`` compares two entities through the synsets mapped from
    their YAGO type URIs; ``relatedness`` compares them through the counts
    reported by the ``StatSPARQL`` helper.
    """

    def __init__(self):
        """Create the feature, statistics and YAGO helpers used by the methods."""
        self._features = EntityFeatures()
        self._stats = StatSPARQL()
        self._yago = YagoTypeSimilarity()

    def _yago_synsets(self, entity):
        """Return the truthy synsets mapped from the YAGO types of *entity*.

        Only type URIs containing ``'class/yago'`` are considered, and each
        of them is passed to ``yago2synset`` exactly once.
        """
        mapped = (
            self._yago.yago2synset(concept)
            for concept in self._features.type(entity)
            if 'class/yago' in concept
        )
        return [synset for synset in mapped if synset]

    def _most_informative(self, synsets, limit=5):
        """Return up to *limit* distinct synsets, highest ``synset_ic`` first."""
        ic_by_synset = Counter({s: self._yago.synset_ic(s) for s in synsets})
        return [synset for synset, _ in ic_by_synset.most_common(limit)]

    def similarity(self, entity1, entity2):
        """Return the averaged best-match synset similarity of two entities.

        For each entity the (at most five) synsets with the highest
        ``synset_ic`` are kept. Every kept synset of one entity is matched
        with its best-scoring counterpart of the other entity; the best
        scores are averaged per direction and the two directions are then
        averaged.

        Args:
            entity1: Entity identifier accepted by ``EntityFeatures.type``.
            entity2: Entity identifier accepted by ``EntityFeatures.type``.

        Returns:
            ``0.0`` when either entity has no usable synset, otherwise the
            mean of the two directed scores.
        """
        synsets_1 = self._yago_synsets(entity1)
        synsets_2 = self._yago_synsets(entity2)
        if not synsets_1 or not synsets_2:
            return 0.0
        top_1 = self._most_informative(synsets_1)
        top_2 = self._most_informative(synsets_2)
        # Score every pair once. Rows follow top_1, columns follow top_2, and
        # the entity1 synset is always the first argument, so both directed
        # scores can be read from the same table.
        pair_scores = [
            [self._yago.similarity(syn1, syn2) for syn2 in top_2]
            for syn1 in top_1
        ]
        score1 = sum(max(row) for row in pair_scores) / len(top_1)
        score2 = sum(max(column) for column in zip(*pair_scores)) / len(top_2)
        return (score1 + score2) / 2.0

    def relatedness(self, entity1, entity2):
        """Return the log-ratio score of two entities from their link counts.

        With ``ab = entity_share(entity1, entity2)``,
        ``a = entity_relation(entity1)``, ``b = entity_relation(entity2)``
        and ``N = entity_N()``, the result is
        ``(log(max(a, b)) - log(ab)) / (log(N) - log(min(a, b)))``.

        Returns:
            ``0.0`` when ``ab`` is zero, otherwise the ratio above.

        Raises:
            ZeroDivisionError: if ``N`` equals ``min(a, b)``.
            ValueError: if ``math.log`` receives a non-positive count.
        """
        ab = self._stats.entity_share(entity1, entity2)
        if ab == 0:
            # Float zero keeps the return type the same on every path.
            return 0.0
        a = self._stats.entity_relation(entity1)
        b = self._stats.entity_relation(entity2)
        x = math.log(max(a, b)) - math.log(ab)
        y = math.log(self._stats.entity_N()) - math.log(min(a, b))
        return x / y
示例#2
0
 def __init__(self):
     """Build the helper objects and initialise the per-instance bookkeeping.

     ``entity_N`` stores the value returned by ``StatSPARQL.entity_N()`` at
     construction time. ``entity_stats`` and ``entity_share_stats`` start as
     empty dicts (presumably caches filled by methods not shown here --
     confirm against the rest of the class).
     """
     self._features = EntityFeatures()
     stats_client = StatSPARQL()
     self._stats = stats_client
     # Ask for the total once, up front, and keep the answer on the instance.
     self.entity_N = stats_client.entity_N()
     self._yago = YagoTypeSimilarity()
     self.entity_stats, self.entity_share_stats = {}, {}
示例#3
0
def test_sparql():
    """Smoke-test name lookup, feature retrieval and type retrieval.

    Each check only requires that the call returns something other than
    ``None``.
    """
    from sematch.semantic.sparql import EntityFeatures, NameSPARQL
    type_source = EntityFeatures()
    resolver = NameSPARQL()
    # NOTE(review): this second EntityFeatures instance looks redundant with
    # ``type_source``; it is kept so the test builds the same objects as before.
    feature_source = EntityFeatures()
    # Take the first candidate returned for each name, in this order.
    first, second = [
        resolver.name2entities(label)[0]
        for label in ('Michael Jordan', 'Michael I. Jordan')
    ]
    for entity in (first, second):
        assert entity is not None
    for entity in (first, second):
        assert feature_source.features(entity) is not None
    assert type_source.type('http://dbpedia.org/resource/Star_Wars') is not None
示例#4
0
class EntitySimilarity:
    """Entity similarity and relatedness using DBpedia links and entity concepts.

    ``similarity`` works on the synsets mapped from each entity's YAGO type
    URIs; ``relatedness`` works on the counts returned by ``StatSPARQL``.
    Both public methods are wrapped with ``memoized``.
    """
    def __init__(self):
        """Create the feature, statistics and YAGO helpers used by the methods."""
        self._features = EntityFeatures()
        self._stats = StatSPARQL()
        self._yago = YagoTypeSimilarity()

    def _yago_synsets(self, entity):
        """Return the truthy synsets mapped from the YAGO types of *entity*.

        Type URIs without ``'class/yago'`` are skipped; every remaining URI
        is passed to ``yago2synset`` exactly once.
        """
        synsets = []
        for concept in self._features.type(entity):
            if 'class/yago' not in concept:
                continue
            synset = self._yago.yago2synset(concept)
            if synset:
                synsets.append(synset)
        return synsets

    def _top_by_ic(self, synsets, limit=5):
        """Return up to *limit* distinct synsets, highest ``synset_ic`` first."""
        ranked = Counter({
            synset: self._yago.synset_ic(synset) for synset in synsets
        }).most_common(limit)
        return [synset for synset, _ in ranked]

    # NOTE(review): ``memoized`` is defined outside this snippet; if it keys on
    # all positional arguments the cache also holds on to ``self`` -- confirm.
    @memoized
    def similarity(self, entity1, entity2):
        """Return the averaged best-match synset similarity of two entities.

        The (at most five) synsets with the highest ``synset_ic`` are kept
        per entity. Each kept synset is paired with its best-scoring
        counterpart from the other entity; the best scores are averaged per
        direction, and the two directed averages are averaged again.

        Args:
            entity1: Entity identifier accepted by ``EntityFeatures.type``.
            entity2: Entity identifier accepted by ``EntityFeatures.type``.

        Returns:
            ``0.0`` when either entity has no usable synset, otherwise the
            mean of the two directed scores.
        """
        synsets_1 = self._yago_synsets(entity1)
        synsets_2 = self._yago_synsets(entity2)
        if not synsets_1 or not synsets_2:
            return 0.0
        s1 = self._top_by_ic(synsets_1)
        s2 = self._top_by_ic(synsets_2)
        # One similarity call per pair, entity1 synset first; rows are s1 and
        # columns are s2, so both directions reuse the same scores.
        table = [
            [self._yago.similarity(syn1, syn2) for syn2 in s2]
            for syn1 in s1
        ]
        score1 = sum(max(row) for row in table) / len(s1)
        score2 = sum(max(column) for column in zip(*table)) / len(s2)
        return (score1 + score2) / 2.0

    @memoized
    def relatedness(self, entity1, entity2):
        """Return the log-ratio score of two entities from their link counts.

        With ``ab = entity_share(entity1, entity2)``,
        ``a = entity_relation(entity1)``, ``b = entity_relation(entity2)``
        and ``N = entity_N()``, the result is
        ``(log(max(a, b)) - log(ab)) / (log(N) - log(min(a, b)))``.

        Returns:
            ``0.0`` when ``ab`` is zero, otherwise the ratio above.

        Raises:
            ZeroDivisionError: if ``N`` equals ``min(a, b)``.
            ValueError: if ``math.log`` receives a non-positive count.
        """
        ab = self._stats.entity_share(entity1, entity2)
        if ab == 0:
            # Float zero keeps the return type the same on every path.
            return 0.0
        a = self._stats.entity_relation(entity1)
        b = self._stats.entity_relation(entity2)
        x = math.log(max(a, b)) - math.log(ab)
        y = math.log(self._stats.entity_N()) - math.log(min(a, b))
        return x / y
示例#5
0
def test_extraction():
    """Smoke-test the ``Extraction`` helpers on one entity's features.

    The abstract is run through the noun, verb and chunk extractors, and the
    category value through ``category_features`` then ``category2words``;
    each result only has to be something other than ``None``.
    """
    from sematch.nlp import Extraction
    from sematch.semantic.sparql import EntityFeatures
    entity_uri = 'http://dbpedia.org/resource/Technical_University_of_Madrid'
    record = EntityFeatures().features(entity_uri)
    extractor = Extraction()
    abstract_checks = (
        extractor.extract_nouns,
        extractor.extract_verbs,
        extractor.extract_chunks_doc,
    )
    for run in abstract_checks:
        assert run(record['abstract']) is not None
    category_features = extractor.category_features(record['category'])
    assert extractor.category2words(category_features) is not None
示例#6
0
def test_sparql():
    """Check that the SPARQL helpers return non-``None`` results.

    Covers ``NameSPARQL.name2entities`` (first candidate for two names),
    ``EntityFeatures.features`` for both candidates, and
    ``EntityFeatures.type`` for a fixed resource URI.
    """
    from sematch.semantic.sparql import EntityFeatures, NameSPARQL
    types_helper = EntityFeatures()
    names_helper = NameSPARQL()
    # NOTE(review): a separate EntityFeatures instance is used for features();
    # it appears interchangeable with ``types_helper`` -- confirm before merging.
    features_helper = EntityFeatures()
    labels = ('Michael Jordan', 'Michael I. Jordan')
    # Resolve both names before any assertion runs.
    candidates = [names_helper.name2entities(label)[0] for label in labels]
    for candidate in candidates:
        assert candidate is not None
    for candidate in candidates:
        assert features_helper.features(candidate) is not None
    star_wars = 'http://dbpedia.org/resource/Star_Wars'
    assert types_helper.type(star_wars) is not None
示例#7
0
 def __init__(self):
     """Attach one ``EntityFeatures``, ``StatSPARQL`` and ``YagoTypeSimilarity`` helper."""
     # Built and attached one at a time, in this order.
     helpers = (
         ('_features', EntityFeatures),
         ('_stats', StatSPARQL),
         ('_yago', YagoTypeSimilarity),
     )
     for attribute, factory in helpers:
         setattr(self, attribute, factory())
示例#8
0
def test_rake():
    """Smoke-test ``RAKE.extract`` on the abstract of one entity.

    The only requirement is that the call returns something other than
    ``None``.
    """
    from sematch.nlp import RAKE
    from sematch.semantic.sparql import EntityFeatures
    entity_uri = 'http://dbpedia.org/resource/Technical_University_of_Madrid'
    record = EntityFeatures().features(entity_uri)
    rake_tool = RAKE()
    extracted = rake_tool.extract(record['abstract'])
    assert extracted is not None
示例#9
0
 def __init__(self):
     """Instantiate the three helper objects and store them on the instance."""
     # Constructed in the order features -> stats -> yago.
     feature_helper = EntityFeatures()
     stats_helper = StatSPARQL()
     yago_helper = YagoTypeSimilarity()
     self._features, self._stats, self._yago = (
         feature_helper,
         stats_helper,
         yago_helper,
     )