class EntitySimilarity:
    """This class implements entity relatedness using DBpedia links and entity concepts.

    ``similarity`` compares the YAGO-typed concepts of two entities through
    the ``YagoTypeSimilarity`` helper; ``relatedness`` combines the counts
    returned by the ``StatSPARQL`` helper.
    """

    # Substring that selects YAGO class URIs among an entity's types.
    _YAGO_MARKER = 'class/yago'
    # Number of highest-scoring synsets kept per entity before comparison.
    _TOP_SYNSETS = 5

    def __init__(self):
        """Create the feature, statistics and YAGO helpers used by the scorers."""
        self._features = EntityFeatures()
        self._stats = StatSPARQL()
        self._yago = YagoTypeSimilarity()

    def _yago_synsets(self, entity):
        """Return the synsets mapped from the YAGO-typed concepts of *entity*."""
        synsets = []
        for concept in self._features.type(entity):
            if self._YAGO_MARKER not in concept:
                continue
            # Look the synset up once; concepts with a falsy mapping are skipped.
            synset = self._yago.yago2synset(concept)
            if synset:
                synsets.append(synset)
        return synsets

    def _top_by_ic(self, synsets):
        """Return up to ``_TOP_SYNSETS`` distinct synsets with the largest ``synset_ic``."""
        ranked = Counter({s: self._yago.synset_ic(s) for s in synsets})
        return [s for s, _ in ranked.most_common(self._TOP_SYNSETS)]

    def similarity(self, entity1, entity2):
        """Return the concept-based similarity of two entities.

        For each entity the YAGO-typed concepts are mapped to synsets and the
        top synsets by ``synset_ic`` are kept. The score is the mean of two
        directional averages: for every synset on one side, the best
        similarity to any synset on the other side.

        :param entity1: first entity identifier, passed to ``EntityFeatures.type``
        :param entity2: second entity identifier
        :return: the averaged score, or ``0.0`` when either entity has no
            usable synset
        """
        synsets_1 = self._yago_synsets(entity1)
        synsets_2 = self._yago_synsets(entity2)
        if not synsets_1 or not synsets_2:
            return 0.0
        s1 = self._top_by_ic(synsets_1)
        s2 = self._top_by_ic(synsets_2)
        sim = self._yago.similarity
        # Argument order (entity1 synset first) is kept in both directions.
        score1 = sum(max(sim(syn1, syn2) for syn2 in s2) for syn1 in s1) / len(s1)
        score2 = sum(max(sim(syn1, syn2) for syn1 in s1) for syn2 in s2) / len(s2)
        return (score1 + score2) / 2.0

    def relatedness(self, entity1, entity2):
        """Return a link-based score for two entities.

        With ``ab = entity_share(entity1, entity2)``, ``a`` and ``b`` the
        ``entity_relation`` values and ``N = entity_N()``, the result is
        ``(log(max(a, b)) - log(ab)) / (log(N) - log(min(a, b)))``.

        :return: ``0.0`` when ``ab`` is zero, otherwise the ratio above
        """
        ab = self._stats.entity_share(entity1, entity2)
        if ab == 0:
            # Float zero keeps the return type consistent with the other path.
            return 0.0
        a = self._stats.entity_relation(entity1)
        b = self._stats.entity_relation(entity2)
        x = math.log(max(a, b)) - math.log(ab)
        # NOTE(review): y is zero when entity_N() equals min(a, b), which
        # raises ZeroDivisionError below -- confirm that cannot happen.
        y = math.log(self._stats.entity_N()) - math.log(min(a, b))
        return x / y
def __init__(self):
    """Set up the SPARQL/YAGO helpers and the empty per-instance stat tables."""
    self._features = EntityFeatures()

    # Keep the statistics helper in a local so the total can be read from it
    # right after construction.
    stats = StatSPARQL()
    self._stats = stats
    self.entity_N = stats.entity_N()

    self._yago = YagoTypeSimilarity()

    # Both tables start empty.
    self.entity_stats = {}
    self.entity_share_stats = {}
def test_sparql():
    """Smoke-test name lookup plus feature and type retrieval.

    Resolves two names to their first candidate entity and checks that the
    features of both, and the types of a fixed DBpedia resource URI, are not
    ``None``.
    """
    from sematch.semantic.sparql import EntityFeatures, NameSPARQL

    # A single EntityFeatures instance serves both the feature and the type
    # lookups; the original built two identical ones.
    features = EntityFeatures()
    names = NameSPARQL()

    x = names.name2entities('Michael Jordan')[0]
    y = names.name2entities('Michael I. Jordan')[0]
    assert x is not None
    assert y is not None
    assert features.features(x) is not None
    assert features.features(y) is not None
    assert features.type('http://dbpedia.org/resource/Star_Wars') is not None
class EntitySimilarity:
    """This class implements entity relatedness using DBpedia links and entity concepts.

    ``similarity`` compares the YAGO-typed concepts of two entities through
    the ``YagoTypeSimilarity`` helper; ``relatedness`` combines the counts
    returned by the ``StatSPARQL`` helper. Both public scorers are wrapped
    with ``memoized``.
    """

    # Substring that selects YAGO class URIs among an entity's types.
    _YAGO_MARKER = 'class/yago'
    # Number of highest-scoring synsets kept per entity before comparison.
    _TOP_SYNSETS = 5

    def __init__(self):
        """Create the feature, statistics and YAGO helpers used by the scorers."""
        self._features = EntityFeatures()
        self._stats = StatSPARQL()
        self._yago = YagoTypeSimilarity()

    def _yago_synsets(self, entity):
        """Return the synsets mapped from the YAGO-typed concepts of *entity*."""
        synsets = []
        for concept in self._features.type(entity):
            if self._YAGO_MARKER not in concept:
                continue
            # Look the synset up once; concepts with a falsy mapping are skipped.
            synset = self._yago.yago2synset(concept)
            if synset:
                synsets.append(synset)
        return synsets

    def _top_by_ic(self, synsets):
        """Return up to ``_TOP_SYNSETS`` distinct synsets with the largest ``synset_ic``."""
        ranked = Counter({s: self._yago.synset_ic(s) for s in synsets})
        return [s for s, _ in ranked.most_common(self._TOP_SYNSETS)]

    # NOTE(review): memoized is defined outside this block; presumably it
    # caches results by call arguments -- confirm how it treats ``self``.
    @memoized
    def similarity(self, entity1, entity2):
        """Return the concept-based similarity of two entities.

        For each entity the YAGO-typed concepts are mapped to synsets and the
        top synsets by ``synset_ic`` are kept. The score is the mean of two
        directional averages: for every synset on one side, the best
        similarity to any synset on the other side.

        :param entity1: first entity identifier, passed to ``EntityFeatures.type``
        :param entity2: second entity identifier
        :return: the averaged score, or ``0.0`` when either entity has no
            usable synset
        """
        synsets_1 = self._yago_synsets(entity1)
        synsets_2 = self._yago_synsets(entity2)
        if not synsets_1 or not synsets_2:
            return 0.0
        s1 = self._top_by_ic(synsets_1)
        s2 = self._top_by_ic(synsets_2)
        sim = self._yago.similarity
        # Argument order (entity1 synset first) is kept in both directions.
        score1 = sum(max(sim(syn1, syn2) for syn2 in s2) for syn1 in s1) / len(s1)
        score2 = sum(max(sim(syn1, syn2) for syn1 in s1) for syn2 in s2) / len(s2)
        return (score1 + score2) / 2.0

    @memoized
    def relatedness(self, entity1, entity2):
        """Return a link-based score for two entities.

        With ``ab = entity_share(entity1, entity2)``, ``a`` and ``b`` the
        ``entity_relation`` values and ``N = entity_N()``, the result is
        ``(log(max(a, b)) - log(ab)) / (log(N) - log(min(a, b)))``.

        :return: ``0.0`` when ``ab`` is zero, otherwise the ratio above
        """
        ab = self._stats.entity_share(entity1, entity2)
        if ab == 0:
            # Float zero keeps the return type consistent with the other path.
            return 0.0
        a = self._stats.entity_relation(entity1)
        b = self._stats.entity_relation(entity2)
        x = math.log(max(a, b)) - math.log(ab)
        # NOTE(review): y is zero when entity_N() equals min(a, b), which
        # raises ZeroDivisionError below -- confirm that cannot happen.
        y = math.log(self._stats.entity_N()) - math.log(min(a, b))
        return x / y
def test_extraction():
    """Check that Extraction returns values for an entity's abstract and categories."""
    from sematch.nlp import Extraction
    from sematch.semantic.sparql import EntityFeatures

    entity_uri = 'http://dbpedia.org/resource/Technical_University_of_Madrid'
    entity = EntityFeatures().features(entity_uri)
    extractor = Extraction()

    # Noun, verb and chunk extraction all run over the same abstract text.
    abstract = entity['abstract']
    assert extractor.extract_nouns(abstract) is not None
    assert extractor.extract_verbs(abstract) is not None
    assert extractor.extract_chunks_doc(abstract) is not None

    # Category features are turned into words in a second step.
    categories = extractor.category_features(entity['category'])
    words = extractor.category2words(categories)
    assert words is not None
def __init__(self):
    """Instantiate the three helper objects this instance holds."""
    # Entity feature lookups.
    self._features = EntityFeatures()
    # Statistics queries.
    self._stats = StatSPARQL()
    # YAGO type similarity.
    self._yago = YagoTypeSimilarity()
def test_rake():
    """Check that RAKE extraction over an entity abstract returns a value."""
    from sematch.nlp import RAKE
    from sematch.semantic.sparql import EntityFeatures

    entity_uri = 'http://dbpedia.org/resource/Technical_University_of_Madrid'
    entity = EntityFeatures().features(entity_uri)

    keyword_extractor = RAKE()
    extracted = keyword_extractor.extract(entity['abstract'])
    assert extracted is not None