def __init__(self, taxonomy, ic_file):
    """
    Bind this object to a taxonomy and to its graph-based IC data.

    :param taxonomy: object exposing ``_nodes``, ``_node2id`` and ``_labels``
    :param ic_file: value handed to ``GraphIC`` unchanged
    """
    self._taxonomy = taxonomy
    self._concepts = taxonomy._nodes
    self._concept2node = taxonomy._node2id
    # The label at position i is paired with the concept at position i.
    label_index = {}
    for position, label in enumerate(taxonomy._labels):
        label_index[label] = self._concepts[position]
    self._label2concepts = label_index
    self._graph_ic = GraphIC(ic_file)
def __init__(self,
             graph_ic='models/yago_type_ic.txt',
             mappings="models/type-linkings.txt"):
    """
    Initialise the WordNet base class, then load IC data and type mappings.

    :param graph_ic: value handed to ``GraphIC`` unchanged
    :param mappings: value handed to ``FileIO.read_json_file``; every loaded
        entry is indexed by its ``'offset'`` and ``'yago_dbpedia'`` keys
    """
    WordNetSimilarity.__init__(self)
    self._graph_ic = GraphIC(graph_ic)
    self._mappings = FileIO.read_json_file(mappings)

    # offset -> full mapping entry
    entries_by_offset = {}
    for entry in self._mappings:
        entries_by_offset[entry['offset']] = entry
    self._id2mappings = entries_by_offset

    # yago uri -> offset
    offsets_by_yago = {}
    for entry in self._mappings:
        offsets_by_yago[entry['yago_dbpedia']] = entry['offset']
    self._yago2id = offsets_by_yago
class YagoTypeSimilarity(WordNetSimilarity):
    """Extend the WordNet synset to linked data through YAGO mappings.

    Synsets are translated to YAGO / DBpedia identifiers through a mapping
    table loaded from ``mappings``; the ``*_graph`` metrics read their
    information content (IC) from the ``GraphIC`` object built from
    ``graph_ic``.
    """

    def __init__(self, graph_ic='models/yago_type_ic.txt',
                 mappings="models/type-linkings.txt"):
        """
        :param graph_ic: value handed to ``GraphIC`` unchanged
        :param mappings: value handed to ``FileIO.read_json_file``; every
            loaded entry is indexed by its ``'offset'`` and ``'yago_dbpedia'``
            keys
        """
        WordNetSimilarity.__init__(self)
        self._graph_ic = GraphIC(graph_ic)
        self._mappings = FileIO.read_json_file(mappings)
        self._id2mappings = {data['offset']: data for data in self._mappings}
        self._yago2id = {
            data['yago_dbpedia']: data['offset'] for data in self._mappings
        }

    def synset2id(self, synset):
        """Return the mapping id of a synset: its offset plus 100000000, as a string."""
        return str(synset.offset() + 100000000)

    def id2synset(self, offset):
        """Inverse of :meth:`synset2id`: drop the leading character of the id
        and look the remaining digits up as a noun synset offset."""
        x = offset[1:]
        return wn._synset_from_pos_and_offset('n', int(x))

    def synset2mapping(self, synset, key):
        """
        Look up one field of the mapping entry that belongs to a synset.

        :param synset: WordNet synset
        :param key: field name inside the mapping entry
        :return: the field value, or None when the synset has no entry or the
            entry has no such field
        """
        mapping = self._id2mappings.get(self.synset2id(synset))
        if mapping is None:
            return None
        return mapping.get(key)

    def synset2yago(self, synset):
        """Return the ``'yago_dbpedia'`` field mapped to the synset, or None."""
        return self.synset2mapping(synset, 'yago_dbpedia')

    def synset2dbpedia(self, synset):
        """Return the ``'dbpedia'`` field mapped to the synset, or None."""
        return self.synset2mapping(synset, 'dbpedia')

    def yago2synset(self, yago):
        """Return the synset registered for a yago uri, or None if unknown."""
        if yago in self._yago2id:
            return self.id2synset(self._yago2id[yago])
        return None

    def word2dbpedia(self, word):
        """Return the truthy dbpedia mappings of every synset of ``word``."""
        # Resolve each synset once instead of once for the filter and once
        # for the value.
        links = (self.synset2dbpedia(s) for s in self.word2synset(word))
        return [link for link in links if link]

    def word2yago(self, word):
        """Return the truthy yago mappings of every synset of ``word``."""
        links = (self.synset2yago(s) for s in self.word2synset(word))
        return [link for link in links if link]

    def yago_similarity(self, yago1, yago2, name='wpath'):
        """
        Compute semantic similarity of two yago concepts by mapping concept
        uri to wordnet synset.

        :param yago1: yago concept uri
        :param yago2: yago concept uri
        :param name: name of semantic similarity metric
        :return: semantic similarity score if both uri can be mapped to
            synsets, otherwise 0.
        """
        s1 = self.yago2synset(yago1)
        s2 = self.yago2synset(yago2)
        if s1 and s2:
            return self.similarity(s1, s2, name)
        return 0.0

    def word_similarity_wpath_graph(self, w1, w2, k):
        """Return the best :meth:`wpath_graph` score over all synset pairs of
        the two words; 0 when either word has no synset."""
        s1 = self.word2synset(w1)
        s2 = self.word2synset(w2)
        # The trailing [0] keeps max() defined when there are no pairs.
        return max([self.wpath_graph(c1, c2, k) for c1 in s1 for c2 in s2] + [0])

    def res_graph(self, c1, c2):
        """Return the graph-based IC of the least common subsumer of c1 and c2."""
        lcs = self.least_common_subsumer(c1, c2)
        yago = self.synset2yago(lcs)
        return self._graph_ic.concept_ic(yago)

    def lin_graph(self, c1, c2):
        """Return ``2 * IC(lcs) / (IC(c1) + IC(c2))`` using graph-based IC,
        or 0.0 when either concept has an IC of 0."""
        lcs = self.least_common_subsumer(c1, c2)
        yago_c1 = self.synset2yago(c1)
        yago_c2 = self.synset2yago(c2)
        yago_lcs = self.synset2yago(lcs)
        lcs_ic = self._graph_ic.concept_ic(yago_lcs)
        c1_ic = self._graph_ic.concept_ic(yago_c1)
        c2_ic = self._graph_ic.concept_ic(yago_c2)
        if c1_ic == 0.0 or c2_ic == 0.0:
            return 0.0
        return 2.0 * lcs_ic / (c1_ic + c2_ic)

    def jcn_graph(self, c1, c2):
        """Return ``1 / (1 + IC(c1) + IC(c2) - 2 * IC(lcs))`` using
        graph-based IC, or 0.0 when either concept has an IC of 0."""
        lcs = self.least_common_subsumer(c1, c2)
        yago_c1 = self.synset2yago(c1)
        yago_c2 = self.synset2yago(c2)
        yago_lcs = self.synset2yago(lcs)
        lcs_ic = self._graph_ic.concept_ic(yago_lcs)
        c1_ic = self._graph_ic.concept_ic(yago_c1)
        c2_ic = self._graph_ic.concept_ic(yago_c2)
        if c1_ic == 0.0 or c2_ic == 0.0:
            return 0.0
        distance = c1_ic + c2_ic - 2.0 * lcs_ic
        # The whole "1 + distance" term is the divisor; without the
        # parentheses the expression evaluates to 1.0 + distance, which grows
        # with distance instead of shrinking.
        return 1.0 / (1 + distance)

    def wpath_graph(self, c1, c2, k=0.9):
        """
        Return ``1 / (1 + path * k ** IC(lcs))`` where ``path`` is the
        shortest path distance between the synsets and the IC is graph-based.

        :param c1: WordNet synset
        :param c2: WordNet synset
        :param k: base that is raised to the IC of the least common subsumer
        """
        lcs = self.least_common_subsumer(c1, c2)
        # NOTE(review): presumably shortest_path_distance can return None for
        # unconnected synsets, which would fail below -- confirm with NLTK docs.
        path = c1.shortest_path_distance(c2)
        yago_lcs = self.synset2yago(lcs)
        weight = k ** self._graph_ic.concept_ic(yago_lcs)
        return 1.0 / (1 + path * weight)
class ConceptSimilarity:
    """
    This class is used to compute taxonomical semantic similarity scores
    between concepts that are located in a concept taxonomy. A taxonomy
    object needs to be passed into this class in order to find the
    structural information of concepts such as depth, path length, and so on.
    The graph-based IC is needed for semantic similarity measures wpath, res,
    lin, jcn.
    """

    def __init__(self, taxonomy, ic_file):
        """
        :param taxonomy: object exposing ``_nodes``, ``_node2id`` and ``_labels``
        :param ic_file: value handed to ``GraphIC`` unchanged
        """
        self._taxonomy = taxonomy
        self._concepts = taxonomy._nodes
        self._concept2node = taxonomy._node2id
        # The label at position i is paired with the concept at position i.
        self._label2concepts = {
            label: self._concepts[i]
            for i, label in enumerate(taxonomy._labels)
        }
        self._graph_ic = GraphIC(ic_file)

    def hyponyms(self, concept):
        """Return the concepts the taxonomy lists as hyponyms of ``concept``;
        an empty list for an unknown concept."""
        if concept in self._concept2node:
            nodes = self._taxonomy.hyponyms(self._concept2node[concept])
            return [self._concepts[n] for n in nodes]
        return []

    def hypernyms(self, concept):
        """Return the concepts the taxonomy lists as hypernyms of ``concept``;
        an empty list for an unknown concept."""
        if concept in self._concept2node:
            nodes = self._taxonomy.hypernyms(self._concept2node[concept])
            return [self._concepts[n] for n in nodes]
        return []

    def shortest_path_length(self, concept1, concept2):
        """Return the taxonomy's shortest path length between two concepts.

        :raises KeyError: if either concept is not in the taxonomy
        """
        n1 = self._concept2node[concept1]
        n2 = self._concept2node[concept2]
        return self._taxonomy.shortest_path_length(n1, n2)

    def depth(self, concept):
        """Return the taxonomy depth of a concept; the virtual ``'root'`` has
        depth 1."""
        if concept == 'root':
            return 1
        n = self._concept2node[concept]
        return self._taxonomy.depth(n)

    def least_common_subsumer(self, concept1, concept2):
        """
        Return the least common subsumer of two concepts.

        :return: the subsuming concept, or ``'root'`` when the taxonomy
            answers with a node id that lies outside the concept list
        :raises KeyError: if either concept is not in the taxonomy
        """
        n1 = self._concept2node[concept1]
        n2 = self._concept2node[concept2]
        n = self._taxonomy.least_common_subsumer(n1, n2)
        # Valid indexes are 0 .. len - 1, so n == len is already outside the
        # list and must be treated as the virtual root, not indexed.
        if n >= len(self._concepts):
            return 'root'
        return self._concepts[n]

    def method(self, name):
        """Return a two-argument function that calls the metric method called
        ``name`` on this object and returns the absolute value of its score."""
        def function(c1, c2):
            score = getattr(self, name)(c1, c2)
            return abs(score)
        return function

    def name2concept(self, name):
        """Return the concept registered for a label, or ``[]`` if unknown."""
        return self._label2concepts.get(name, [])

    def concept_ic(self, concept):
        """
        Get the graph-based IC of a concept. The ic of virtual root is 0

        :param concept: the node id of concept
        :return: the ic value of concept
        """
        if concept == 'root':
            return 0.0
        return self._graph_ic.concept_ic(concept)

    @memoized
    def similarity(self, c1, c2, name='wpath'):
        """
        Compute semantic similarity between two concepts

        :param c1: first concept
        :param c2: second concept
        :param name: name of the metric method to use
        :return: the absolute metric score, or the string ``'link error'``
            when either concept is not in the taxonomy
        """
        if c1 not in self._concept2node or c2 not in self._concept2node:
            return 'link error'
        return self.method(name)(c1, c2)

    def path(self, c1, c2):
        """
        Rada's shortest path based similarity metric

        :param c1: first concept
        :param c2: second concept
        :return: similarity score in [0,1]
        """
        return 1.0 / self.shortest_path_length(c1, c2)

    def wup(self, c1, c2):
        """
        Wu and Palm's similarity metric

        :param c1: first concept
        :param c2: second concept
        :return: ``2 * depth(lcs) / (depth(c1) + depth(c2))``
        """
        lcs = self.least_common_subsumer(c1, c2)
        depth_c1 = self.depth(c1)
        depth_c2 = self.depth(c2)
        depth_lcs = self.depth(lcs)
        return 2.0 * depth_lcs / (depth_c1 + depth_c2)

    def li(self, c1, c2, alpha=0.2, beta=0.6):
        """
        Return ``exp(-alpha * path) * (e^(beta*d) - e^(-beta*d)) /
        (e^(beta*d) + e^(-beta*d))`` where ``path`` is the shortest path
        length minus one and ``d`` is the depth of the least common subsumer.

        :param alpha: weight of the path length term
        :param beta: weight of the depth term
        """
        path = self.shortest_path_length(c1, c2) - 1
        lcs = self.least_common_subsumer(c1, c2)
        depth = self.depth(lcs)
        x = math.exp(-alpha * path)
        y = math.exp(beta * depth)
        z = math.exp(-beta * depth)
        a = y - z
        b = y + z
        return x * (a / b)

    def res(self, c1, c2):
        """Return the IC of the least common subsumer of c1 and c2."""
        lcs = self.least_common_subsumer(c1, c2)
        return self.concept_ic(lcs)

    def lin(self, c1, c2):
        """Return ``2 * IC(lcs) / (IC(c1) + IC(c2))``, or 0.0 when either
        concept has an IC of 0."""
        lcs = self.least_common_subsumer(c1, c2)
        lcs_ic = self.concept_ic(lcs)
        c1_ic = self.concept_ic(c1)
        c2_ic = self.concept_ic(c2)
        if c1_ic == 0.0 or c2_ic == 0.0:
            return 0.0
        return 2.0 * lcs_ic / (c1_ic + c2_ic)

    def jcn(self, c1, c2):
        """Return ``1 / (1 + IC(c1) + IC(c2) - 2 * IC(lcs))``, or 0.0 when
        either concept has an IC of 0."""
        lcs = self.least_common_subsumer(c1, c2)
        lcs_ic = self.concept_ic(lcs)
        c1_ic = self.concept_ic(c1)
        c2_ic = self.concept_ic(c2)
        if c1_ic == 0.0 or c2_ic == 0.0:
            return 0.0
        distance = c1_ic + c2_ic - 2.0 * lcs_ic
        # The whole "1 + distance" term is the divisor; without the
        # parentheses the expression evaluates to 1.0 + distance, which grows
        # with distance instead of shrinking.
        return 1.0 / (1 + distance)

    def wpath(self, c1, c2, k=0.8):
        """
        Return ``1 / (1 + path * k ** IC(lcs))`` where ``path`` is the
        shortest path length minus one.

        :param k: base that is raised to the IC of the least common subsumer
        """
        lcs = self.least_common_subsumer(c1, c2)
        path = self.shortest_path_length(c1, c2) - 1
        weight = k ** self.concept_ic(lcs)
        return 1.0 / (1 + path * weight)
def __init__(self, taxonomy, ic_file):
    """
    Bind this object to a taxonomy and to its graph-based IC data.

    :param taxonomy: object exposing ``_nodes``, ``_node2id`` and ``_labels``
    :param ic_file: value handed to ``GraphIC`` unchanged
    """
    self._taxonomy = taxonomy
    self._concepts = taxonomy._nodes
    self._concept2node = taxonomy._node2id
    # The label at position i is paired with the concept at position i.
    label_index = {}
    for position, label in enumerate(taxonomy._labels):
        label_index[label] = self._concepts[position]
    self._label2concepts = label_index
    self._graph_ic = GraphIC(ic_file)
def __init__(self,
             graph_ic='models/yago_type_ic.txt',
             mappings="models/type-linkings.txt"):
    """
    Initialise the WordNet base class, then load IC data and type mappings.

    :param graph_ic: value handed to ``GraphIC`` unchanged
    :param mappings: value handed to ``FileIO.read_json_file``; every loaded
        entry is indexed by its ``'offset'`` and ``'yago_dbpedia'`` keys
    """
    WordNetSimilarity.__init__(self)
    self._graph_ic = GraphIC(graph_ic)
    self._mappings = FileIO.read_json_file(mappings)

    # offset -> full mapping entry
    entries_by_offset = {}
    for entry in self._mappings:
        entries_by_offset[entry['offset']] = entry
    self._id2mappings = entries_by_offset

    # yago uri -> offset
    offsets_by_yago = {}
    for entry in self._mappings:
        offsets_by_yago[entry['yago_dbpedia']] = entry['offset']
    self._yago2id = offsets_by_yago
class YagoTypeSimilarity(WordNetSimilarity):
    """Extend the WordNet synset to linked data through YAGO mappings.

    Synsets are translated to YAGO / DBpedia identifiers through a mapping
    table loaded from ``mappings``; the ``*_graph`` metrics read their
    information content (IC) from the ``GraphIC`` object built from
    ``graph_ic``.
    """

    def __init__(self, graph_ic='models/yago_type_ic.txt',
                 mappings="models/type-linkings.txt"):
        """
        :param graph_ic: value handed to ``GraphIC`` unchanged
        :param mappings: value handed to ``FileIO.read_json_file``; every
            loaded entry is indexed by its ``'offset'`` and ``'yago_dbpedia'``
            keys
        """
        WordNetSimilarity.__init__(self)
        self._graph_ic = GraphIC(graph_ic)
        self._mappings = FileIO.read_json_file(mappings)
        self._id2mappings = {data['offset']: data for data in self._mappings}
        self._yago2id = {
            data['yago_dbpedia']: data['offset'] for data in self._mappings
        }

    def synset2id(self, synset):
        """Return the mapping id of a synset: its offset plus 100000000, as a string."""
        return str(synset.offset() + 100000000)

    def id2synset(self, offset):
        """Inverse of :meth:`synset2id`: drop the leading character of the id
        and look the remaining digits up as a noun synset offset."""
        x = offset[1:]
        return wn._synset_from_pos_and_offset('n', int(x))

    def synset2mapping(self, synset, key):
        """
        Look up one field of the mapping entry that belongs to a synset.

        :param synset: WordNet synset
        :param key: field name inside the mapping entry
        :return: the field value, or None when the synset has no entry or the
            entry has no such field
        """
        mapping = self._id2mappings.get(self.synset2id(synset))
        if mapping is None:
            return None
        return mapping.get(key)

    def synset2yago(self, synset):
        """Return the ``'yago_dbpedia'`` field mapped to the synset, or None."""
        return self.synset2mapping(synset, 'yago_dbpedia')

    def synset2dbpedia(self, synset):
        """Return the ``'dbpedia'`` field mapped to the synset, or None."""
        return self.synset2mapping(synset, 'dbpedia')

    def yago2synset(self, yago):
        """Return the synset registered for a yago uri, or None if unknown."""
        if yago in self._yago2id:
            return self.id2synset(self._yago2id[yago])
        return None

    def word2dbpedia(self, word):
        """Return the truthy dbpedia mappings of every synset of ``word``."""
        # Resolve each synset once instead of once for the filter and once
        # for the value.
        links = (self.synset2dbpedia(s) for s in self.word2synset(word))
        return [link for link in links if link]

    def word2yago(self, word):
        """Return the truthy yago mappings of every synset of ``word``."""
        links = (self.synset2yago(s) for s in self.word2synset(word))
        return [link for link in links if link]

    def yago_similarity(self, yago1, yago2, name='wpath'):
        """
        Compute semantic similarity of two yago concepts by mapping concept
        uri to wordnet synset.

        :param yago1: yago concept uri
        :param yago2: yago concept uri
        :param name: name of semantic similarity metric
        :return: semantic similarity score if both uri can be mapped to
            synsets, otherwise 0.
        """
        s1 = self.yago2synset(yago1)
        s2 = self.yago2synset(yago2)
        if s1 and s2:
            return self.similarity(s1, s2, name)
        return 0.0

    def word_similarity_wpath_graph(self, w1, w2, k):
        """Return the best :meth:`wpath_graph` score over all synset pairs of
        the two words; 0 when either word has no synset."""
        s1 = self.word2synset(w1)
        s2 = self.word2synset(w2)
        # The trailing [0] keeps max() defined when there are no pairs.
        return max([self.wpath_graph(c1, c2, k) for c1 in s1 for c2 in s2] + [0])

    def res_graph(self, c1, c2):
        """Return the graph-based IC of the least common subsumer of c1 and c2."""
        lcs = self.least_common_subsumer(c1, c2)
        yago = self.synset2yago(lcs)
        return self._graph_ic.concept_ic(yago)

    def lin_graph(self, c1, c2):
        """Return ``2 * IC(lcs) / (IC(c1) + IC(c2))`` using graph-based IC,
        or 0.0 when either concept has an IC of 0."""
        lcs = self.least_common_subsumer(c1, c2)
        yago_c1 = self.synset2yago(c1)
        yago_c2 = self.synset2yago(c2)
        yago_lcs = self.synset2yago(lcs)
        lcs_ic = self._graph_ic.concept_ic(yago_lcs)
        c1_ic = self._graph_ic.concept_ic(yago_c1)
        c2_ic = self._graph_ic.concept_ic(yago_c2)
        if c1_ic == 0.0 or c2_ic == 0.0:
            return 0.0
        return 2.0 * lcs_ic / (c1_ic + c2_ic)

    def jcn_graph(self, c1, c2):
        """Return ``1 / (1 + IC(c1) + IC(c2) - 2 * IC(lcs))`` using
        graph-based IC, or 0.0 when either concept has an IC of 0."""
        lcs = self.least_common_subsumer(c1, c2)
        yago_c1 = self.synset2yago(c1)
        yago_c2 = self.synset2yago(c2)
        yago_lcs = self.synset2yago(lcs)
        lcs_ic = self._graph_ic.concept_ic(yago_lcs)
        c1_ic = self._graph_ic.concept_ic(yago_c1)
        c2_ic = self._graph_ic.concept_ic(yago_c2)
        if c1_ic == 0.0 or c2_ic == 0.0:
            return 0.0
        distance = c1_ic + c2_ic - 2.0 * lcs_ic
        # The whole "1 + distance" term is the divisor; without the
        # parentheses the expression evaluates to 1.0 + distance, which grows
        # with distance instead of shrinking.
        return 1.0 / (1 + distance)

    def wpath_graph(self, c1, c2, k=0.9):
        """
        Return ``1 / (1 + path * k ** IC(lcs))`` where ``path`` is the
        shortest path distance between the synsets and the IC is graph-based.

        :param c1: WordNet synset
        :param c2: WordNet synset
        :param k: base that is raised to the IC of the least common subsumer
        """
        lcs = self.least_common_subsumer(c1, c2)
        # NOTE(review): presumably shortest_path_distance can return None for
        # unconnected synsets, which would fail below -- confirm with NLTK docs.
        path = c1.shortest_path_distance(c2)
        yago_lcs = self.synset2yago(lcs)
        weight = k ** self._graph_ic.concept_ic(yago_lcs)
        return 1.0 / (1 + path * weight)
class ConceptSimilarity:
    """
    This class is used to compute taxonomical semantic similarity scores
    between concepts that are located in a concept taxonomy. A taxonomy
    object needs to be passed into this class in order to find the
    structural information of concepts such as depth, path length, and so on.
    The graph-based IC is needed for semantic similarity measures wpath, res,
    lin, jcn.
    """

    def __init__(self, taxonomy, ic_file):
        """
        :param taxonomy: object exposing ``_nodes``, ``_node2id`` and ``_labels``
        :param ic_file: value handed to ``GraphIC`` unchanged
        """
        self._taxonomy = taxonomy
        self._concepts = taxonomy._nodes
        self._concept2node = taxonomy._node2id
        # The label at position i is paired with the concept at position i.
        self._label2concepts = {
            label: self._concepts[i]
            for i, label in enumerate(taxonomy._labels)
        }
        self._graph_ic = GraphIC(ic_file)

    def hyponyms(self, concept):
        """Return the concepts the taxonomy lists as hyponyms of ``concept``;
        an empty list for an unknown concept."""
        if concept in self._concept2node:
            nodes = self._taxonomy.hyponyms(self._concept2node[concept])
            return [self._concepts[n] for n in nodes]
        return []

    def hypernyms(self, concept):
        """Return the concepts the taxonomy lists as hypernyms of ``concept``;
        an empty list for an unknown concept."""
        if concept in self._concept2node:
            nodes = self._taxonomy.hypernyms(self._concept2node[concept])
            return [self._concepts[n] for n in nodes]
        return []

    def shortest_path_length(self, concept1, concept2):
        """Return the taxonomy's shortest path length between two concepts.

        :raises KeyError: if either concept is not in the taxonomy
        """
        n1 = self._concept2node[concept1]
        n2 = self._concept2node[concept2]
        return self._taxonomy.shortest_path_length(n1, n2)

    def depth(self, concept):
        """Return the taxonomy depth of a concept; the virtual ``'root'`` has
        depth 1."""
        if concept == 'root':
            return 1
        n = self._concept2node[concept]
        return self._taxonomy.depth(n)

    def least_common_subsumer(self, concept1, concept2):
        """
        Return the least common subsumer of two concepts.

        :return: the subsuming concept, or ``'root'`` when the taxonomy
            answers with a node id that lies outside the concept list
        :raises KeyError: if either concept is not in the taxonomy
        """
        n1 = self._concept2node[concept1]
        n2 = self._concept2node[concept2]
        n = self._taxonomy.least_common_subsumer(n1, n2)
        # Valid indexes are 0 .. len - 1, so n == len is already outside the
        # list and must be treated as the virtual root, not indexed.
        if n >= len(self._concepts):
            return 'root'
        return self._concepts[n]

    def method(self, name):
        """Return a two-argument function that calls the metric method called
        ``name`` on this object and returns the absolute value of its score."""
        def function(c1, c2):
            score = getattr(self, name)(c1, c2)
            return abs(score)
        return function

    def name2concept(self, name):
        """Return the concept registered for a label, or ``[]`` if unknown."""
        return self._label2concepts.get(name, [])

    def concept_ic(self, concept):
        """
        Get the graph-based IC of a concept. The ic of virtual root is 0

        :param concept: the node id of concept
        :return: the ic value of concept
        """
        if concept == 'root':
            return 0.0
        return self._graph_ic.concept_ic(concept)

    @memoized
    def similarity(self, c1, c2, name='wpath'):
        """
        Compute semantic similarity between two concepts

        :param c1: first concept
        :param c2: second concept
        :param name: name of the metric method to use
        :return: the absolute metric score, or the string ``'link error'``
            when either concept is not in the taxonomy
        """
        if c1 not in self._concept2node or c2 not in self._concept2node:
            return 'link error'
        return self.method(name)(c1, c2)

    def path(self, c1, c2):
        """
        Rada's shortest path based similarity metric

        :param c1: first concept
        :param c2: second concept
        :return: similarity score in [0,1]
        """
        return 1.0 / self.shortest_path_length(c1, c2)

    def wup(self, c1, c2):
        """
        Wu and Palm's similarity metric

        :param c1: first concept
        :param c2: second concept
        :return: ``2 * depth(lcs) / (depth(c1) + depth(c2))``
        """
        lcs = self.least_common_subsumer(c1, c2)
        depth_c1 = self.depth(c1)
        depth_c2 = self.depth(c2)
        depth_lcs = self.depth(lcs)
        return 2.0 * depth_lcs / (depth_c1 + depth_c2)

    def li(self, c1, c2, alpha=0.2, beta=0.6):
        """
        Return ``exp(-alpha * path) * (e^(beta*d) - e^(-beta*d)) /
        (e^(beta*d) + e^(-beta*d))`` where ``path`` is the shortest path
        length minus one and ``d`` is the depth of the least common subsumer.

        :param alpha: weight of the path length term
        :param beta: weight of the depth term
        """
        path = self.shortest_path_length(c1, c2) - 1
        lcs = self.least_common_subsumer(c1, c2)
        depth = self.depth(lcs)
        x = math.exp(-alpha * path)
        y = math.exp(beta * depth)
        z = math.exp(-beta * depth)
        a = y - z
        b = y + z
        return x * (a / b)

    def res(self, c1, c2):
        """Return the IC of the least common subsumer of c1 and c2."""
        lcs = self.least_common_subsumer(c1, c2)
        return self.concept_ic(lcs)

    def lin(self, c1, c2):
        """Return ``2 * IC(lcs) / (IC(c1) + IC(c2))``, or 0.0 when either
        concept has an IC of 0."""
        lcs = self.least_common_subsumer(c1, c2)
        lcs_ic = self.concept_ic(lcs)
        c1_ic = self.concept_ic(c1)
        c2_ic = self.concept_ic(c2)
        if c1_ic == 0.0 or c2_ic == 0.0:
            return 0.0
        return 2.0 * lcs_ic / (c1_ic + c2_ic)

    def jcn(self, c1, c2):
        """Return ``1 / (1 + IC(c1) + IC(c2) - 2 * IC(lcs))``, or 0.0 when
        either concept has an IC of 0."""
        lcs = self.least_common_subsumer(c1, c2)
        lcs_ic = self.concept_ic(lcs)
        c1_ic = self.concept_ic(c1)
        c2_ic = self.concept_ic(c2)
        if c1_ic == 0.0 or c2_ic == 0.0:
            return 0.0
        distance = c1_ic + c2_ic - 2.0 * lcs_ic
        # The whole "1 + distance" term is the divisor; without the
        # parentheses the expression evaluates to 1.0 + distance, which grows
        # with distance instead of shrinking.
        return 1.0 / (1 + distance)

    def wpath(self, c1, c2, k=0.8):
        """
        Return ``1 / (1 + path * k ** IC(lcs))`` where ``path`` is the
        shortest path length minus one.

        :param k: base that is raised to the IC of the least common subsumer
        """
        lcs = self.least_common_subsumer(c1, c2)
        path = self.shortest_path_length(c1, c2) - 1
        weight = k ** self.concept_ic(lcs)
        return 1.0 / (1 + path * weight)