# NOTE(review): line breaks/indentation were lost in this chunk — the tail of a
# best_sense(...) method (its `def` and candidate loop are above this view) and a
# `if __name__ == '__main__'` demo script are fused onto one physical line.
# Restore the original layout from version control before editing logic.
# NOTE(review): `self.words2 += len(words1)` looks like a copy/paste slip —
# `words2` was just computed on this same line, so `len(words2)` is presumably
# intended (mirroring a `self.words1 += len(words1)` counter elsewhere); confirm.
# NOTE(review): in the demo script, r1/text1/text3 are assigned but never used;
# only `d.best_sense(r2, text2)` is exercised.
words2 = self.words_around_symbol(candidate) self.words2 += len(words1) cm = CandidateMetrics() cm.score = self.words_similarity(words1, words2, exclude) cm.degree = ksyn.degree(self.hg, candidate) logging.info('%s %s' % (candidate, cm)) if cm.better_than(best_cm): best_cm = cm best = candidate self.best_sense_t += time.time() - start return best, best_cm if __name__ == '__main__': hgr = hyperg.HyperGraph({ 'backend': 'leveldb', 'hg': 'wordnet_wikidata.hg' }) p = par.Parser() d = Disambiguation(hgr, p) r1 = ['stocks', 'stock'] text1 = "Chinese stocks end year with double-digit losses" r2 = ['cambridge'] text2 = "Cambridge near Boston in the United States." text3 = "Cambridge near London in England." print(d.best_sense(r2, text2))
# NOTE(review): formatting lost in this chunk — the tail of an Extractor method
# (builds the hyperedge string for the last pipeline stage, records it in
# self.outputs, and returns the stage output) is fused with a `__main__` test
# script onto one physical line. Recover the original layout from VCS.
# NOTE(review): the alternative `test_text = ...` lines were `#`-comments on
# their own lines in the original; once fused, everything after the first `#`
# on this line is commented out — the script below it is currently dead text.
output = last_stage_output.tree.to_hyperedge_str( with_namespaces=self.show_namespaces) self.outputs.append(output) self.debug_msg(output) last_stage_output.main_edge = last_stage_output.tree.to_hyperedge() return last_stage_output if __name__ == '__main__': # test_text = "Due to its location in the European Plain, Berlin is influenced by a temperate seasonal climate." # test_text = "Lots of cars require lots of paved roadways and parking lots." # test_text = "Critics have pointed out the dangers of group forming among like-minded in Internet. " # test_text = "Recently online platforms such as Facebook and Google have been criticized." # test_text = "Koikuchi shoyu, best known as soy sauce, is the mother of all sauces in Japan." test_text = "Satellites from NASA and other agencies have been tracking sea ice changes since 1979." print(test_text) hgraph = hyperg.HyperGraph({ 'backend': 'leveldb', 'hg': 'wordnet_dbpedia.hg' }) extractor = Extractor(hgraph) extractor.debug = True results = extractor.read_text(test_text) for result in results: print('result: %s' % str(result[1].main_edge)) for edge in result[1].edges: print('extra edge: %s' % str(edge))
# NOTE(review): this chunk begins mid-expression — the `'worst_sim': ...` keys
# belong to an `edge_data` dict literal whose opening brace (and the enclosing
# method and loop over `e`) are above this view. Formatting was also collapsed
# onto one physical line; restore from VCS before changing logic.
# NOTE(review): after the fragment come two complete writer methods
# (write_similar_edges / write_edges_with_similar_concepts, both delegating to
# write_edge_data) and a `__main__` script.
# NOTE(review): `par = par.Parser()` in the script rebinds the imported `par`
# module to a Parser instance — works once, but any later use of the module
# through that name would break; consider a distinct local name.
'worst_sim': e[1][0], 'sim': e[1][1], 'matches': e[1][2], 'text': self.hg.get_str_attribute(ed.str2edge(e[0]), 'text') } result.append(edge_data) return result def write_similar_edges(self, targ_edge, file_path): edge_data = self.similar_edges(targ_edge) write_edge_data(edge_data, file_path) def write_edges_with_similar_concepts(self, targ_edge, file_path): edge_data = self.edges_with_similar_concepts(targ_edge) write_edge_data(edge_data, file_path) if __name__ == '__main__': hgr = hyperg.HyperGraph({'backend': 'leveldb', 'hg': 'reddit-politics.hg'}) print('creating parser...') par = par.Parser() print('parser created.') te = '(clinches/nlp.clinch.verb clinton/nlp.clinton.noun ' \ '(+/gb democratic/nlp.democratic.adj nomination/nlp.nomination.noun))' s = Similarity(hgr, par) # s.write_edges_with_similar_concepts(ed.str2edge(te), 'edges_similar_concepts.json') s.write_similar_edges(ed.str2edge(te), 'similar_edges.json')
# NOTE(review): indentation was lost in this chunk; this takes `self` and is a
# test-fixture method of a class (unittest.TestCase subclass?) defined above
# this view — restore its original indentation when merging.
def setUp(self):
    """Open the LevelDB-backed test hypergraph used by each test case."""
    self.hg = hyperg.HyperGraph({'backend': 'leveldb', 'hg': 'test.hg'})
def nsimilarity(self, edges1, edges2): cs1 = set() for edge in edges1: cs1 = cs1.union(self.concept_sphere(edge)) cs2 = set() for edge in edges2: cs2 = cs2.union(self.concept_sphere(edge)) return self.setsimilarity(cs1, cs2) def synonym_similarity(self, meronomy, syn_id_1, syn_id_2): return self.nsimilarity(meronomy.synonym_full_edges(syn_id_1), meronomy.synonym_full_edges(syn_id_2)) if __name__ == '__main__': hgr = hyperg.HyperGraph({ 'backend': 'leveldb', 'hg': 'reddit-worldnews-01012013-01082017.hg' }) hs = HyperSimilarity(hgr) # e = 'clinton/nlp.clinton.noun' print('starting...') e1 = '(+/gb prime/nlp.prime.adj minister/nlp.minister.noun)' e2 = 'europe/nlp.europe.noun' print(hs.similarity(e1, e2))