def main(path_to_serialized_model):
    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()
    # Load glove model
    print("Loading language model...")
    path_to_glove_model = "../glove/glove.6B.100d.txt"
    glove_api.load_model(path_to_glove_model)
    print("Loading language model...OK")
    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    om.add_krs([("dbpedia", "cache_onto/dbpedia.pkl")], parsed=True)

    matchings = om.find_matchings()
    print("Found: " + str(len(matchings)))
    for m in matchings:
        print(m)
    return om
def test_find_links(path_to_serialized_model, matchings):
    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()
    # Load glove model
    print("Loading language model...")
    path_to_glove_model = "../glove/glove.6B.100d.txt"
    glove_api.load_model(path_to_glove_model)
    print("Loading language model...OK")
    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    om.add_krs([("efo", "cache_onto/efo.pkl")], parsed=True)
    om.add_krs([("clo", "cache_onto/clo.pkl")], parsed=True)
    om.add_krs([("bao", "cache_onto/bao.pkl")], parsed=True)

    links = om.find_links(matchings)
    for link in links:
        print(link)
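
# Hedged wiring sketch (not part of the original test suite): chain main() and
# test_find_links() so the matchings found over one data model feed the link
# finder. The model path is an illustrative placeholder, and find_matchings()
# is re-run here because main() returns the SSAPI object rather than the
# matchings themselves.
def example_matchings_then_links(path_to_serialized_model="../models/example_model/"):
    om = main(path_to_serialized_model)
    matchings = om.find_matchings()
    test_find_links(path_to_serialized_model, matchings)
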
def generate_matchings(input_model_path, input_ontology_name_path, output_file):
    # Deserialize model
    network = fieldnetwork.deserialize_network(input_model_path)
    # Create client
    store_client = StoreHandler()
    # Load glove model
    print("Loading language model...")
    path_to_glove_model = "../glove/glove.6B.100d.txt"
    glove_api.load_model(path_to_glove_model)
    print("Loading language model...OK")
    # Retrieve indexes
    schema_sim_index = io.deserialize_object(input_model_path + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(input_model_path + 'content_sim_index.pkl')

    # Create ontomatch api
    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    for onto_name, onto_parsed_path in input_ontology_name_path:
        # Load parsed ontology
        om.add_krs([(onto_name, onto_parsed_path)], parsed=True)

    matchings = om.find_matchings()

    with open(output_file, 'w') as f:
        for m in matchings:
            f.write(str(m) + '\n')
    print("Done!")
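
# Hedged usage sketch for generate_matchings: drive it with several cached
# ontologies at once. The model directory and output file below are
# illustrative placeholders; the ontology caches follow the same
# cache_onto/*.pkl convention used throughout this module.
def example_generate_matchings():
    input_model_path = "../models/example_model/"  # hypothetical serialized model dir
    ontologies = [("efo", "cache_onto/efo.pkl"),
                  ("clo", "cache_onto/clo.pkl")]
    generate_matchings(input_model_path, ontologies, "example_matchings.txt")
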
def test(path_to_serialized_model):
    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()
    # Load glove model
    print("Loading language model...")
    path_to_glove_model = "../glove/glove.6B.100d.txt"
    glove_api.load_model(path_to_glove_model)
    print("Loading language model...OK")
    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    # Create ontomatch api
    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    # Load parsed ontology
    om.add_krs([("efo", "cache_onto/efo.pkl")], parsed=True)
    om.add_krs([("clo", "cache_onto/clo.pkl")], parsed=True)
    om.add_krs([("bao", "cache_onto/bao.pkl")], parsed=True)
    # om.add_krs([("go", "cache_onto/go.pkl")], parsed=True)  # parse again

    print("Finding matchings...")
    st = time.time()
    matchings = om.find_matchings()
    et = time.time()
    print("Finding matchings...OK")
    print("Took: " + str(et - st))
    for k, v in matchings:
        print(v)
    return om
def add_data_model(self, path_to_serialized_model):
    print('Loading data model ... ')
    self.network = fieldnetwork.deserialize_network(path_to_serialized_model)
    self.schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    self.content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')
def test_fuzzy(path_to_serialized_model):
    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()
    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    # Create ontomatch api
    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    # Load parsed ontology
    om.add_krs([("efo", "cache_onto/efo.pkl")], parsed=True)

    matchings = matcherlib.find_hierarchy_content_fuzzy(om.kr_handlers, store_client)
    for m in matchings:
        print(m)
path_to_sem_model = argv[4]
path_to_results = argv[5]
path_to_gold_standard = argv[6]

# Deserialize model
network = fieldnetwork.deserialize_network(path_to_serialized_model)
# Create client
store_client = StoreHandler()
# Load glove model
print("Loading language model...")
glove_api.load_model(path_to_sem_model)
print("Loading language model...OK")
# Retrieve indexes
schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

# Create ontomatch api
om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
# Load parsed ontology
om.add_krs([(onto_name, path_to_ontology)], parsed=True)

# Build content sim
om.priv_build_content_sim(0.6)

print("Benchmarking matchers and linkers")
generate_matchings(network, store_client, om, path_to_results)
precision, recall = combine_and_report_results(om, path_to_results, path_to_gold_standard)
def test_4_n_42(path_to_serialized_model):
    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()
    # Load glove model
    print("Loading language model...")
    path_to_glove_model = "../glove/glove.6B.100d.txt"
    glove_api.load_model(path_to_glove_model)
    print("Loading language model...OK")
    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    # Create ontomatch api
    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    # Load parsed ontology
    # om.add_krs([("efo", "cache_onto/efo.pkl")], parsed=True)
    # om.add_krs([("clo", "cache_onto/clo.pkl")], parsed=True)
    # om.add_krs([("bao", "cache_onto/bao.pkl")], parsed=True)
    om.add_krs([("dbpedia", "cache_onto/dbpedia.pkl")], parsed=True)  # parse again

    # L6: [Relations] -> [Class names] (semantic groups)
    print("Finding L6 matchings...")
    st = time.time()
    l6_matchings, sem_coh_groups = matcherlib.find_sem_coh_matchings(om.network, om.kr_handlers)
    print("Finding L6 matchings...OK, " + str(len(l6_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))

    for m in l6_matchings:
        print(m)

    for k, v in sem_coh_groups.items():
        print(str(k) + " -> " + str(v))

    exit()

    print("Finding matchings...")
    st = time.time()

    # L4: [Relation names] -> [Class names] (syntax)
    print("Finding L4 matchings...")
    st = time.time()
    l4_matchings = matcherlib.find_relation_class_name_matchings(om.network, om.kr_handlers)
    print("Finding L4 matchings...OK, " + str(len(l4_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))

    print("computing fanout")
    fanout = defaultdict(int)
    for m in l4_matchings:
        sch, cla = m
        fanout[sch] += 1
    ordered = sorted(fanout.items(), key=operator.itemgetter(1), reverse=True)
    for o in ordered:
        print(o)

    # for match in l4_matchings:
    #     print(match)

    # L4.2: [Relation names] -> [Class names] (semantic)
    print("Finding L42 matchings...")
    st = time.time()
    l42_matchings = matcherlib.find_relation_class_name_sem_matchings(om.network, om.kr_handlers)
    print("Finding L42 matchings...OK, " + str(len(l42_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))

    et = time.time()
    print("Finding matchings...OK")
    print("Took: " + str(et - st))

    print("are l4 subsumed by l42?")
    not_in_l42 = 0
    not_subsumed = []
    for m in l4_matchings:
        if m not in l42_matchings:
            not_in_l42 += 1
            not_subsumed.append(m)
    print("NOT-subsumed: " + str(not_in_l42))

    """
    # L5: [Attribute names] -> [Class names] (syntax)
    print("Finding L5 matchings...")
    st = time.time()
    l5_matchings = matcherlib.find_relation_class_attr_name_matching(om.network, om.kr_handlers)
    print("Finding L5 matchings...OK, " + str(len(l5_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))

    # for match in l5_matchings:
    #     print(match)

    # l52_matchings = []

    # L52: [Attribute names] -> [Class names] (semantic)
    print("Finding L52 matchings...")
    st = time.time()
    l52_matchings = matcherlib.find_relation_class_attr_name_sem_matchings(om.network, om.kr_handlers)
    print("Finding L52 matchings...OK, " + str(len(l52_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))
    """

    with open('OUTPUT_442_only', 'w') as f:
        f.write("L4" + '\n')
        for m in l4_matchings:
            f.write(str(m) + '\n')
        f.write("L42" + '\n')
        for m in l42_matchings:
            f.write(str(m) + '\n')
        f.write("L5" + '\n')