def test_find_links(path_to_serialized_model, matchings):
    """Load a serialized model plus the EFO/CLO/BAO ontologies and print the
    links found for *matchings*."""
    graph = fieldnetwork.deserialize_network(path_to_serialized_model)
    client = StoreHandler()

    print("Loading language model...")
    glove_path = "../glove/glove.6B.100d.txt"
    glove_api.load_model(glove_path)
    print("Loading language model...OK")

    # Pre-built similarity indexes live next to the serialized model.
    schema_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    om = SSAPI(graph, client, schema_index, content_index)
    for onto_name in ("efo", "clo", "bao"):
        om.add_krs([(onto_name, "cache_onto/" + onto_name + ".pkl")], parsed=True)

    for link in om.find_links(matchings):
        print(link)
def test_ranking_certainty_chem(self):
    """Rank content-similar results for the 'activities' table by certainty
    and by coverage, printing column- and table-level scores for each."""
    model_dir = '../models/chemical/'
    graph = deserialize_network(model_dir)
    api = API(graph)
    api.init_store()

    drs = api.drs_from_table('activities')
    results = api.similar_content_to(drs)

    results.rank_certainty()
    print("All columns CERTAINTY: ")
    results.pretty_print_columns_with_scores()
    print("")
    print("All tables CERTAINTY: ")
    results.print_tables_with_scores()
    print("")

    results.rank_coverage()
    print("All columns COVERAGE: ")
    results.pretty_print_columns_with_scores()
    print("")
    print("All tables COVERAGE: ")
    results.print_tables_with_scores()
    print("")
def main(path_to_serialized_model):
    """Load the model at *path_to_serialized_model* and drop into an
    interactive IPython shell with the API in scope."""
    print('Loading: ' + str(path_to_serialized_model))
    graph = fieldnetwork.deserialize_network(path_to_serialized_model)
    client = StoreHandler()
    api = API(graph, client)
    shell = InteractiveShellEmbed(banner1=init_banner, exit_msg=exit_banner)
    shell()
def generate_matchings(input_model_path, input_ontology_name_path, output_file):
    """Match the model at *input_model_path* against each (name, parsed-path)
    ontology in *input_ontology_name_path* and write one matching per line
    to *output_file*."""
    graph = fieldnetwork.deserialize_network(input_model_path)
    client = StoreHandler()

    print("Loading language model...")
    glove_path = "../glove/glove.6B.100d.txt"
    glove_api.load_model(glove_path)
    print("Loading language model...OK")

    # Pre-built similarity indexes live next to the serialized model.
    schema_index = io.deserialize_object(input_model_path + 'schema_sim_index.pkl')
    content_index = io.deserialize_object(input_model_path + 'content_sim_index.pkl')

    om = SSAPI(graph, client, schema_index, content_index)
    for onto_name, onto_parsed_path in input_ontology_name_path:
        # Each ontology is already parsed and cached on disk.
        om.add_krs([(onto_name, onto_parsed_path)], parsed=True)

    matchings = om.find_matchings()
    with open(output_file, 'w') as out:
        for matching in matchings:
            out.write(str(matching) + '\n')
    print("Done!")
def main(path_to_serialized_model):
    """Load the model at *path_to_serialized_model*, initialize its store,
    and drop into an interactive IPython shell."""
    print('Loading: ' + str(path_to_serialized_model))
    graph = fieldnetwork.deserialize_network(path_to_serialized_model)
    api = API(graph)
    api.init_store()
    shell = InteractiveShellEmbed(banner1=init_banner, exit_msg=exit_banner)
    shell()
def main(path_to_serialized_model):
    """Match the serialized model against the cached DBpedia ontology,
    print every matching found, and return the SSAPI instance."""
    graph = fieldnetwork.deserialize_network(path_to_serialized_model)
    client = StoreHandler()

    print("Loading language model...")
    glove_path = "../glove/glove.6B.100d.txt"
    glove_api.load_model(glove_path)
    print("Loading language model...OK")

    # Pre-built similarity indexes live next to the serialized model.
    schema_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    om = SSAPI(graph, client, schema_index, content_index)
    om.add_krs([("dbpedia", "cache_onto/dbpedia.pkl")], parsed=True)

    matchings = om.find_matchings()
    print("Found: " + str(len(matchings)))
    for matching in matchings:
        print(matching)
    return om
def test(path_to_serialized_model):
    """Load the model plus the EFO/CLO/BAO ontologies, run the matcher while
    timing it, print results, and return the SSAPI instance."""
    graph = fieldnetwork.deserialize_network(path_to_serialized_model)
    client = StoreHandler()

    print("Loading language model...")
    glove_path = "../glove/glove.6B.100d.txt"
    glove_api.load_model(glove_path)
    print("Loading language model...OK")

    # Pre-built similarity indexes live next to the serialized model.
    schema_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    om = SSAPI(graph, client, schema_index, content_index)
    for onto_name in ("efo", "clo", "bao"):
        om.add_krs([(onto_name, "cache_onto/" + onto_name + ".pkl")], parsed=True)
    #om.add_krs([("go", "cache_onto/go.pkl")], parsed=True)  # parse again

    print("Finding matchings...")
    start = time.time()
    matchings = om.find_matchings()
    end = time.time()
    print("Finding matchings...OK")
    print("Took: " + str(end - start))

    # NOTE(review): this iterates matchings as (k, v) pairs, unlike sibling
    # functions that iterate single items — confirm find_matchings() really
    # yields pairs on this code path.
    for key, value in matchings:
        print(value)
    return om
def add_data_model(self, path_to_serialized_model):
    """Load the field network and its two similarity indexes from
    *path_to_serialized_model* onto this instance."""
    print('Loading data model ... ')
    base = path_to_serialized_model
    self.network = fieldnetwork.deserialize_network(base)
    self.schema_sim_index = io.deserialize_object(base + 'schema_sim_index.pkl')
    self.content_sim_index = io.deserialize_object(base + 'content_sim_index.pkl')
def init_system(path_to_serialized_model, create_reporting=False):
    """Deserialize the model at *path_to_serialized_model* and build the API.

    Returns a (api, reporting) tuple; *reporting* is None unless
    *create_reporting* is True.
    """
    print_md('Loading: *' + str(path_to_serialized_model) + "*")
    sl = time.time()
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    store_client = StoreHandler()
    api = API(network=network, store_client=store_client)
    # BUG FIX: 'reporting' was previously unbound when create_reporting=False
    # (the default), so the return statement raised UnboundLocalError.
    reporting = None
    if create_reporting:
        reporting = Report(network)
    api.helper.help()
    el = time.time()
    print("Took " + str(el - sl) + " to load model")
    return api, reporting
def __init_system(path_to_serialized_model, create_reporting=True):
    """Deserialize the model at *path_to_serialized_model* and build the
    legacy (oldAPI) API.

    Returns a (api, reporting) tuple; *reporting* is None unless
    *create_reporting* is True.
    """
    print_md('Loading: *' + str(path_to_serialized_model) + "*")
    sl = time.time()
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    api = oldAPI(network)
    # BUG FIX: 'reporting' was previously unbound when called with
    # create_reporting=False, so the return statement raised UnboundLocalError.
    reporting = None
    if create_reporting:
        reporting = Report(network)
    api.init_store()
    api.help()
    el = time.time()
    print("Took " + str(el - sl) + " to load all data")
    return api, reporting
def read_table_columns(path_to_serialized_model, network=False):
    """Yield one (db_name, table_name, column_names) triple per table.

    If *network* is falsy, the network is deserialized from
    *path_to_serialized_model* first.
    """
    # If the network is not provided, then we use the path to deserialize from disk
    if not network:
        network = fieldnetwork.deserialize_network(path_to_serialized_model)
    source_ids = network._get_underlying_repr_table_to_ids()
    col_info = network._get_underlying_repr_id_to_field_info()
    for table_name, field_ids in source_ids.items():
        db_name = None
        # BUG FIX: the original reused a single 'cols' list and called
        # cols.clear() after each yield, so a consumer holding the yielded
        # list saw it emptied as soon as the generator resumed. Build a
        # fresh list per table instead.
        cols = []
        for field_id in field_ids:
            (db_name, sn_name, fn_name, data_type) = col_info[field_id]
            cols.append(fn_name)
        yield (db_name, table_name, cols)
def test_fuzzy(path_to_serialized_model):
    """Run the fuzzy hierarchy/content matcher over the EFO ontology and
    print every matching found."""
    graph = fieldnetwork.deserialize_network(path_to_serialized_model)
    client = StoreHandler()

    # Pre-built similarity indexes live next to the serialized model.
    schema_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')

    om = SSAPI(graph, client, schema_index, content_index)
    om.add_krs([("efo", "cache_onto/efo.pkl")], parsed=True)

    for matching in matcherlib.find_hierarchy_content_fuzzy(om.kr_handlers, client):
        print(matching)
def export(self, path_to_model):
    """Export every relation of the field network at *path_to_model* to Neo4j.

    For each (a, b) hit of each Relation type, creates both endpoint nodes
    and then a relationship typed after the relation label.
    """
    field_network = fieldnetwork.deserialize_network(path_to_model)
    # Create index to speed up MATCHes
    with self._driver.session() as session:
        session.run("CREATE INDEX ON :Node(nid)")
    for relation_label in Relation:
        # relation_hits is a generator. We could consume it to a list and then iterate over it,
        # but this would probably consume too much memory in most scenarios
        relation_hits = field_network.enumerate_relation(relation_label, as_str=False)
        for a, b in tqdm(
                relation_hits,
                desc=f'Storing {relation_label} relations to Neo4j',
                unit='relation'):
            with self._driver.session() as session:
                # Step 1: add both endpoint nodes.
                # BUG FIX: the second CREATE previously omitted db_name from
                # the Cypher text even though the parameter was passed, so
                # b-nodes were stored without their db_name property. Both
                # endpoints now share one query string.
                node_query = ("CREATE (n:Node {nid:$nid,db_name:$db_name,"
                              "source:$source,field:$field,score:$score}) RETURN id(n)")
                session.run(node_query,
                            nid=a.nid, db_name=a.db_name, source=a.source_name,
                            field=a.field_name, score=a.score)
                session.run(node_query,
                            nid=b.nid, db_name=b.db_name, source=b.source_name,
                            field=b.field_name, score=b.score)
                # Step 2: connect the two nodes. Plain .format here — the
                # original's stray f-prefix on a placeholder-free fragment
                # did nothing.
                session.run(
                    "MATCH (a:Node),(b:Node)"
                    " WHERE a.nid=$nid_a AND b.nid=$nid_b "
                    "CREATE (a)-[r: {relation_label}]->(b) RETURN type(r)".format(
                        relation_label=str(relation_label).replace('Relation.', '')),
                    nid_a=a.nid,
                    nid_b=b.nid)  # .single().value()
class TestReporting(unittest.TestCase):
    """Smoke-tests the Report statistics over the test4 graph."""

    # Shared fixtures, built once when the class is defined: store handler,
    # deserialized graph, and an initialized API over it.
    store_client = StoreHandler()
    path = '../test/test4/'
    network = deserialize_network(path)
    api = API(network)
    api.init_store()

    def test_compute_statistics(self):
        report = Report(self.network)
        stats = [
            ("Num cols: ", report.num_columns),
            ("Num tables: ", report.num_tables),
            ("Num content sim relations: ", report.num_content_sim_relations),
            ("Num schema sim relations: ", report.num_schema_sim_relations),
            ("Num PKFK relations: ", report.num_pkfk_relations),
        ]
        for label, value in stats:
            print(label + str(value))
def main(args):
    """Run DoD virtual-schema search over the given attributes/values,
    printing each candidate view and optionally persisting it to CSV."""
    client = StoreHandler()
    graph = fieldnetwork.deserialize_network(args.model_path)
    dod = DoD(network=graph, store_client=client, csv_separator=args.separator)

    attrs = args.list_attributes.split(";")
    values = args.list_values.split(";")
    print(attrs)
    print(values)
    assert len(attrs) == len(values)

    search = dod.virtual_schema_iterative_search(attrs, values,
                                                 debug_enumerate_all_jps=False)
    for i, (mjp, attrs_project, metadata) in enumerate(search):
        print("JP: " + str(i))
        proj_view = dpu.project(mjp, attrs_project)
        print(str(proj_view.head(10)))
        print("Metadata")
        print(metadata)
        if args.output_path:
            if args.full_view:
                mjp.to_csv(args.output_path + "/raw_view_" + str(i),
                           encoding='latin1', index=False)
            # always store the projected view
            proj_view.to_csv(args.output_path + "/view_" + str(i),
                             encoding='latin1', index=False)
        if args.interactive == "True":
            print("")
            input("Press any key to continue...")
class TestProvenance(unittest.TestCase):
    """Exercises provenance capture: for several query primitives, prints the
    provenance graph and the why/how explanation of one result element."""

    # create store handler
    store_client = StoreHandler()
    # read graph
    path = '../test/test4/'
    network = deserialize_network(path)
    api = API(network)
    api.init_store()

    def test_keyword_provenance(self):
        # Provenance of a plain keyword search.
        print(self._testMethodName)
        res = self.api.keyword_search("Madden", max_results=10)
        print(res.get_provenance().prov_graph().nodes())
        print(res.get_provenance().prov_graph().edges())
        el_interest = [x for x in res][0]
        info = res.why(el_interest)
        print("WHY " + str(el_interest) + "? " + str(info))
        explanation = res.how(el_interest)
        print("HOW " + str(el_interest) + "? " + str(explanation))
        self.assertTrue(True)

    def test_content_sim_provenance(self):
        # Provenance of a content-similarity query over a whole table.
        print(self._testMethodName)
        table = 'Buildings.csv'
        res = self.api.similar_content_to_table(table)
        print(res.get_provenance().prov_graph().nodes())
        print(res.get_provenance().prov_graph().edges())
        el_interest = [x for x in res][0]
        info = res.why(el_interest)
        print("WHY " + str(el_interest) + "? " + str(info))
        explanation = res.how(el_interest)
        print("HOW " + str(el_interest) + "? " + str(explanation))
        self.assertTrue(True)

    def test_intersection_provenance(self):
        # Provenance should survive combining two result sets.
        print(self._testMethodName)
        res1 = self.api.keyword_search("Madden", max_results=10)
        res2 = self.api.keyword_search("Stonebraker", max_results=10)
        res = res1.intersection(res2)
        print(res.get_provenance().prov_graph().nodes())
        print(res.get_provenance().prov_graph().edges())
        el_interest = [x for x in res][0]
        info = res.why(el_interest)
        print("WHY " + str(el_interest) + "? " + str(info))
        explanation = res.how(el_interest)
        print("HOW " + str(el_interest) + "? " + str(explanation))
        self.assertTrue(True)

    def test_tc_table_mode_provenance(self):
        # Provenance of a PKFK path query with both DRSs in table mode.
        print(self._testMethodName)
        field1 = ('dwhsmall', 'All_olap2_uentity_desc_uses.csv', 'Entity Owner')
        field2 = ('dwhsmall', 'All_olap_entity_desc_uses.csv', 'Entity Owner')
        drs1 = self.api.drs_from_raw_field(field1)
        drs2 = self.api.drs_from_raw_field(field2)
        drs1.set_table_mode()
        drs2.set_table_mode()
        res = self.api.paths_between(drs1, drs2, Relation.PKFK)
        print(res.get_provenance().prov_graph().nodes())
        print(res.get_provenance().prov_graph().edges())
        el_interest = [x for x in res][0]
        info = res.why(el_interest)
        print("WHY " + str(el_interest) + "? " + str(info))
        explanation = res.how(el_interest)
        print("HOW " + str(el_interest) + "? " + str(explanation))
        self.assertTrue(True)
def test_4_n_42(path_to_serialized_model):
    """Debug driver for the L6/L4/L42 matchers over the DBpedia ontology.

    NOTE(review): the exit() call after the L6 section stops the process
    there — everything below it (L4, L42, output file) is currently dead
    code kept for manual experimentation.
    """
    # Deserialize model
    network = fieldnetwork.deserialize_network(path_to_serialized_model)
    # Create client
    store_client = StoreHandler()
    # Load glove model
    print("Loading language model...")
    path_to_glove_model = "../glove/glove.6B.100d.txt"
    glove_api.load_model(path_to_glove_model)
    print("Loading language model...OK")
    # Retrieve indexes
    schema_sim_index = io.deserialize_object(path_to_serialized_model + 'schema_sim_index.pkl')
    content_sim_index = io.deserialize_object(path_to_serialized_model + 'content_sim_index.pkl')
    # Create ontomatch api
    om = SSAPI(network, store_client, schema_sim_index, content_sim_index)
    # Load parsed ontology
    #om.add_krs([("efo", "cache_onto/efo.pkl")], parsed=True)
    #om.add_krs([("clo", "cache_onto/clo.pkl")], parsed=True)
    #om.add_krs([("bao", "cache_onto/bao.pkl")], parsed=True)
    om.add_krs([("dbpedia", "cache_onto/dbpedia.pkl")], parsed=True)  # parse again
    # L6: [Relations] -> [Class names] (semantic groups)
    print("Finding L6 matchings...")
    st = time.time()
    l6_matchings, sem_coh_groups = matcherlib.find_sem_coh_matchings(om.network, om.kr_handlers)
    print("Finding L6 matchings...OK, " + str(len(l6_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))
    for m in l6_matchings:
        print(m)
    for k, v in sem_coh_groups.items():
        print(str(k) + " -> " + str(v))
    # NOTE(review): everything below this call never runs.
    exit()
    print("Finding matchings...")
    st = time.time()
    # L4: [Relation names] -> [Class names] (syntax)
    print("Finding L4 matchings...")
    st = time.time()
    l4_matchings = matcherlib.find_relation_class_name_matchings(om.network, om.kr_handlers)
    print("Finding L4 matchings...OK, " + str(len(l4_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))
    # Count how many class matches each relation participates in.
    print("computing fanout")
    fanout = defaultdict(int)
    for m in l4_matchings:
        sch, cla = m
        fanout[sch] += 1
    ordered = sorted(fanout.items(), key=operator.itemgetter(1), reverse=True)
    for o in ordered:
        print(o)
    # for match in l4_matchings:
    #     print(match)
    # L4.2: [Relation names] -> [Class names] (semantic)
    print("Finding L42 matchings...")
    st = time.time()
    l42_matchings = matcherlib.find_relation_class_name_sem_matchings(om.network, om.kr_handlers)
    print("Finding L42 matchings...OK, " + str(len(l42_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))
    et = time.time()
    print("Finding matchings...OK")
    print("Took: " + str(et - st))
    # Check whether every syntactic (L4) match also appears semantically (L42).
    print("are l4 subsumed by l42?")
    not_in_l42 = 0
    not_subsumed = []
    for m in l4_matchings:
        if m not in l42_matchings:
            not_in_l42 += 1
            not_subsumed.append(m)
    print("NOT-subsumed: " + str(not_in_l42))
    """
    # L5: [Attribute names] -> [Class names] (syntax)
    print("Finding L5 matchings...")
    st = time.time()
    l5_matchings = matcherlib.find_relation_class_attr_name_matching(om.network, om.kr_handlers)
    print("Finding L5 matchings...OK, " + str(len(l5_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))
    # for match in l5_matchings:
    #     print(match)
    # l52_matchings = []
    # L52: [Attribute names] -> [Class names] (semantic)
    print("Finding L52 matchings...")
    st = time.time()
    l52_matchings = matcherlib.find_relation_class_attr_name_sem_matchings(om.network, om.kr_handlers)
    print("Finding L52 matchings...OK, " + str(len(l52_matchings)) + " found")
    et = time.time()
    print("Took: " + str(et - st))
    """
    with open('OUTPUT_442_only', 'w') as f:
        f.write("L4" + '\n')
        for m in l4_matchings:
            f.write(str(m) + '\n')
        f.write("L42" + '\n')
        for m in l42_matchings:
            f.write(str(m) + '\n')
        f.write("L5" + '\n')
class TestDDApiPathQueries(unittest.TestCase):
    """Exercises path/traversal (TC) primitives of the API over the
    chemical model, printing results for manual inspection."""

    # create store handler
    store_client = StoreHandler()
    # read graph
    path = 'models/chemical/'
    network = deserialize_network(path)
    api = API(network)
    api.init_store()

    """
    TC primitive API
    """

    def test_paths_between_field_mode(self):
        # PKFK paths between two raw fields, field granularity.
        print(self._testMethodName)
        field1 = ('chembl_21', 'drug_indication', 'record_id')
        field2 = ('chembl_21', 'compound_records', 'record_id')
        drs1 = self.api.drs_from_raw_field(field1)
        drs2 = self.api.drs_from_raw_field(field2)
        res = self.api.paths_between(drs1, drs2, Relation.PKFK)
        data = [x for x in res]
        print("Total results: " + str(len(data)))
        for el in data:
            print(str(el))

    def test_paths_between_table_mode(self):
        # Same query as above but with both DRSs switched to table mode,
        # then inspect provenance and enumerate the paths.
        print(self._testMethodName)
        field1 = ('chembl_21', 'drug_indication', 'record_id')
        field2 = ('chembl_21', 'compound_records', 'record_id')
        drs1 = self.api.drs_from_raw_field(field1)
        drs2 = self.api.drs_from_raw_field(field2)
        drs1.set_table_mode()
        drs2.set_table_mode()
        res = self.api.paths_between(drs1, drs2, Relation.PKFK)
        data = [x for x in res]
        print("Total results: " + str(len(data)))
        for el in data:
            print(str(el))
        print("Paths: ")
        res.visualize_provenance()
        res.debug_print()
        paths = res.paths()
        for p in paths:
            print(str(p))

    def test_paths_between_from_tables(self):
        # PKFK paths starting from whole-table DRSs instead of raw fields.
        print(self._testMethodName)
        table1_name = "drug_indication"
        table2_name = "compound_records"
        table1 = self.api.drs_from_table(table1_name)
        table2 = self.api.drs_from_table(table2_name)
        table1.set_table_mode()
        table2.set_table_mode()
        res = self.api.paths_between(table1, table2, Relation.PKFK)
        data = [x for x in res]
        print("Total results: " + str(len(data)))
        for el in data:
            print(str(el))
        print("Paths: ")
        paths = res.paths()
        for p in paths:
            print(str(p))

    def test_paths(self):
        # Placeholder: not implemented yet.
        print(self._testMethodName)
        return

    def test_traverse(self):
        # One-hop traversal along SCHEMA_SIM edges from a raw field.
        print(self._testMethodName)
        field1 = ('chembl_21', 'drug_indication', 'record_id')
        drs_field = self.api.drs_from_raw_field(field1)
        res = self.api.traverse(drs_field, Relation.SCHEMA_SIM, 1)
        data = [x for x in res]
        print("Total results: " + str(len(data)))
        for el in data:
            print(str(el))
        return
# Ignore in-table results of neighbor searches # Exclude certain tables # keyword_search and neighbor_search, but on mutiple contexts import networkx as nx from api.apiutils import Relation from modelstore.elasticstore import StoreHandler, KWType from knowledgerepr import fieldnetwork from algebra import API path_to_serialized_model = "/Users/arcarter/code/datadiscovery/test/testmodel/" network = fieldnetwork.deserialize_network(path_to_serialized_model) store_client = StoreHandler() api = API(network, store_client) # short variables for Scope # These are used in keyword searches # To specify what parts of a file will be searched source = KWType.KW_TABLE # table/file/source name field = KWType.KW_SCHEMA # colum names/fields content = KWType.KW_TEXT # content of the columns # Short variables for Relation # These represent edge types in the graph # and are used for neighbor searches # schema = Relation.SCHEMA # similar schemas schema_sim = Relation.SCHEMA_SIM # Similar Schema Names # similar content values. i.e. matching substrings and numbers content_sim = Relation.CONTENT_SIM
class TestDDApi(unittest.TestCase):
    """Broad smoke-tests of the discovery API over the dwh model: seed
    builders, primitive searches, combiners, and a regression case."""

    # create store handler
    store_client = StoreHandler()
    # read graph
    path = 'models/dwh/'
    network = deserialize_network(path)
    api = API(network)
    api.init_store()

    """
    Seed API
    """

    def test_drs_from_raw_field(self):
        print(self._testMethodName)
        field = ('mitdwh', 'Iap_subject_person.csv', 'Person Mit Affiliation')
        res = self.api.drs_from_raw_field(field)
        for el in res:
            print(str(el))

    def test_drs_from_hit(self):
        print(self._testMethodName)
        field = ('mitdwh', 'Iap_subject_person.csv', 'Person Mit Affiliation')
        res = self.api.drs_from_raw_field(field)
        els = [x for x in res]
        el = els[0]
        res = self.api.drs_from_hit(el)
        for el in res:
            print(str(el))

    def test_drs_from_table(self):
        print(self._testMethodName)
        table = 'Iap_subject_person.csv'
        res = self.api.drs_from_table(table)
        for el in res:
            print(el)

    def test_drs_from_table_hit(self):
        print(self._testMethodName)
        field = ('mitdwh', 'Iap_subject_person.csv', 'Person Mit Affiliation')
        res = self.api.drs_from_raw_field(field)
        els = [x for x in res]
        el = els[0]
        res = self.api.drs_from_table_hit(el)
        for el in res:
            print(str(el))

    """
    Primitive API
    """

    def test_keyword_search(self):
        print(self._testMethodName)
        res = self.api.keyword_search("Madden", max_results=10)
        for el in res:
            print(str(el))

    def test_keywords_search(self):
        print(self._testMethodName)
        res = self.api.keywords_search(["Madden", "Stonebraker", "Liskov"])
        for el in res:
            print(str(el))

    def test_schema_name_search(self):
        print(self._testMethodName)
        res = self.api.schema_name_search("Name", max_results=10)
        for el in res:
            print(str(el))

    def test_schema_names_search(self):
        print(self._testMethodName)
        res = self.api.schema_names_search(["Name", "Last Name", "Employee"])
        for el in res:
            print(str(el))

    def test_entity_search(self):
        # Not implemented yet.
        print(self._testMethodName)
        print("Future Work...")
        return

    def test_schema_neighbors(self):
        print(self._testMethodName)
        field = ('mitdwh', 'Iap_subject_person.csv', 'Person Mit Affiliation')
        res = self.api.schema_neighbors(field)
        for el in res:
            print(str(el))

    def test_schema_neighbors_of(self):
        print(self._testMethodName)
        field = ('mitdwh', 'Iap_subject_person.csv', 'Person Mit Affiliation')
        res = self.api.schema_neighbors(field)
        res = self.api.schema_neighbors_of(res)
        for el in res:
            print(str(el))

    def test_similar_schema_name_to_field(self):
        print(self._testMethodName)
        field = ('mitdwh', 'Buildings.csv', 'Building Name')
        res = self.api.similar_schema_name_to_field(field)
        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_ids_functions(self):
        # NOTE(review): the second tuple swaps table and column order —
        # looks intentional (exercising a malformed field), but confirm.
        print(self._testMethodName)
        field = ('mitdwh', 'Buildings.csv', 'Building Key')
        drs1 = self.api.drs_from_raw_field(field)
        field = ('mitdwh', 'Building Key', 'Buildings.csv')
        drs2 = self.api.drs_from_raw_field(field)
        for el in drs1:
            print(str(el))
        for el in drs2:
            print(str(el))

    def test_similar_schema_name_to_table(self):
        print(self._testMethodName)
        table = 'Buildings.csv'
        res = self.api.similar_schema_name_to_table(table)
        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_similar_schema_name_to(self):
        print(self._testMethodName)
        field = ('mitdwh', 'Buildings.csv', 'Building Key')
        res = self.api.similar_schema_name_to_field(field)
        res = self.api.similar_schema_name_to(res)
        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_similar_content_to_field(self):
        print(self._testMethodName)
        field = ('mitdwh', 'Buildings.csv', 'Building Name')
        res = self.api.similar_content_to_field(field)
        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_similar_content_to_table(self):
        print(self._testMethodName)
        table = 'Buildings.csv'
        res = self.api.similar_content_to_table(table)
        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_similar_content_to(self):
        print(self._testMethodName)
        field = ('mitdwh', 'Buildings.csv', 'Building Name')
        res = self.api.similar_content_to_field(field)
        res = self.api.similar_content_to(res)
        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_pkfk_field(self):
        print(self._testMethodName)
        field = ('mitdwh', 'Buildings.csv', 'Building Name')
        res = self.api.pkfk_field(field)
        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_pkfk_table(self):
        print(self._testMethodName)
        table = 'Buildings.csv'
        res = self.api.pkfk_table(table)
        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    def test_pkfk_of(self):
        print(self._testMethodName)
        field = ('mitdwh', 'Buildings.csv', 'Building Name')
        res = self.api.pkfk_field(field)
        res = self.api.pkfk_of(res)
        print("RES size: " + str(res.size()))
        for el in res:
            print(str(el))

    """
    Combiner API
    """

    def test_intersection(self):
        print(self._testMethodName)
        res1 = self.api.keyword_search("Madden", max_results=10)
        res2 = self.api.keyword_search("Stonebraker", max_results=10)
        res = res1.intersection(res2)
        for el in res:
            print(str(el))

    def test_union(self):
        print(self._testMethodName)
        res1 = self.api.keyword_search("Madden", max_results=10)
        res2 = self.api.schema_name_search("Stonebraker", max_results=10)
        res = res1.union(res2)
        for el in res:
            print(str(el))

    def test_difference(self):
        print(self._testMethodName)
        res1 = self.api.keyword_search("Madden", max_results=10)
        res2 = self.api.keyword_search("Stonebraker", max_results=10)
        res = res1.set_difference(res2)
        for el in res:
            print(str(el))

    """
    Other, bugs, etc
    """

    def test_iter_edges_with_data_bug(self):
        # Regression scenario: combine content, schema and pkfk similarity
        # for one table and print the tables in the intersection.
        table = "Fac_building.csv"  # The table of interest
        # We get the representation of that table in DRS
        table_drs = self.api.drs_from_table(table)
        # similar tables are those with similar content
        content_similar = self.api.similar_content_to(table_drs)
        schema_similar = self.api.similar_schema_name_to(table_drs)  # similar attribute names
        # some pkfk relationship involved too
        pkfk_similar = self.api.pkfk_of(table_drs)
        # similar tables are similar in content and schema
        inters1 = self.api.intersection(content_similar, schema_similar)
        similar_tables = self.api.intersection(inters1, pkfk_similar)
        similar_tables.print_tables()