def testFakeDataSet1(self):
    """
    Check that the first fake dataset yields one gene-to-gene triple.

    The protein-to-gene map is fetched through ``Ensembl`` for taxon
    '9606', each gene ID is turned into an ``ENSEMBL:`` CURIE, and the
    rows in ``self.test_set_1`` are processed into the graph.  The graph
    must then equal a single ``RO:0002434`` statement between the two
    expected genes.

    NOTE(review): ``fetch_protein_gene_map`` presumably needs network
    access to Ensembl -- confirm before running offline.
    """
    string_db = StringDB('rdf_graph', True)
    # Start from a fresh graph and confirm it is empty before processing.
    string_db.graph = RDFGraph(True)
    self.assertEqual(len(string_db.graph), 0)

    ensembl = Ensembl('rdf_graph', True)
    prot_map = ensembl.fetch_protein_gene_map('9606')
    # Rewrite the values in place: each gene ID becomes a one-element
    # list holding its ENSEMBL CURIE.  A plain loop is used instead of a
    # list comprehension so no throwaway list of None is built.
    for protein_id in prot_map:
        prot_map[protein_id] = ['ENSEMBL:' + prot_map[protein_id]]
    print("Finished fetching ENSP IDs, fetched {} proteins".format(len(prot_map)))

    dataframe = pd.DataFrame(data=self.test_set_1, columns=self.columns)
    string_db._process_protein_links(dataframe, prot_map, '9606')

    # g1 <interacts with> g2
    triples = """ ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 . """
    self.assertTrue(self.test_util.test_graph_equality(triples, string_db.graph))
def testFakeDataSet2(self):
    """
    Check that a deprecated protein ID adds nothing to the graph.

    The dataset contains a deprecated protein ID that we expect to be
    filtered out by Ensembl BioMart, so processing it must leave the
    graph empty.
    """
    source = StringDB('rdf_graph', True)
    # Swap in a fresh graph and verify it starts out empty.
    source.graph = RDFGraph()
    self.assertEqual(len(source.graph), 0)

    protein_links = pd.DataFrame(data=self.test_set_2, columns=self.columns)
    source._process_protein_links(protein_links, self.protein_list, '9606')

    # No triples should have been produced.
    self.assertEqual(len(source.graph), 0)
def testFakeDataSet2(self):
    """
    Verify that processing the second fake dataset yields an empty graph.

    The rows carry a deprecated protein ID, which is expected to be
    filtered out by Ensembl BioMart; no triples should therefore be
    written.  The taxon is passed as the integer 9606.
    """
    ingest = StringDB('rdf_graph', True)
    ingest.graph = RDFGraph()
    graph_size_before = len(ingest.graph)
    self.assertEqual(graph_size_before, 0)

    links_frame = pd.DataFrame(data=self.test_set_2, columns=self.columns)
    ingest._process_protein_links(links_frame, self.protein_list, 9606)

    graph_size_after = len(ingest.graph)
    self.assertEqual(graph_size_after, 0)
def testFakeDataSet2(self):
    """
    Check that a deprecated protein ID adds no interaction triples.

    The dataset contains a deprecated protein ID that we expect to be
    filtered out by Ensembl BioMart.  The graph created by ``StringDB``
    is used as-is, so the only statements left are the 3 dataset
    metadata triples:

        MonarchData:string.ttl a owl:Ontology ;
            owl:versionIRI
                <https://archive.monarchinitiative.org/.../string.ttl> ;
            owl:versionInfo "some version"
    """
    source = StringDB('rdf_graph', True)
    protein_links = pd.DataFrame(data=self.test_set_2, columns=self.columns)
    source._process_protein_links(protein_links, self.protein_list, 9606)

    # Only the three ontology/version triples should be present.
    triple_count = len(source.graph)
    self.assertEqual(triple_count, 3)
def testFakeDataSet1(self):
    """
    Check that the first fake dataset yields one gene-to-gene triple.

    The protein-to-gene map is fetched via ``Ensembl`` for taxon 9606,
    its values are rewritten as ``ENSEMBL:`` CURIEs, and the rows in
    ``self.test_set_1`` are processed.  The resulting graph must equal
    the single expected ``RO:0002434`` statement.
    """
    source = StringDB('rdf_graph', True)
    # Replace the graph with a fresh one and confirm it is empty.
    source.graph = RDFGraph(True)
    self.assertEqual(len(source.graph), 0)

    protein_to_gene = Ensembl('rdf_graph', True).fetch_protein_gene_map(9606)
    # Prefix every gene ID in place with the ENSEMBL CURIE namespace.
    for protein_id in protein_to_gene:
        gene_id = protein_to_gene[protein_id]
        protein_to_gene[protein_id] = "ENSEMBL:{}".format(gene_id)
    protein_count = len(protein_to_gene)
    print("Finished fetching ENSP IDs, fetched {} proteins".format(
        protein_count))

    protein_links = pd.DataFrame(
        data=self.test_set_1, columns=self.columns)
    source._process_protein_links(protein_links, protein_to_gene, 9606)

    expected_triples = """ ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 . """
    graphs_match = self.test_util.test_graph_equality(
        expected_triples, source.graph)
    self.assertTrue(graphs_match)
def testFakeDataSet1(self):
    """
    Verify the graph built from the first fake dataset.

    Steps: reset the ``StringDB`` graph, fetch the protein-to-gene map
    for taxon 9606 through ``Ensembl``, convert each mapped gene ID into
    an ``ENSEMBL:`` CURIE, process ``self.test_set_1``, and compare the
    graph against the one expected ``RO:0002434`` triple.
    """
    string_source = StringDB('rdf_graph', True)
    string_source.graph = RDFGraph(True)
    initial_size = len(string_source.graph)
    self.assertEqual(initial_size, 0)

    ensembl_source = Ensembl('rdf_graph', True)
    gene_by_protein = ensembl_source.fetch_protein_gene_map(9606)
    # Values are rewritten in place; the key set is left untouched.
    for ensp in gene_by_protein:
        gene_by_protein[ensp] = "ENSEMBL:{}".format(gene_by_protein[ensp])
    summary = "Finished fetching ENSP IDs, fetched {} proteins".format(
        len(gene_by_protein))
    print(summary)

    links_frame = pd.DataFrame(data=self.test_set_1, columns=self.columns)
    string_source._process_protein_links(links_frame, gene_by_protein, 9606)

    triples = """ ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 . """
    is_equal = self.test_util.test_graph_equality(
        triples, string_source.graph)
    self.assertTrue(is_equal)
def testFakeDataSet1(self):
    """
    Check the first fake dataset by querying the graph with SPARQL.

    After binding all namespaces on the ``StringDB`` graph, the
    protein-to-gene map for taxon 9606 is fetched through ``Ensembl``
    and its values rewritten as ``ENSEMBL:`` CURIEs.  Once
    ``self.test_set_1`` is processed, a SPARQL query for subjects
    related via ``RO:0002434`` to ``ENSEMBL:ENSG00000004059`` must
    return exactly the node for ``ENSEMBL:ENSG00000001626``.
    """
    source = StringDB('rdf_graph', True)
    # Prefixes must be bound so the CURIEs in the query below resolve.
    source.graph.bind_all_namespaces()

    protein_to_gene = Ensembl('rdf_graph', True).fetch_protein_gene_map(9606)
    for protein_id in protein_to_gene:
        gene_id = protein_to_gene[protein_id]
        protein_to_gene[protein_id] = "ENSEMBL:{}".format(gene_id)
    print("Finished fetching ENSP IDs, fetched {} proteins".format(
        len(protein_to_gene)))

    protein_links = pd.DataFrame(
        data=self.test_set_1, columns=self.columns)
    source._process_protein_links(protein_links, protein_to_gene, 9606)

    sparql_query = """ SELECT ?prot WHERE { ?prot RO:0002434 ENSEMBL:ENSG00000004059 . } """
    results = list(source.graph.query(sparql_query))

    # Each result row is a 1-tuple holding the bound ?prot node.
    interactor = URIRef(source.graph._getNode("ENSEMBL:ENSG00000001626"))
    expected = [(interactor,)]
    self.assertEqual(results, expected)