def test_therapeutic_relationship(self):
    """
    Verify the chemical-treats-disease association in the test graph.

    Loads the source's test turtle, queries every OBAN association
    (association, object, predicate, subject) and checks that the row
    for MESH:D009538 / OMIM:188890 linked by the
    'substance_that_treats' property is among the results.
    """
    from dipper.utils.TestUtils import TestUtils
    from dipper.models.Model import Model

    # Query helper bound to the source graph, then load the test ttl
    harness = TestUtils(self.source.graph)
    harness.load_testgraph_from_turtle(self.source)
    graph = self.source.graph
    model = Model(graph)

    # Expected structure
    # TODO can this be unified OBAN and the Annot models
    # to be automatically generated?
    sparql_query = """
        SELECT ?assoc ?disease ?rel ?chemical
        WHERE {
            ?assoc a OBAN:association ;
                OBAN:association_has_object ?disease ;
                OBAN:association_has_predicate ?rel ;
                OBAN:association_has_subject ?chemical .}
        """

    # Identifiers whose nodes the query is expected to bind
    chem_id = 'MESH:D009538'
    chem_uri = graph._getNode(chem_id)
    disease_id = 'OMIM:188890'
    disease_uri = graph._getNode(disease_id)
    rel_id = model.object_properties['substance_that_treats']
    rel_uri = graph._getNode(rel_id)

    # TODO unused
    # pubmed_id = 'PMID:16785264'
    # pubmed_uri = gu.getNode(pubmed_id)
    # eco = 'ECO:0000033'

    association = G2PAssoc(
        graph, self.source.name, chem_id, disease_id, rel_id)
    assoc_uri = self.source.graph._getNode(association.make_g2p_id())

    # One of the expected outputs from the query
    expected_row = [assoc_uri, disease_uri, rel_uri, chem_uri]

    rows = harness.query_graph(sparql_query)

    was_found = expected_row in rows
    failure_note = (
        "did not find expected association: " + str(expected_row) +
        " found " + str(len(rows)) + " others:\n" + str(rows))
    self.assertTrue(was_found, failure_note)

    logger.info("Test query data finished.")
def test_therapeutic_relationship(self):
    """
    Assert that the expected OBAN association row is present.

    The row consists of the association node built by G2PAssoc for
    chemical MESH:D009538, disease OMIM:188890 and the
    'substance_that_treats' property, followed by the disease,
    relation and chemical nodes.
    """
    from dipper.utils.TestUtils import TestUtils
    from dipper.models.Model import Model

    # Make the query helper and load the test turtle for this source
    query_util = TestUtils(self.source.graph)
    query_util.load_testgraph_from_turtle(self.source)
    graph = self.source.graph
    model = Model(graph)

    # Expected structure
    # TODO can this be unified OBAN and the Annot models
    # to be automatically generated?
    sparql_query = """
        SELECT ?assoc ?disease ?rel ?chemical
        WHERE {
            ?assoc a OBAN:association ;
                OBAN:association_has_object ?disease ;
                OBAN:association_has_predicate ?rel ;
                OBAN:association_has_subject ?chemical .}
        """

    # SPARQL variables to check (node lookups kept in this order)
    chemical_curie = 'MESH:D009538'
    chemical_node = graph._getNode(chemical_curie)
    disease_curie = 'OMIM:188890'
    disease_node = graph._getNode(disease_curie)
    relation_curie = model.object_properties['substance_that_treats']
    relation_node = graph._getNode(relation_curie)
    # TODO unused: pubmed_id 'PMID:16785264' and eco 'ECO:0000033'

    g2p = G2PAssoc(
        graph, self.source.name,
        chemical_curie, disease_curie, relation_curie)
    association_curie = g2p.make_g2p_id()
    association_node = self.source.graph._getNode(association_curie)

    # One of the expected outputs from the query
    wanted = [association_node, disease_node, relation_node, chemical_node]

    results = query_util.query_graph(sparql_query)

    self.assertTrue(
        wanted in results,
        "did not find expected association: " + str(wanted) +
        " found " + str(len(results)) + " others:\n" + str(results))

    logger.info("Test query data finished.")
def test_therapeutic_relationship(self):
    """
    Verify the annotated chemical-treats-disease association.

    Loads the source's test turtle, selects every Annotation with
    evidence ECO_0000033 and predicate RO_0002606, and checks that the
    row for MESH:D009538 / OMIM:188890 sourced from PMID:16785264 is
    among the results.
    """
    from dipper.utils.TestUtils import TestUtils
    from dipper.utils.GraphUtils import GraphUtils
    from dipper import curie_map

    # Query helper bound to the source graph, then load the test ttl
    harness = TestUtils(self.source.graph)
    harness.load_testgraph_from_turtle(self.source)

    # Expected structure
    # TODO can this be unified OBAN and the Annot models to be
    # automatically generated?
    sparql_query = """
        SELECT ?assoc ?pubmed ?disease ?chemical
        WHERE {
            ?assoc a Annotation: ;
                dc:evidence OBO:ECO_0000033 ;
                dc:source ?pubmed ;
                :hasObject ?disease ;
                :hasPredicate OBO:RO_0002606 ;
                :hasSubject ?chemical .}
        """

    # SPARQL variables to check
    gu = GraphUtils(curie_map.get())
    chem_id = 'MESH:D009538'
    chem_uri = gu.getNode(chem_id)
    disease_id = 'OMIM:188890'
    disease_uri = gu.getNode(disease_id)
    eco = 'ECO:0000033'
    rel_id = gu.object_properties['substance_that_treats']
    pubmed_id = 'PMID:16785264'
    pubmed_uri = gu.getNode(pubmed_id)

    # consider replacing with make_ctd_chem_disease_assoc_id()
    assoc_id = self.source.make_association_id(
        'ctd', chem_id, rel_id, disease_id, eco, pubmed_id)
    assoc_uri = gu.getNode(assoc_id)

    # One of the expected outputs from the query
    expected_row = [assoc_uri, pubmed_uri, disease_uri, chem_uri]

    rows = harness.query_graph(sparql_query)

    was_found = expected_row in rows
    failure_note = (
        "did not find expected association: " + assoc_id +
        " found: " + pprint.pformat(rows))
    self.assertTrue(was_found, failure_note)

    logger.info("Test query data finished.")
def test_classes_indiv_properties(self):
    """
    Check the classes, individuals and properties made from the sample.

    The query expects, in the CGD graph:
      - a disease that is an owl:Class, subclass of DOID:4, carrying
        the disease label
      - a disease instance typed by that disease, carrying the
        instance label and a BFO:0000159 quality
      - a drug that is an owl:Class, subclass of CHEBI:23888, carrying
        the drug label
      - the relationship URI typed as an owl:ObjectProperty
      - a source typed as IAO:0000013 (journal article)
    and exactly one result row made of the fixture URIs.
    """
    from dipper.utils.TestUtils import TestUtils

    # Make the query helper and load the namespace bindings
    test_env = TestUtils(self.cgd.graph)
    self.cgd.load_bindings()

    query_template = """
        SELECT ?disease ?diseaseInd ?diseaseQual ?drug ?source
        WHERE {{
            ?disease a owl:Class ;
                rdfs:subClassOf DOID:4 ;
                rdfs:label "{0}" .
            ?diseaseInd a ?disease ;
                rdfs:label "{1}" ;
                BFO:0000159 ?diseaseQual .
            ?drug a owl:Class ;
                rdfs:subClassOf CHEBI:23888 ;
                rdfs:label "{2}" .
            <{3}> a owl:ObjectProperty .
            ?source a IAO:0000013 .
        }}
        """
    sparql_query = query_template.format(
        self.disease_label,
        self.disease_instance_label,
        self.drug_label,
        self.relationship_uri)

    # The single row the query should return
    only_row = [
        self.disease_uri,
        self.disease_ind_uri,
        self.disease_quality_uri,
        self.drug_uri,
        self.source_uri,
    ]
    expected_results = [only_row]

    sparql_output = test_env.query_graph(sparql_query)
    self.assertEqual(expected_results, sparql_output)
def test_therapeutic_relationship(self):
    """
    Verify the annotated chemical-treats-disease association in CTD.

    Selects every Annotation with evidence ECO_0000033 and predicate
    RO_0002606 from the CTD graph and checks that the row for
    MESH:D009538 / OMIM:188890 sourced from PMID:16785264 is present.
    """
    from dipper.utils.TestUtils import TestUtils
    from dipper.utils.GraphUtils import GraphUtils

    # Make the query helper and load the namespace bindings
    harness = TestUtils(self.ctd.graph)
    self.ctd.load_bindings()

    # Expected structure
    sparql_query = """
        SELECT ?assoc ?pubmed ?disease ?chemical
        WHERE {
            ?assoc a Annotation: ;
                dc:evidence OBO:ECO_0000033 ;
                dc:source ?pubmed ;
                :hasObject ?disease ;
                :hasPredicate OBO:RO_0002606 ;
                :hasSubject ?chemical .}
        """

    # SPARQL variables to check
    gu = GraphUtils(curie_map.get())
    chem_id = 'MESH:D009538'
    chem_uri = gu.getNode(chem_id)
    disease_id = 'OMIM:188890'
    disease_uri = gu.getNode(disease_id)
    pubmed_id = 'PMID:16785264'
    pubmed_uri = gu.getNode(pubmed_id)
    rel_id = gu.object_properties['substance_that_treats']
    eco = 'ECO:0000033'

    # TODO PYLINT make_association_id() does not exist in CTD
    # there is "_make_association()" with a different sig
    assoc_id = self.ctd.make_association_id(
        'ctd', chem_id, rel_id, disease_id, eco, pubmed_id)
    assoc_uri = gu.getNode(assoc_id)

    # Expected output from the query
    expected_row = [assoc_uri, pubmed_uri, disease_uri, chem_uri]

    rows = harness.query_graph(sparql_query)

    was_found = expected_row in rows
    self.assertTrue(was_found)

    logger.info("Test finished.")
def test_associations(self):
    """
    Check the variant/disease association made from the sample input.

    The query expects, in the CGD graph:
      - a variant linked to a disease instance by OBO:RO_0002200
        (has_phenotype)
      - an OBAN:association with an OBO:RO_0002558 evidence value and a
        dc:source
      - the association linked to a drug through the relationship URI
      - OBAN:association_has_subject pointing at the variant
      - OBAN:association_has_object_property OBO:RO_0002200
      - OBAN:association_has_object pointing at the disease instance
    and exactly one result row made of the fixture URIs plus the
    ECO_0000033 evidence URI.
    """
    from dipper.utils.TestUtils import TestUtils

    # Make the curie resolver and query helper, then load bindings
    cu = CurieUtil(self.curie_map)
    test_env = TestUtils(self.cgd.graph)
    self.cgd.load_bindings()

    evidence_curie = 'OBO:ECO_0000033'
    evidence_uri = URIRef(cu.get_uri(evidence_curie))

    query_template = """
        SELECT ?diseaseInd ?variant ?drug ?vdannot ?source ?evidence
        WHERE {{
            ?variant OBO:RO_0002200 ?diseaseInd .
            ?vdannot a OBAN:association ;
                OBO:RO_0002558 ?evidence ;
                dc:source ?source ;
                <{0}> ?drug ;
                OBAN:association_has_object ?diseaseInd ;
                OBAN:association_has_object_property OBO:RO_0002200 ;
                OBAN:association_has_subject ?variant .
        }}
        """
    sparql_query = query_template.format(self.relationship_uri)

    # The single row the query should return
    only_row = [
        self.disease_ind_uri,
        self.variant_uri,
        self.drug_uri,
        self.vd_annot_uri,
        self.source_uri,
        evidence_uri,
    ]
    expected_results = [only_row]

    sparql_output = test_env.query_graph(sparql_query)
    self.assertEqual(expected_results, sparql_output)
def test_therapeutic_relationship(self):
    """
    Verify the annotated chemical-treats-disease association in CTD.

    Nodes are resolved through ``self.graph`` and the relation id comes
    from ``self.model``; the query itself runs against ``self.ctd.graph``.
    The expected row is the association for MESH:D009538 / OMIM:188890
    sourced from PMID:16785264.
    """
    from dipper.utils.TestUtils import TestUtils
    from dipper.utils.GraphUtils import GraphUtils

    # Make the query helper on the CTD graph
    harness = TestUtils(self.ctd.graph)

    # Expected structure
    sparql_query = """
        SELECT ?assoc ?pubmed ?disease ?chemical
        WHERE {
            ?assoc a Annotation: ;
                dc:evidence OBO:ECO_0000033 ;
                dc:source ?pubmed ;
                :hasObject ?disease ;
                :hasPredicate OBO:RO_0002606 ;
                :hasSubject ?chemical .}
        """

    # SPARQL variables to check
    chem_id = 'MESH:D009538'
    chem_uri = self.graph._getNode(chem_id)
    disease_id = 'OMIM:188890'
    disease_uri = self.graph._getNode(disease_id)
    pubmed_id = 'PMID:16785264'
    pubmed_uri = self.graph._getNode(pubmed_id)
    rel_id = self.model.object_properties['substance_that_treats']
    eco = 'ECO:0000033'

    # TODO PYLINT make_association_id() does not exist in CTD
    # there is "_make_association()" with a different sig
    assoc_id = self.ctd.make_association_id(
        'ctd', chem_id, rel_id, disease_id, eco, pubmed_id)
    assoc_uri = self.graph._getNode(assoc_id)

    # Expected output from the query
    expected_row = [assoc_uri, pubmed_uri, disease_uri, chem_uri]

    rows = harness.query_graph(sparql_query)

    was_found = expected_row in rows
    self.assertTrue(was_found)

    logger.info("Test finished.")
def main():
    """
    Command-line entry point for the Dipper ingest pipeline.

    Parses the command-line options and then either:
      * with ``--query``: for each requested source, imports its class,
        checks the query syntax, loads the source's turtle and prints
        the query result, then exits with status 0; or
      * otherwise: runs the shared test suite (unless skipped), and for
        every source in ``--sources`` imports
        ``dipper.sources.<ClassName>``, instantiates the class, fetches,
        runs the source's own tests, parses and writes the graph in the
        requested serialization.

    Relies on module-level names: argparse, importlib, logging, time,
    unittest, TestUtils, GraphUtils and test_suite.

    Raises:
        KeyError: if a requested source is not a key of the source map.
    """
    # TODO this should be generated by looking in the dipper/sources directory
    # or read from a sources/dataset/config yaml or dir of yamls
    # Maps the command-line source key to the class (and module) name.
    source_to_class_map = {
        # 'facebase_alpha': 'FaceBase_alpha',
        'hpoa': 'HPOAnnotations',  # ~3 min
        'zfin': 'ZFIN',
        'omim': 'OMIM',  # full file takes ~15 min, due to required throttling
        'biogrid': 'BioGrid',  # interactions file takes <10 minutes
        'mgi': 'MGI',
        'impc': 'IMPC',
        # Panther takes ~1hr to map 7 species-worth of associations
        'panther': 'Panther',
        'oma': 'OMA',
        'ncbigene': 'NCBIGene',  # takes about 4 minutes to process 2 species
        'ucscbands': 'UCSCBands',
        'ctd': 'CTD',
        'genereviews': 'GeneReviews',
        'eom': 'EOM',  # Takes about 5 seconds.
        'coriell': 'Coriell',
        # 'clinvar': 'ClinVar',                     # takes ~ half hour
        # 'clinvarxml_alpha': 'ClinVarXML_alpha',   # takes ~ five minutes
        'monochrom': 'Monochrom',
        'kegg': 'KEGG',
        'animalqtldb': 'AnimalQTLdb',
        'ensembl': 'Ensembl',
        'hgnc': 'HGNC',
        'orphanet': 'Orphanet',
        'omia': 'OMIA',
        'flybase': 'FlyBase',
        'mmrrc': 'MMRRC',
        'wormbase': 'WormBase',
        'mpd': 'MPD',
        'gwascatalog': 'GWASCatalog',
        'monarch': 'Monarch',
        'go': 'GeneOntology',
        'reactome': 'Reactome',
        'udp': 'UDP',
        'mgi-slim': 'MGISlim',
        'zfin-slim': 'ZFINSlim',
        'bgee': 'Bgee',
        'mydrug': 'MyDrug',
        'stringdb': 'StringDB',
        'rgd': 'RGD',
        'sgd': 'SGD'
    }

    logger = logging.getLogger(__name__)

    parser = argparse.ArgumentParser(
        description='Dipper: Data Ingestion Pipeline for SciGraph',
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '-g', '--graph', type=str, default="rdf_graph",
        help='graph type: rdf_graph, streamed_graph')
    parser.add_argument(
        '-s', '--sources', type=str, required=True,
        help='comma separated list of sources')
    parser.add_argument(
        '-l', '--limit', type=int, help='limit number of rows')
    # NOTE(review): in the code below this flag skips the fetch step;
    # the help text describes it differently -- confirm intended wording.
    parser.add_argument(
        '--parse_only', action='store_true',
        help='parse files without writing')
    parser.add_argument(
        '--fetch_only', action='store_true',
        help='fetch sources without parsing')
    parser.add_argument('-f', '--force', action='store_true',
                        help='force re-download of files')
    parser.add_argument(
        '--no_verify', help='ignore the verification step',
        action='store_true')
    parser.add_argument('--query', help='enter in a sparql query', type=str)
    parser.add_argument(
        '-q', '--quiet', help='turn off info logging', action="store_true")
    parser.add_argument(
        '--debug', help='turn on debug logging', action="store_true")
    parser.add_argument(
        '--skip_tests', help='skip any testing', action="store_true")
    # Blank Nodes can't be visualized in Protege, default to Skolemizing them
    parser.add_argument(
        '-b', '--use_bnodes',
        help="use blank nodes instead of skolemizing",
        action="store_true", default=False)
    # TODO this should live in a global data file
    # and the same filter be applied to all sources
    parser.add_argument(
        '-t', '--taxon', type=str,
        help='Add a taxon constraint on a source. Enter 1+ NCBITaxon numbers,'
             ' comma delimited\n'
             'Implemented taxa per source\n'
             'NCBIGene: 9606,10090,7955\n'
             'Panther: 9606,10090,10116,7227,7955,6239,8355\n'
             'BioGrid: 9606,10090,10116,7227,7955,6239,8355\n'
             'UCSCBands: 9606\n'
             'GO: 9606,10090,10116,7227,7955,6239,9615,9823,9031,9913')
    parser.add_argument(
        '-o', '--test_only',
        help='only process and output the pre-configured test subset',
        action="store_true")
    parser.add_argument(
        '--dest_fmt',
        help='serialization format: [turtle], nt, nquads, rdfxml, n3, raw',
        type=str)
    parser.add_argument(
        '--version', '-v', help='version of source', type=str)

    args = parser.parse_args()

    tax_ids = None
    if args.taxon is not None:
        tax_ids = [int(t) for t in args.taxon.split(',')]

    # Class names of the sources that accept a tax_ids argument
    taxa_supported = [  # these are not taxa
        'Panther', 'NCBIGene', 'BioGrid', 'UCSCBands', 'GeneOntology',
        'Bgee', 'Ensembl', 'StringDB', 'OMA']

    formats_supported = [
        'turtle', 'ttl', 'ntriples', 'nt', 'nquads', 'nq',
        'rdfxml', 'xml', 'notation3', 'n3', 'raw']
    # Accepted alias -> canonical serializer name
    format_aliases = {
        'ttl': 'turtle',
        'ntriples': 'nt',
        'nq': 'nquads',
        'xml': 'rdfxml',
        'notation3': 'n3',
    }

    if args.quiet:
        log_level = logging.ERROR
    elif args.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level)

    if not args.use_bnodes:
        logger.info("Will Skolemize Blank Nodes")

    if args.query is not None:
        test_query = TestUtils()
        for source in args.sources.split(','):
            source = source.lower()
            # The map holds class *names* (strings); look the name up
            # without calling it, which previously raised TypeError.
            class_name = source_to_class_map[source]

            # import source lib
            module = "dipper.sources.{0}".format(class_name)
            imported_module = importlib.import_module(module)
            source_class = getattr(imported_module, class_name)

            test_query.check_query_syntax(args.query, source_class)
            test_query.load_graph_from_turtle(source_class)

            print(test_query.query_graph(args.query, True))
        exit(0)

    # Both flags come from store_true, so they are plain booleans here
    run_tests = not (args.no_verify or args.skip_tests)

    # run initial tests
    if run_tests:
        unittest.TextTestRunner(verbosity=2).run(test_suite)

    # set serializer
    if args.dest_fmt is None:
        args.dest_fmt = 'turtle'
    elif args.dest_fmt in formats_supported:
        args.dest_fmt = format_aliases.get(args.dest_fmt, args.dest_fmt)
    else:
        logger.error(
            "You have specified an invalid serializer: %s", args.dest_fmt)
        # Non-zero status so callers can detect the failure
        exit(1)

    # iterate through all the sources
    for source in args.sources.split(','):
        logger.info("\n******* %s *******", source)
        source = source.lower()
        src = source_to_class_map[source]

        # import source lib
        module = "dipper.sources.{0}".format(src)
        imported_module = importlib.import_module(module)
        source_class = getattr(imported_module, src)

        # arg factory
        source_args = dict(graph_type=args.graph)
        source_args['are_bnodes_skolemized'] = not args.use_bnodes
        if src in taxa_supported:
            source_args['tax_ids'] = tax_ids
        if args.version:
            source_args['version'] = args.version

        mysource = source_class(**source_args)

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed intervals.
        if args.parse_only is False:
            start_fetch = time.perf_counter()
            mysource.fetch(args.force)
            end_fetch = time.perf_counter()
            logger.info("Fetching time: %d sec", end_fetch-start_fetch)

        mysource.settestonly(args.test_only)

        # run tests first
        if run_tests:
            suite = mysource.getTestSuite()
            if suite is None:
                logger.warning(
                    "No tests configured for this source: %s", source)
            else:
                unittest.TextTestRunner(verbosity=2).run(suite)
        else:
            logger.info("Skipping Tests for source: %s", source)

        if args.test_only is False and args.fetch_only is False:
            start_parse = time.perf_counter()
            mysource.parse(args.limit)
            end_parse = time.perf_counter()
            logger.info("Parsing time: %d sec", end_parse-start_parse)

            if args.graph == 'rdf_graph':
                logger.info("Found %d nodes", len(mysource.graph))

                # Add property axioms
                start_axiom_exp = time.perf_counter()
                logger.info("Adding property axioms")

                properties = GraphUtils.get_properties_from_graph(
                    mysource.graph)
                GraphUtils.add_property_axioms(mysource.graph, properties)
                end_axiom_exp = time.perf_counter()
                logger.info("Property axioms added: %d sec",
                            end_axiom_exp-start_axiom_exp)

            start_write = time.perf_counter()
            mysource.write(fmt=args.dest_fmt)
            end_write = time.perf_counter()
            logger.info("Writing time: %d sec", end_write-start_write)

        # if args.no_verify is not True:
        #    status = mysource.verify()
        #    if status is not True:
        #        logger.error(
        #            'Source %s did not pass verification tests.', source)
        #        exit(1)
        # else:
        #    logger.info('skipping verification step')
        logger.info('***** Finished with %s *****', source)

    # load configuration parameters
    # for example, keys

    logger.info("All done.")
def main():
    """
    Command-line entry point for the Dipper ingest pipeline.

    Parses the command-line options and then either:
      * with ``--query``: for each requested source, imports its class,
        checks the query syntax, loads the source's turtle and prints
        the query result, then exits with status 0; or
      * otherwise: runs the shared test suite (unless skipped), and for
        every source in ``--sources`` imports
        ``dipper.sources.<ClassName>``, instantiates the class, fetches,
        runs the source's own tests, parses and writes the graph in the
        requested serialization.

    The log level is applied to the root logger with ``setLevel``.

    Relies on module-level names: argparse, importlib, logging, time,
    unittest, TestUtils, GraphUtils and test_suite.

    Raises:
        KeyError: if a requested source is not a key of the source map.
    """
    # TODO this should be generated by looking in the dipper/sources directory
    # or read from a sources/dataset/config yaml or dir of yamls
    # Maps the command-line source key to the class (and module) name.
    source_to_class_map = {
        # 'facebase_alpha': 'FaceBase_alpha',
        'hpoa': 'HPOAnnotations',  # ~3 min
        'zfin': 'ZFIN',
        'omim': 'OMIM',  # full file takes ~15 min, due to required throttling
        'biogrid': 'BioGrid',  # interactions file takes <10 minutes
        'mgi': 'MGI',
        'impc': 'IMPC',
        # Panther takes ~1hr to map 7 species-worth of associations
        'panther': 'Panther',
        'oma': 'OMA',
        'ncbigene': 'NCBIGene',  # takes about 4 minutes to process 2 species
        'ucscbands': 'UCSCBands',
        'ctd': 'CTD',
        'genereviews': 'GeneReviews',
        'eom': 'EOM',  # Takes about 5 seconds.
        'coriell': 'Coriell',
        # 'clinvar': 'ClinVar',                     # takes ~ half hour
        # 'clinvarxml_alpha': 'ClinVarXML_alpha',   # takes ~ five minutes
        'monochrom': 'Monochrom',
        'kegg': 'KEGG',
        'animalqtldb': 'AnimalQTLdb',
        'ensembl': 'Ensembl',
        'hgnc': 'HGNC',
        'orphanet': 'Orphanet',
        'omia': 'OMIA',
        'flybase': 'FlyBase',
        'mmrrc': 'MMRRC',
        'wormbase': 'WormBase',
        'mpd': 'MPD',
        'gwascatalog': 'GWASCatalog',
        'monarch': 'Monarch',
        'go': 'GeneOntology',
        'reactome': 'Reactome',
        'udp': 'UDP',
        'mgi-slim': 'MGISlim',
        'zfinslim': 'ZFINSlim',
        'bgee': 'Bgee',
        'mydrug': 'MyDrug',
        'stringdb': 'StringDB',
        'rgd': 'RGD',
        'sgd': 'SGD',
        'mychem': 'MyChem'
    }

    logger = logging.getLogger(__name__)

    parser = argparse.ArgumentParser(
        description='Dipper: Data Ingestion Pipeline for SciGraph',
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '-g', '--graph', type=str, default="rdf_graph",
        help='graph type: rdf_graph, streamed_graph')
    parser.add_argument(
        '-s', '--sources', type=str, required=True,
        help='comma separated list of sources')
    parser.add_argument(
        '-l', '--limit', type=int, help='limit number of rows')
    # NOTE(review): in the code below this flag skips the fetch step;
    # the help text describes it differently -- confirm intended wording.
    parser.add_argument(
        '--parse_only', action='store_true',
        help='parse files without writing')
    parser.add_argument(
        '--fetch_only', action='store_true',
        help='fetch sources without parsing')
    parser.add_argument('-f', '--force', action='store_true',
                        help='force re-download of files')
    parser.add_argument(
        '--no_verify', help='ignore the verification step',
        action='store_true')
    parser.add_argument('--query', help='enter in a sparql query', type=str)
    parser.add_argument(
        '-q', '--quiet', help='turn off info logging', action="store_true")
    parser.add_argument(
        '--debug', help='turn on debug logging', action="store_true")
    parser.add_argument(
        '--skip_tests', help='skip any testing', action="store_true")
    # Blank Nodes can't be visualized in Protege, default to Skolemizing them
    parser.add_argument(
        '-b', '--use_bnodes',
        help="use blank nodes instead of skolemizing",
        action="store_true", default=False)
    # TODO this should live in a global data file
    # and the same filter be applied to all sources
    parser.add_argument(
        '-t', '--taxon', type=str,
        help='Add a taxon constraint on a source. Enter 1+ NCBITaxon numbers,'
             ' comma delimited\n'
             'Implemented taxa per source\n'
             'NCBIGene: 9606,10090,7955\n'
             'Panther: 9606,10090,10116,7227,7955,6239,8355\n'
             'BioGrid: 9606,10090,10116,7227,7955,6239,8355\n'
             'UCSCBands: 9606\n'
             'GO: 9606,10090,10116,7227,7955,6239,9615,9823,9031,9913')
    parser.add_argument(
        '-o', '--test_only',
        help='only process and output the pre-configured test subset',
        action="store_true")
    parser.add_argument(
        '--dest_fmt',
        help='serialization format: [turtle], nt, nquads, rdfxml, n3, raw',
        type=str)
    parser.add_argument(
        '--version', '-v', help='version of source', type=str)

    args = parser.parse_args()

    tax_ids = None
    if args.taxon is not None:
        tax_ids = [int(t) for t in args.taxon.split(',')]

    # Class names of the sources that accept a tax_ids argument
    taxa_supported = [  # these are not taxa
        'Panther', 'NCBIGene', 'BioGrid', 'UCSCBands', 'GeneOntology',
        'Bgee', 'Ensembl', 'StringDB', 'OMA']

    formats_supported = [
        'turtle', 'ttl', 'ntriples', 'nt', 'nquads', 'nq',
        'rdfxml', 'xml', 'notation3', 'n3', 'raw']
    # Accepted alias -> canonical serializer name
    format_aliases = {
        'ttl': 'turtle',
        'ntriples': 'nt',
        'nq': 'nquads',
        'xml': 'rdfxml',
        'notation3': 'n3',
    }

    if args.quiet:
        log_level = logging.WARNING
    elif args.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.getLogger().setLevel(log_level)

    if not args.use_bnodes:
        logger.info("Will Skolemize Blank Nodes")

    if args.query is not None:
        test_query = TestUtils()
        for source in args.sources.split(','):
            source = source.lower()
            # The map holds class *names* (strings); look the name up
            # without calling it, which previously raised TypeError.
            class_name = source_to_class_map[source]

            # import source lib
            module = "dipper.sources.{0}".format(class_name)
            imported_module = importlib.import_module(module)
            source_class = getattr(imported_module, class_name)

            test_query.check_query_syntax(args.query, source_class)
            test_query.load_graph_from_turtle(source_class)

            print(test_query.query_graph(args.query, True))
        exit(0)

    # Both flags come from store_true, so they are plain booleans here
    run_tests = not (args.no_verify or args.skip_tests)

    # run initial tests
    if run_tests:
        unittest.TextTestRunner(verbosity=2).run(test_suite)

    # set serializer
    if args.dest_fmt is None:
        args.dest_fmt = 'turtle'
    elif args.dest_fmt in formats_supported:
        args.dest_fmt = format_aliases.get(args.dest_fmt, args.dest_fmt)
    else:
        logger.error(
            "You have specified an invalid serializer: %s", args.dest_fmt)
        # Non-zero status so callers can detect the failure
        exit(1)

    # iterate through all the sources
    for source in args.sources.split(','):
        logger.info("\n******* %s *******", source)
        source = source.lower()
        src = source_to_class_map[source]

        # import source lib
        module = "dipper.sources.{0}".format(src)
        imported_module = importlib.import_module(module)
        source_class = getattr(imported_module, src)

        # arg factory
        source_args = dict(graph_type=args.graph)
        source_args['are_bnodes_skolemized'] = not args.use_bnodes
        if src in taxa_supported:
            source_args['tax_ids'] = tax_ids
        if args.version:
            source_args['version'] = args.version

        mysource = source_class(**source_args)

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed intervals.
        if args.parse_only is False:
            start_fetch = time.perf_counter()
            mysource.fetch(args.force)
            end_fetch = time.perf_counter()
            logger.info("Fetching time: %d sec", end_fetch-start_fetch)

        mysource.settestonly(args.test_only)

        # run tests first
        if run_tests:
            suite = mysource.getTestSuite()
            if suite is None:
                logger.warning(
                    "No tests configured for this source: %s", source)
            else:
                unittest.TextTestRunner(verbosity=2).run(suite)
        else:
            logger.info("Skipping Tests for source: %s", source)

        if args.test_only is False and args.fetch_only is False:
            start_parse = time.perf_counter()
            mysource.parse(args.limit)
            end_parse = time.perf_counter()
            logger.info("Parsing time: %d sec", end_parse-start_parse)

            if args.graph == 'rdf_graph':
                logger.info("Found %d nodes", len(mysource.graph))

                # Add property axioms
                start_axiom_exp = time.perf_counter()
                logger.info("Adding property axioms")

                properties = GraphUtils.get_properties_from_graph(
                    mysource.graph)
                GraphUtils.add_property_axioms(mysource.graph, properties)
                end_axiom_exp = time.perf_counter()
                logger.info("Property axioms added: %d sec",
                            end_axiom_exp-start_axiom_exp)

            start_write = time.perf_counter()
            mysource.write(fmt=args.dest_fmt)
            end_write = time.perf_counter()
            logger.info("Writing time: %d sec", end_write-start_write)

        # if args.no_verify is not True:
        #    status = mysource.verify()
        #    if status is not True:
        #        logger.error(
        #            'Source %s did not pass verification tests.', source)
        #        exit(1)
        # else:
        #    logger.info('skipping verification step')
        logger.info('***** Finished with %s *****', source)

    # load configuration parameters
    # for example, keys

    logger.info("All done.")
def main():
    """
    Command-line entry point for the Dipper ingest pipeline.

    Parses the command-line options and then either:
      * with ``--query``: for each requested source, instantiates it,
        checks the query syntax, loads the source's turtle and prints
        the query result, then exits with status 0; or
      * otherwise: runs the shared test suite (unless ``--no_verify``),
        and for every source in ``--sources`` instantiates its class,
        fetches, runs the source's own tests, parses and writes turtle.

    Relies on module-level names: argparse, logging, unittest, TestUtils,
    test_suite and the source classes listed in the map below.

    Raises:
        KeyError: if a requested source is not a key of the source map.
    """
    # Maps the command-line source key to the source class itself.
    source_to_class_map = {
        'hpoa': HPOAnnotations,  # ~3 min
        'zfin': ZFIN,
        'omim': OMIM,  # full file takes ~15 min, due to required throttling
        'biogrid': BioGrid,  # interactions file takes <10 minutes
        'mgi': MGI,
        'impc': IMPC,
        # this takes a very long time,
        # ~1hr to map 7 species-worth of associations
        'panther': Panther,
        'ncbigene': NCBIGene,  # takes about 4 minutes to process 2 species
        'ucscbands': UCSCBands,
        'ctd': CTD,
        'genereviews': GeneReviews,
        'eom': EOM,  # Takes about 5 seconds.
        'coriell': Coriell,
        'clinvar': ClinVar,
        'monochrom': Monochrom,
        'kegg': KEGG,
        'animalqtldb': AnimalQTLdb,
        'ensembl': Ensembl,
        'hgnc': HGNC,
        'orphanet': Orphanet
    }

    logger = logging.getLogger(__name__)

    parser = argparse.ArgumentParser(
        description='Dipper: Data Ingestion'
                    ' Pipeline for SciGraph',
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-s', '--sources', type=str, required=True,
                        help='comma separated list of sources')
    parser.add_argument('-l', '--limit', type=int,
                        help='limit number of rows')
    # NOTE(review): in the code below this flag skips the fetch step;
    # the help text describes it differently -- confirm intended wording.
    # (A stray trailing comma after this call was removed; it only
    # wrapped the call's result in a discarded tuple.)
    parser.add_argument('--parse_only', action='store_true',
                        help='parse files without writing')
    parser.add_argument('--fetch_only', action='store_true',
                        help='fetch sources without parsing')
    parser.add_argument('-f', '--force', action='store_true',
                        help='force re-download of files')
    parser.add_argument('--no_verify', help='ignore the verification step',
                        action='store_true')
    parser.add_argument('--query', help='enter in a sparql query', type=str)
    parser.add_argument('-q', '--quiet', help='turn off info logging',
                        action="store_true")
    parser.add_argument('--debug', help='turn on debug logging',
                        action="store_true")
    # BNodes can't be visualized in Protege,
    # so you can materialize them for testing purposes with this flag
    parser.add_argument('-nb', '--no_bnodes',
                        help="convert blank nodes into identified nodes",
                        action="store_true")
    # TODO this preconfiguration should probably live in the conf.json,
    # and the same filter be applied to all sources
    parser.add_argument(
        '-t', '--taxon', type=str,
        help='Add a taxon constraint on a source. Enter 1+ NCBITaxon numbers, comma delimited\n'
             'Implemented taxa per source\n'
             'NCBIGene: 9606,10090,7955\n'
             'Panther: 9606,10090,10116,7227,7955,6239,8355\n'
             'BioGrid: 9606,10090,10116,7227,7955,6239,8355\n'
             'UCSCBands: 9606')
    parser.add_argument(
        '-o', '--test_only',
        help='only process and output the pre-configured test subset',
        action="store_true")

    args = parser.parse_args()

    tax_ids = None
    if args.taxon is not None:
        tax_ids = [int(t) for t in args.taxon.split(',')]

    # Source classes whose constructor is given the taxon id list
    taxa_supported = [Panther, NCBIGene, BioGrid, UCSCBands]

    if args.quiet:
        log_level = logging.ERROR
    elif args.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level)

    if args.no_bnodes is True:
        logger.info("Will materialize all BNodes into BASE space")

    if args.query is not None:
        test_query = TestUtils()
        for source in args.sources.split(','):
            source = source.lower()
            mysource = source_to_class_map[source]()
            test_query.check_query_syntax(args.query, mysource)
            test_query.load_graph_from_turtle(mysource)
            print(test_query.query_graph(args.query, True))
        exit(0)

    # --no_verify comes from store_true, so it is a plain boolean here
    run_tests = not args.no_verify

    # run initial tests
    if run_tests:
        unittest.TextTestRunner(verbosity=2).run(test_suite)

    # iterate through all the sources
    for source in args.sources.split(','):
        logger.info("\n******* %s *******", source)
        source = source.lower()
        src = source_to_class_map[source]

        if src in taxa_supported:
            mysource = src(tax_ids)
        else:
            mysource = src()

        if args.parse_only is False:
            mysource.fetch(args.force)

        mysource.settestonly(args.test_only)
        mysource.setnobnodes(args.no_bnodes)

        # run tests first
        if run_tests:
            suite = mysource.getTestSuite()
            if suite is None:
                # Logger.warn is a deprecated alias; use warning()
                logger.warning(
                    "No tests configured for this source: %s", source)
            else:
                unittest.TextTestRunner(verbosity=2).run(suite)
        else:
            logger.info("Skipping Tests for source: %s", source)

        if args.test_only is False and args.fetch_only is False:
            mysource.parse(args.limit)
            mysource.write(format='turtle')

        # if args.no_verify is not True:
        #    status = mysource.verify()
        #    if status is not True:
        #        logger.error(
        #            'Source %s did not pass verification tests.', source)
        #        exit(1)
        # else:
        #    logger.info('skipping verification step')
        logger.info('***** Finished with %s *****', source)

    # load configuration parameters
    # for example, keys

    logger.info("All done.")
def test_missense_variant_protein_model(self):
    """
    Test a missense variant that carries only protein information.

    Feeds test data set 1 to add_variant_info_to_graph() and expects a
    single result row for a variant that:
      - is an instance of OBO:SO_0001059 and OBO:SO_0001583
      - has the variant label from the test row
      - points at its gene through OBO:GENO_0000408
      - has a faldo:location region
      - has OBO:GENO_reference_amino_acid "Q"
      - has OBO:GENO_results_in_amino_acid_change "X"
      - is linked by RO:0002205 to a transcript that is an instance of
        OBO:SO_0000233 and is labelled with the transcript id
    """
    from dipper.utils.TestUtils import TestUtils

    self.cgd.add_variant_info_to_graph(self.test_set_1)

    # Make the query helper and curie resolver, then load bindings
    test_env = TestUtils(self.cgd.graph)
    cu = CurieUtil(self.curie_map)
    self.cgd.load_bindings()

    # First eleven columns of the first test row; only four are used,
    # but all eleven are unpacked so a short row still fails loudly.
    leading_columns = self.test_set_1[0][0:11]
    (variant_key, variant_label, _, _, transcript_id,
     _, _, _, _, transcript_gene, _) = leading_columns

    gene_id = self.cgd.gene_map[transcript_gene]
    ref_amino_acid = "Q"
    altered_amino_acid = "X"
    position = 741
    uniprot_curie = "UniProtKB:Q99062#Q99062-1"
    variant_id = self.cgd.make_cgd_id('variant{0}'.format(variant_key))
    transcript = "CCDS:413.1"
    region_id = ":_{0}{1}{2}Region".format(
        position, position, uniprot_curie)

    variant_uri = URIRef(cu.get_uri(variant_id))
    transcript_uri = URIRef(cu.get_uri(transcript))
    gene_uri = URIRef(cu.get_uri(gene_id))
    region_uri = URIRef(cu.get_uri(region_id))

    query_template = """
        SELECT ?variant ?gene ?region ?transcript
        WHERE {{
            ?variant a OBO:SO_0001059;
                a OBO:SO_0001583 ;
                rdfs:label "{0}" ;
                OBO:GENO_0000408 ?gene ;
                faldo:location ?region ;
                OBO:GENO_reference_amino_acid "{1}" ;
                OBO:GENO_results_in_amino_acid_change "{2}" ;
                RO:0002205 ?transcript .
            ?transcript a OBO:SO_0000233 ;
                rdfs:label "{3}" .
        }}
        """
    sparql_query = query_template.format(
        variant_label, ref_amino_acid, altered_amino_acid, transcript_id)

    # The single row the query should return
    only_row = [variant_uri, gene_uri, region_uri, transcript_uri]
    expected_results = [only_row]

    sparql_output = test_env.query_graph(sparql_query)
    self.assertEqual(expected_results, sparql_output)
def test_chromosome_position_model(self):
    """
    Test the modelling of genomic positions.

    Feeds test data set 2 to add_variant_info_to_graph() and expects a
    single faldo:Region whose begin and end are faldo:Position nodes
    with the row's start/end coordinates and a shared reference
    chromosome.
    """
    from dipper.utils.TestUtils import TestUtils

    self.cgd.add_variant_info_to_graph(self.test_set_2)

    # Make the query helper and curie resolver, then load bindings
    test_env = TestUtils(self.cgd.graph)
    cu = CurieUtil(self.curie_map)
    self.cgd.load_bindings()

    # Full first test row; the unpack doubles as a column-count check
    first_row = self.test_set_2[0]
    (variant_key, variant_label, amino_acid_variant,
     amino_acid_position, transcript_id, transcript_priority,
     protein_variant_type, functional_impact, stop_gain_loss,
     transcript_gene, protein_variant_source, variant_gene, bp_pos,
     variant_cdna, cosmic_id, db_snp_id, genome_pos_start,
     genome_pos_end, ref_base, variant_base,
     primary_transcript_exons, primary_transcript_variant_sub_types,
     variant_type, chromosome, genome_build, build_version,
     build_date) = first_row

    variant_id = self.cgd.make_cgd_id('variant{0}'.format(variant_key))
    chromosome_curie = ":MONARCH_hg19chr9"
    region_id = ":_{0}{1}Region-{2}-{3}".format(
        genome_build, chromosome, genome_pos_start, genome_pos_end)
    start_id = ":_hg19chr9-{0}".format(genome_pos_start)
    end_id = ":_hg19chr9-{0}".format(genome_pos_end)

    region_uri = URIRef(cu.get_uri(region_id))
    start_uri = URIRef(cu.get_uri(start_id))
    end_uri = URIRef(cu.get_uri(end_id))
    chromosome_uri = URIRef(cu.get_uri(chromosome_curie))

    query_template = """
        SELECT ?region ?startPosition ?endPosition ?chromosome
        WHERE {{
            ?region a faldo:Region ;
                faldo:begin ?startPosition ;
                faldo:end ?endPosition .
            ?startPosition a faldo:Position ;
                faldo:position {0} ;
                faldo:reference ?chromosome .
            ?endPosition a faldo:Position ;
                faldo:position {1} ;
                faldo:reference ?chromosome .
        }}
        """
    sparql_query = query_template.format(genome_pos_start, genome_pos_end)

    # The single row the query should return
    only_row = [region_uri, start_uri, end_uri, chromosome_uri]
    expected_results = [only_row]

    sparql_output = test_env.query_graph(sparql_query)
    self.assertEqual(expected_results, sparql_output)
def test_genome_build_chromosome_model(self):
    """
    Test modelling of genome, builds, and chromosomes.

    Uses test data set 2 and add_variant_info_to_graph().  The query
    checks the following, and expects exactly one result row:

      ?genome        an owl:Class labelled "Human genome",
                     rdfs:subClassOf OBO:SO_0001026
      ?chromosome    an owl:Class labelled "chr9 (Human)",
                     rdfs:subClassOf OBO:SO_0000340
      ?build         an instance of OBO:SO_0001505 and of ?genome,
                     labelled "hg19", with
                     OBO:RO_0002162 OBO:NCBITaxon_9606 and
                     OBO:RO_0002351 ?chromOnBuild
      ?chromOnBuild  an instance of ?chromosome and of OBO:SO_0000340,
                     labelled "chr9 (hg19)", with
                     OBO:RO_0002350 ?build
    """
    from dipper.utils.TestUtils import TestUtils

    self.cgd.add_variant_info_to_graph(self.test_set_2)

    # Make testutils object and load bindings
    test_env = TestUtils(self.cgd.graph)
    cu = CurieUtil(self.curie_map)
    self.cgd.load_bindings()

    # CURIEs and labels of the four nodes the query should bind
    genome = ":9606genome"
    genome_label = "Human genome"
    chromosome = "CHR:9606chr9"
    chromosome_label = "chr9 (Human)"
    build_curie = "UCSC:hg19"
    build_label = "hg19"
    chrom_on_build = ":MONARCH_hg19chr9"
    chrom_build_label = "chr9 (hg19)"

    genome_uri = URIRef(cu.get_uri(genome))
    chromosome_uri = URIRef(cu.get_uri(chromosome))
    build_uri = URIRef(cu.get_uri(build_curie))
    chrom_on_build_uri = URIRef(cu.get_uri(chrom_on_build))

    # NOTE: this query does not assert the following patterns:
    #   ?genome OBO:RO_0002162 OBO:NCBITaxon_9606
    #   ?genome OBO:RO_0002351 ?chromosome
    #   ?chromosome OBO:RO_0002350 ?genome
    #   ?build rdfs:subClassOf ?genome
    # A disabled draft of the query that required them used to sit in
    # this method as an unused string literal; it was dead code.
    sparql_query = """
        SELECT ?genome ?chromosome ?build ?chromOnBuild
        WHERE {{
            ?genome a owl:Class ;
                rdfs:label "{0}" ;
                rdfs:subClassOf OBO:SO_0001026 .
            ?chromosome a owl:Class ;
                rdfs:label "{1}" ;
                rdfs:subClassOf OBO:SO_0000340 .
            ?build a OBO:SO_0001505 ;
                a ?genome ;
                rdfs:label "{2}" ;
                OBO:RO_0002162 OBO:NCBITaxon_9606 ;
                OBO:RO_0002351 ?chromOnBuild .
            ?chromOnBuild a ?chromosome ;
                a OBO:SO_0000340 ;
                rdfs:label "{3}" ;
                OBO:RO_0002350 ?build .
        }}
    """.format(genome_label, chromosome_label,
               build_label, chrom_build_label)

    # Expected Results
    expected_results = [[
        genome_uri, chromosome_uri, build_uri, chrom_on_build_uri
    ]]

    # Query graph
    sparql_output = test_env.query_graph(sparql_query)
    self.assertEqual(expected_results, sparql_output)
def test_variant_position_region_model(self):
    """
    Check the FALDO modelling of a variant's position on a transcript.

    Loads test data set 2 through add_variant_info_to_graph(), then
    queries for a faldo:Region whose faldo:begin and faldo:end are the
    same node; that node must be a faldo:Position, carry the first
    row's bp_pos as faldo:position, and have a faldo:reference (bound
    to ?transcript).  Exactly one result row is expected:
    [region, position node, transcript].
    """
    from dipper.utils.TestUtils import TestUtils

    self.cgd.add_variant_info_to_graph(self.test_set_2)

    # Query harness over the populated graph, plus a CURIE resolver
    harness = TestUtils(self.cgd.graph)
    curie_util = CurieUtil(self.curie_map)
    self.cgd.load_bindings()

    def as_uri(curie):
        """Resolve a CURIE through the curie map and wrap it as a URIRef."""
        return URIRef(curie_util.get_uri(curie))

    # Column layout of a test_set_2 row; this test reads
    # variant_key, transcript_id and bp_pos.
    first_row = self.test_set_2[0]
    (variant_key, variant_label, amino_acid_variant,
     amino_acid_position, transcript_id, transcript_priority,
     protein_variant_type, functional_impact, stop_gain_loss,
     transcript_gene, protein_variant_source, variant_gene,
     bp_pos, variant_cdna, cosmic_id, db_snp_id,
     genome_pos_start, genome_pos_end, ref_base, variant_base,
     primary_transcript_exons, primary_transcript_variant_sub_types,
     variant_type, chromosome, genome_build, build_version,
     build_date) = first_row

    transcript_curie = self.cgd._make_transcript_curie(transcript_id)
    ccds_id = "35166.1"
    # Not referenced by the assertion below; the call is retained as-is.
    variant_id = self.cgd.make_cgd_id('variant{0}'.format(variant_key))

    region_id = ":_{0}Region".format(transcript_curie)
    both_strand_id = ":_{0}-{1}".format(ccds_id, bp_pos)

    # Same column order as the SELECT clause
    expected_row = [
        as_uri(curie)
        for curie in (region_id, both_strand_id, transcript_curie)
    ]

    sparql_query = """
        SELECT ?region ?bsPosition ?transcript
        WHERE {{
            ?region a faldo:Region ;
                faldo:begin ?bsPosition ;
                faldo:end ?bsPosition .
            ?bsPosition a faldo:Position ;
                faldo:position {0} ;
                faldo:reference ?transcript .
        }}
    """.format(bp_pos)

    # Expected results: a single row
    expected_results = [expected_row]

    # Query graph
    sparql_output = harness.query_graph(sparql_query)
    self.assertEqual(expected_results, sparql_output)
def test_amino_acid_position_region_model(self):
    """
    Check the FALDO modelling of an amino acid position.

    Loads test data set 1 through add_variant_info_to_graph(), then
    queries for a faldo:Region whose faldo:begin and faldo:end are the
    same node; that node must be a faldo:Position with
    faldo:position 741 and a faldo:reference (bound to ?protein).
    Exactly one result row is expected:
    [region, position node, UniProtKB:Q99062#Q99062-1].
    """
    from dipper.utils.TestUtils import TestUtils

    self.cgd.add_variant_info_to_graph(self.test_set_1)

    # Query harness over the populated graph, plus a CURIE resolver
    harness = TestUtils(self.cgd.graph)
    curie_util = CurieUtil(self.curie_map)
    self.cgd.load_bindings()

    def as_uri(curie):
        """Resolve a CURIE through the curie map and wrap it as a URIRef."""
        return URIRef(curie_util.get_uri(curie))

    # First eleven columns of the first test_set_1 row; only
    # variant_key is read below.
    leading_columns = self.test_set_1[0][:11]
    (variant_key, variant_label, amino_acid_variant,
     amino_acid_position, transcript_id, transcript_priority,
     protein_variant_type, functional_impact, stop_gain_loss,
     transcript_gene, protein_variant_source) = leading_columns

    position = 741
    # Not referenced by the assertion below; the call is retained as-is.
    variant_id = self.cgd.make_cgd_id('variant{0}'.format(variant_key))

    uniprot_curie = "UniProtKB:Q99062#Q99062-1"
    uniprot_id = "Q99062#Q99062-1"
    # The position appears twice in the region id (begin and end coincide)
    region_id = ":_{0}{1}{2}Region".format(
        position, position, uniprot_curie)
    both_strand_id = ":_{0}-{1}".format(uniprot_id, position)

    # Same column order as the SELECT clause
    expected_row = [
        as_uri(curie)
        for curie in (region_id, both_strand_id, uniprot_curie)
    ]

    sparql_query = """
        SELECT ?region ?bsPosition ?protein
        WHERE {{
            ?region a faldo:Region ;
                faldo:begin ?bsPosition ;
                faldo:end ?bsPosition .
            ?bsPosition a faldo:Position ;
                faldo:position {0} ;
                faldo:reference ?protein .
        }}
    """.format(position)

    # Expected results: a single row
    expected_results = [expected_row]

    # Query graph
    sparql_output = harness.query_graph(sparql_query)
    self.assertEqual(expected_results, sparql_output)
def test_missense_variant_cdna_model(self):
    """
    Check the modelling of a missense variant that has cDNA information.

    Loads test data set 2 through add_variant_info_to_graph() and runs
    one query that expects exactly one result row.  The query requires:

      ?cosmic (expected: COSMIC:12560)
        - is an instance of OBO:SO_0001059 and OBO:SO_0001583
        - OBO:GENO_0000408 ?gene
        - faldo:location ?aaRegion, ?cdnaRegion and ?chrRegion
        - OBO:GENO_reference_amino_acid "T"
        - OBO:GENO_reference_nucleotide <ref_base of the row>
        - OBO:GENO_altered_nucleotide <variant_base of the row>
        - OBO:GENO_results_in_amino_acid_change "I"
        - owl:sameAs ?dbSNP
        - RO:0002205 ?transcript
      ?transcript (expected: CCDS:35166.1)
        - is an instance of OBO:SO_0000233
        - rdfs:label <transcript_id of the row>
        - OBO:RO_0002513 ?uniprot and OBO:RO_0002513 ?refseq
      ?uniprot (expected: UniProtKB:P00519#P00519-1)
        - is an instance of OBO:SO_0000104, rdfs:label "P00519-1"
      ?refseq (expected: NCBIProtein:NP_005148.2)
        - is an instance of OBO:SO_0000104, rdfs:label "NP_005148.2"
        - owl:sameAs ?uniprot
      ?aaRegion / ?cdnaRegion / ?chrRegion
        - faldo:begin ?aaCoord / ?cdnaCoord / ?chrCoord, whose
          faldo:position is 315 / bp_pos / genome_pos_start
      ?dbSNP (expected: dbSNP:121913459)
        - rdfs:label <db_snp_id of the row>
    """
    from dipper.utils.TestUtils import TestUtils

    self.cgd.add_variant_info_to_graph(self.test_set_2)

    # Query harness over the populated graph, plus a CURIE resolver
    harness = TestUtils(self.cgd.graph)
    curie_util = CurieUtil(self.curie_map)
    self.cgd.load_bindings()

    def as_uri(curie):
        """Resolve a CURIE through the curie map and wrap it as a URIRef."""
        return URIRef(curie_util.get_uri(curie))

    # Column layout of a test_set_2 row
    first_row = self.test_set_2[0]
    (variant_key, variant_label, amino_acid_variant,
     amino_acid_position, transcript_id, transcript_priority,
     protein_variant_type, functional_impact, stop_gain_loss,
     transcript_gene, protein_variant_source, variant_gene,
     bp_pos, variant_cdna, cosmic_id, db_snp_id,
     genome_pos_start, genome_pos_end, ref_base, variant_base,
     primary_transcript_exons, primary_transcript_variant_sub_types,
     variant_type, chromosome, genome_build, build_version,
     build_date) = first_row

    gene_id = self.cgd.gene_map[transcript_gene]

    # Fixed values this test expects for the first row
    ref_amino_acid = "T"
    altered_amino_acid = "I"
    db_snp_curie = "dbSNP:121913459"
    cosmic_curie = "COSMIC:12560"
    uniprot_curie = "UniProtKB:P00519#P00519-1"
    uniprot_id = "P00519#P00519-1"
    refseq_curie = "NCBIProtein:NP_005148.2"
    transcript_curie = "CCDS:35166.1"
    ccds_id = "35166.1"
    position = 315
    chromosome_curie = "hg19chr9"

    variant_id = self.cgd.make_cgd_id('variant{0}'.format(variant_key))

    # Region identifiers (amino acid, cDNA, chromosomal)
    aa_region_id = ":_{0}{1}{2}Region".format(
        position, position, uniprot_curie)
    cdna_region_id = ":_{0}Region".format(transcript_curie)
    chr_region_id = ":_{0}{1}Region-{2}-{3}".format(
        genome_build, chromosome, genome_pos_start, genome_pos_end)

    # Coordinate identifiers; the chromosomal one uses the ":_" prefix
    # (not "CHR:"), like the other two.
    coord_template = ":_{0}-{1}"
    aa_coord_id = coord_template.format(uniprot_id, position)
    cdna_coord_id = coord_template.format(ccds_id, bp_pos)
    chr_coord_id = coord_template.format(
        chromosome_curie, genome_pos_start)

    # Resolve every identifier, keyed by the SPARQL variable it should
    # be bound to.
    # NOTE(review): the "variant" URI is resolved but never compared;
    # ?cosmic is the subject the query actually checks - confirm intent.
    curies_by_variable = [
        ("variant", variant_id),
        ("transcript", transcript_curie),
        ("gene", gene_id),
        ("dbSNP", db_snp_curie),
        ("cosmic", cosmic_curie),
        ("uniprot", uniprot_curie),
        ("refseq", refseq_curie),
        ("aaRegion", aa_region_id),
        ("cdnaRegion", cdna_region_id),
        ("chrRegion", chr_region_id),
        ("aaCoord", aa_coord_id),
        ("cdnaCoord", cdna_coord_id),
        ("chrCoord", chr_coord_id),
    ]
    uri_for = {}
    for variable, curie in curies_by_variable:
        uri_for[variable] = as_uri(curie)

    # NOTE(review): the ?uniprot label below is "P00519-1", not the full
    # "P00519#P00519-1" identifier, and "?cosmic owl:sameAs ?dbSNP" is
    # stated twice; both are kept exactly as they were.
    sparql_query = """
        SELECT ?cosmic ?gene ?aaRegion ?cdnaRegion ?chrRegion ?dbSNP
               ?transcript ?uniprot ?refseq ?aaCoord ?cdnaCoord ?chrCoord
        WHERE {{
            ?cosmic a OBO:SO_0001059;
                a OBO:SO_0001583 ;
                OBO:GENO_0000408 ?gene ;
                faldo:location ?aaRegion ;
                faldo:location ?cdnaRegion ;
                faldo:location ?chrRegion ;
                OBO:GENO_reference_amino_acid "{0}" ;
                OBO:GENO_reference_nucleotide "{1}" ;
                OBO:GENO_altered_nucleotide "{2}" ;
                OBO:GENO_results_in_amino_acid_change "{3}" ;
                owl:sameAs ?dbSNP ;
                RO:0002205 ?transcript .
            ?cosmic owl:sameAs ?dbSNP .
            ?transcript a OBO:SO_0000233 ;
                rdfs:label "{4}" ;
                OBO:RO_0002513 ?uniprot ;
                OBO:RO_0002513 ?refseq .
            ?uniprot a OBO:SO_0000104 ;
                rdfs:label "P00519-1" .
            ?refseq a OBO:SO_0000104 ;
                rdfs:label "NP_005148.2" .
            ?refseq owl:sameAs ?uniprot .
            ?aaRegion faldo:begin ?aaCoord .
            ?cdnaRegion faldo:begin ?cdnaCoord .
            ?chrRegion faldo:begin ?chrCoord .
            ?aaCoord faldo:position {5} .
            ?cdnaCoord faldo:position {6} .
            ?chrCoord faldo:position {7} .
            ?dbSNP rdfs:label "{8}" .
        }}
    """.format(ref_amino_acid, ref_base, variant_base,
               altered_amino_acid, transcript_id, position,
               bp_pos, genome_pos_start, db_snp_id)

    # Expected results: one row, in the order of the SELECT clause
    select_columns = (
        "cosmic", "gene", "aaRegion", "cdnaRegion", "chrRegion",
        "dbSNP", "transcript", "uniprot", "refseq",
        "aaCoord", "cdnaCoord", "chrCoord",
    )
    expected_results = [[uri_for[column] for column in select_columns]]

    # Query graph
    sparql_output = harness.query_graph(sparql_query)
    self.assertEqual(expected_results, sparql_output)