def test_random_data_set(self):
    """
    Download the dataset using fetch(), then take one row of data and
    run it through the evidence and provenance functions to test
    the output.

    The row number is hardcoded, but theoretically any row should work.
    The test fails explicitly when the downloaded file has fewer rows
    than the requested one, instead of silently using another row.
    """
    line_to_test = 1129

    # init impc (make this a function?)
    impc = IMPC('rdf_graph', True)
    impress_map = json.loads(
        impc.fetch_from_url(
            impc.map_files['impress_map']).read().decode('utf-8'))
    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

    # fetch file
    impc.fetch(True)
    file_path = '/'.join((impc.rawdir, impc.files['all']['file']))

    row = None
    with gzip.open(file_path, 'rt') as csvfile:
        filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
        # Rows are counted from 1, so the first row read is row 1
        for count, candidate in enumerate(filereader, start=1):
            if count == line_to_test:
                row = candidate
                break

    self.assertIsNotNone(
        row, "dataset has fewer than %d rows" % line_to_test)
    # Replace the shared fixture row on this instance with the fetched one
    self.test_set_1 = row

    # Some DRY violation with the above tests
    (phenotyping_center, colony) = row[2:4]
    (project_fullname, pipeline_name, pipeline_stable_id,
     procedure_stable_id, procedure_name, parameter_stable_id,
     parameter_name) = row[12:19]
    (statistical_method, resource_name) = row[26:28]
    (p_value, percentage_change, effect_size) = row[23:26]

    impc._add_evidence(
        self.assoc_curie, self.eco_id, impc_map, p_value,
        percentage_change, effect_size, self.study_curie)

    impc._add_study_provenance(
        impc_map, impress_map, phenotyping_center, colony,
        project_fullname, pipeline_name, pipeline_stable_id,
        procedure_stable_id, procedure_name, parameter_stable_id,
        parameter_name, statistical_method, resource_name)

    # Note that this doesn't test much since we're dealing with
    # multiple part_of and has_part links to individuals
    # which results in ambiguity = hard to test
    sparql_query = """
        SELECT *
        WHERE {
            ?assoc OBO:SEPIO_0000007 ?evidenceline .
            ?evidenceline a OBO:ECO_0000015 ;
                OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/study> .
            ?study a OBO:OBI_0000471 ;
                OBO:SEPIO_0000114 ?param ;
                OBO:SEPIO_0000017 ?agent .
        }
    """

    sparql_output = impc.graph.query(sparql_query)
    # Test that query passes and returns one row
    self.assertEqual(len(list(sparql_output)), 1)
def test_provenance_model(self):
    """
    Functional test for _add_study_provenance()

    Passes the provenance columns of self.test_set_1 to the method,
    then queries the resulting graph for the study node.
    """
    impc = IMPC('rdf_graph', False)
    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
    impress_response = impc.fetch_from_url(impc.map_files['impress_map'])
    impress_map = json.loads(impress_response.read().decode('utf-8'))

    record = self.test_set_1
    center, colony_name = record[2:4]
    (project, pipeline, pipeline_id, procedure_id, procedure,
     parameter_id, parameter) = record[12:19]
    stat_method, resource = record[26:28]

    impc._add_study_provenance(
        impc_map, impress_map,
        center, colony_name,
        project, pipeline, pipeline_id,
        procedure_id, procedure,
        parameter_id, parameter,
        stat_method, resource)

    sparql_query = """
        SELECT ?study
        WHERE {
            <https://www.mousephenotype.org/impress/procedures/15> a owl:NamedIndividual ;
                rdfs:label "MGP Select Pipeline" .
            <https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
                rdfs:label "X-ray" .
            <http://www.sanger.ac.uk/> a foaf:organization ;
                rdfs:label "WTSI" .
            <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
                rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .
            <https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
                rdfs:label "Number of ribs right (X-ray)" .
            ?study a OBO:OBI_0000471 ;
                OBO:BFO_0000051 OBO:STATO_0000076 ;
                OBO:BFO_0000050 <https://www.mousephenotype.org/impress/procedures/15> ;
                OBO:BFO_0000051 <https://www.mousephenotype.org/impress/protocol/175/15> ;
                OBO:SEPIO_0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
                OBO:BFO_0000050 <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
                OBO:SEPIO_0000017 <http://www.sanger.ac.uk/> .
            ?colony a owl:NamedIndividual ;
                rdfs:label "MEFW" .
        }
    """
    query_result = impc.graph.query(sparql_query)

    # Brittle: the hardcoded id breaks as soon as the scheme for making
    # blank node iris changes; checking the number of returned rows
    # might be the better assertion.
    expected_rows = [(BNode('bbdd05a8ca155dda'),)]
    self.assertEqual(list(query_result), expected_rows)
def test_random_data_set(self):
    """
    Download the dataset using fetch(), then take one row of data and
    run it through the evidence and provenance functions to test
    the output.

    The row number is hardcoded, but theoretically any row should work.
    The test fails explicitly when the downloaded file has fewer rows
    than the requested one, instead of silently using another row.
    """
    line_to_test = 1129

    # init impc (make this a function?)
    impc = IMPC('rdf_graph', True)
    impress_map = json.loads(
        impc.fetch_from_url(
            impc.map_files['impress_map']).read().decode('utf-8'))
    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

    # fetch file
    impc.fetch(True)
    file_path = '/'.join((impc.rawdir, impc.files['all']['file']))

    row = None
    with gzip.open(file_path, 'rt') as csvfile:
        filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
        # Rows are counted from 1, so the first row read is row 1
        for count, candidate in enumerate(filereader, start=1):
            if count == line_to_test:
                row = candidate
                break

    self.assertIsNotNone(
        row, "dataset has fewer than %d rows" % line_to_test)
    # Replace the shared fixture row on this instance with the fetched one
    self.test_set_1 = row

    # Some DRY violation with the above tests
    (phenotyping_center, colony) = row[2:4]
    (project_fullname, pipeline_name, pipeline_stable_id,
     procedure_stable_id, procedure_name, parameter_stable_id,
     parameter_name) = row[12:19]
    (statistical_method, resource_name) = row[26:28]
    (p_value, percentage_change, effect_size) = row[23:26]

    impc._add_evidence(
        self.assoc_curie, self.eco_id, impc_map, p_value,
        percentage_change, effect_size, self.study_curie)

    impc._add_study_provenance(
        impc_map, impress_map, phenotyping_center, colony,
        project_fullname, pipeline_name, pipeline_stable_id,
        procedure_stable_id, procedure_name, parameter_stable_id,
        parameter_name, statistical_method, resource_name)

    # Note that this doesn't test much since we're dealing with
    # multiple part_of and has_part links to individuals
    # which results in ambiguity = hard to test
    sparql_query = """
        SELECT *
        WHERE {
            ?assoc OBO:SEPIO_0000007 ?evidenceline .
            ?evidenceline a OBO:ECO_0000015 ;
                OBO:SEPIO_0000085 <https://monarchinitiative.org/.well-known/genid/study> .
            ?study a OBO:OBI_0000471 ;
                OBO:SEPIO_0000114 ?param ;
                OBO:SEPIO_0000017 ?agent .
        }
    """

    sparql_output = impc.graph.query(sparql_query)
    # Test that query passes and returns one row
    self.assertEqual(len(list(sparql_output)), 1)
def test_provenance_model(self):
    """
    Functional test for _add_study_provenance()

    Starts from an empty graph, adds the study provenance for the
    columns of self.test_set_1 and compares the resulting graph with
    the expected turtle triples.
    """
    impc = IMPC('rdf_graph', True)
    impc.graph = RDFGraph(True)
    # The comparison below is only meaningful on a graph that starts empty
    self.assertEqual(len(list(impc.graph)), 0)

    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
    impress_map = json.loads(
        impc.fetch_from_url(
            impc.map_files['impress_map']).read().decode('utf-8'))

    (phenotyping_center, colony) = self.test_set_1[2:4]
    (project_fullname, pipeline_name, pipeline_stable_id,
     procedure_stable_id, procedure_name, parameter_stable_id,
     parameter_name) = self.test_set_1[12:19]
    (statistical_method, resource_name) = self.test_set_1[26:28]

    impc._add_study_provenance(
        impc_map, impress_map, phenotyping_center, colony,
        project_fullname, pipeline_name, pipeline_stable_id,
        procedure_stable_id, procedure_name, parameter_stable_id,
        parameter_name, statistical_method, resource_name)

    triples = """
        <https://monarchinitiative.org/.well-known/genid/bbdd05a8ca155dda> a OBI:0000471 ;
            BFO:0000051 OBO:STATO_0000076,
                <https://www.mousephenotype.org/impress/protocol/175/15> ;
            BFO:0000050 IMPRESS-procedure:15 ,
                <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
            SEPIO:0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
            SEPIO:0000017 <http://www.sanger.ac.uk/> .

        <https://monarchinitiative.org/.well-known/genid/bc0b26361b8687b5> a owl:NamedIndividual ;
            rdfs:label "MEFW" .

        <http://www.sanger.ac.uk/> a foaf:organization ;
            rdfs:label "WTSI" .

        <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
            rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

        <https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
            rdfs:label "Number of ribs right (X-ray)" .

        IMPRESS-procedure:15 a owl:NamedIndividual ;
            rdfs:label "MGP Select Pipeline" .

        <https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
            rdfs:label "X-ray" .
    """

    # dbg
    # serialize() may hand back bytes or str depending on the graph
    # library version (presumably rdflib, which returns str since 6.0),
    # so only decode when bytes were actually returned.
    serialized = impc.graph.serialize(format="turtle")
    if isinstance(serialized, bytes):
        serialized = serialized.decode("utf-8")
    logger.debug("Reference graph: %s", serialized)

    self.assertTrue(
        self.test_util.test_graph_equality(triples, impc.graph))
def test_provenance_model(self):
    """
    Functional test for _add_study_provenance()

    Passes the provenance columns of self.test_set_1 to the method,
    then queries the resulting graph for the study node.
    """
    impc = IMPC('rdf_graph', False)
    impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
    impress_response = impc.fetch_from_url(impc.map_files['impress_map'])
    impress_map = json.loads(impress_response.read().decode('utf-8'))

    record = self.test_set_1
    center, colony_name = record[2:4]
    (project, pipeline, pipeline_id, procedure_id, procedure,
     parameter_id, parameter) = record[12:19]
    stat_method, resource = record[26:28]

    impc._add_study_provenance(
        impc_map, impress_map,
        center, colony_name,
        project, pipeline, pipeline_id,
        procedure_id, procedure,
        parameter_id, parameter,
        stat_method, resource)

    sparql_query = """
        SELECT ?study
        WHERE {
            <https://www.mousephenotype.org/impress/procedures/15> a owl:NamedIndividual ;
                rdfs:label "MGP Select Pipeline" .
            <https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
                rdfs:label "X-ray" .
            <http://www.sanger.ac.uk/> a foaf:organization ;
                rdfs:label "WTSI" .
            <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
                rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .
            <https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
                rdfs:label "Number of ribs right (X-ray)" .
            ?study a OBO:OBI_0000471 ;
                OBO:BFO_0000051 OBO:STATO_0000076 ;
                OBO:BFO_0000050 <https://www.mousephenotype.org/impress/procedures/15> ;
                OBO:BFO_0000051 <https://www.mousephenotype.org/impress/protocol/175/15> ;
                OBO:SEPIO_0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
                OBO:BFO_0000050 <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
                OBO:SEPIO_0000017 <http://www.sanger.ac.uk/> .
            ?colony a owl:NamedIndividual ;
                rdfs:label "MEFW" .
        }
    """
    query_result = impc.graph.query(sparql_query)

    # Brittle: the hardcoded id breaks as soon as the scheme for making
    # blank node iris changes; checking the number of returned rows
    # might be the better assertion.
    expected_rows = [(BNode('bbdd05a8ca155dda'),)]
    self.assertEqual(list(query_result), expected_rows)