def test_parse(self):
    """
    Run the ClinVar parser on every RCV fixture and compare with its reference.

    For each entry in RCVS the parser entry point ``clinvar_parse`` is invoked
    with a patched ``sys.argv``; the N-Triples file it writes is loaded into a
    graph, dumped as a dot file (for manual inspection) and compared with the
    reference Turtle file of the same name.
    """
    for rcv in RCVS:
        output_nt = rcv + '.nt'
        input_xml = rcv + '.xml.gz'
        reference_ttl = TTL_PATH + rcv + '.ttl'
        with self.subTest(rcv=rcv):
            mock_args = [
                "test_clinvar.py",
                "--inputdir", XML_PATH,
                "--filename", input_xml,
                "--mapfile", MAP_FILE,
                "--destination", NT_PATH,
                "--output", output_nt
            ]

            # Scope the patch to the parser call so that sys.argv is
            # restored afterwards instead of leaking into later subtests
            # (a bare patch(...).start() is never stopped).
            with patch('sys.argv', mock_args):
                clinvar_parse()

            query_graph = RDFGraph()
            query_graph.bind_all_namespaces()
            query_graph.parse(NT_PATH + output_nt, format='nt')

            # Read the reference verbatim; joining readlines() with "\n"
            # would double every line break of the file.
            with open(reference_ttl, 'r') as ref_fh:
                ref_graph = ref_fh.read()

            # debug: this logs the graph produced by the parser
            LOG.debug(
                "Reference graph: %s",
                query_graph.serialize(format="turtle").decode("utf-8"))

            # Convert output from ClinVar parse to dot then png
            dot_file_path = DOT_PATH + rcv + ".dot"
            with open(dot_file_path, 'w') as dot_file:
                rdf2dot(query_graph, dot_file)

            self.assertTrue(
                TestUtils.test_graph_equality(ref_graph, query_graph))
class EvidenceTestCase(unittest.TestCase):
    """Checks the triples produced by MGI._process_evidence_view()."""

    def setUp(self):
        """
        Point the MGI source at the bundled test resources.

        _process_evidence_view uses self.rawdir to find the evidence file,
        so the default is overridden here to point to our test file.
        Note the file name must match what is in that method - evidence_view
        """
        self.test_util = TestUtils()
        self.mgi = MGI('rdf_graph', True)
        here = os.path.dirname(__file__)
        self.mgi.rawdir = os.path.join(here, 'resources/mgi')
        # Pre-seed the annotation lookup used by the evidence view
        self.mgi.idhash['annot']['6901981'] = ':association'

    def tearDown(self):
        """Drop the reference to the source object."""
        self.mgi = None

    def test_sex_specificity_model(self):
        """The evidence view output must equal the expected triples."""
        self.mgi.graph = RDFGraph(True)  # Reset graph
        self.mgi._process_evidence_view(limit=None)

        serialized = self.mgi.graph.serialize(format="turtle").decode("utf-8")
        logger.debug("Reference graph: %s", serialized)

        expected_triples = """
        :association RO:0002558 ECO:0000006 ;
            dc:source J:74619 ;
            :has_sex_specificity PATO:0000384 .

        J:74619 a IAO:0000310 .
        """

        graphs_match = self.test_util.test_graph_equality(
            expected_triples, self.mgi.graph)
        self.assertTrue(graphs_match)
class TestMyChemParser(unittest.TestCase):
    """Checks the triples produced by MyChem.parse() on a local JSON fixture."""

    def setUp(self):
        """Create the source and load the fixture instead of fetching data."""
        self.test_util = TestUtils()
        self.source = MyChem('rdf_graph', True)

        # Replaces source.fetch(); the context manager guarantees the file
        # handle is closed even when json.load() raises.
        with open(TESTDATA, 'r') as data_fh:
            self.test_data = json.load(data_fh)

        # The first fixture record is used for both input lists
        self.source.drugbank_targets.append(self.test_data[0])
        self.source.drugcentral_interactors.append(self.test_data[0])

    def tearDown(self):
        """Drop the reference to the source object."""
        self.source = None

    def test_parse(self):
        """parse() on the fixture record must yield exactly these triples."""
        self.source.graph = RDFGraph(True)  # Reset graph
        self.assertTrue(len(list(self.source.graph)) == 0)

        self.source.parse()
        triples = """
        UNII:46U771ERWK RO:0002606 SNOMED:386761002 ;
            rdfs:subClassOf CHEBI:23367 .

        SNOMED:386761002 rdfs:label "Local anesthesia" ;
            rdfs:subClassOf DOID:4 .
        """

        # dbg
        logger.debug(
            "Reference graph: %s",
            self.source.graph.serialize(format="turtle").decode("utf-8"))

        self.assertTrue(
            self.test_util.test_graph_equality(triples, self.source.graph))
def test_parse(self):
    """
    Runs self.gwascatalog.parse() and outputs a dot file for each variant

    For every entry in VARIANTS the source is re-created, pointed at the
    raw directory of that variant and parsed; the resulting graph is
    written as a dot file and compared with the reference Turtle file.

    This is less of a unit test and more for viewing the output of
    an entire run on a single variant, dot files can be converted to
    images using scripts/dot-to-svg.sh
    """
    for variant in VARIANTS:
        with self.subTest(variant_id=variant):
            # Fresh source object so graphs do not accumulate across variants
            self.tearDownAndSetUp()
            self.gwascatalog.rawdir = RAW_PATH + '/' + variant
            self.gwascatalog.parse()

            dot_file_path = DOT_PATH + variant + ".dot"
            with open(dot_file_path, 'w') as dot_file:
                rdf2dot(self.gwascatalog.graph, dot_file)

            # debug: this logs the graph produced by the parser
            LOG.debug(
                "Reference graph: %s",
                self.gwascatalog.graph.serialize(
                    format="turtle").decode("utf-8"))

            reference_ttl = TTL_PATH + variant + '.ttl'
            self.assertTrue(
                TestUtils.test_graph_equality(
                    reference_ttl, self.gwascatalog.graph))
class SGDTestCase(unittest.TestCase):
    """Checks the association model that SGD builds from one phenotype record."""

    def setUp(self):
        """Prepare the comparison helper and a single hard-coded record."""
        self.test_util = TestUtils()
        self.test_set_1 = {
            'Allele': 'atp6-L183R (L183R)',
            'Chemical': 'glycerol',
            'Condition':
                'elevated temperature (35 deg C)|nonfermentable carbon source',
            'Details':
                'similar results obtained with atp6-L247R, and atp6-W136R, all '
                'corresponding to human NARP syndrome mutants',
            'Experiment Type': 'classical genetics',
            'Feature Name': 'Q0085',
            'Feature Type': 'ORF',
            'Gene Name': 'ATP6',
            'Mutant Type': 'reduction of function',
            'Phenotype': 'respiratory growth: decreased rate',
            'Reference': 'PMID: 21715656|SGD_REF: S000145858',
            'Reporter': ' ',
            'SGDID': 'S000007268',
            'Strain Background': 'Other',
        }

    def tearDown(self):
        """Nothing to clean up."""

    def testSGDParser(self):
        """make_association() on the record must yield exactly these triples."""
        sgd = SGD('rdf_graph', True)
        sgd.graph = RDFGraph(True)

        record = self.test_set_1
        sgd.make_association(record)

        # The description text is produced by the source itself and is
        # substituted into the expected graph below.
        description = sgd._make_description(record)

        triples = """
        :MONARCH_95158d413dd73476 a OBAN:association ;
            OBO:RO_0002558 OBO:APO_0000020 ;
            dc:description "{0}";
            dc:source PMID:21715656 ;
            OBAN:association_has_object MONARCH:OBO_APO_0000309OBO_APO_0000245 ;
            OBAN:association_has_predicate OBO:RO_0002200 ;
            OBAN:association_has_subject SGD:S000007268 .

        SGD:S000007268 rdfs:label "ATP6" ;
            RO:0002200 MONARCH:OBO_APO_0000309OBO_APO_0000245 .

        APO:0000020 rdfs:label "classical genetics" .

        PMID:21715656 a OBO:IAO_0000311 ;
            owl:sameAs SGD_REF:S000145858 .

        MONARCH:OBO_APO_0000309OBO_APO_0000245 rdfs:label "respiratory growth:decreased rate" ;
            rdfs:subClassOf UPHENO:0001001 .
        """.format(description)

        # test exact contents of graph
        graphs_match = self.test_util.test_graph_equality(triples, sgd.graph)
        self.assertTrue(graphs_match)
class RGDTestCase(unittest.TestCase):
    """Checks the association model that RGD builds from one annotation record."""

    def setUp(self):
        """Prepare the comparison helper and a single hard-coded record."""
        self.test_util = TestUtils()

        evidence = {
            'has_supporting_reference': ['RGD:1581841', 'PMID:12799311'],
            'type': 'IED',
            'with_support_from': [],
        }
        subject = {
            'fullname': 'endothelin receptor type A',
            'id': 'RGD:2535',
            'label': 'Ednra',
            'synonyms': [],
            'taxon': {'id': 'NCBITaxon:10116'},
            'type': 'gene',
        }
        self.test_set_1 = {
            'aspect': 'N',
            'date': '2006-10-26',
            'evidence': evidence,
            'negated': False,
            'object': {'id': 'MP:0003340', 'taxon': 'NCBITaxon:10116'},
            'provided_by': 'RGD',
            'qualifiers': [],
            'relation': {'id': None},
            'source_line':
                'RGD\t2535\tEdnra\t\tMP:0003340\tRGD:1581841|PMID:12799311\t'
                'IED\t\tN\tendothelin receptor type A\t\tgene\ttaxon:10116\t'
                '20061026\tRGD\t\t\n',
            'subject': subject,
            'subject_extensions': [{'filler': '\n', 'property': 'isoform'}],
        }

    def tearDown(self):
        """Nothing to clean up."""

    def testRGDParser(self):
        """make_association() on the record must yield exactly these triples."""
        rgd = RGD('rdf_graph', True)
        rgd.graph = RDFGraph(True)
        graph_is_empty = len(list(rgd.graph)) == 0
        self.assertTrue(graph_is_empty)

        rgd.make_association(record=self.test_set_1)

        triples = """
        :MONARCH_b4650e8c3d865f11a1a5 a OBAN:association ;
            RO:0002558 ECO:0005611 ;
            dc:source RGDRef:1581841 ;
            OBAN:association_has_object OBO:MP_0003340 ;
            OBAN:association_has_predicate OBO:RO_0002200 ;
            OBAN:association_has_subject RGD:2535 ;
            pav:createdOn "2006-10-26" .

        RGD:2535 OBO:RO_0002200 MP:0003340 .

        RGDRef:1581841 a IAO:0000311 ;
            owl:sameAs PMID:12799311 .
        """

        # dbg
        serialized = rgd.graph.serialize(format="turtle").decode("utf-8")
        logger.debug("Reference graph: %s", serialized)

        graphs_match = self.test_util.test_graph_equality(triples, rgd.graph)
        self.assertTrue(graphs_match)
class SGDTestCase(unittest.TestCase):
    """Checks the association model that SGD builds from one phenotype record."""

    def setUp(self):
        """Prepare the comparison helper and a single hard-coded record."""
        self.test_util = TestUtils()
        self.test_set_1 = {
            'Allele': 'atp6-L183R (L183R)',
            'Chemical': 'glycerol',
            'Condition':
                'elevated temperature (35 deg C)|nonfermentable carbon source',
            'Details':
                'similar results obtained with atp6-L247R, and atp6-W136R, all '
                'corresponding to human NARP syndrome mutants',
            'Experiment Type': 'classical genetics',
            'Feature Name': 'Q0085',
            'Feature Type': 'ORF',
            'Gene Name': 'ATP6',
            'Mutant Type': 'reduction of function',
            'Phenotype': 'respiratory growth: decreased rate',
            'Reference': 'PMID: 21715656|SGD_REF: S000145858',
            'Reporter': ' ',
            'SGDID': 'S000007268',
            'Strain Background': 'Other',
        }

    def tearDown(self):
        """Nothing to clean up."""

    def testSGDParser(self):
        """make_association() on the record must yield exactly these triples."""
        sgd = SGD('rdf_graph', True)
        sgd.graph = RDFGraph(True)

        record = self.test_set_1
        sgd.make_association(record)

        # The description text is produced by the source itself and is
        # substituted into the expected graph below.
        description = sgd._make_description(record)

        triples = """
        :MONARCH_ba748c98c0f167739128 a OBAN:association ;
            OBO:RO_0002558 OBO:APO_0000020 ;
            dc:description "{0}";
            dc:source PMID:21715656 ;
            OBAN:association_has_object MONARCH:APO_0000309APO_0000245 ;
            OBAN:association_has_predicate OBO:RO_0002200 ;
            OBAN:association_has_subject SGD:S000007268 .

        SGD:S000007268 rdfs:label "ATP6" ;
            RO:0002200 MONARCH:APO_0000309APO_0000245 .

        APO:0000020 rdfs:label "classical genetics" .

        PMID:21715656 a OBO:IAO_0000311 ;
            owl:sameAs SGD_REF:S000145858 .

        MONARCH:APO_0000309APO_0000245 rdfs:label "respiratory growth:decreased rate" ;
            rdfs:subClassOf UPHENO:0001001 .
        """.format(description)

        # test exact contents of graph
        graphs_match = self.test_util.test_graph_equality(triples, sgd.graph)
        self.assertTrue(graphs_match)
class CTDTestCase(unittest.TestCase):
    """Checks the triples CTD emits for one chemical-disease row."""

    def setUp(self):
        """Create a CTD source with an empty graph and one hard-coded row."""
        self.test_util = TestUtils()
        self.source = CTD('rdf_graph', True)
        self.source.graph = RDFGraph(True)
        self.test_row = [
            'Nicotine',
            'D009538',
            '',
            'TOBACCO ADDICTION, SUSCEPTIBILITY TO',
            'OMIM:188890',
            'therapeutic',
            '',
            '',
            '',
            '12345|56789',
        ]

    def tearDown(self):
        """Drop the reference to the source object."""
        self.source = None

    def test_therapeutic_relationship(self):
        """_process_interactions() on the row must yield exactly these triples."""
        # test that graph is empty
        graph_is_empty = len(list(self.source.graph)) == 0
        self.assertTrue(graph_is_empty)

        self.source._process_interactions(self.test_row)

        triples = """
        :MONARCH_b6c289df47cb72653f79 a OBAN:association ;
            RO:0002558 ECO:0000033 ;
            dcterms:source PMID:12345, PMID:56789 ;
            OBAN:association_has_object OMIM:188890 ;
            OBAN:association_has_predicate RO:0002606 ;
            OBAN:association_has_subject MESH:D009538 .

        MESH:D009538 a owl:Class ;
            rdfs:label "Nicotine" ;
            biolink:category biolink:ChemicalSubstance ;
            RO:0002606 OMIM:188890 .

        PMID:12345 a IAO:0000013 .

        PMID:56789 a IAO:0000013 .

        OMIM:188890 a owl:Class ;
            biolink:category biolink:DiseaseOrPhenotypicFeature .
        """

        # test exact contents of graph
        graphs_match = self.test_util.test_graph_equality(
            triples, self.source.graph)
        self.assertTrue(graphs_match)
class CTDTestCase(unittest.TestCase):
    """Checks the triples CTD emits for one chemical-disease row."""

    def setUp(self):
        """Create a CTD source with an empty graph and one hard-coded row."""
        self.test_util = TestUtils()
        self.source = CTD('rdf_graph', True)
        self.source.graph = RDFGraph(True)
        self.test_row = [
            'Nicotine',
            'D009538',
            '',
            'TOBACCO ADDICTION, SUSCEPTIBILITY TO',
            'OMIM:188890',
            'therapeutic',
            '',
            '',
            '',
            '12345|56789',
        ]

    def tearDown(self):
        """Drop the reference to the source object."""
        self.source = None

    def test_therapeutic_relationship(self):
        """_process_interactions() on the row must yield exactly these triples."""
        # test that graph is empty
        graph_is_empty = len(list(self.source.graph)) == 0
        self.assertTrue(graph_is_empty)

        self.source._process_interactions(self.test_row)

        triples = """
        :MONARCH_b6c289df47cb72653f79 a OBAN:association ;
            RO:0002558 ECO:0000033 ;
            dc:source PMID:12345, PMID:56789 ;
            OBAN:association_has_object OMIM:188890 ;
            OBAN:association_has_predicate RO:0002606 ;
            OBAN:association_has_subject MESH:D009538 .

        MESH:D009538 a owl:Class ;
            rdfs:label "Nicotine" ;
            RO:0002606 OMIM:188890 .

        PMID:12345 a IAO:0000013 .

        PMID:56789 a IAO:0000013 .

        OMIM:188890 a owl:Class .
        """

        # test exact contents of graph
        graphs_match = self.test_util.test_graph_equality(
            triples, self.source.graph)
        self.assertTrue(graphs_match)
class ReactomeTestCase(unittest.TestCase):
    """Checks the gene-to-pathway association model built by Reactome."""

    def setUp(self):
        """Prepare one hard-coded row and a minimal evidence-code mapping."""
        self.test_util = TestUtils()
        self.test_set_1 = (
            'ENSBTAP00000013354',
            'R-BTA-3000480',
            'http://www.reactome.org/PathwayBrowser/#/R-BTA-3000480',
            'Scavenging by Class A Receptors',
            'IEA',
            'Bos taurus',
        )
        self.gaf_eco = {"IEA": "ECO:0000501"}

    def tearDown(self):
        """Nothing to clean up."""

    def testEnsemblReactomeParser(self):
        """The association added for the row must yield exactly these triples."""
        reactome = Reactome('rdf_graph', True)
        reactome.graph = RDFGraph(True)
        graph_is_empty = len(list(reactome.graph)) == 0
        self.assertTrue(graph_is_empty)

        # The IRI and species columns are part of the row but unused here
        (gene, pathway_id, _pathway_iri,
         pathway_label, go_ecode, _species_name) = self.test_set_1

        gene_curie = 'ENSEMBL:' + gene
        pathway_curie = 'REACT:' + pathway_id
        eco_curie = self.gaf_eco[go_ecode]
        reactome._add_component_pathway_association(
            gene_curie, pathway_curie, pathway_label, eco_curie)

        triples = """
        ENSEMBL:ENSBTAP00000013354 RO:0002331 REACT:R-BTA-3000480 .

        :MONARCH_b582c188b7ec20016206 a OBAN:association ;
            OBO:RO_0002558 ECO:0000501 ;
            OBAN:association_has_object REACT:R-BTA-3000480 ;
            OBAN:association_has_predicate RO:0002331 ;
            OBAN:association_has_subject ENSEMBL:ENSBTAP00000013354 .

        REACT:R-BTA-3000480 a owl:Class ;
            rdfs:label "Scavenging by Class A Receptors" ;
            rdfs:subClassOf GO:0009987, PW:0000001 .
        """

        graphs_match = self.test_util.test_graph_equality(
            triples, reactome.graph)
        self.assertTrue(graphs_match)
def test_gene_xref(self):
    """
    test FlyBase._process_gene_xref()

    One subtest per entry in ALLELES: the source is re-created, pointed at
    the raw directory for that allele, and its output graph is compared
    with the matching gene_xref.ttl reference.
    """
    for allele in ALLELES:
        with self.subTest(allele_id=allele):
            self.tearDownAndSetUp()
            self.flybase.rawdir = RAW_PATH + '/' + allele
            self.flybase._process_gene_xref(limit=None)

            produced = self.flybase.graph.serialize(format="turtle")
            LOG.debug("Reference graph: %s", produced.decode("utf-8"))

            reference_ttl = TTL_PATH + allele + '/' + 'gene_xref.ttl'
            graphs_match = TestUtils.test_graph_equality(
                reference_ttl, self.flybase.graph)
            self.assertTrue(graphs_match)
class ReactomeTestCase(unittest.TestCase):
    """Checks the gene-to-pathway association model built by Reactome."""

    def setUp(self):
        """Prepare the comparison helper and one hard-coded row."""
        self.test_util = TestUtils()
        self.test_set_1 = (
            'ENSBTAP00000013354',
            'R-BTA-3000480',
            'http://www.reactome.org/PathwayBrowser/#/R-BTA-3000480',
            'Scavenging by Class A Receptors',
            'IEA',
            'Bos taurus',
        )

    def tearDown(self):
        """Nothing to clean up."""

    def testEnsemblReactomeParser(self):
        """The association added for the row must yield exactly these triples."""
        reactome = Reactome('rdf_graph', True)
        reactome.graph = RDFGraph(True)
        graph_is_empty = len(list(reactome.graph)) == 0
        self.assertTrue(graph_is_empty)

        eco_map = Reactome.get_eco_map(Reactome.map_files['eco_map'])

        # The IRI and species columns are part of the row but unused here
        (gene, pathway_id, _pathway_iri,
         pathway_label, go_ecode, _species_name) = self.test_set_1

        reactome._add_component_pathway_association(
            eco_map,
            gene, 'ENSEMBL',
            pathway_id, 'REACT',
            pathway_label,
            go_ecode)

        triples = """
        ENSEMBL:ENSBTAP00000013354 RO:0002331 REACT:R-BTA-3000480 .

        :MONARCH_b582c188b7ec20016206 a OBAN:association ;
            OBO:RO_0002558 ECO:0000501 ;
            OBAN:association_has_object REACT:R-BTA-3000480 ;
            OBAN:association_has_predicate RO:0002331 ;
            OBAN:association_has_subject ENSEMBL:ENSBTAP00000013354 .

        REACT:R-BTA-3000480 a owl:Class ;
            rdfs:label "Scavenging by Class A Receptors" ;
            rdfs:subClassOf GO:0009987, PW:0000001 .
        """

        graphs_match = self.test_util.test_graph_equality(
            triples, reactome.graph)
        self.assertTrue(graphs_match)
class TestMyChemParser(unittest.TestCase):
    """Checks the triples produced by MyChem.parse() on a local JSON fixture."""

    def setUp(self):
        """Create the source and load the fixture instead of fetching data."""
        self.test_util = TestUtils()
        self.source = MyChem('rdf_graph', True)

        # Replaces source.fetch(); the context manager guarantees the file
        # handle is closed even when json.load() raises.
        with open(TESTDATA, 'r') as data_fh:
            self.test_data = json.load(data_fh)

        # The first fixture record is used for both input lists
        self.source.drugbank_targets.append(self.test_data[0])
        self.source.drugcentral_interactors.append(self.test_data[0])

    def tearDown(self):
        """Drop the reference to the source object."""
        self.source = None

    def test_parse(self):
        """parse() on the fixture record must yield exactly these triples."""
        self.source.graph = RDFGraph(True)  # Reset graph
        self.assertTrue(len(list(self.source.graph)) == 0)

        self.source.parse()
        triples = """
        UNII:46U771ERWK RO:0002606 SNOMED:386761002 ;
            rdfs:subClassOf CHEBI:23367 .

        SNOMED:386761002 rdfs:label "Local anesthesia" ;
            rdfs:subClassOf DOID:4 .
        """

        # dbg
        logger.debug(
            "Reference graph: %s",
            self.source.graph.serialize(format="turtle").decode("utf-8"))

        self.assertTrue(self.test_util.test_graph_equality(
            triples, self.source.graph))
class EvidenceTestCase(unittest.TestCase):
    """Checks the triples produced by MGI._process_evidence_view()."""

    def setUp(self):
        """
        Point the MGI source at the bundled test resources.

        _process_evidence_view uses self.rawdir to find the evidence file,
        so the default is overridden here to point to our test file.
        Note the file name must match what is in that method - evidence_view
        """
        self.test_util = TestUtils()
        self.mgi = MGI('rdf_graph', True)
        resource_root = os.path.dirname(__file__)
        self.mgi.rawdir = os.path.join(resource_root, 'resources/mgi')
        # Pre-seed the annotation lookup used by the evidence view
        self.mgi.idhash['annot']['6901981'] = ':association'

    def tearDown(self):
        """Drop the reference to the source object."""
        self.mgi = None

    def test_sex_specificity_model(self):
        """The evidence view output must equal the expected triples."""
        self.mgi.graph = RDFGraph(True)  # Reset graph
        self.mgi._process_evidence_view(limit=None)

        produced = self.mgi.graph.serialize(format="turtle")
        logger.debug("Reference graph: %s", produced.decode("utf-8"))

        expected_triples = """
        :association RO:0002558 ECO:0000006 ;
            dc:source J:74619 ;
            :has_sex_specificity PATO:0000384 .

        J:74619 a IAO:0000310 .
        """

        graphs_match = self.test_util.test_graph_equality(
            expected_triples, self.mgi.graph)
        self.assertTrue(graphs_match)
class StringTestFakeData(unittest.TestCase):
    """Checks StringDB._process_protein_links() against small hand-made rows."""

    def setUp(self):
        """Build the two single-row data sets and fetch the protein-gene map."""
        self.test_util = TestUtils()

        # Test set with two proteins from same species
        self.test_set_1 = [[
            '9606.ENSP00000000233', '9606.ENSP00000003084',
            0, 0, 0, 0, 300, 0, 150, 800,
        ]]

        # Test set with deprecated protein id
        self.test_set_2 = [[
            '9606.ENSP00000000233', '9606.ENSP00000006101',
            0, 0, 0, 0, 300, 0, 150, 800,
        ]]

        self.columns = [
            'protein1',
            'protein2',
            'neighborhood',
            'fusion',
            'cooccurence',
            'coexpression',
            'experimental',
            'database',
            'textmining',
            'combined_score',
        ]

        ensembl = Ensembl('rdf_graph', True)
        self.protein_list = ensembl.fetch_protein_gene_map(9606)

    def tearDown(self):
        """Nothing to clean up."""

    def testFakeDataSet1(self):
        """Two known proteins must produce a single gene-gene triple."""
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph(True)
        self.assertEqual(len(string_db.graph), 0)

        ensembl = Ensembl('rdf_graph', True)
        prot_map = ensembl.fetch_protein_gene_map(9606)
        # Prefix every mapped gene id in place (keys are left untouched)
        for protein_id in prot_map.keys():
            prot_map[protein_id] = "ENSEMBL:{}".format(prot_map[protein_id])

        print("Finished fetching ENSP IDs, fetched {} proteins".format(
            len(prot_map)))

        dataframe = pd.DataFrame(data=self.test_set_1, columns=self.columns)
        string_db._process_protein_links(dataframe, prot_map, 9606)

        triples = """
        ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 .
        """

        graphs_match = self.test_util.test_graph_equality(
            triples, string_db.graph)
        self.assertTrue(graphs_match)

    def testFakeDataSet2(self):
        """
        Dataset contains a deprecated protein ID that we expect
        is filtered out by ensembl biomart
        We test that this returns an empty graph
        :return:
        """
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph()
        self.assertEqual(len(string_db.graph), 0)

        dataframe = pd.DataFrame(data=self.test_set_2, columns=self.columns)
        string_db._process_protein_links(dataframe, self.protein_list, 9606)

        self.assertEqual(len(string_db.graph), 0)
class GeneVariantDiseaseTest(unittest.TestCase):
    """Checks the gene/variant-to-disease triples Orphanet builds from XML fixtures."""

    def setUp(self):
        """Create an Orphanet source that reads from the bundled test resources."""
        self.test_util = TestUtils()
        self.orphanet = Orphanet('rdf_graph', True)
        self.orphanet.rawdir = os.path.join(
            os.path.dirname(__file__), 'resources/orphanet')

    def tearDown(self):
        """Drop the reference to the source object."""
        self.orphanet = None

    def test_germline_variant_to_disease(self):
        """Expected graph for the 'orph-germline.xml' fixture."""
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline.xml'

        self.orphanet._process_diseasegene(limit=None)

        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))

        expected_triples = """
        MONARCH:ba2ac5d2153c70e2bb98 a OBAN:association ;
            RO:0002558 ECO:0000322 ;
            OBAN:association_has_object ORPHA:938475 ;
            OBAN:association_has_predicate RO:0004013 ;
            OBAN:association_has_subject HGNC:30497 .

        ENSEMBL:ENSG00000166813 a owl:Class .

        HGNC:30497 a owl:Class ;
            RO:0004013 ORPHA:938475 ;
            oboInOwl:hasExactSynonym "KAS1" ;
            owl:equivalentClass ENSEMBL:ENSG00000166813,
                ORPHA:268061 .

        ORPHA:268061 a owl:Class .

        ORPHA:938475 a owl:Class ;
            rdfs:label "too much unit testing disorder" .

        ENSEMBL:ENSG00000166813 biolink:category biolink:Gene .
        ECO:0000322 biolink:category biolink:EvidenceType .
        HGNC:30497 biolink:category biolink:Genotype .
        HGNC:30497 biolink:category biolink:Gene .
        ORPHA:268061 biolink:category biolink:Gene .
        ORPHA:938475 biolink:category biolink:Disease .
        MONARCH:ba2ac5d2153c70e2bb98 biolink:category biolink:Association .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(
                expected_triples, self.orphanet.graph))

    def test_germline_lof_variant_to_disease(self):
        """Expected graph for the 'orph-germline-lof.xml' fixture."""
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline-lof.xml'

        self.orphanet._process_diseasegene(limit=None)

        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))

        expected_triples = """
        MONARCH:b9ad1b0c562ad4db3f1e a OBAN:association ;
            RO:0002558 ECO:0000322 ;
            OBAN:association_has_object ORPHA:938475 ;
            OBAN:association_has_predicate RO:0004012 ;
            OBAN:association_has_subject ORPHA:268061 .

        ORPHA:268061 RO:0004012 ORPHA:938475 ;
            oboInOwl:hasExactSynonym "KAS1" .

        ORPHA:938475 a owl:Class ;
            rdfs:label "too much unit testing disorder" .

        ECO:0000322 biolink:category biolink:EvidenceType .
        ORPHA:268061 biolink:category biolink:Gene .
        ORPHA:268061 biolink:category biolink:Genotype .
        ORPHA:938475 biolink:category biolink:Disease .
        MONARCH:b9ad1b0c562ad4db3f1e biolink:category biolink:Association .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(
                expected_triples, self.orphanet.graph))

    def test_gene_to_disease(self):
        """Expected graph for the 'orph-no-variant.xml' fixture."""
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-no-variant.xml'

        self.orphanet._process_diseasegene(limit=None)

        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))

        expected_triples = """
        MONARCH:bdbeb077e365ddedda20 a OBAN:association ;
            RO:0002558 ECO:0000322 ;
            OBAN:association_has_object ORPHA:938475 ;
            OBAN:association_has_predicate RO:0004015 ;
            OBAN:association_has_subject ORPHA:268061 .

        ORPHA:268061 RO:0004015 ORPHA:938475 ;
            oboInOwl:hasExactSynonym "KAS1" .

        ORPHA:938475 a owl:Class ;
            rdfs:label "too much unit testing disorder" .

        ECO:0000322 biolink:category biolink:EvidenceType .
        ORPHA:268061 biolink:category biolink:Gene .
        ORPHA:268061 biolink:category biolink:Genotype .
        ORPHA:938475 biolink:category biolink:Disease .
        MONARCH:bdbeb077e365ddedda20 biolink:category biolink:Association .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(
                expected_triples, self.orphanet.graph))

    def test_unmapped_disease_assoc_type(self):
        """
        Test that a gene disease type that we have not mapped in
        translationtable/orphanet.yaml raises a KeyError
        """
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-no-mapping.xml'

        with self.assertRaises(KeyError):
            self.orphanet._process_diseasegene(limit=None)
class GeneVariantDiseaseTest(unittest.TestCase):
    """Checks the gene/variant-to-disease triples Orphanet builds from XML fixtures."""

    def setUp(self):
        """Create an Orphanet source that reads from the bundled test resources."""
        self.test_util = TestUtils()
        self.orphanet = Orphanet('rdf_graph', True)
        # Override so tests don't break when we update terms
        # Note there is no such file ./resources/test_terms.yaml
        # self.globaltt = self.orphanet.open_and_parse_yaml(
        #    os.path.join(os.path.dirname(__file__), './resources/test_terms.yaml'))
        self.orphanet.rawdir = os.path.join(
            os.path.dirname(__file__), 'resources/orphanet')

    def tearDown(self):
        """Drop the reference to the source object."""
        self.orphanet = None

    def test_germline_variant_to_disease(self):
        """Expected graph for the 'orph-germline.xml' fixture."""
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline.xml'

        self.orphanet._process_diseasegene(limit=None)

        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))

        expected_triples = """
        MONARCH:b40e89f44906ccededb6 a OBAN:association ;
            RO:0002558 ECO:0000322 ;
            OBAN:association_has_object ORPHA:938475 ;
            OBAN:association_has_predicate RO:0003303 ;
            OBAN:association_has_subject <https://monarchinitiative.org/.well-known/genid/bc50c3aece4f4f161d4d> .

        ENSEMBL:ENSG00000166813 a owl:Class .

        HGNC:30497 a owl:Class .

        HGNC:30497 a owl:Class ;
            rdfs:label "KS1" ;
            oboInOwl:hasExactSynonym "KAS1" ;
            rdfs:subClassOf OBO:SO_0001217 ;
            owl:equivalentClass ENSEMBL:ENSG00000166813,
                ORPHA:268061 .

        <https://monarchinitiative.org/.well-known/genid/bc50c3aece4f4f161d4d> a GENO:0000002 ;
            rdfs:label "germline variant of KS1" ;
            GENO:0000418 HGNC:30497;
            RO:0003303 ORPHA:938475 ;
            :MONARCH_anonymous true ;
            :has_cell_origin GENO:0000900 .

        ORPHA:938475 a owl:Class ;
            rdfs:label "too much unit testing disorder" .
        """

        self.assertTrue(self.test_util.test_graph_equality(
            expected_triples, self.orphanet.graph))

    def test_germline_lof_variant_to_disease(self):
        """Expected graph for the 'orph-germline-lof.xml' fixture."""
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-germline-lof.xml'

        self.orphanet._process_diseasegene(limit=None)

        # Logged at debug level like the sibling tests
        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))

        # The association needs the ``a`` (rdf:type) predicate; without it
        # the first statement is not valid Turtle.
        expected_triples = """
        MONARCH:b40e89f44906ccededb6 a OBAN:association ;
            RO:0002558 ECO:0000322 ;
            OBAN:association_has_object ORPHA:938475 ;
            OBAN:association_has_predicate RO:0003303 ;
            OBAN:association_has_subject <https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> .

        HGNC:30497 a owl:Class ;
            rdfs:label "KS1" ;
            oboInOwl:hasExactSynonym "KAS1" ;
            rdfs:subClassOf SO:0001217 .

        <https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> a GENO:0000002 ;
            rdfs:label "germline loss of function variant of KS1" ;
            GENO:0000418 HGNC:30497 ;
            RO:0003303 ORPHA:938475 ;
            :MONARCH_anonymous true ;
            :has_cell_origin GENO:0000900 ;
            :has_functional_consequence SO:0002054 .

        ORPHA:938475 a owl:Class ;
            rdfs:label "too much unit testing disorder" .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(
                expected_triples, self.orphanet.graph))

    def test_gene_to_disease(self):
        """Expected graph for the 'orph-no-variant.xml' fixture."""
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-no-variant.xml'

        self.orphanet._process_diseasegene(limit=None)

        LOG.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))

        expected_triples = """
        MONARCH:bd8eebdc522f33aca860 a OBAN:association ;
            RO:0002558 ECO:0000322 ;
            OBAN:association_has_object ORPHA:938475 ;
            OBAN:association_has_predicate RO:0003304 ;
            OBAN:association_has_subject HGNC:30497 .

        HGNC:30497 a owl:Class ;
            rdfs:label "KS1" ;
            RO:0003304 ORPHA:938475 ;
            oboInOwl:hasExactSynonym "KAS1" ;
            rdfs:subClassOf SO:0001217 .

        ORPHA:938475 a owl:Class ;
            rdfs:label "too much unit testing disorder" .
        """

        self.assertTrue(self.test_util.test_graph_equality(
            expected_triples, self.orphanet.graph))

    def test_unmapped_disease_assoc_type(self):
        """
        Test that a gene disease type that we have not mapped in
        translationtable/orphanet.yaml raises a ValueError
        """
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = 'orph-no-mapping.xml'

        with self.assertRaises(ValueError):
            self.orphanet._process_diseasegene(limit=None)
class EvidenceProvenanceTestCase(unittest.TestCase):
    """Functional tests for the evidence and provenance helpers of IMPC."""

    def setUp(self):
        """Prepare one hard-coded data row plus the curies shared by the tests."""
        self.test_util = TestUtils()
        self.assoc_curie = 'MONARCH:test_association'
        self.eco_id = 'ECO:0000015'

        self.test_set_1 = (
            'MGI:1920145', 'Setd5', 'WTSI', 'MEFW', 'male', 'heterozygote',
            'MGI:4432631', 'Setd5<tm1a(EUCOMM)Wtsi>',
            'targeted mutation 1a, Wellcome Trust Sanger Institute',
            'MGI:2159965', 'C57BL/6N', 'MGP',
            'Wellcome Trust Sanger Institute Mouse Genetics Project',
            'MGP Select Pipeline', 'MGP_001', 'MGP_XRY_001', 'X-ray',
            'IMPC_XRY_008_001', 'Number of ribs right', 'MP:0005390',
            'skeleton phenotype', 'MP:0000480', 'increased rib number',
            '1.637023E-010', '', '8.885439E-007',
            'Wilcoxon rank sum test with continuity correction', 'IMPC')

        # Generate test curies, these are otherwise generated
        # within _add_evidence() and _add_study_provenance()
        # these blank nodes are hardcoded as NOT Skolemized ...
        self.study_curie = "_:study"
        self.evidence_curie = "_:evidence"

        # IRIs for testing sparql output
        curie_dict = curie_map.get()
        curie_util = CurieUtil(curie_dict)
        self.assoc_iri = URIRef(curie_util.get_uri(self.assoc_curie))

    def test_evidence_model(self):
        """
        Functional test for _add_evidence()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # Reset graph

        # Test graph is empty
        self.assertTrue(len(list(impc.graph)) == 0)

        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, impc_map, p_value,
            percentage_change, effect_size, self.study_curie)

        triples = """
        :MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b097a98087df7a99> .

        <https://monarchinitiative.org/.well-known/genid/b097a98087df7a99> a ECO:0000015 ;
            SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b89ee584330837c9>,
                <https://monarchinitiative.org/.well-known/genid/bc0eeccdea27a1d8> ;
            SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

        <https://monarchinitiative.org/.well-known/genid/bc0eeccdea27a1d8> a OBI:0000175 ;
            RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
            STATO:0000129 1.637023e-10 .

        <https://monarchinitiative.org/.well-known/genid/b89ee584330837c9> a STATO:0000085 ;
            RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
            STATO:0000129 "8.885439E-007" .
        """

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))

    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertTrue(len(list(impc.graph)) == 0)

        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])
        impress_map = json.loads(
            impc.fetch_from_url(
                impc.map_files['impress_map']).read().decode('utf-8'))

        (phenotyping_center, colony) = self.test_set_1[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_1[12:19]
        (statistical_method, resource_name) = self.test_set_1[26:28]

        impc._add_study_provenance(
            impc_map, impress_map, phenotyping_center, colony,
            project_fullname, pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name, parameter_stable_id,
            parameter_name, statistical_method, resource_name)

        triples = """
        <https://monarchinitiative.org/.well-known/genid/bbdd05a8ca155dda> a OBI:0000471 ;
            BFO:0000051 OBO:STATO_0000076,
                <https://www.mousephenotype.org/impress/protocol/175/15> ;
            BFO:0000050 IMPRESS-procedure:15 ,
                <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
            SEPIO:0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
            SEPIO:0000017 <http://www.sanger.ac.uk/> .

        <https://monarchinitiative.org/.well-known/genid/bc0b26361b8687b5> a owl:NamedIndividual ;
            rdfs:label "MEFW" .

        <http://www.sanger.ac.uk/> a foaf:organization ;
            rdfs:label "WTSI" .

        <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
            rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

        <https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
            rdfs:label "Number of ribs right (X-ray)" .

        IMPRESS-procedure:15 a owl:NamedIndividual ;
            rdfs:label "MGP Select Pipeline" .

        <https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
            rdfs:label "X-ray" .
        """

        # dbg
        logger.debug(
            "Reference graph: %s",
            impc.graph.serialize(format="turtle").decode("utf-8"))

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))

    def test_assertion_model(self):
        """
        Functional test for _add_assertion_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertTrue(len(list(impc.graph)) == 0)

        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        impc._add_assertion_provenance(
            self.assoc_curie, self.evidence_curie, impc_map)

        triples = """
        MONARCH:test_association SEPIO:0000015 <https://monarchinitiative.org/.well-known/genid/bcb2c00a5c2f9c43> .

        <https://monarchinitiative.org/.well-known/genid/bcb2c00a5c2f9c43> a SEPIO:0000001 ;
            SEPIO:0000018 <http://www.mousephenotype.org/> ;
            SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence> .

        <http://www.mousephenotype.org/> a foaf:organization ;
            rdfs:label "International Mouse Phenotyping Consortium" .
        """

        # dbg
        logger.debug(
            "Reference graph: %s",
            impc.graph.serialize(format="turtle").decode("utf-8"))

        self.assertTrue(self.test_util.test_graph_equality(
            triples, impc.graph))

    def test_random_data_set(self):
        """
        Download dataset using fetch(), then take a row of data and
        run through evidence and provenance functions to test the output

        Line of data is hardcoded, but theoretically should work on any line
        """
        line_to_test = 1129
        impc = IMPC('rdf_graph', False)  # Not Skolem

        impress_map = json.loads(
            impc.fetch_from_url(
                impc.map_files['impress_map']).read().decode('utf-8'))
        impc_map = impc.open_and_parse_yaml(impc.map_files['impc_map'])

        # fetch file
        impc.fetch(True)
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))

        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='\"')
            for count, row in enumerate(filereader, start=1):
                if count == line_to_test:
                    self.test_set_1 = row
                    break
            else:
                # Fail explicitly rather than silently testing a mix of
                # the hard-coded fixture and whatever row was read last.
                self.fail(
                    "Fetched file has fewer than {} lines".format(
                        line_to_test))

        # Some DRY violation with the above tests
        (phenotyping_center, colony) = row[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = row[12:19]
        (statistical_method, resource_name) = row[26:28]
        (p_value, percentage_change, effect_size) = row[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, impc_map, p_value,
            percentage_change, effect_size, self.study_curie)

        impc._add_study_provenance(
            impc_map, impress_map, phenotyping_center, colony,
            project_fullname, pipeline_name, pipeline_stable_id,
            procedure_stable_id, procedure_name, parameter_stable_id,
            parameter_name, statistical_method, resource_name)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of and has_part links to individuals
        # which results in ambiguity = hard to test
        sparql_query = """
            SELECT *
            WHERE {
                ?assoc SEPIO:0000007 ?evidenceline .
                ?evidenceline a ECO:0000015 ;
                    SEPIO:0000085 _:study .
                ?study a OBI:0000471 ;
                    SEPIO:0000114 ?param ;
                    SEPIO:0000017 ?agent .
            }
        """
        sparql_output = impc.graph.query(sparql_query)

        # Test that query passes and returns one row
        self.assertEqual(len(list(sparql_output)), 1)

    def tearDown(self):
        """Nothing to clean up."""
class TestGwasHaplotypeModel(unittest.TestCase):
    """
    Test the modelling of a haplotype (a record whose label lists
    several SNPs) from sample GWAS catalog data
    """

    def setUp(self):
        """Create a GWASCatalog source with a fresh graph and one sample record."""
        self.test_util = TestUtils()
        self.source = GWASCatalog('rdf_graph', True)
        self.source.graph = RDFGraph(True)
        # The multi-valued fields are ';'-separated and list four SNPs.
        self.test_data = {
            'snp_label': 'rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?',
            'chrom_num': '9;9;9;9',
            'chrom_pos': '36998996;37002118;37000690;36997420',
            'context': (
                'intron_variant; intron_variant; '
                'intron_variant; intron_variant'),
            'allele_freq': 'NR',
            'trait': 'Intelligence',
            'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0004337',
            'pvalue': '0.00000004',
            'merged': '0',
            'snp_id_current': '',
            'mapped_gene': 'PAX5; PAX5; PAX5; PAX5',
            'snp_gene_nums': '',
            'upstream_gene_num': '107986179',
            'downstream_gene_num': '107986180',
            'init_sample_desc':
                '656 European ancestry individuals from ADHD families',
            'replicated_sample_desc': 'NA',
            'platform': 'Illumina [795637]',
            'pubmed': '22449649',
        }

    def tearDown(self):
        """Drop the reference to the source created in setUp."""
        self.source = None

    def test_snp_model(self):
        """
        Test output model of _process_haplotype()
        self._process_haplotype(
            variant_curie, strongest_snp_risk_allele, chrom_num, chrom_pos,
            context, risk_allele_frequency, mapped_gene, so_ontology)
        """
        self.assertEqual(len(list(self.source.graph)), 0)

        # Only the curie is needed here; the resolved type is ignored.
        variant_curie, _ = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        # SO is loaded into its own graph and handed to _process_haplotype().
        # NOTE(review): presumably this fetches the ontology from a remote
        # URL, which would make the test network dependent - confirm.
        so_ontology = RDFGraph()
        LOG.info("Loading SO ontology in separate rdf graph")
        so_ontology.parse(self.source.files['so']['url'], format='xml')
        so_ontology.bind_all_namespaces()
        LOG.info("Finished loading SO ontology")

        self.source._process_haplotype(
            variant_curie,
            self.test_data['snp_label'],
            self.test_data['chrom_num'],
            self.test_data['chrom_pos'],
            self.test_data['context'],
            self.test_data['allele_freq'],
            self.test_data['mapped_gene'],
            so_ontology)

        # NOTE(review): the expected triples mix SO:/OBO:SO_ and
        # CHR:/OBO:CHR_ prefixes; presumably both forms expand to the same
        # IRIs - confirm against the curie map.
        triples = """
        :haplotype_bb627b1f64039b0f751a a SO:0001024 ;
            rdfs:label "rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?" ;
            GENO:0000382 dbSNP:rs1329573,
                dbSNP:rs3758171,
                dbSNP:rs3824344,
                dbSNP:rs7020413 ;
            GENO:0000418 HGNC:8619 ;
            RO:0002162 NCBITaxon:9606 .

        dbSNP:rs1329573 a SO:0000694,
                SO:0001627 ;
            rdfs:label "rs1329573-?" ;
            faldo:location <https://monarchinitiative.org/.well-known/genid/b3fad5df82cdfb283329> ;
            GENO:0000418 HGNC:8619 ;
            RO:0002162 NCBITaxon:9606 .

        dbSNP:rs3758171 a SO:0000694,
                SO:0001627 ;
            rdfs:label "rs3758171-?" ;
            faldo:location <https://monarchinitiative.org/.well-known/genid/b25a2da36647bdd71be3> ;
            GENO:0000418 HGNC:8619 ;
            RO:0002162 NCBITaxon:9606 .

        dbSNP:rs3824344 a SO:0000694,
                OBO:SO_0001627 ;
            rdfs:label "rs3824344-?" ;
            faldo:location <https://monarchinitiative.org/.well-known/genid/b096a3e94e32fe23374a> ;
            GENO:0000418 HGNC:8619 ;
            RO:0002162 NCBITaxon:9606 .

        dbSNP:rs7020413 a SO:0000694,
                SO:0001627 ;
            rdfs:label "rs7020413-?" ;
            faldo:location <https://monarchinitiative.org/.well-known/genid/bbb252d9b6cd02e9880a> ;
            GENO:0000418 HGNC:8619 ;
            RO:0002162 NCBITaxon:9606 .

        <https://monarchinitiative.org/.well-known/genid/b25a2da36647bdd71be3> a faldo:Region ;
            rdfs:label "GRCh38chr9-36997420-36997420-Region";
            faldo:begin <https://monarchinitiative.org/.well-known/genid/b21985847fe0774084eb> ;
            faldo:end <https://monarchinitiative.org/.well-known/genid/b21985847fe0774084eb> .

        <https://monarchinitiative.org/.well-known/genid/b3fad5df82cdfb283329> a faldo:Region ;
            rdfs:label "GRCh38chr9-36998996-36998996-Region";
            faldo:begin <https://monarchinitiative.org/.well-known/genid/b55051762f8d5a3dbeb5> ;
            faldo:end <https://monarchinitiative.org/.well-known/genid/b55051762f8d5a3dbeb5> .

        <https://monarchinitiative.org/.well-known/genid/b096a3e94e32fe23374a> a faldo:Region ;
            rdfs:label "GRCh38chr9-37000690-37000690-Region";
            faldo:begin <https://monarchinitiative.org/.well-known/genid/b5d61dbc7958a979d046> ;
            faldo:end <https://monarchinitiative.org/.well-known/genid/b5d61dbc7958a979d046> .

        <https://monarchinitiative.org/.well-known/genid/bbb252d9b6cd02e9880a> a faldo:Region ;
            rdfs:label "GRCh38chr9-37002118-37002118-Region";
            faldo:begin <https://monarchinitiative.org/.well-known/genid/bb870c3d7606a3e0fc3c> ;
            faldo:end <https://monarchinitiative.org/.well-known/genid/bb870c3d7606a3e0fc3c> .

        <https://monarchinitiative.org/.well-known/genid/b21985847fe0774084eb> a faldo:Position ;
            rdfs:label "GRCh38chr9-36997420";
            faldo:position 36997420 ;
            faldo:reference OBO:CHR_GRCh38chr9 .

        <https://monarchinitiative.org/.well-known/genid/b55051762f8d5a3dbeb5> a faldo:Position ;
            rdfs:label "GRCh38chr9-36998996";
            faldo:position 36998996 ;
            faldo:reference CHR:GRCh38chr9 .

        <https://monarchinitiative.org/.well-known/genid/b5d61dbc7958a979d046> a faldo:Position ;
            rdfs:label "GRCh38chr9-37000690";
            faldo:position 37000690 ;
            faldo:reference CHR:GRCh38chr9 .

        <https://monarchinitiative.org/.well-known/genid/bb870c3d7606a3e0fc3c> a faldo:Position ;
            rdfs:label "GRCh38chr9-37002118";
            faldo:position 37002118 ;
            faldo:reference CHR:GRCh38chr9 .
        """

        # dbg: this serializes the graph produced above (not the expected
        # triples), despite the label used in the message.
        LOG.debug(
            "Reference graph: %s",
            self.source.graph.serialize(format="turtle").decode("utf-8"))
        self.assertTrue(
            self.test_util.test_graph_equality(triples, self.source.graph))
class TestGwasSNPModel(unittest.TestCase):
    """
    Test the modelling of a SNP to trait association
    from sample GWAS catalog data
    """

    def setUp(self):
        """Create a GWASCatalog source with a fresh graph and one sample record."""
        self.test_util = TestUtils()
        self.source = GWASCatalog('rdf_graph', True)
        self.source.graph = RDFGraph(True)  # Reset graph
        self.source.graph.bind_all_namespaces()
        self.test_data = {
            'snp_label': 'rs1491921-C',
            'chrom_num': '5',
            'chrom_pos': '21259029',
            'context': 'intergenic_variant',
            'allele_freq': '0.013',
            'trait': 'Diisocyanate-induced asthma',
            'trait_uri': (
                'http://www.ebi.ac.uk/efo/EFO_0006995, '
                'http://www.ebi.ac.uk/efo/EFO_0003949'),
            'pvalue': '0.0000007',
            'merged': '0',
            'snp_id_current': '1491921',
            'mapped_gene': 'LOC102723561 - GUSBP1',
            'snp_gene_nums': '',
            'upstream_gene_num': '107986179',
            'downstream_gene_num': '107986180',
            'init_sample_desc':
                '74 European ancestry cases, 824 European ancestry controls',
            'replicated_sample_desc': 'NA',
            'platform': 'Illumina [1556551]',
            'pubmed': '25918132',
        }

    def tearDown(self):
        """Drop the reference to the source created in setUp."""
        self.source = None

    def test_snp_type_resolution(self):
        """
        Given the label: rs1491921-C
        return dbSNP:rs1491921, snp
        """
        self.assertEqual(len(list(self.source.graph)), 0)
        variant_curie, variant_type = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        self.assertEqual(variant_curie, "dbSNP:rs1491921")
        self.assertEqual(variant_type, 'snp')

    def test_snp_model(self):
        """
        Test output model of _add_snp_to_graph()
        """
        self.assertEqual(len(list(self.source.graph)), 0)
        # Only the curie is needed here; the resolved type is ignored.
        variant_curie, _ = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        self.source._add_snp_to_graph(
            variant_curie,
            self.test_data['snp_label'],
            self.test_data['chrom_num'],
            self.test_data['chrom_pos'],
            self.test_data['context'],
            self.test_data['allele_freq'])

        triples = """
        dbSNP:rs1491921 a OBO:SO_0000694,
                OBO:SO_0001628 ;
            rdfs:label "rs1491921-C" ;
            faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> ;
            OBO:RO_0002162 OBO:NCBITaxon_9606 ;
            dc:description "0.013 [risk allele frequency]" .

        <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029-21259029-Region> a faldo:Region ;
            faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> ;
            faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> .

        <https://monarchinitiative.org/.well-known/genid/GRCh38chr5-21259029> a faldo:Position ;
            faldo:position 21259029 ;
            faldo:reference OBO:CHR_GRCh38chr5 .
        """

        # To debug, dump self.source.graph.serialize(format="turtle").
        self.assertTrue(
            self.test_util.test_graph_equality(triples, self.source.graph))

    def test_snp_gene_relation(self):
        """
        Test the _add_snp_gene_relation function
        """
        self.assertEqual(len(list(self.source.graph)), 0)
        variant_curie, _ = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        self.source._add_snp_gene_relation(
            variant_curie,
            self.test_data['snp_gene_nums'],
            self.test_data['upstream_gene_num'],
            self.test_data['downstream_gene_num'])

        triples = """
        dbSNP:rs1491921 OBO:RO_0002528 NCBIGene:107986180 ;
            OBO:RO_0002529 NCBIGene:107986179 .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(triples, self.source.graph))

    def test_deprecated_snp(self):
        """
        Test the _add_deprecated_snp function
        """
        self.assertEqual(len(list(self.source.graph)), 0)
        # fake data: overrides the 'snp_id_current' / 'merged' sample values
        snp_id_current = '12345'
        merged = '1'
        variant_curie, _ = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        self.source._add_deprecated_snp(
            variant_curie,
            snp_id_current,
            merged,
            self.test_data['chrom_num'],
            self.test_data['chrom_pos'])

        triples = """
        dbSNP:rs1491921 a owl:NamedIndividual ;
            OBO:IAO_0100001 dbSNP:rs12345 ;
            owl:deprecated true .

        dbSNP:rs12345 MONARCH:cliqueLeader true .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(triples, self.source.graph))

    def test_snp_trait_association(self):
        """
        Test the _add_variant_trait_association function
        """
        self.assertEqual(len(list(self.source.graph)), 0)

        # EFO is loaded into its own graph and handed to
        # _add_variant_trait_association().
        # NOTE(review): presumably this fetches the ontology from a remote
        # URL, which would make the test network dependent - confirm.
        efo_ontology = RDFGraph()
        LOG.info("Loading EFO ontology in separate rdf graph")
        efo_ontology.parse(self.source.files['efo']['url'], format='xml')
        efo_ontology.bind_all_namespaces()
        LOG.info("Finished loading EFO ontology")

        variant_curie, _ = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        description = self.source._make_description(
            self.test_data['trait'],
            self.test_data['init_sample_desc'],
            self.test_data['replicated_sample_desc'],
            self.test_data['platform'],
            self.test_data['pvalue'])

        self.source._add_variant_trait_association(
            variant_curie,
            self.test_data['trait_uri'],
            efo_ontology,
            self.test_data['pubmed'],
            description)

        # "{0}" is filled with the description built above.
        triples = """
        MONARCH:bffc7a930c08cc8fe931 a OBAN:association ;
            dc:description "{0}" ;
            OBO:RO_0002558 OBO:ECO_0000213 ;
            dc:source PMID:25918132 ;
            OBAN:association_has_object EFO:0003949 ;
            OBAN:association_has_predicate RO:0003304 ;
            OBAN:association_has_subject dbSNP:rs1491921 .

        MONARCH:bff9b97458d67ed7f517 a OBAN:association ;
            dc:description "{0}" ;
            OBO:RO_0002558 OBO:ECO_0000213 ;
            dc:source PMID:25918132 ;
            OBAN:association_has_object EFO:0006995 ;
            OBAN:association_has_predicate RO:0003304 ;
            OBAN:association_has_subject dbSNP:rs1491921 .

        EFO:0003949 a owl:Class ;
            rdfs:label "eye color"^^xsd:string ;
            rdfs:subClassOf UPHENO:0001001 .

        dbSNP:rs1491921 RO:0003304 EFO:0003949,
                EFO:0006995 .

        PMID:25918132 a OBO:IAO_0000013 .
        """.format(description)

        # To debug, dump self.source.graph.serialize(format="turtle").
        self.assertTrue(
            self.test_util.test_graph_equality(triples, self.source.graph))
class TestGwasHaplotypeModel(unittest.TestCase):
    """
    Test the modelling of a haplotype (a record whose label lists
    several SNPs) from sample GWAS catalog data
    """

    def setUp(self):
        """Create a GWASCatalog source with a fresh graph and one sample record."""
        self.test_util = TestUtils()
        self.source = GWASCatalog('rdf_graph', True)
        self.source.graph = RDFGraph(True)
        # The multi-valued fields are ';'-separated and list four SNPs.
        self.test_data = {
            'snp_label': 'rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?',
            'chrom_num': '9;9;9;9',
            'chrom_pos': '36998996;37002118;37000690;36997420',
            'context': (
                'intron_variant; intron_variant; '
                'intron_variant; intron_variant'),
            'allele_freq': 'NR',
            'trait': 'Intelligence',
            'trait_uri': 'http://www.ebi.ac.uk/efo/EFO_0004337',
            'pvalue': '0.00000004',
            'merged': '0',
            'snp_id_current': '',
            'mapped_gene': 'PAX5; PAX5; PAX5; PAX5',
            'snp_gene_nums': '',
            'upstream_gene_num': '107986179',
            'downstream_gene_num': '107986180',
            'init_sample_desc':
                '656 European ancestry individuals from ADHD families',
            'replicated_sample_desc': 'NA',
            'platform': 'Illumina [795637]',
            'pubmed': '22449649',
        }

    def tearDown(self):
        """Drop the reference to the source created in setUp."""
        self.source = None

    def test_snp_model(self):
        """
        Test output model of _process_haplotype()
        self._process_haplotype(
            variant_curie, strongest_snp_risk_allele, chrom_num, chrom_pos,
            context, risk_allele_frequency, mapped_gene, so_ontology)
        """
        self.assertEqual(len(list(self.source.graph)), 0)

        # Only the curie is needed here; the resolved type is ignored.
        variant_curie, _ = self.source._get_curie_and_type_from_id(
            self.test_data['snp_label'])

        # SO is loaded into its own graph and handed to _process_haplotype().
        # NOTE(review): presumably this fetches the ontology from a remote
        # URL, which would make the test network dependent - confirm.
        so_ontology = RDFGraph()
        LOG.info("Loading SO ontology in separate rdf graph")
        so_ontology.parse(self.source.files['so']['url'], format='xml')
        so_ontology.bind_all_namespaces()
        LOG.info("Finished loading SO ontology")

        self.source._process_haplotype(
            variant_curie,
            self.test_data['snp_label'],
            self.test_data['chrom_num'],
            self.test_data['chrom_pos'],
            self.test_data['context'],
            self.test_data['allele_freq'],
            self.test_data['mapped_gene'],
            so_ontology)

        triples = """
        :haplotype_bb627b1f64039b0f751a a OBO:GENO_0000871 ;
            rdfs:label "rs1329573-?; rs7020413-?; rs3824344-?; rs3758171-?" ;
            OBO:GENO_0000382 dbSNP:rs1329573,
                dbSNP:rs3758171,
                dbSNP:rs3824344,
                dbSNP:rs7020413 ;
            OBO:SO_0001627 HGNC:8619 ;
            OBO:RO_0002162 OBO:NCBITaxon_9606 .

        dbSNP:rs1329573 a OBO:SO_0000694,
                SO:0001627 ;
            rdfs:label "rs1329573-?" ;
            faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> ;
            OBO:SO_0001627 HGNC:8619 ;
            OBO:RO_0002162 OBO:NCBITaxon_9606 .

        dbSNP:rs3758171 a OBO:SO_0000694,
                OBO:SO_0001627 ;
            rdfs:label "rs3758171-?" ;
            faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> ;
            OBO:SO_0001627 HGNC:8619 ;
            OBO:RO_0002162 OBO:NCBITaxon_9606 .

        dbSNP:rs3824344 a OBO:SO_0000694,
                OBO:SO_0001627 ;
            rdfs:label "rs3824344-?" ;
            faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> ;
            OBO:SO_0001627 HGNC:8619 ;
            OBO:RO_0002162 OBO:NCBITaxon_9606 .

        dbSNP:rs7020413 a OBO:SO_0000694,
                OBO:SO_0001627 ;
            rdfs:label "rs7020413-?" ;
            faldo:location <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> ;
            OBO:SO_0001627 HGNC:8619 ;
            OBO:RO_0002162 OBO:NCBITaxon_9606 .

        <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420-36997420-Region> a faldo:Region ;
            faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> ;
            faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> .

        <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996-36998996-Region> a faldo:Region ;
            faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> ;
            faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> .

        <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690-37000690-Region> a faldo:Region ;
            faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> ;
            faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> .

        <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118-37002118-Region> a faldo:Region ;
            faldo:begin <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> ;
            faldo:end <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> .

        <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36997420> a faldo:Position ;
            faldo:position 36997420 ;
            faldo:reference OBO:CHR_GRCh38chr9 .

        <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-36998996> a faldo:Position ;
            faldo:position 36998996 ;
            faldo:reference OBO:CHR_GRCh38chr9 .

        <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37000690> a faldo:Position ;
            faldo:position 37000690 ;
            faldo:reference OBO:CHR_GRCh38chr9 .

        <https://monarchinitiative.org/.well-known/genid/GRCh38chr9-37002118> a faldo:Position ;
            faldo:position 37002118 ;
            faldo:reference OBO:CHR_GRCh38chr9 .
        """

        # To debug, dump self.source.graph.serialize(format="turtle").
        # Does not seem to acknowledge these constant triples
        self.assertTrue(
            self.test_util.test_graph_equality(triples, self.source.graph))
class UDPTestCase(unittest.TestCase):
    """
    Test UDP parser
    """

    def setUp(self):
        """Create the graph-comparison helper shared by the model tests."""
        self.test_util = TestUtils()

    @staticmethod
    def _mock_file(lines):
        """
        Build the mock_open() object handed to the UDP parse methods;
        the MagicMock it wraps is configured to iterate over ``lines``.
        """
        mock_data = MagicMock()
        mock_data.__iter__.return_value = iter(lines)
        return mock_open(mock=mock_data)

    def test_dbsnp_indel_resolution(self):
        """
        unit test for _get_rs_id()
        Test that we can resolve indels that have
        different insertion sequence(s) for one rsid
        15  51766637  374313651  in-del  -/A/AA/AAA/AAAA/CAAA/TAAA
        """
        udp = UDP('rdf_graph', True)
        rs_map = udp._parse_rs_map_file(udp.map_files['dbsnp_map'])
        variant_type = 'indel'
        variant = {
            'build': 'hg19',
            'chromosome': 'chr15',
            'reference_allele': '-',
            'variant_allele': 'AAAA',
            'position': '51766637',
        }
        rsid = udp._get_rs_id(variant, rs_map, variant_type)
        self.assertEqual(rsid, '374313651')

    def test_dbsnp_snp_mapping(self):
        """
        unit test for _get_rs_id()
        Test that we can resolve snps in dbsnp to rsids
        """
        udp = UDP('rdf_graph', True)
        rs_map = udp._parse_rs_map_file(udp.map_files['dbsnp_map'])
        variant_type = 'snp'
        variant = {
            'build': 'hg19',
            'chromosome': 'chr15',
            'reference_allele': 'A',
            'variant_allele': 'C',
            'position': '54624219',
        }
        rsid = udp._get_rs_id(variant, rs_map, variant_type)
        self.assertEqual(rsid, '755532609')

    def test_patient_phenotype_model(self):
        """
        functional test for _parse_patient_phenotypes()
        """
        udp = UDP('rdf_graph', True)
        udp.graph = RDFGraph(True)

        # test that graph is empty
        self.assertEqual(len(list(udp.graph)), 0)

        mock_lines = [
            'patient_1\tHP:000001\tyes',
            'patient_1\tHP:000002\tno',
        ]
        udp._parse_patient_phenotypes(self._mock_file(mock_lines))

        triples = """
        :patient_1 a foaf:Person ;
            rdfs:label "patient_1" ;
            RO:0002200 DOID:4,
                HP:000001 .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(triples, udp.graph))

    def test_variant_model(self):
        """
        functional test for _parse_patient_variants()
        """
        udp = UDP('rdf_graph', True)
        udp.graph = RDFGraph(True)

        # test that graph is empty
        self.assertEqual(len(list(udp.graph)), 0)

        # One tab-separated input row.
        data = [
            'patient_1', 'family_1', '1', 'HG19', '155230432', 'G', 'A',
            'Maternal', 'Biallelic', 'Non-synonymous;DOWNSTREAM', 'CLK2',
            '', '', '', '', '', '', '',
            'Compound heterozygous', 'Heterozygous', '', '0.002747253', '',
        ]
        test_data = "\t".join(data)
        udp._parse_patient_variants(self._mock_file([test_data]))

        triples = """
        :patient_1 GENO:0000222 <https://monarchinitiative.org/.well-known/genid/ba5f377fc8c95d4a6d7a> .

        <https://monarchinitiative.org/.well-known/genid/b41e8da0787b45e24c4f> a SO:0001059 ;
            rdfs:label "hg19chr1(CLK2):g.155230432G>A" ;
            GENO:0000418 HGNC:2069 ;
            RO:0002162 NCBITaxon:9606 ;
            owl:sameAs dbSNP:rs11557757 .

        <https://monarchinitiative.org/.well-known/genid/ba5f377fc8c95d4a6d7a> a GENO:0000000 ;
            rdfs:label "patient_1 genotype" ;
            GENO:0000382 <https://monarchinitiative.org/.well-known/genid/b41e8da0787b45e24c4f> .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(triples, udp.graph))
class EvidenceProvenanceTestCase(unittest.TestCase):
    """
    Functional tests for the IMPC _add_evidence(), _add_study_provenance()
    and _add_assertion_provenance() graph models
    """

    def setUp(self):
        """Prepare one sample data row plus the curies shared by the tests."""
        self.test_util = TestUtils()
        self.assoc_curie = 'MONARCH:test_association'
        self.eco_id = 'ECO:0000015'

        # Headers:
        # 01 marker_accession_id,     02 marker_symbol,
        # 03 phenotyping_center,      04 colony_raw,
        # 05 sex,                     06 zygosity,
        # 07 allele_accession_id,     08 allele_symbol,
        # 09 allele_name,             10 strain_accession_id,
        # 11 strain_name,             12 project_name,
        # 13 project_fullname,        14 pipeline_name,
        # 15 pipeline_stable_id,      16 procedure_stable_id,
        # 17 procedure_name,          18 parameter_stable_id,
        # 19 parameter_name,          20 top_level_mp_term_id,
        # 21 top_level_mp_term_name,  22 mp_term_id,
        # 23 mp_term_name,            24 p_value,
        # 25 percentage_change,       26 effect_size,
        # 27 statistical_method,      28 resource_name
        self.test_set_1 = (
            'MGI:1920145',  # 01
            'Setd5',  # 02
            'WTSI',  # 03
            'MEFW',  # 04
            'male',  # 05
            'heterozygote',  # 06
            'MGI:4432631',  # 07
            'Setd5<tm1a(EUCOMM)Wtsi>',  # 08
            'targeted mutation 1a, Wellcome Trust Sanger Institute',  # 09
            'MGI:2159965',  # 10
            'C57BL/6N',  # 11
            'MGP',  # 12
            'Wellcome Trust Sanger Institute Mouse Genetics Project',  # 13
            'MGP Select Pipeline',  # 14
            'MGP_001',  # 15
            'MGP_XRY_001',  # 16
            'X-ray',  # 17
            'IMPC_XRY_008_001',  # 18
            'Number of ribs right',  # 19
            'MP:0005390',  # 20
            'skeleton phenotype',  # 21
            'MP:0000480',  # 22
            'increased rib number',  # 23
            '1.637023E-010',  # 24
            '',  # 25
            '8.885439E-007',  # 26
            'Wilcoxon rank sum test with continuity correction',  # 27
            'IMPC',  # 28
        )

        # Generate test curies, these are otherwise generated
        # within _add_evidence() and _add_study_provenance()
        # these blank nodes are hardcoded as NOT Skolemized ...
        self.study_curie = "_:study"
        self.evidence_curie = "_:evidence"

        # IRIs for testing sparql output
        curie_dict = curie_map.get()
        curie_util = CurieUtil(curie_dict)
        self.assoc_iri = URIRef(curie_util.get_uri(self.assoc_curie))

    def test_evidence_model(self):
        """
        Functional test for _add_evidence()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # Reset graph

        # Test graph is empty
        self.assertEqual(len(list(impc.graph)), 0)

        # Columns 24-26 of the sample row.
        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change,
            effect_size, self.study_curie)

        triples = """
        :MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> .

        <https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> a ECO:0000015 ;
            SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888>,
                <https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> ;
            SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

        <https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> a OBI:0000175 ;
            RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
            STATO:0000129 1.637023e-10 .

        <https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888> a STATO:0000085 ;
            RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
            STATO:0000129 "8.885439E-007" .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(triples, impc.graph))

    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertEqual(len(list(impc.graph)), 0)

        (phenotyping_center, colony) = self.test_set_1[2:4]
        # Column 13 (project_fullname) is unpacked but not used by this call.
        (project_name, _project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_1[11:19]
        (statistical_method, resource_name) = self.test_set_1[26:28]

        impc._add_study_provenance(
            phenotyping_center, colony, project_name, pipeline_name,
            pipeline_stable_id, procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name, statistical_method,
            resource_name)

        # dbg
        LOG.info(
            "Provenance graph as turtle:\n%s\n",
            impc.graph.serialize(format="turtle").decode("utf-8")
        )

        triples = """
        <https://monarchinitiative.org/.well-known/genid/b0b26361b8687b5ad9ef> a owl:NamedIndividual ;
            rdfs:label "MEFW" .

        <https://monarchinitiative.org/.well-known/genid/b6f14f763c8d0629360e> a OBI:0000471 ;
            BFO:0000050 <http://www.sanger.ac.uk/science/data/mouse-genomes-project>,
                IMPC-pipe:MGP_001 ;
            BFO:0000051 STATO:0000076,
                IMPC-proc:MGP_XRY_001 ;
            SEPIO:0000017 <http://www.sanger.ac.uk/> ;
            SEPIO:0000114 <https://www.mousephenotype.org/impress/OntologyInfo?action=list&procID=MGP_XRY_001#IMPC_XRY_008_001> .

        <http://www.sanger.ac.uk/> a foaf:organization ;
            rdfs:label "WTSI" .

        <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
            rdfs:label "MGP" .

        <https://www.mousephenotype.org/impress/OntologyInfo?action=list&procID=MGP_XRY_001#IMPC_XRY_008_001> a owl:NamedIndividual ;
            rdfs:label "Number of ribs right (X-ray)" .

        IMPC-pipe:MGP_001 a owl:NamedIndividual ;
            rdfs:label "MGP Select Pipeline" .

        IMPC-proc:MGP_XRY_001 a owl:NamedIndividual ;
            rdfs:label "X-ray" .
        """

        # dbg: this serializes the graph produced above (not the expected
        # triples), despite the label used in the message.
        LOG.info(
            "Reference graph: %s",
            impc.graph.serialize(format="turtle").decode("utf-8")
        )

        self.assertTrue(
            self.test_util.test_graph_equality(triples, impc.graph))

    def test_assertion_model(self):
        """
        Functional test for _add_assertion_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertEqual(len(list(impc.graph)), 0)

        impc._add_assertion_provenance(self.assoc_curie, self.evidence_curie)

        triples = """
        MONARCH:test_association SEPIO:0000015 <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> .

        <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> a SEPIO:0000001 ;
            SEPIO:0000018 <https://www.mousephenotype.org/> ;
            SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence> .

        <https://www.mousephenotype.org/> a foaf:organization ;
            rdfs:label "International Mouse Phenotyping Consortium" .
        """

        # dbg
        LOG.info(
            "Assertion graph:\n %s\n",
            impc.graph.serialize(format="turtle").decode("utf-8")
        )

        self.assertTrue(
            self.test_util.test_graph_equality(triples, impc.graph))

    @unittest.skip("Timeouts on travis")
    def test_random_data_set(self):
        """
        Take one row of the IMPC data file found under impc.rawdir and
        run it through the evidence and provenance functions to test
        the output.  The file is NOT downloaded here (the fetch() call
        is disabled), so it must already be present.
        Line of data is hardcoded, but theoretically should work on any line
        """
        line_to_test = 1129
        impc = IMPC('rdf_graph', False)  # Not Skolem
        self.test_set_N = []

        # fetch file: impc.fetch(True) is intentionally not called here
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='"')
            for count, row in enumerate(filereader, start=1):
                if count == line_to_test:
                    self.test_set_N = row
                elif count > line_to_test:
                    LOG.info("stopped at line:\t%s\n", count)
                    break

        # Fail with a readable message instead of an unpacking error
        # when the file is shorter than expected.
        self.assertTrue(
            self.test_set_N,
            "no row {} found in {}".format(line_to_test, file_path))

        # Some DRY violation with the above tests
        (phenotyping_center, colony) = self.test_set_N[2:4]
        # Column 13 (project_fullname) is unpacked but not used below.
        (project_name, _project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_N[11:19]
        (statistical_method, resource_name) = self.test_set_N[26:28]
        (p_value, percentage_change, effect_size) = self.test_set_N[23:26]

        # adding evidence
        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change,
            effect_size, self.study_curie)

        # adding study
        impc._add_study_provenance(
            phenotyping_center, colony, project_name, pipeline_name,
            pipeline_stable_id, procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name, statistical_method,
            resource_name, line_to_test)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of and has_part links to individuals
        # which results in ambiguity = hard to test

        # dbg
        LOG.info(
            "Row %i graph as ntriples:\n%s\n",
            line_to_test,
            impc.graph.serialize(format="ntriples").decode("utf-8")
        )

        sparql_query = """
        SELECT *
        WHERE {
            ?assoc SEPIO:0000007 ?evidenceline .
            ?evidenceline a ECO:0000015 ;
                SEPIO:0000085 _:study .
            ?study a OBI:0000471 ;
                SEPIO:0000114 ?param ;
                SEPIO:0000017 ?agent .
        }
        """
        sparql_output = impc.graph.query(sparql_query)

        LOG.info(
            "Test that query for row %i passes and returns one row",
            int(line_to_test))
        # it is an array with one list with five vars in it
        self.assertEqual(len(list(sparql_output)), 1)
class EvidenceProvenanceTestCase(unittest.TestCase):
    """
    Functional tests for the IMPC _add_evidence(), _add_study_provenance()
    and _add_assertion_provenance() graph models
    """

    def setUp(self):
        """Prepare one sample data row plus the curies shared by the tests."""
        self.test_util = TestUtils()
        self.assoc_curie = 'MONARCH:test_association'
        self.eco_id = 'ECO:0000015'

        # Headers:
        # 01 marker_accession_id,     02 marker_symbol,
        # 03 phenotyping_center,      04 colony_raw,
        # 05 sex,                     06 zygosity,
        # 07 allele_accession_id,     08 allele_symbol,
        # 09 allele_name,             10 strain_accession_id,
        # 11 strain_name,             12 project_name,
        # 13 project_fullname,        14 pipeline_name,
        # 15 pipeline_stable_id,      16 procedure_stable_id,
        # 17 procedure_name,          18 parameter_stable_id,
        # 19 parameter_name,          20 top_level_mp_term_id,
        # 21 top_level_mp_term_name,  22 mp_term_id,
        # 23 mp_term_name,            24 p_value,
        # 25 percentage_change,       26 effect_size,
        # 27 statistical_method,      28 resource_name
        self.test_set_1 = (
            'MGI:1920145',  # 01
            'Setd5',  # 02
            'WTSI',  # 03
            'MEFW',  # 04
            'male',  # 05
            'heterozygote',  # 06
            'MGI:4432631',  # 07
            'Setd5<tm1a(EUCOMM)Wtsi>',  # 08
            'targeted mutation 1a, Wellcome Trust Sanger Institute',  # 09
            'MGI:2159965',  # 10
            'C57BL/6N',  # 11
            'MGP',  # 12
            'Wellcome Trust Sanger Institute Mouse Genetics Project',  # 13
            'MGP Select Pipeline',  # 14
            'MGP_001',  # 15
            'MGP_XRY_001',  # 16
            'X-ray',  # 17
            'IMPC_XRY_008_001',  # 18
            'Number of ribs right',  # 19
            'MP:0005390',  # 20
            'skeleton phenotype',  # 21
            'MP:0000480',  # 22
            'increased rib number',  # 23
            '1.637023E-010',  # 24
            '',  # 25
            '8.885439E-007',  # 26
            'Wilcoxon rank sum test with continuity correction',  # 27
            'IMPC',  # 28
        )

        # Generate test curies, these are otherwise generated
        # within _add_evidence() and _add_study_provenance()
        # these blank nodes are hardcoded as NOT Skolemized ...
        self.study_curie = "_:study"
        self.evidence_curie = "_:evidence"

        # IRIs for testing sparql output
        curie_dict = curie_map.get()
        curie_util = CurieUtil(curie_dict)
        self.assoc_iri = URIRef(curie_util.get_uri(self.assoc_curie))

    def test_evidence_model(self):
        """
        Functional test for _add_evidence()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)  # Reset graph

        # Test graph is empty
        self.assertEqual(len(list(impc.graph)), 0)

        # Columns 24-26 of the sample row.
        (p_value, percentage_change, effect_size) = self.test_set_1[23:26]

        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change,
            effect_size, self.study_curie)

        triples = """
        :MONARCH_test_association SEPIO:0000007 <https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> .

        <https://monarchinitiative.org/.well-known/genid/b97a98087df7a99d8a38> a ECO:0000015 ;
            SEPIO:0000084 <https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888>,
                <https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> ;
            SEPIO:0000085 <https://monarchinitiative.org/.well-known/genid/study> .

        <https://monarchinitiative.org/.well-known/genid/b216606de82749b03956> a OBI:0000175 ;
            RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
            STATO:0000129 1.637023e-10 .

        <https://monarchinitiative.org/.well-known/genid/b41ad2bfd375c9de8888> a STATO:0000085 ;
            RO:0002353 <https://monarchinitiative.org/.well-known/genid/study> ;
            STATO:0000129 "8.885439E-007" .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(triples, impc.graph))

    def test_provenance_model(self):
        """
        Functional test for _add_study_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertEqual(len(list(impc.graph)), 0)

        (phenotyping_center, colony) = self.test_set_1[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_1[12:19]
        (statistical_method, resource_name) = self.test_set_1[26:28]

        impc._add_study_provenance(
            phenotyping_center, colony, project_fullname, pipeline_name,
            pipeline_stable_id, procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name, statistical_method,
            resource_name, 0)

        # dbg
        logger.info(
            "Provenance graph as turtle:\n%s\n",
            impc.graph.serialize(format="turtle").decode("utf-8")
        )

        triples = """
        <https://monarchinitiative.org/.well-known/genid/bdd05a8ca155ddaf415e> a OBI:0000471 ;
            BFO:0000051 OBO:STATO_0000076,
                <https://www.mousephenotype.org/impress/protocol/175/15> ;
            BFO:0000050 IMPRESS-procedure:15 ,
                <http://www.sanger.ac.uk/science/data/mouse-genomes-project> ;
            SEPIO:0000114 <https://www.mousephenotype.org/impress/parameterontologies/1867/91> ;
            SEPIO:0000017 <http://www.sanger.ac.uk/> .

        <https://monarchinitiative.org/.well-known/genid/b0b26361b8687b5ad9ef> a owl:NamedIndividual ;
            rdfs:label "MEFW" .

        <http://www.sanger.ac.uk/> a foaf:organization ;
            rdfs:label "WTSI" .

        <http://www.sanger.ac.uk/science/data/mouse-genomes-project> a VIVO:Project ;
            rdfs:label "Wellcome Trust Sanger Institute Mouse Genetics Project" .

        <https://www.mousephenotype.org/impress/parameterontologies/1867/91> a owl:NamedIndividual ;
            rdfs:label "Number of ribs right (X-ray)" .

        IMPRESS-procedure:15 a owl:NamedIndividual ;
            rdfs:label "MGP Select Pipeline" .

        <https://www.mousephenotype.org/impress/protocol/175/15> a owl:NamedIndividual ;
            rdfs:label "X-ray" .
        """

        # dbg: this serializes the graph produced above (not the expected
        # triples), despite the label used in the message.
        logger.debug(
            "Reference graph: %s",
            impc.graph.serialize(format="turtle").decode("utf-8")
        )

        self.assertTrue(
            self.test_util.test_graph_equality(triples, impc.graph))

    def test_assertion_model(self):
        """
        Functional test for _add_assertion_provenance()
        """
        impc = IMPC('rdf_graph', True)
        impc.graph = RDFGraph(True)
        self.assertEqual(len(list(impc.graph)), 0)

        impc._add_assertion_provenance(self.assoc_curie, self.evidence_curie)

        triples = """
        MONARCH:test_association SEPIO:0000015 <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> .

        <https://monarchinitiative.org/.well-known/genid/bf92df374a884963e805> a SEPIO:0000001 ;
            SEPIO:0000018 <https://www.mousephenotype.org/> ;
            SEPIO:0000111 <https://monarchinitiative.org/.well-known/genid/evidence> .

        <https://www.mousephenotype.org/> a foaf:organization ;
            rdfs:label "International Mouse Phenotyping Consortium" .
        """

        # dbg
        logger.info(
            "Assertion graph:\n %s\n",
            impc.graph.serialize(format="turtle").decode("utf-8")
        )

        self.assertTrue(
            self.test_util.test_graph_equality(triples, impc.graph))

    def test_random_data_set(self):
        """
        Take one row of the IMPC data file found under impc.rawdir and
        run it through the evidence and provenance functions to test
        the output.  The file is NOT downloaded here (the fetch() call
        is disabled), so it must already be present.
        Line of data is hardcoded, but theoretically should work on any line
        """
        line_to_test = 1129
        impc = IMPC('rdf_graph', False)  # Not Skolem
        self.test_set_N = []

        # fetch file: impc.fetch(True) is intentionally not called here
        file_path = '/'.join((impc.rawdir, impc.files['all']['file']))
        with gzip.open(file_path, 'rt') as csvfile:
            filereader = csv.reader(csvfile, delimiter=',', quotechar='"')
            for count, row in enumerate(filereader, start=1):
                if count == line_to_test:
                    self.test_set_N = row
                elif count > line_to_test:
                    logger.info("stopped at line:\t%s\n", count)
                    break

        # Fail with a readable message instead of an unpacking error
        # when the file is shorter than expected.
        self.assertTrue(
            self.test_set_N,
            "no row {} found in {}".format(line_to_test, file_path))

        # Some DRY violation with the above tests
        (phenotyping_center, colony) = self.test_set_N[2:4]
        (project_fullname, pipeline_name, pipeline_stable_id,
         procedure_stable_id, procedure_name, parameter_stable_id,
         parameter_name) = self.test_set_N[12:19]
        (statistical_method, resource_name) = self.test_set_N[26:28]
        (p_value, percentage_change, effect_size) = self.test_set_N[23:26]

        # adding evidence
        impc._add_evidence(
            self.assoc_curie, self.eco_id, p_value, percentage_change,
            effect_size, self.study_curie)

        # adding study
        impc._add_study_provenance(
            phenotyping_center, colony, project_fullname, pipeline_name,
            pipeline_stable_id, procedure_stable_id, procedure_name,
            parameter_stable_id, parameter_name, statistical_method,
            resource_name, line_to_test)

        # Note that this doesn't test much since we're dealing with
        # multiple part_of and has_part links to individuals
        # which results in ambiguity = hard to test

        # dbg
        logger.info(
            "Row %i graph as ntriples:\n%s\n",
            line_to_test,
            impc.graph.serialize(format="ntriples").decode("utf-8")
        )

        sparql_query = """
        SELECT *
        WHERE {
            ?assoc SEPIO:0000007 ?evidenceline .
            ?evidenceline a ECO:0000015 ;
                SEPIO:0000085 _:study .
            ?study a OBI:0000471 ;
                SEPIO:0000114 ?param ;
                SEPIO:0000017 ?agent .
        }
        """
        sparql_output = impc.graph.query(sparql_query)

        logger.info(
            "Test that query for row %i passes and returns one row",
            int(line_to_test))
        # it is an array with one list with five vars in it
        self.assertEqual(len(list(sparql_output)), 1)
class StringTestFakeData(unittest.TestCase):
    """
    Feeds small hand-written protein-link tables to
    StringDB._process_protein_links() and inspects the resulting graph.
    """

    def setUp(self):
        """
        Prepare two one-row link tables, their column names, and the
        protein-to-gene map that Ensembl returns for taxon 9606.
        """
        self.test_util = TestUtils()

        # Both rows start with the same protein and carry the same scores
        first_protein = '9606.ENSP00000000233'
        link_scores = [0, 0, 0, 0, 300, 0, 150, 800]

        # Test set with two proteins from same species
        self.test_set_1 = [
            [first_protein, '9606.ENSP00000003084'] + link_scores,
        ]

        # Test set with deprecated protein id
        self.test_set_2 = [
            [first_protein, '9606.ENSP00000006101'] + link_scores,
        ]

        self.columns = [
            'protein1',
            'protein2',
            'neighborhood',
            'fusion',
            'cooccurence',
            'coexpression',
            'experimental',
            'database',
            'textmining',
            'combined_score',
        ]

        self.protein_list = Ensembl(
            'rdf_graph', True).fetch_protein_gene_map('9606')

    def tearDown(self):
        """No per-test cleanup is performed."""

    def testFakeDataSet1(self):
        """
        Process the first test set with an 'ENSEMBL:'-prefixed gene map and
        compare the graph with a single RO:0002434 triple.
        """
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph(True)
        self.assertEqual(len(string_db.graph), 0)

        gene_source = Ensembl('rdf_graph', True)
        prot_map = gene_source.fetch_protein_gene_map('9606')

        # Rewrite every gene id in place as an ENSEMBL curie
        for gene_ids in prot_map.values():
            for position, gene_id in enumerate(gene_ids):
                gene_ids[position] = "ENSEMBL:{}".format(gene_id)

        protein_total = len(prot_map)
        print(
            "Finished fetching ENSP IDs, fetched {} proteins"
            .format(protein_total))

        links = pd.DataFrame(data=self.test_set_1, columns=self.columns)
        string_db._process_protein_links(links, prot_map, '9606')

        # g1 <interacts with> g2
        triples = """
        ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 .
        """

        graphs_match = self.test_util.test_graph_equality(
            triples, string_db.graph)
        self.assertTrue(graphs_match)

    def testFakeDataSet2(self):
        """
        Dataset contains a deprecated protein ID that we expect to be
        filtered out by ensembl biomart.
        We test that this returns an empty graph.
        """
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph()
        self.assertEqual(len(string_db.graph), 0)

        links = pd.DataFrame(data=self.test_set_2, columns=self.columns)
        string_db._process_protein_links(links, self.protein_list, '9606')

        self.assertEqual(len(string_db.graph), 0)
class GeneVariantDiseaseTest(unittest.TestCase):
    """
    Checks the graph that Orphanet._process_diseasegene() builds for several
    small disease-gene input files kept under resources/orphanet.
    """

    def setUp(self):
        """
        Create the Orphanet source and point its rawdir at the test resources.
        """
        self.test_util = TestUtils()
        self.orphanet = Orphanet('rdf_graph', True)
        # Override so tests don't break when we update terms
        # NOTE(review): the parsed terms are stored on the test case itself,
        # not on self.orphanet -- confirm whether self.orphanet.globaltt
        # was the intended target.
        self.globaltt = self.orphanet.open_and_parse_yaml(
            os.path.join(
                os.path.dirname(__file__), './resources/test_terms.yaml'))
        self.orphanet.rawdir = os.path.join(
            os.path.dirname(__file__), 'resources/orphanet')

    def tearDown(self):
        """Drop the reference to the source created in setUp."""
        self.orphanet = None

    def _select_input(self, file_name):
        """Reset the graph and make file_name the 'disease-gene' input."""
        self.orphanet.graph = RDFGraph()  # Reset graph
        self.orphanet.files['disease-gene']['file'] = file_name

    def _parse_input(self, file_name):
        """Select file_name, run the disease-gene parser and log the graph."""
        self._select_input(file_name)
        self.orphanet._process_diseasegene(limit=None)
        logger.debug(
            "Reference graph: %s",
            self.orphanet.graph.serialize(format="turtle").decode("utf-8"))

    def test_germline_variant_to_disease(self):
        """Parse orph-germline.xml and compare with the expected triples."""
        self._parse_input('orph-germline.xml')

        expected_triples = """
        MONARCH:b2cd4dfacc21d0e28c39 a OBAN:association ;
            RO:0002558 ECO:0000322 ;
            OBAN:association_has_object Orphanet:938475 ;
            OBAN:association_has_predicate RO:0003303 ;
            OBAN:association_has_subject <https://monarchinitiative.org/.well-known/genid/b56f798350412a34> .

        ENSEMBL:ENSG00000166813 a owl:Class .

        HGNC:30497 a owl:Class .

        Orphanet:268061 a owl:Class ;
            rdfs:label "KS1" ;
            dc:description "kinesin family member 7" ;
            oboInOwl:hasExactSynonym "KAS1" ;
            rdfs:subClassOf OBO:SO_0001217 ;
            owl:equivalentClass ENSEMBL:ENSG00000166813, HGNC:30497 .

        <https://monarchinitiative.org/.well-known/genid/b56f798350412a34> a GENO:0000002 ;
            rdfs:label "germline variant of KS1" ;
            GENO:0000418 Orphanet:268061 ;
            RO:0003303 Orphanet:938475 ;
            :MONARCH_anonymous true ;
            :has_cell_origin GENO:0000900 .

        Orphanet:938475 a owl:Class ;
            rdfs:label "too much unit testing disorder" .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(
                expected_triples, self.orphanet.graph))

    def test_germline_lof_variant_to_disease(self):
        """Parse orph-germline-lof.xml and compare with the expected triples."""
        self._parse_input('orph-germline-lof.xml')

        # The association line needs the 'a' (rdf:type) predicate, as in the
        # sibling tests; without it the Turtle has a predicate with no object.
        expected_triples = """
        MONARCH:b53dada0eb229a75e705 a OBAN:association ;
            RO:0002558 ECO:0000322 ;
            OBAN:association_has_object Orphanet:938475 ;
            OBAN:association_has_predicate RO:0003303 ;
            OBAN:association_has_subject <https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> .

        Orphanet:268061 a owl:Class ;
            rdfs:label "KS1" ;
            dc:description "kinesin family member 7" ;
            oboInOwl:hasExactSynonym "KAS1" ;
            rdfs:subClassOf SO:0001217 .

        <https://monarchinitiative.org/.well-known/genid/ba0884fb61004110> a GENO:0000002 ;
            rdfs:label "germline loss of function variant of KS1" ;
            GENO:0000418 Orphanet:268061 ;
            RO:0003303 Orphanet:938475 ;
            :MONARCH_anonymous true ;
            :has_cell_origin GENO:0000900 ;
            :has_functional_consequence SO:0002054 .

        Orphanet:938475 a owl:Class ;
            rdfs:label "too much unit testing disorder" .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(
                expected_triples, self.orphanet.graph))

    def test_gene_to_disease(self):
        """Parse orph-no-variant.xml and compare with the expected triples."""
        self._parse_input('orph-no-variant.xml')

        expected_triples = """
        MONARCH:b64684a0ea6ae59fdb09 a OBAN:association ;
            RO:0002558 ECO:0000322 ;
            OBAN:association_has_object Orphanet:938475 ;
            OBAN:association_has_predicate RO:0003304 ;
            OBAN:association_has_subject Orphanet:268061 .

        Orphanet:268061 a owl:Class ;
            rdfs:label "KS1" ;
            RO:0003304 Orphanet:938475 ;
            dc:description "kinesin family member 7" ;
            oboInOwl:hasExactSynonym "KAS1" ;
            rdfs:subClassOf SO:0001217 .

        Orphanet:938475 a owl:Class ;
            rdfs:label "too much unit testing disorder" .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(
                expected_triples, self.orphanet.graph))

    def test_unmapped_disease_assoc_type(self):
        """
        Test that a gene disease type that we have not mapped
        in translationtable/orphanet.yaml raises a ValueError
        """
        self._select_input('orph-no-mapping.xml')

        # Only the parser call sits inside the context manager, so a
        # ValueError raised anywhere else cannot satisfy the assertion.
        with self.assertRaises(ValueError):
            self.orphanet._process_diseasegene(limit=None)
class RGDTestCase(unittest.TestCase):
    """
    Runs RGD.make_association() on one hand-built annotation record and
    compares the resulting graph with a fixed set of triples.
    """

    def setUp(self):
        """Assemble the single record that testRGDParser hands to the parser."""
        self.test_util = TestUtils()

        rat_taxon = 'NCBITaxon:10116'

        evidence = {
            'has_supporting_reference': ['RGD:1581841', 'PMID:12799311'],
            'type': 'IED',
            'with_support_from': [],
        }
        phenotype = {
            'id': 'MP:0003340',
            'taxon': rat_taxon,
        }
        gene = {
            'fullname': 'endothelin receptor type A',
            'id': 'RGD:2535',
            'label': 'Ednra',
            'synonyms': [],
            'taxon': {'id': rat_taxon},
            'type': 'gene',
        }
        raw_line = (
            'RGD\t2535\tEdnra\t\tMP:0003340\tRGD:1581841|PMID:12799311\t'
            'IED\t\tN\tendothelin receptor type A\t\tgene\ttaxon:10116\t'
            '20061026\tRGD\t\t\n'
        )

        # Key order is kept the same as in the original fixture
        self.test_set_1 = {
            'aspect': 'N',
            'date': '2006-10-26',
            'evidence': evidence,
            'negated': False,
            'object': phenotype,
            'provided_by': 'RGD',
            'qualifiers': [],
            'relation': {'id': None},
            'source_line': raw_line,
            'subject': gene,
            'subject_extensions': [
                {'filler': '\n', 'property': 'isoform'},
            ],
        }

    def tearDown(self):
        """No per-test cleanup is performed."""

    def testRGDParser(self):
        """Build the association from test_set_1 and check the graph."""
        rgd = RGD('rdf_graph', True)
        rgd.graph = RDFGraph(True)

        initial_size = len(list(rgd.graph))
        self.assertTrue(initial_size == 0)

        rgd.make_association(record=self.test_set_1)

        triples = """
        :MONARCH_b4650e8c3d865f11a1a5 a OBAN:association ;
            RO:0002558 ECO:0005611 ;
            dcterms:source RGDRef:1581841 ;
            OBAN:association_has_object OBO:MP_0003340 ;
            OBAN:association_has_predicate OBO:RO_0002200 ;
            OBAN:association_has_subject RGD:2535 ;
            pav:createdOn "2006-10-26" .

        RGD:2535 OBO:RO_0002200 MP:0003340 .

        RGDRef:1581841 a IAO:0000311 ;
            owl:sameAs PMID:12799311 .
        """

        # dbg
        turtle_dump = rgd.graph.serialize(format="turtle").decode("utf-8")
        logger.debug("Reference graph: %s", turtle_dump)

        graphs_match = self.test_util.test_graph_equality(triples, rgd.graph)
        self.assertTrue(graphs_match)
class StringTestFakeData(unittest.TestCase):
    """
    Feeds small hand-written protein-link tables to
    StringDB._process_protein_links() and checks the resulting graph.
    """

    def setUp(self):
        """
        Build the two one-row link tables, their column names, and the
        protein-to-gene map fetched through Ensembl for taxon 9606.
        """
        self.test_util = TestUtils()

        # Test set with two proteins from same species
        self.test_set_1 = [[
            '9606.ENSP00000000233', '9606.ENSP00000003084',
            0, 0, 0, 0, 300, 0, 150, 800]]

        # Test set with deprecated protein id
        self.test_set_2 = [[
            '9606.ENSP00000000233', '9606.ENSP00000006101',
            0, 0, 0, 0, 300, 0, 150, 800]]

        self.columns = [
            'protein1', 'protein2', 'neighborhood', 'fusion', 'cooccurence',
            'coexpression', 'experimental', 'database', 'textmining',
            'combined_score']

        ensembl = Ensembl('rdf_graph', True)
        self.protein_list = ensembl.fetch_protein_gene_map('9606')

    def tearDown(self):
        """No per-test cleanup is performed."""

    def testFakeDataSet1(self):
        """
        Process the first test set with an 'ENSEMBL:'-prefixed gene map and
        compare the graph with the expected interaction and type triples.
        """
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph(True)
        self.assertEqual(len(string_db.graph), 0)

        # setUp has already fetched the map for this test instance, so build
        # the prefixed copy from it rather than fetching a second time.
        # Each gene id becomes a one-element list holding an ENSEMBL curie.
        prot_map = {
            protein: ['ENSEMBL:' + gene]
            for protein, gene in self.protein_list.items()}
        print(
            "Finished fetching ENSP IDs, fetched {} proteins"
            .format(len(prot_map)))

        dataframe = pd.DataFrame(data=self.test_set_1, columns=self.columns)
        string_db._process_protein_links(dataframe, prot_map, '9606')

        # g1 <interacts with> g2
        triples = """
        ENSEMBL:ENSG00000001626 RO:0002434 ENSEMBL:ENSG00000004059 .
        ENSEMBL:ENSG00000001626 rdf:type SO:0000704 .
        ENSEMBL:ENSG00000004059 rdf:type SO:0000704 .
        """

        self.assertTrue(
            self.test_util.test_graph_equality(triples, string_db.graph))

    def testFakeDataSet2(self):
        """
        Dataset contains a deprecated protein ID that we expect to be
        filtered out by ensembl biomart.
        We test that this returns an empty graph.
        """
        string_db = StringDB('rdf_graph', True)
        string_db.graph = RDFGraph()
        self.assertEqual(len(string_db.graph), 0)

        dataframe = pd.DataFrame(data=self.test_set_2, columns=self.columns)
        # NOTE(review): the map is passed with its raw values here, whereas
        # testFakeDataSet1 wraps each value in an 'ENSEMBL:'-prefixed list --
        # confirm that this difference is intended.
        string_db._process_protein_links(dataframe, self.protein_list, '9606')

        self.assertEqual(len(string_db.graph), 0)