class ZFINTestCase(SourceTestCase):
    """
    Tests for the ZFIN source: loading the ZP mapping file, building
    zp_map keys, mapping phenotype tuples to ZP ids, and genotype labels.
    """

    def setUp(self):
        """Create a fresh test-only ZFIN source for every test."""
        self.source = ZFIN('rdf_graph', True)
        self.source.settestonly(True)
        self._setDirToSource()

    def tearDown(self):
        """Drop the reference to the source built in setUp()."""
        self.source = None

    def test_mapping_of_phenotypes_to_zp_ids(self):
        """
        test that code correctly uses zp_map to map phenotypes to zp ids
        :return:
        """
        mapping_file = "./tests/resources/zfin/zp-mapping-test-map.txt"
        pheno_file = "./tests/resources/zfin/zp-mapping-test-phenotype.txt"
        self.source.zp_map = self.source._load_zp_mappings(mapping_file)

        # Context manager so the handle is closed even if reading fails.
        with open(pheno_file) as pheno_handle:
            pheno_dat = pheno_handle.read().split('\t')

        # The fixture is a single tab-separated record; unpacking all
        # fields documents its layout and fails loudly if it changes.
        (fish_num, fish_name, start_stage_id, start_stage_name,
         end_stage_id, end_stage_name, subterm1_id, subterm1_name,
         postcomp1_rel_id, postcomp1_rel_name, superterm1_id,
         superterm1_name, quality_id, quality_name, modifier,
         subterm2_id, subterm2_name, postcomp2_rel_id,
         postcomp2_rel_name, superterm2_id, superterm2_name,
         pub_id, env_id) = pheno_dat

        self.assertEqual(
            self.source._map_octuple_to_phenotype(
                subterm1_id, postcomp1_rel_id, superterm1_id, quality_id,
                subterm2_id, postcomp2_rel_id, superterm2_id, "abnormal"),
            'ZP:0022140')

    def test_load_zp_mappings(self):
        """
        test correct loading of zp mappings file and construction of zp_map
        """
        # NOTE: assertions are deliberately NOT wrapped in a broad
        # try/except; catching Exception also catches AssertionError and
        # would make this test unable to fail.
        zp_map = self.source._load_zp_mappings(
            "./tests/resources/zfin/zp-mapping-test.txt")
        self.assertIsInstance(
            zp_map, dict, "_load_zp_mappings() didn't return dict!")
        self.assertEqual(
            len(zp_map), 1,
            "_load_zp_mappings() didn't return exactly one thing!")
        self.assertDictEqual(
            zp_map,
            {
                'MONARCH:b308a8f1c67793a56d16': {
                    'post_composed_relationship_id_1': 'BFO:0000050',
                    'post_composed_relationship_id_2': 'BFO:0000050',
                    'quality_id': 'PATO:0001453',
                    'subterm1_id': 'ZFA:0009114',
                    'subterm2_id': 'GO:0005927',
                    'superterm1_id': 'ZFA:0001056',
                    'superterm2_id': 'ZFA:0001056',
                    'zp_id': 'ZP:0002959',
                    'modifier': 'PATO:0000460',
                }
            },
            "_load_zp_mappings() " + "didn't return what I expected!")

    def test_make_zpkey(self):
        """
        test that _make_zpkey returns correct id
        """
        # See test_load_zp_mappings: no exception swallowing here either.
        dummy_args = [str(num) for num in range(1, 9)]  # 1 - 8 as strings
        expected_key = self.source.make_id("_".join(dummy_args))
        self.assertEqual(self.source._make_zpkey(*dummy_args), expected_key)

        # Unpack the lists so _make_zpkey receives eight positional
        # strings, the same calling convention as the call above.
        self.assertEqual(
            self.source._make_zpkey(*(['0'] * 8)),
            self.source._make_zpkey(*([''] * 8)),
            "_make_zpkey() doesn't seem to be replacing empty " +
            "strings with zeros before making key," +
            "this might cause zp_map lookup issues")

    def test_genotype_labels(self):
        """
        test that genotype label is set correctly after parse()
        """
        test_resource_dir = "../../tests/resources/zfin/"
        # Point the source at small fixture files before parsing.
        self.source.files['fish_components']['file'] = \
            test_resource_dir + "genotype-label-test-fish_components_fish.txt"
        self.source.files['backgrounds']['file'] = \
            test_resource_dir + "genotype-label-test-genotype_backgrounds.txt"
        self.source.files['geno']['file'] = \
            test_resource_dir + "genotype-label-test-genotype_features.txt"

        self.source.parse()

        this_iri = URIRef("http://zfin.org/ZDB-GENO-070228-3")
        expect_genotype_label = "shha<sup>tbx392/tbx392</sup> (AB)"
        self.assertEqual(
            str(self.source.testgraph.label(this_iri, None)),
            expect_genotype_label)
def parse(self, limit=None):
    """
    Add gene-to-phenotype associations from the cleaned g2p file.

    Loads the ZP mapping file named by self.files['zpmap'] into a ZFIN
    helper, then reads the tab-separated file named by
    self.files['g2p_clean'] (both under self.rawdir). For every row
    whose modifier is "abnormal", the phenotype fields are mapped to a
    ZP id with ZFIN._map_octuple_to_phenotype(); when an id is found a
    G2PAssoc between "ZFIN:<gene_id>" and that id is added to
    self.graph, with the publication as source when one is given.

    Rows that do not have exactly the 25 expected fields (including
    blank lines) are logged and skipped instead of aborting the parse.

    :param limit: accepted for interface compatibility; this method
        does not use it and always processes every row
    :return: None
    """
    zfin_parser = ZFIN(self.graph_type, self.are_bnodes_skized)
    model = Model(self.graph)
    zp_file = '/'.join((self.rawdir, self.files['zpmap']['file']))
    g2p_file = '/'.join((self.rawdir, self.files['g2p_clean']['file']))
    zfin_parser.zp_map = zfin_parser._load_zp_mappings(zp_file)

    # Must match the number of names in the row unpacking below.
    expected_fields = 25

    with open(g2p_file, 'r', encoding="utf8") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in filereader:
            # Guard the tuple unpacking: a single malformed row would
            # otherwise raise ValueError and stop the whole parse.
            if len(row) != expected_fields:
                LOG.warning(
                    "Row: %i has %i fields, expected %i; skipping",
                    filereader.line_num, len(row), expected_fields)
                continue

            # Most names are unused; the full unpacking documents the
            # column layout of the input file.
            (internal_id, symbol, gene_id,
             subterm1_id, subterm1_label,
             pc_rel_id, pc_rel_label,
             superterm1_id, superterm1_label,
             quality_id, quality_name, modifier,
             subterm2_id, subterm2_label,
             pc_rel2_id, pc_rel2_label,
             superterm2_id, superterm2_label,
             fish_id, fish_label, start_stage, end_stage,
             environment, pub_id, figure_id) = row

            if modifier != "abnormal":
                LOG.warning(
                    "skipping phenotype with modifier != abnormal: %s",
                    modifier)
                continue

            zp_id = zfin_parser._map_octuple_to_phenotype(
                subterm1_id, pc_rel_id, superterm1_id, quality_id,
                subterm2_id, pc_rel2_id, superterm2_id, modifier)

            gene_curie = "ZFIN:{0}".format(gene_id)
            # Called for every "abnormal" row, whether or not a ZP id
            # was found (same as before).
            model.makeLeader(gene_curie)

            if not zp_id:
                continue

            assoc = G2PAssoc(self.graph, self.name, gene_curie, zp_id)
            if pub_id:
                pub_curie = "ZFIN:{0}".format(pub_id)
                reference = Reference(
                    self.graph, pub_curie, self.globaltt['document'])
                reference.addRefToGraph()
                assoc.add_source(pub_curie)

            assoc.add_evidence(
                self.globaltt['experimental phenotypic evidence'])
            assoc.add_association_to_graph()
def parse(self, limit=None):
    """
    Add gene-to-phenotype associations from the cleaned g2p file.

    Loads the ZP mappings through a ZFIN helper (by the 'zpmap' file
    key), then reads the tab-separated file named by
    self.files['g2p_clean']['file'] under self.rawdir. Fields are
    located by name using self.files['g2p_clean']['columns']. For every
    row whose 'Phenotype Tag' is 'abnormal', the phenotype fields are
    mapped to a ZP id with ZFIN._map_octuple_to_phenotype(); when an id
    is found a G2PAssoc between "ZFIN:<Gene ID>" and that id is added
    to self.graph, with the publication as source when one is given.

    Rows whose length differs from the column list are logged; rows
    that are shorter (including blank lines) are skipped because they
    cannot be indexed safely, while longer rows are still processed.

    :param limit: accepted for interface compatibility; this method
        does not use it and always processes every row
    :return: None
    :raises ValueError: if a required column name is missing from the
        configured column list
    """
    zfin_parser = ZFIN(self.graph_type, self.are_bnodes_skized)
    model = Model(self.graph)

    src_key = 'zpmap'  # keep same-as zfin.files[key]
    zfin_parser.zp_map = zfin_parser._load_zp_mappings(src_key)

    src_key = 'g2p_clean'
    raw = '/'.join((self.rawdir, self.files[src_key]['file']))
    LOG.info("Processing clean Geno to Pheno from file: %s", raw)

    col = self.files[src_key]['columns']
    collen = len(col)

    # Resolve the positions of the columns we read once, instead of
    # calling col.index() for every field of every row.
    gene_id_at = col.index('Gene ID')
    subterm1_id_at = col.index(
        'Affected Structure or Process 1 subterm ID')
    pc_rel_id_at = col.index('Post-composed Relationship ID')
    superterm1_id_at = col.index(
        'Affected Structure or Process 1 superterm ID')
    quality_id_at = col.index('Phenotype Keyword ID')
    modifier_at = col.index('Phenotype Tag')
    subterm2_id_at = col.index(
        'Affected Structure or Process 2 subterm ID')
    pc_rel2_id_at = col.index('Post-composed Relationship (rel) ID')
    superterm2_id_at = col.index(
        'Affected Structure or Process 2 superterm ID')
    pub_id_at = col.index('Publication ID')

    with open(raw, 'r', encoding="utf8") as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in reader:
            if len(row) != collen:
                LOG.warning(
                    'Row: %i has unexpected format', reader.line_num)
                if len(row) < collen:
                    # Indexing a short row would raise IndexError and
                    # abort the whole parse; skip it instead.
                    continue

            # All identifiers are stripped so stray whitespace cannot
            # leak into the lookup key or the generated curies.
            gene_id = row[gene_id_at].strip()
            subterm1_id = row[subterm1_id_at].strip()
            pc_rel_id = row[pc_rel_id_at].strip()
            superterm1_id = row[superterm1_id_at].strip()
            quality_id = row[quality_id_at].strip()
            modifier = row[modifier_at].strip()
            subterm2_id = row[subterm2_id_at].strip()
            pc_rel2_id = row[pc_rel2_id_at].strip()
            superterm2_id = row[superterm2_id_at].strip()
            pub_id = row[pub_id_at].strip()

            if modifier != 'abnormal':
                LOG.warning(
                    "skipping phenotype with modifier %s != abnormal ",
                    modifier)
                continue

            zp_id = zfin_parser._map_octuple_to_phenotype(
                subterm1_id, pc_rel_id, superterm1_id, quality_id,
                subterm2_id, pc_rel2_id, superterm2_id, modifier)

            gene_curie = "ZFIN:{0}".format(gene_id)
            # Called for every 'abnormal' row, whether or not a ZP id
            # was found (same as before).
            model.makeLeader(gene_curie)

            if not zp_id:
                continue

            assoc = G2PAssoc(self.graph, self.name, gene_curie, zp_id)
            if pub_id:
                pub_curie = "ZFIN:{0}".format(pub_id)
                reference = Reference(
                    self.graph, pub_curie, self.globaltt['document'])
                reference.addRefToGraph()
                assoc.add_source(pub_curie)

            assoc.add_evidence(
                self.globaltt['experimental phenotypic evidence'])
            assoc.add_association_to_graph()