def test_gpi_iterator(self):
    """Parse the yeast GPI sample (gpi-version 1.1) and check its contents.

    Verifies the number of records, that the first record carries exactly
    the GPI 1.1 field names, and the value of each checked field of that
    first record.
    """
    with open("UniProt/gp_information.goa_yeast.28.gpi") as handle:
        # Drain the iterator while the file is still open.
        records = list(GOA.gpi_iterator(handle))

    self.assertEqual(len(records), 300)

    first = records[0]
    self.assertEqual(sorted(first.keys()), sorted(GOA.GPI11FIELDS))

    # Field-by-field expectations for the first record.
    expected_name = [
        "Putative glutamine--fructose"
        "-6-phosphate aminotransferase"
        " [isomerizing]"
    ]
    self.assertEqual(first["DB_Object_ID"], "A2P2R3")
    self.assertEqual(first["DB_Object_Symbol"], "YMR084W")
    self.assertEqual(first["DB_Object_Name"], expected_name)
    self.assertEqual(first["DB_Object_Synonym"], ["YM084_YEAST", "YMR084W"])
    self.assertEqual(first["DB_Object_Type"], "protein")
    self.assertEqual(first["Taxon"], "taxon:559292")
    self.assertEqual(first["Parent_Object_ID"], "")
    self.assertEqual(first["DB_Xref"], [""])
    self.assertEqual(first["Gene_Product_Properties"], ["db_subset=Swiss-Prot"])
def test_gpi_iterator_one_two(self):
    """Parse the human GPI sample (gpi-version 1.2) and check its contents.

    Verifies the number of records, that the first record carries exactly
    the GPI 1.2 field names, and the value of each checked field of that
    first record.
    """
    with open("UniProt/goa_human_sample.gpi") as handle:
        # Drain the iterator while the file is still open.
        records = list(GOA.gpi_iterator(handle))

    self.assertEqual(len(records), 9)

    first = records[0]
    self.assertEqual(sorted(first.keys()), sorted(GOA.GPI12FIELDS))

    # Field-by-field expectations for the first record.
    self.assertEqual(first["DB"], "UniProtKB")
    self.assertEqual(first["DB_Object_ID"], "A0A024R1R8")
    self.assertEqual(first["DB_Object_Symbol"], "hCG_2014768")
    self.assertEqual(first["DB_Object_Name"], ["HCG2014768, isoform CRA_a"])
    self.assertEqual(first["DB_Object_Synonym"], ["hCG_2014768"])
    self.assertEqual(first["DB_Object_Type"], "protein")
    self.assertEqual(first["Taxon"], "taxon:9606")
    self.assertEqual(first["Parent_Object_ID"], "")
    self.assertEqual(first["DB_Xref"], [""])
    self.assertEqual(first["Gene_Product_Properties"], ["db_subset=TrEMBL"])
def parse_gpi(infile, taxon=''):
    """Collect the Swiss-Prot accessions of one taxon from a GPI file.

    Every record produced by ``GOAParser.gpi_iterator`` is inspected,
    including the first one.  A record is kept when the second
    ``:``-separated piece of its ``Taxon`` field equals *taxon* and the
    second ``=``-separated piece of its first ``Gene_Product_Properties``
    entry starts with ``Swiss-Prot``.

    Args:
        infile: Path of the gene product information (GPI) file to read.
        taxon: Taxonomy id, as a string, that a record must carry to be
            kept.  The comparison is an exact string match, so the default
            ``''`` only matches records whose taxon id is empty.

    Returns:
        A ``defaultdict`` (created without a default factory) mapping the
        ``DB_Object_ID`` of each kept record to ``1``.

    Raises:
        SystemExit: With status 1 when the first record has no
            ``Gene_Product_Properties`` field.
    """
    sp_id = defaultdict()
    # The context manager closes the file on every exit path, including
    # the sys.exit() call below.
    with open(infile, 'r') as infile_handle:
        # A single pass over the parser: validating the first record in a
        # separate loop would consume it and silently drop it from the
        # selection.
        for index, rec in enumerate(GOAParser.gpi_iterator(infile_handle)):
            if index == 0:
                # Single-argument print(...) prints the same text whether
                # it is parsed as a statement or as a function call.
                print(rec.keys())
                if 'Gene_Product_Properties' not in rec:
                    print("This version of the gp information file does not contain all required information")
                    sys.exit(1)

            taxon_parts = rec['Taxon'].split(':')
            properties = rec['Gene_Product_Properties']
            subset_parts = properties[0].split('=') if properties else []
            if len(taxon_parts) < 2 or len(subset_parts) < 2:
                # No ':' in the taxon or no '=' in the first property
                # (e.g. an empty column): there is nothing to compare.
                continue

            taxid = taxon_parts[1].strip()
            db = subset_parts[1].strip()
            if db.startswith('Swiss-Prot') and taxon == taxid:
                sp_id[rec['DB_Object_ID']] = 1
    return sp_id
def test_gpi_iterator(self):
    """Parse the yeast GPI sample and check its contents.

    Verifies the number of records, that the first record carries exactly
    the field names in ``GOA.GPI11FIELDS``, and the value of each checked
    field of that first record.
    """
    with open('UniProt/gp_information.goa_yeast.28.gpi', 'r') as handle:
        # Drain the iterator while the file is still open.
        records = list(GOA.gpi_iterator(handle))

    self.assertEqual(len(records), 300)

    first = records[0]
    self.assertEqual(sorted(first.keys()), sorted(GOA.GPI11FIELDS))

    # Field-by-field expectations for the first record.
    expected_name = [
        'Putative glutamine--fructose'
        '-6-phosphate aminotransferase'
        ' [isomerizing]'
    ]
    self.assertEqual(first['DB_Object_ID'], 'A2P2R3')
    self.assertEqual(first['DB_Object_Symbol'], 'YMR084W')
    self.assertEqual(first['DB_Object_Name'], expected_name)
    self.assertEqual(first['DB_Object_Synonym'], ['YM084_YEAST', 'YMR084W'])
    self.assertEqual(first['DB_Object_Type'], 'protein')
    self.assertEqual(first['Taxon'], 'taxon:559292')
    self.assertEqual(first['Parent_Object_ID'], '')
    self.assertEqual(first['DB_Xref'], [''])
    self.assertEqual(first['Gene_Product_Properties'], ['db_subset=Swiss-Prot'])