def test_embl_cds_interaction(self):
    """Parse the CDS features of an EMBL file and check the first one.

    Runs ``EmblScanner.parse_cds_features`` over ``EMBL/AE017046.embl``
    and verifies the number of entries returned, plus the id and
    description of the first entry.
    """
    scanner = Scanner.EmblScanner()

    # Materialise the parser output while the file is still open.
    with open("EMBL/AE017046.embl") as embl_handle:
        cds_entries = list(scanner.parse_cds_features(embl_handle))

    # The file is expected to yield ten CDS entries.
    self.assertEqual(len(cds_entries), 10)

    # Only look at the first entry once the count has been confirmed.
    first_cds = cds_entries[0]
    self.assertEqual(first_cds.id, 'AAS58758.1')
    self.assertEqual(first_cds.description, 'putative transposase')
def test_embl_record_interaction(self):
    """Parse whole records from an EMBL file and check the result.

    Runs ``EmblScanner.parse_records`` with ``do_features=True`` over
    ``EMBL/AE017046.embl`` and verifies the number of records, plus the
    id, description and feature count of the first record.
    """
    scanner = Scanner.EmblScanner()

    # Materialise the parser output while the file is still open.
    with open("EMBL/AE017046.embl") as embl_handle:
        records = list(scanner.parse_records(embl_handle, do_features=True))

    # The file is expected to hold exactly one record.
    self.assertEqual(len(records), 1)

    # Only look at the record once the count has been confirmed.
    record = records[0]
    self.assertEqual(record.id, 'AE017046.1')
    expected_description = ('Yersinia pestis biovar Microtus '
                            'str. 91001 plasmid pPCP1, complete '
                            'sequence.')
    self.assertEqual(record.description, expected_description)
    self.assertEqual(len(record.features), 29)
def test_topology_embl(self):
    """Check the annotations extracted from an EMBL ID line.

    Each case feeds a single ID line to ``EmblScanner._feed_first_line``
    with a fresh feature consumer, then compares the ``topology``,
    ``molecule_type`` and ``data_file_division`` annotations against the
    expected values. ``None`` means the annotation should be absent.
    """
    # This is a bit low level, but it lets us test parsing of the ID line
    # on its own.
    # NOTE(review): the ID lines below are reproduced exactly as they
    # appear here; confirm their internal spacing matches real EMBL files.
    id_line_cases = [
        # (ID line, topology, molecule type, division)
        # Modern examples with sequence version
        ("ID X56734; SV 1; linear; mRNA; STD; PLN; 1859 BP.",
         "linear", "mRNA", "PLN"),
        ("ID CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.",
         "linear", "genomic DNA", "MAM"),
        # Example to match GenBank example used above:
        ("ID U49845; SV 1; linear; genomic DNA; STD; FUN; 5028 BP.",
         "linear", "genomic DNA", "FUN"),
        # Old examples:
        ("ID BSUB9999 standard; circular DNA; PRO; 4214630 BP.",
         "circular", "DNA", "PRO"),
        ("ID SC10H5 standard; DNA; PRO; 4870 BP.",
         None, "DNA", "PRO"),
        # Patent example from 2016-06-10
        # ftp://ftp.ebi.ac.uk/pub/databases/embl/patent/
        ("ID A01679; SV 1; linear; unassigned DNA; PAT; MUS; 12 BP.",
         "linear", "unassigned DNA", "MUS"),
        # Old patent examples
        ("ID NRP_AX000635; PRT; NR1; 15 SQ",
         None, None, "NR1"),
        ("ID NRP0000016E; PRT; NR2; 5 SQ",
         None, None, "NR2"),
        # KIPO patent examples
        ("ID DI500001 STANDARD; PRT; 111 AA.",
         None, None, None),
        ("ID DI644510 standard; PRT; 1852 AA.",
         None, None, None),
    ]

    for id_line, want_topology, want_mol_type, want_division in id_line_cases:
        # A fresh scanner and consumer per case keeps the cases independent.
        embl_scanner = Scanner.EmblScanner()
        feature_consumer = GenBank._FeatureConsumer(
            1, GenBank.FeatureValueCleaner)
        embl_scanner._feed_first_line(feature_consumer, id_line)

        # (annotation key, expected value, failure message template),
        # checked in this order so the first mismatch reported is stable.
        annotation_checks = (
            ('topology', want_topology,
             "Wrong topology %r not %r from %r"),
            ('molecule_type', want_mol_type,
             "Wrong molecule_type %r not %r from %r"),
            ('data_file_division', want_division,
             "Wrong division %r not %r from %r"),
        )
        for key, wanted, template in annotation_checks:
            found = feature_consumer.data.annotations.get(key, None)
            self.assertEqual(
                found, wanted, template % (found, wanted, id_line))