def test_short_sequences(self):
    """Check that no gene is reported for any short prefix of a tiny sequence.

    A ``Pyrodigal(meta=True)`` instance is run on every non-empty proper
    prefix of the sequence; each result must be empty, both by length and
    by iteration.
    """
    seq = "AATGTAGGAAAAACAGCATTTTCATTTCGCCATTTT"
    finder = Pyrodigal(meta=True)
    for end in range(1, len(seq)):
        prefix = seq[:end]
        predicted = finder.find_genes(prefix)
        self.assertEqual(len(predicted), 0)
        # Build the iterator first so only `next` runs under the assertion.
        iterator = iter(predicted)
        with self.assertRaises(StopIteration):
            next(iterator)
def test_training_info_deallocation(self):
    """Check that genes can still be translated after their finder is deleted.

    A ``Pyrodigal(meta=False)`` instance is trained on the record sequence
    (warnings silenced during training), used to find genes, and then
    deleted before the first gene is translated and compared with the
    first reference protein.
    """
    finder = Pyrodigal(meta=False)
    sequence = str(self.record.seq)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        finder.train(sequence)
    predicted = finder.find_genes(sequence)
    # Remove the local name of the finder. Per the original note, this
    # should not deallocate the training info since it is reference-counted.
    del finder
    translated = predicted[0].translate()
    self.assertEqual(translated, str(self.proteins[0].seq))
def test_empty_sequence(self):
    """Check that a trained finder reports no gene for an empty string.

    The ``Pyrodigal(meta=False)`` instance is trained on the first 20000
    positions of the record sequence, with warnings silenced during
    training, before being run on ``""``.
    """
    finder = Pyrodigal(meta=False)
    training_sequence = str(self.record.seq[:20000])
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        finder.train(training_sequence)
    predicted = finder.find_genes("")
    self.assertEqual(len(predicted), 0)
    # Build the iterator first so only `next` runs under the assertion.
    iterator = iter(predicted)
    with self.assertRaises(StopIteration):
        next(iterator)
def test_short_sequences(self):
    """Check that a trained finder reports no gene for short prefixes.

    The ``Pyrodigal(meta=False)`` instance is trained on the first 20000
    positions of the record sequence (warnings silenced during training),
    then run on every non-empty proper prefix of a tiny sequence; each
    result must be empty, both by length and by iteration.
    """
    seq = "AATGTAGGAAAAACAGCATTTTCATTTCGCCATTTT"
    finder = Pyrodigal(meta=False)
    training_sequence = str(self.record.seq[:20000])
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        finder.train(training_sequence)
    for end in range(1, len(seq)):
        predicted = finder.find_genes(seq[:end])
        self.assertEqual(len(predicted), 0)
        # Build the iterator first so only `next` runs under the assertion.
        iterator = iter(predicted)
        with self.assertRaises(StopIteration):
            next(iterator)
def run_prodigal(seqrecord):
    """Compute density values for one record under translation tables 4, 11 and 15.

    Genes are predicted with ``Pyrodigal(meta=True)`` (no training step) and
    translated once per translation table. The three lists of translations
    are passed to ``find_max`` (defined elsewhere), and every value it
    returns is divided by the nucleotide length of the record.

    Args:
        seqrecord: Object exposing ``id`` and ``seq`` attributes
            (presumably a Biopython ``SeqRecord`` -- confirm with callers).

    Returns:
        list: The record identifier followed by one float per value
        returned by ``find_max``. No table is selected here; picking the
        best one is left to the caller.

    Raises:
        ZeroDivisionError: If the sequence is empty and ``find_max``
            returns at least one value.
    """
    record_id = seqrecord.id
    # Convert the sequence once and reuse it for both length and prediction.
    sequence = str(seqrecord.seq)
    nuclen = len(sequence)

    finder = Pyrodigal(meta=True)
    genes = finder.find_genes(sequence)

    table_11 = [gene.translate(translation_table=11) for gene in genes]
    table_4 = [gene.translate(translation_table=4) for gene in genes]
    table_15 = [gene.translate(translation_table=15) for gene in genes]

    # NOTE(review): `find_max` is assumed to return one length-like number
    # per table, in the order the tables are passed -- confirm.
    all_lens = find_max(table_4, table_11, table_15)
    total = float(nuclen)
    table_densities = [float(length) / total for length in all_lens]
    return [record_id] + table_densities
def setUpClass(cls):
    """Load the first record of the FASTA fixture and predict its genes once.

    Reads ``SRR492066.fna.gz`` from the ``data`` directory located next to
    this file, then stores on the class:

    * ``cls.record``: the first record parsed from the FASTA file,
    * ``cls.p``: a ``Pyrodigal(meta=True)`` instance,
    * ``cls.genes``: the result of ``cls.p.find_genes`` on that record.
    """
    # NOTE(review): this takes `cls`, so it is presumably decorated with
    # @classmethod outside the visible source -- confirm.
    data = os.path.realpath(os.path.join(__file__, "..", "data"))
    fna = os.path.join(data, "SRR492066.fna.gz")
    with gzip.open(fna, "rt") as f:
        cls.record = next(Bio.SeqIO.parse(f, "fasta"))
    cls.p = Pyrodigal(meta=True)
    cls.genes = cls.p.find_genes(str(cls.record.seq))
def test_overflow(self):
    """Check that this contig yields exactly one gene, partial at both ends.

    The finder is created with ``meta=True, closed=False``; the single
    predicted gene must have an ``"Edge"`` start type and be flagged as
    partial at its beginning and at its end.
    """
    # > 180195.SAMN03785337.LFLS01000089
    seq = """
    AACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAAC
    AGCACTGGCAATCTGACTGTGGGCGGTGTTACCAACGGCACTGCTACTACTGGCAACATC
    GCACTGACCGGTAACAATGCGCTGAGCGGTCCGGTCAATCTGAATGCGTCGAATGGCACG
    GTGACCTTGAACACGACCGGCAATACCACGCTCGGTAACGTGACGGCACAAGGCAATGTG
    ACGACCAATGTGTCCAACGGCAGTCTGACGGTTACCGGCAATACGACAGGTGCCAACACC
    AACCTCAGTGCCAGCGGCAACCTGACCGTGGGTAACCAGGGCAATATCAGTACCGCAGGC
    AATGCAACCCTGACGGCCGGCGACAACCTGACGAGCACTGGCAATCTGACTGTGGGCGGC
    GTCACCAACGGCACGGCCACCACCGGCAACATCGCGCTGACCGGTAACAATGCACTGGCT
    GGTCCTGTCAATCTGAACGCGCCGAACGGCACCGTGACCCTGAACACAACCGGCAATACC
    ACGCTGGGTAATGTCACCGCACAAGGCAATGTGACGACTAATGTGTCCAACGGCAGCCTG
    ACAGTCGCTGGCAATACCACAGGTGCCAACACCAACCTGAGTGCCAGCGGCAATCTGACC
    GTGGGCAACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAAC
    CTGAGC
    """
    # Strip the literal's indentation, then join the lines into one sequence.
    flattened = textwrap.dedent(seq).replace("\n", "")
    finder = Pyrodigal(meta=True, closed=False)
    predicted = finder.find_genes(flattened)
    self.assertEqual(len(predicted), 1)
    gene = predicted[0]
    self.assertEqual(gene.start_type, "Edge")
    self.assertTrue(gene.partial_begin)
    self.assertTrue(gene.partial_end)
def find_genes(cls, seq):
    """Return the genes found in *seq* by a fresh ``Pyrodigal(meta=True)``."""
    return Pyrodigal(meta=True).find_genes(seq)
def test_train(self):
    """Check that ``train`` raises ``RuntimeError`` when ``meta=True``."""
    finder = Pyrodigal(meta=True)
    # Convert outside the assertion so only `train` is under test.
    sequence = str(self.record.seq)
    with self.assertRaises(RuntimeError):
        finder.train(sequence)
def test_train_not_called(self):
    """Check that ``find_genes`` raises ``RuntimeError`` on an untrained ``meta=False`` finder."""
    finder = Pyrodigal(meta=False)
    # Convert outside the assertion so only `find_genes` is under test.
    sequence = str(self.record.seq)
    with self.assertRaises(RuntimeError):
        finder.find_genes(sequence)
def find_genes(cls, seq):
    """Train a fresh ``Pyrodigal(meta=False)`` on *seq* and return its genes for *seq*.

    Warnings emitted while training are silenced.
    """
    finder = Pyrodigal(meta=False)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        finder.train(seq)
    predicted = finder.find_genes(seq)
    return predicted
def test_empty_sequence(self):
    """Check that ``Pyrodigal(meta=True)`` reports no gene for an empty string."""
    predicted = Pyrodigal(meta=True).find_genes("")
    self.assertEqual(len(predicted), 0)
    # Build the iterator first so only `next` runs under the assertion.
    iterator = iter(predicted)
    with self.assertRaises(StopIteration):
        next(iterator)