def setUp(self):
    """Create the sequence fixtures stored on the test case.

    Sets ``self.s`` (a single Seq), the ``self.dna``, ``self.rna``,
    ``self.nuc`` and ``self.protein`` lists of Seq/MutableSeq objects,
    and ``self.test_chars`` (a mix of plain strings and Seq objects).
    """
    immutable = Seq.Seq
    mutable = Seq.MutableSeq

    self.s = immutable("TCAAAAGGATGCATCATG")

    self.dna = [
        immutable("ATCG"),
        immutable("gtca"),
        mutable("GGTCA"),
        immutable("CTG-CA"),
    ]

    self.rna = [
        immutable("AUUUCG"),
        mutable("AUUCG"),
        immutable("uCAg"),
        mutable("UC-AG"),
        immutable("U.CAG"),
    ]

    self.nuc = [immutable("ATCG")]

    self.protein = [
        immutable("ATCGPK"),
        immutable("atcGPK"),
        immutable("T.CGPK"),
        immutable("T-CGPK"),
        immutable("MEDG-KRXR*"),
        mutable("ME-K-DRXR*XU"),
        immutable("MEDG-KRXR@"),
        immutable("ME-KR@"),
        immutable("MEDG.KRXR@"),
    ]

    self.test_chars = ["-", immutable("-"), immutable("*"), "-X@"]
def setUp(self):
    """Create the ``dna``, ``rna``, ``nuc`` and ``protein`` fixture lists.

    Each list holds Seq and MutableSeq objects followed by one or more
    plain Python strings.
    """
    immutable = Seq.Seq
    mutable = Seq.MutableSeq

    self.dna = [
        immutable("ATCG"),
        immutable("gtca"),
        mutable("GGTCA"),
        immutable("CTG-CA"),
        "TGGTCA",
    ]

    self.rna = [
        immutable("AUUUCG"),
        mutable("AUUCG"),
        immutable("uCAg"),
        mutable("UC-AG"),
        immutable("U.CAG"),
        "UGCAU",
    ]

    self.nuc = [
        immutable("ATCG"),
        "UUUTTTACG",
    ]

    self.protein = [
        immutable("ATCGPK"),
        immutable("atcGPK"),
        immutable("T.CGPK"),
        immutable("T-CGPK"),
        immutable("MEDG-KRXR*"),
        mutable("ME-K-DRXR*XU"),
        "TEDDF",
    ]
def setUp(self):
    """Create alphabet-annotated ``dna``, ``rna``, ``nuc`` and ``protein`` lists.

    Each list holds Seq and MutableSeq objects built with an explicit
    alphabet, followed by one or more plain Python strings.
    """
    immutable = Seq.Seq
    mutable = Seq.MutableSeq
    generic_dna = Alphabet.generic_dna
    generic_rna = Alphabet.generic_rna
    generic_protein = Alphabet.generic_protein

    self.dna = [
        immutable("ATCG", IUPAC.ambiguous_dna),
        immutable("gtca", generic_dna),
        mutable("GGTCA", generic_dna),
        immutable("CTG-CA", generic_dna),
        "TGGTCA",
    ]

    self.rna = [
        immutable("AUUUCG", IUPAC.ambiguous_rna),
        mutable("AUUCG", IUPAC.ambiguous_rna),
        immutable("uCAg", generic_rna),
        mutable("UC-AG", generic_rna),
        immutable("U.CAG", generic_rna),
        "UGCAU",
    ]

    self.nuc = [
        immutable("ATCG", Alphabet.generic_nucleotide),
        "UUUTTTACG",
    ]

    self.protein = [
        immutable("ATCGPK", IUPAC.protein),
        immutable("atcGPK", generic_protein),
        immutable("T.CGPK", generic_protein),
        immutable("T-CGPK", generic_protein),
        immutable("MEDG-KRXR*", generic_protein),
        mutable("ME-K-DRXR*XU", generic_protein),
        "TEDDF",
    ]
def setUp(self):
    """Create the alphabet-annotated sequence fixtures stored on the test case.

    Sets ``self.s`` (a single Seq), the ``self.dna``, ``self.rna``,
    ``self.nuc`` and ``self.protein`` lists of Seq/MutableSeq objects,
    and ``self.test_chars`` (a mix of plain strings and Seq objects).
    """
    immutable = Seq.Seq
    mutable = Seq.MutableSeq
    generic_dna = Alphabet.generic_dna
    generic_rna = Alphabet.generic_rna
    generic_protein = Alphabet.generic_protein

    self.s = immutable("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna)

    self.dna = [
        immutable("ATCG", IUPAC.ambiguous_dna),
        immutable("gtca", generic_dna),
        mutable("GGTCA", generic_dna),
        immutable("CTG-CA", generic_dna),
    ]

    self.rna = [
        immutable("AUUUCG", IUPAC.ambiguous_rna),
        mutable("AUUCG", IUPAC.ambiguous_rna),
        immutable("uCAg", generic_rna),
        mutable("UC-AG", generic_rna),
        immutable("U.CAG", generic_rna),
    ]

    self.nuc = [immutable("ATCG", Alphabet.generic_nucleotide)]

    self.protein = [
        immutable("ATCGPK", IUPAC.protein),
        immutable("atcGPK", generic_protein),
        immutable("T.CGPK", generic_protein),
        immutable("T-CGPK", generic_protein),
        immutable("MEDG-KRXR*", generic_protein),
        mutable("ME-K-DRXR*XU", generic_protein),
        immutable("MEDG-KRXR@", generic_protein),
        immutable("ME-KR@", generic_protein),
        immutable("MEDG.KRXR@", generic_protein),
    ]

    # The test characters themselves carry no alphabet argument.
    self.test_chars = ["-", immutable("-"), immutable("*"), "-X@"]
def setUp(self):
    """Populate ``self.test_seqs`` with Seq objects followed by two MutableSeq objects."""
    immutable_letters = [
        "TCAAAAGGATGCATCATG",
        "ATGAAACTG",
        "ATGAARCTG",
        "AWGAARCKG",  # Note no U or T
        "".join(ambiguous_rna_values),
        "".join(ambiguous_dna_values),
        "AUGAAACUG",
        "ATGAAACTGWN",
        "AUGAAACUGWN",
    ]
    mutable_letters = ["ATGAAACTG", "AUGaaaCUG"]

    fixtures = [Seq.Seq(letters) for letters in immutable_letters]
    fixtures.extend(Seq.MutableSeq(letters) for letters in mutable_letters)
    self.test_seqs = fixtures
def test_remove_items(self):
    """Check remove() deletes the first G and raises ValueError for an absent letter."""
    self.mutable_s.remove("G")
    without_first_g = Seq.MutableSeq("TCAAAAGATGCATCATG")
    self.assertEqual(without_first_g, self.mutable_s, "Remove first G")

    # "Z" is not part of the sequence, so removing it must fail.
    with self.assertRaises(ValueError):
        self.mutable_s.remove("Z")
def test_mutableseq_construction(self):
    """Test MutableSeq object initialization."""
    sequence = bytes(self.s)
    text = sequence.decode("ASCII")

    # Supported inputs: each must yield a MutableSeq equal to self.s.
    supported_inputs = (
        (sequence, "Initializing MutableSeq from bytes"),
        (bytearray(sequence), "Initializing MutableSeq from bytearray"),
        (text, "Initializing MutableSeq from str"),
        (self.s, "Initializing MutableSeq from Seq"),
    )
    for data, message in supported_inputs:
        mutable_s = Seq.MutableSeq(data)
        self.assertIsInstance(mutable_s, Seq.MutableSeq, message)
        self.assertEqual(mutable_s, self.s)

    copied = Seq.MutableSeq(Seq.MutableSeq(sequence))
    self.assertEqual(copied, self.s)
    self.assertIsInstance(
        copied, Seq.MutableSeq, "Initializing MutableSeq from MutableSeq"
    )

    # Deprecated:
    with self.assertWarns(BiopythonDeprecationWarning):
        from_array = Seq.MutableSeq(array.array("u", text))
    self.assertIsInstance(from_array, Seq.MutableSeq, "Creating MutableSeq using array")
    self.assertEqual(from_array, self.s)

    # All are Latin-1 characters
    with self.assertRaises(UnicodeEncodeError):
        Seq.MutableSeq("ÄþÇÐ")
    # These are not
    with self.assertRaises(UnicodeEncodeError):
        Seq.MutableSeq("あいうえお")
def test_setting_slices(self):
    """Check reading a slice and assigning to a slice from a str and from a MutableSeq."""
    initial_slice = self.mutable_s[1:5]
    self.assertEqual(Seq.MutableSeq("CAAA"), initial_slice, "Slice mutable seq")

    # Three letters replace a two-letter slice, so the sequence grows by one.
    self.mutable_s[1:3] = "GAT"
    after_string = Seq.MutableSeq("TGATAAAGGATGCATCATG")
    self.assertEqual(
        after_string,
        self.mutable_s,
        "Set slice with string and adding extra nucleotide",
    )

    replacement = self.mutable_s[5:7]
    self.mutable_s[1:3] = replacement
    after_mutable = Seq.MutableSeq("TAATAAAGGATGCATCATG")
    self.assertEqual(after_mutable, self.mutable_s, "Set slice with MutableSeq")
def test_mutableseq_upper_lower(self):
    """Check lower()/upper() on MutableSeq with and without ``inplace``.

    Without ``inplace`` (or with ``inplace=False``) the original object
    must stay unchanged; with ``inplace=True`` the very same object is
    returned.
    """
    original = "ACgt"
    for method_name, converted in (("lower", "acgt"), ("upper", "ACGT")):
        seq = Seq.MutableSeq(original)
        convert = getattr(seq, method_name)

        result = convert()
        self.assertEqual(result, converted)
        self.assertEqual(seq, original)

        result = convert(inplace=False)
        self.assertEqual(result, converted)
        self.assertEqual(seq, original)

        result = convert(inplace=True)
        self.assertEqual(result, converted)
        self.assertIs(result, seq)
def setUp(self):
    """Populate ``self.test_seqs`` with Seq objects followed by two MutableSeq objects.

    Entries are built with a mix of no alphabet, generic alphabets and
    IUPAC alphabets.
    """
    rna_letters = "".join(ambiguous_rna_values)
    dna_letters = "".join(ambiguous_dna_values)

    # Positional arguments for each Seq.Seq call, in fixture order.
    seq_arguments = [
        ("TCAAAAGGATGCATCATG", IUPAC.unambiguous_dna),
        ("ATGAAACTG",),
        ("ATGAARCTG",),
        ("AWGAARCKG",),  # Note no U or T
        (rna_letters,),
        (dna_letters,),
        (rna_letters, Alphabet.generic_rna),
        (dna_letters, Alphabet.generic_dna),
        (rna_letters, IUPAC.IUPACAmbiguousRNA()),
        (dna_letters, IUPAC.IUPACAmbiguousDNA()),
        ("AWGAARCKG", Alphabet.generic_dna),
        ("AUGAAACUG", Alphabet.generic_rna),
        ("ATGAAACTG", IUPAC.unambiguous_dna),
        ("ATGAAACTGWN", IUPAC.ambiguous_dna),
        ("AUGAAACUG", Alphabet.generic_rna),
        ("AUGAAACUG", IUPAC.unambiguous_rna),
        ("AUGAAACUGWN", IUPAC.ambiguous_rna),
        ("ATGAAACTG", Alphabet.generic_nucleotide),
    ]

    fixtures = [Seq.Seq(*arguments) for arguments in seq_arguments]
    fixtures.append(Seq.MutableSeq("ATGAAACTG", Alphabet.generic_dna))
    fixtures.append(Seq.MutableSeq("AUGaaaCUG", IUPAC.unambiguous_rna))
    self.test_seqs = fixtures
def test_seq_construction(self):
    """Test Seq object initialization.

    A Seq built from bytes, bytearray, str, another Seq or a MutableSeq
    must be a Seq instance equal to ``self.s``; text that cannot be
    encoded must raise UnicodeEncodeError.
    """
    sequence = bytes(self.s)

    # The failure messages name Seq, the class actually under test here.
    supported_inputs = (
        (sequence, "Creating Seq using bytes"),
        (bytearray(sequence), "Creating Seq using bytearray"),
        (sequence.decode("ASCII"), "Creating Seq using str"),
        (self.s, "Creating Seq using Seq"),
        (Seq.MutableSeq(sequence), "Creating Seq using MutableSeq"),
    )
    for data, message in supported_inputs:
        s = Seq.Seq(data)
        self.assertIsInstance(s, Seq.Seq, message)
        self.assertEqual(s, self.s)

    # All are Latin-1 characters
    self.assertRaises(UnicodeEncodeError, Seq.Seq, "ÄþÇÐ")
    # These are not
    self.assertRaises(UnicodeEncodeError, Seq.Seq, "あいうえお")
def translate_sequences(filename):
    """Yield one translated sequence per record of a GenBank file.

    For every record, the features of type "exon" or "CDS" are extracted
    from the record's sequence, concatenated in feature order, and the
    spliced result is translated. Progress messages are printed along
    the way.

    Arguments:
     - filename: the file containing the DNA sequences, in GenBank format

    Yields the translation of each spliced record.
    """
    wanted_types = ("exon", "CDS")
    for index, record in enumerate(SeqIO.parse(filename, "genbank")):
        print(f"Translating sequence {index} of {filename}...")
        coding = [feat for feat in record.features if feat.type in wanted_types]
        print(f"record {index}: {len(coding)} features.")

        spliced = Seq.MutableSeq("")
        print(f"Splicing record {index}...")
        for feat in coding:
            spliced.extend(feat.extract(record.seq))

        immutable = spliced.toseq()
        yield immutable.translate()
def test_mutable_seq(self):
    """Check that custom attributes can be set, overwritten and deleted on a MutableSeq."""
    s = Seq.MutableSeq("ACGT")

    def expect_missing(name):
        # Reading an attribute that is not set must raise AttributeError.
        with self.assertRaises(AttributeError):
            getattr(s, name)

    def expect_present(name, value):
        self.assertIn(name, dir(s))
        self.assertEqual(getattr(s, name), value)

    # Single attribute: set, read back, delete.
    expect_missing("dog")
    setattr(s, "dog", "woof")
    expect_present("dog", "woof")
    delattr(s, "dog")
    expect_missing("dog")
    self.assertNotIn("dog", dir(s))

    # Two attributes: deleting one must leave the other untouched.
    expect_missing("cat")
    setattr(s, "dog", "woof")
    setattr(s, "cat", "meow")
    expect_present("dog", "woof")
    expect_present("cat", "meow")
    delattr(s, "dog")
    expect_missing("dog")
    self.assertNotIn("dog", dir(s))
    expect_present("cat", "meow")
    delattr(s, "cat")
    expect_missing("cat")
    self.assertNotIn("cat", dir(s))

    # Overwriting keeps only the latest value.
    setattr(s, "dog", "woof")
    setattr(s, "dog", "bark")
    expect_present("dog", "bark")
    delattr(s, "dog")
    expect_missing("dog")
    self.assertNotIn("dog", dir(s))
def test_complement_rna_string(self):
    """Check complement() on a mixed-case MutableSeq containing U.

    The return value is ignored and the sequence itself is inspected, so
    this test relies on complement() modifying the object in place.
    """
    rna = Seq.MutableSeq("AUGaaaCUG")
    rna.complement()
    complemented = str(rna)
    self.assertEqual("UACuuuGAC", complemented)
def test_complement_mixed_aphabets(self):
    """Check complement() raises ValueError when the sequence has both U and T."""
    mixed = Seq.MutableSeq("AUGaaaCTG")
    self.assertRaises(ValueError, mixed.complement)
# Nucleotide-style fixtures: Seq objects, two MutableSeq objects, then one more Seq.
test_seqs = [
    *(
        Seq.Seq(letters)
        for letters in (
            "TCAAAAGGATGCATCATG",
            "T",
            "ATGAAACTG",
            "ATGAARCTG",
            "AWGAARCKG",  # Note no U or T
            "".join(ambiguous_rna_values),
            "".join(ambiguous_dna_values),
            "AWGAARCKG",
            "AUGAAACUG",
            "ATGAAA-CTG",
            "ATGAAACTGWN",
            "AUGAAA==CUG",
            "AUGAAACUGWN",
            "AUGAAACTG",  # U and T
        )
    ),
    Seq.MutableSeq("ATGAAACTG"),
    Seq.MutableSeq("AUGaaaCUG"),
    Seq.Seq("ACTGTCGTCT"),
]

# Protein-style fixtures: Seq objects with one MutableSeq in the middle.
protein_seqs = [
    *(
        Seq.Seq(letters)
        for letters in (
            "ATCGPK",
            "T.CGPK",
            "T-CGPK",
            "MEDG-KRXR*",
        )
    ),
    Seq.MutableSeq("ME-K-DRXR*XU"),
    *(
        Seq.Seq(letters)
        for letters in (
            "MEDG-KRXR@",
            "ME-KR@",
            "MEDG.KRXR@",
        )
    ),
]
def test_setting_item(self):
    """Check that assigning to a single index replaces that letter."""
    position = 3
    self.mutable_s[position] = "G"
    expected = Seq.MutableSeq("TCAGAAGGATGCATCATG")
    self.assertEqual(expected, self.mutable_s)
def test_inserting(self):
    """Check that insert() places a letter at the given index."""
    position, letter = 4, "G"
    self.mutable_s.insert(position, letter)
    expected = Seq.MutableSeq("TCAAGAAGGATGCATCATG")
    self.assertEqual(expected, self.mutable_s)
print repr(test_seq[0::3]) print repr(test_seq[1::3]) print repr(test_seq[2::3]) print "Setting wobble codon to N (set slice with stride 3):" test_seq[2::3] = "N" * len(test_seq[2::3]) print repr(test_seq) ########################################################################### print print "Testing Seq addition" print "====================" dna = [ Seq.Seq("ATCG", IUPAC.ambiguous_dna), Seq.Seq("gtca", Alphabet.generic_dna), Seq.MutableSeq("GGTCA", Alphabet.generic_dna), Seq.Seq("CTG-CA", Alphabet.Gapped(IUPAC.unambiguous_dna, "-")), "TGGTCA" ] rna = [ Seq.Seq("AUUUCG", IUPAC.ambiguous_rna), Seq.MutableSeq("AUUCG", IUPAC.ambiguous_rna), Seq.Seq("uCAg", Alphabet.generic_rna), Seq.MutableSeq("UC-AG", Alphabet.Gapped(Alphabet.generic_rna, "-")), Seq.Seq("U.CAG", Alphabet.Gapped(Alphabet.generic_rna, ".")), "UGCAU" ] nuc = [Seq.Seq("ATCG", Alphabet.generic_nucleotide), "UUUTTTACG"] protein = [ Seq.Seq("ATCGPK", IUPAC.protein), Seq.Seq("atcGPK", Alphabet.generic_protein), Seq.Seq("T.CGPK", Alphabet.Gapped(IUPAC.protein, ".")), Seq.Seq("T-CGPK", Alphabet.Gapped(IUPAC.protein, "-")),
def test_delete_stride_slice(self):
    """Check deleting a slice whose stop is given as the expression ``6 - 1``.

    Despite the test name, the slice has no stride component.
    """
    start = 4
    stop = 6 - 1
    del self.mutable_s[start:stop]
    expected = Seq.MutableSeq("TCAAAGGATGCATCATG")
    self.assertEqual(expected, self.mutable_s)
def test_extract_third_nucleotide(self):
    """Test extracting every third nucleotide (slicing with stride 3)."""
    # Expected letters when starting at offsets 0, 1 and 2 respectively.
    expected_by_offset = ("TAGTAA", "CAGGTT", "AAACCG")
    for offset, letters in enumerate(expected_by_offset):
        self.assertEqual(Seq.MutableSeq(letters), self.mutable_s[offset::3])
def test_extend_method(self):
    """Check that extend() appends the letters of a plain string."""
    suffix = "GAT"
    self.mutable_s.extend(suffix)
    expected = Seq.MutableSeq("TCAAAAGGATGCATCATGGAT")
    self.assertEqual(expected, self.mutable_s)
def test_extend_with_mutable_seq(self):
    """Check that extend() appends the letters of another MutableSeq."""
    suffix = Seq.MutableSeq("TTT")
    self.mutable_s.extend(suffix)
    expected = Seq.MutableSeq("TCAAAAGGATGCATCATGTTT")
    self.assertEqual(expected, self.mutable_s)
def test_reverse_with_stride(self):
    """Test reverse using -1 stride."""
    expected = Seq.MutableSeq("GTACTACGTAGGAAAACT")
    backwards = self.mutable_s[::-1]
    self.assertEqual(expected, backwards)
def test_deleting_slice(self):
    """Check that deleting a one-letter slice shortens the sequence."""
    start, stop = 4, 5
    del self.mutable_s[start:stop]
    expected = Seq.MutableSeq("TCAAAGGATGCATCATG")
    self.assertEqual(expected, self.mutable_s)
def test_deleting_item(self):
    """Check that deleting a single index removes that letter."""
    position = 3
    del self.mutable_s[position]
    expected = Seq.MutableSeq("TCAAAGGATGCATCATG")
    self.assertEqual(expected, self.mutable_s)
def test_complement_dna_string(self):
    """Check complement() on a mixed-case MutableSeq containing T.

    The return value is ignored and the sequence itself is inspected, so
    this test relies on complement() modifying the object in place.
    """
    dna = Seq.MutableSeq("ATGaaaCTG")
    dna.complement()
    complemented = str(dna)
    self.assertEqual("TACtttGAC", complemented)
def test_set_wobble_codon_to_n(self):
    """Test setting wobble codon to N (set slice with stride 3)."""
    wobble_positions = self.mutable_s[2::3]
    # An extended-slice assignment needs a replacement of the same length.
    replacement = "N" * len(wobble_positions)
    self.mutable_s[2::3] = replacement
    expected = Seq.MutableSeq("TCNAANGGNTGNATNATN")
    self.assertEqual(expected, self.mutable_s)
def test_reverse(self):
    """Test using reverse method."""
    self.mutable_s.reverse()
    expected = Seq.MutableSeq("GTACTACGTAGGAAAACT")
    self.assertEqual(expected, self.mutable_s)
def test_appending(self):
    """Check that append() adds a single letter at the end."""
    letter = "C"
    self.mutable_s.append(letter)
    expected = Seq.MutableSeq("TCAAAAGGATGCATCATGC")
    self.assertEqual(expected, self.mutable_s)