def test_make_seq(self):
    """make_seq defaults to the text moltype and honours an explicit moltype"""
    raw = "ACGGT"

    # without a moltype the sequence is labelled as plain text
    untyped = make_seq(raw)
    self.assertEqual(untyped.moltype.label, "text")

    # an explicit moltype is recorded and the characters are left as given
    typed = make_seq(raw, moltype="dna")
    self.assertEqual(typed.moltype.label, "dna")
    self.assertEqual(str(typed), raw)
def test_reverse_complement(self):
    """reverse complement of plain, gapped and ambiguous DNA sequences"""
    cases = (
        ("ACTGTAA", "TTACAGT"),  # canonical bases only
        ("ACTG-TAA", "TTA-CAGT"),  # a gap character stays a gap
        ("ACHNRTAA", "TTAYNDGT"),  # ambiguity codes are complemented too
    )
    for raw, expected in cases:
        dna = make_seq(moltype=DNA, seq=raw)
        self.assertEqual(str(dna.reverse_complement()), expected)
def test_has_terminal_stop(self):
    """test check for terminal stop codons"""
    # sequence ending in the stop codon TAA
    seq = make_seq(moltype=DNA, seq="ACTTAA")
    self.assertTrue(seq.has_terminal_stop())

    # sequence ending in TAT: the method must actually be called here,
    # comparing the sequence object itself to False tests nothing
    seq = make_seq(moltype=DNA, seq="ACTTAT")
    self.assertFalse(seq.has_terminal_stop())

    # for sequence not divisible by 3 the check fails by default
    seq = make_seq(moltype=DNA, seq="ACTTA")
    self.assertRaises(ValueError, seq.has_terminal_stop)

    # unless the length issue is explicitly overridden using allow_partial,
    # in which case the result is False
    self.assertFalse(seq.has_terminal_stop(allow_partial=True))
def test_without_terminal_stop_sodon(self):
    """testing deleting terminal stop"""
    # for standard code the trailing TAA is removed
    seq = make_seq(moltype=DNA, seq="ACTTAA")
    self.assertEqual(str(seq.trim_stop_codon()), "ACT")

    # for sequence not divisible by 3 trimming fails by default
    seq = make_seq(moltype=DNA, seq="ACTTA")
    self.assertRaises(ValueError, seq.trim_stop_codon)

    # unless the length issue is explicitly overridden using allow_partial;
    # a partial trailing codon is not a stop, so nothing should be trimmed
    trimmed = seq.trim_stop_codon(allow_partial=True)
    self.assertEqual(str(trimmed), "ACTTA")
def test_translate_incomplete(self):
    """translating a sequence with an incomplete codon depends on incomplete_ok"""
    dna = make_seq("CGNTGNAC----", moltype=DNA)

    # permissive mode yields a translation for every codon
    protein = dna.get_translation(incomplete_ok=True)
    self.assertEqual(str(protein), "RX?-")

    # strict mode rejects the same sequence
    self.assertRaises(AlphabetError, dna.get_translation, incomplete_ok=False)
def test_to_regex(self):
    """creates a regex from aa seq to match a DNA sequence"""
    import re

    from cogent3 import make_seq

    dna = "ACCGAACAGGGC"
    queries = (
        "TEQG",
        "TZQG",  # note that Z is Q or E
        make_seq("TZQG", moltype="protein"),  # sequence objects are accepted too
    )
    for aa in queries:
        with self.subTest(aa=str(aa), kind=type(aa).__name__):
            pattern = DEFAULT.to_regex(aa)
            # assertEqual reports both strings on a mismatch
            self.assertEqual("".join(re.findall(pattern, dna)), dna)
def test_ambig_translate(self):
    """translating a sequence containing N, ? and gap characters"""
    dna = make_seq("CGNTGN???---", moltype=DNA)
    protein = dna.get_translation()
    self.assertEqual(str(protein), "RX?-")
def test_translate(self):
    """translating an unambiguous DNA sequence"""
    dna = make_seq("ATGACGTTGCGTAGCATAGCTCGA", moltype=DNA)
    protein = dna.get_translation()
    self.assertEqual(str(protein), "MTLRSIAR")
def setUp(self):
    """Build the sequence fixture stored on ``self.seq``."""
    raw = "ATGACGTTGCGTAGCATAGCTCGA"
    # NOTE(review): "dna" is passed positionally here, whereas other calls
    # pass moltype by keyword -- confirm which make_seq parameter it binds to.
    self.seq = make_seq(raw, "dna")
"""
Information analysis of an alignment
====================================

Information is used here in its formal sense -- the maximum entropy minus the
entropy at a position. It is fast to compute and is an indicator of the
variability at a position.
"""
#%%
# Illustrated with a simple example
# #################################

from cogent3 import load_aligned_seqs, make_aligned_seqs, make_seq

s1 = make_seq("TGATGTAAGGTAGTT", name="s1", moltype="dna")
s2 = make_seq("--CTGGAAGGGT---", name="s2", moltype="dna")

seqs = make_aligned_seqs(
    data=[s1, s2],
    array_align=False,
    moltype="dna",
)
draw = seqs.information_plot(
    window=2,
    include_gap=True,
)
draw.show(width=500, height=400)

#%%
# On a sample data set
# ********************
#
# Clicking on a legend item hides that item in the plot.

aln = load_aligned_seqs(
    "../../data/brca1.fasta",
    moltype="protein",
)

fig = aln.information_plot(stat="median")
# sphinx_gallery_thumbnail_number = 2
fig.show(width=500, height=400)