Exemplo n.º 1
0
    def test_repr(self):
        """Smoke test: ``repr`` of a populated SeqList must not raise."""
        raw_sequences = (
            "ACGTURYBDHVNACGTURYSWKMBDHVN",
            "ACGTURYSWKMBDHVNACGTURYSWKMBDHVN",
            "ACGTURSWKMBDHVNACGTURKMBDHVN",
        )
        seq_list = SeqList([Seq(raw, nucleic_alphabet) for raw in raw_sequences])

        # Only the absence of an exception is checked; the text is not inspected.
        repr(seq_list)
Exemplo n.º 2
0
    def test_create(self):
        """A SeqList built from three sequences reports a length of three."""
        raw_sequences = (
            "ACGTURYBDHVNACGTURYSWKMBDHVN",
            "ACGTURYSWKMBDHVNACGTURYSWKMBDHVN",
            "ACGTURSWKMBDHVNACGTURKMBDHVN",
        )
        seq_list = SeqList([Seq(raw, nucleic_alphabet) for raw in raw_sequences])

        self.assertEqual(len(seq_list), 3)
Exemplo n.º 3
0
    def test_transform(self):
        """A Transform converts between alphabets and raises on a foreign input."""
        source = Seq("ACGTURYSWKMBDHVN", nucleic_alphabet)
        target = Seq("ACGTTNNNNNNNNNNN", dna_alphabet)
        to_dna = Transform(source, target)

        # Transform instances are callable objects.
        converted = to_dna(Seq("AAAAAR", nucleic_alphabet))
        self.assertEqual(converted.alphabet, dna_alphabet)
        self.assertEqual(converted, Seq("AAAAAN", dna_alphabet))

        # A sequence over the protein alphabet must be refused with ValueError.
        protein_seq = Seq(protein_alphabet, protein_alphabet)
        self.assertRaises(ValueError, to_dna, protein_seq)
Exemplo n.º 4
0
    def test_create_empty(self):
        """An initially empty SeqList can be filled via append() and extend()."""
        first, second, third = [
            Seq(raw, nucleic_alphabet)
            for raw in (
                "ACGTURYBDHVNACGTURYSWKMBDHVN",
                "ACGTURYSWKMBDHVNACGTURYSWKMBDHVN",
                "ACGTURSWKMBDHVNACGTURKMBDHVN",
            )
        ]

        seq_list = SeqList()
        seq_list.append(first)
        seq_list.extend((second, third))

        self.assertEqual(len(seq_list), 3)
        # The container must still be exactly a SeqList after being mutated.
        self.assertEqual(type(seq_list), SeqList)
Exemplo n.º 5
0
    def test_create_seq(self):
        """Seq accepts text drawn from its alphabet and raises ValueError otherwise."""
        self.assertTrue(Seq("alphabet", "alphbet"))
        self.assertRaises(ValueError, Seq, "not alphabetic", "alphabet")

        printable = (
            "Any printable Ascii character `1234567890-=~!@#$%^&*()_+{}|[]\\:;'<>?,./QWERTYUIOPASD"
            "FGHJKLZXCVBNMqwertyuiopasdfghjklzxcvbnm "
        )

        # Each character individually, then the whole string, must be accepted.
        for char in printable:
            self.assertTrue(char in generic_alphabet)
        self.assertTrue(Seq(printable, generic_alphabet))

        # A NUL character is expected to be rejected by the generic alphabet.
        self.assertRaises(ValueError, Seq, "Not zero. \x00", generic_alphabet)
Exemplo n.º 6
0
    def test_isaligned(self):
        """Check SeqList.isaligned() for a matching and a mismatching alphabet.

        Four equal-length sequences over ``ABCD`` are reported as aligned when
        the list carries the same alphabet, and as not aligned when the list
        is given the different alphabet ``ABCDE``.
        """
        a = Alphabet("ABCD")

        s0 = Seq("ABCDD", a)
        s1 = Seq("AAAAD", a)
        s2 = Seq("AAABD", a)
        s3 = Seq("AAACD", a)

        # unittest assertions instead of bare ``assert``: they are not
        # stripped under ``python -O`` and match the rest of the test methods.
        seqs = SeqList([s0, s1, s2, s3], a)
        self.assertTrue(seqs.isaligned())

        seqs = SeqList([s0, s1, s2, s3], Alphabet("ABCDE"))
        self.assertFalse(seqs.isaligned())
Exemplo n.º 7
0
    def test_create_annotated(self):
        """Keyword metadata given to SeqList is readable back as attributes."""
        members = [
            Seq(raw, nucleic_alphabet)
            for raw in (
                "ACGTURYBDHVNACGTURYSWKMBDHVN",
                "ACGTURYSWKMBDHVNACGTURYSWKMBDHVN",
                "ACGTURSWKMBDHVNACGTURKMBDHVN",
            )
        ]

        annotated = SeqList(
            members, alphabet=nucleic_alphabet, name="alsdf", description='a')

        self.assertEqual(annotated.name, 'alsdf')
        self.assertEqual(annotated.description, 'a')
        self.assertEqual(annotated.alphabet, nucleic_alphabet)
Exemplo n.º 8
0
    def test_profile(self):
        """profile() tallies letters per column and rejects unusable lists."""
        alpha = Alphabet("ABCD")
        rows = [Seq(raw, alpha) for raw in ("ABCDD", "AAAAD", "AAABD", "AAACD")]

        counts = SeqList(rows, alpha).profile()

        # One row of expected counts per sequence position, in alphabet order.
        expected_by_position = (
            [4, 0, 0, 0],
            [3, 1, 0, 0],
            [3, 0, 1, 0],
            [1, 1, 1, 1],
            [0, 0, 0, 4],
        )
        for position, expected in enumerate(expected_by_position):
            self.assertEqual(list(counts[position]), expected)

        # The result can also be indexed by (position, letter).
        self.assertEqual(counts[4, 'D'], 4)

        # Sequences of unequal length must raise ValueError.
        ragged = SeqList([Seq("AAACD", alpha), Seq("AAACDA", alpha)], alpha)
        self.assertRaises(ValueError, ragged.profile)

        # A list created without an alphabet must raise ValueError as well.
        unlabeled = SeqList([Seq("AAACD", alpha), Seq("AAACD", alpha)])
        self.assertRaises(ValueError, unlabeled.profile)
Exemplo n.º 9
0
    def test_tally(self):
        """tally() counts letters across the whole list; no alphabet is an error.

        The expected counts are given in the order of ``nucleic_alphabet``;
        only the first four entries are non-zero for these inputs.
        """
        s0 = Seq("ACTTT", nucleic_alphabet)
        s1 = Seq("ACCCC", nucleic_alphabet)
        s2 = Seq("GGGG", nucleic_alphabet)
        seqs = SeqList([s0, s1, s2], nucleic_alphabet)

        counts = seqs.tally()
        # assertEqual instead of a bare ``assert``: it is not stripped under
        # ``python -O`` and reports a diff when the counts do not match.
        self.assertEqual(
            counts, [2, 5, 4, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

        # Without an alphabet on the list, tally() must raise ValueError.
        seqs = SeqList(
            [Seq("AAACD", nucleic_alphabet),
             Seq("AAACD", nucleic_alphabet)])
        self.assertRaises(ValueError, seqs.tally)
Exemplo n.º 10
0
    def test_reduced_protein_alphabets(self):
        """Smoke test: every reduced-alphabet transform accepts a sample sequence."""
        sample = Seq(
            "ENHGGKVALKTHCGKYLSIGDHKQVYLSHHLHGDHSLFHLEHHGGKVSIKGHHHHYISADHHGHVSTKEHHDHDT"
            "TFEEIII",
            reduced_protein_alphabet,
        )

        # Results are discarded; each transform only has to run without raising.
        for transform in reduced_protein_alphabets.values():
            transform(sample)
Exemplo n.º 11
0
 def test_seg_invalid(self):
     """mask_low_complexity raises ValueError for each invalid argument set."""
     seq = Seq("KTHCGKYLSIGDHKQVYLSHH", protein_alphabet)

     # Positional arguments passed after the sequence, one tuple per case.
     invalid_argument_sets = (
         (12, -1, 0),
         (-1, 0, 0),
         (12, 1, 10),
         (6, 12, 13),
         (6, 2.0, 1.9),
     )
     for arguments in invalid_argument_sets:
         self.assertRaises(ValueError, mask_low_complexity, seq, *arguments)
Exemplo n.º 12
0
def main():
    """Read sequences, apply the transforms selected on the command line, write them.

    Options come from ``_build_option_parser()``. Any leftover positional
    argument is reported through ``parser.error``. Sequences are read with
    ``opts.reader`` from ``opts.fin`` and written with ``opts.writer`` to
    ``opts.fout``. In between, the enabled steps run in this fixed order:
    low-complexity masking (``opts.trans_seg``), random subsampling
    (``opts.subsample``), reversal (``opts.reverse``) and complementing
    (``opts.complement``).
    """
    # ------ Parse Command line ------
    parser = _build_option_parser()
    opts, leftover = parser.parse_args(sys.argv[1:])
    if leftover:
        parser.error("Unparsable arguments: %s " % leftover)

    seqs = opts.reader.read(opts.fin)

    if opts.trans_seg:
        seqs = SeqList([mask_low_complexity(entry) for entry in seqs])

    if opts.subsample is not None:
        from random import random

        # Keep each sequence independently with probability opts.subsample.
        keep_fraction = opts.subsample
        seqs = SeqList([entry for entry in seqs if random() < keep_fraction])

    if opts.reverse:
        seqs = SeqList([entry.reverse() for entry in seqs])

    if opts.complement:
        # Re-type every sequence as nucleic acid first, then complement it.
        nucleic = SeqList([Seq(entry, alphabet=nucleic_alphabet) for entry in seqs])
        seqs = SeqList([entry.complement() for entry in nucleic])

    opts.writer.write(opts.fout, seqs)
Exemplo n.º 13
0
    def complement(self):
        """Complement nucleic acid sequence.

        Works in place: ``self.array`` is replaced and ``self.alphabets`` is
        reset to ``(None, alphabet)``. Nothing is returned.
        """
        # Imported at call time rather than at module level.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        from weblogo.seq import Seq, Alphabet
        alphabet = self.alphabet
        # Spell the alphabet out as a sequence over itself and complement it;
        # the result becomes the letters of the complemented alphabet.
        complement_alphabet = Alphabet(Seq(alphabet, alphabet).complement())
        # Temporarily relabel this object with the complemented alphabet so
        # that the reindex below runs against the relabelled state.
        self.alphabets = (None, complement_alphabet)

        # NOTE(review): if reindex() raises, self.alphabets is left pointing
        # at the complemented alphabet -- confirm whether that matters.
        m = self.reindex(alphabet)
        # Restore the original labelling and adopt the reindexed data.
        self.alphabets = (None, alphabet)
        self.array = m.array
Exemplo n.º 14
0
    def test_add(self):
        """``+`` concatenates sequences, keeps the alphabet and accepts plain str.

        Also exercises ``==`` and ``!=`` directly: equality depends on the
        alphabet as well as the letters, and a Seq differs from an unrelated
        plain string.
        """
        s1 = Seq("AAAA", dna_alphabet)
        s2 = Seq("TTTT", dna_alphabet)

        s3 = s1 + s2
        self.assertEqual(s3.alphabet, dna_alphabet)
        self.assertEqual(s3, Seq("AAAATTTT", dna_alphabet))

        # The operators are spelled out on purpose so that both == and != are
        # exercised; unittest assertions replace bare ``assert`` so the
        # checks are not stripped under ``python -O``.
        self.assertTrue(s3 == Seq("AAAATTTT", dna_alphabet))
        self.assertTrue(s3 != Seq("AAAATTTT", protein_alphabet))
        self.assertTrue(s3 != "not a seq")

        # A plain string may appear on either side of the operator.
        s4 = "AA"
        s5 = s4 + s1
        s6 = s1 + s4
        self.assertEqual(s5.alphabet, s6.alphabet)
        self.assertEqual(s5, s6)

        self.assertTrue(s5 == s6)
        self.assertFalse(s5 != s6)
Exemplo n.º 15
0
    def test_words(self):
        """words() yields the overlapping k-letter words over the given alphabet.

        The expected words are upper-case and none of them contains the
        trailing ``x`` of the input.
        """
        s = Seq("AGTCAGCTACGACGcgcx", dna_alphabet)

        pairs = list(s.words(2, unambiguous_dna_alphabet))
        self.assertEqual(len(pairs), len(s) - 2)
        expected_pairs = [
            "AG", "GT", "TC", "CA", "AG", "GC", "CT", "TA",
            "AC", "CG", "GA", "AC", "CG", "GC", "CG", "GC",
        ]
        self.assertEqual(pairs, expected_pairs)

        # A word as long as the whole sequence yields nothing; one letter
        # shorter yields exactly one word.
        total = len(s)
        self.assertEqual(list(s.words(total, unambiguous_dna_alphabet)), [])
        self.assertEqual(
            list(s.words(total - 1, unambiguous_dna_alphabet)),
            ["AGTCAGCTACGACGCGC"],
        )

        # A word length beyond the sequence length only has to run cleanly.
        list(s.words(200, unambiguous_dna_alphabet))
Exemplo n.º 16
0
    def test_segging(self):
        """mask_low_complexity with default and explicit parameters."""
        # .upper() applies to the whole concatenated literal in both cases.
        before = (
            "mgnrafkshhghflsaegeavkthhghhdhhthfhvenhggkvalkthcgkylsigdhkqvylshhlhgdhslfhlehhg"
            "gkvsikghhhhyisadhhghvstkehhdhdttfeeiii"
        ).upper()
        after = (
            "MGNRAFKSHHGHFLSAEGEAVxxxxxxxxxxxxxxxENHGGKVALKTHCGKYLSIGDHKQVYLSHHLHGDHSLFHLEHHGG"
            "KVSIKGHHHHYISADHHGHVSTKEHHDHDTTFEEIII"
        ).upper()

        unmasked = Seq(before, protein_alphabet)
        expected_default = Seq(after, protein_alphabet)
        fully_masked = Seq('X' * len(unmasked), protein_alphabet)

        # Default parameters mask only the marked stretch.
        self.assertEqual(expected_default, mask_low_complexity(unmasked))

        # Nothing should be segged.
        self.assertEqual(unmasked, mask_low_complexity(unmasked, 12, 0, 0))

        # Everything should be segged.
        self.assertEqual(mask_low_complexity(unmasked, 12, 4.3, 4.3), fully_masked)

        # A second argument far larger than the sequence length only has to
        # run without raising.
        mask_low_complexity(unmasked, 100000, 4.3, 4.3)
Exemplo n.º 17
0
    def test_join(self):
        """join() on an empty Seq concatenates Seq and plain-str parts."""
        parts = [Seq("AAAA", dna_alphabet), Seq("TTTT", dna_alphabet), "GGGG"]
        separator = Seq("", dna_alphabet)

        joined = separator.join(parts)

        self.assertEqual(joined, Seq("AAAATTTTGGGG", dna_alphabet))
Exemplo n.º 18
0
    def test_ords(self):
        """Exercise SeqList.ords() with the list's alphabet, an explicit one, and none."""
        members = [
            Seq(raw, nucleic_alphabet)
            for raw in (
                "ACGTURYBDHVNACGTURYSWKMBDHVN",
                "ACGTURYSDHVNACGTURYSWKMBDHVN",
                "ACGTURSWKMBDHVNACGTURKMBDHVN",
            )
        ]

        labelled = SeqList(list(members), nucleic_alphabet)
        # Falls back on the alphabet stored on the list; result not inspected.
        labelled.ords()

        # FIXME?: behaviour for sequences of different lengths is not checked.
        # The disabled check was:
        #   s3 = Seq("ACGTUR", nucleic_alphabet)
        #   seqs2 = SeqList([s0, s1, s3, s2], nucleic_alphabet)
        #   self.assertRaises(ValueError, seqs2.ords)

        # Alphabet passed explicitly as an argument.
        labelled.ords(nucleic_alphabet)

        # A list without an alphabet works when one is supplied per call ...
        unlabelled = SeqList(list(members))
        unlabelled.ords(alphabet=Alphabet("ABC"))

        # ... and raises ValueError when none is available at all.
        self.assertRaises(ValueError, unlabelled.ords)
Exemplo n.º 19
0
    def test_words(self):
        """words() yields the overlapping k-letter words over the given alphabet.

        The expected words are upper-case and none of them contains the
        trailing ``x`` of the input.
        """
        s = Seq("AGTCAGCTACGACGcgcx", dna_alphabet)

        two_letter_words = list(s.words(2, unambiguous_dna_alphabet))
        self.assertEqual(len(two_letter_words), len(s) - 2)
        expected = "AG GT TC CA AG GC CT TA AC CG GA AC CG GC CG GC".split()
        self.assertEqual(two_letter_words, expected)

        # Full-length words: none. One letter shorter: exactly one.
        no_words = list(s.words(len(s), unambiguous_dna_alphabet))
        self.assertEqual(no_words, [])
        single_word = list(s.words(len(s) - 1, unambiguous_dna_alphabet))
        self.assertEqual(single_word, ["AGTCAGCTACGACGCGC"])

        # A word length beyond the sequence length only has to run cleanly.
        list(s.words(200, unambiguous_dna_alphabet))
Exemplo n.º 20
0
    def test_which_alphabet(self):
        """Alphabet.which() identifies the alphabet of a Seq and of sequence files.

        Each data file is opened, read and checked in turn. Its stream is
        closed in a ``finally`` clause so that a failing read or assertion
        does not leak the open file.
        """
        a = Alphabet.which(Seq("ARNDCQEGHILKMFPSTWYVX"))
        self.assertEqual(a, unambiguous_protein_alphabet)

        # (data file name, alphabet that Alphabet.which is expected to report)
        cases = (
            ('cap.fa', unambiguous_dna_alphabet),
            ('cox2.msf', unambiguous_protein_alphabet),
            ('Rv3829c.fasta', unambiguous_protein_alphabet),
            ('chain_B.fasta', unambiguous_protein_alphabet),
        )
        for filename, expected_alphabet in cases:
            stream = data_stream(filename)
            try:
                seqs = seq_io.read(stream)
                self.assertEqual(Alphabet.which(seqs), expected_alphabet)
            finally:
                stream.close()
Exemplo n.º 21
0
 def test_reverse(self):
     """reverse() flips the letter order, and applying it twice is a no-op."""
     forward = Seq("ACGT", dna_alphabet)
     round_trip = forward.reverse().reverse()
     self.assertEqual(forward, round_trip)

     backward = forward.reverse()
     self.assertEqual(backward, Seq("TGCA", dna_alphabet))
Exemplo n.º 22
0
 def test_tally_nonalphabetic(self):
     """tally() against a two-letter alphabet returns one count per letter."""
     dna = Seq("AGTCAGCTACGACGCGC", dna_alphabet)
     counts = dna.tally(Alphabet("AC"))

     self.assertEqual(2, len(counts))
     # Four 'A' and six 'C' occur in the sequence; 'G' and 'T' are not counted.
     self.assertEqual(list(counts), [4, 6])
Exemplo n.º 23
0
 def test_words2(self):
     """word_count(2) yields the expected counts, in the order it returns them."""
     seq = Seq("AGTCAGCTACGACGCGC", unambiguous_dna_alphabet)
     word_counts = seq.word_count(2)

     # Transpose the returned items (presumably (word, count) pairs) and keep
     # the second column.
     columns = list(zip(*word_counts))
     self.assertEqual(columns[1], (2, 2, 1, 3, 1, 1, 3, 1, 1, 1))
Exemplo n.º 24
0
 def test_getslice(self):
     """Slicing a Seq yields an object carrying the same alphabet."""
     whole = Seq("AGTCAGCTACGACGCGC", dna_alphabet)
     # Named ``piece`` so the builtin ``slice`` is not shadowed.
     piece = whole[2:4]
     self.assertEqual(whole.alphabet, piece.alphabet)
Exemplo n.º 25
0
 def test_alphabet_chrs(self):
     """Alphabet.chrs() turns letter indices back into a sequence."""
     alpha = Alphabet("alph")
     # Indices into "alph": 2 -> 'p', 2 -> 'p', 1 -> 'l', 0 -> 'a'.
     indices = (2, 2, 1, 0)
     self.assertEqual(Seq("ppla", alpha), alpha.chrs(indices))
Exemplo n.º 26
0
 def test_create_annotated(self):
     """Seq keeps the name and description it was given, plus its text."""
     text = "ACGTURYSWKMBDHVNACGTURYSWKMBDHVNAAAAA"
     annotated = Seq(text, nucleic_alphabet, name="ID", description="DESCRIPTION")

     self.assertEqual(annotated.name, "ID")
     self.assertEqual(annotated.description, "DESCRIPTION")
     # Converting back to str gives the original text unchanged.
     self.assertEqual(text, str(annotated))
Exemplo n.º 27
0
 def test_ungap(self):
     """ungap() removes the '-', '~' and '.' characters used in these inputs."""
     for gapped in ("T-T", "T-~---T...~~~--"):
         ungapped = Seq(gapped, dna_alphabet).ungap()
         self.assertEqual(str(ungapped), 'TT')
Exemplo n.º 28
0
 def test_repr(self):
     """Smoke test: ``repr`` of a Seq must not raise."""
     sequence = Seq("AAAA", dna_alphabet)
     # The returned text is not inspected.
     repr(sequence)
Exemplo n.º 29
0
 def test_str(self):
     """str() of a Seq returns its text, keeping the original letter case."""
     upper_case = Seq("AGCTA", dna_alphabet)
     self.assertEqual(str(upper_case), "AGCTA")

     # Mixed-case input must come back unchanged.
     mixed_case = Seq("AgcTA", dna_alphabet)
     self.assertEqual(str(mixed_case), "AgcTA")
Exemplo n.º 30
0
 def test_tostring(self):
     """tostring() returns the sequence text with its letter case unchanged."""
     sequence = Seq("AgcTAAAA", dna_alphabet)
     self.assertEqual(sequence.tostring(), "AgcTAAAA")