Пример #1
0
def main():
    """Read sequences, apply the requested transformations, and write them out.

    Options come from ``sys.argv[1:]`` via the parser returned by
    ``_build_option_parser()``; any leftover positional arguments are
    reported through ``parser.error``.  The transformations run in a fixed
    order, each one only if its option is set: low-complexity masking
    (``opts.trans_seg``), random subsampling (``opts.subsample``),
    reversal (``opts.reverse``) and complementing (``opts.complement``).
    The result is handed to ``opts.writer.write`` together with ``opts.fout``.
    """
    # ------ Parse command line ------
    parser = _build_option_parser()
    opts, leftover = parser.parse_args(sys.argv[1:])
    if leftover:
        parser.error("Unparsable arguments: %s " % leftover)

    sequences = opts.reader.read(opts.fin)

    if opts.trans_seg:
        sequences = SeqList([mask_low_complexity(seq) for seq in sequences])

    if opts.subsample is not None:
        # Imported lazily: only needed when subsampling was requested.
        from random import random

        keep_fraction = opts.subsample
        # One random() draw per sequence; keep it when the draw falls
        # below the requested fraction.
        sequences = SeqList(
            [seq for seq in sequences if random() < keep_fraction]
        )

    if opts.reverse:
        sequences = SeqList([seq.reverse() for seq in sequences])

    if opts.complement:
        # Re-wrap every sequence with the nucleic alphabet before
        # complementing it.
        as_nucleic = SeqList(
            [Seq(seq, alphabet=nucleic_alphabet) for seq in sequences]
        )
        sequences = SeqList([seq.complement() for seq in as_nucleic])

    opts.writer.write(opts.fout, sequences)
Пример #2
0
    def test_tally(self):
        """Check tally() counts and that it raises without a list alphabet."""
        seqs = SeqList(
            [
                Seq("ACTTT", nucleic_alphabet),
                Seq("ACCCC", nucleic_alphabet),
                Seq("GGGG", nucleic_alphabet),
            ],
            nucleic_alphabet,
        )

        # The leading entries match the A, C, G and T totals over the three
        # sequences above; every remaining entry is expected to be zero.
        expected = [2, 5, 4, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        observed = seqs.tally()
        assert observed == expected

        # This SeqList is built without an alphabet argument, and tally()
        # is expected to reject it.
        no_alphabet = SeqList(
            [Seq("AAACD", nucleic_alphabet),
             Seq("AAACD", nucleic_alphabet)])
        with self.assertRaises(ValueError):
            no_alphabet.tally()
Пример #3
0
    def test_repr(self):
        """Smoke test: repr() of a SeqList must complete without raising."""
        raw = [
            "ACGTURYBDHVNACGTURYSWKMBDHVN",
            "ACGTURYSWKMBDHVNACGTURYSWKMBDHVN",
            "ACGTURSWKMBDHVNACGTURKMBDHVN",
        ]
        seqs = SeqList([Seq(text, nucleic_alphabet) for text in raw])

        # Only the absence of an exception is checked; the text is discarded.
        repr(seqs)
Пример #4
0
    def test_create(self):
        """A SeqList built from three sequences reports a length of three."""
        members = [
            Seq("ACGTURYBDHVNACGTURYSWKMBDHVN", nucleic_alphabet),
            Seq("ACGTURYSWKMBDHVNACGTURYSWKMBDHVN", nucleic_alphabet),
            Seq("ACGTURSWKMBDHVNACGTURKMBDHVN", nucleic_alphabet),
        ]
        seqs = SeqList(members)

        self.assertEqual(len(seqs), 3)
Пример #5
0
    def test_profile(self):
        """Check profile() column counts and its ValueError cases."""
        abcd = Alphabet("ABCD")
        rows = ["ABCDD", "AAAAD", "AAABD", "AAACD"]
        seqs = SeqList([Seq(text, abcd) for text in rows], abcd)

        profile = seqs.profile()

        # One expected row per position, holding the counts of A, B, C, D
        # at that position across the four sequences.
        expected_by_position = [
            [4, 0, 0, 0],
            [3, 1, 0, 0],
            [3, 0, 1, 0],
            [1, 1, 1, 1],
            [0, 0, 0, 4],
        ]
        for position, expected in enumerate(expected_by_position):
            self.assertEqual(list(profile[position]), expected)

        # Lookup by (position, symbol) is supported as well.
        self.assertEqual(profile[4, 'D'], 4)

        # Sequences of different lengths are expected to be rejected.
        ragged = SeqList([Seq("AAACD", abcd), Seq("AAACDA", abcd)], abcd)
        with self.assertRaises(ValueError):
            ragged.profile()

        # A SeqList built without an alphabet argument is expected to be
        # rejected too.
        no_alphabet = SeqList([Seq("AAACD", abcd), Seq("AAACD", abcd)])
        with self.assertRaises(ValueError):
            no_alphabet.profile()
Пример #6
0
    def test_create_annotated(self):
        """Keyword annotations given to SeqList are exposed as attributes."""
        members = [
            Seq("ACGTURYBDHVNACGTURYSWKMBDHVN", nucleic_alphabet),
            Seq("ACGTURYSWKMBDHVNACGTURYSWKMBDHVN", nucleic_alphabet),
            Seq("ACGTURSWKMBDHVNACGTURKMBDHVN", nucleic_alphabet),
        ]

        seqs = SeqList(
            members,
            alphabet=nucleic_alphabet,
            name="alsdf",
            description='a',
        )

        self.assertEqual(seqs.name, 'alsdf')
        self.assertEqual(seqs.description, 'a')
        self.assertEqual(seqs.alphabet, nucleic_alphabet)
Пример #7
0
    def test_ords(self):
        """Exercise ords() with the list alphabet, an explicit one, and none."""
        first = Seq("ACGTURYBDHVNACGTURYSWKMBDHVN", nucleic_alphabet)
        second = Seq("ACGTURYSDHVNACGTURYSWKMBDHVN", nucleic_alphabet)
        third = Seq("ACGTURSWKMBDHVNACGTURKMBDHVN", nucleic_alphabet)
        members = [first, second, third]

        with_alphabet = SeqList(members, nucleic_alphabet)

        # Call with no argument; the result is not inspected here.
        with_alphabet.ords()

        # FIXME?: a check that ords() raises ValueError for sequences of
        # different lengths was left disabled, so that case is not covered.

        # Alphabet passed explicitly as a positional argument.
        with_alphabet.ords(nucleic_alphabet)

        # The list has no alphabet of its own, so one is supplied per call.
        without_alphabet = SeqList(members)
        without_alphabet.ords(alphabet=Alphabet("ABC"))

        # With no alphabet on the list or in the call, ords() must fail.
        with self.assertRaises(ValueError):
            without_alphabet.ords()
Пример #8
0
    def test_create_empty(self):
        """An empty SeqList can be grown with append() and extend()."""
        first = Seq("ACGTURYBDHVNACGTURYSWKMBDHVN", nucleic_alphabet)
        second = Seq("ACGTURYSWKMBDHVNACGTURYSWKMBDHVN", nucleic_alphabet)
        third = Seq("ACGTURSWKMBDHVNACGTURKMBDHVN", nucleic_alphabet)

        grown = SeqList()
        grown.append(first)
        grown.extend((second, third))

        self.assertEqual(len(grown), 3)
        # The container must still be exactly a SeqList after being grown.
        self.assertEqual(type(grown), SeqList)
Пример #9
0
    def test_isaligned(self):
        """isaligned() is true with a matching alphabet, false otherwise."""
        abcd = Alphabet("ABCD")
        members = [
            Seq(text, abcd)
            for text in ("ABCDD", "AAAAD", "AAABD", "AAACD")
        ]

        # The list uses the same alphabet as its members.
        matching = SeqList(members, abcd)
        assert matching.isaligned()

        # Same members, but the list is given a different alphabet.
        mismatched = SeqList(members, Alphabet("ABCDE"))
        assert not mismatched.isaligned()