Пример #1
0
    def test_get_next_from_file(self):
        '''get_next_from_file() should read seqs from an OK file, and raise an error on a badly formatted file.

        Each malformed fixture must make get_next_from_file() raise
        sequences.Error at some point while the file is being consumed.
        Every record read from the well-formed fixture must equal
        Fastq('ID', 'ACGTA', 'IIIII').
        '''
        bad_files = [
            'sequences_test_fail_no_AT.fq', 'sequences_test_fail_no_seq.fq',
            'sequences_test_fail_no_plus.fq', 'sequences_test_fail_no_qual.fq'
        ]

        for basename in bad_files:
            fname = os.path.join(data_dir, basename)
            f_in = utils.open_file_read(fname)
            try:
                fq = sequences.Fastq()
                with self.assertRaises(sequences.Error):
                    while fq.get_next_from_file(f_in):
                        pass
            finally:
                # Close the handle even when the expected error is not raised,
                # so a failing assertion does not leak the open file.
                utils.close(f_in)

        fname = os.path.join(data_dir, 'sequences_test_good_file.fq')
        # A missing fixture is left to propagate as an ordinary test error.
        # Calling sys.exit() here would abort the whole test run instead of
        # reporting a single failing test.
        f_in = open(fname)
        try:
            fq = sequences.Fastq()
            while fq.get_next_from_file(f_in):
                self.assertEqual(fq, sequences.Fastq('ID', 'ACGTA', 'IIIII'))
        finally:
            utils.close(f_in)
Пример #2
0
    def test_translate(self):
        '''translate() on a Fastq should convert nucleotides to amino acids.

        The expected record carries the same ID, the translated sequence and
        a quality string made up of 'I' characters only.
        '''
        nucleotides = 'GCAGCCGCGGCTAGAAGGCGACGCCGGCGTAACAATGACGATTGCTGTGAAGAGCAACAGGGAGGCGGGGGTCACCATATAATCATTTTATTGCTACTCCTGCTTAAAAAGATGTTCTTTCCACCCCCGCCTAGCAGTTCATCCTCGTCTACAACCACGACTTGGTACTATGTAGTCGTGGTTTAATAGTGA'
        nucleotide_quals = 'IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII'
        record = sequences.Fastq('ID', nucleotides, nucleotide_quals)

        amino_acids = 'AAAARRRRRRNNDDCCEEQQGGGGHHIIILLLLLLKKMFFPPPPSSSSSSTTTTWYYVVVV***'
        amino_acid_quals = 'IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII'
        expected = sequences.Fastq('ID', amino_acids, amino_acid_quals)

        self.assertEqual(expected, record.translate())
Пример #3
0
 def test_to_Fastq(self):
     '''to_Fastq should build a Fastq from a list of numeric quality scores.

     The expected quality string shows that -1 yields the same character as 0
     and that 94 yields the same character as 93. Passing a string instead of
     a score list must raise sequences.Error.
     '''
     fasta = sequences.Fasta('X', 'AAAAA')
     scores = [-1, 0, 40, 93, 94]
     expected = sequences.Fastq('X', 'AAAAA', '!!I~~')
     self.assertEqual(expected, fasta.to_Fastq(scores))

     self.assertRaises(sequences.Error, fasta.to_Fastq, 'AAAAAAAAAAAAA')
Пример #4
0
    def test_trim(self):
        '''trim(start, end) should remove that many characters from each end of seq and qual'''
        # ((start, end), text expected to remain in both seq and qual)
        cases = [
            ((0, 0), '1234567890'),
            ((1, 0), '234567890'),
            ((0, 1), '123456789'),
            ((2, 2), '345678'),
        ]

        for (start, end), remaining in cases:
            # trim() works in place, so every case starts from a fresh record.
            record = sequences.Fastq('ID', '1234567890', '1234567890')
            record.trim(start, end)
            self.assertEqual(record,
                             sequences.Fastq('ID', remaining, remaining))
Пример #5
0
    def test_trim_Ns(self):
        '''trim_Ns() should reduce every input record to the same trimmed Fastq'''
        # (sequence, quality) pairs; each must equal `expected` after trim_Ns().
        inputs = [
            ('ANNANA', '111111'),
            ('NANNANA', '1111111'),
            ('NANNANAN', '11111111'),
            ('ANNANAN', '1111111'),
            ('NNNNNNANNANAN', '1111111111111'),
            ('NNANNANANn', '1111111111'),
        ]
        expected = sequences.Fastq('ID', 'ANNANA', '111111')

        for bases, quals in inputs:
            record = sequences.Fastq('ID', bases, quals)
            record.trim_Ns()
            self.assertEqual(expected, record)
Пример #6
0
def merge_to_one_seq(infile, outfile, seqname='union'):
    '''Merge every sequence of a multi fasta or fastq file into one record.

    The sequences (and, for fastq input, the quality strings) are
    concatenated in file order and written to outfile as a single record.

    infile: path handed to sequences.file_reader().
    outfile: path handed to utils.open_file_write().
    seqname: ID given to the merged record.

    The output type follows the first record read: Fastq if it is a
    sequences.Fastq, otherwise Fasta. If infile yields no records, a Fasta
    record with an empty sequence is written rather than raising IndexError.
    '''
    seq_parts = []
    qual_parts = []
    is_fastq = None  # decided by the first record

    for seq in sequences.file_reader(infile):
        if is_fastq is None:
            is_fastq = isinstance(seq, sequences.Fastq)
        # Only the strings are kept; they are immutable, so there is no need
        # to copy the whole record.
        seq_parts.append(seq.seq)
        if is_fastq:
            qual_parts.append(seq.qual)

    new_seq = ''.join(seq_parts)
    if is_fastq:
        merged = sequences.Fastq(seqname, new_seq, ''.join(qual_parts))
    else:
        merged = sequences.Fasta(seqname, new_seq)

    f = utils.open_file_write(outfile)
    try:
        print(merged, file=f)
    finally:
        # Close the output handle even if writing fails.
        utils.close(f)
Пример #7
0
    def test_replace_interval(self):
        '''replace_interval() should substitute an interval, or raise sequences.Error on bad input.

        Valid replacements are checked on both Fasta and Fastq records built
        from 'ACGTA'. The error cases are an interval whose start is after
        its end, intervals reaching past the end of the sequence, and a Fastq
        replacement whose sequence and quality lengths differ.
        '''
        # (start, end, sequence expected after inserting 'NEW')
        fasta_cases = [
            (0, 0, 'NEWCGTA'),
            (4, 4, 'ACGTNEW'),
            (2, 3, 'ACNEWA'),
        ]
        for start, end, expected_seq in fasta_cases:
            fa = sequences.Fasta('ID', 'ACGTA')
            fa.replace_interval(start, end, 'NEW')
            self.assertEqual(fa, sequences.Fasta('ID', expected_seq))

        # All invalid intervals are tried against the same Fasta record.
        fa = sequences.Fasta('ID', 'ACGTA')
        for start, end in [(3, 2), (1, 5), (5, 10)]:
            with self.assertRaises(sequences.Error):
                fa.replace_interval(start, end, 'x')

        # (start, end, sequence and quality expected after 'NEW' / 'III')
        fastq_cases = [
            (0, 0, 'NEWCGTA', 'IIIBCDE'),
            (4, 4, 'ACGTNEW', 'ABCDIII'),
            (2, 3, 'ACNEWA', 'ABIIIE'),
        ]
        for start, end, expected_seq, expected_qual in fastq_cases:
            fq = sequences.Fastq('ID', 'ACGTA', 'ABCDE')
            fq.replace_interval(start, end, 'NEW', 'III')
            self.assertEqual(fq,
                             sequences.Fastq('ID', expected_seq, expected_qual))

        # fq is still the record left over from the last Fastq case above.
        with self.assertRaises(sequences.Error):
            fq.replace_interval(1, 1, 'x', 'xx')
Пример #8
0
 def test_file_reader_fastq(self):
     '''Every record file_reader yields from the good fastq fixture should be the expected Fastq'''
     path = os.path.join(data_dir, 'sequences_test_good_file.fq')
     expected = sequences.Fastq('ID', 'ACGTA', 'IIIII')

     for record in sequences.file_reader(path):
         self.assertEqual(record, expected)
Пример #9
0
 def test_to_Fasta_and_qual(self):
     '''to_Fasta_and_qual should return a Fasta plus the list of numeric quality scores.

     The expected scores equal each quality character's ASCII code minus 33.
     '''
     record = sequences.Fastq('ID', 'ACGT', '>ADI')
     fasta, scores = record.to_Fasta_and_qual()

     self.assertEqual(fasta, sequences.Fasta('ID', 'ACGT'))
     self.assertListEqual(scores, [29, 32, 35, 40])
Пример #10
0
 def test_revcomp(self):
     '''revcomp() should reverse complement the sequence in place and reverse the quality string'''
     record = sequences.Fastq('ID', 'ACGTNacgtn', '1234567890')
     record.revcomp()

     expected = sequences.Fastq('ID', 'nacgtNACGT', '0987654321')
     self.assertEqual(record, expected)
Пример #11
0
 def test_init_length_mismatch(self):
     '''Constructing a Fastq whose seq and quality lengths differ should raise sequences.Error'''
     self.assertRaises(sequences.Error, sequences.Fastq, 'X', 'A', 'II')
Пример #12
0
 def setUp(self):
     '''Store a small Fastq record on the test case as self.fastq'''
     fixture = sequences.Fastq('ID', 'ACGTA', 'IIIII')
     self.fastq = fixture
Пример #13
0
    def test_expand_nucleotides(self):
        '''expand_nucleotides() should return one record per expansion of the ambiguity codes.

        Each expected record is named '<original id>.<n>', with n counting
        from 1 in the order of the expected list.
        '''
        # (id, input sequence, expected expanded sequences in order)
        fasta_cases = [
            ('1', 'A', ['A']),
            ('2', 'C', ['C']),
            ('3', 'G', ['G']),
            ('4', 'T', ['T']),
            ('6', 'R', ['A', 'G']),
            ('7', 'Y', ['C', 'T']),
            ('8', 'S', ['C', 'G']),
            ('9', 'W', ['A', 'T']),
            ('10', 'K', ['G', 'T']),
            ('11', 'M', ['A', 'C']),
            ('12', 'B', ['C', 'G', 'T']),
            ('13', 'D', ['A', 'G', 'T']),
            ('14', 'H', ['A', 'C', 'T']),
            ('15', 'V', ['A', 'C', 'G']),
            ('16', 'N', ['A', 'C', 'G', 'T']),
            ('17', 'ART', ['AAT', 'AGT']),
            ('18', 'ARRT', ['AAAT', 'AAGT', 'AGAT', 'AGGT']),
            ('19', 'ARTR', ['AATA', 'AATG', 'AGTA', 'AGTG']),
        ]

        for name, seq, expansions in fasta_cases:
            expected = [
                sequences.Fasta('{}.{}'.format(name, number), expanded)
                for number, expanded in enumerate(expansions, 1)
            ]
            self.assertListEqual(
                sequences.Fasta(name, seq).expand_nucleotides(), expected)

        # Fastq input: the expected records keep the original quality string.
        fastq_expected = [
            sequences.Fastq('20.1', 'AAT', 'GHI'),
            sequences.Fastq('20.2', 'AGT', 'GHI'),
        ]
        self.assertListEqual(
            sequences.Fastq('20', 'ART', 'GHI').expand_nucleotides(),
            fastq_expected)