示例#1
0
    def test_convert_nt_output_format(self):
        """Check the length and dtypes of ``convert_start_stop_to_nt`` output.

        The result must hold exactly three elements, each with an ``int64``
        dtype.  Inputs come from ``test_data/test_frames.fa``, translated
        with ``both_strands=False`` and filtered to ``orf_length >= 6``.
        """
        sequences = read_fasta('test_data/test_frames.fa')
        (ids, aa_frames, frame, strand, seq_length_nt,
         seq_length) = translate_all_frames(sequences, both_strands=False)
        (orf_sequence, start_sites, stop_sites, orf_length,
         last_aa_is_stop) = find_longest_orfs(aa_frames)

        # filter data by minimum orf length
        keep = orf_length >= 6
        (aa_frames, frame, strand, seq_length_nt, ids, seq_length,
         start_sites, stop_sites, orf_sequence, last_aa_is_stop,
         orf_length) = filter_objects(
             keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length,
             start_sites, stop_sites, orf_sequence, last_aa_is_stop,
             orf_length)

        output = convert_start_stop_to_nt(start_sites, stop_sites,
                                          seq_length_nt, orf_length, frame,
                                          last_aa_is_stop)

        # Assert each property on its own so that a failure report names the
        # offending element instead of a bare "False is not true".  The
        # length is checked first so a short result fails cleanly rather
        # than raising IndexError while the dtypes are being read.
        self.assertEqual(len(output), 3,
                         'expected exactly three output arrays')
        for position, array in enumerate(output):
            self.assertEqual(
                array.dtype, 'int64',
                'output[{}] does not have dtype int64'.format(position))
示例#2
0
    def test_translate_bothstrands(self):
        """Check frames, strands and translations with ``both_strands=True``.

        The fixture is expected to yield six entries: frames 1, 2 and 3,
        each reported for the '+' strand and then the '-' strand.
        """
        sequences = read_fasta('test_data/test_trans_all_frames.fa')
        (ids, aa_frames, frame, strand, seq_length_nt,
         seq_length) = translate_all_frames(sequences, both_strands=True)

        expected_frame = np.array([1, 1, 2, 2, 3, 3])
        expected_strand = np.array(['+', '-', '+', '-', '+', '-'])
        expected_aa_frames = np.array([
            'MANATEE*', 'LFFGRVRH', 'WRTRPKN', 'YSSVAFA', 'GERDRRI', 'ILRSRSP'
        ])

        # One assertion per property, each reporting the actual value, so a
        # failure shows which of the three outputs is wrong.
        self.assertTrue(np.all(frame == expected_frame),
                        'unexpected frame numbers: {}'.format(frame))
        self.assertTrue(np.all(strand == expected_strand),
                        'unexpected strand labels: {}'.format(strand))
        self.assertTrue(np.all(aa_frames == expected_aa_frames),
                        'unexpected translations: {}'.format(aa_frames))
示例#3
0
    def test_convert_utr_nt(self):
        """Check the third output of ``convert_start_stop_to_nt``.

        After translating ``test_data/test_frames.fa`` with
        ``both_strands=False`` and keeping only entries whose ``orf_length``
        is at least 6, the 3' UTR lengths must equal ``[5, 4, 3]``.
        """
        fasta_records = read_fasta('test_data/test_frames.fa')

        translated = translate_all_frames(fasta_records, both_strands=False)
        ids, aa_frames, frame, strand, seq_length_nt, seq_length = translated

        longest = find_longest_orfs(aa_frames)
        (orf_sequence, start_sites, stop_sites, orf_length,
         last_aa_is_stop) = longest

        # Drop every entry whose ORF is shorter than the minimum length.
        long_enough = orf_length >= 6
        filtered = filter_objects(
            long_enough, aa_frames, frame, strand, seq_length_nt, ids,
            seq_length, start_sites, stop_sites, orf_sequence,
            last_aa_is_stop, orf_length)
        (aa_frames, frame, strand, seq_length_nt, ids, seq_length,
         start_sites, stop_sites, orf_sequence, last_aa_is_stop,
         orf_length) = filtered

        converted = convert_start_stop_to_nt(
            start_sites, stop_sites, seq_length_nt, orf_length, frame,
            last_aa_is_stop)
        _start_nt, _stop_nt, utr3_length = converted

        expected_utr3 = np.array([5, 4, 3])
        utr3_matches = np.all(utr3_length == expected_utr3)
        self.assertTrue(utr3_matches)
示例#4
0
    def test_translate_output_format(self):
        """Check the length and dtypes of ``translate_all_frames`` output.

        The result must hold exactly six elements.  Positions 0, 1 and 3
        must be numpy string arrays and positions 2, 4 and 5 must have an
        ``int64`` dtype.
        """
        sequences = read_fasta('test_data/test_trans_all_frames.fa')
        output = translate_all_frames(sequences, both_strands=False)

        # Positions follow the unpacking order used by the other tests in
        # this file: ids, aa_frames, frame, strand, seq_length_nt, seq_length.
        string_positions = (0, 1, 3)
        int_positions = (2, 4, 5)

        # Assert each property on its own so that a failure report names the
        # offending element instead of a bare "False is not true".  The
        # length is checked first so a short result fails cleanly rather
        # than raising IndexError while the dtypes are being read.
        self.assertEqual(len(output), 6,
                         'expected exactly six output arrays')
        for position in string_positions:
            self.assertEqual(
                output[position].dtype.type, np.str_,
                'output[{}] is not a numpy string array'.format(position))
        for position in int_positions:
            self.assertEqual(
                output[position].dtype, 'int64',
                'output[{}] does not have dtype int64'.format(position))
示例#5
0
 def test_translate_seq_length(self):
     """Check ``seq_length`` from a ``both_strands=False`` translation.

     The last output of ``translate_all_frames`` must equal ``[8, 7, 7]``
     for the ``test_trans_all_frames.fa`` fixture.
     """
     fasta_records = read_fasta('test_data/test_trans_all_frames.fa')
     translated = translate_all_frames(fasta_records, both_strands=False)
     # Six-way unpack kept on purpose: it fails if the arity ever changes.
     _ids, _aa_frames, _frame, _strand, _nt_lengths, seq_length = translated

     expected_lengths = np.array([8, 7, 7])
     lengths_match = np.all(seq_length == expected_lengths)
     self.assertTrue(lengths_match)
示例#6
0
 def test_translate_alltransframes(self):
     """Check the translated frames from a ``both_strands=False`` run.

     The ``aa_frames`` output of ``translate_all_frames`` must equal the
     three expected translations of the ``test_trans_all_frames.fa`` fixture.
     """
     fasta_records = read_fasta('test_data/test_trans_all_frames.fa')
     translated = translate_all_frames(fasta_records, both_strands=False)
     # Six-way unpack kept on purpose: it fails if the arity ever changes.
     _ids, aa_frames, _frame, _strand, _nt_lengths, _aa_lengths = translated

     expected_frames = np.array(['MANATEE*', 'WRTRPKN', 'GERDRRI'])
     frames_match = np.all(aa_frames == expected_frames)
     self.assertTrue(frames_match)