Exemplo n.º 1
0
    def test_find_longest_orf_stopsites(self):
        """Compare the stop site reported with and without a trailing ``*``.

        The stop site is the third element of the ``find_longest_orfs``
        result. For a sequence ending in ``*`` it is expected to sit exactly
        one position after the stop site of the same sequence without it.
        """
        stop_index = 2
        result_with_stop = find_longest_orfs(['AMEATBALL*'])
        result_without_stop = find_longest_orfs(['AMEATBALL'])

        expected = result_without_stop[stop_index] + 1
        self.assertEqual(result_with_stop[stop_index], expected)
Exemplo n.º 2
0
    def test_convert_nt_output_format(self):
        """Check the shape and dtypes of ``convert_start_stop_to_nt`` output.

        The result must have three elements, each with dtype ``int64``.
        Every property is asserted on its own, length first, so a failure
        names the element that is wrong instead of reporting a bare
        ``False is not true``.
        """
        sequences = read_fasta('test_data/test_frames.fa')
        (ids, aa_frames, frame, strand, seq_length_nt,
         seq_length) = translate_all_frames(sequences, both_strands=False)
        (orf_sequence, start_sites, stop_sites, orf_length,
         last_aa_is_stop) = find_longest_orfs(aa_frames)

        # Keep only the entries whose ORF length is at least 6.
        keep = orf_length >= 6
        (aa_frames, frame, strand, seq_length_nt, ids, seq_length,
         start_sites, stop_sites, orf_sequence, last_aa_is_stop,
         orf_length) = filter_objects(
             keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length,
             start_sites, stop_sites, orf_sequence, last_aa_is_stop,
             orf_length)

        output = convert_start_stop_to_nt(start_sites, stop_sites,
                                          seq_length_nt, orf_length, frame,
                                          last_aa_is_stop)

        # Assert the length before indexing so that a short result shows up
        # as a test failure rather than an IndexError.
        self.assertEqual(len(output), 3)
        for index, array in enumerate(output):
            self.assertEqual(array.dtype, 'int64',
                             'unexpected dtype for output[%d]' % index)
Exemplo n.º 3
0
    def test_add_upstream_aa_exactupstream(self):
        """Upstream region whose length equals ``min_upstream_length``.

        ``BEAR`` (four residues) precedes the first ``M`` and
        ``min_upstream_length`` is 4; the first output is expected to be the
        complete input sequence.
        """
        peptides = np.array(['BEARMEATBALLS'])
        orfs, starts, stops, lengths, _ = find_longest_orfs(peptides)

        extended = add_upstream_aas(
            peptides, stops, starts, orfs, lengths, min_upstream_length=4)

        self.assertEqual(extended[0], 'BEARMEATBALLS')
Exemplo n.º 4
0
    def test_add_upstream_aa_noupstream(self):
        """Sequence that offers no viable upstream AAs.

        A ``*`` sits directly before ``MEATBALLS`` and
        ``min_upstream_length`` is 5; the first output is expected to be
        ``MEATBALLS`` with nothing prepended.
        """
        peptides = np.array(['BEAREATS*MEATBALLS'])
        orfs, starts, stops, lengths, _ = find_longest_orfs(peptides)

        extended = add_upstream_aas(
            peptides, stops, starts, orfs, lengths, min_upstream_length=5)

        self.assertEqual(extended[0], 'MEATBALLS')
Exemplo n.º 5
0
    def test_add_upstream_aa(self):
        """Expected output for a sequence with a long upstream region.

        With ``min_upstream_length`` set to 5, the first output is expected
        to be the complete input sequence.
        """
        peptides = np.array(['ALONGERUPSTREAMMEATBALL'])
        orfs, starts, stops, lengths, _ = find_longest_orfs(peptides)

        extended = add_upstream_aas(
            peptides, stops, starts, orfs, lengths, min_upstream_length=5)

        self.assertEqual(extended[0], 'ALONGERUPSTREAMMEATBALL')
Exemplo n.º 6
0
    def test_find_longest_orf_output_format(self):
        """Check the shape and dtypes of the ``find_longest_orfs`` output.

        The result must have five elements with dtypes ``<U8``, ``int64``,
        ``int64``, ``int64`` and ``bool``, in that order. Every property is
        asserted on its own, length first, so a failure names the element
        that is wrong instead of reporting a bare ``False is not true``.
        """
        long_orf = find_longest_orfs(['AMEATBALL'])

        # Assert the length before indexing so that a short result shows up
        # as a test failure rather than an IndexError.
        self.assertEqual(len(long_orf), 5)

        expected_dtypes = ('<U8', 'int64', 'int64', 'int64', 'bool')
        for index, expected in enumerate(expected_dtypes):
            self.assertEqual(long_orf[index].dtype, expected,
                             'unexpected dtype for long_orf[%d]' % index)
Exemplo n.º 7
0
    def test_convert_utr_nt(self):
        """Check the ``utr3_length`` values computed for the test fixture.

        After dropping entries whose ORF length is below 6, the third output
        of ``convert_start_stop_to_nt`` is expected to equal ``[5, 4, 3]``.
        ``np.testing.assert_array_equal`` is used so that a failure reports
        the mismatching values and a shape mismatch cannot pass through
        broadcasting.
        """
        sequences = read_fasta('test_data/test_frames.fa')

        (ids, aa_frames, frame, strand, seq_length_nt,
         seq_length) = translate_all_frames(sequences, both_strands=False)
        (orf_sequence, start_sites, stop_sites, orf_length,
         last_aa_is_stop) = find_longest_orfs(aa_frames)

        # Keep only the entries whose ORF length is at least 6.
        keep = orf_length >= 6
        (aa_frames, frame, strand, seq_length_nt, ids, seq_length,
         start_sites, stop_sites, orf_sequence, last_aa_is_stop,
         orf_length) = filter_objects(
             keep, aa_frames, frame, strand, seq_length_nt, ids, seq_length,
             start_sites, stop_sites, orf_sequence, last_aa_is_stop,
             orf_length)

        start_site_nt, stop_site_nt, utr3_length = convert_start_stop_to_nt(
            start_sites, stop_sites, seq_length_nt, orf_length, frame,
            last_aa_is_stop)

        np.testing.assert_array_equal(utr3_length, np.array([5, 4, 3]))
Exemplo n.º 8
0
    def test_add_upstream_aa_multi(self):
        """Run ``add_upstream_aas`` on several sequences in one call.

        With ``min_upstream_length`` set to 5, the first output is expected
        to contain each of the three input sequences in full.
        ``np.testing.assert_array_equal`` is used so that a failure reports
        the mismatching entries and a shape mismatch cannot pass through
        broadcasting.
        """
        aa_sequence = np.array([
            'ALONGERUPSTREAMMEATBALL', 'TWODOZENMEATBALLS',
            'BROWNBEARMAULSGIANTSQUID'
        ])
        (orf_sequence, start_sites, stop_sites, orf_length,
         last_aa_is_stop) = find_longest_orfs(aa_sequence)
        output = add_upstream_aas(aa_sequence,
                                  stop_sites,
                                  start_sites,
                                  orf_sequence,
                                  orf_length,
                                  min_upstream_length=5)

        # Built separately from aa_sequence so the expectation does not
        # depend on whatever the functions under test do with their input.
        expected = np.array([
            'ALONGERUPSTREAMMEATBALL', 'TWODOZENMEATBALLS',
            'BROWNBEARMAULSGIANTSQUID'
        ])
        np.testing.assert_array_equal(output[0], expected)
Exemplo n.º 9
0
    def test_add_upstream_aa_output(self):
        """Check the shape and dtypes of the ``add_upstream_aas`` output.

        The result must have three elements: the first with a ``np.str_``
        dtype type, the second and third with dtype ``int64``. Every
        property is asserted on its own, length first, so a failure names
        the element that is wrong instead of reporting a bare
        ``False is not true``.
        """
        aa_sequence = np.array(['ALONGERUPSTREAMMEATBALL'])
        (orf_sequence, start_sites, stop_sites, orf_length,
         last_aa_is_stop) = find_longest_orfs(aa_sequence)
        output = add_upstream_aas(aa_sequence,
                                  stop_sites,
                                  start_sites,
                                  orf_sequence,
                                  orf_length,
                                  min_upstream_length=5)

        # Assert the length before indexing so that a short result shows up
        # as a test failure rather than an IndexError.
        self.assertEqual(len(output), 3)
        self.assertEqual(output[0].dtype.type, np.str_,
                         'output[0] is not a numpy string array')
        self.assertEqual(output[1].dtype, 'int64',
                         'unexpected dtype for output[1]')
        self.assertEqual(output[2].dtype, 'int64',
                         'unexpected dtype for output[2]')
Exemplo n.º 10
0
 def test_find_longest_orf_multiple(self):
     """The first output holds one entry per input sequence."""
     peptides = ['AMEATBALL*', 'TWOMEATBALLS']
     orf_sequences = find_longest_orfs(peptides)[0]
     self.assertEqual(len(orf_sequences), len(peptides))
Exemplo n.º 11
0
 def test_find_longest_orf_trimmed(self):
     """The trailing ``*`` is trimmed from the reported ORF sequence."""
     orf_sequences = find_longest_orfs(['AMEATBALL*'])[0]
     expected = ['MEATBALL']
     self.assertEqual(orf_sequences, expected)