Exemplo n.º 1
0
def test_fetch_seq_for_circular_DNA_when_beginning_is_negative():
    """
    Check fetch_seq on contig 'chrM' when ``beg`` is negative.

    The expected strings start with bases taken from the end of the
    sequence drawn below, followed by bases from its start.

    AATTCCGG
    012345678
    """
    expectations = [
        # (beg, end, expected sequence)
        (-1, 1, 'GA'),
        (-2, 3, 'GGAAT'),
    ]
    for beg, end, expected in expectations:
        assert fetch_seq(REF_FA, seqname='chrM', beg=beg, end=end) == expected
Exemplo n.º 2
0
def test_fetch_seq_for_circular_DNA_with_positive_beginning_and_ending_less_than_contig_length():
    """
    Check fetch_seq on contig 'chrM' when both coordinates lie inside the
    sequence drawn below (no wrap-around involved).

    AATTCCGG
    012345678
    """
    expectations = [
        # (beg, end, expected sequence)
        (0, 8, 'AATTCCGG'),
        (2, 6, 'TTCC'),
    ]
    for beg, end, expected in expectations:
        assert fetch_seq(REF_FA, seqname='chrM', beg=beg, end=end) == expected
Exemplo n.º 3
0
def test_fetch_seq_for_circular_DNA_when_ending_is_beyond_contig_length():
    """
    Check fetch_seq on contig 'chrM' when ``end`` exceeds the length of the
    sequence drawn below.

    The expected strings end with bases taken from the start of the
    sequence again.

    AATTCCGG
    012345678
    """
    expectations = [
        # (beg, end, expected sequence)
        (6, 11, 'GGAAT'),
        (7, 9, 'GA'),
    ]
    for beg, end, expected in expectations:
        assert fetch_seq(REF_FA, seqname='chrM', beg=beg, end=end) == expected
Exemplo n.º 4
0
        def test_fetch_seq_based_on_KLEAT_clv_minus_strand_chr9_GNAQ(self):
            """
            Minus-strand cleavage site on chromosome 9: check the single base
            at the site, the window produced by gen_coords, and the sequence
            fetched for that window.
            """
            contig = '9'
            window = 6
            # reported by KLEAT, converted to 0-based, corresponding to a T,
            # the last base of its 3'UTR
            clv = 80335189 - 1
            base_at_clv = fetch_seq(self.refseq, contig, clv, clv + 1)
            self.assertEqual(base_at_clv, 'T')

            # for '-' the window is expected to start at clv and extend right
            start, stop = gen_coords(clv, '-', window=window)
            self.assertEqual((start, stop), (clv, clv + window))
            window_seq = fetch_seq(self.refseq, contig, start, stop)
            self.assertEqual(window_seq, 'TTTTAT')
Exemplo n.º 5
0
        def test_fetch_seq_based_on_KLEAT_clv_minus_strand_chr2_NFE2L2(self):
            """
            Minus-strand cleavage site on chromosome 2: check the single base
            at the site, the window produced by gen_coords, and the sequence
            fetched for that window.
            """
            contig = '2'
            window = 10
            # reported by KLEAT, converted to 0-based; the base asserted at
            # this position is a G
            clv = 178095802 - 1

            base_at_clv = fetch_seq(self.refseq, contig, clv, clv + 1)
            self.assertEqual(base_at_clv, 'G')

            # for '-' the window is expected to start at clv and extend right
            start, stop = gen_coords(clv, '-', window=window)
            self.assertEqual((start, stop), (clv, clv + window))
            window_seq = fetch_seq(self.refseq, contig, start, stop)
            self.assertEqual(window_seq, 'GCCACTTTAT')
Exemplo n.º 6
0
 def test_fetch_seq_based_on_KLEAT_clv_plus_strand_chr12_DRAM1(self):
     """
     Plus-strand cleavage site on chromosome 12 (based on hg19): check the
     single base at the site, the window produced by gen_coords, and the
     sequence fetched for that window.
     """
     contig = '12'
     window = 6
     # KLEAT reports this coordinate 1-based, hence the -1 to make it
     # 0-based. It points to the end of the 3'UTR, with a hexamer
     # immediately upstream (see the screenshot included in the repo).
     clv = 102316878 - 1
     # 'a' is the last base on 3'UTR
     base_at_clv = fetch_seq(self.refseq, contig, clv, clv + 1)
     self.assertEqual(base_at_clv, 'a')

     # for '+' the window is expected to end just past clv
     start, stop = gen_coords(clv, '+', window=window)
     self.assertEqual((start, stop), (clv - window + 1, clv + 1))
     window_seq = fetch_seq(self.refseq, contig, start, stop)
     self.assertEqual(window_seq, 'aataaa')
Exemplo n.º 7
0
        def test_fetch_seq_bound_negative_coordinate_by_0(self):
            """
            A window whose start falls below 0 on contig GL000192.1 must
            yield the same sequence as the window starting at 0.
            """
            contig = 'GL000192.1'
            clv = 3
            start, stop = gen_coords(clv, '+', window=6)
            # start: clv - 6 + 1 == -2, stop: clv + 1 == 4
            self.assertEqual((start, stop), (-2, 4))

            first_four = 'GAAT'  # the first 4 bases of GL000192.1
            clipped = fetch_seq(self.refseq, contig, start, stop)
            self.assertEqual(clipped, first_four)

            # assert beginning is bound by 0
            from_zero = fetch_seq(self.refseq, contig, 0, stop)
            self.assertEqual(from_zero, first_four)
Exemplo n.º 8
0
def test_fetch_seq_for_circular_DNA_when_both_beg_and_end_are_less_than_0():
    """
    Check fetch_seq on contig 'MT' with negative ``beg`` and ``end`` values
    that are negative, zero, or just past the origin.

    This happens when the assembled contig crosses the starting point of a
    circular DNA.

              ┬          ┬
    AATTCCGGAC AATTCCGGAC AATTCCGGAC
    0123456789 0123456789 0123456789
    """
    expectations = [
        # (beg, end, expected sequence)
        (-1, 1, 'CA'),
        # end is negative as well
        (-3, -1, 'GA'),
        (-2, -1, 'A'),
        # end sits exactly on the origin
        (-3, 0, 'GAC'),
        (-2, 0, 'AC'),
    ]
    for beg, end, expected in expectations:
        assert fetch_seq(REF_FA, seqname='MT', beg=beg, end=end) == expected
Exemplo n.º 9
0
def do_skip(ref_b, cigar_val, ref_fa, seqname, ref_clv):
    """
    Handle BAM CREF_SKIP CIGAR, walking forward from ``ref_b``.

    :param ref_b: reference begin index of the skipped segment
    :param cigar_val: length of the skipped segment
    :param ref_fa: reference genome handle, passed straight to fetch_seq
    :param seqname: name of the reference sequence
    :param ref_clv: cleavage site in reference genome coordinate

    :return: a tuple of (next reference begin index, sequence to add).
        The sequence is empty when the segment ends at or before
        ``ref_clv``; otherwise it is fetched from whichever of ``ref_b``
        and ``ref_clv`` applies up to the end of the segment.
    """
    segment_end = ref_b + cigar_val

    if ref_b > ref_clv:
        # segment starts after clv: take all of it
        fetch_from = ref_b
    elif segment_end > ref_clv:
        # segment spans clv: take only the part from clv onwards
        fetch_from = ref_clv
    else:
        # still before clv: nothing to add
        return segment_end, ''

    return segment_end, fetch_seq(ref_fa, seqname, fetch_from, segment_end)
Exemplo n.º 10
0
def do_skip(ref_e, cigar_val, ref_fa, seqname, ref_clv):
    """
    Process a skipped reference segment, walking backward from ``ref_e``.

    :param ref_e: reference end index
    :param cigar_val: length of the skipped segment
    :param ref_fa: a pysam.libcalignmentfile.AlignmentFile instance for
        reference genome (as originally documented; it is only passed on
        to fetch_seq here -- TODO confirm the actual type)
    :param seqname: name of the reference sequence
    :param ref_clv: cleavage site in reference genome coordinate

    :return: a tuple of (next reference end index, sequence to add). The
        next end index is the begin index of this segment.
    """
    segment_beg = ref_e - cigar_val

    if ref_e < ref_clv:
        # segment ends before clv: take all of it
        skipped_seq = fetch_seq(ref_fa, seqname, segment_beg, ref_e)
    elif segment_beg < ref_clv:
        # segment spans clv: take the part up to and including clv
        skipped_seq = fetch_seq(ref_fa, seqname, segment_beg, ref_clv + 1)
    else:
        # segment starts at or after clv: nothing to add
        skipped_seq = ''

    return segment_beg, skipped_seq
Exemplo n.º 11
0
 def test_chr12_DRAM_plus_strand(self):
     """
     Plus-strand search on chromosome 12: confirm the reference hexamer,
     then check the (hexamer, id, location) tuple from search_ref_genome.
     """
     chrom = '12'
     clv = 102316878 - 1
     hexamer_beg = 102316872

     # confirm the corresponding seq in hg19
     ref_hexamer = fetch_seq(self.refseq, chrom, hexamer_beg, hexamer_beg + 6)
     self.assertEqual(ref_hexamer, 'aataaa')

     found = search_ref_genome(self.refseq, chrom, clv, '+', 50)
     self.assertEqual(found, ('AATAAA', 16, hexamer_beg))
Exemplo n.º 12
0
 def test_chr19_AKT2_minus_strand(self):
     """
     Minus-strand search on chromosome 19: confirm the reference hexamer
     (which reads 'TTTATT' in the fetched sequence), then check the
     (hexamer, id, location) tuple from search_ref_genome.
     """
     chrom = '19'
     clv = 40737005 - 1
     hexamer_loc = 40737011

     # confirm the corresponding seq in hg19: the 6 bases ending at
     # hexamer_loc (inclusive)
     ref_hexamer = fetch_seq(
         self.refseq, chrom, hexamer_loc + 1 - 6, hexamer_loc + 1)
     self.assertEqual(ref_hexamer, 'TTTATT')

     found = search_ref_genome(self.refseq, chrom, clv, '-', 50)
     self.assertEqual(found, ('AATAAA', 16, hexamer_loc))
Exemplo n.º 13
0
def test_fetch_seq_for_circular_DNA_when_ending_beg_is_larger_than_end():
    """
    Check fetch_seq on contig 'chrM' when ``beg`` is larger than ``end``
    and lies at or beyond the end of the sequence drawn below.

    AATTCCGG
    01234567890
    """
    expectations = [
        # (beg, end, expected sequence)
        (8, 1, 'A'),
        (8, 2, 'AA'),
        (8, 7, 'AATTCCG'),
        # one base further along
        (9, 1, ''),
        (9, 2, 'A'),
        # the original repeated this last case twice; once is enough
        (9, 3, 'AT'),
    ]
    for beg, end, expected in expectations:
        assert fetch_seq(REF_FA, seqname='chrM', beg=beg, end=end) == expected
Exemplo n.º 14
0
def search_ref_genome(refseq, chrom, clv, strand, window=50):
    """
    Search for a hexamer on the reference genome (unlike ``search``, which
    does not use the reference genome).

    :param refseq: an object returned by pysam.FastaFile, see TestSearch
        for its usage
    :param chrom: name of the reference sequence to fetch from
    :param clv: 0-based. Supposed to be the 1-based coordinate of the last
        base of the 3' UTR (e.g. output by KLEAT) minus 1; converted to
        0-based because pysam is 0-based.
    :param strand: strand passed on to gen_coords and search_hexamer
    :param window: window size passed on to gen_coords

    :return: a tuple of (hexamer, hexamer id (indicates strength),
        0-based hexamer location)
    """
    win_beg, win_end = gen_coords(clv, strand, window)
    window_seq = apautils.fetch_seq(refseq, chrom, win_beg, win_end)
    # -1 as it's 0-based
    last_idx = win_end - 1
    return search_hexamer(window_seq, strand, win_beg, last_idx)
Exemplo n.º 15
0
def test_fetch_seq_for_circular_DNA_when_both_beg_and_end_are_larger_than_seq_len():
    """
    Check fetch_seq on contig 'MT' when ``beg`` and/or ``end`` are at or
    beyond the length of the sequence drawn below.

    This happens when the assembled contig crosses the starting point of a
    circular DNA.

              ┬          ┬
    AATTCCGGAC AATTCCGGAC AATTCCGGAC
    0123456789 0123456789 0123456789
    """
    expectations = [
        # (beg, end, expected sequence)
        (0, 10, 'AATTCCGGAC'),
        # beg == 10
        (10, 11, 'A'),
        (10, 12, 'AA'),
        (10, 13, 'AAT'),
        (10, 14, 'AATT'),
        # beg == 11
        (11, 11, ''),
        (11, 12, 'A'),
        (11, 13, 'AT'),
        # beg == 12
        (12, 11, 'TTCCGGACA'),
        (12, 12, ''),
        (12, 13, 'T'),
        # beg == 21
        (21, 21, ''),
        (21, 23, 'AT'),
    ]
    for beg, end, expected in expectations:
        assert fetch_seq(REF_FA, seqname='MT', beg=beg, end=end) == expected
Exemplo n.º 16
0
def test_fetch_seq_for_circular_DNA_when_ending_beg_is_larger_than_end_set_seq_len_is_ten_for_convenience():
    """
    Check fetch_seq on contig 'MT' (drawn below, length ten) when ``beg``
    is larger than ``end``.

              ┬          ┬
    AATTCCGGAC AATTCCGGAC AATTCCGGAC
    0123456789 0123456789 0123456789
    """
    expectations = [
        # (beg, end, expected sequence)
        (8, 1, 'ACA'),
        (8, 2, 'ACAA'),
        # beg == 10
        (10, 1, 'A'),
        (10, 2, 'AA'),
        (10, 3, 'AAT'),
        (10, 4, 'AATT'),
        # beg == 11
        (11, 1, ''),
        (11, 2, 'A'),
        (11, 3, 'AT'),
        # beg == 21
        (21, 1, ''),
        (21, 3, 'AT'),
    ]
    for beg, end, expected in expectations:
        assert fetch_seq(REF_FA, seqname='MT', beg=beg, end=end) == expected
Exemplo n.º 17
0
def test_fetch_seq_for_linear_DNA_when_ending_is_beyond_contig_length():
    """
    Check fetch_seq on contig 'chr_mock1' when ``end`` is larger than the
    number of bases the expected strings show to be available.
    """
    expectations = [
        # (beg, end, expected sequence)
        (6, 11, 'GG'),
        (7, 9, 'G'),
    ]
    for beg, end, expected in expectations:
        assert fetch_seq(
            REF_FA, seqname='chr_mock1', beg=beg, end=end) == expected
Exemplo n.º 18
0
def test_fetch_seq_for_linear_DNA_when_beginning_is_negative():
    """
    Check fetch_seq on contig 'chr_mock1' when ``beg`` is negative: the
    expected strings contain only the bases from position 0 up to ``end``.
    """
    expectations = [
        # (beg, end, expected sequence)
        (-1, 1, 'A'),
        (-2, 3, 'AAT'),
    ]
    for beg, end, expected in expectations:
        assert fetch_seq(
            REF_FA, seqname='chr_mock1', beg=beg, end=end) == expected
Exemplo n.º 19
0
def test_fetch_seq_for_linear_DNA_with_positive_beginning_and_ending_less_than_contig_length():
    """
    Check fetch_seq on contig 'chr_mock1' when both coordinates are
    non-negative and inside the sequence.
    """
    expectations = [
        # (beg, end, expected sequence)
        (0, 8, 'AATTCCGG'),
        (2, 6, 'TTCC'),
    ]
    for beg, end, expected in expectations:
        assert fetch_seq(
            REF_FA, seqname='chr_mock1', beg=beg, end=end) == expected