Exemplo n.º 1
0
def exon_matching(exon_tree,
                  ref_exon,
                  match_extend_tolerate_left,
                  match_extend_tolerate_right,
                  intervals_adjacent=True):
    """
    Find a continuous exon path in exon_tree that matches ref_exon.

    exon_tree --- an IntervalTree made from .baseC/.altC using exon detection;
                  probably only short read data. It is queried with
                  exon_tree.find(ref_exon.start, ref_exon.end) and the result
                  must support truth-testing, len(), indexing and slicing.
    ref_exon --- an Interval (anything with .start/.end) representing an exon;
                 probably from PacBio
    match_extend_tolerate_left --- maximum allowed |start difference| between
                 the first node of the path and ref_exon
    match_extend_tolerate_right --- maximum allowed |end difference| between
                 the last node of the path and ref_exon
    intervals_adjacent --- if True (default), the hits are only accepted when
                 c_branch.intervals_all_adjacent(hits) holds; if False that
                 check is skipped entirely

    Returns the slice of hits forming the path (1 or more nodes), or None when
    there are no hits, the adjacency check fails, or no start/end pair falls
    within the tolerances.

    The hits are scanned in the order returned by find(): the first hit whose
    start is within tolerance is paired with the furthest (highest-index) hit
    at or after it whose end is within tolerance.
    """
    matches = exon_tree.find(ref_exon.start, ref_exon.end)
    if not matches:  # likely due to very low coverage on transcript
        return None

    # All hits must be adjacent (no splicing! this is just one integral exon).
    if intervals_adjacent and not c_branch.intervals_all_adjacent(matches):
        return None  # could not find evidence for this exon

    last = len(matches) - 1
    for i, candidate in enumerate(matches):
        if abs(candidate.start - ref_exon.start) > match_extend_tolerate_left:
            continue
        # Start is close enough; walk back from the far end so that the
        # furthest acceptable end wins. `range` (not `xrange`) keeps this
        # runnable on both Python 2 and Python 3.
        for j in range(last, i - 1, -1):
            if abs(matches[j].end - ref_exon.end) <= match_extend_tolerate_right:
                return matches[i:(j + 1)]
    return None
Exemplo n.º 2
0
def exon_matching(exon_tree, ref_exon, match_extend_tolerate_left, match_extend_tolerate_right, intervals_adjacent=True):
    """
    Find a continuous exon path in exon_tree that matches ref_exon.

    exon_tree --- an IntervalTree made from .baseC/.altC using exon detection;
                  probably only short read data. Queried via
                  exon_tree.find(ref_exon.start, ref_exon.end); the result must
                  support truth-testing, len(), indexing and slicing.
    ref_exon --- an Interval (anything with .start/.end) representing an exon;
                 probably from PacBio
    match_extend_tolerate_left --- maximum |start difference| tolerated between
                 the first node of the path and ref_exon
    match_extend_tolerate_right --- maximum |end difference| tolerated between
                 the last node of the path and ref_exon
    intervals_adjacent --- when True (default) the hits must satisfy
                 c_branch.intervals_all_adjacent(hits); when False the
                 adjacency check is skipped

    Returns a slice of the hits (1 or more nodes) or None if there are no
    hits, the adjacency check fails, or nothing fits within the tolerances.
    """
    matches = exon_tree.find(ref_exon.start, ref_exon.end)
    if not matches:  # likely due to very low coverage on transcript
        return None

    # check that all matches are adjacent (no splicing! this is one integral exon)
    if intervals_adjacent and not c_branch.intervals_all_adjacent(matches):
        return None  # could not find evidence for this exon

    n_matches = len(matches)
    for i, node in enumerate(matches):
        d_start = abs(node.start - ref_exon.start)
        if d_start > match_extend_tolerate_left:
            continue
        # Start is within tolerance: pick the furthest node (scanning from the
        # end back to i) whose end is also within tolerance. `range` is used
        # instead of the Python 2-only `xrange` so this also runs on Python 3.
        for j in range(n_matches - 1, i - 1, -1):
            if abs(matches[j].end - ref_exon.end) <= match_extend_tolerate_right:
                return matches[i:(j + 1)]
    return None