Exemplo n.º 1
0
def _stratify(nest, rank, filterfunc, sget, sset, eget, eset):
    '''Generator implementing the "subject overlap truncation scheme" for
    a nest of hits.

    Each round removes one hit from the nest via utils.popmax keyed on the
    value of *rank* (presumably the highest-ranked hit - popmax is defined
    outside this function) and yields it. Every remaining hit is then cut
    down so that it no longer covers the yielded hit's range, and pieces
    rejected by *filterfunc* are discarded. Rounds continue until the nest
    is empty.

    This is the abstract core of "stratify": apart from *nest*, every
    argument is a function, so the concrete hit representation stays out
    of the algorithm.

    Arguments:
      nest       -- iterable of hits.
      rank       -- hit -> value used to pick the next hit to yield.
      filterfunc -- hit -> bool; hits for which it is false are dropped,
                    both on entry and after every truncation.
      sget, eget -- hit -> start / end coordinate (used as inclusive).
      sset, eset -- (hit, value) -> hit with its start / end set to value.
                    Whether they copy or mutate is not visible from here.

    Yields the selected hits, one per round.
    '''
    def remainders(winner, hit):
        '''Yield what is left of *hit* outside the range of *winner*:
        a left piece, a right piece, both, or nothing.'''
        # Coordinates are read in this fixed order: both starts, then both ends.
        win_start = sget(winner)
        hit_start = sget(hit)
        win_end = eget(winner)
        hit_end = eget(hit)

        # Left piece: produced only if *hit* starts strictly before the
        # position just left of *winner*; its end is clipped to that position.
        left_limit = win_start - 1
        if hit_start < left_limit:
            yield eset(hit, min(hit_end, left_limit))

        # Right piece: produced only if *hit* ends strictly after the
        # position just right of *winner*; its start is clipped likewise.
        right_limit = win_end + 1
        if right_limit < hit_end:
            yield sset(hit, max(right_limit, hit_start))

    # Work on (hit, rank) pairs so the rank is computed once per hit/piece.
    ranked = [(hit, rank(hit)) for hit in nest if filterfunc(hit)]
    while ranked:
        best, _ = utils.popmax(ranked, key=_itemget(1))
        yield best

        # Replace every remaining entry, in place, by its surviving pieces
        # (zero, one or two of them), then step past whatever was inserted.
        pos = 0
        while pos < len(ranked):
            survivors = [(piece, rank(piece))
                         for piece in remainders(best, ranked[pos][0])
                         if filterfunc(piece)]
            ranked[pos:pos + 1] = survivors
            pos += len(survivors)
Exemplo n.º 2
0
def classifyrecords(seq,overlap):
    '''Takes a sequence of blast hits; picks out as 'nests' sequences of
    adjacent hits that overlap with a neighbor. Returns a pair of lists:
    the first contains records that were not part of a nest, and the second
    contains nests, i.e.  lists of the resulting 'extracted' hits from that
    nest.

    Arguments:
      seq     -- sequence of blast hits to classify.
      overlap -- threshold: two hits x, y are treated as linked when
                 s_overlap(x, y) >= overlap.

    The grouping itself is delegated to utils.components and the split into
    singletons versus larger groups to utils.bifilter (both defined outside
    this function; presumably components returns groups of linked hits and
    bifilter returns the groups matching the key first - confirm against
    utils).

    This function does not stratify those nests - for use in the final
    algorithm, they must go through the function stratify().
    '''
    sings,nests = utils.bifilter(
                     utils.components(seq,lambda x,y: s_overlap(x,y)>=overlap),
                     key=lambda x: len(x)==1)
    # Unwrap each one-element group to its single record. list() keeps the
    # documented "pair of lists" contract on Python 3, where map() alone
    # would return a lazy one-shot iterator.
    return (list(map(_itemget(0),sings)),nests)