def _stratify(nest,rank,filterfunc,sget,sset,eget,eset): '''This function implements the "subject overlap truncation scheme" for nests. Specifically, it pops the "best" element of the nest (according to the argument *rank*), truncates the rest to have no overlap with it (according to the argument *sget*), and filters the nest (using the argument *filterfunc*). The process repeats until the nest is exhausted. This is to be understood as an abstract version of the "stratify" function, with particulars represented abstractly to aid understanding and readability. In particular, all arguments except for *nest* are functions.''' def s_hit(main,other): '''Strips off the sequence part of the *main* hit from the *other*, and yields two hits, one, or none depending on whether or how they overlap.''' [[s,s_],[e,e_]] = [[f(x) for x in (main,other)] for f in (sget,eget)] if s_ < s-1: yield eset(other,min(e_,s-1)) if e+1 < e_: yield sset(other,max(e+1,s_)) nest = [(x,rank(x)) for x in nest if filterfunc(x)] while nest: (h,r),i = utils.popmax(nest,key=_itemget(1)),0 ; yield h while i < len(nest): results = [(x,rank(x)) for x in s_hit(h,nest[i][0]) if filterfunc(x)] nest[i:i+1] = results i += len(results)
def classifyrecords(seq, overlap):
    '''Split a sequence of blast hits into stand-alone records and nests.

    Takes a sequence of blast hits and picks out as 'nests' the sequences of
    adjacent hits that overlap with a neighbor; two hits are linked when
    s_overlap(x, y) >= *overlap*.

    Returns a pair of lists: the first contains the records that were not
    part of a nest, and the second contains the nests, i.e. the groups of
    hits taken from each nest.

    This function does not stratify those nests - for use in the final
    algorithm, they must go through the function stratify().
    '''
    # Presumably utils.components groups `seq` into runs of hits connected by
    # the given relation -- verify against utils.
    groups = utils.components(seq, lambda x, y: s_overlap(x, y) >= overlap)
    # Groups of exactly one hit are the stand-alone records; the rest are nests.
    sings, nests = utils.bifilter(groups, key=lambda x: len(x) == 1)
    # Unwrap each one-element group. list() keeps the documented "list" return
    # under Python 3, where map() alone yields a one-shot iterator.
    return (list(map(_itemget(0), sings)), nests)