def _extractWindow(wig, expanded, center, width):
    '''Return the 2*width+1 values of `expanded` centred on `center`.

    `wig[0, 0]` is used as the coordinate of `expanded[0]`, so the window
    is located by subtracting it from the requested coordinates. Any part
    of the window that falls outside `expanded` is left at zero.
    '''
    size = 2 * width + 1
    window = np.zeros(size)
    firstPos = wig[0, 0]
    start = int(center - width - firstPos)
    end = int(center + width - firstPos)
    length = expanded.shape[0]
    if end >= 0 and start < length:
        # Clip both the destination and the source so that a window hanging
        # over either edge of the signal is copied into the matching slots.
        window[abs(min(0, start)):min(size, size + length - end - 1)] = \
            expanded[max(0, start):min(length, end + 1)]
    return window


def main(args):
    '''Collect strand-specific signal around each position, write and plot it.

    Reads from `args`: forwardWig, reverseWig, positionFile, chromCol,
    startCol, endCol, strandCol, offset, format, width, thresh and out.

    For every (position, strand) returned by loadPos, a window of
    2*width+1 values is taken from the forward and from the reverse wig.
    A position is dropped when the raw sum of either window is below
    mappedCount * args.thresh / 10**6. For positions on the '-' strand the
    two windows are swapped and reversed. Kept rows are the forward window
    followed by the reverse window, scaled by 10**6 / mappedCount, sorted
    by their total, written to args.out + ".txt" and passed to plot().

    When no position is kept the text file is still written, but plotting
    is skipped because an empty table cannot be split into two halves.
    '''
    import sys  # local import: only needed for the "nothing to plot" message

    fwig = gs.loadWig(args.forwardWig, smooth=False)
    rwig = gs.loadWig(args.reverseWig, smooth=False)
    mappedCount = getMappedCount(fwig, rwig)
    poses = loadPos(args.positionFile, args.chromCol, args.startCol,
                    args.endCol, args.strandCol, args.offset, args.format)

    width = args.width
    size = 2 * width + 1
    # The threshold does not depend on the position, so compute it once.
    thresh = mappedCount * args.thresh / 10**6

    values = []
    for chrom in poses:
        # A chromosome missing from one wig contributes an all-zero window.
        chromFwig = None
        chromRwig = None
        if chrom in fwig:
            chromFwig = gs.expandWig(fwig[chrom], 0, 1, False)
        if chrom in rwig:
            chromRwig = gs.expandWig(rwig[chrom], 0, 1, False)

        for p, strand in poses[chrom]:
            if chromFwig is not None:
                fValues = _extractWindow(fwig[chrom], chromFwig, p, width)
            else:
                fValues = np.zeros(size)
            if chromRwig is not None:
                rValues = _extractWindow(rwig[chrom], chromRwig, p, width)
            else:
                rValues = np.zeros(size)

            if rValues.sum() < thresh or fValues.sum() < thresh:
                continue

            if strand == '-':
                # Seen from the '-' strand the reverse signal plays the role
                # of the forward one, read in the opposite direction.
                fValues, rValues = rValues[::-1], fValues[::-1]

            values.append(
                10**6 * np.concatenate([fValues, rValues]) / mappedCount)

    values.sort(key=lambda row: sum(row))
    writeAll(values, args.out + ".txt")

    if not values:
        # np.array([]) is one-dimensional, so the 2-D slicing below would
        # raise an IndexError that says nothing about the real cause.
        sys.stderr.write(
            "No positions passed the threshold; skipping plot.\n")
        return

    values = np.array(values)
    plot(values[:, :size], values[:, size:], width, args.out)
def _writeRows(handle, rows):
    '''Write every row to `handle` as one tab-separated line.'''
    for row in rows:
        handle.write('\t'.join([str(field) for field in row]))
        handle.write('\n')


def _matchPeaks(fp, rp, fw, rw, ulimit, dlimit, offset, expandCol):
    '''Match the forward peaks `fp` to the reverse peaks `rp` of one chromosome.

    Returns (pairF, pairR), two lists parallel to `fp` and `rp`. An entry is
    (-1, 0, 0) for an unpaired peak, otherwise
    (index of the mate, score, distance, position) where the last three
    values are what getScore returned for that combination.

    Every reverse peak whose end is >= forward start - ulimit and whose
    start is <= forward end + dlimit is scored against the forward peak.
    The bisect lookups rely on `rp` being ordered so that both its starts
    and its ends are ascending.
    '''
    unpaired = (-1, 0, 0)  # (index of the mate, score, distance)
    expandedFw = gs.expandWig(fw, offset, expandCol, False)
    expandedRw = gs.expandWig(rw, offset, expandCol, False)

    pairF = [unpaired] * len(fp)
    pairR = [unpaired] * len(rp)
    # One collection per forward peak holding its not-yet-tried candidates,
    # ordered by score so that [-1] is always the best remaining one.
    fprefer = [SortedCollection(key=itemgetter(1)) for _ in fp]
    rstarts = [r[1] for r in rp]
    rends = [r[2] for r in rp]
    pending = []  # forward peaks that still need a mate, oldest first

    def claim(fidx, cand):
        # Give reverse peak cand[0] to forward peak fidx. A previous mate
        # loses its pairing and is queued to try its remaining candidates.
        ridx = cand[0]
        previous = pairR[ridx][0]
        if previous != -1:
            pairF[previous] = unpaired
            pending.append(previous)
        pairF[fidx] = cand
        pairR[ridx] = (fidx,) + cand[1:]

    for i, currFp in enumerate(fp):
        fwFirst = fw[0, 0]
        start = currFp[1]
        end = currFp[2]
        es = start - ulimit
        ee = end + dlimit
        currFw = expandedFw[int(max(0, start - fwFirst)) + offset:
                            int(max(0, end - fwFirst)) + offset + 1]
        start = max(start, fwFirst)
        end = max(end, fwFirst)
        flength = end - start
        si = bisect.bisect_left(rends, es)
        ei = bisect.bisect_right(rstarts, ee)

        best = None
        bestScore = 0  # candidates must score above zero to be considered
        for idx in range(si, ei):
            currRp = rp[idx]
            rwFirst = rw[0, 0]
            rstart = max(rwFirst, currRp[1])
            rend = min(rw[-1, 0], currRp[2] + flength)
            currRw = expandedRw[int(max(0, start - rwFirst)) + offset:
                                int(max(0, rend - rwFirst)) + offset + 1]
            score, dist, rpos = getScore(currFw, currRw, start, rstart)
            cand = (idx, score, dist, rpos)
            fprefer[i].insert(cand)
            if score > bestScore:
                bestScore = score
                best = cand

        if best is not None and best[1] > pairR[best[0]][1]:
            claim(i, best)
        else:
            pending.append(i)
        if best is not None:
            # The best candidate has been tried, successfully or not.
            fprefer[i].remove(best)

    # Each queued forward peak tries its remaining candidates from best to
    # worst. A reverse peak accepts only a strictly higher score than the
    # one it currently holds, so a score of zero or less never pairs.
    cursor = 0
    while cursor < len(pending):
        u = pending[cursor]
        cursor += 1
        prefs = fprefer[u]
        while len(prefs) > 0:
            cand = prefs[-1]
            prefs.remove(cand)
            if pairR[cand[0]][1] < cand[1]:
                claim(u, cand)
                break

    return pairF, pairR


def pair(fpeaks, rpeaks, fwig, rwig, ulimit, dlimit, prefix):
    '''Pair forward-strand peaks with reverse-strand peaks and write the result.

    Assumes that the peaks on one strand are mutually exclusive, i.e. they
    do not overlap each other. In that case the starts and the ends of the
    peaks have the same ordering: when the starts are sorted, the ends are
    sorted as well.

    fpeaks, rpeaks -- dict: chromosome -> list of peak records. The code
                      reads record[0] (chromosome), record[1] (start),
                      record[2] (end) and, for paired peaks, record[3],
                      [4], [6], [7] and [8]. record[1] is decremented in
                      place, so the caller's records are modified.
    fwig, rwig     -- dict: chromosome -> 2-D array whose [0, 0] and
                      [-1, 0] entries are used as first and last position.
    ulimit, dlimit -- how far before the start / after the end of a
                      forward peak a reverse peak may lie to be a candidate.
    prefix         -- output prefix; writes prefix + "_singletons.bed",
                      "_pairs.gff" and "_pairs.narrowPeak".

    Only chromosomes present in both fpeaks and rpeaks are processed.
    Results are written chromosome by chromosome.
    '''
    offset = 5
    expandCol = 1

    with open(prefix + "_singletons.bed", 'w') as out1, \
            open(prefix + "_pairs.gff", "w") as out2, \
            open(prefix + "_pairs.narrowPeak", "w") as out3:
        for chrom in fpeaks:
            if chrom not in rpeaks:
                continue
            print(chrom)
            fp = fpeaks[chrom]
            rp = rpeaks[chrom]

            pairF, pairR = _matchPeaks(fp, rp, fwig[chrom], rwig[chrom],
                                       ulimit, dlimit, offset, expandCol)

            singletons = []
            pairs = []
            pairsNarrow = []

            for i, f in enumerate(pairF):
                currFp = fp[i]
                currFp[1] -= 1
                if f[0] == -1:
                    singletons.append(currFp)
                    continue

                currRp = rp[f[0]]
                currRp[1] -= 1
                pairStart = (2 * f[3] - f[2]) / 2
                pairEnd = pairStart + 1
                pairs.append([currFp[0], '.', '.', pairStart - 10,
                              pairEnd + 10, f[1], '.', '.',
                              'cw_distance=' + str(f[2])])

                half_len = int(f[2] / 2)
                narrowStart = max(currFp[1] + half_len,
                                  currRp[1] - half_len) - half_len
                narrowEnd = min(currFp[2] + half_len,
                                currRp[2] - half_len) + half_len
                pairsNarrow.append([
                    currFp[0], narrowStart, narrowEnd,
                    currFp[3] + '_' + currRp[3],
                    (currFp[4] + currRp[4]) / 2,
                    '.',
                    (currFp[6] + currRp[6]) / 2,
                    (currFp[7] + currRp[7]) / 2,
                    (currFp[8] + currRp[8]) / 2,
                    f[3] - half_len - narrowStart,
                    f[2]])

            for i, f in enumerate(pairR):
                # NOTE(review): paired reverse peaks were already shifted in
                # the loop above, so their start ends up shifted twice in the
                # caller's list; the written rows only use the first shift.
                rp[i][1] -= 1
                if f[0] == -1:
                    singletons.append(rp[i])

            singletons.sort(key=itemgetter(0, 1, 2))
            # NOTE(review): fields 1 and 2 of a pair row are always '.', so
            # this stable sort only orders by field 0 - confirm whether
            # sorting by start position was intended.
            pairs.sort(key=itemgetter(0, 1, 2))

            print("singletons:  " + str(len(singletons)))
            print("pairs:  " + str(len(pairs)))

            _writeRows(out1, singletons)
            _writeRows(out2, pairs)
            _writeRows(out3, pairsNarrow)