Пример #1
0
def _extractWindow(expanded, origin, center, width):
    '''
    Cut the 2 * width + 1 values around `center` out of `expanded`.

    expanded -- per-position signal as returned by gs.expandWig
                (assumed to be a 1-D numpy array -- TODO confirm).
    origin   -- value subtracted from a coordinate to turn it into an
                index of `expanded` (the caller passes wig[0, 0]).
    center   -- coordinate the window is centred on.
    width    -- number of positions kept on each side of `center`.

    Window positions that fall outside `expanded` stay at zero.
    '''
    window = np.zeros(2 * width + 1)
    start = int(center - width - origin)
    end = int(center + width - origin)
    available = expanded.shape[0]
    # Only copy when the requested range overlaps the signal at all.
    if end >= 0 and start < available:
        # A negative start means the window begins before the signal, so
        # the copy lands that many cells into the window.
        dstStart = abs(min(0, start))
        # If the window runs past the signal, the tail stays zero.
        dstEnd = min(window.shape[0], window.shape[0] + available - end - 1)
        window[dstStart:dstEnd] = expanded[max(0, start):min(available, end + 1)]
    return window


def main(args):
    '''
    Collect forward/reverse signal windows around a list of positions.

    Reads args.forwardWig, args.reverseWig and args.positionFile, extracts
    a window of args.width positions on each side of every position from
    both strands, drops positions whose forward or reverse window sums to
    less than args.thresh (scaled by the mapped count per million), and
    scales the remaining windows to per-million-mapped values.  For
    positions on the '-' strand the two windows are swapped and reversed.

    The rows are sorted by their total signal, written to
    args.out + ".txt" through writeAll and handed to plot.
    '''
    fwig = gs.loadWig(args.forwardWig, smooth=False)
    rwig = gs.loadWig(args.reverseWig, smooth=False)
    mappedCount = getMappedCount(fwig, rwig)
    poses = loadPos(args.positionFile, args.chromCol, args.startCol,
                    args.endCol, args.strandCol, args.offset, args.format)

    width = args.width
    windowSize = 2 * width + 1
    # The cut-off does not depend on the position, so compute it once.
    thresh = mappedCount * args.thresh / 10**6

    values = []
    for chrom in poses:
        hasForward = chrom in fwig
        hasReverse = chrom in rwig
        if hasForward:
            chromFwig = gs.expandWig(fwig[chrom], 0, 1, False)
            forwardOrigin = fwig[chrom][0, 0]
        if hasReverse:
            chromRwig = gs.expandWig(rwig[chrom], 0, 1, False)
            reverseOrigin = rwig[chrom][0, 0]

        for p, strand in poses[chrom]:
            # A strand without data for this chromosome contributes zeros.
            if hasForward:
                tempFValues = _extractWindow(chromFwig, forwardOrigin, p, width)
            else:
                tempFValues = np.zeros(windowSize)
            if hasReverse:
                tempRValues = _extractWindow(chromRwig, reverseOrigin, p, width)
            else:
                tempRValues = np.zeros(windowSize)

            if tempRValues.sum() < thresh or tempFValues.sum() < thresh:
                continue

            if strand == '-':
                # Flip the orientation: each strand takes the other
                # strand's window, read in the opposite direction.
                tempFValues, tempRValues = tempRValues[::-1], tempFValues[::-1]

            values.append(10**6 * np.concatenate([tempFValues, tempRValues]) / mappedCount)

    values.sort(key=lambda k: (sum(k),))

    writeAll(values, args.out + ".txt")
    # The explicit shape keeps the array two-dimensional even when no
    # position passed the filter, so the column slices below stay valid.
    values = np.array(values).reshape(len(values), 2 * windowSize)
    plot(values[:, 0:windowSize], values[:, windowSize:], width, args.out)
Пример #2
0
def _writeRows(handle, rows):
    '''Write every row to `handle` as one tab-separated line.'''
    for row in rows:
        handle.write('\t'.join([str(field) for field in row]))
        handle.write('\n')


def _matchChromPeaks(fp, rp, fw, rw, ulimit, dlimit, offset, expandCol):
    '''
    Match the forward peaks `fp` of one chromosome to its reverse peaks `rp`.

    Every forward peak scores (through getScore) the reverse peaks found by
    bisecting the reverse ends/starts with its start - ulimit and
    end + dlimit, and proposes to them from the highest score down.  A
    reverse peak accepts a proposal only when the score is strictly higher
    than the score of its current match (0 when unmatched); the forward
    peak it drops goes back to proposing.

    Returns (pairF, pairR).  Entry i of each list is (-1, 0, 0) when peak i
    is unmatched, otherwise (index of the mate, score, distance, rpos).
    '''
    expandedFw = gs.expandWig(fw, offset, expandCol, False)
    expandedRw = gs.expandWig(rw, offset, expandCol, False)
    rstarts = [r[1] for r in rp]
    rends = [r[2] for r in rp]
    pairF = [(-1, 0, 0) for _ in fp]
    pairR = [(-1, 0, 0) for _ in rp]
    # Per forward peak: candidates ordered by score, best one last.
    fprefer = [SortedCollection(key=itemgetter(1)) for _ in fp]
    unpairedF = []

    def assign(fidx, candidate):
        '''Give reverse peak candidate[0] to forward peak fidx.'''
        ridx = candidate[0]
        previous = pairR[ridx][0]
        # Index 0 is a valid mate; only -1 means "no mate".
        if previous >= 0:
            pairF[previous] = (-1, 0, 0)
            unpairedF.append(previous)
        pairF[fidx] = candidate
        pairR[ridx] = (fidx,) + candidate[1:]

    # First pass: score all candidates and try the best one right away.
    for i, currfp in enumerate(fp):
        start = currfp[1]
        end = currfp[2]
        es = start - ulimit
        ee = end + dlimit
        currFw = expandedFw[max(0, start - fw[0, 0]) + offset:max(0, end - fw[0, 0]) + offset + 1]
        start = max(start, fw[0, 0])
        end = max(end, fw[0, 0])
        flength = end - start
        si = bisect.bisect_left(rends, es)
        ei = bisect.bisect_right(rstarts, ee)

        maxScore = 0
        best = None
        for idx in range(si, ei):
            currrp = rp[idx]
            rstart = max(rw[0, 0], currrp[1])
            rend = min(rw[-1, 0], currrp[2] + flength)
            # NOTE(review): the slice starts at the forward `start`, not at
            # `rstart` -- confirm against getScore that this is intended.
            currRw = expandedRw[max(0, start - rw[0, 0]) + offset:max(0, rend - rw[0, 0]) + offset + 1]
            tempScore, tempDist, tempRpos = getScore(currFw, currRw, start, rstart)
            candidate = (idx, tempScore, tempDist, tempRpos)
            fprefer[i].insert(candidate)
            if tempScore > maxScore:
                maxScore = tempScore
                best = candidate

        if best is None:
            # No candidate with a positive score (or no candidate at all).
            unpairedF.append(i)
            continue
        # The best candidate is tried now, so it must not be proposed again.
        fprefer[i].remove(best)
        if maxScore > pairR[best[0]][1]:
            assign(i, best)
        else:
            unpairedF.append(i)

    # Second pass: unmatched forward peaks walk down their remaining
    # candidates.  The work list only grows at its end, so it is read by
    # index instead of being modified while it is iterated.
    cursor = 0
    while cursor < len(unpairedF):
        u = unpairedF[cursor]
        cursor += 1
        prefs = fprefer[u]
        while len(prefs) > 0:
            candidate = prefs[-1]
            prefs.remove(candidate)
            if pairR[candidate[0]][1] < candidate[1]:
                assign(u, candidate)
                break

    return pairF, pairR


def _buildRecords(fp, rp, pairF, pairR):
    '''
    Turn the matching of one chromosome into output rows.

    Returns (singletons, pairs, pairs_narrow): the unmatched peaks, one
    GFF-style row per pair and one narrowPeak-style row per pair.  The
    start (column 1) of every peak in `fp` and `rp` is decremented by one,
    in place and exactly once.
    '''
    singletons = []
    pairs = []
    pairs_narrow = []
    for i, f in enumerate(pairF):
        currFp = fp[i]
        currFp[1] -= 1
        if f[0] == -1:
            singletons.append(currFp)
            continue
        currRp = rp[f[0]]
        currRp[1] -= 1
        pairStart = (2*f[3] - f[2])/2
        pairEnd = pairStart + 1
        pairs.append([currFp[0], '.', '.', pairStart-10, pairEnd+10, f[1], '.', '.', 'cw_distance='+str(f[2])])
        half_len = int(f[2]/2)
        narrowStart = max(currFp[1] + half_len, currRp[1] - half_len) - half_len
        narrowEnd = min(currFp[2] + half_len, currRp[2] - half_len) + half_len
        pairs_narrow.append([currFp[0], narrowStart, narrowEnd, currFp[3]+'_'+currRp[3], (currFp[4]+currRp[4])/2, '.', (currFp[6]+currRp[6])/2, (currFp[7]+currRp[7])/2, (currFp[8]+currRp[8])/2, f[3] - half_len - narrowStart, f[2]])

    for i, r in enumerate(pairR):
        if r[0] == -1:
            # Matched reverse peaks were already shifted in the loop above.
            rp[i][1] -= 1
            singletons.append(rp[i])
    return singletons, pairs, pairs_narrow


def pair( fpeaks, rpeaks, fwig, rwig, ulimit, dlimit, prefix):
    '''
    Pair forward-strand peaks with reverse-strand peaks, per chromosome.

    Assumes that the peaks on one strand are mutually exclusive, i.e. they
    do not overlap each other.  In that case the starts and the ends of
    the peaks have the same ordering, so when the starts are sorted the
    ends are sorted as well.

    fpeaks, rpeaks -- per-chromosome lists of peak rows; column 0 is the
                      chromosome, 1 the start and 2 the end, and columns
                      3, 4, 6, 7 and 8 are combined in the narrowPeak
                      output.  The rows are modified in place: every start
                      is decremented by one.
    fwig, rwig     -- per-chromosome wig data for the two strands.
    ulimit, dlimit -- how far before the start / after the end of a
                      forward peak reverse peaks are still considered.
    prefix         -- prefix of the three output files.

    Writes prefix + "_singletons.bed" (unmatched peaks),
    prefix + "_pairs.gff" and prefix + "_pairs.narrowPeak" (one row per
    pair each).  Chromosomes that are missing from rpeaks are skipped.
    '''
    offset = 5
    expandCol = 1
    with open(prefix + "_singletons.bed", 'w') as out1, \
            open(prefix + "_pairs.gff", "w") as out2, \
            open(prefix + "_pairs.narrowPeak", "w") as out3:
        for chrom in fpeaks:
            if chrom not in rpeaks:
                continue
            print(chrom)
            fp = fpeaks[chrom]
            rp = rpeaks[chrom]
            pairF, pairR = _matchChromPeaks(fp, rp, fwig[chrom], rwig[chrom],
                                            ulimit, dlimit, offset, expandCol)
            singletons, pairs, pairs_narrow = _buildRecords(fp, rp, pairF, pairR)

            singletons.sort(key=lambda k: (k[0], k[1], k[2]))
            # NOTE(review): columns 1 and 2 of a pair row are the constant
            # '.', so this stable sort keeps the forward-peak order; the
            # start/end columns are 3 and 4 -- confirm which was intended.
            pairs.sort(key=lambda k: (k[0], k[1], k[2]))
            # Single-argument print calls give the same output with the
            # print statement and with the print function.
            print("singletons:  " + str(len(singletons)))
            print("pairs:  " + str(len(pairs)))

            _writeRows(out1, singletons)
            _writeRows(out2, pairs)
            _writeRows(out3, pairs_narrow)