Пример #1
0
def get_cov(args, bases = 50000, splitsize = 1000):
    """Compute coverage tracks and write them to an indexed bedgraph.

    Regions come from ``args.bed`` when it is given; otherwise the chromosome
    sizes read from ``args.bam`` are tiled into chunks of ``splitsize``.
    Batches of chunks are mapped over a worker pool with ``_covHelper`` and
    the resulting tracks are handed to a separate writer process
    (``_writeCov``) through a bounded queue.  Finally
    ``<out>.cov.bedgraph`` is compressed with ``pysam.tabix_compress``,
    the uncompressed file is removed, and the ``.gz`` is indexed with
    ``pysam.tabix_index``.

    Args:
        args: options object.  This function reads ``out``, ``bed``, ``bam``
            and ``cores``; the whole object is also forwarded to
            ``_covHelper``.  If ``args.out`` is empty it is set (in place)
            to the BAM/BED basename with its last extension dropped.
        bases: value used to size each batch (passed to ``ChunkList.split``
            either directly or as ``bases // splitsize`` items).
        splitsize: chunk size used when tiling whole chromosomes.
    """
    if not args.out:
        source = args.bam if args.bed is None else args.bed
        args.out = '.'.join(os.path.basename(source).split('.')[0:-1])
    if args.bed is None:
        chrs = read_chrom_sizes_from_bam(args.bam)
        chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize)
        # Floor division: the item count must stay an int on Python 3 too.
        sets = chunks.split(items = bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases = bases)
    # Queue capacity: about two batches' worth of average-sized chunks, min 2.
    maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    bedgraph = args.out + '.cov.bedgraph'
    # One core is left for the writer process.
    pool1 = mp.Pool(processes = max(1,args.cores-1))
    # Create/truncate the output file before the writer process starts.
    with open(bedgraph, 'w'):
        pass
    write_queue = mp.JoinableQueue(maxsize = maxQueueSize)
    write_process = mp.Process(target = _writeCov, args=(write_queue, args.out))
    write_process.start()
    try:
        for j in sets:
            tmp = pool1.map(_covHelper, zip(j,itertools.repeat(args)))
            for track in tmp:
                write_queue.put(track)
    finally:
        # Always shut down the workers and the writer, even if a worker
        # raised; otherwise the writer would wait on the queue forever.
        pool1.close()
        pool1.join()
        # 'STOP' is the end-of-stream sentinel (presumably honoured by
        # _writeCov -- confirm against its implementation).
        write_queue.put('STOP')
        write_process.join()
    pysam.tabix_compress(bedgraph, bedgraph + '.gz', force = True)
    # os.remove instead of a shell 'rm' so paths with spaces or shell
    # metacharacters are handled safely.
    os.remove(bedgraph)
    pysam.tabix_index(bedgraph + '.gz', preset = "bed", force = True)
Пример #2
0
def make_bias_track(args, bases = 500000, splitsize = 1000):
    """Compute a bias score track and write it to an indexed bedgraph.

    A ``_BiasParams`` object is built from ``args.fasta`` and ``args.pwm``.
    Regions come from ``args.bed`` when it is given; otherwise the
    chromosome sizes in ``params.chrs`` are tiled into chunks of
    ``splitsize``.  Batches of chunks are mapped over a worker pool with
    ``_biasHelper`` and the resulting tracks are handed to a separate writer
    process (``_writeBias``) through a bounded queue.  Finally
    ``<out>.Scores.bedgraph`` is compressed with ``pysam.tabix_compress``,
    the uncompressed file is removed, and the ``.gz`` is indexed with
    ``pysam.tabix_index``.

    Args:
        args: options object.  This function reads ``out``, ``bed``,
            ``fasta``, ``pwm`` and ``cores``.  If ``args.out`` is ``None``
            it is set (in place) to the BED/FASTA basename with its last
            extension dropped.
        bases: value used to size each batch (passed to ``ChunkList.split``
            either directly or as ``bases // splitsize`` items).
        splitsize: chunk size used when tiling whole chromosomes.
    """
    if args.out is None:
        source = args.bed if args.bed is not None else args.fasta
        args.out = '.'.join(os.path.basename(source).split('.')[0:-1])
    params = _BiasParams(args.fasta, args.pwm)
    if args.bed is None:
        chunks = ChunkList.convertChromSizes(params.chrs, splitsize = splitsize)
        # Floor division: the item count must stay an int on Python 3 too.
        sets = chunks.split(items = bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases = bases)
    # Queue capacity: about two batches' worth of average-sized chunks, min 2.
    maxQueueSize = max(2,int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    bedgraph = args.out + '.Scores.bedgraph'
    # One core is left for the writer process.
    pool = mp.Pool(processes = max(1,args.cores-1))
    # Create/truncate the output file before the writer process starts.
    with open(bedgraph, 'w'):
        pass
    write_queue = mp.JoinableQueue(maxsize = maxQueueSize)
    write_process = mp.Process(target = _writeBias, args=(write_queue, args.out))
    write_process.start()
    try:
        for j in sets:
            tmp = pool.map(_biasHelper, zip(j,itertools.repeat(params)))
            for track in tmp:
                write_queue.put(track)
    finally:
        # Always shut down the workers and the writer, even if a worker
        # raised; otherwise the writer would wait on the queue forever.
        pool.close()
        pool.join()
        # 'STOP' is the end-of-stream sentinel (presumably honoured by
        # _writeBias -- confirm against its implementation).
        write_queue.put('STOP')
        write_process.join()
    pysam.tabix_compress(bedgraph, bedgraph + '.gz', force = True)
    # os.remove instead of a shell 'rm' so paths with spaces or shell
    # metacharacters are handled safely.
    os.remove(bedgraph)
    pysam.tabix_index(bedgraph + '.gz', preset = "bed", force = True)
Пример #3
0
def get_pwm(args, bases=50000, splitsize=1000):
    """Obtain a PWM around ATAC insertion sites and save it to disk.

    Regions come from ``args.bed`` when it is given; otherwise the
    chromosome sizes read from ``args.fasta`` are tiled into chunks of
    ``splitsize``.  ``_pwmHelper`` is mapped over the batches in a worker
    pool; each result is used as a ``(matrix, count)`` pair.  The matrices
    are summed, divided by the total count, and then divided row-wise by
    the nucleotide frequencies returned by ``seq.getNucFreqs`` /
    ``seq.getNucFreqsFromChunkList``.  The result is optionally symmetrized
    and written to ``<out>.PWM.txt``.

    Args:
        args: options object.  This function reads ``out``, ``bam``,
            ``fasta``, ``bed``, ``flank``, ``lower``, ``upper``, ``atac``,
            ``sym`` and ``cores``.  If ``args.out`` is empty it is set (in
            place) to the BAM basename with its last extension dropped.
        bases: value used to size each batch (passed to ``ChunkList.split``
            either directly or as ``bases // splitsize`` items).
        splitsize: chunk size used when tiling whole chromosomes.
    """
    if not args.out:
        args.out = '.'.join(os.path.basename(args.bam).split('.')[0:-1])
    chrs = read_chrom_sizes_from_fasta(args.fasta)
    if args.bed is None:
        chunks = ChunkList.convertChromSizes(chrs,
                                             splitsize=splitsize,
                                             offset=args.flank)
        # Floor division: the item count must stay an int on Python 3 too.
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed,
                                chromDict=chrs,
                                min_offset=args.flank)
        sets = chunks.split(bases=bases)
    params = _PWMParameters(bam=args.bam,
                            up=args.flank,
                            down=args.flank,
                            fasta=args.fasta,
                            lower=args.lower,
                            upper=args.upper,
                            atac=args.atac,
                            sym=args.sym)
    pool = Pool(processes=args.cores)
    try:
        tmp = pool.map(_pwmHelper, zip(sets, itertools.repeat(params)))
    finally:
        # Release the worker processes even if a worker raised.
        pool.close()
        pool.join()
    # Accumulate per-batch matrices and counts, then average.
    n = 0.0
    result = np.zeros((len(params.nucleotides), params.up + params.down + 1))
    for partial, count in ((i[0], i[1]) for i in tmp):
        result += partial
        n += count
    result /= n
    if args.bed:
        normfreqs = seq.getNucFreqsFromChunkList(chunks, args.fasta,
                                                 params.nucleotides)
    else:
        normfreqs = seq.getNucFreqs(args.fasta, params.nucleotides)
    # Expand the per-nucleotide frequencies across all columns so every row
    # of the matrix is divided by its own nucleotide's frequency.
    result = result / np.reshape(np.repeat(normfreqs, result.shape[1]),
                                 result.shape)
    if args.sym:
        # Symmetrize: average the left half (including the centre column)
        # with the right half flipped along both axes (presumably the
        # reverse complement -- depends on the order of params.nucleotides),
        # then rebuild the right side from the flipped average.
        left = result[:, 0:(args.flank + 1)]
        right = result[:, args.flank:]
        rightflipped = np.fliplr(np.flipud(right))
        combined = (left + rightflipped) / 2
        result = np.hstack(
            (combined, np.fliplr(np.flipud(combined[:, 0:args.flank]))))
    # Save
    pwm = PWM(result, args.flank, args.flank, params.nucleotides)
    pwm.save(args.out + '.PWM.txt')
Пример #4
0
def make_bias_track(args, bases=500000, splitsize=1000):
    """Compute a bias score track and write it to an indexed bedgraph.

    A ``_BiasParams`` object is built from ``args.fasta`` and ``args.pwm``.
    Regions come from ``args.bed`` when it is given (after
    ``checkChroms`` is called with the chromosome names in ``params.chrs``
    and the chunks are merged); otherwise the chromosome sizes in
    ``params.chrs`` are tiled into chunks of ``splitsize``.  Batches of
    chunks are mapped over a worker pool with ``_biasHelper`` and the
    resulting tracks are handed to a separate writer process
    (``_writeBias``) through a bounded queue.  Finally
    ``<out>.Scores.bedgraph`` is compressed with ``pysam.tabix_compress``,
    the uncompressed file is removed, and the ``.gz`` is indexed with
    ``pysam.tabix_index``.

    Args:
        args: options object.  This function reads ``out``, ``bed``,
            ``fasta``, ``pwm`` and ``cores``.  If ``args.out`` is ``None``
            it is set (in place) to the BED/FASTA basename with its last
            extension dropped.
        bases: value used to size each batch (passed to ``ChunkList.split``
            either directly or as ``bases // splitsize`` items).
        splitsize: chunk size used when tiling whole chromosomes.
    """
    if args.out is None:
        source = args.bed if args.bed is not None else args.fasta
        args.out = '.'.join(os.path.basename(source).split('.')[0:-1])
    params = _BiasParams(args.fasta, args.pwm)

    if args.bed is None:
        chunks = ChunkList.convertChromSizes(params.chrs, splitsize=splitsize)
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.checkChroms(list(params.chrs.keys()))
        chunks.merge()
        sets = chunks.split(bases=bases)

    # Queue capacity: about two batches' worth of average-sized chunks, min 2.
    maxQueueSize = max(
        2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    bedgraph = args.out + '.Scores.bedgraph'
    # One core is left for the writer process.
    pool = mp.Pool(processes=max(1, args.cores - 1))
    # Create/truncate the output file before the writer process starts.
    with open(bedgraph, 'w'):
        pass
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeBias, args=(write_queue, args.out))
    write_process.start()
    try:
        for j in sets:
            tmp = pool.map(_biasHelper, list(zip(j, itertools.repeat(params))))
            for track in tmp:
                write_queue.put(track)
    finally:
        # Always shut down the workers and the writer, even if a worker
        # raised; otherwise the writer would wait on the queue forever.
        pool.close()
        pool.join()
        # 'STOP' is the end-of-stream sentinel (presumably honoured by
        # _writeBias -- confirm against its implementation).
        write_queue.put('STOP')
        write_process.join()
    pysam.tabix_compress(bedgraph, bedgraph + '.gz', force=True)
    # os.remove instead of a shell 'rm' so paths with spaces or shell
    # metacharacters are handled safely.
    os.remove(bedgraph)
    pysam.tabix_index(bedgraph + '.gz', preset="bed", force=True)
Пример #5
0
def get_ins(args, bases=50000, splitsize=1000):
    """Compute insertion tracks and write them to an indexed bedgraph.

    Regions come from ``args.bed`` when it is given; otherwise the
    chromosome sizes read from ``args.bam`` are tiled into chunks of
    ``splitsize``.  Batches of chunks are mapped over a worker pool with
    ``_insHelperSmooth`` (when ``args.smooth`` is truthy) or ``_insHelper``,
    and the resulting tracks are handed to a separate writer process
    (``_writeIns``) through a bounded queue.  Finally
    ``<out>.ins.bedgraph`` is compressed with ``pysam.tabix_compress``,
    the uncompressed file is removed, and the ``.gz`` is indexed with
    ``pysam.tabix_index``.

    Args:
        args: options object.  This function reads ``out``, ``bed``,
            ``bam``, ``cores`` and ``smooth``; the whole object is also
            forwarded to the helper.  If ``args.out`` is empty it is set
            (in place) to the BAM/BED basename with its last extension
            dropped.
        bases: value used to size each batch (passed to ``ChunkList.split``
            either directly or as ``bases // splitsize`` items).
        splitsize: chunk size used when tiling whole chromosomes.
    """
    if not args.out:
        source = args.bam if args.bed is None else args.bed
        args.out = '.'.join(os.path.basename(source).split('.')[0:-1])
    if args.bed is None:
        chrs = read_chrom_sizes_from_bam(args.bam)
        chunks = ChunkList.convertChromSizes(chrs, splitsize=splitsize)
        # Floor division: the item count must stay an int on Python 3 too.
        sets = chunks.split(items=bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed)
        chunks.merge()
        sets = chunks.split(bases=bases)
    # Queue capacity: about two batches' worth of average-sized chunks, min 2.
    maxQueueSize = max(
        2, int(2 * bases / np.mean([chunk.length() for chunk in chunks])))
    bedgraph = args.out + '.ins.bedgraph'
    # One core is left for the writer process.
    pool1 = mp.Pool(processes=max(1, args.cores - 1))
    # Create/truncate the output file before the writer process starts.
    with open(bedgraph, 'w'):
        pass
    write_queue = mp.JoinableQueue(maxsize=maxQueueSize)
    write_process = mp.Process(target=_writeIns, args=(write_queue, args.out))
    write_process.start()
    # The helper choice does not change between batches; pick it once.
    helper = _insHelperSmooth if args.smooth else _insHelper
    try:
        for j in sets:
            tmp = pool1.map(helper, list(zip(j, itertools.repeat(args))))
            for track in tmp:
                write_queue.put(track)
    finally:
        # Always shut down the workers and the writer, even if a worker
        # raised; otherwise the writer would wait on the queue forever.
        pool1.close()
        pool1.join()
        # 'STOP' is the end-of-stream sentinel (presumably honoured by
        # _writeIns -- confirm against its implementation).
        write_queue.put('STOP')
        write_process.join()
    pysam.tabix_compress(bedgraph, bedgraph + '.gz', force=True)
    # os.remove instead of a shell 'rm' so paths with spaces or shell
    # metacharacters are handled safely.
    os.remove(bedgraph)
    pysam.tabix_index(bedgraph + '.gz', preset="bed", force=True)
Пример #6
0
def get_pwm(args, bases = 50000, splitsize = 1000):
    """Obtain a PWM around ATAC insertion sites and save it to disk.

    Regions come from ``args.bed`` when it is given; otherwise the
    chromosome sizes read from ``args.fasta`` are tiled into chunks of
    ``splitsize``.  ``_pwmHelper`` is mapped over the batches in a worker
    pool; each result is used as a ``(matrix, count)`` pair.  The matrices
    are summed, divided by the total count, and then divided row-wise by
    the nucleotide frequencies returned by ``seq.getNucFreqs`` /
    ``seq.getNucFreqsFromChunkList``.  The result is optionally symmetrized
    and written to ``<out>.PWM.txt``.

    Args:
        args: options object.  This function reads ``out``, ``bam``,
            ``fasta``, ``bed``, ``flank``, ``lower``, ``upper``, ``atac``,
            ``sym`` and ``cores``.  If ``args.out`` is empty it is set (in
            place) to the BAM basename with its last extension dropped.
        bases: value used to size each batch (passed to ``ChunkList.split``
            either directly or as ``bases // splitsize`` items).
        splitsize: chunk size used when tiling whole chromosomes.
    """
    if not args.out:
        args.out = '.'.join(os.path.basename(args.bam).split('.')[0:-1])
    chrs = read_chrom_sizes_from_fasta(args.fasta)
    if args.bed is None:
        chunks = ChunkList.convertChromSizes(chrs, splitsize = splitsize, offset = args.flank)
        # Floor division: the item count must stay an int on Python 3 too.
        sets = chunks.split(items = bases // splitsize)
    else:
        chunks = ChunkList.read(args.bed, chromDict = chrs, min_offset = args.flank)
        sets = chunks.split(bases = bases)
    params = _PWMParameters(bam = args.bam, up = args.flank, down = args.flank, fasta = args.fasta,
                            lower = args.lower, upper = args.upper, atac = args.atac, sym = args.sym)
    pool = Pool(processes = args.cores)
    try:
        tmp = pool.map(_pwmHelper, zip(sets,itertools.repeat(params)))
    finally:
        # Release the worker processes even if a worker raised.
        pool.close()
        pool.join()
    # Accumulate per-batch matrices and counts, then average.
    n = 0.0
    result = np.zeros((len(params.nucleotides), params.up + params.down + 1))
    for i in tmp:
        result += i[0]
        n += i[1]
    result /= n
    if args.bed:
        normfreqs = seq.getNucFreqsFromChunkList(chunks, args.fasta, params.nucleotides)
    else:
        normfreqs = seq.getNucFreqs(args.fasta, params.nucleotides)
    # Expand the per-nucleotide frequencies across all columns so every row
    # of the matrix is divided by its own nucleotide's frequency.
    result = result / np.reshape(np.repeat(normfreqs,result.shape[1]),result.shape)
    if args.sym:
        # Symmetrize: average the left half (including the centre column)
        # with the right half flipped along both axes (presumably the
        # reverse complement -- depends on the order of params.nucleotides),
        # then rebuild the right side from the flipped average.
        left = result[:,0:(args.flank + 1)]
        right = result[:,args.flank:]
        rightflipped = np.fliplr(np.flipud(right))
        combined = (left + rightflipped) / 2
        result = np.hstack((combined, np.fliplr(np.flipud(combined[:,0:args.flank]))))
    # Save
    pwm = PWM(result, args.flank, args.flank, params.nucleotides)
    pwm.save(args.out + '.PWM.txt')