def getMergedBigWigPeakShift(infiles, outfile):
    '''Merge multiple BAM files per replicate to produce a single
    peak-shifted bigwig file.'''
    expt = P.snip(os.path.basename(outfile), ".merge.bw").replace("-agg", "")
    in_list = " --bamfile=".join(infiles)

    # Collect the MACS peak-shift estimate for each input track
    offsets = []
    for t in infiles:
        track = P.snip(os.path.basename(t), ".norm.bam")
        fn = "macs/with_input/%s.macs" % track
        if os.path.exists(fn):
            offsets.append(str(PIntervals.getPeakShiftFromMacs(fn)))
    shifts = " --shift=".join(offsets)

    statement = '''python %(scriptsdir)s/bam2wiggle.py
                          --output-format=bigwig
                          %(in_list)s
                          %(shifts)s
                   > %(outfile)s'''
    P.run()
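# Illustrative sketch only: the two joins above prefix every element except
# the first with its flag. For two hypothetical replicates (file names and
# shift values below are made up, not part of the pipeline) the statement
# expands roughly to:
#
#   python <scriptsdir>/bam2wiggle.py --output-format=bigwig
#       bam/liver-R1.norm.bam --bamfile=bam/liver-R2.norm.bam
#       120 --shift=115
#   > liver-agg.merge.bw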
def loadMergedIntervals(infile, outfile):
    '''Load combined intervals.

    Also, re-evaluate the intervals by counting reads within each
    interval. In contrast to the initial pipeline, the genome is not
    binned. In particular, the meaning of the columns in the table
    changes to:

       nProbes: number of reads in interval
       PeakCenter: position with maximum number of reads in interval
       AvgVal: average coverage within interval

    If *replicates* is true, only replicates will be considered for the
    counting. Otherwise the counts aggregate both replicates and
    conditions.
    '''
    # Write header to output file
    tmpfile = tempfile.NamedTemporaryFile(delete=False)
    headers = ("contig", "start", "end", "interval_id", "nPeaks",
               "PeakCenter", "Length", "AvgVal", "PeakVal", "nProbes",
               "Fold")
    tmpfile.write("\t".join(headers) + "\n")
    contig, start, end, interval_id, npeaks, peakcenter, length, avgval, peakval, nprobes = \
        "", 0, 0, 0, 0, 0, 0, 0, 0, 0

    # Get SAM file and MACS offset
    samfiles, offsets = [], []
    track = P.snip(os.path.basename(infile), ".merged.cleaned.bed")
    base_track = track.replace(".solo", "")
    fn = "bam/%s.norm.bam" % track
    assert os.path.exists(fn), \
        "could not find bamfile %s for track %s" % (fn, track)
    samfiles.append(pysam.Samfile(fn, "rb"))
    if track.find("solo") > -1:
        fn = "macs/no_input/%s.macs" % track
    else:
        fn = "macs/with_input/%s.macs" % track
    if os.path.exists(fn):
        offsets.append(PIntervals.getPeakShiftFromMacs(fn))

    # Loop over input bed file and calculate stats for merged intervals
    c = E.Counter()
    for line in open(infile, "r"):
        c.input += 1
        contig, start, end, int_id, fc = line[:-1].split()[:5]
        start, end = int(start), int(end)
        interval_id = c.input
        npeaks, peakcenter, length, avgval, peakval, nprobes = \
            PIntervals.countPeaks(contig, start, end, samfiles, offsets)

        # nreads can be 0 if the intervals overlap only slightly and, due
        # to the binning, no reads are actually in the overlap region.
        # However, most of these intervals should be small and have
        # already been deleted via the merge_min_interval_length cutoff.
        # Do not output intervals without reads.
        if nprobes == 0:
            c.skipped_reads += 1

        c.output += 1
        tmpfile.write("\t".join(map(
            str,
            (contig, start, end, int_id, npeaks, peakcenter, length,
             avgval, peakval, nprobes, fc))) + "\n")

    tmpfile.close()
    tmpfilename = tmpfile.name
    tablename = "%s_macs_merged_intervals" % track

    statement = '''python %(scriptsdir)s/csv2db.py %(csv2db_options)s
                       --index=interval_id
                       --index=contig,start
                       --table=%(tablename)s
                   < %(tmpfilename)s > %(outfile)s '''
    P.run()
    os.unlink(tmpfile.name)
    L.info("%s\n" % str(c))
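# The docstring of loadMergedIntervals redefines nProbes, PeakCenter and
# AvgVal in terms of reads counted per interval rather than per genome bin.
# The helper below is a rough, illustrative sketch of how those values could
# be derived with pysam; it is an assumption for documentation purposes, not
# the actual PIntervals.countPeaks implementation used above.
def _countPeaksSketch(contig, start, end, samfile, shift=0):
    '''Illustrative only: approximate nProbes, PeakCenter and AvgVal
    for a single interval.'''
    length = end - start
    coverage = [0] * length
    nprobes = 0
    for read in samfile.fetch(contig, start, end):
        # nProbes: number of reads overlapping the interval
        nprobes += 1
        # shift each read towards its fragment midpoint before adding coverage
        if read.is_reverse:
            lo = read.reference_start - shift
        else:
            lo = read.reference_start + shift
        hi = lo + read.query_length
        for pos in range(max(lo, start), min(hi, end)):
            coverage[pos - start] += 1

    if nprobes == 0:
        return 0, 0, 0.0

    # PeakCenter: position with the maximum number of reads in the interval
    peakcenter = start + coverage.index(max(coverage))
    # AvgVal: average coverage within the interval
    avgval = float(sum(coverage)) / length
    return nprobes, peakcenter, avgval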