# Example #1
# 0
def run( args ):
    """Run the MACS differential peak calling pipeline.

    Validates the options, builds the four bedGraph tracks (treatment and
    control for each of the two conditions), scores them with
    ``DiffScoreTrackI``, obtains peaks (either called here or read from
    existing peak xls files), calls differentially occupied peaks and
    writes the result files into ``options.outdir``.

    Arguments:
    args -- raw arguments, passed unchanged to ``diff_opt_validate``.

    Returns:
    None. All results are written to files.

    Raises:
    IOError / OSError -- an input file cannot be opened or an output file
        cannot be created.
    NotImplementedError -- peaks are called here and
        ``options.track_score_method`` is neither 'p' nor 'q'.
    ValueError -- existing peaks are requested (``options.call_peaks`` is
        false) but ``options.peaks1`` / ``options.peaks2`` were not both
        supplied.
    """
    # Parse options...
    options = diff_opt_validate( args )

    # Fail early if any of the bedGraph inputs is missing or unreadable.
    for bdg_path in ( options.t1bdg, options.c1bdg,
                      options.t2bdg, options.c2bdg ):
        with open( bdg_path ):
            pass

    # Existing peaks are optional; None marks "not supplied" so the
    # "use existing peaks" branch below can report it clearly.
    p1io = None
    p2io = None

    # NOTE(review): the 'rU' mode was removed in Python 3.11; switch to a
    # plain 'r' if this module no longer has to run on Python 2.
    if options.peaks1 != '':
        info("Read peaks for condition 1...")
        p1io = PeakIO()
        with open(options.peaks1, 'rU') as f:
            p1io.read_from_xls(f)

    if options.peaks2 != '':
        info("Read peaks for condition 2...")
        p2io = PeakIO()
        with open(options.peaks2, 'rU') as f:
            p2io.read_from_xls(f)

    # A single depth value applies to both conditions.
    depth1 = options.depth[0]
    if len(options.depth) >= 2:
        depth2 = options.depth[1]
    else:
        depth2 = depth1

    #1 Read tag files
    info("Read and build treatment 1 bedGraph...")
    t1btrack = cBedGraphIO.bedGraphIO(options.t1bdg).build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1btrack = cBedGraphIO.bedGraphIO(options.c1bdg).build_bdgtrack()

    info("Read and build treatment 2 bedGraph...")
    t2btrack = cBedGraphIO.bedGraphIO(options.t2bdg).build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2btrack = cBedGraphIO.bedGraphIO(options.c2bdg).build_bdgtrack()

    #3 Call Peaks
    diffscore = DiffScoreTrackI( t1btrack,
                                 c1btrack,
                                 t2btrack,
                                 c2btrack,
                                 depth1, depth2 )
    diffscore.finalize()
    if options.call_peaks:
        diffscore.set_track_score_method(options.track_score_method)
        info("Calling peaks")
        if options.track_score_method == 'p':
            peak_cutoff = options.peaks_log_pvalue
        elif options.track_score_method == 'q':
            peak_cutoff = options.peaks_log_qvalue
        else:
            raise NotImplementedError(
                "unsupported track score method: %r"
                % (options.track_score_method,) )
        diffscore.call_peaks(cutoff = peak_cutoff,
                             min_length = options.pminlen)
    else:
        if p1io is None or p2io is None:
            # Previously this surfaced as an UnboundLocalError.
            raise ValueError("peak files for both conditions are required "
                             "when peaks are not called from the bedGraphs")
        info("Using existing peaks")
        diffscore.store_peaks(p1io, p2io)
        info("Rebuilding chromosomes")
        diffscore.rebuild_chromosomes()
        diffscore.annotate_peaks()

    info("Calling differentially occupied peaks")
    # The two score methods are mutually exclusive; any other value is
    # left untouched here, exactly as before.
    if options.score_method == 'p':
        diffscore.call_diff_peaks(cutoff = options.log_pvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)
    elif options.score_method == 'q':
        diffscore.call_diff_peaks(cutoff = options.log_qvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)

    # First header line shared by every xls file written below.
    xls_header = "# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION)

    # 'with' guarantees the output files are closed even if a writer raises.
    info("Write output xls and BED files...")
    with open( os.path.join( options.outdir, options.peakxls), "w" ) as ofhd_xls:
        ofhd_xls.write( xls_header )
        ofhd_xls.write( options.argtxt+"\n" )
        with open( os.path.join( options.outdir, options.peakbed), "w" ) as ofhd_bed:
            # pass write method so we can print too, and include name
            diffscore.write_peaks(xls=ofhd_xls, bed=ofhd_bed,
                            name = options.name, name_prefix="%s_peak_",
                            description="Peaks for %s (Made with MACS v2, " + strftime("%x") + ")",
                            trackline=options.trackline)

    if diffscore.has_peakio():
        info("Write annotated peak xls files...")
        with open( os.path.join( options.outdir, options.peak1xls), "w" ) as ofhd_xls1:
            ofhd_xls1.write( xls_header )
            ofhd_xls1.write(options.argtxt+"\n")
            with open( os.path.join( options.outdir, options.peak2xls), "w" ) as ofhd_xls2:
                ofhd_xls2.write( xls_header )
                ofhd_xls2.write(options.argtxt+"\n")
                diffscore.write_peaks_by_summit(ofhd_xls1, ofhd_xls2,
                                                name = options.name, name_prefix="%s_peak_")

    if options.store_bdg:
        info("#4 Write output bedgraph files...")
        with open( os.path.join( options.outdir, options.bdglogLR), "w" ) as ofhd_logLR, \
             open( os.path.join( options.outdir, options.bdgpvalue), "w" ) as ofhd_pvalue, \
             open( os.path.join( options.outdir, options.bdglogFC), "w" ) as ofhd_logFC:
            diffscore.write_bedgraphs(logLR=ofhd_logLR, pvalue=ofhd_pvalue,
                                      logFC=ofhd_logFC, name = options.name,
                                      description=" for %s (Made with MACS v2, " + strftime("%x") + ")",
                                      trackline=options.trackline)
# Example #2
# 0
def run( args ):
    """Run the MACS differential peak calling pipeline.

    Validates the options, builds the four bedGraph tracks (treatment and
    control for each of the two conditions), scores them with
    ``DiffScoreTrackI``, obtains peaks (either called here or read from
    existing peak xls files), calls differentially occupied peaks and
    writes the result files into ``options.outdir``.

    Arguments:
    args -- raw arguments, passed unchanged to ``diff_opt_validate``.

    Returns:
    None. All results are written to files.

    Raises:
    IOError / OSError -- an input file cannot be opened or an output file
        cannot be created.
    NotImplementedError -- peaks are called here and
        ``options.track_score_method`` is neither 'p' nor 'q'.
    ValueError -- existing peaks are requested (``options.call_peaks`` is
        false) but ``options.peaks1`` / ``options.peaks2`` were not both
        supplied.
    """
    # Parse options...
    options = diff_opt_validate( args )

    # Fail early if any of the bedGraph inputs is missing or unreadable.
    for bdg_path in ( options.t1bdg, options.c1bdg,
                      options.t2bdg, options.c2bdg ):
        with open( bdg_path ):
            pass

    # Existing peaks are optional; None marks "not supplied" so the
    # "use existing peaks" branch below can report it clearly.
    p1io = None
    p2io = None

    # NOTE(review): the 'rU' mode was removed in Python 3.11; switch to a
    # plain 'r' if this module no longer has to run on Python 2.
    if options.peaks1 != '':
        info("Read peaks for condition 1...")
        p1io = PeakIO()
        with open(options.peaks1, 'rU') as f:
            p1io.read_from_xls(f)

    if options.peaks2 != '':
        info("Read peaks for condition 2...")
        p2io = PeakIO()
        with open(options.peaks2, 'rU') as f:
            p2io.read_from_xls(f)

    # A single depth value applies to both conditions.
    depth1 = options.depth[0]
    if len(options.depth) >= 2:
        depth2 = options.depth[1]
    else:
        depth2 = depth1

    #1 Read tag files
    info("Read and build treatment 1 bedGraph...")
    t1btrack = cBedGraphIO.bedGraphIO(options.t1bdg).build_bdgtrack()

    info("Read and build control 1 bedGraph...")
    c1btrack = cBedGraphIO.bedGraphIO(options.c1bdg).build_bdgtrack()

    info("Read and build treatment 2 bedGraph...")
    t2btrack = cBedGraphIO.bedGraphIO(options.t2bdg).build_bdgtrack()

    info("Read and build control 2 bedGraph...")
    c2btrack = cBedGraphIO.bedGraphIO(options.c2bdg).build_bdgtrack()

    #3 Call Peaks
    diffscore = DiffScoreTrackI( t1btrack,
                                 c1btrack,
                                 t2btrack,
                                 c2btrack,
                                 depth1, depth2 )
    diffscore.finalize()
    if options.call_peaks:
        diffscore.set_track_score_method(options.track_score_method)
        info("Calling peaks")
        if options.track_score_method == 'p':
            peak_cutoff = options.peaks_log_pvalue
        elif options.track_score_method == 'q':
            peak_cutoff = options.peaks_log_qvalue
        else:
            raise NotImplementedError(
                "unsupported track score method: %r"
                % (options.track_score_method,) )
        diffscore.call_peaks(cutoff = peak_cutoff,
                             min_length = options.pminlen)
    else:
        if p1io is None or p2io is None:
            # Previously this surfaced as an UnboundLocalError.
            raise ValueError("peak files for both conditions are required "
                             "when peaks are not called from the bedGraphs")
        info("Using existing peaks")
        diffscore.store_peaks(p1io, p2io)
        info("Rebuilding chromosomes")
        diffscore.rebuild_chromosomes()
        diffscore.annotate_peaks()

    info("Calling differentially occupied peaks")
    # The two score methods are mutually exclusive; any other value is
    # left untouched here, exactly as before.
    if options.score_method == 'p':
        diffscore.call_diff_peaks(cutoff = options.log_pvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)
    elif options.score_method == 'q':
        diffscore.call_diff_peaks(cutoff = options.log_qvalue,
                                  min_length = options.dminlen,
                                  score_method = options.score_method)

    # First header line shared by every xls file written below.
    xls_header = "# This file is generated by MACS version, using the diffpeak module %s\n" % (MACS_VERSION)

    # 'with' guarantees the output files are closed even if a writer raises.
    info("Write output xls and BED files...")
    with open( os.path.join( options.outdir, options.peakxls), "w" ) as ofhd_xls:
        ofhd_xls.write( xls_header )
        ofhd_xls.write( options.argtxt+"\n" )
        with open( os.path.join( options.outdir, options.peakbed), "w" ) as ofhd_bed:
            # pass write method so we can print too, and include name
            diffscore.write_peaks(xls=ofhd_xls, bed=ofhd_bed,
                            name = options.name, name_prefix="%s_peak_",
                            description="Peaks for %s (Made with MACS v2, " + strftime("%x") + ")",
                            trackline=options.trackline)

    if diffscore.has_peakio():
        info("Write annotated peak xls files...")
        with open( os.path.join( options.outdir, options.peak1xls), "w" ) as ofhd_xls1:
            ofhd_xls1.write( xls_header )
            ofhd_xls1.write(options.argtxt+"\n")
            with open( os.path.join( options.outdir, options.peak2xls), "w" ) as ofhd_xls2:
                ofhd_xls2.write( xls_header )
                ofhd_xls2.write(options.argtxt+"\n")
                diffscore.write_peaks_by_summit(ofhd_xls1, ofhd_xls2,
                                                name = options.name, name_prefix="%s_peak_")

    if options.store_bdg:
        info("#4 Write output bedgraph files...")
        with open( os.path.join( options.outdir, options.bdglogLR), "w" ) as ofhd_logLR, \
             open( os.path.join( options.outdir, options.bdgpvalue), "w" ) as ofhd_pvalue, \
             open( os.path.join( options.outdir, options.bdglogFC), "w" ) as ofhd_logFC:
            diffscore.write_bedgraphs(logLR=ofhd_logLR, pvalue=ofhd_pvalue,
                                      logFC=ofhd_logFC, name = options.name,
                                      description=" for %s (Made with MACS v2, " + strftime("%x") + ")",
                                      trackline=options.trackline)