Exemplo n.º 1
0
 def process():
     """Summarise the cluster held in ``buff`` and append it to ``records``.

     Works entirely on module-level state: reads ``buff``, ``args`` and
     ``group_id`` and appends one dict to ``records`` with the keys

     * ``"peak"`` - the highest-scoring entry of ``buff``, with its score
       replaced by its fraction of the cluster's total score;
     * ``"meta"`` - a ``BED6`` spanning ``buff[0].start`` to
       ``buff[-1].stop``, named ``args.prefix + "." + str(group_id)``,
       scored with the total and carrying the peak's strand;
     * ``"gini"`` - ``gini_coefficient`` of the per-entry score fractions.

     Returns:
         1 if a record was appended, 0 if the cluster was skipped (fewer
         than two entries, a zero total score, or a total score below
         ``args.min_reads_number``).
     """
     # An empty or single-entry cluster is never reported.
     if len(buff) <= 1:
         return 0

     scores = [entry.score for entry in buff]
     # Start from 0.0 so the total is a float and the divisions below are
     # true divisions even when the scores are integers.
     total_score = sum(scores, 0.0)

     # Apply the filters before normalising: a zero total cannot be used as
     # a divisor, and rejected clusters do not need a Gini computation.
     if total_score == 0 or total_score < args.min_reads_number:
         return 0

     fractions = [score / total_score for score in scores]
     peak = max(buff, key=lambda entry: entry.score)
     meta = BED6(
         buff[0].chr,
         buff[0].start,
         buff[-1].stop,
         args.prefix + "." + str(group_id),
         total_score,
         peak.strand,  # the cluster reports the strand of its peak entry
     )
     records.append({
         "peak": peak._replace(score=peak.score / total_score),
         "meta": meta,
         "gini": gini_coefficient(fractions),
     })
     return 1
Exemplo n.º 2
0
def _plot_profile(up, down, bin_sum, bin_e, bed_num, use_tts, png_path):
    """Save a two-panel PNG: mean coverage on top, Gini coefficient below.

    matplotlib is imported here, so an ImportError raised by this function
    tells the caller that only the figure is unavailable.
    """
    import matplotlib
    matplotlib.use('Agg')  # backend is selected before pyplot is imported
    import matplotlib.pyplot as plt
    matplotlib.rcParams.update({'font.size':9})
    positions = range(-up, down)

    # Bottom strip: per-position Gini coefficient.
    ax1 = plt.subplot2grid((7, 1), (6, 0))
    plt.ylabel('gini coeffecient')
    plt.fill_between(positions, bin_e, color="r", alpha=0.2, y2=0)
    ax1.set_ylim(0, 1)
    ax1.set_xlim(-up, down)
    ax1.axes.get_xaxis().set_visible(False)
    plt.axvline(x=0, linewidth=1, color='y')

    # Main panel: coverage averaged over all input records.
    ax2 = plt.subplot2grid((7, 1), (0, 0), rowspan=5)
    ax2.set_xlim(-up, down)
    plt.plot(positions, [float(total) / bed_num for total in bin_sum])
    plt.ylabel('mean coverage')
    if use_tts:
        plt.xlabel('pos to tts (bp)')
    else:
        plt.xlabel('pos to tss (bp)')
    plt.axvline(x=0, linewidth=1, color='y')
    plt.grid(True)
    plt.savefig(png_path)


def run(args):
    """Aggregate read coverage around the TSS (or TTS) of every input record.

    For each record parsed from ``args.input`` the reads returned by the BAM
    query over the flanking window (``args.up`` upstream, ``args.down``
    downstream) are piled up per base. The output table written to
    ``args.output`` has one row per window position with the mean coverage
    over all records and ``gini_coefficient`` of the per-record coverages at
    that position. A plot is saved to ``args.output + ".png"`` on a
    best-effort basis.

    Args:
        args: parsed options; this function reads ``up``, ``down``, ``bam``,
            ``input``, ``output``, ``format`` and ``tts``.

    If the input contains no records, only the header line is written and a
    warning is logged.
    """
    logging.basicConfig(level=logging.INFO)
    up = args.up
    down = args.down
    bp_num = up + down
    # Window index 0 corresponds to position -up relative to the anchor.
    offset = -up
    bam = DBI.init(args.bam, "bam")
    fin = IO.fopen(args.input, "r")
    # NOTE(review): neither handle is closed here, as in the original; what
    # IO.fopen returns is not visible from this function - confirm before
    # adding close() calls.
    out = IO.fopen(args.output, "w")

    bin_sum = [0] * bp_num                  # coverage summed over all records
    bin_dis = [[] for _ in xrange(bp_num)]  # per-record coverage per position
    bed_num = 0
    for bed in TableIO.parse(fin, args.format):
        bed_num += 1
        bed_bin = [0] * bp_num
        pos = bed.tts() if args.tts else bed.tss()
        pos_flank = get_flank_region(pos, up, down)
        for read in bam.query(pos_flank, "bam1", strand="read1"):
            # presumably re-expresses the read relative to pos - verify
            # against translate_coordinates
            a = translate_coordinates(pos, read)
            for e in a.Exons():
                # Shift into window indices and clip to [0, bp_num].
                start = max(e.start - offset, 0)
                end = min(e.stop - offset, bp_num)
                for j in xrange(start, end):
                    bed_bin[j] += 1
        for i in xrange(bp_num):
            bin_sum[i] += bed_bin[i]
            bin_dis[i].append(bed_bin[i])

    if args.tts:
        header = "pos_to_tts\taggregation_mean\tgini_coefficient"
    else:
        header = "pos_to_tss\taggregation_mean\tgini_coefficient"

    if bed_num == 0:
        # There is nothing to average over; emit the header only rather than
        # failing on an undefined record count.
        logging.warning("no records parsed from %s; writing header only", args.input)
        print(header, file=out)
        return

    bin_e = [gini_coefficient(per_record) for per_record in bin_dis]
    print(header, file=out)
    for i in xrange(bp_num):
        print("{bin}\t{aggregation}\t{E}".format(bin=i+offset,aggregation=float(bin_sum[i])/bed_num,E=bin_e[i]),file=out)

    # The figure is optional: the table above is already written, so plotting
    # problems are logged and do not abort the run.
    try:
        _plot_profile(up, down, bin_sum, bin_e, bed_num, args.tts, args.output + ".png")
    except ImportError:
        logging.warning("matplotlib is not available; skipping plot for %s", args.output)
    except Exception:
        logging.exception("could not write plot for %s", args.output)