示例#1
0
def count_flank(beds,bp,strand,upstream=True):
    '''
    Aggregate per-position read coverage over the flank of each bed entry
    (no splicing).

    For every entry in ``beds`` the ``bp``-long upstream flank is taken, or
    the downstream flank when ``upstream`` is False.  Reads are queried from
    ``bam`` (not a parameter: it has to exist in the enclosing module
    scope), translated with ``translate_coordinates`` and every aligned
    block is counted base by base inside a window of ``bp`` positions.

    Returns the tuple ``(pos_sum, pos_dis, neg_sum, neg_dis)``:
      * ``*_sum[i]``  coverage at window position ``i`` summed over entries
      * ``*_dis[i]``  list with one coverage value per entry at position ``i``
    Reads whose translated strand is "+" or "." feed the ``pos`` pair, all
    other reads feed the ``neg`` pair.
    '''
    pos_sum=[0]*bp
    neg_sum=[0]*bp
    pos_dis=[[] for _ in xrange(bp)]
    neg_dis=[[] for _ in xrange(bp)]
    for bed in beds:
        if upstream:
            flank=bed.upstream(bp)
            # An upstream flank shorter than bp is shifted towards the end
            # of the window so that its last base still lands on bp-1.
            shift=bp-flank.cdna_length()
        else:
            flank=bed.downstream(bp)
            shift=0
        sense=[0]*bp
        antisense=[0]*bp
        for read in bam.query(flank,"bam1",strand=strand):
            hit=translate_coordinates(flank,read)
            if hit.strand=="+" or hit.strand==".":
                target=sense
            else:
                target=antisense
            for block_start,block_size in itertools.izip(hit.blockStarts,hit.blockSizes):
                # Clamp the block to the window instead of testing each index.
                first=max(block_start+shift,0)
                last=min(block_start+block_size+shift,bp)
                for j in xrange(first,last):
                    target[j]+=1
        for i,(plus_cov,minus_cov) in enumerate(itertools.izip(sense,antisense)):
            pos_sum[i]+=plus_cov
            neg_sum[i]+=minus_cov
            pos_dis[i].append(plus_cov)
            neg_dis[i].append(minus_cov)
    return pos_sum,pos_dis,neg_sum,neg_dis
示例#2
0
def run(args):
    '''
    Aggregate read coverage in a window around the TSS (or TTS) of every
    entry of the input annotation, write a table and try to draw a plot.

    Attributes read from ``args``:
      up, down  window size before / after the anchor position (bp)
      bam       alignment source handed to ``DBI.init(..., "bam")``
      input     annotation file, parsed with ``TableIO.parse``
      format    format name passed to ``TableIO.parse``
      output    destination of the table; the plot goes to output+".png"
      tts       when true the anchor is ``bed.tts()``, else ``bed.tss()``

    The table has one row per window position with the columns
    position (``-up`` .. ``down-1``), mean coverage over all entries and
    the value of ``gini_coefficient`` over the per-entry coverages.

    If the input contains no entries only the header line is written and a
    warning is logged.  Plotting is best effort: a missing matplotlib or a
    failure while drawing is logged as a warning and does not abort.
    '''
    logging.basicConfig(level=logging.INFO)
    up=args.up
    down=args.down
    bp_num=up+down
    # bin index i corresponds to relative position i+offset
    offset=-up
    bam=DBI.init(args.bam,"bam")
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    bin_sum=[0 for i in xrange(bp_num)]
    bin_dis=[[] for i in xrange(bp_num)]
    # Count entries explicitly: relying on the enumerate() loop variable
    # leaves it unbound when the input is empty.
    bed_num=0
    for bed in TableIO.parse(fin,args.format):
        bed_num+=1
        bed_bin=[0 for i in xrange(bp_num)]
        if args.tts:
            pos=bed.tts()
        else:
            pos=bed.tss()
        pos_flank=get_flank_region(pos,up,down)
        for read in bam.query(pos_flank,"bam1",strand="read1"):
            # presumably yields the read in coordinates relative to pos
            a=translate_coordinates(pos,read)
            for e in a.Exons():
                # shift into bin indices and clip to the window
                start=e.start-offset
                end=e.stop-offset
                if start < 0: start=0
                if end > bp_num: end=bp_num
                for j in xrange(start,end):
                    bed_bin[j]+=1
        for i in xrange(bp_num):
            bin_sum[i]+=bed_bin[i]
            bin_dis[i].append(bed_bin[i])
    if args.tts:
        print("pos_to_tts\taggregation_mean\tgini_coefficient",file=out)
    else:
        print("pos_to_tss\taggregation_mean\tgini_coefficient",file=out)
    if bed_num==0:
        # Nothing to average over; avoid a division by zero below.
        logging.warning("no entries read from %s, only the header was written",args.input)
        return
    bin_e=[gini_coefficient(bin_dis[i]) for i in xrange(bp_num)]
    for i in xrange(bp_num):
        print("{bin}\t{aggregation}\t{E}".format(bin=i+offset,aggregation=float(bin_sum[i])/bed_num,E=bin_e[i]),file=out)

    try:
        import matplotlib
        # non-interactive backend, must be selected before importing pyplot
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        matplotlib.rcParams.update({'font.size':9})
        # lower panel: gini coefficient
        ax1=plt.subplot2grid((7,1),(6,0))
        plt.ylabel('gini coeffecient')
        plt.fill_between(range(-up,down),bin_e,color="r",alpha=0.2,y2=0)
        ax1.set_ylim(0,1)
        ax1.set_xlim(-up,down)
        ax1.axes.get_xaxis().set_visible(False)
        plt.axvline(x=0,linewidth=1, color='y')
        # upper panel: mean coverage
        ax2=plt.subplot2grid((7,1),(0,0),rowspan=5)
        ax2.set_xlim(-up,down)
        plt.plot(range(-up,down),[float(i)/bed_num for i in bin_sum])
        plt.ylabel('mean coverage')
        if args.tts:
            plt.xlabel('pos to tts (bp)')
        else:
            plt.xlabel('pos to tss (bp)')
        plt.axvline(x=0,linewidth=1, color='y')
        plt.grid(True)
        plt.savefig(args.output+".png")
    except ImportError:
        logging.warning("matplotlib is not available, skipping the plot")
    except Exception as err:
        # The plot is optional; report the failure instead of hiding it,
        # but let KeyboardInterrupt / SystemExit propagate.
        logging.warning("could not write plot %s: %s",args.output+".png",err)