示例#1
0
    def stats(self, alpha=0.05, start=0, batches=100):
        """
        Generate posterior statistics for node.

        :Parameters:
        alpha : float
          The alpha level for generating posterior intervals. Defaults to
          0.05.

        start : int
          Index of the first trace sample to include in the summary.
          Defaults to zero.

        batches : int
          Second argument handed to ``batchsd`` when computing the
          'mc error' entry. Defaults to 100.

        :Returns:
        A dictionary with the keys 'n', 'standard deviation', 'mean',
        '<P>% HPD interval' (P = int(100*(1-alpha))), 'mc error' and
        'quantiles'. Returns None, after printing a message, when the
        trace is empty or when the statistics cannot be computed.
        """
        from utils import hpd, quantiles

        try:
            trace = np.squeeze(np.array(self.trace(), float)[start:])

            n = len(trace)
            if not n:
                print('Cannot generate statistics for zero-length trace in %s'
                      % (self.__name__,))
                return

            # The key format is kept unchanged so existing lookups on the
            # returned dictionary continue to work.
            hpd_key = '%s%s HPD interval' % (int(100*(1-alpha)), '%')

            # NOTE(review): batchsd is not imported in this method; it is
            # presumably defined at module level -- confirm.
            return {
                'n': n,
                'standard deviation': trace.std(0),
                'mean': trace.mean(0),
                hpd_key: hpd(trace, alpha),
                'mc error': batchsd(trace, batches),
                'quantiles': quantiles(trace)
            }
        except Exception:
            # Best-effort summary: report the failure and return None, but
            # let KeyboardInterrupt / SystemExit propagate to the caller.
            print('Could not generate output statistics for %s'
                  % (self.__name__,))
            return
示例#2
0
def histosAndQuantiles(tfile=None, dir=""):
    """
    Collect every object stored under directory `dir` of `tfile`.

    Returns a pair of dictionaries keyed by object name: the objects
    fetched from the file, and the result of utils.quantiles() evaluated
    on each of them with sigmaList [-1.0, 0.0, 1.0].
    """
    objectByName = {}
    quantilesByName = {}

    directory = tfile.Get(dir)
    for entry in directory.GetListOfKeys():
        name = entry.GetName()
        histo = tfile.Get("/%s/%s" % (dir, name))
        objectByName[name] = histo
        quantilesByName[name] = utils.quantiles(histo,
                                                sigmaList=[-1.0, 0.0, 1.0])

    return objectByName, quantilesByName
示例#3
0
def histosAndQuantiles(tfile=None, dir=""):
    """
    Read all keys found in directory `dir` of `tfile`.

    For each key the stored object is retrieved by its absolute path and
    passed to utils.quantiles() with sigmaList [-1.0, 0.0, 1.0].

    Returns (objects, quantiles): two dictionaries keyed by key name.
    """
    sigmas = [-1.0, 0.0, 1.0]
    fetched = {}
    summary = {}

    for item in tfile.Get(dir).GetListOfKeys():
        label = item.GetName()
        obj = tfile.Get("/%s/%s" % (dir, label))
        fetched[label] = obj
        summary[label] = utils.quantiles(obj, sigmaList=sigmas)

    return fetched, summary
示例#4
0
文件: Node.py 项目: along1x/pymc
    def stats(self, alpha=0.05, start=0, batches=100, chain=None):
        """
        Generate posterior statistics for node.

        :Parameters:
        alpha : float
          The alpha level for generating posterior intervals. Defaults to
          0.05.

        start : int
          The starting index from which to summarize (each) chain. Defaults
          to zero. Forwarded to ``self.trace`` as ``burn``.

        batches : int
          Batch size for calculating standard deviation for non-independent
          samples. Defaults to 100.

        chain : int
          The index for which chain to summarize. Defaults to None (all
          chains).

        :Returns:
        A dictionary with the keys 'n', 'standard deviation', 'mean',
        '<P>% HPD interval' (P = int(100*(1-alpha))), 'mc error' and
        'quantiles'. Returns None, after printing a message, when the
        trace is empty or when the statistics cannot be computed.
        """
        from utils import hpd, quantiles

        try:
            trace = np.squeeze(
                np.array(self.trace(burn=start, chain=chain), float))

            n = len(trace)
            if not n:
                print('Cannot generate statistics for zero-length trace in %s'
                      % (self.__name__,))
                return

            # The key format is kept unchanged so existing lookups on the
            # returned dictionary continue to work.
            hpd_key = '%s%s HPD interval' % (int(100*(1-alpha)), '%')

            # NOTE(review): batchsd is not imported in this method; it is
            # presumably defined at module level -- confirm.
            return {
                'n': n,
                'standard deviation': trace.std(0),
                'mean': trace.mean(0),
                hpd_key: hpd(trace, alpha),
                'mc error': batchsd(trace, batches),
                'quantiles': quantiles(trace)
            }
        except Exception:
            # Best-effort summary: report the failure and return None, but
            # let KeyboardInterrupt / SystemExit propagate to the caller.
            print('Could not generate output statistics for %s'
                  % (self.__name__,))
            return
示例#5
0
def histoLines(args=None, key=None, histo=None):
    """
    Draw quantile, best-value and (optionally) error lines for `histo`.

    args  : dict of drawing options. Required keys: "quantileColor",
            "bestColor", "errorColor" (line colors) and "bestDict"
            (best value per key). Optional keys: "errorDict" (error per
            key) and "print" (when true, print a one-line summary).
    key   : entry to look up in args["bestDict"] / args["errorDict"].
    histo : histogram providing GetMinimum(), GetMaximum() and GetName();
            it is also passed to utils.quantiles().

    Returns the list of objects returned by the DrawLine() calls, so the
    caller can keep them referenced.
    """
    if args is None:
        args = {}

    quantileLine = r.TLine()
    quantileLine.SetLineColor(args["quantileColor"])
    bestLine = r.TLine()
    bestLine.SetLineColor(args["bestColor"])
    errorLine = r.TLine()
    errorLine.SetLineColor(args["errorColor"])

    # Assumes utils.quantiles returns one value per sigmaList entry, in the
    # same order (q[0]: -1 sigma, q[1]: 0, q[2]: +1 sigma) -- TODO confirm.
    q = utils.quantiles(histo, sigmaList=[-1.0, 0.0, 1.0])
    yMin = histo.GetMinimum()
    yMax = histo.GetMaximum()

    best = args["bestDict"][key]
    error = args["errorDict"][key] if "errorDict" in args else None

    # Vertical lines spanning the histogram's value range: the three
    # quantiles first (central one drawn first), then the best value.
    out = []
    out.append(quantileLine.DrawLine(q[1], yMin, q[1], yMax))
    out.append(quantileLine.DrawLine(q[0], yMin, q[0], yMax))
    out.append(quantileLine.DrawLine(q[2], yMin, q[2], yMax))
    out.append(bestLine.DrawLine(best, yMin, best, yMax))

    # Horizontal error bar at half the maximum, centered on the best value.
    if error is not None:
        out.append(errorLine.DrawLine(best - error, yMax/2.0,
                                      best + error, yMax/2.0))

    if "print" in args and args["print"]:
        print("%20s: %g + %g - %g" % (histo.GetName(), best,
                                      q[2]-best, best-q[0]))
    return out
示例#6
0
# Supported distributions. Each entry maps (positive lepton, negative lepton,
# dilepton system) to the value that is histogrammed; the same definition is
# applied to reconstructed and generator-level objects.
_DILEPTON_OBSERVABLES = {
    'ptpos': lambda lp, lm, ll: lp.Pt(),                  # pT(l+)
    'ptll': lambda lp, lm, ll: ll.Pt(),                   # pT(l+l-)
    'mll': lambda lp, lm, ll: ll.M(),                     # M(l+l-)
    'EposEm': lambda lp, lm, ll: lp.E() + lm.E(),         # E(l+) + E(l-)
    'ptposptm': lambda lp, lm, ll: lp.Pt() + lm.Pt(),     # pT(l+) + pT(l-)
}


def _masslessVector(pt, eta, phi):
    """Return a ROOT.TLorentzVector set from (pt, eta, phi) with mass 0."""
    vec = ROOT.TLorentzVector()
    vec.SetPtEtaPhiM(pt, eta, phi, 0.)
    return vec


def createHistos(var, filename, isData, histos, q):
    """
    Fill the histograms of distribution `var` from the 'DileptonInfo' tree.

    var      : name of the distribution; one of 'ptpos', 'ptll', 'mll',
               'EposEm', 'ptposptm'. Any other value fills nothing.
    filename : path of the ROOT file to read.
    isData   : when true, events get unit weight and only the
               reconstructed-level histograms are filled.
    histos   : dict of histograms keyed by var+'_rec', var+'_rec_wgt' and,
               for simulation, var+'_gen' and var+'_migration'.
    q        : when equal to True, quantiles of the gen and rec histograms
               are computed with utils.quantiles() after filling.

    Returns (q_gen, q_rec) when q is True, otherwise None. A quantile list
    is empty when the corresponding histogram is not present in `histos`.
    """
    # histogram labels
    rec = var + '_rec'
    wgt = rec + '_wgt'
    gen = var + '_gen'
    mig = var + '_migration'

    observable = _DILEPTON_OBSERVABLES.get(var)

    fIn = ROOT.TFile.Open(filename)
    try:
        # loop over events in the tree and fill histos
        tree = fIn.Get('DileptonInfo')
        for i in xrange(0, tree.GetEntriesFast()):
            tree.GetEntry(i)

            # select only emu events
            if tree.EvCat != -11*13:
                continue
            # simulation: both generator-level leptons must be present
            if not isData and (tree.GenLpPt == 0 or tree.GenLmPt == 0):
                continue
            # unknown distribution name: nothing to fill for this event
            if observable is None:
                continue

            # base weight: BR fix for ttbar x pileup x lepton selection
            # (the xsec weight, tree.XSWeight, is not applied)
            baseWeight = tree.Weight[0]*tree.Weight[1]*tree.Weight[4]
            weight = 1 if isData else baseWeight

            # reconstructed leptons and their value of the observable
            lp = _masslessVector(tree.LpPt, tree.LpEta, tree.LpPhi)
            lm = _masslessVector(tree.LmPt, tree.LmEta, tree.LmPhi)
            recValue = observable(lp, lm, lp + lm)

            histos[rec].Fill(recValue, weight)
            # the "_wgt" histogram is filled with the weight divided by the
            # width of the bin the value falls into
            axis = histos[wgt].GetXaxis()
            binWidth = axis.GetBinWidth(axis.FindBin(recValue))
            histos[wgt].Fill(recValue, weight/binWidth)

            if not isData:
                # generator-level leptons, gen spectrum and migration matrix
                glp = _masslessVector(tree.GenLpPt, tree.GenLpEta,
                                      tree.GenLpPhi)
                glm = _masslessVector(tree.GenLmPt, tree.GenLmEta,
                                      tree.GenLmPhi)
                genValue = observable(glp, glm, glp + glm)
                histos[gen].Fill(genValue, weight)
                histos[mig].Fill(genValue, recValue, weight)
    finally:
        # close the file even if the event loop fails
        fIn.Close()

    # Gets quantiles from histos if q is True
    # GetQuantiles just works for TH1 not TH2, therefore the migration
    # histogram is skipped.
    # The explicit comparison is kept so that only True (or 1) enables it.
    if q == True:
        # Defaults avoid an unbound local when a histogram is missing
        # (e.g. no gen histogram for data).
        q_gen = []
        q_rec = []
        if gen in histos:
            q_gen = utils.quantiles(histos[gen])
        if rec in histos:
            q_rec = utils.quantiles(histos[rec])
            # Indexed access kept: the return type of utils.quantiles is
            # not visible from here.
            for i in xrange(0, len(q_rec)):
                print(q_rec[i])

        return q_gen, q_rec
示例#7
0
def plot_reads(Reads, xts, motiflen=None, q=5, quantile=True, bounds=None, subtitle=None, cellnames=None, title=None, hist=False):

    """
    Plot mean read profiles per group of sites on a grid of panels.

    Reads is a list of lists of binary tuples of lists of numpy arrays with
    read counts, indexed as Reads[cell][row][strand][site]. The outermost
    list is over different cell types (one column of panels each). The next
    inner list is over different TSS dist thresholds (one row of panels
    each). The binary tuple is for same / opposite strands.

    Arguments
        Reads     : nested read counts, see above.
        xts       : one entry per row; each is split into groups of site
                    indices by utils.quantiles / utils.quantize.
        motiflen  : if given, a green dashed vertical line is drawn at
                    x = motiflen-1.
        q         : passed to colorwheel() and to the grouping function as
                    q=; also the number of legend columns.
        quantile  : if true group with utils.quantiles, otherwise with
                    utils.quantize (which also receives `bounds`).
        bounds    : forwarded to utils.quantize only.
        subtitle  : per-row labels, drawn left of the first column.
        cellnames : per-column labels, drawn above the first row.
        title     : figure title.
        hist      : if true, a histogram panel is added below each profile
                    panel (the number of rows is doubled).

    Returns the matplotlib figure.
    """

    # Profile width is taken from the very first array; arrays of a
    # different size are skipped when averaging below.
    width = Reads[0][0][0][0].size
    figure = plot.figure()
    # NOTE(review): under true division (Python 3) these bounds become
    # floats -- confirm the interpreter this file targets.
    xvals = np.arange(-width/2,width/2)
    numcols = len(Reads)
    numrows = len(Reads[0])
    if hist:
        numrows = 2*numrows

    colors = colorwheel(q)

    for cellidx, reads in enumerate(Reads): 

        for index, (xt,read) in enumerate(zip(xts,reads)):

            # Split the sites into groups; each group is a collection of
            # site indices.
            if quantile:
                quantized = utils.quantiles(xt, q=q)
            else:
                quantized = utils.quantize(xt, q=q, bounds=bounds)
            # Per-group mean profile; the opposite strand is negated so it
            # is drawn below the x axis.
            same = [np.mean([read[0][idx] for idx in quant if read[0][idx].size==width],0) for quant in quantized]
            opp = [-1*np.mean([read[1][idx] for idx in quant if read[1][idx].size==width],0) for quant in quantized]
            # Panel position: column = cell type, row = index (rows
            # 2*index and 2*index+1 when histograms are requested).
            if hist:
                subplot = figure.add_subplot(numrows,numcols,2*index*numcols+cellidx+1)
            else:
                subplot = figure.add_subplot(numrows,numcols,index*numcols+cellidx+1)
            subplot = remove_spines(subplot)

            # One line per group, colored by group.
            # NOTE(review): `rev` is never used afterwards.
            fwd = [subplot.plot(xvals, s, color=c, linestyle='-', linewidth=0.5) for s,c in zip(same,colors)]
            rev = [subplot.plot(xvals, o, color=c, linestyle='-', linewidth=0.5) for o,c in zip(opp,colors)]
            subplot.axhline(0, linestyle='--', linewidth=0.2)
            subplot.axvline(0, linestyle='--', linewidth=0.2)

            if motiflen:
                subplot.axvline(motiflen-1, linestyle='--', c='g', linewidth=0.2)

            # Axis limits: full x range, y from the lowest opposite-strand
            # value to the highest same-strand value.
            xmin = xvals[0]
            xmax = xvals[-1]
            ymax = max([s.max() for s in same])
            ymin = min([o.min() for o in opp])
            subplot.axis([xmin, xmax, ymin, ymax])

            for text in subplot.get_xticklabels():
                text.set_fontsize(7)
                text.set_verticalalignment('center')

            # Five evenly spaced y ticks, plus a tick at 0 if it is not
            # already one of them.
            ytick_locs = list(np.linspace(np.round(ymin,2),np.round(ymax,2),5))
            if 0 not in ytick_locs:
                ytick_locs.append(0)
                ytick_locs.sort()
            ytick_labels = tuple(['%.2f'%s for s in ytick_locs])
            subplot.set_yticks(ytick_locs)
            subplot.set_yticklabels(ytick_labels, color='k', fontsize=6, horizontalalignment='right')

            # Row label: first column only, placed in figure coordinates at
            # one third of the panel's left edge, vertically centered.
            if subtitle and cellidx==0:
                bbox = subplot.get_position()
                xloc = bbox.xmin/3.
                yloc = (bbox.ymax+bbox.ymin)/2.
                plot.text(xloc, yloc, subtitle[index], fontsize=8, horizontalalignment='center', \
                    verticalalignment='center', transform=figure.transFigure)

            # Column label: first row only, horizontally centered, a
            # quarter of the way from the panel top to the figure top.
            if cellnames and index==0:
                bbox = subplot.get_position()
                xloc = (bbox.xmax+bbox.xmin)/2.
                yloc = (3*bbox.ymax+1)/4.
                plot.text(xloc, yloc, cellnames[cellidx], fontsize=8, horizontalalignment='center', \
                    verticalalignment='bottom', transform=figure.transFigure)

            if hist:
                # Histogram panel in the row directly below the profile.
                subplot = figure.add_subplot(numrows,numcols,(2*index+1)*numcols+cellidx+1)
                subplot = remove_spines(subplot)

                # Fourth root of the total reads (both strands) per site,
                # for the first group ("unbound") and last group ("bound").
                reads_unbound = np.power([read[0][idx].sum()+read[1][idx].sum() for idx in quantized[0] \
                    if read[0][idx].size==width and read[1][idx].size==width], 0.25)
                reads_bound = np.power([read[0][idx].sum()+read[1][idx].sum() for idx in quantized[-1] \
                    if read[0][idx].size==width and read[1][idx].size==width], 0.25)

                # NOTE(review): newer Matplotlib releases replaced the
                # `normed` keyword with `density` -- confirm the targeted
                # Matplotlib version.
                h0 = subplot.hist(reads_unbound, bins=200, color=colors[0], histtype='step', linewidth=0.2, normed=True)
                h1 = subplot.hist(reads_bound, bins=200, color=colors[-1], histtype='step', linewidth=0.2, normed=True)

                xmin = 0
                xmax = max([reads_bound.max(), reads_unbound.max()])
                ymin = 0
                ymax = max([h0[0].max(), h1[0].max()])
                subplot.axis([xmin, xmax, ymin, ymax])

                for text in subplot.get_xticklabels():
                    text.set_fontsize(7)
                    text.set_verticalalignment('center')

                ytick_locs = list(np.linspace(np.round(ymin,2),np.round(ymax,2),5))
                ytick_labels = tuple(['%.2f'%s for s in ytick_locs])
                subplot.set_yticks(ytick_locs)
                subplot.set_yticklabels(ytick_labels, color='k', fontsize=6, horizontalalignment='right')

                subplot.set_xlabel('Fourth root of total reads', fontsize=6, horizontalalignment='center')

    # Figure legend: one entry per group, labeled with the (min, max) of xt
    # inside the group.
    # NOTE(review): xt, quantized and fwd are the values left over from the
    # LAST loop iteration; they are undefined if Reads or xts is empty.
    legends = ['(%.2f,%.2f)'%(xt[quant].min(),xt[quant].max()) for quant in quantized]
    leghandle = plot.figlegend(fwd, legends, loc='lower right', mode="expand", ncol=q)
    for text in leghandle.texts:
        text.set_fontsize(6)
    leghandle.set_frame_on(False)
    
    if title:
        plot.suptitle(title, fontsize=10)

    return figure
示例#8
0
def plot_reads(Reads,
               xts,
               motiflen=None,
               q=5,
               quantile=True,
               bounds=None,
               subtitle=None,
               cellnames=None,
               title=None,
               hist=False):
    """
    Plot mean read profiles per group of sites on a grid of panels.

    Reads is a list of lists of binary tuples of lists of numpy arrays with
    read counts, indexed as Reads[cell][row][strand][site]. The outermost
    list is over different cell types (one column of panels each). The next
    inner list is over different TSS dist thresholds (one row of panels
    each). The binary tuple is for same / opposite strands.

    Arguments
        Reads     : nested read counts, see above.
        xts       : one entry per row; each is split into groups of site
                    indices by utils.quantiles / utils.quantize.
        motiflen  : if given, a green dashed vertical line is drawn at
                    x = motiflen-1.
        q         : passed to colorwheel() and to the grouping function as
                    q=; also the number of legend columns.
        quantile  : if true group with utils.quantiles, otherwise with
                    utils.quantize (which also receives `bounds`).
        bounds    : forwarded to utils.quantize only.
        subtitle  : per-row labels, drawn left of the first column.
        cellnames : per-column labels, drawn above the first row.
        title     : figure title.
        hist      : if true, a histogram panel is added below each profile
                    panel (the number of rows is doubled).

    Returns the matplotlib figure.
    """

    # Profile width is taken from the very first array; arrays of a
    # different size are skipped when averaging below.
    width = Reads[0][0][0][0].size
    figure = plot.figure()
    # NOTE(review): under true division (Python 3) these bounds become
    # floats -- confirm the interpreter this file targets.
    xvals = np.arange(-width / 2, width / 2)
    numcols = len(Reads)
    numrows = len(Reads[0])
    if hist:
        numrows = 2 * numrows

    colors = colorwheel(q)

    for cellidx, reads in enumerate(Reads):

        for index, (xt, read) in enumerate(zip(xts, reads)):

            # Split the sites into groups; each group is a collection of
            # site indices.
            if quantile:
                quantized = utils.quantiles(xt, q=q)
            else:
                quantized = utils.quantize(xt, q=q, bounds=bounds)
            # Per-group mean profile; the opposite strand is negated so it
            # is drawn below the x axis.
            same = [
                np.mean([
                    read[0][idx] for idx in quant if read[0][idx].size == width
                ], 0) for quant in quantized
            ]
            opp = [
                -1 * np.mean([
                    read[1][idx] for idx in quant if read[1][idx].size == width
                ], 0) for quant in quantized
            ]
            # Panel position: column = cell type, row = index (rows
            # 2*index and 2*index+1 when histograms are requested).
            if hist:
                subplot = figure.add_subplot(numrows, numcols,
                                             2 * index * numcols + cellidx + 1)
            else:
                subplot = figure.add_subplot(numrows, numcols,
                                             index * numcols + cellidx + 1)
            subplot = remove_spines(subplot)

            # One line per group, colored by group.
            # NOTE(review): `rev` is never used afterwards.
            fwd = [
                subplot.plot(xvals, s, color=c, linestyle='-', linewidth=0.5)
                for s, c in zip(same, colors)
            ]
            rev = [
                subplot.plot(xvals, o, color=c, linestyle='-', linewidth=0.5)
                for o, c in zip(opp, colors)
            ]
            subplot.axhline(0, linestyle='--', linewidth=0.2)
            subplot.axvline(0, linestyle='--', linewidth=0.2)

            if motiflen:
                subplot.axvline(motiflen - 1,
                                linestyle='--',
                                c='g',
                                linewidth=0.2)

            # Axis limits: full x range, y from the lowest opposite-strand
            # value to the highest same-strand value.
            xmin = xvals[0]
            xmax = xvals[-1]
            ymax = max([s.max() for s in same])
            ymin = min([o.min() for o in opp])
            subplot.axis([xmin, xmax, ymin, ymax])

            for text in subplot.get_xticklabels():
                text.set_fontsize(7)
                text.set_verticalalignment('center')

            # Five evenly spaced y ticks, plus a tick at 0 if it is not
            # already one of them.
            ytick_locs = list(
                np.linspace(np.round(ymin, 2), np.round(ymax, 2), 5))
            if 0 not in ytick_locs:
                ytick_locs.append(0)
                ytick_locs.sort()
            ytick_labels = tuple(['%.2f' % s for s in ytick_locs])
            subplot.set_yticks(ytick_locs)
            subplot.set_yticklabels(ytick_labels,
                                    color='k',
                                    fontsize=6,
                                    horizontalalignment='right')

            # Row label: first column only, placed in figure coordinates at
            # one third of the panel's left edge, vertically centered.
            if subtitle and cellidx == 0:
                bbox = subplot.get_position()
                xloc = bbox.xmin / 3.
                yloc = (bbox.ymax + bbox.ymin) / 2.
                plot.text(xloc, yloc, subtitle[index], fontsize=8, horizontalalignment='center', \
                    verticalalignment='center', transform=figure.transFigure)

            # Column label: first row only, horizontally centered, a
            # quarter of the way from the panel top to the figure top.
            if cellnames and index == 0:
                bbox = subplot.get_position()
                xloc = (bbox.xmax + bbox.xmin) / 2.
                yloc = (3 * bbox.ymax + 1) / 4.
                plot.text(xloc, yloc, cellnames[cellidx], fontsize=8, horizontalalignment='center', \
                    verticalalignment='bottom', transform=figure.transFigure)

            if hist:
                # Histogram panel in the row directly below the profile.
                subplot = figure.add_subplot(
                    numrows, numcols, (2 * index + 1) * numcols + cellidx + 1)
                subplot = remove_spines(subplot)

                # Fourth root of the total reads (both strands) per site,
                # for the first group ("unbound") and last group ("bound").
                reads_unbound = np.power([read[0][idx].sum()+read[1][idx].sum() for idx in quantized[0] \
                    if read[0][idx].size==width and read[1][idx].size==width], 0.25)
                reads_bound = np.power([read[0][idx].sum()+read[1][idx].sum() for idx in quantized[-1] \
                    if read[0][idx].size==width and read[1][idx].size==width], 0.25)

                # NOTE(review): newer Matplotlib releases replaced the
                # `normed` keyword with `density` -- confirm the targeted
                # Matplotlib version.
                h0 = subplot.hist(reads_unbound,
                                  bins=200,
                                  color=colors[0],
                                  histtype='step',
                                  linewidth=0.2,
                                  normed=True)
                h1 = subplot.hist(reads_bound,
                                  bins=200,
                                  color=colors[-1],
                                  histtype='step',
                                  linewidth=0.2,
                                  normed=True)

                xmin = 0
                xmax = max([reads_bound.max(), reads_unbound.max()])
                ymin = 0
                ymax = max([h0[0].max(), h1[0].max()])
                subplot.axis([xmin, xmax, ymin, ymax])

                for text in subplot.get_xticklabels():
                    text.set_fontsize(7)
                    text.set_verticalalignment('center')

                ytick_locs = list(
                    np.linspace(np.round(ymin, 2), np.round(ymax, 2), 5))
                ytick_labels = tuple(['%.2f' % s for s in ytick_locs])
                subplot.set_yticks(ytick_locs)
                subplot.set_yticklabels(ytick_labels,
                                        color='k',
                                        fontsize=6,
                                        horizontalalignment='right')

                subplot.set_xlabel('Fourth root of total reads',
                                   fontsize=6,
                                   horizontalalignment='center')

    # Figure legend: one entry per group, labeled with the (min, max) of xt
    # inside the group.
    # NOTE(review): xt, quantized and fwd are the values left over from the
    # LAST loop iteration; they are undefined if Reads or xts is empty.
    legends = [
        '(%.2f,%.2f)' % (xt[quant].min(), xt[quant].max())
        for quant in quantized
    ]
    leghandle = plot.figlegend(fwd,
                               legends,
                               loc='lower right',
                               mode="expand",
                               ncol=q)
    for text in leghandle.texts:
        text.set_fontsize(6)
    leghandle.set_frame_on(False)

    if title:
        plot.suptitle(title, fontsize=10)

    return figure