def stats(self, alpha=0.05, start=0, batches=100):
    """
    Generate posterior statistics for node.

    :Parameters:
    alpha : float
      Level handed to ``hpd`` for the posterior interval; it also sets the
      percentage shown in the interval's dictionary key. Defaults to 0.05.

    start : int
      Index of the first trace sample included in the summary.
      Defaults to zero.

    batches : int
      Forwarded unchanged to ``batchsd`` when computing the 'mc error'
      entry. Defaults to 100.

    :Returns:
    A dictionary with the keys 'n', 'standard deviation', 'mean',
    '<P>% HPD interval', 'mc error' and 'quantiles', or None when the
    trace is empty or the statistics could not be computed (a message is
    printed in both cases).
    """
    from utils import hpd, quantiles

    try:
        # Drop the first `start` samples, then remove singleton dimensions.
        trace = np.squeeze(np.array(self.trace(), float)[start:])

        n = len(trace)
        if not n:
            print('Cannot generate statistics for zero-length trace in %s'
                  % self.__name__)
            return None

        return {
            'n': n,
            'standard deviation': trace.std(0),
            'mean': trace.mean(0),
            '%s%s HPD interval' % (int(100*(1-alpha)), '%'): hpd(trace, alpha),
            'mc error': batchsd(trace, batches),
            'quantiles': quantiles(trace)
        }
    except Exception:
        # Best-effort summary: report the failure and return None, but let
        # KeyboardInterrupt / SystemExit propagate (a bare `except:` would
        # have swallowed them too).
        print('Could not generate output statistics for %s' % self.__name__)
        return None
def histosAndQuantiles(tfile = None, dir = "") :
    """Collect every object stored under ``dir`` in ``tfile`` with its quantiles.

    For each key listed by ``tfile.Get(dir).GetListOfKeys()`` the object is
    fetched from the path ``/<dir>/<key name>`` and passed to
    ``utils.quantiles`` with ``sigmaList = [-1.0, 0.0, 1.0]``.

    Returns a pair of dictionaries ``(histos, quantiles)``, both keyed by
    the key name.
    """
    histoByName = {}
    quantilesByName = {}

    directory = tfile.Get(dir)
    for entry in directory.GetListOfKeys() :
        name = entry.GetName()
        histo = tfile.Get("/%s/%s"%(dir, name))
        histoByName[name] = histo
        # A fresh sigma list is built per call, as in the original loop.
        quantilesByName[name] = utils.quantiles(histo, sigmaList = [-1.0, 0.0, 1.0])

    return histoByName, quantilesByName
def histosAndQuantiles(tfile=None, dir=""):
    """Return ``(histos, quantiles)`` for all keys found under ``dir``.

    Each key name from ``tfile.Get(dir).GetListOfKeys()`` maps, in the first
    dictionary, to the object read from ``/<dir>/<name>`` and, in the second,
    to the result of ``utils.quantiles`` for that object evaluated with
    ``sigmaList=[-1.0, 0.0, 1.0]``.
    """
    loaded = {}
    summary = {}

    for listed in tfile.Get(dir).GetListOfKeys():
        label = listed.GetName()
        path = "/%s/%s" % (dir, label)
        loaded[label] = tfile.Get(path)
        summary[label] = utils.quantiles(loaded[label],
                                         sigmaList=[-1.0, 0.0, 1.0])

    return loaded, summary
def stats(self, alpha=0.05, start=0, batches=100, chain=None):
    """
    Generate posterior statistics for node.

    :Parameters:
    alpha : float
      The alpha level for generating posterior intervals. Defaults to
      0.05.

    start : int
      The starting index from which to summarize (each) chain. Defaults
      to zero. Passed to ``self.trace`` as ``burn``.

    batches : int
      Batch size for calculating standard deviation for non-independent
      samples. Defaults to 100. Forwarded unchanged to ``batchsd``.

    chain : int
      The index for which chain to summarize. Defaults to None (all
      chains).

    :Returns:
    A dictionary with the keys 'n', 'standard deviation', 'mean',
    '<P>% HPD interval', 'mc error' and 'quantiles', or None when the
    trace is empty or the statistics could not be computed (a message is
    printed in both cases).
    """
    from utils import hpd, quantiles

    try:
        trace = np.squeeze(np.array(self.trace(burn=start, chain=chain), float))

        n = len(trace)
        if not n:
            print('Cannot generate statistics for zero-length trace in %s'
                  % self.__name__)
            return None

        return {
            'n': n,
            'standard deviation': trace.std(0),
            'mean': trace.mean(0),
            '%s%s HPD interval' % (int(100*(1-alpha)), '%'): hpd(trace, alpha),
            'mc error': batchsd(trace, batches),
            'quantiles': quantiles(trace)
        }
    except Exception:
        # Best-effort summary: report the failure and return None, but let
        # KeyboardInterrupt / SystemExit propagate (a bare `except:` would
        # have swallowed them too).
        print('Could not generate output statistics for %s' % self.__name__)
        return None
def histoLines(args={}, key=None, histo=None):
    """Draw quantile, best-value and (optional) error lines for ``histo``.

    args  : dict of drawing options. Required entries: "quantileColor",
            "bestColor", "errorColor" (line colours) and "bestDict"
            (looked up with ``key``). Optional entries: "errorDict"
            (looked up with ``key``; enables the horizontal error bar) and
            "print" (when truthy, a one-line summary is printed).
            The default dict is only read, never modified.
    key   : entry to look up in args["bestDict"] / args["errorDict"].
    histo : histogram passed to ``utils.quantiles``; its GetMinimum(),
            GetMaximum() and GetName() are used as well.

    Returns the list of objects returned by the ``DrawLine`` calls, in the
    order: central quantile, lower quantile, upper quantile, best value
    and, if present, the error bar.
    """
    hLine = r.TLine()
    hLine.SetLineColor(args["quantileColor"])
    bestLine = r.TLine()
    bestLine.SetLineColor(args["bestColor"])
    errorLine = r.TLine()
    errorLine.SetLineColor(args["errorColor"])

    # q[0], q[1], q[2] correspond to sigmaList entries -1, 0 and +1.
    q = utils.quantiles(histo, sigmaList=[-1.0, 0.0, 1.0])
    yMin = histo.GetMinimum()
    yMax = histo.GetMaximum()

    best = args["bestDict"][key]
    error = args["errorDict"][key] if "errorDict" in args else None

    out = [
        hLine.DrawLine(q[1], yMin, q[1], yMax),
        hLine.DrawLine(q[0], yMin, q[0], yMax),
        hLine.DrawLine(q[2], yMin, q[2], yMax),
        bestLine.DrawLine(best, yMin, best, yMax),
    ]

    if error is not None:
        # Horizontal bar at half of the histogram maximum.
        out.append(errorLine.DrawLine(best - error, yMax/2.0,
                                      best + error, yMax/2.0))

    if args.get("print"):
        print("%20s: %g + %g - %g" % (histo.GetName(), best, q[2]-best, best-q[0]))

    return out
def _dileptonObservable(var, lp, lm, glp, glm):
    """Return (reconstructed, generated) values of observable ``var``, or None if ``var`` is not handled."""
    if var == 'ptpos':
        # pT of the positive lepton
        return lp.Pt(), glp.Pt()
    if var == 'ptll':
        # pT of the charged-lepton pair
        return (lp + lm).Pt(), (glp + glm).Pt()
    if var == 'mll':
        # invariant mass of the charged-lepton pair
        return (lp + lm).M(), (glp + glm).M()
    if var == 'EposEm':
        # E(l+) + E(l-)
        return lp.E() + lm.E(), glp.E() + glm.E()
    if var == 'ptposptm':
        # pT(l+) + pT(l-)
        return lp.Pt() + lm.Pt(), glp.Pt() + glm.Pt()
    return None


def createHistos(var, filename, isData, histos, q):
    """Fill the histograms of one observable from the 'DileptonInfo' tree.

    var      : observable name; handled values are 'ptpos', 'ptll', 'mll',
               'EposEm' and 'ptposptm'. Any other value fills nothing.
    filename : ROOT file containing the 'DileptonInfo' tree.
    isData   : when true, events get unit weight and only the
               reconstructed histograms are filled; otherwise events with
               a zero generator-level lepton pT are skipped and the
               generated and migration histograms are filled too.
    histos   : mapping holding the histograms under the keys
               var+'_rec', var+'_rec_wgt', var+'_gen', var+'_migration'.
    q        : when equal to True, quantiles of the generated and
               reconstructed histograms are computed after filling.

    Returns the tuple (q_gen, q_rec) when ``q == True`` (an entry stays an
    empty list if its histogram is not in ``histos``), otherwise None.
    The input file is closed even if filling raises.
    """
    # Histogram keys
    rec = var+'_rec'
    wgt = rec+'_wgt'
    gen = var+'_gen'
    mig = var+'_migration'

    fIn = ROOT.TFile.Open(filename)
    try:
        # loop over events in the tree and fill histos
        tree = fIn.Get('DileptonInfo')
        for i in xrange(0, tree.GetEntriesFast()):
            tree.GetEntry(i)

            # select only emu events
            if tree.EvCat != -11*13:
                continue

            # simulation: both generator-level leptons are required
            if not isData and (tree.GenLpPt == 0 or tree.GenLmPt == 0):
                continue

            # base weight: BR fix for ttbar x pileup x lepton selection
            # (the xsec weight, tree.XSWeight, is not applied here)
            baseWeight = tree.Weight[0]*tree.Weight[1]*tree.Weight[4]

            # event weight
            weight = 1 if isData else baseWeight

            # positive lepton (reconstructed and generated), massless
            lp = ROOT.TLorentzVector()
            lp.SetPtEtaPhiM(tree.LpPt, tree.LpEta, tree.LpPhi, 0.)
            glp = ROOT.TLorentzVector()
            glp.SetPtEtaPhiM(tree.GenLpPt, tree.GenLpEta, tree.GenLpPhi, 0.)

            # negative lepton (reconstructed and generated), massless
            lm = ROOT.TLorentzVector()
            lm.SetPtEtaPhiM(tree.LmPt, tree.LmEta, tree.LmPhi, 0.)
            glm = ROOT.TLorentzVector()
            glm.SetPtEtaPhiM(tree.GenLmPt, tree.GenLmEta, tree.GenLmPhi, 0.)

            values = _dileptonObservable(var, lp, lm, glp, glm)
            if values is None:
                continue
            recValue, genValue = values

            histos[rec].Fill(recValue, weight)

            # the "_wgt" histogram is filled with the weight divided by the
            # width of the bin the reconstructed value falls into
            xAxis = histos[wgt].GetXaxis()
            binWidth = xAxis.GetBinWidth(xAxis.FindBin(recValue))
            histos[wgt].Fill(recValue, weight/binWidth)

            if not isData:
                histos[gen].Fill(genValue, weight)
                histos[mig].Fill(genValue, recValue, weight)
    finally:
        # close file (a null file handle is left alone)
        if fIn:
            fIn.Close()

    # Quantiles are only requested when q equals True (1 is accepted too,
    # exactly as with the original `q == True` comparison).
    if not (q == True):  # noqa: E712
        return None

    # Quantiles are taken from the 1D histograms only, not from the 2D
    # migration histogram.
    q_gen = []
    q_rec = []
    if gen in histos:
        q_gen = utils.quantiles(histos[gen])
    if rec in histos:
        q_rec = utils.quantiles(histos[rec])
        for quantile in q_rec:
            print(quantile)

    return q_gen, q_rec
def plot_reads(Reads, xts, motiflen=None, q=5, quantile=True, bounds=None, subtitle=None, cellnames=None, title=None, hist=False):
    """
    Plot mean read profiles per group of sites, one column per cell type.

    Reads is a list of lists of binary tuples of lists of numpy arrays with
    read counts. The outermost list is over different cell types. The next
    inner list is over different TSS dist thresholds and is walked in step
    with `xts`. The binary tuple is for same / opposite strands.

    Each entry of `xts` is split into groups of site indices with
    `utils.quantiles` (when `quantile` is true) or `utils.quantize`
    (otherwise, using `bounds`). For every group the mean profile of the
    first strand is drawn above zero and the negated mean profile of the
    second strand below zero. With `hist` set, an extra row per threshold
    shows step histograms of the fourth root of the total reads for the
    first and the last group.

    Returns the figure.
    """

    width = Reads[0][0][0][0].size
    figure = plot.figure()
    positions = np.arange(-width/2,width/2)
    n_cols = len(Reads)
    # With histograms every threshold occupies two grid rows.
    row_stride = 2 if hist else 1
    n_rows = row_stride*len(Reads[0])
    palette = colorwheel(q)

    def _apply_limits_and_ticks(axes, limits, include_zero):
        # Set the view limits, restyle the x tick labels and put five evenly
        # spaced, two-decimal y ticks (plus zero when requested).
        axes.axis(limits)
        for label in axes.get_xticklabels():
            label.set_fontsize(7)
            label.set_verticalalignment('center')
        locs = list(np.linspace(np.round(limits[2],2),np.round(limits[3],2),5))
        if include_zero and 0 not in locs:
            locs.append(0)
            locs.sort()
        labels = tuple(['%.2f'%loc for loc in locs])
        axes.set_yticks(locs)
        axes.set_yticklabels(labels, color='k', fontsize=6, horizontalalignment='right')

    def _fourth_root_totals(strands, members):
        # Total reads on both strands per site, skipping sites whose
        # profiles do not have the full width.
        totals = [strands[0][site].sum()+strands[1][site].sum() for site in members
                  if strands[0][site].size==width and strands[1][site].size==width]
        return np.power(totals, 0.25)

    for cellidx, reads in enumerate(Reads):
        for index, (xt,read) in enumerate(zip(xts,reads)):

            if quantile:
                quantized = utils.quantiles(xt, q=q)
            else:
                quantized = utils.quantize(xt, q=q, bounds=bounds)

            same = []
            for group in quantized:
                profiles = [read[0][site] for site in group if read[0][site].size==width]
                same.append(np.mean(profiles,0))
            opp = []
            for group in quantized:
                profiles = [read[1][site] for site in group if read[1][site].size==width]
                opp.append(-1*np.mean(profiles,0))

            panel = figure.add_subplot(n_rows,n_cols,row_stride*index*n_cols+cellidx+1)
            panel = remove_spines(panel)

            # Same-strand curves first, then opposite-strand curves; only the
            # same-strand handles are needed later for the legend.
            fwd = []
            for profile, colour in zip(same,palette):
                fwd.append(panel.plot(positions, profile, color=colour, linestyle='-', linewidth=0.5))
            for profile, colour in zip(opp,palette):
                panel.plot(positions, profile, color=colour, linestyle='-', linewidth=0.5)

            panel.axhline(0, linestyle='--', linewidth=0.2)
            panel.axvline(0, linestyle='--', linewidth=0.2)
            if motiflen:
                panel.axvline(motiflen-1, linestyle='--', c='g', linewidth=0.2)

            top = max([profile.max() for profile in same])
            bottom = min([profile.min() for profile in opp])
            _apply_limits_and_ticks(panel, [positions[0], positions[-1], bottom, top], True)

            if subtitle and cellidx==0:
                # Row label, placed in the left margin of the first column.
                bbox = panel.get_position()
                plot.text(bbox.xmin/3., (bbox.ymax+bbox.ymin)/2., subtitle[index], fontsize=8,
                          horizontalalignment='center', verticalalignment='center',
                          transform=figure.transFigure)

            if cellnames and index==0:
                # Column label, placed above the first row.
                bbox = panel.get_position()
                plot.text((bbox.xmax+bbox.xmin)/2., (3*bbox.ymax+1)/4., cellnames[cellidx], fontsize=8,
                          horizontalalignment='center', verticalalignment='bottom',
                          transform=figure.transFigure)

            if hist:
                panel = figure.add_subplot(n_rows,n_cols,(2*index+1)*n_cols+cellidx+1)
                panel = remove_spines(panel)

                reads_unbound = _fourth_root_totals(read, quantized[0])
                reads_bound = _fourth_root_totals(read, quantized[-1])
                h0 = panel.hist(reads_unbound, bins=200, color=palette[0], histtype='step', linewidth=0.2, normed=True)
                h1 = panel.hist(reads_bound, bins=200, color=palette[-1], histtype='step', linewidth=0.2, normed=True)

                right = max([reads_bound.max(), reads_unbound.max()])
                top = max([h0[0].max(), h1[0].max()])
                _apply_limits_and_ticks(panel, [0, right, 0, top], False)
                panel.set_xlabel('Fourth root of total reads', fontsize=6, horizontalalignment='center')

    # The legend is built from the last panel drawn: one entry per group,
    # labelled with the range of xt values in that group.
    legends = ['(%.2f,%.2f)'%(xt[group].min(),xt[group].max()) for group in quantized]
    leghandle = plot.figlegend(fwd, legends, loc='lower right', mode="expand", ncol=q)
    for entry in leghandle.texts:
        entry.set_fontsize(6)
    leghandle.set_frame_on(False)

    if title:
        plot.suptitle(title, fontsize=10)

    return figure
def plot_reads(Reads,
               xts,
               motiflen=None,
               q=5,
               quantile=True,
               bounds=None,
               subtitle=None,
               cellnames=None,
               title=None,
               hist=False):
    """
    Plot mean read profiles per group of sites, one column per cell type.

    Reads is a list of lists of binary tuples of lists of numpy arrays with
    read counts. The outermost list is over different cell types. The next
    inner list is over different TSS dist thresholds and is walked in step
    with `xts`. The binary tuple is for same / opposite strands.

    Each entry of `xts` is split into groups of site indices with
    `utils.quantiles` (when `quantile` is true) or `utils.quantize`
    (otherwise, using `bounds`). For every group the mean profile of the
    first strand is drawn above zero and the negated mean profile of the
    second strand below zero. With `hist` set, an extra row per threshold
    shows step histograms of the fourth root of the total reads for the
    first and the last group.

    Returns the figure.
    """

    width = Reads[0][0][0][0].size
    figure = plot.figure()
    positions = np.arange(-width / 2, width / 2)
    n_cols = len(Reads)
    # With histograms every threshold occupies two grid rows.
    row_stride = 2 if hist else 1
    n_rows = row_stride * len(Reads[0])
    palette = colorwheel(q)

    def _apply_limits_and_ticks(axes, limits, include_zero):
        # Set the view limits, restyle the x tick labels and put five evenly
        # spaced, two-decimal y ticks (plus zero when requested).
        axes.axis(limits)
        for label in axes.get_xticklabels():
            label.set_fontsize(7)
            label.set_verticalalignment('center')
        locs = list(
            np.linspace(np.round(limits[2], 2), np.round(limits[3], 2), 5))
        if include_zero and 0 not in locs:
            locs.append(0)
            locs.sort()
        labels = tuple(['%.2f' % loc for loc in locs])
        axes.set_yticks(locs)
        axes.set_yticklabels(labels,
                             color='k',
                             fontsize=6,
                             horizontalalignment='right')

    def _fourth_root_totals(strands, members):
        # Total reads on both strands per site, skipping sites whose
        # profiles do not have the full width.
        totals = [
            strands[0][site].sum() + strands[1][site].sum()
            for site in members
            if strands[0][site].size == width and strands[1][site].size == width
        ]
        return np.power(totals, 0.25)

    for cellidx, reads in enumerate(Reads):
        for index, (xt, read) in enumerate(zip(xts, reads)):

            if quantile:
                quantized = utils.quantiles(xt, q=q)
            else:
                quantized = utils.quantize(xt, q=q, bounds=bounds)

            same = []
            for group in quantized:
                profiles = [
                    read[0][site] for site in group
                    if read[0][site].size == width
                ]
                same.append(np.mean(profiles, 0))
            opp = []
            for group in quantized:
                profiles = [
                    read[1][site] for site in group
                    if read[1][site].size == width
                ]
                opp.append(-1 * np.mean(profiles, 0))

            panel = figure.add_subplot(
                n_rows, n_cols, row_stride * index * n_cols + cellidx + 1)
            panel = remove_spines(panel)

            # Same-strand curves first, then opposite-strand curves; only the
            # same-strand handles are needed later for the legend.
            fwd = []
            for profile, colour in zip(same, palette):
                fwd.append(
                    panel.plot(positions,
                               profile,
                               color=colour,
                               linestyle='-',
                               linewidth=0.5))
            for profile, colour in zip(opp, palette):
                panel.plot(positions,
                           profile,
                           color=colour,
                           linestyle='-',
                           linewidth=0.5)

            panel.axhline(0, linestyle='--', linewidth=0.2)
            panel.axvline(0, linestyle='--', linewidth=0.2)
            if motiflen:
                panel.axvline(motiflen - 1,
                              linestyle='--',
                              c='g',
                              linewidth=0.2)

            top = max([profile.max() for profile in same])
            bottom = min([profile.min() for profile in opp])
            _apply_limits_and_ticks(
                panel, [positions[0], positions[-1], bottom, top], True)

            if subtitle and cellidx == 0:
                # Row label, placed in the left margin of the first column.
                bbox = panel.get_position()
                plot.text(bbox.xmin / 3.,
                          (bbox.ymax + bbox.ymin) / 2.,
                          subtitle[index],
                          fontsize=8,
                          horizontalalignment='center',
                          verticalalignment='center',
                          transform=figure.transFigure)

            if cellnames and index == 0:
                # Column label, placed above the first row.
                bbox = panel.get_position()
                plot.text((bbox.xmax + bbox.xmin) / 2.,
                          (3 * bbox.ymax + 1) / 4.,
                          cellnames[cellidx],
                          fontsize=8,
                          horizontalalignment='center',
                          verticalalignment='bottom',
                          transform=figure.transFigure)

            if hist:
                panel = figure.add_subplot(
                    n_rows, n_cols, (2 * index + 1) * n_cols + cellidx + 1)
                panel = remove_spines(panel)

                reads_unbound = _fourth_root_totals(read, quantized[0])
                reads_bound = _fourth_root_totals(read, quantized[-1])
                h0 = panel.hist(reads_unbound,
                                bins=200,
                                color=palette[0],
                                histtype='step',
                                linewidth=0.2,
                                normed=True)
                h1 = panel.hist(reads_bound,
                                bins=200,
                                color=palette[-1],
                                histtype='step',
                                linewidth=0.2,
                                normed=True)

                right = max([reads_bound.max(), reads_unbound.max()])
                top = max([h0[0].max(), h1[0].max()])
                _apply_limits_and_ticks(panel, [0, right, 0, top], False)
                panel.set_xlabel('Fourth root of total reads',
                                 fontsize=6,
                                 horizontalalignment='center')

    # The legend is built from the last panel drawn: one entry per group,
    # labelled with the range of xt values in that group.
    legends = [
        '(%.2f,%.2f)' % (xt[group].min(), xt[group].max())
        for group in quantized
    ]
    leghandle = plot.figlegend(fwd,
                               legends,
                               loc='lower right',
                               mode="expand",
                               ncol=q)
    for entry in leghandle.texts:
        entry.set_fontsize(6)
    leghandle.set_frame_on(False)

    if title:
        plot.suptitle(title, fontsize=10)

    return figure