def cf_plot(evt_a, evt_b, label="", log_=False, ylim=[1, 1e5], ylim2=[0, 10], sli=slice(0, 10)):
    """
    Draw a two panel comparison figure for a pair of events.

    The upper panel is filled by ``a_scatter_plot_cf`` and the lower panel
    shows the per-bin chi2 between the two histograms it returns.

    :param evt_a: first event, must provide ``tdii['propagate']``
    :param evt_b: second event, must provide ``tdii['propagate']``
    :param label: text inserted into the figure title
    :param log_: passed through to ``a_scatter_plot_cf``
    :param ylim: y limits applied to the upper panel (not modified here)
    :param ylim2: y limits applied to the lower chi2 panel (not modified here)
    :param sli: slice selecting which propagate values are shown in the title
    """

    def _propagate_summary(evt):
        # Build a real list before slicing: under Python 3 map/filter return
        # lazy iterators which cannot be subscripted.
        values = [float(t) for t in evt.tdii['propagate'] if t]
        return " ".join("%5.2f" % v for v in values[sli])

    tim_a = _propagate_summary(evt_a)
    tim_b = _propagate_summary(evt_b)

    fig = plt.figure()
    suptitle = "Rainbow cfg4 " + label + "[" + tim_a + "] [" + tim_b + "]"
    log.info("plotting %s " % suptitle)
    fig.suptitle(suptitle)

    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])

    # upper panel : the two distributions
    ax = fig.add_subplot(gs[0])
    c, bns = a_scatter_plot_cf(ax, evt_a, evt_b, log_=log_)
    droplet.bow_angle_rectangles()
    ax.set_ylim(ylim)
    ax.legend()
    xlim = ax.get_xlim()   # reused below so both panels share the same x range

    # lower panel : chi2 per bin, only meaningful with exactly two histograms
    ax = fig.add_subplot(gs[1])
    if len(c) == 2:
        a, b = c[0], c[1]

        c2, c2n, _ = chi2(a, b, cut=30)
        # guard the denominator, matching the max(1, ndf) convention used by
        # other chi2 callers, so no bins passing the cut yields 0 not nan/error
        c2p = c2.sum() / max(c2n, 1)

        plt.plot(bns[:-1], c2, drawstyle='steps', label="chi2/ndf %4.2f" % c2p)
        ax.set_xlim(xlim)
        ax.legend()
        ax.set_ylim(ylim2)

        droplet.bow_angle_rectangles()
def __init__(self, cu, af, cnames=[], dbgseq=0, dbgmsk=0, dbgzero=False, cmx=0, c2cut=30, smry=False, shortname="noshortname?"):
    """
    Populate the table from a count-unique array, computing chi2 and ratio
    columns when there are exactly two count columns.

    :param cu: count unique array, typically shaped (n, 2) or (n,3) for comparisons
    :param af: instance of SeqType subclass such as HisType
    :param cnames: column names, copied so that neither the caller's list nor
                   the shared default argument is modified
    :param dbgseq: stored as self.dbgseq, logged in hex
    :param dbgmsk: stored as self.dbgmsk
    :param dbgzero: stored as self.dbgzero
    :param cmx: stored as self.cmx
    :param c2cut: passed to chi2 as its ``cut`` argument
    :param smry: stored as self.smry
    :param shortname: stored as self.shortname
    """
    log.debug("SeqTable.__init__ dbgseq %x" % dbgseq)

    assert len(cu.shape) == 2 and cu.shape[1] >= 2

    # Work on a private copy: appending to the argument in place would grow
    # the mutable default (and any caller owned list) on every instantiation.
    cnames = list(cnames)

    ncol = cu.shape[1] - 1   # column 0 holds the seq code

    self.smry = smry
    self.dirty = False
    self.cu = cu
    self.ncol = ncol
    self.dbgseq = dbgseq
    self.dbgmsk = dbgmsk
    self.dbgzero = dbgzero
    self.cmx = cmx
    self.shortname = shortname

    seqs = cu[:, 0]
    msks = seq2msk(seqs)
    tots = [cu[:, n].sum() for n in range(1, ncol + 1)]

    if ncol == 2:
        a = cu[:, 1].astype(np.float64)
        b = cu[:, 2].astype(np.float64)
        idif = cu[:, 1].astype(np.int64) - cu[:, 2].astype(np.int64)

        c2, c2n, _ = chi2(a, b, cut=c2cut)

        ndf = c2n - 1   # totals are constrained to match, so one less degree of freedom ?
        c2sum = c2.sum()
        c2p = c2sum / max(1, ndf)
        c2_pval = chi2_pvalue(c2sum, ndf)

        log.debug(" c2sum %10.4f ndf %d c2p %10.4f c2_pval %10.4f " % (c2sum, ndf, c2p, c2_pval))

        cnames.append("c2")
        tots.append("%10.2f/%d = %5.2f (pval:%0.3f prob:%0.3f) " % (c2sum, ndf, c2p, c2_pval, 1 - c2_pval))

        cfcount = cu[:, 1:]
        ab, ba = ratio(a, b)
        cnames.extend(["ab", "ba"])
    else:
        c2 = None
        c2p = None
        cfcount = None
        ab = None
        ba = None
        idif = None

    self.idif = idif

    if len(tots) == 1:
        total = tots[0]
        tots.append("%10.2f" % 1.0)
    else:
        total = None

    self.total = total
    self.c2 = c2
    self.c2p = c2p
    self.ab = ab
    self.ba = ba

    self.seqs = seqs
    self.msks = msks

    codes = cu[:, 0]
    counts = cu[:, 1]

    # Real lists rather than map/filter objects: under Python 3 those are
    # single pass iterators and the first zip below would exhaust them.
    labels = [af.label(code) for code in codes]
    nstep = [len(label.split(af.delim)) for label in labels]

    self.label2nstep = dict(zip(labels, nstep))
    self.labels = labels

    # self.line is invoked here, after the attributes above are in place and
    # before codes/counts are assigned, as in the original ordering.
    all_lines = [self.line(n) for n in range(len(cu))]
    lines = [line for line in all_lines if line]

    self.codes = codes
    self.counts = counts
    self.lines = lines

    self.label2count = dict(zip(labels, counts))
    # Pair each label with its own line before dropping falsy lines,
    # otherwise one dropped row shifts every later label onto the wrong line.
    self.label2line = {label: line for label, line in zip(labels, all_lines) if line}
    self.label2code = dict(zip(labels, seqs))

    if cfcount is not None:
        self.label2cfcount = dict(zip(labels, cfcount))

    self.cnames = cnames
    self.tots = tots
    self.af = af
    self.sli = slice(None)
def en_compare(self, bi, num_edges=101):
    """
    Compare the energy samples created by QCKTest for a single BetaInverse

    Histograms ``self.el`` and ``self.es`` over a common set of bin edges,
    runs chi2 on the two sets of bin counts, prints a short summary and
    publishes the intermediate results both as attributes of this instance
    and as module globals (for interactive inspection).

    :param bi: BetaInverse value, converted to an index with self.getBetaInverseIndex
    :param num_edges: number of edges spanning the energy range, two further
                      edges are added (one on each side) for the histograms
    :return: list [bi, c2sum, ndf, c2p, emn, emx, avp]
    """
    rindex = self.rindex
    sample_l = self.el
    sample_s = self.es
    s2cn = self.s2cn
    avph = self.avph
    s2c = self.s2c

    ibi = self.getBetaInverseIndex(bi)
    approach = self.approach

    if approach == "UpperCut":
        # see QCerenkov::getS2Integral_UpperCut
        en_slot, s2_slot, cdf_slot = 0, 1, 2
        emn = s2cn[ibi, 0, en_slot]
        emx = s2cn[ibi, -1, en_slot]
        avp = s2c[ibi, -1, cdf_slot]
    elif approach == "SplitBin":
        # see QCerenkov::getS2Integral_SplitBin
        # slots are : en_b, s2_b, s2integral
        en_slot, s2_slot, cdf_slot = 0, 5, 7
        emn = avph[ibi, 1]
        emx = avph[ibi, 2]
        avp = avph[ibi, 3]
    else:
        assert False, "unknown approach %s " % approach

    self.en_slot = en_slot
    self.s2_slot = s2_slot
    self.cdf_slot = cdf_slot
    self.emn = emn
    self.emx = emx
    self.avp = avp

    bin_width = (emx - emn) / (num_edges - 1)

    # edges across the Cerenkov permissable range, not used further below
    edges0 = np.linspace(emn, emx, num_edges)
    # push out with extra bins either side
    edges = np.linspace(emn - bin_width, emx + bin_width, num_edges + 2)

    hl = np.histogram(sample_l, bins=edges)
    hs = np.histogram(sample_s, bins=edges)

    c2, c2n, c2c = chi2(hl[0], hs[0])
    ndf = max(c2n - 1, 1)
    c2sum = c2.sum()
    c2p = c2sum / ndf
    c2label = "chi2/ndf %4.2f [%d] %.2f " % (c2p, ndf, c2sum)

    c2amx = c2.argmax()
    rimax = rindex[:, 1].max()
    c2max = c2.max()
    c2riscale = rimax / c2max
    c2poppy = np.where(c2 > c2max / 3.)[0]

    hmax = max(hl[0].max(), hs[0].max())
    c2hscale = hmax / c2max

    cf = " c2max:%4.2f c2amx:%d c2[c2amx] %4.2f edges[c2amx] %5.3f edges[c2amx+1] %5.3f " % (
        c2max, c2amx, c2[c2amx], edges[c2amx], edges[c2amx + 1])

    print("cf", cf)
    print("c2n", c2n)
    print("c2c", c2c)

    # Publish results under fixed names, both on the instance and in the
    # module namespace.
    published = [
        ("hl", hl),
        ("hs", hs),
        ("c2", c2),
        ("c2label", c2label),
        ("c2n", c2n),
        ("c2c", c2c),
        ("c2riscale", c2riscale),
        ("c2hscale", c2hscale),
        ("hmax", hmax),
        ("edges", edges),
        ("c2max", c2max),
        ("c2poppy", c2poppy),
        ("cf", cf),
        ("bi", bi),
        ("ibi", ibi),
    ]
    module_namespace = globals()
    for key, value in published:
        module_namespace[key] = value
        setattr(self, key, value)

    # table of (chi2, hs count, hl count) for the bins contributing to chi2
    print("np.c_[t.c2, t.hs[0], t.hl[0]][t.c2 > 0]")
    print(np.c_[self.c2, self.hs[0], self.hl[0]][self.c2 > 0])

    return [bi, c2sum, ndf, c2p, emn, emx, avp]
pass if 1: # energy histogram in a lot of bins dom = 3, 10, 100 #dom = 3,10,1001 edom = np.linspace(*dom) ea_h = np.histogram(ea, edom) eb_h = np.histogram(eb, edom) prefix = "energy_chi2_dom%d_arg%d" % (dom[2], arg) stem = "%s_%s_%s" % (prefix, la, lb) figpath = os.path.join(wl.FOLD, "%s.png" % stem) c2 = chi2(ea_h[0], eb_h[0], cut=10) c2ndf = c2[0].sum() / c2[1] c2_smry = " c2/ndf = %6.2f/%3d = %4.2f " % (c2[0].sum(), c2[1], c2ndf) dom_smry = " edom %s %s nbin:%s " % (dom[0], dom[1], dom[2] - 1) title = "\n".join([figpath, c2_smry, dom_smry]) print(title) figsize = [12.8, 7.2] fig, ax = plt.subplots(1, figsize=figsize) fig.suptitle(title) ax.plot(edom[:-1], ea_h[0], label=la, drawstyle="steps-post") ax.plot(edom[:-1], eb_h[0], label=lb, drawstyle="steps-post") ax.plot(edom[:-1], -2000 * c2[0] / c2[0].max(),
def __call__(self, bn, av, bv, lab, c2cut=30, c2shape=False):
    """
    Histogram two sets of values into common bins, compute the per-bin chi2
    and record the results on this instance.

    :param bn: bin edges array
    :param av: a values array
    :param bv: b values array
    :param lab: pair of labels, lab[0] and lab[1] are recorded as 'la' and 'lb'
    :param c2cut: a+b stat requirement to compute chi2
    :param c2shape: when True bin counts are normalized to the average sample
                    size before comparison, giving a shape only comparison

    Sets ``self.lhabc``, shaped (nbin, 5) with columns : low edge, high edge,
    a counts, b counts, chi2, and passes the metadata dict to ``self.update``.

    Called from AB.rhist
    """
    na = len(av)
    nb = len(bv)
    nv = 0.5 * float(na + nb)

    ahis, _ = np.histogram(av, bins=bn)
    bhis, _ = np.histogram(bv, bins=bn)

    ah = ahis.astype(np.float32)
    bh = bhis.astype(np.float32)

    if c2shape:
        # shape comparison, normalize bin counts to average
        # An empty sample has an all zero histogram already, so it is left
        # unscaled instead of dividing by a zero sample size.
        uah = ah * nv / float(na) if na > 0 else ah
        ubh = bh * nv / float(nb) if nb > 0 else bh
    else:
        uah = ah
        ubh = bh

    c2, c2n, c2c = chi2(uah, ubh, cut=c2cut)

    assert len(ahis) == len(bhis) == len(c2)
    nval = len(ahis)
    assert len(bn) - 1 == nval

    lhabc = np.zeros((nval, 5), dtype=np.float32)
    lhabc[:, 0] = bn[0:-1]
    lhabc[:, 1] = bn[1:]
    lhabc[:, 2] = uah
    lhabc[:, 3] = ubh
    lhabc[:, 4] = c2

    self.lhabc = lhabc

    meta = {
        'nedge': "%d" % len(bn),
        'nval': "%d" % nval,
        'c2cut': c2cut,
        'c2n': c2n,
        'c2c': c2c,
        'la': lab[0],
        'lb': lab[1],
        'c2_ymax': "10",
        'logyfac': "3.",
        'linyfac': "1.3",
    }
    self.update(meta)
def __init__(self, cu, af, cnames=[], dbgseq=0, dbgmsk=0, dbgzero=False, cmx=0, c2cut=30, shortname="noshortname"):
    """
    Populate the table from a count-unique array, computing chi2 statistics
    and ratios when there are exactly two count columns, and pre-format both
    the full and summary line representations of every row.

    :param cu: count unique array, typically shaped (n, 2) or (n,3) for comparisons
    :param af: instance of SeqType subclass such as HisType
    :param cnames: column names, stored as self.cnames
    :param dbgseq: stored as self.dbgseq, logged in hex
    :param dbgmsk: stored as self.dbgmsk
    :param dbgzero: stored as self.dbgzero
    :param cmx: stored as self.cmx
    :param c2cut: passed to chi2 as its ``cut`` argument
    :param shortname: stored as self.shortname, the default placeholder is rejected
    """
    log.debug("cnames %s " % repr(cnames))

    assert len(cu.shape) == 2 and cu.shape[1] >= 2
    ncol = cu.shape[1] - 1   # excluding column 0 which is the seq code

    log.debug("SeqTable.__init__ dbgseq %x" % dbgseq)
    log.debug("shortname %s cu.shape %s ncol: %s" % (shortname, repr(cu.shape), ncol))
    assert shortname != "noshortname"

    self.dirty = False
    self.cu = cu
    self.ncol = ncol
    self.dbgseq = dbgseq
    self.dbgmsk = dbgmsk
    self.dbgzero = dbgzero
    self.cmx = cmx
    self.shortname = shortname

    seqs = cu[:, 0]
    msks = seq2msk(seqs)
    tots = [cu[:, n].sum() for n in range(1, ncol + 1)]

    if ncol == 2:
        a = cu[:, 1].astype(np.float64)
        b = cu[:, 2].astype(np.float64)
        ia = cu[:, 1].astype(np.int64)
        ib = cu[:, 2].astype(np.int64)
        idif = ia - ib

        c2, c2n, _ = chi2(a, b, cut=c2cut)

        ndf = c2n - 1   # totals are constrained to match, so one less degree of freedom ?
        c2sum = c2.sum()
        c2p = c2sum / max(1, ndf)
        c2_pval = chi2_pvalue(c2sum, ndf)

        log.debug(" c2sum %10.4f ndf %d c2p %10.4f c2_pval %10.4f " % (c2sum, ndf, c2p, c2_pval))

        stats = "%.2f/%d = %5.2f pvalue:P[C2>]:%0.3f 1-pvalue:P[C2<]:%0.3f " % (
            c2sum, ndf, c2p, c2_pval, 1 - c2_pval)

        cfcount = cu[:, 1:]
        ab, ba = ratio(a, b)
    else:
        c2 = None
        c2p = None
        cfcount = None
        ab = None
        ba = None
        ia = None
        ib = None
        idif = None
        stats = None
        c2sum = None

    self.ia = ia
    self.ib = ib
    self.idif = idif

    if len(tots) == 1:
        total = tots[0]
        tots.append("%10.2f" % 1.0)
    else:
        total = None

    self.total = total
    self.c2 = c2
    self.c2p = c2p
    self.ab = ab
    self.ba = ba
    self.stats = stats

    self.seqs = seqs
    self.msks = msks

    codes = cu[:, 0]
    counts = cu[:, 1]

    labels = [af.label(code) for code in codes]
    nstep = [len(label.split(af.delim)) for label in labels]

    self.label2nstep = dict(zip(labels, nstep))
    self.labels = labels
    self.codes = codes
    self.counts = counts

    # self.line is only invoked once the attributes above are in place
    nrow = len(cu)
    k_line = self.line(0, key=True, smry=False) if nrow > 0 else ""
    k_sine = self.line(-1, key=True, smry=True) if nrow > 0 else ""

    all_lines = [self.line(n, smry=False) for n in range(nrow)]
    all_sines = [self.line(n, smry=True) for n in range(nrow)]

    lines = [line for line in all_lines if line]
    sines = [sine for sine in all_sines if sine]

    # key line repeated above and below the table body
    self.lines = [k_line] + lines + [k_line]
    self.sines = [k_sine] + sines + [k_sine]

    self.label2count = dict(zip(labels, counts))
    # Pair each label with its own line before dropping falsy lines,
    # otherwise one dropped row shifts every later label onto the wrong line.
    self.label2line = {label: line for label, line in zip(labels, all_lines) if line}
    self.label2sine = {label: sine for label, sine in zip(labels, all_sines) if sine}
    self.label2code = dict(zip(labels, seqs))

    if cfcount is not None:
        self.label2cfcount = dict(zip(labels, cfcount))

    self.cnames = cnames
    self.tots = tots
    self.c2sum = c2sum

    log.debug(" tots %s " % repr(tots))

    self.af = af
    self.sli = slice(None)