def test_compare_gauss_vs_exponent():
    """Compare each of ``h1g``..``h5g`` with each of ``h1e``..``h5e``.

    Every pair is announced in the log with its 1-based indices and is then
    handed over to ``compare``.
    """
    gaussians = h1g, h2g, h3g, h4g, h5g
    for ng, hg in enumerate(gaussians, 1):
        exponents = h1e, h2e, h3e, h4e, h5e
        for ne, he in enumerate(exponents, 1):
            logger.info('Compare gaussian (%d) and exponent (%d)' % (ng, ne))
            compare(hg, he)
def test_compare_uniform_vs_exponent():
    """Compare each of ``h1u``..``h5u`` with each of ``h1e``..``h5e``.

    Every pair is announced in the log with its 1-based indices and is then
    handed over to ``compare``.
    """
    uniforms = h1u, h2u, h3u, h4u, h5u
    for nu, hu in enumerate(uniforms, 1):
        exponents = h1e, h2e, h3e, h4e, h5e
        for ne, he in enumerate(exponents, 1):
            logger.info('Compare uniform (%d) and exponent (%d)' % (nu, ne))
            compare(hu, he)
def test_compare_gauss_vs_uniform():
    """Compare each of ``h1g``..``h5g`` with each of ``h1u``..``h5u``.

    Every pair is announced in the log with its 1-based indices and is then
    handed over to ``compare``.
    """
    gaussians = h1g, h2g, h3g, h4g, h5g
    for ng, hg in enumerate(gaussians, 1):
        uniforms = h1u, h2u, h3u, h4u, h5u
        for nu, hu in enumerate(uniforms, 1):
            logger.info('Compare gaussian (%d) and uniform (%d)' % (ng, nu))
            compare(hg, hu)
def makeWeights(
        dataset,
        plots=(),
        database="weights.db",
        compare=None,  ## comparison function
        delta=0.01,  ## delta for ``mean'' weight variation
        minmax=0.03,  ## delta for ``minmax'' weight variation
        power=None,  ## auto-determination
        debug=True,  ## save intermediate information in DB
        make_plots=False,  ## make plots
        tag="Reweighting"):
    """Perform one re-weighting iteration: compare the ``MC'' sample with the
    ``data'' (reference) distributions and append updated weights to the database.

    For every entry of `plots` the function
      - normalizes the reference object (`density()` for `ROOT.TH1` objects),
      - fills the MC histogram from `dataset` using the projector of the plot,
      - builds the weight as the data/MC ratio, scaled to unit mean,
      - checks the stopping criteria (`delta` and `minmax`).
    Plots that fail the criteria and are not flagged as `ignore` are ``active'':
    their weights are clipped (in place) to the range [0.5, 2.0], raised to an
    effective exponent and appended to the list stored in `database` under the
    address of the plot.

    >>> results = makeWeights (
    ...    dataset               , ## data source to be reweighted
    ...    plots                 , ## reweighting plots
    ...    database              , ## database to store/update reweighting results
    ...    delta      = 0.01     , ## stopping criterion for `rms/mean` of the weights
    ...    minmax     = 0.03     , ## stopping criterion for `max-min` of the weights
    ...    power      = None     , ## effective power to apply to the weights
    ...    debug      = True     , ## store debug information in database
    ...    make_plots = True     , ## produce comparison plots
    ...    tag        = 'RW'     ) ## tag for better printout

    If `make_plots = False`, it returns the tuple of active reweightings:
    >>> active = makeWeights ( ... , make_plots = False , ... )

    Otherwise it also returns the tuple of comparison plots
    >>> active, cmp_plots = makeWeights ( ... , make_plots = True , ... )
    >>> for item in cmp_plots :
    ...    what    = item.what
    ...    hdata   = item.data
    ...    hmc     = item.mc
    ...    hweight = item.weight

    If no more reweighting iterations are required, `active` is an empty tuple.

    Parameters:
      - dataset    : object handed to the projector of each plot
      - plots      : sequence of plot descriptors; the attributes `what`, `how`,
                     `address`, `data`, `mc_histo`, `w`, `projector` and `ignore`
                     are read from each of them
      - database   : database name; a false value disables the update step
      - compare    : optional callable invoked as `compare(data, mc, address)`
      - delta      : upper limit on rms/mean of the weights (must be positive)
      - minmax     : upper limit on max-min of the weights (must be positive)
      - power      : callable mapping the number of active plots to the exponent,
                     or a number in (0, 1.5]; otherwise the exponent is chosen
                     automatically from the number of active plots
      - debug      : also store the intermediate objects under
                     `address + ':REWEIGHTING'`
      - make_plots : collect and return `ComparisonPlot` items
      - tag        : prefix used in log messages and as the table title
    """

    assert 0 < delta, "makeWeights(%s): Invalid value for ``delta'' %s" % (
        tag, delta)
    assert 0 < minmax, "makeWeights(%s): Invalid value for ``minmax'' %s" % (
        tag, minmax)

    from ostap.logger.colorized import allright, attention

    nplots = len(plots)

    ## list of plots to compare
    cmp_plots = []
    ## reweighting summary table
    header = ('Reweighting', 'wmin/wmax', 'OK?', 'wrms[%]', 'OK?', 'chi2/ndf',
              'ww', 'exp')

    rows = {}
    ## plots that still require reweighting (the ``active'' ones)
    save_to_db = []

    for wplot in plots:

        what = wplot.what  ## variable/function to plot/compare
        how = wplot.how  ## weight and/or additional cuts
        address = wplot.address  ## address in database
        hdata0 = wplot.data  ## original "DATA" object
        hmc0 = wplot.mc_histo  ## original "MC" histogram
        ww = wplot.w  ## relative weight
        projector = wplot.projector  ## projector for MC data
        ignore = wplot.ignore  ## ignore for weight building?

        ## normalize the data
        hdata = hdata0
        if isinstance(hdata, ROOT.TH1):
            hdata = hdata.density()

        ## make a plot on (MC) data with the weight
        hmc0 = projector(dataset, hmc0, what, how)

        st = hmc0.stat()
        mnmx = st.minmax()
        if iszero(mnmx[0]):
            logger.warning("%s: statistic goes to zero %s/``%s''" %
                           (tag, st, address))
        elif mnmx[0] <= 0:
            logger.warning("%s: statistic is negative %s/``%s''" %
                           (tag, st, address))

        ## normalize MC
        hmc = hmc0.density()

        ## calculate the reweighting factor:
        #  when both objects are histograms of the same dimension and the MC
        #  binning is not coarser than the data binning, the ratio is built
        #  starting from the MC histogram, otherwise from the data object
        hboth = isinstance(hmc, ROOT.TH1) and isinstance(hdata, ROOT.TH1)
        finer = hboth and (
            (1 == hmc.dim() and 1 == hdata.dim() and
             len(hmc) >= len(hdata)) or
            (2 == hmc.dim() and 2 == hdata.dim() and
             hmc.binsx() >= hdata.binsx() and
             hmc.binsy() >= hdata.binsy()) or
            (3 == hmc.dim() and 3 == hdata.dim() and
             hmc.binsx() >= hdata.binsx() and
             hmc.binsy() >= hdata.binsy() and
             hmc.binsz() >= hdata.binsz()))

        if finer:
            w = (1.0 / hmc) * hdata  ## NB!
        else:
            w = hdata / hmc  ## NB!

        ## scale & get the statistics of weights
        w /= w.stat().mean().value()
        cnt = w.stat()

        mnw, mxw = cnt.minmax()
        wvar = cnt.rms() / cnt.mean()
        good1 = wvar.value() <= delta
        good2 = abs(mxw - mnw) <= minmax
        good = good1 and good2  ## small variance?

        ## chi2 of the deviation of the weights from unity, per degree of freedom
        c2ndf = 0
        for i in w:
            c2ndf += w[i].chi2(1.0)
        ndf = len(w) - 1
        if 0 < ndf:  ## protect against division by zero for a single entry
            c2ndf /= ndf

        ## make plots at the start of each iteration?
        if make_plots:
            cmp_plots.append(ComparisonPlot(what, hdata, hmc, w))

        ## build the row in the summary table
        rows[address] = (
            address,
            '%-5.3f/%5.3f' % (mnw, mxw),
            allright('+') if good2 else attention('-'),
            (wvar * 100).toString('%6.2f+-%-6.2f'),
            allright('+') if good1 else attention('-'),
            '%6.2f' % c2ndf,
            '%4.3f' % ww if 1 != ww else '',
        )

        ## make decision based on the variance of weights
        if (not good) and (not ignore):
            save_to_db.append((address, ww, hdata0, hmc0, hdata, hmc, w))

        ## make a comparison (if needed)
        if compare:
            compare(hdata0, hmc0, address)

    active = tuple([p[0] for p in save_to_db])
    nactive = len(active)

    ## the common ``effective exponent''
    if power and callable(power):
        eff_exp = power(nactive)
    elif isinstance(power, num_types) and 0 < power <= 1.5:
        eff_exp = 1.0 * power
    elif 1 == nactive and 1 < nplots:
        eff_exp = 0.95
    elif 1 == nactive:
        eff_exp = 1.00
    else:
        eff_exp = 1.10 / max(nactive, 1)

    while database and save_to_db:

        entry = save_to_db.pop()
        address, ww, weight = entry[0], entry[1], entry[-1]

        ## avoid too large or too small weights (modifies the weight in place)
        for i in weight:
            v = weight[i]
            if v.value() < 0.5:
                weight[i] = VE(0.5, v.cov2())
            elif v.value() > 2.0:
                weight[i] = VE(2.0, v.cov2())

        ## per-plot exponent: the relative weight of this plot must not leak
        #  into the exponents used for the plots processed afterwards
        exponent = eff_exp
        if 1 < nactive and 1 != ww:
            exponent = eff_exp * ww
            logger.info("%s: apply ``effective exponent'' of %.3f for ``%s''" %
                        (tag, exponent, address))

        if 1 != exponent and 0 < exponent:
            weight = weight**exponent

        rows[address] = tuple(rows[address]) + ('%4.3f' % exponent, )

        with DBASE.open(database) as db:

            db[address] = db.get(address, []) + [weight]

            if debug:
                ## keep the inputs of this iteration: data, MC (both original
                #  and normalized) and the clipped weight before exponentiation
                addr = address + ':REWEIGHTING'
                db[addr] = db.get(addr, []) + list(entry[2:])

        del weight, entry

    table = [header] + [rows[key] for key in rows]

    import ostap.logger.table as Table
    logger.info(
        '%s, active:#%d \n%s ' %
        (tag, nactive,
         Table.table(table, title=tag, prefix='# ', alignment='lccccccc')))

    cmp_plots = tuple(cmp_plots)
    return (active, cmp_plots) if make_plots else active
def test_compare_exponentials():
    """Run the pairwise comparisons of ``h1e``..``h5e``.

    Each case is a log message plus the positional arguments for ``compare``;
    the cases with a trailing ``True`` are the ones announced as
    "with rescale".
    """
    cases = (
        ('Compare exponentials (1) and (2)', (h1e, h2e)),
        ('Compare exponentials (1) and (3)', (h1e, h3e)),
        ('Compare exponentials (1) and (4)', (h1e, h4e)),
        ('Compare exponentials (1) and (4) with rescale', (h1e, h4e, True)),
        ('Compare exponentials (1) and (5)', (h1e, h5e)),
        ('Compare exponentials (2) and (3) : should be the same!', (h2e, h3e)),
        ('Compare exponentials (2) and (4)', (h2e, h4e)),
        ('Compare exponentials (2) and (4) with rescale', (h2e, h4e, True)),
        ('Compare exponentials (2) and (5)', (h2e, h5e)),
        ('Compare exponentials (3) and (4)', (h3e, h4e)),
        ('Compare exponentials (3) and (4) with rescale', (h3e, h4e, True)),
        ('Compare exponentials (3) and (5)', (h3e, h5e)),
        ('Compare exponentials (4) and (5)', (h4e, h5e)),
    )
    for message, arguments in cases:
        logger.info(message)
        compare(*arguments)
def test_compare_uniforms():
    """Run the pairwise comparisons of ``h1u``..``h5u``.

    Each comparison is announced in the log and then performed by ``compare``;
    the calls with a trailing ``True`` are the ones announced as
    "with rescale".
    """
    logger.info('Compare uniforms (1) and (2)')
    compare(h1u, h2u)
    logger.info('Compare uniforms (1) and (3)')
    compare(h1u, h3u)
    logger.info('Compare uniforms (1) and (4)')
    compare(h1u, h4u)
    logger.info('Compare uniforms (1) and (4) with rescale')
    compare(h1u, h4u, True)
    logger.info('Compare uniforms (1) and (5)')
    compare(h1u, h5u)
    logger.info('Compare uniforms (2) and (3) : should be the same!')
    compare(h2u, h3u)
    logger.info('Compare uniforms (2) and (4)')
    compare(h2u, h4u)
    logger.info('Compare uniforms (2) and (4) with rescale')
    compare(h2u, h4u, True)
    logger.info('Compare uniforms (2) and (5)')
    # the announced pair is (2) and (5): use h5u, (2) and (4) is done above
    compare(h2u, h5u)
    logger.info('Compare uniforms (3) and (4)')
    compare(h3u, h4u)
    logger.info('Compare uniforms (3) and (4) with rescale;')
    compare(h3u, h4u, True)
    logger.info('Compare uniforms (3) and (5)')
    compare(h3u, h5u)
    logger.info('Compare uniforms (4) and (5)')
    compare(h4u, h5u)
def test_compare_gaussians():
    """Run the pairwise comparisons of ``h1g``..``h5g``.

    Each case is a log message plus the positional arguments for ``compare``;
    the cases with a trailing ``True`` are the ones announced as
    "with rescale".
    """
    cases = (
        ('Compare gaussians (1) and (2)', (h1g, h2g)),
        ('Compare gaussians (1) and (3)', (h1g, h3g)),
        ('Compare gaussians (1) and (4)', (h1g, h4g)),
        ('Compare gaussians (1) and (4) with rescale', (h1g, h4g, True)),
        ('Compare gaussians (1) and (5)', (h1g, h5g)),
        ('Compare gaussians (2) and (3) : should be the same!', (h2g, h3g)),
        ('Compare gaussians (2) and (4)', (h2g, h4g)),
        ('Compare gaussians (2) and (4) with rescale', (h2g, h4g, True)),
        ('Compare gaussians (2) and (5)', (h2g, h5g)),
        ('Compare gaussians (3) and (4)', (h3g, h4g)),
        ('Compare gaussians (3) and (4) with rescale', (h3g, h4g, True)),
        ('Compare gaussians (3) and (5)', (h3g, h5g)),
        ('Compare gaussians (4) and (5)', (h4g, h5g)),
    )
    for message, arguments in cases:
        logger.info(message)
        compare(*arguments)
def test_compare_exponentials():
    """Run the pairwise comparisons of ``h1e``..``h5e``.

    Each case holds the two objects, the title passed to ``compare`` and the
    extra keyword options; ``density=True`` is passed only for the cases
    titled "with rescale".
    """
    plain = {}
    rescale = {'density': True}
    cases = (
        (h1e, h2e, 'Compare exponentials (1) and (2)', plain),
        (h1e, h3e, 'Compare exponentials (1) and (3)', plain),
        (h1e, h4e, 'Compare exponentials (1) and (4)', plain),
        (h1e, h4e, 'Compare exponentials (1) and (4) with rescale', rescale),
        (h1e, h5e, 'Compare exponentials (1) and (5)', plain),
        (h2e, h3e, 'Compare exponentials (2) and (3) : should be the same!', plain),
        (h2e, h4e, 'Compare exponentials (2) and (4)', plain),
        (h2e, h4e, 'Compare exponentials (2) and (4) with rescale', rescale),
        (h2e, h5e, 'Compare exponentials (2) and (5)', plain),
        (h3e, h4e, 'Compare exponentials (3) and (4)', plain),
        (h3e, h4e, 'Compare exponentials (3) and (4) with rescale', rescale),
        (h3e, h5e, 'Compare exponentials (3) and (5)', plain),
        (h4e, h5e, 'Compare exponentials (4) and (5)', plain),
    )
    for first, second, title, options in cases:
        compare(first, second, title, **options)
def test_compare_uniforms():
    """Run the pairwise comparisons of ``h1u``..``h5u``.

    Every call passes the two objects and a title to ``compare``;
    ``density=True`` is added for the cases titled "with rescale".
    """
    compare(h1u, h2u, 'Compare uniforms (1) and (2)')
    compare(h1u, h3u, 'Compare uniforms (1) and (3)')
    compare(h1u, h4u, 'Compare uniforms (1) and (4)')
    compare(h1u, h4u, 'Compare uniforms (1) and (4) with rescale', density=True)
    compare(h1u, h5u, 'Compare uniforms (1) and (5)')
    compare(h2u, h3u, 'Compare uniforms (2) and (3) : should be the same!')
    compare(h2u, h4u, 'Compare uniforms (2) and (4)')
    compare(h2u, h4u, 'Compare uniforms (2) and (4) with rescale', density=True)
    # the title says (2) and (5): use h5u, (2) and (4) is done above
    compare(h2u, h5u, 'Compare uniforms (2) and (5)')
    compare(h3u, h4u, 'Compare uniforms (3) and (4)')
    compare(h3u, h4u, 'Compare uniforms (3) and (4) with rescale;', density=True)
    compare(h3u, h5u, 'Compare uniforms (3) and (5)')
    compare(h4u, h5u, 'Compare uniforms (4) and (5)')
def test_compare_gaussians():
    """Run the pairwise comparisons of ``h1g``..``h5g``.

    Each case holds the two objects, the title passed to ``compare`` and the
    extra keyword options; ``density=True`` is passed only for the cases
    titled "with rescale".
    """
    plain = {}
    rescale = {'density': True}
    cases = (
        (h1g, h2g, 'Compare gaussians (1) and (2)', plain),
        (h1g, h3g, 'Compare gaussians (1) and (3)', plain),
        (h1g, h4g, 'Compare gaussians (1) and (4)', plain),
        (h1g, h4g, 'Compare gaussians (1) and (4) with rescale', rescale),
        (h1g, h5g, 'Compare gaussians (1) and (5)', plain),
        (h2g, h3g, 'Compare gaussians (2) and (3) : should be the same!', plain),
        (h2g, h4g, 'Compare gaussians (2) and (4)', plain),
        (h2g, h4g, 'Compare gaussians (2) and (4) with rescale', rescale),
        (h2g, h5g, 'Compare gaussians (2) and (5)', plain),
        (h3g, h4g, 'Compare gaussians (3) and (4)', plain),
        (h3g, h4g, 'Compare gaussians (3) and (4) with rescale', rescale),
        (h3g, h5g, 'Compare gaussians (3) and (5)', plain),
        (h4g, h5g, 'Compare gaussians (4) and (5)', plain),
    )
    for first, second, title, options in cases:
        compare(first, second, title, **options)