def convert_junc_txt_component(juncFilePath, uncFile):
    name, layout, pars, nBinnedVars, nBinColumns, \
        nEvalVars, formula, nParms, columns, dtypes = _parse_jme_formatted_file(juncFilePath,
                                                                                interpolatedFunc=True,
                                                                                parmsFromColumns=True,
                                                                                jme_f=uncFile)

    temp = _build_standard_jme_lookup(name, layout, pars, nBinnedVars, nBinColumns,
                                      nEvalVars, formula, nParms, columns, dtypes,
                                      interpolatedFunc=True)
    wrapped_up = {}
    for key, val in temp.items():
        newkey = (key[0], 'jec_uncertainty_lookup')
        vallist = list(val)
        vals, names = vallist[-1]
        knots = vals[0:len(vals):3]
        downs = vals[1:len(vals):3]
        ups = vals[2:len(vals):3]
        downs = np.array([down.flatten() for down in downs])
        ups = np.array([up.flatten() for up in ups])
        for knotv in knots:
            knot = np.unique(knotv.flatten())
            if knot.size != 1:
                raise Exception('Multiple bin low edges found')
        knots = np.array([np.unique(k.flatten())[0] for k in knots])
        vallist[2] = ({'knots': knots, 'ups': ups.T, 'downs': downs.T}, vallist[2][-1])
        vallist = vallist[:-1]
        wrapped_up[newkey] = tuple(vallist)

    return wrapped_up
def test_lumimask():
    lumimask = LumiMask("tests/samples/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt")
    runs = np.array([303825, 123], dtype=np.uint32)
    lumis = np.array([115, 123], dtype=np.uint32)
    mask = lumimask(runs, lumis)
    print("mask:", mask)
    assert mask[0] == True
    assert mask[1] == False
def hackEvaluatorForVJetsQQ_2016(lookup):
    wscale = np.array([1.0, 1.0, 1.0, 1.20, 1.25, 1.25, 1.0])
    ptscale = np.array([0, 500, 600, 700, 800, 900, 1000, 3000])
    zqq = deepcopy(lookup['ZJetsNLO'])
    wqq = deepcopy(lookup['WJetsNLO'])
    wqq._values = 1.35 * wscale
    wqq._axes = ptscale
    zqq._values = np.array([1.45])
    zqq._axes = np.array([0, 3000])
    lookup._functions['WJetsNLO_2016'] = wqq
    lookup._functions['ZJetsNLO_2016'] = zqq
def __init__(self, formula, bins_and_orders, knots_and_vars):
    """
    The constructor takes the output of the "convert_junc_txt_file" text file
    converter, which returns a formula, bins, and an interpolation table.
    """
    super(jec_uncertainty_lookup, self).__init__()
    self._dim_order = bins_and_orders[1]
    self._bins = bins_and_orders[0]
    self._eval_vars = knots_and_vars[1]
    self._eval_knots = knots_and_vars[0]['knots']
    self._eval_downs = []
    self._eval_ups = []
    self._formula_str = formula.strip('"')
    self._formula = None
    if self._formula_str != 'None' and self._formula_str != '':
        raise Exception('jet energy uncertainties have no formula!')

    for binname in self._dim_order[1:]:
        binsaslists = self._bins[binname].tolist()
        self._bins[binname] = [np.array(bins) for bins in binsaslists]

    # convert downs and ups into interp1ds
    # (yes this only works for one binning dimension right now, fight me)
    for bin in range(self._bins[self._dim_order[0]].size - 1):
        self._eval_downs.append(interp1d(self._eval_knots, knots_and_vars[0]['downs'][bin]))
        self._eval_ups.append(interp1d(self._eval_knots, knots_and_vars[0]['ups'][bin]))

    # get the jit to compile if we've got more than one bin dim
    if len(self._dim_order) > 1:
        masked_bin_eval(np.array([0]), self._bins[self._dim_order[1]], np.array([0.0]))

    self._signature = deepcopy(self._dim_order)
    for eval in self._eval_vars:
        if eval not in self._signature:
            self._signature.append(eval)
    self._dim_args = {self._dim_order[i]: i for i in range(len(self._dim_order))}
    self._eval_args = {}
    for i, argname in enumerate(self._eval_vars):
        self._eval_args[argname] = i + len(self._dim_order)
        if argname in self._dim_args.keys():
            self._eval_args[argname] = self._dim_args[argname]
def __init__(self, formula, bins_and_orders, clamps_and_vars, parms_and_orders):
    """
    The constructor takes the output of the "convert_jec(jr)_txt_file" text file
    converter, which returns a formula, bins, and parameter values.
    """
    super(jme_standard_function, self).__init__()
    self._dim_order = bins_and_orders[1]
    self._bins = bins_and_orders[0]
    self._eval_vars = clamps_and_vars[2]
    self._eval_clamp_mins = clamps_and_vars[0]
    self._eval_clamp_maxs = clamps_and_vars[1]
    self._parm_order = parms_and_orders[1]
    self._parms = parms_and_orders[0]
    self._formula_str = formula
    self._formula = wrap_formula(formula, self._parm_order + self._eval_vars)

    for binname in self._dim_order[1:]:
        binsaslists = self._bins[binname].tolist()
        self._bins[binname] = [np.array(bins) for bins in binsaslists]

    # get the jit to compile if we've got more than one bin dim
    if len(self._dim_order) > 1:
        masked_bin_eval(np.array([0, 0]), self._bins[self._dim_order[1]], np.array([0.0, 0.0]))

    # compile the formula
    argsize = len(self._parm_order) + len(self._eval_vars)
    some_ones = [50 * np.ones(argsize) for i in range(argsize)]
    _ = self._formula(*tuple(some_ones))

    self._signature = deepcopy(self._dim_order)
    for eval in self._eval_vars:
        if eval not in self._signature:
            self._signature.append(eval)
    self._dim_args = {self._dim_order[i]: i for i in range(len(self._dim_order))}
    self._eval_args = {}
    for i, argname in enumerate(self._eval_vars):
        self._eval_args[argname] = i + len(self._dim_order)
        if argname in self._dim_args.keys():
            self._eval_args[argname] = self._dim_args[argname]
def __init__(self, jsonfile):
    with open(jsonfile) as fin:
        goldenjson = json.load(fin)
    self._masks = {}
    for run, lumilist in goldenjson.items():
        run = int(run)
        mask = np.array(lumilist).flatten()
        # lumi ranges in the golden JSON are inclusive [first, last]; shifting
        # each start down by one lets a searchsorted parity check flag lumis
        # that fall inside a good range
        mask[::2] -= 1
        self._masks[run] = mask
def __init__(self, formula, bins_and_orders, clamps_and_vars, parms_and_orders):
    """
    The constructor takes the output of the "convert_jersf_txt_file" text file
    converter, which returns a formula, bins, and values.
    """
    super(jersf_lookup, self).__init__()
    self._dim_order = bins_and_orders[1]
    self._bins = bins_and_orders[0]
    self._eval_vars = clamps_and_vars[2]
    self._eval_clamp_mins = clamps_and_vars[0]
    self._eval_clamp_maxs = clamps_and_vars[1]
    self._parm_order = parms_and_orders[1]
    self._parms = parms_and_orders[0]
    self._formula_str = formula
    self._formula = None
    if formula != 'None':
        raise Exception('jet energy resolution scale factors have no formula!')

    for binname in self._dim_order[1:]:
        binsaslists = self._bins[binname].tolist()
        self._bins[binname] = [np.array(bins) for bins in binsaslists]

    # get the jit to compile if we've got more than one bin dim
    if len(self._dim_order) > 1:
        masked_bin_eval(np.array([0]), self._bins[self._dim_order[1]], np.array([0.0]))

    self._signature = deepcopy(self._dim_order)
    for eval in self._eval_vars:
        if eval not in self._signature:
            self._signature.append(eval)
    self._dim_args = {self._dim_order[i]: i for i in range(len(self._dim_order))}
    self._eval_args = {}
    for i, argname in enumerate(self._eval_vars):
        self._eval_args[argname] = i + len(self._dim_order)
        if argname in self._dim_args.keys():
            self._eval_args[argname] = self._dim_args[argname]
def __init__(self, jsonfile):
    with open(jsonfile) as fin:
        goldenjson = json.load(fin)
    # numba typed dict keyed by run number
    self._masks = Dict.empty(
        key_type=types.uint32,
        value_type=types.uint32[:]
    )
    for run, lumilist in goldenjson.items():
        mask = np.array(lumilist, dtype=np.uint32).flatten()
        # lumi ranges in the golden JSON are inclusive [first, last]; shifting
        # each start down by one lets a searchsorted parity check flag lumis
        # that fall inside a good range
        mask[::2] -= 1
        self._masks[np.uint32(run)] = mask
def extract_json_histo_structure(parselevel, axis_names, axes):
    if 'value' in parselevel.keys():
        return
    name = list(parselevel)[0].split(':')[0]
    bins_pairs = [key.split(':')[-1].strip('[]').split(',') for key in parselevel.keys()]
    bins = []
    for pair in bins_pairs:
        bins.extend([float(val) for val in pair])
    bins.sort()
    bins = np.unique(np.array(bins))
    axis_names.append(name.encode())
    axes[axis_names[-1]] = bins
    extract_json_histo_structure(parselevel[list(parselevel)[0]], axis_names, axes)
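# Hypothetical usage sketch (not part of the package): extract_json_histo_structure
# walks a nested payload whose keys look like "<axis>:[lo,hi]" and collects the
# unique bin edges per axis. The payload below is a made-up two-axis example; the
# real converters read this structure out of JSON correction files.
def _example_extract_json_histo_structure():
    payload = {
        'eta:[0.0,1.5]': {'pt:[20.0,50.0]': {'value': 1.0, 'error': 0.1},
                          'pt:[50.0,100.0]': {'value': 1.1, 'error': 0.1}},
        'eta:[1.5,2.5]': {'pt:[20.0,50.0]': {'value': 0.9, 'error': 0.2},
                          'pt:[50.0,100.0]': {'value': 1.0, 'error': 0.2}},
    }
    axis_names, axes = [], {}
    extract_json_histo_structure(payload, axis_names, axes)
    # axis names are stored as bytes, bin edges as sorted unique arrays
    assert axis_names == [b'eta', b'pt']
    assert (axes[b'eta'] == np.array([0.0, 1.5, 2.5])).all()
    assert (axes[b'pt'] == np.array([20.0, 50.0, 100.0])).all()
    return axes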
def test_root_scalefactors():
    extractor = lookup_tools.extractor()
    extractor.add_weight_sets([
        "testSF2d scalefactors_Tight_Electron tests/samples/testSF2d.histo.root"
    ])
    extractor.finalize()

    evaluator = extractor.make_evaluator()

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    # test flat eval
    test_out = evaluator["testSF2d"](test_eta, test_pt)

    # test structured eval
    test_eta_jagged = awkward.JaggedArray.fromcounts(counts, test_eta)
    test_pt_jagged = awkward.JaggedArray.fromcounts(counts, test_pt)
    test_out_jagged = evaluator["testSF2d"](test_eta_jagged, test_pt_jagged)

    assert (test_out_jagged.counts == counts).all()
    assert (test_out == test_out_jagged.flatten()).all()

    # From make_expected_lookup.py
    expected_output = np.array([
        0.90780139, 0.82748538, 0.86332178, 0.86332178, 0.97981155, 0.79701495,
        0.88245934, 0.82857144, 0.91884059, 0.97466666, 0.94072163, 1.00775194,
        0.82748538, 1.00775194, 0.97203946, 0.98199672, 0.80655736, 0.90893763,
        0.88245934, 0.79701495, 0.82748538, 0.82857144, 0.91884059, 0.90893763,
        0.97520661, 0.97520661, 0.82748538, 0.91884059, 0.97203946, 0.88245934,
        0.79701495, 0.9458763, 1.00775194, 0.80655736, 1.00775194, 1.00775194,
        0.98976982, 0.98976982, 0.86332178, 0.94072163, 0.80655736, 0.98976982,
        0.96638656, 0.9458763, 0.90893763, 0.9529984, 0.9458763, 0.9529984,
        0.80655736, 0.80655736, 0.80655736, 0.98976982, 0.97466666, 0.98199672,
        0.86332178, 1.03286386, 0.94072163, 1.03398061, 0.82857144, 0.80655736,
        1.00775194, 0.80655736])

    diff = np.abs(test_out - expected_output)
    print("Max diff: %.16f" % diff.max())
    print("Median diff: %.16f" % np.median(diff))
    print("Diff over threshold rate: %.1f %%" %
          (100 * (diff >= 1.e-8).sum() / diff.size))
    assert (diff < 1.e-8).all()
def clopper_pearson_interval(num, denom, coverage=_coverage1sd):
    """Compute the Clopper-Pearson coverage interval for a binomial distribution

    num: successes
    denom: trials
    coverage: coverage, defaults to 68%

    c.f. http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
    """
    if np.any(num > denom):
        raise ValueError("Found numerator larger than denominator while calculating binomial uncertainty")
    lo = scipy.stats.beta.ppf((1 - coverage) / 2, num, denom - num + 1)
    hi = scipy.stats.beta.ppf((1 + coverage) / 2, num + 1, denom - num)
    interval = np.array([lo, hi])
    interval[:, num == 0.] = 0.
    interval[:, num == denom] = 1.
    return interval
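# Hypothetical usage sketch (not part of the package): shows the shape and
# edge-case handling of clopper_pearson_interval for a toy set of bins; relies
# on the same module-level numpy/scipy imports the function itself uses.
def _example_clopper_pearson_usage():
    num = np.array([0., 3., 10.])      # successes per bin
    denom = np.array([10., 10., 10.])  # trials per bin
    interval = clopper_pearson_interval(num, denom)
    # shape (2, nbins): row 0 holds lower edges, row 1 upper edges;
    # bins with num == 0 are pinned to 0 and num == denom to 1 by construction
    assert interval.shape == (2, 3)
    return interval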
def __init__(self, name, label, n_or_arr, lo=None, hi=None):
    super(Bin, self).__init__(name, label)
    if isinstance(n_or_arr, (list, np.ndarray)):
        self._uniform = False
        self._bins = np.array(n_or_arr, dtype='d')
        if not all(np.sort(self._bins) == self._bins):
            raise ValueError("Binning not sorted!")
        self._lo = self._bins[0]
        self._hi = self._bins[-1]
        # to make searchsorted differentiate inf from nan
        self._bins = np.append(self._bins, np.inf)
        interval_bins = np.r_[-np.inf, self._bins, np.nan]
        self._intervals = [
            Interval(lo, hi)
            for lo, hi in zip(interval_bins[:-1], interval_bins[1:])
        ]
    elif isinstance(n_or_arr, numbers.Integral):
        if lo is None or hi is None:
            raise TypeError("Interpreting n_or_arr as uniform binning, please specify lo and hi values")
        self._uniform = True
        self._lo = lo
        self._hi = hi
        self._bins = n_or_arr
        interval_bins = np.r_[-np.inf,
                              np.linspace(self._lo, self._hi, self._bins + 1),
                              np.inf,
                              np.nan]
        self._intervals = [
            Interval(lo, hi)
            for lo, hi in zip(interval_bins[:-1], interval_bins[1:])
        ]
    else:
        raise TypeError("Cannot understand n_or_arr (nbins or binning array) type %r" % n_or_arr)
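# Hypothetical usage sketch (not part of the package): the two supported ways to
# construct a Bin axis, uniform (nbins, lo, hi) and from an explicit edge array,
# mirroring the calls made in test_hist below.
def _example_bin_axis_usage():
    uniform = Bin("pt", "$p_T$ [GeV]", 20, 0, 200)
    variable = Bin("eta", r"$\eta$", np.array([-2.5, -1.0, 0.0, 1.0, 2.5]))
    assert uniform._uniform and not variable._uniform
    return uniform, variable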
def poisson_interval(sumw, sumw2, coverage=_coverage1sd):
    """Compute the so-called 'Garwood' interval for Poisson-distributed data

    sumw: sum of weights
    sumw2: sum of weights**2
    coverage: coverage, defaults to 68%

    c.f. https://www.ine.pt/revstat/pdf/rs120203.pdf or
    http://ms.mcmaster.ca/peter/s743/poissonalpha.html

    For weighted data, approximate the observed count by sumw**2 / sumw2.
    This choice effectively scales the unweighted Poisson interval by the
    average weight. It may not be the best approximation; see
    https://arxiv.org/pdf/1309.1287.pdf for a proper treatment.

    When a bin is zero, use the scale of the nearest nonzero bin.
    If all bins are zero, raise a warning and set the interval to sumw.
    """
    scale = np.empty_like(sumw)
    scale[sumw != 0] = sumw2[sumw != 0] / sumw[sumw != 0]
    if np.sum(sumw == 0) > 0:
        missing = np.where(sumw == 0)
        available = np.nonzero(sumw)
        if len(available[0]) == 0:
            warnings.warn("All sumw are zero!  Cannot compute meaningful error bars", RuntimeWarning)
            return np.vstack([sumw, sumw])
        nearest = sum([np.subtract.outer(d, d0)**2 for d, d0 in zip(available, missing)]).argmin(axis=0)
        argnearest = tuple(dim[nearest] for dim in available)
        scale[missing] = scale[argnearest]
    counts = sumw / scale
    lo = scale * scipy.stats.chi2.ppf((1 - coverage) / 2, 2 * counts) / 2.
    hi = scale * scipy.stats.chi2.ppf((1 + coverage) / 2, 2 * (counts + 1)) / 2.
    interval = np.array([lo, hi])
    # chi2.ppf produces nan for counts=0; comparing against np.nan with ==
    # never matches, so use isnan to zero those entries
    interval[np.isnan(interval)] = 0.
    return interval
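# Hypothetical usage sketch (not part of the package): Garwood interval for a few
# weighted histogram bins, approximating the count in each bin by sumw**2 / sumw2
# as described in the docstring above. The numbers are illustrative only.
def _example_poisson_interval_usage():
    sumw = np.array([0., 4., 25.])    # sum of weights per bin
    sumw2 = np.array([0., 2., 25.])   # sum of squared weights per bin
    interval = poisson_interval(sumw, sumw2)
    # row 0 holds lower edges, row 1 upper edges; the empty bin borrows the
    # scale of the nearest populated bin rather than collapsing to zero width
    assert interval.shape == (2, 3)
    return interval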
def flatten_idxs(idx_in, jaggedarray):
    """
    This provides a faster way to convert between tuples of jagged indices
    and flat indices in a jagged array's contents.
    """
    if len(idx_in) == 0:
        return np.array([], dtype=int)
    idx_out = jaggedarray.starts[idx_in[0]]
    if len(idx_in) == 1:
        pass
    elif len(idx_in) == 2:
        idx_out += idx_in[1]
    else:
        raise Exception('jme_standard_function only works for two binning dimensions!')

    good_idx = (idx_out < jaggedarray.content.size)
    if (~good_idx).any():
        input_idxs = tuple([idx_out[~good_idx]] +
                           [idx_in[i][~good_idx] for i in range(len(idx_in))])
        raise Exception('Calculated invalid index {} for'
                        ' array with length {}'.format(np.vstack(input_idxs),
                                                       jaggedarray.content.size))
    return idx_out
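# Hypothetical usage sketch (not part of the package): flatten_idxs maps a
# (row, column) pair of jagged indices to flat offsets into JaggedArray.content,
# assuming the awkward 0.x JaggedArray API used elsewhere in this package.
def _example_flatten_idxs_usage():
    import awkward
    counts = np.array([2, 3, 1])
    content = np.arange(6, dtype=np.float64)
    ja = awkward.JaggedArray.fromcounts(counts, content)
    rows = np.array([0, 1, 2])
    cols = np.array([1, 2, 0])
    flat = flatten_idxs((rows, cols), ja)
    # flat == ja.starts[rows] + cols, i.e. [1, 4, 5] for this layout
    assert (flat == np.array([1, 4, 5])).all()
    return flat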
def test_hist():
    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    h_nothing = hist.Hist("empty inside")
    assert h_nothing.sparse_dim() == h_nothing.dense_dim() == 0
    assert h_nothing.values() == {}

    h_regular_bins = hist.Hist("regular joe",
                               hist.Bin("x", "x", 20, 0, 200),
                               hist.Bin("y", "why", 20, -3, 3))
    h_regular_bins.fill(x=test_pt, y=test_eta)
    nentries = np.sum(counts)
    assert h_regular_bins.sum("x", "y", overflow='all').values(sumw2=True)[()] == (nentries, nentries)

    # bin x=2, y=10 (when overflow removed)
    count_some_bin = np.sum((test_pt >= 20.) & (test_pt < 30.) & (test_eta >= 0.) & (test_eta < 0.3))
    assert h_regular_bins.project("x", slice(20, 30)).values()[()][10] == count_some_bin
    assert h_regular_bins.project("y", slice(0, 0.3)).values()[()][2] == count_some_bin

    h_reduced = h_regular_bins[10:, -.6:]
    # bin x=1, y=2
    assert h_reduced.project("x", slice(20, 30)).values()[()][2] == count_some_bin
    assert h_reduced.project("y", slice(0, 0.3)).values()[()][1] == count_some_bin
    h_reduced.fill(x=23, y=0.1)
    assert h_reduced.project("x", slice(20, 30)).values()[()][2] == count_some_bin + 1
    assert h_reduced.project("y", slice(0, 0.3)).values()[()][1] == count_some_bin + 1

    animal = hist.Cat("animal", "type of animal")
    vocalization = hist.Cat("vocalization", "onomatopoiea is that how you spell it?")
    h_cat_bins = hist.Hist("I like cats", animal, vocalization)
    h_cat_bins.fill(animal="cat", vocalization="meow", weight=2.)
    h_cat_bins.fill(animal="dog", vocalization="meow", weight=np.array([-1., -1., -5.]))
    h_cat_bins.fill(animal="dog", vocalization="woof", weight=100.)
    h_cat_bins.fill(animal="dog", vocalization="ruff")
    assert h_cat_bins.values()[("cat", "meow")] == 2.
    assert h_cat_bins.values(sumw2=True)[("dog", "meow")] == (-7., 27.)
    assert h_cat_bins.project("vocalization", ["woof", "ruff"]).values(sumw2=True)[("dog",)] == (101., 10001.)

    height = hist.Bin("height", "height [m]", 10, 0, 5)
    h_mascots_1 = hist.Hist("fermi mascot showdown",
                            animal,
                            vocalization,
                            height,
                            # weight is a reserved keyword
                            hist.Bin("mass", "weight (g=9.81m/s**2) [kg]", np.power(10., np.arange(5) - 1)),
                            )

    adult_bison_h = np.random.normal(loc=2.5, scale=0.2, size=40)
    adult_bison_w = np.random.normal(loc=700, scale=100, size=40)
    h_mascots_1.fill(animal="bison", vocalization="huff", height=adult_bison_h, mass=adult_bison_w)
    goose_h = np.random.normal(loc=0.4, scale=0.05, size=1000)
    goose_w = np.random.normal(loc=7, scale=1, size=1000)
    h_mascots_1.fill(animal="goose", vocalization="honk", height=goose_h, mass=goose_w)
    crane_h = np.random.normal(loc=1, scale=0.05, size=4)
    crane_w = np.random.normal(loc=10, scale=1, size=4)
    h_mascots_1.fill(animal="crane", vocalization="none", height=crane_h, mass=crane_w)

    h_mascots_2 = h_mascots_1.copy()
    h_mascots_2.clear()
    baby_bison_h = np.random.normal(loc=.5, scale=0.1, size=20)
    baby_bison_w = np.random.normal(loc=200, scale=10, size=20)
    baby_bison_cutefactor = 2.5 * np.ones_like(baby_bison_w)
    h_mascots_2.fill(animal="bison", vocalization="baa", height=baby_bison_h, mass=baby_bison_w, weight=baby_bison_cutefactor)
    h_mascots_2.fill(animal="fox", vocalization="none", height=1., mass=30.)

    h_mascots = h_mascots_1 + h_mascots_2
    assert h_mascots.project("vocalization", "h*").sum("height", "mass", "animal").values()[()] == 1040.
    species_class = hist.Cat("species_class", "where the subphylum is vertebrates")
    classes = {
        'birds': ['goose', 'crane'],
        'mammals': ['bison', 'fox'],
    }
    h_species = h_mascots.group(species_class, "animal", classes)

    assert set(h_species.project("vocalization").values().keys()) == set([('birds',), ('mammals',)])
    nbirds_bin = np.sum((goose_h >= 0.5) & (goose_h < 1) & (goose_w > 10) & (goose_w < 100))
    nbirds_bin += np.sum((crane_h >= 0.5) & (crane_h < 1) & (crane_w > 10) & (crane_w < 100))
    assert h_species.project("vocalization").values()[('birds',)][1, 2] == nbirds_bin
    tally = h_species.sum("mass", "height", "vocalization").values()
    assert tally[('birds',)] == 1004.
    assert tally[('mammals',)] == 91.

    h_species.scale({"honk": 0.1, "huff": 0.9}, axis="vocalization")
    h_species.scale(5.)
    tally = h_species.sum("mass", height, vocalization).values(sumw2=True)
    assert tally[('birds',)] == (520., 350.)
    assert tally[('mammals',)] == (435., 25 * (40 * (0.9**2) + 20 * (2.5**2) + 1))

    assert h_species.axis("vocalization") is vocalization
    assert h_species.axis("height") is height
    assert h_species.project("vocalization", "h*").axis("height") is height

    tall_class = hist.Cat("tall_class", "species class (species above 1m)")
    mapping = {
        'birds': (['goose', 'crane'], slice(1., None)),
        'mammals': (['bison', 'fox'], slice(1., None)),
    }
    h_tall = h_mascots.group(tall_class, (animal, height), mapping)
    tall_bird_count = np.sum(goose_h >= 1.) + np.sum(crane_h >= 1)
    assert h_tall.sum("mass", "vocalization").values()[('birds',)] == tall_bird_count
    tall_mammal_count = np.sum(adult_bison_h >= 1.) + np.sum(baby_bison_h >= 1) + 1
    assert h_tall.sum("mass", "vocalization").values()[('mammals',)] == tall_mammal_count
def plotgrid(h, figure=None, row=None, col=None, overlay=None,
             row_overflow='none', col_overflow='none', **plot_opts):
    """
    Create a grid of plots, enumerating identifiers on up to 3 axes:
        row: name of row axis
        col: name of column axis
        overlay: name of overlay axis
    The remaining axis will be the plot axis, with plot_opts passed to the plot1d() call.
    Pass a figure object to redraw on an existing figure.
    """
    haxes = set(ax.name for ax in h.axes())
    nrow, ncol = 1, 1
    if row:
        row_identifiers = h.identifiers(row, overflow=row_overflow)
        nrow = len(row_identifiers)
        haxes.remove(row)
    if col:
        col_identifiers = h.identifiers(col, overflow=col_overflow)
        ncol = len(col_identifiers)
        haxes.remove(col)
    if overlay:
        haxes.remove(overlay)
    if len(haxes) > 1:
        raise ValueError("More than one dimension left: %s" % (",".join(ax for ax in haxes),))
    elif len(haxes) == 0:
        raise ValueError("Not enough dimensions available in %r" % h)

    figsize = plt.rcParams['figure.figsize']
    figsize = figsize[0] * max(ncol, 1), figsize[1] * max(nrow, 1)
    if figure is None:
        fig, axes = plt.subplots(nrow, ncol, figsize=figsize,
                                 squeeze=False, sharex=True, sharey=True)
    else:
        fig = figure
        shape = (0, 0)
        lastax = fig.get_children()[-1]
        if isinstance(lastax, plt.Axes):
            shape = lastax.rowNum + 1, lastax.colNum + 1
        if shape[0] == nrow and shape[1] == ncol:
            axes = np.array(fig.axes).reshape(shape)
        else:
            fig.clear()
            # fig.set_size_inches(figsize)
            axes = fig.subplots(nrow, ncol, squeeze=False, sharex=True, sharey=True)

    for icol in range(ncol):
        hcol = h
        coltitle = None
        if col:
            vcol = col_identifiers[icol]
            hcol = h.project(col, vcol)
            coltitle = str(vcol)
            if isinstance(vcol, Interval) and vcol.label is None:
                coltitle = "%s ∈ %s" % (h.axis(col).label, coltitle)
        for irow in range(nrow):
            ax = axes[irow, icol]
            hplot = hcol
            rowtitle = None
            if row:
                vrow = row_identifiers[irow]
                hplot = hcol.project(row, vrow)
                rowtitle = str(vrow)
                if isinstance(vrow, Interval) and vrow.label is None:
                    rowtitle = "%s ∈ %s" % (h.axis(row).label, rowtitle)

            plot1d(hplot, ax=ax, overlay=overlay, **plot_opts)
            if row is not None and col is not None:
                ax.set_title("%s, %s" % (rowtitle, coltitle))
            elif row is not None:
                ax.set_title(rowtitle)
            elif col is not None:
                ax.set_title(coltitle)

    for ax in axes.flatten():
        ax.autoscale(axis='y')
        ax.set_ylim(0, None)

    return fig, axes