def write_hist(histfile: Path, name: str, hist: np.ndarray, bin_edges: np.ndarray) -> None:
    """Write a histogram and its bin edges to a new file.

    The output format is selected by the suffix of ``histfile``:

    * ``.npz``  -- compressed NumPy archive with the entries ``name`` and
      ``bin_edges``.
    * ``.txt``  -- ``np.savetxt`` dump of ``hist``; the bin edges and ``name``
      are placed in the header.
    * ``.hdf5`` -- gzip-compressed datasets ``name`` and ``bin_edges``
      (requires ``h5py``).
    * ``.root`` -- a TH1 (1-D ``hist``) or TH2 (otherwise) built with
      ``uproot_methods`` and written with ``uproot``.

    Args:
        histfile: Destination path; it must not exist yet.
        name: Key / dataset / object name under which ``hist`` is stored.
        hist: Histogram contents.
        bin_edges: Bin edges belonging to ``hist``.

    Raises:
        Exception: If ``histfile`` already exists, if ``h5py`` is needed but
            not installed, or if the suffix is not a supported format.
    """
    if histfile.exists():
        raise Exception(f"Error writing {histfile}, already exists.")

    suffix = histfile.suffix
    if suffix == ".npz":
        np.savez_compressed(histfile, **{
            name: hist,
            "bin_edges": bin_edges,
        })
    elif suffix == ".txt":
        import sys

        # str() on an array with more than 1000 elements is abbreviated with
        # "...", which would silently drop bin edges from the header. Lifting
        # the print threshold keeps the formatting of small arrays unchanged
        # while writing large ones in full.
        with np.printoptions(threshold=sys.maxsize):
            edges_text = str(bin_edges)
        np.savetxt(
            histfile,
            hist,
            header="bin edges:\n" + edges_text + f"\n{name}:",
        )
    elif suffix == ".hdf5":
        # Only the import can raise ModuleNotFoundError, so keep the try
        # minimal and chain the original error for easier debugging.
        try:
            import h5py
        except ModuleNotFoundError as err:
            raise Exception("Please install h5py to write hdf5 files") from err
        with h5py.File(histfile, "w") as f:
            f.create_dataset(name, data=hist, compression="gzip", compression_opts=9)
            f.create_dataset(
                "bin_edges",
                data=bin_edges,
                compression="gzip",
                compression_opts=9,
            )
    elif suffix == ".root":
        import uproot

        # TODO: Discard sumw2?
        if hist.ndim == 1:
            from uproot_methods.classes.TH1 import from_numpy
            h = from_numpy([hist, bin_edges])
        else:
            from uproot_methods.classes.TH2 import from_numpy
            # The first edge array is synthetic: unit-width bins, one per
            # entry along axis 0 of ``hist``; ``bin_edges`` is the second.
            h = from_numpy([hist, np.arange(0, hist.shape[0] + 1), bin_edges])
        with uproot.create(histfile) as f:
            f[name] = h
    else:
        raise Exception(f"Unknown output format: {histfile.suffix}")
def test_th1(self):
    """Check the attributes of the TH1 that ``from_numpy`` builds from two bins."""
    from uproot_methods.classes.TH1 import Methods, _histtype, from_numpy

    bin_edges = np.array((0., 1., 2.))
    counts = np.array([2, 3])
    inf = np.inf
    # Every interval including the underflow (first) and overflow (last) bin.
    all_intervals = ((-inf, 0), (0, 1), (1, 2), (2, inf))

    hist = from_numpy((counts, bin_edges))

    # Scalar metadata.
    assert hist.name is None
    assert hist.numbins == 2
    assert hist.title == b""
    assert hist.low == 0
    assert hist.high == 2
    assert hist.underflows == 0
    assert hist.overflows == 0

    # In-range arrays; the variances are expected to be the squared counts.
    squared = counts**2
    np.testing.assert_equal(hist.edges, bin_edges)
    np.testing.assert_equal(hist.values, counts)
    np.testing.assert_equal(hist.variances, squared)

    # "all*" variants are padded with the underflow and overflow entries.
    np.testing.assert_equal(hist.alledges, [-inf, *bin_edges, inf])
    np.testing.assert_equal(hist.allvalues, [0, *counts, 0])
    np.testing.assert_equal(hist.allvariances, [0, *squared, 0])

    np.testing.assert_equal(hist.bins, all_intervals[1:-1])
    np.testing.assert_equal(hist.allbins, all_intervals)

    for index, expected in enumerate(all_intervals):
        assert hist.interval(index) == expected
    # A negative index counts from the end, so -1 is the overflow bin.
    assert hist.interval(-1) == hist.interval(3)
def test_histogram(self):
    """Fill two histograms with ``self.ha.histogram_from_vector`` and write them to ROOT.

    A small hand-made sample and a 10000-element normal sample are histogrammed,
    converted with ``from_numpy`` and stored as ``hist`` and ``hist2`` in
    ``test.root``. The test has no value assertions; it passes when filling,
    conversion and writing complete without raising.
    """
    np = TestHistogram.NUMPY_LIB

    data = np.array([2,3,4,5,6,7], dtype=np.float32)
    data[data<2] = 0
    weights = np.ones_like(data, dtype=np.float32)
    w, w2, e = self.ha.histogram_from_vector(data, weights, np.array([0,1,2,3,4,5], dtype=np.float32))
    # NOTE(review): this reference histogram is computed but never compared
    # with ``w``/``e`` -- confirm whether an assertion was intended here.
    npw, npe = np.histogram(data, np.array([0,1,2,3,4,5]))
    hr = from_numpy((w, e))

    # The context manager closes the file even if a later step raises, so a
    # failing run does not leave an open handle on test.root.
    with uproot.recreate("test.root") as f:
        f["hist"] = hr

        data = np.random.normal(size=10000)
        data = np.array(data, dtype=np.float32)
        weights = np.ones_like(data, dtype=np.float32)
        w, w2, e = self.ha.histogram_from_vector(data, weights, np.linspace(-1,1,100, dtype=np.float32))
        hr = from_numpy((w, e))
        f["hist2"] = hr
def to_th1(hdict, name):
    """Build a named TH1 from a histogram object via ``from_numpy``.

    Args:
        hdict: Object exposing ``contents``, ``contents_w2`` and ``edges``.
        name: Value stored in the ``_fName`` attribute of the result.

    Returns:
        The object returned by ``from_numpy((contents, edges))`` with
        ``_fName``, ``_fSumw2``, ``_fTsumw2`` and ``_fTsumwx2`` overwritten.
    """
    # Work on copies so the caller's arrays are not modified by the cleanup.
    content = np.array(hdict.contents)
    content_w2 = np.array(hdict.contents_w2)
    edges = np.array(hdict.edges)

    # remove inf/nan just in case
    content[np.isinf(content)] = 0
    content_w2[np.isinf(content_w2)] = 0
    content[np.isnan(content)] = 0
    content_w2[np.isnan(content_w2)] = 0

    # update the error bars
    centers = (edges[:-1] + edges[1:]) / 2.0
    th1 = from_numpy((content, edges))
    th1._fName = name
    # Use the cleaned copy: previously these were recomputed from the raw
    # ``hdict.contents_w2``, which put the inf/nan values straight back in.
    th1._fSumw2 = content_w2
    th1._fTsumw2 = content_w2.sum()
    # NOTE(review): ROOT describes fTsumwx2 as the sum of weight * x * x; the
    # existing sum(w2 * center) formula is kept as-is -- confirm it is intended.
    th1._fTsumwx2 = (content_w2 * centers).sum()
    return th1
def save_shapes(var, hist, edges, args):
    """Write per-channel, per-region shape histograms to ROOT files.

    For every combination of ``args['channels']`` and ``args['regions']`` one
    file ``combine_new/shapes_{channel}_{region}_{year}_{label}.root`` is
    (re)created. It receives one TH1 per (group, variation) of the non-data
    samples plus a ``data_obs`` TH1 summed over the data samples.

    Args:
        var: Object whose ``name`` selects the entry of ``hist`` to use.
        hist: Mapping from variable name to a DataFrame with the columns
            ``s``, ``r``, ``c``, ``v``, ``w`` and the per-bin columns whose
            names contain ``'bin'`` / ``'sumw2'``.
        edges: Bin edges passed to ``from_numpy`` for every histogram.
        args: Dict providing ``'channels'``, ``'regions'``, ``'year'`` and
            ``'label'``.

    Samples are merged according to the module-level ``grouping`` mapping;
    samples that are not listed there are left out of the output.
    """
    def get_vwname(v, w):
        """Return the output suffix for a (v, w) pair, or '' to skip it."""
        if 'nominal' in v:
            if 'off' in w:
                return ''
            if 'nominal' in w:
                return 'nominal'
            if '_up' in w:
                return w.replace('_up', 'Up').replace('wgt_', '')
            if '_down' in w:
                return w.replace('_down', 'Down').replace('wgt_', '')
            return ''
        if 'nominal' not in w:
            return ''
        if '_up' in v:
            return v.replace('_up', 'Up')
        if '_down' in v:
            return v.replace('_down', 'Down')
        return ''

    hist = hist[var.name]
    centers = (edges[:-1] + edges[1:]) / 2.0

    def make_th1(name, contents, sumw2):
        """Build a TH1 from contents and attach its name and sumw2 data."""
        th1 = from_numpy([contents, edges])
        th1._fName = name
        th1._fSumw2 = np.array(sumw2)
        th1._fTsumw2 = np.array(sumw2).sum()
        # NOTE(review): ROOT describes fTsumwx2 as the sum of weight * x * x;
        # the existing sum(w2 * center) formula is kept -- confirm intent.
        th1._fTsumwx2 = np.array(sumw2 * centers).sum()
        return th1

    bin_columns = [c for c in hist.columns if 'bin' in c]
    sumw2_columns = [c for c in hist.columns if 'sumw2' in c]
    # Columns that are actually summed per group (order kept, no duplicates).
    value_columns = list(dict.fromkeys(bin_columns + sumw2_columns))
    data_names = [n for n in hist.s.unique() if 'data' in n]
    is_data = hist.s.isin(data_names)

    for c in args['channels']:
        for r in args['regions']:
            out_fn = f'combine_new/shapes_{c}_{r}_{args["year"]}_{args["label"]}.root'
            in_selection = (hist.r == r) & (hist.c == c)
            rname = r.replace('-', '_')
            data_obs_hist = np.zeros(len(bin_columns), dtype=float)
            data_obs_sumw2 = np.zeros(len(sumw2_columns), dtype=float)

            # The context manager closes the file even when a step fails.
            with uproot.recreate(out_fn) as out_file:
                for v in hist.v.unique():
                    for w in hist.w.unique():
                        vwname = get_vwname(v, w)
                        # Previously the skip cases returned () and slipped
                        # past an ``== ''`` test, producing "<group>_()"
                        # entries; any empty name is skipped now.
                        if not vwname:
                            continue
                        if vwname == 'nominal':
                            data_obs = hist[is_data & in_selection]
                            data_obs_hist = data_obs[bin_columns].sum(
                                axis=0).values
                            data_obs_sumw2 = data_obs[sumw2_columns].sum(
                                axis=0).values

                        # Copy so that adding the 'group' column does not
                        # write into a slice of ``hist``.
                        mc_hist = hist[~is_data & (hist.v == v) &
                                       (hist.w == w) & in_selection].copy()
                        # Samples missing from ``grouping`` get None and are
                        # dropped by groupby. The column always exists, so a
                        # selection without any grouped sample yields no
                        # histograms instead of raising KeyError.
                        mc_hist['group'] = [
                            grouping[s] if s in grouping.keys() else None
                            for s in mc_hist.s
                        ]
                        mc_hist = mc_hist.groupby('group')[
                            value_columns].sum().reset_index()

                        for g in mc_hist['group'].unique():
                            rows = mc_hist[mc_hist['group'] == g]
                            histo = rows[bin_columns].values[0]
                            if len(histo) == 0:
                                continue
                            sumw2 = rows[sumw2_columns].values[0]
                            name = f'{rname}_{g}_{vwname}'
                            out_file[f'{g}_{vwname}'] = make_th1(
                                name, histo, sumw2)

                out_file['data_obs'] = make_th1(
                    'data_obs', data_obs_hist, data_obs_sumw2)