def __init__(self, germlines, name, only_correct_gene_fractions=False):
    """Set up the empty tallies and histograms that get filled later.

    Args:
        germlines: stored unchanged on ``self.germlines``.
        name: stored unchanged on ``self.name``.
        only_correct_gene_fractions: stored unchanged on
            ``self.only_correct_gene_fractions``.
    """
    self.germlines = germlines
    self.name = name
    self.only_correct_gene_fractions = only_correct_gene_fractions

    # One (initially empty) tally per index column; boolean columns start
    # with explicit right/wrong counters.
    self.values = {}
    for column in utils.index_columns:
        if column == 'cdr3_length':
            # kind of finicky to figure out what this is, so it isn't always set
            continue
        if column in bool_columns:
            self.values[column] = {'right': 0, 'wrong': 0}
        else:
            self.values[column] = {}

    # Hamming-distance tallies: first for the whole sequence, then per region.
    hamming_keys = ('hamming_to_true_naive', 'hamming_to_true_naive_normed')
    for key in hamming_keys:
        self.values[key] = {}
    for region in utils.regions:
        for key in hamming_keys:
            self.values[region + '_' + key] = {}

    # Mutation-frequency hists: whole sequence, then one per region.
    self.hists = {'mute_freqs': Hist(30, -0.05, 0.05)}
    for region in utils.regions:
        self.hists[region + '_mute_freqs'] = Hist(30, -0.05, 0.05)

    # plots of correct gene calls vs mute freq
    for region in utils.regions:
        for outcome in ('right', 'wrong'):
            self.hists[region + '_gene_' + outcome + '_vs_mute_freq'] = Hist(50, 0., 0.4)
def hist_tuple():
    """Return a 3-tuple of separately filled 1D histograms.

    Each histogram has one regular axis named "x" (50 bins from -5 to 5,
    no underflow/overflow bins), uses ``Weight`` storage, and is filled with
    1000 normally distributed samples at weight 1.0.
    """

    def _filled_hist():
        x_axis = hist.axis.Regular(
            50, -5, 5, name="x", label="x [units]", underflow=False, overflow=False
        )
        empty = Hist(x_axis, storage=hist.storage.Weight())
        return empty.fill(np.random.normal(size=1000), weight=1.0)

    # Built one after another so the random draws happen in a fixed order.
    return tuple(_filled_hist() for _ in range(3))
def __init__(self, name):
    """Create the empty tallies and histograms that get filled later.

    Args:
        name: stored unchanged on ``self.name``.
    """
    self.name = name
    self.skipped_queries = []

    # the dictionary-based approach in <self.values> is nice because you can
    # decide your hist bounds after filling everything
    self.values = {}
    for column in plotconfig.gene_usage_columns:
        self.values[column] = {'right': 0, 'wrong': 0}
    # it might be nicer to eventually switch these to hists (I think the only
    # reason they're separate is that they predate the existence of the hist class)
    for column in plotconfig.int_columns:
        self.values[column] = {}
    for rstr in plotconfig.rstrings:
        for suffix in ('hamming_to_true_naive', 'muted_bases'):
            self.values[rstr + suffix] = {}
    self.values['shm_indel_length'] = {}

    # only do mutation frequency for the whole sequence
    # NOTE the hist bounds here are intended to be super inclusive, whereas in
    # compare-plotdirs.py we apply the more-restrictive ones from plotconfig.py
    # (we still shift overflows here, where appropriate, though)
    self.hists = {'mute_freqs': Hist(25, -0.04, 0.04)}

    # (key suffix, upper x bound) for the hists that are made once per region.
    # The "gene" ones are correct *up* to allele (i.e. you can get the allele
    # wrong), whereas the "allele" ones require the *correct* allele.
    per_region_hists = (
        ('_gene_right_vs_mute_freq', 0.4),
        ('_gene_wrong_vs_mute_freq', 0.4),
        ('_allele_right_vs_per_gene_support', 1.),
        ('_allele_wrong_vs_per_gene_support', 1.),
    )
    for region in utils.regions:
        for suffix, xmax in per_region_hists:
            self.hists[region + suffix] = Hist(25, 0., xmax)

    self.subplotdirs = ['gene-call', 'mutation', 'boundaries']
    self.v_3p_exclusion = 3
def test_from_array(named_hist):
    """Check building a 2D Hist directly from a ``data`` array.

    Covers an array without flow bins (10x7), an array with flow bins
    (12x9), and a mismatched shape (11x9) that must raise ``ValueError``.
    """

    def from_data(shape):
        return Hist(
            axis.Regular(10, 1, 2, name="A"),
            axis.Regular(7, 1, 3, name="B"),
            data=np.ones(shape),
        )

    # Array matching the bin counts exactly: nothing lands in the flow bins.
    h = from_data((10, 7))
    assert h.values() == approx(np.ones((10, 7)))
    assert h.sum() == approx(70)
    assert h.sum(flow=True) == approx(70)

    # Array with one extra entry on each side of both axes: flow bins included.
    h = from_data((12, 9))
    assert h.values(flow=False) == approx(np.ones((10, 7)))
    assert h.values(flow=True) == approx(np.ones((12, 9)))
    assert h.sum() == approx(70)
    assert h.sum(flow=True) == approx(12 * 9)

    # Shape matching neither layout.
    with pytest.raises(ValueError):
        from_data((11, 9))
def __init__(self, name):
    """Create the empty tallies and histograms that get filled later.

    Args:
        name: stored unchanged on ``self.name``.
    """
    self.name = name

    # the dictionary-based approach in <self.values> is nice because you can
    # decide your hist bounds after filling everything
    self.values = {}
    for column in utils.index_columns:
        # boolean columns get explicit right/wrong counters, the rest start empty
        self.values[column] = {'right': 0, 'wrong': 0} if column in bool_columns else {}
    for suffix in ('hamming_to_true_naive', 'muted_bases'):
        for rstr in plotconfig.rstrings:
            self.values[rstr + suffix] = {}

    # only do mutation frequency for the whole sequence
    # NOTE the hist bounds here are intended to be super inclusive, whereas in
    # compare-plotdirs.py we apply the more-restrictive ones from plotconfig.py
    # (we still shift overflows here, where appropriate, though)
    self.hists = {'mute_freqs': Hist(25, -0.04, 0.04)}

    # (key suffix, upper x bound) for the hists that are made once per region.
    # The "gene" ones are correct *up* to allele (i.e. you can get the allele
    # wrong), whereas the "allele" ones require the *correct* allele.
    per_region_hists = (
        ('_gene_right_vs_mute_freq', 0.4),
        ('_gene_wrong_vs_mute_freq', 0.4),
        ('_allele_right_vs_per_gene_support', 1.),
        ('_allele_wrong_vs_per_gene_support', 1.),
    )
    for region in utils.regions:
        for suffix, xmax in per_region_hists:
            self.hists[region + suffix] = Hist(25, 0., xmax)

    self.subplotdirs = ['gene-call', 'mutation', 'boundaries']
def test_weighted_mean(self):
    """Check WeightedMean storage via the builder and the three storage spellings."""

    def storage_type(*args, **kwargs):
        # Storage type of a fresh one-axis Hist built with the given storage spec.
        return Hist(axis.Regular(10, 0, 1, name="x"), *args, **kwargs)._storage_type

    builder = Hist.new.Reg(10, 0, 1, name="x").WeightedMean()
    h = builder.fill([0.5, 0.5], weight=[1, 1], sample=[1, 1])

    assert h[0.5j].sum_of_weights == 2
    assert h[0.5j].sum_of_weights_squared == 2
    assert h[0.5j].value == 1
    assert h[0.5j].variance == 0

    # add storage to existing storage
    with pytest.raises(Exception):
        h.WeightedMean()

    # storage given as a positional name, a keyword name, and an instance
    assert storage_type("WeighTEDMEAn") == storage.WeightedMean
    assert storage_type(storage="weightedMean") == storage.WeightedMean
    assert storage_type(storage.WeightedMean()) == storage.WeightedMean
def test_general_access():
    """
    Test general access -- whether Hist bins can be read and written through
    the different index spellings.
    """

    def same_bin_keys():
        # Index spellings that the test expects to address one and the same bin.
        return (6, bh.loc(1), 1j, 0j + 1, -3j + 4, bh.loc(1, 0))

    h = Hist(axis.Regular(10, -5, 5, name="X", label="x [units]"))
    h = h.fill(np.random.normal(size=1000))

    # Reading: every spelling must give the same content as its neighbour.
    contents = [h[key] for key in same_bin_keys()]
    assert all(left == right for left, right in zip(contents, contents[1:]))

    # Writing: every spelling must be accepted as an assignment target.
    for key in same_bin_keys():
        h[key] = 0

    axes = (
        axis.Regular(50, -5, 5, name="Norm", label="normal distribution"),
        axis.Regular(50, -5, 5, name="Unif", label="uniform distribution"),
        axis.StrCategory(["hi", "hello"], name="Greet"),
        axis.Boolean(name="Yes"),
        axis.Integer(0, 1000, name="Int"),
    )
    h = Hist(*axes).fill(
        np.random.normal(size=1000),
        np.random.uniform(size=1000),
        ["hi"] * 800 + ["hello"] * 200,
        [True] * 600 + [False] * 400,
        np.ones(1000),
    )

    assert h[0j, -0j + 2, "hi", True, 1]

    # mismatch dimension
    with pytest.raises(Exception):
        h[0j, -0j + 2, "hi", True]
def test_double(self):
    """Check Double storage via the builder and the three storage spellings."""

    def storage_type(*args, **kwargs):
        # Storage type of a fresh one-axis Hist built with the given storage spec.
        return Hist(axis.Regular(10, 0, 1, name="x"), *args, **kwargs)._storage_type

    builder = Hist.new.Reg(10, 0, 1, name="x").Reg(10, 0, 1, name="y").Double()
    h = builder.fill(x=[0.5, 0.5], y=[0.2, 0.6])

    assert h[0.5j, 0.2j] == 1
    assert h[bh.loc(0.5), bh.loc(0.6)] == 1
    assert isinstance(h[0.5j, 0.5j], float)

    # add storage to existing storage
    with pytest.raises(Exception):
        h.Double()

    # storage given as a positional name, a keyword name, and an instance
    assert storage_type("double") == storage.Double
    assert storage_type(storage="DouBle") == storage.Double
    assert storage_type(storage.Double()) == storage.Double
def test_mean(self):
    """Check Mean storage via the builder and the three storage spellings."""

    def storage_type(*args, **kwargs):
        # Storage type of a fresh one-axis Hist built with the given storage spec.
        return Hist(axis.Regular(10, 0, 1, name="x"), *args, **kwargs)._storage_type

    builder = Hist.new.Reg(10, 0, 1, name="x").Mean()
    h = builder.fill([0.5, 0.5], weight=[1, 1], sample=[1, 1])

    assert h[0.5j].count == 2
    assert h[0.5j].value == 1
    assert h[0.5j].variance == 0

    # add storage to existing storage
    with pytest.raises(Exception):
        h.Mean()

    # storage given as a positional name, a keyword name, and an instance
    assert storage_type("MEAn") == storage.Mean
    assert storage_type(storage="mean") == storage.Mean
    assert storage_type(storage.Mean()) == storage.Mean
def test_basic_usage():
    '''
    Test basic usage -- whether Hist fills and indexes the way a
    boost-histogram does.
    '''

    # (bin index, expected count) after the fill below
    expected_counts = ((2, 0), (3, 2), (4, 1), (5, 0))

    # Test normal Hist
    h = Hist(axis.Regular(10, 0, 1, name='x'))
    h.fill([0.35, 0.35, 0.45])

    # plain integer indexing
    for index, count in expected_counts:
        assert h[index] == count

    # dict indexing keyed by axis number
    for index, count in expected_counts:
        assert h[{0: index}] == count

    # Test multi-axis Hist
    x_values = [0.35, 0.35, 0.35, 0.45, 0.55, 0.55, 0.55]
    y_values = [0.35, 0.35, 0.45, 0.45, 0.45, 0.45, 0.45]
    z_values = [0, 0, 1, 1, 1, 1, 1]
    h = Hist(
        axis.Regular(10, 0, 1, name="x"),
        axis.Regular(10, 0, 1, name="y"),
        axis.Integer(0, 2, name="z"),
    )
    h.fill(x_values, y_values, z_values)
def get_mute_hist(self, mtype):
    """Return the histogram of mean mutation frequencies for <mtype>.

    If ``self.args.mutate_from_scratch`` is not set, the histogram is read
    from ``<self.parameter_dir>/<mtype>-mean-mute-freqs.csv``. Otherwise it is
    built around ``self.args.scratch_mute_freq``: either a single bin filled
    once with that value (``self.args.same_mute_freq_for_all_seqs``), or a
    normalized 30-bin histogram of 500 exponential draws with that mean,
    clipped at 0.8.

    Args:
        mtype: prefix of the csv file name; unused when mutating from scratch.

    Returns:
        The resulting ``Hist``.
    """
    if not self.args.mutate_from_scratch:
        return Hist(fname=self.parameter_dir + '/' + mtype + '-mean-mute-freqs.csv')

    mean_mute_val = self.args.scratch_mute_freq
    if self.args.same_mute_freq_for_all_seqs:
        hist = Hist(1, mean_mute_val - utils.eps, mean_mute_val + utils.eps)
        hist.fill(mean_mute_val)
        return hist

    n_entries = 500
    max_val = 0.8  # this is arbitrary, but you shouldn't be calling this with anything that gets a significant number anywhere near there, anyway
    draws = list(numpy.random.exponential(mean_mute_val, n_entries))

    # Warn if any draw reaches the cap. This used to test for values exactly
    # equal to <max_val> *before* clipping, which essentially never matches a
    # random float, so the warning could not fire.
    if any(val >= max_val for val in draws):
        print('%s lots of really high mutation rates treegenerator::get_mute_hist()' % utils.color('yellow', 'warning'))

    hist = Hist(30, 0., max_val)
    for val in draws:
        hist.fill(min(val, max_val))  # clip so nothing is filled above the cap
    hist.normalize()
    return hist
def test_image_plot_ratio_hist():
    """
    Test plot_ratio between two histograms by comparing against a reference
    image generated via `pytest --mpl-generate-path=tests/baseline`
    """

    def sampled(n_samples):
        x_axis = axis.Regular(
            50, -5, 5, name="X", label="x [units]", underflow=False, overflow=False
        )
        return Hist(x_axis).fill(np.random.normal(size=n_samples))

    # Fixed seed so the drawn samples are reproducible.
    np.random.seed(42)
    hist_1 = sampled(1000)
    hist_2 = sampled(1700)

    fig = plt.figure()
    labels = {"rp_num_label": "numerator", "rp_denom_label": "denominator"}
    assert hist_1.plot_ratio(hist_2, **labels)

    return fig
def test_general_plot():
    """
    Test general plot -- whether 1D and 2D Hists can be plotted, and whether
    bad dimensions and bad keyword arguments are rejected.
    """

    # axis name -> (lower edge, upper edge, label)
    axis_specs = {
        "A": (-5, 5, "a [units]"),
        "B": (-4, 4, "b [units]"),
        "C": (-4, 4, "c [units]"),
    }

    def filled(names):
        # Hist over the named axes, filled with 10 normal samples per axis.
        axes = []
        for name in names:
            low, high, label = axis_specs[name]
            axes.append(
                axis.Regular(
                    50, low, high, name=name, label=label, underflow=False, overflow=False
                )
            )
        return Hist(*axes).fill(*[np.random.normal(size=10) for _ in names])

    assert filled("A").plot(color="green", ls="--", lw=3)
    assert filled("AB").plot(cmap="cividis")

    # dimension error
    h = filled("ABC")
    with pytest.raises(Exception):
        h.plot()

    # wrong kwargs names
    for projection in (("A",), ("A", "C")):
        with pytest.raises(Exception):
            h.project(*projection).plot(abc="red")

    # wrong kwargs type
    with pytest.raises(Exception):
        h.project("B").plot(ls="red")
    with pytest.raises(Exception):
        h.project("A", "C").plot(cmap=0.1)

    plt.close("all")
def __init__(self, germline_seqs):
    """Store the germline sequences and create empty per-region rate hists.

    Args:
        germline_seqs: stored unchanged on ``self.germline_seqs``.
    """
    self.germline_seqs = germline_seqs
    self.counts = {}
    self.freqs = {}
    self.plotting_info = {}

    # One hist for everything combined, plus one per region, all sharing
    # the same binning.
    hist_args = (100, 0.0, 0.5)  # n_bins, xmin, xmax
    self.mean_rates = {'all': Hist(*hist_args)}
    for region in utils.regions:
        self.mean_rates[region] = Hist(*hist_args)

    self.finalized = False
def test_general_project():
    """
    Test general project -- whether Hist can be projected by index, by name,
    and by a mix of both, and whether invalid selections are rejected.
    """

    def six_axis_hist():
        return Hist(
            axis.Regular(
                50, -5, 5, name="A", label="a [units]", underflow=False, overflow=False
            ),
            axis.Boolean(name="B", label="b [units]"),
            axis.Variable(range(11), name="C", label="c [units]"),
            axis.Integer(0, 10, name="D", label="d [units]"),
            axis.IntCategory(range(10), name="E", label="e [units]"),
            axis.StrCategory("FT", name="F", label="f [units]"),
        )

    h = six_axis_hist()

    # via indices
    for selection in ((), (0, 1), (0, 1, 2, 3, 4, 5)):
        assert h.project(*selection)

    # via names
    for selection in ((), ("A", "B"), ("A", "B", "C", "D", "E", "F")):
        assert h.project(*selection)

    h = six_axis_hist()

    # duplicated
    for selection in ((0, 0), ("A", "A"), (0, "A")):
        with pytest.raises(Exception):
            h.project(*selection)

    # mixed types
    assert h.project(2, "A")

    # cannot be found
    for selection in ((-1, 9), ("G", "H")):
        with pytest.raises(Exception):
            h.project(*selection)
def test_unlimited(self):
    """Check Unlimited storage via the builder and the three storage spellings."""

    def storage_type(*args, **kwargs):
        # Storage type of a fresh one-axis Hist built with the given storage spec.
        return Hist(axis.Regular(10, 0, 1, name="x"), *args, **kwargs)._storage_type

    builder = Hist.new.Reg(10, 0, 1, name="x").Unlimited()
    h = builder.fill([0.5, 0.5])
    assert h[0.5j] == 2

    # add storage to existing storage
    with pytest.raises(Exception):
        h.Unlimited()

    # storage given as a positional name, a keyword name, and an instance
    assert storage_type("unlimited") == storage.Unlimited
    assert storage_type(storage="UNLImited") == storage.Unlimited
    assert storage_type(storage.Unlimited()) == storage.Unlimited
def make_hist_from_dict_of_counts(values, var_type, hist_label, log='', xmin_force=0.0, xmax_force=0.0, normalize=False, sort=False):
    """
    Build a histogram from a dictionary of counts, one bin per key.

    <var_type> must be 'int' or 'string'. If <xmin_force> and <xmax_force>
    are equal the bounds are worked out from the keys, otherwise the forced
    bounds are used. Returns a dummy one-bin hist if <values> is empty, and
    raises an Exception if anything ended up in the under/overflow bins.
    """
    assert var_type in ('int', 'string')  # floats should be handled by Hist class in hist.py
    is_string = var_type == 'string'

    if not values:
        print('WARNING no values for %s in make_hist' % hist_label)
        return Hist(1, 0, 1)

    if is_string and not sort:  # for strings, sort so most common value is to left side
        bin_labels = sorted(values, key=values.get, reverse=True)
    else:
        bin_labels = sorted(values)

    first_label, last_label = bin_labels[0], bin_labels[-1]
    n_bins = len(values) if is_string else last_label - first_label + 1

    xbins = [0.] * (n_bins + 1)  # NOTE the +1 is 'cause you need the lower edge of the overflow bin
    if xmin_force != xmax_force:  # boundaries were set explicitly
        hist = Hist(n_bins, xmin_force, xmax_force)
    elif is_string:
        set_bins(bin_labels, n_bins, 'x' in log, xbins, var_type)
        hist = Hist(n_bins, xbins[0], xbins[-1], xbins=xbins)
    else:
        # for integers, just go from the first to the last bin label (they're sorted)
        hist = Hist(n_bins, first_label - 0.5, last_label + 0.5)

    for ival, key in enumerate(bin_labels):
        count = values[key]
        if is_string:
            label, ibin = key, ival + 1
        else:
            label, ibin = '', hist.find_bin(key)
        hist.set_ibin(ibin, count, error=math.sqrt(count), label=label)

    # make sure there's no overflows
    if hist.bin_contents[0] != 0.0 or hist.bin_contents[-1] != 0.0:
        for ibin in range(hist.n_bins + 2):
            print('%d %f %f' % (ibin, hist.low_edges[ibin], hist.bin_contents[ibin]))
        raise Exception('overflows in ' + hist_label)

    if normalize:
        hist.normalize()
    hist.ytitle = 'freq' if normalize else 'counts'

    return hist
def test_general_plot2d_full():
    """
    Test general plot2d_full -- whether a 2D Hist can be fully plotted, and
    whether bad dimensions and bad keyword arguments are rejected.
    """

    def filled_2d():
        axis_a = axis.Regular(
            50, -5, 5, name="A", label="a [units]", underflow=False, overflow=False
        )
        axis_b = axis.Regular(
            50, -4, 4, name="B", label="b [units]", underflow=False, overflow=False
        )
        return Hist(axis_a, axis_b).fill(
            np.random.normal(size=10), np.random.normal(size=10)
        )

    style = {
        "main_cmap": "cividis",
        "top_ls": "--",
        "top_color": "orange",
        "top_lw": 2,
        "side_ls": "-.",
        "side_lw": 1,
        "side_color": "steelblue",
    }
    assert filled_2d().plot2d_full(**style)

    h = filled_2d()

    # dimension error
    with pytest.raises(Exception):
        h.project("A").plot2d_full()

    # wrong kwargs names
    for bad_kwargs in ({"abc": "red"}, {"color": "red"}):
        with pytest.raises(Exception):
            h.plot2d_full(**bad_kwargs)

    # wrong kwargs type
    with pytest.raises(Exception):
        h.plot2d_full(main_cmap=0.1, side_lw="autumn")

    plt.close("all")
def test_int64(self):
    """Check Int64 storage via the builder and the three storage spellings."""

    def storage_type(*args, **kwargs):
        # Storage type of a fresh one-axis Hist built with the given storage spec.
        return Hist(axis.Regular(10, 0, 1, name="x"), *args, **kwargs)._storage_type

    builder = Hist.new.Reg(10, 0, 1, name="x").Int64()
    h = builder.fill([0.5, 0.5])
    assert h[0.5j] == 2
    assert isinstance(h[0.5j], int)

    # add storage to existing storage
    with pytest.raises(Exception):
        h.Int64()

    # storage given as a positional name, a keyword name, and an instance
    assert storage_type("int64") == storage.Int64
    assert storage_type(storage="INT64") == storage.Int64
    assert storage_type(storage.Int64()) == storage.Int64
def test_weight(self):
    """Check Weight storage via the builder and the three storage spellings."""

    def storage_type(*args, **kwargs):
        # Storage type of a fresh one-axis Hist built with the given storage spec.
        return Hist(axis.Regular(10, 0, 1, name="x"), *args, **kwargs)._storage_type

    builder = Hist.new.Reg(10, 0, 1, name="x").Weight()
    h = builder.fill([0.5, 0.5])
    assert h[0.5j].variance == 2
    assert h[0.5j].value == 2

    # add storage to existing storage
    with pytest.raises(Exception):
        h.Weight()

    # storage given as a positional name, a keyword name, and an instance
    assert storage_type("WeighT") == storage.Weight
    assert storage_type(storage="weight") == storage.Weight
    assert storage_type(storage.Weight()) == storage.Weight
def TEfficiency2data(self, histo):
    """Convert TEfficiency to internal format.
    No support for re-binning TEfficiencies.

    The binning is taken from the x-axis of the TEfficiency's "passed"
    histogram; the efficiency and its asymmetric errors are read bin by bin.

    Args:
        histo: the TEfficiency to convert.

    Returns:
        A ``Hist`` with ``content`` (efficiencies), ``error``
        (``[lower errors, upper errors]``), ``bins`` (edges), ``center`` and
        ``width`` (half of each bin's width) filled in.
    """
    h_histo = histo.GetPassedHistogram()
    x_axis = h_histo.GetXaxis()

    bin_contents = []
    bin_errors_up = []
    bin_errors_dn = []
    bin_centers = []
    bin_widths = []
    bin_edges = [x_axis.GetBinLowEdge(1)]

    # ROOT bins are numbered from 1; 0 and nbins+1 are under/overflow.
    for i in range(1, h_histo.GetNbinsX() + 1):
        bin_contents.append(histo.GetEfficiency(i))
        bin_errors_up.append(histo.GetEfficiencyErrorUp(i))
        bin_errors_dn.append(histo.GetEfficiencyErrorLow(i))
        bin_centers.append(x_axis.GetBinCenter(i))
        bin_edges.append(x_axis.GetBinUpEdge(i))
        # Half-width of *this* bin. This used to read the width of bin 1 for
        # every bin, which is only right when all bins have the same width.
        bin_widths.append(x_axis.GetBinWidth(i) / 2.)

    results = Hist()
    results.content = np.array(bin_contents)
    results.error = [bin_errors_dn, bin_errors_up]
    results.bins = np.array(bin_edges)
    results.center = bin_centers
    results.width = bin_widths

    return results
def get_unified_bin_hist(hists):
    """
    Return an empty hist whose binning covers every hist in <hists>.

    Starts from the bins of <hists[0]>, then walks over the bin centers of the
    remaining hists, adding bins on the left or right (with the bin width of
    <hists[0]>) until each center is inside the range, so there won't be any
    under/overflows.
    NOTE under/overflows in the original hists are ignored, on purpose.
    """
    assert len(hists) > 0
    template_axis = hists[0].GetXaxis()
    # always have at least one bin, in which case this'd be the low edge of the
    # overflow bin minus low edge of the first bin
    dx = template_axis.GetBinLowEdge(2) - template_axis.GetBinLowEdge(1)

    low_edges = [hists[0].GetXaxis().GetBinLowEdge(ib) for ib in range(1, hists[0].GetNbinsX() + 1)]

    for other in hists[1:]:
        for ib in range(1, other.GetNbinsX() + 1):
            bincenter = other.GetXaxis().GetBinCenter(ib)
            # as long as <bincenter> is outside of the current bounds, keep adding bins on the left...
            while bincenter <= low_edges[0]:
                low_edges.insert(0, low_edges[0] - dx)
            # ...and same thing on the right
            while bincenter >= low_edges[-1] + dx:
                low_edges.append(low_edges[-1] + dx)

    n_bins = len(low_edges)
    xmin, xmax = low_edges[0], low_edges[-1] + dx
    return Hist(n_bins, xmin, xmax)
def add_bin_labels_not_in_all_hists(hists):
    """
    Remake each hist in <hists> over the union of all their bin labels.

    Returns a new list of hists (same order as <hists>, same titles), each
    with one bin per label in sorted order; labels a hist didn't originally
    have get zero content and zero error.
    """
    # first convert each hist to a map from bin label to (content, error)
    # (ignore under/over flows, they're kinda useless for bin-labelled hists)
    histmaps = [
        dict(
            (hist.bin_labels[ibin], (hist.bin_contents[ibin], hist.errors[ibin]))
            for ibin in range(1, hist.n_bins + 1)
        )
        for hist in hists
    ]
    all_labels = sorted(set(label for hmap in histmaps for label in hmap))
    n_labels = len(all_labels)

    # then go through and make new histograms for everybody
    finalhists = []
    for original_hist, hmap in zip(hists, histmaps):
        newhist = Hist(n_labels, 0.5, n_labels + 0.5, title=original_hist.title)
        for ibin, label in enumerate(all_labels, 1):  # root conventions: first real bin is 1
            content, error = hmap.get(label, (0.0, 0.0))
            newhist.bin_labels[ibin] = label
            newhist.bin_contents[ibin] = content
            newhist.errors[ibin] = error
        finalhists.append(newhist)

    return finalhists
def convert(self, data):
    """Convert ROOT/Numpy data into uniform format.

    The kind of input is decided from ``data._classname`` when that attribute
    exists, otherwise from ``str(type(data))``: anything mentioning 'TH1' or
    'TH2' is treated as a histogram, everything else is handed to
    ``convert_array``. Also records the decision on ``self._isHistogram`` and
    always sets ``self._isEfficiency`` to False.
    """
    classname = getattr(data, '_classname', str(type(data)))

    self._isHistogram = any(tag in classname for tag in ('TH1', 'TH2'))
    # TEfficiency currently unsupported in uproot '3.2.5' and uproot-methods '0.2.5'
    # - throws NotImplementedError (/.../uproot/rootio.py", line 645)
    self._isEfficiency = False

    h_data = Hist()
    if not self._isHistogram:
        # others, e.g., numpy data (may or may not need to be put into a histogram)
        # no support for TEfficiencies in uproot right now
        h_data = self.convert_array(data)
    else:
        hist_converter = self.hist2data if self.dimensions == 1 else self.hist2data2D
        h_data = hist_converter(data, reBin=self.rebin, normed=self.normed)

    return h_data
def test_image_plot_ratio_callable():
    """
    Test plot_ratio against a callable by comparing with a reference image
    generated via `pytest --mpl-generate-path=tests/baseline`
    """

    def model(x, a=1 / np.sqrt(2 * np.pi), x0=0, sigma=1, offset=0):
        exponent = -((x - x0)**2) / (2 * sigma**2)
        return a * np.exp(exponent) + offset

    # Fixed seed so the drawn samples are reproducible.
    np.random.seed(42)
    x_axis = axis.Regular(
        50, -5, 5, name="X", label="x [units]", underflow=False, overflow=False
    )
    hist_1 = Hist(x_axis).fill(np.random.normal(size=1000))

    fig = plt.figure()
    colors = {"eb_color": "black", "fp_color": "blue", "ub_color": "lightblue"}
    assert hist_1.plot_ratio(model, **colors)

    return fig
def test_image_plot_pull():
    """
    Test plot_pull against a callable by comparing with a reference image
    generated via `pytest --mpl-generate-path=tests/baseline`
    """

    def pdf(x, a=1 / np.sqrt(2 * np.pi), x0=0, sigma=1, offset=0):
        exponent = -((x - x0)**2) / (2 * sigma**2)
        return a * np.exp(exponent) + offset

    # Fixed seed so the drawn samples are reproducible.
    np.random.seed(42)
    s_axis = axis.Regular(
        50, -4, 4, name="S", label="s [units]", underflow=False, overflow=False
    )
    h = Hist(s_axis).fill(np.random.normal(size=100))

    fig = plt.figure()
    options = {
        "eb_color": "black",
        "fp_color": "blue",
        "ub_color": "lightblue",
        "fit_fmt": r"{name} = {value:.3g} $\pm$ {error:.3g}",
    }
    assert h.plot_pull(pdf, **options)

    return fig
def make_mean_hist(hists, debug=False):
    """
    return the hist with bin contents the mean over <hists> of each bin

    Bin contents (including the under/overflow bins) are summed over <hists>,
    keyed by each bin's low edge, and the resulting hist is then normalized.
    If <debug> is set, the per-hist and summed bin contents are printed.
    """
    # Map from low edge to the summed content of all bins with that low edge.
    binvals = {}
    for hist in hists:
        if debug:
            print ' sub',
        for ib in range(0, hist.n_bins + 2):  # 0 and n_bins + 1 are the under/overflow bins
            low_edge = hist.low_edges[ib]
            if low_edge not in binvals:
                binvals[low_edge] = 0.
            binvals[low_edge] += hist.bin_contents[ib]
            if debug:
                print ' ', low_edge, hist.bin_contents[ib],
        if debug:
            print ''  # end the line of per-bin output for this hist

    binlist = sorted(binvals.keys())
    # The first and last entries in <binlist> are taken to be the under/overflow
    # low edges, hence the "- 2" and the [1:-1].
    # NOTE(review): the 4th argument is passed positionally here, whereas
    # make_hist_from_dict_of_counts() passes its bin edges as xbins=... --
    # confirm that the 4th positional parameter of Hist really is the bin edges.
    meanhist = Hist(len(binlist) - 2, binlist[1], binlist[-1], binlist[1:-1])
    if debug:
        print ' mean',
    for ib in range(len(binlist)):
        meanhist.set_ibin(ib, binvals[binlist[ib]])
        if debug:
            print ' ', meanhist.low_edges[ib], meanhist.bin_contents[ib],
    if debug:
        print ''

    # Sums are turned into a normalized hist rather than divided by len(hists).
    meanhist.normalize()
    return meanhist
def array2data2D(self, data, weights=None, normed=False, binning=1, reBin=None):
    """
    Convert array of data to internal format
    - Designed for arrays of raw, un-binned data.
    - If you pass values here from an existing histogram ('weights' is not None
      and the 'data' param is just bin centers), it is possible to re-bin
      this histogram using the 'reBin' keyword

    Args:
        data: the x and y values, either under the keys 'x' and 'y' or as
            the first two entries of a sequence / plain array.
        weights: optional per-entry weights, forwarded to np.histogram2d.
        normed: if true, histogram as a density (and re-normalize after any
            re-binning).
        binning: forwarded to np.histogram2d as ``bins``.
        reBin: if not None, forwarded to ``Rebin2D`` on the result.

    Returns:
        A ``Hist`` with flattened ``content`` and the x/y bins, centers and
        widths filled in (both as dicts and as separate x*/y* attributes).
    """
    try:
        x = data['x']
        y = data['y']
    except (TypeError, IndexError):
        # Lists/tuples raise TypeError for a string index, plain (non-structured)
        # numpy arrays raise IndexError: in both cases fall back to position.
        x = data[0]
        y = data[1]

    # 'normed' was removed from np.histogram2d in NumPy 1.24; 'density' is the
    # supported spelling of the same option.
    counts, bins_x, bins_y = np.histogram2d(x, y, bins=binning, density=normed, weights=weights)

    results = Hist()
    results.content = counts.flatten()   # counts is a ndarray (nxbins,nybins)
    results.bins = {'x': bins_x, 'y': bins_y}

    xcenter, ycenter = tools.dummy_bins2D(tools.midpoints(bins_x), tools.midpoints(bins_y))
    xwidth, ywidth = tools.dummy_bins2D(tools.widths(bins_x), tools.widths(bins_y))

    results.center = {'x': xcenter, 'y': ycenter}
    results.width = {'x': xwidth, 'y': ywidth}

    # NOTE(review): content is flattened above but this error array keeps the
    # 2D shape of the counts -- confirm that is what downstream code expects.
    results.error = np.sqrt(counts)
    if weights is not None:
        # scipy.stats to get sumw2 (x,y,weights should have the same shape)
        results.error = results.sumw2_2D(xdata=x, ydata=y, values=weights)

    if reBin is not None:
        # re-binning after making data from array, likely that the user
        # passed in binned data and wants to re-bin.
        results.Rebin2D(reBin)
        if normed:
            results.normalize()  # normalize after re-binning

    # Flat aliases for the per-axis dictionaries.
    results.xbins = results.bins['x']
    results.ybins = results.bins['y']
    results.xcenter = results.center['x']
    results.ycenter = results.center['y']
    results.xwidth = results.width['x']
    results.ywidth = results.width['y']

    return results

## THE END ##
def array2data(self, data, weights=None, normed=False, binning=1, reBin=None):
    """
    Convert array of data to internal format
    - Designed for arrays of raw, un-binned data.
    - If you pass values here from an existing histogram ('weights' is not None
      and the 'data' param is just bin centers), it is possible to re-bin
      this histogram using the 'reBin' keyword

    Args:
        data: the values to histogram.
        weights: optional per-entry weights, forwarded to np.histogram.
        normed: if true, histogram as a density (and re-normalize after any
            re-binning).
        binning: forwarded to np.histogram as ``bins``.
        reBin: if not None, forwarded to ``Rebin`` on the result.

    Returns:
        A ``Hist`` with ``content``, ``bins``, ``center``, ``width`` and
        ``error`` filled in.
    """
    # Keep the raw values under their own name: they are still needed below
    # for the weighted errors. 'normed' was removed from np.histogram in
    # NumPy 1.24; 'density' is the supported option.
    counts, bins = np.histogram(data, bins=binning, weights=weights, density=normed)

    results = Hist()
    results.content = counts
    results.bins = bins
    results.center = tools.midpoints(bins)
    results.width = tools.widths(bins)

    results.error = np.sqrt(counts)
    if weights is not None:
        # numpy digitize to get sumw2. This must be given the raw values (one
        # per weight), not the bin contents, which is what used to be passed
        # because 'data' had been overwritten by the histogram output.
        results.error = results.sumw2_1D(xdata=data, values=weights)

    if reBin is not None:
        results.Rebin(reBin)
        if normed:
            results.normalize()  # normalize after re-binning

    return results
def convert(self, data):
    """Convert ROOT data into uniform format for framework.

    Dispatches on the type of <data> (TH1-derived histogram, TEfficiency, or
    anything else, which is assumed to be an array) and on ``self.dimensions``.
    Also records the detected kind on ``self._isHistogram`` and
    ``self._isEfficiency``.
    """
    self._isHistogram = isinstance(data, ROOT.TH1)
    self._isEfficiency = isinstance(data, ROOT.TEfficiency)
    one_dimensional = self.dimensions == 1

    h_data = Hist()
    if self._isHistogram:
        # TH1/TH2
        hist_converter = self.hist2data if one_dimensional else self.hist2data2D
        h_data = hist_converter(data, reBin=self.rebin, normed=self.normed)
    elif self._isEfficiency:
        # TEfficiency
        eff_converter = self.TEfficiency2data if one_dimensional else self.TEfficiency2data2D
        h_data = eff_converter(data)
    else:
        # assume the data is stored in an array
        h_data = self.convert_array(data)

    return h_data