def __init__(self, germlines, name, only_correct_gene_fractions=False):
     """Store the constructor arguments and create the empty value tables and histograms.

     <self.values> gets one dict per tracked quantity (with 'right'/'wrong' counters for
     the columns listed in <bool_columns>), and <self.hists> gets one Hist per mutation
     frequency plot.
     """
     self.germlines, self.name = germlines, name
     self.only_correct_gene_fractions = only_correct_gene_fractions

     self.values = {}
     for column in utils.index_columns:
         if column == 'cdr3_length':  # kind of finicky to figure out what this is, so it isn't always set
             continue
         self.values[column] = {'right': 0, 'wrong': 0} if column in bool_columns else {}

     # whole-sequence entries first (empty prefix), then one pair of entries per region
     for prefix in [''] + [region + '_' for region in utils.regions]:
         for suffix in ('hamming_to_true_naive', 'hamming_to_true_naive_normed'):
             self.values[prefix + suffix] = {}

     self.hists = {'mute_freqs': Hist(30, -0.05, 0.05)}
     for region in utils.regions:
         self.hists[region + '_mute_freqs'] = Hist(30, -0.05, 0.05)
     for region in utils.regions:  # plots of correct gene calls vs mute freq
         for outcome in ('right', 'wrong'):
             self.hists[region + '_gene_' + outcome + '_vs_mute_freq'] = Hist(50, 0., 0.4)
示例#2
0
def hist_tuple():
    """Return three identically configured, independently filled 1D histograms.

    Each one has a single regular axis named "x" (50 bins on [-5, 5], no under/overflow),
    uses Weight storage, and is filled with 1000 draws from np.random.normal at weight 1.0.
    """

    def _filled_hist():
        x_axis = hist.axis.Regular(
            50, -5, 5, name="x", label="x [units]", underflow=False, overflow=False
        )
        weighted = Hist(x_axis, storage=hist.storage.Weight())
        return weighted.fill(np.random.normal(size=1000), weight=1.0)

    hist_1 = _filled_hist()
    hist_2 = _filled_hist()
    hist_3 = _filled_hist()
    return hist_1, hist_2, hist_3
示例#3
0
    def __init__(self, name):
        """Create the empty counters and histograms for this plotter.

        The dictionary-based approach in <self.values> is nice because you can decide your
        hist bounds after filling everything.
        """
        self.name = name
        self.values = {}
        self.hists = {}
        self.skipped_queries = []

        for column in plotconfig.gene_usage_columns:
            self.values[column] = dict(right=0, wrong=0)
        # it might be nicer to eventually switch these to hists (I think the only reason they're separate is that they predate the existence of the hist class)
        for column in plotconfig.int_columns:
            self.values[column] = {}
        for rstr in plotconfig.rstrings:
            for suffix in ('hamming_to_true_naive', 'muted_bases'):
                self.values[rstr + suffix] = {}
        self.values['shm_indel_length'] = {}

        # only do mutation frequency for the whole sequence
        self.hists['mute_freqs'] = Hist(25, -0.04, 0.04)
        # NOTE the hist bounds here are intended to be super inclusive, whereas in compare-plotdirs.py we apply the more-restrictive ones from plotconfig.py (we still shift overflows here, where appropriate, though)
        for region in utils.regions:
            # correct *up* to allele (i.e. you can get the allele wrong)
            for outcome in ('right', 'wrong'):
                self.hists[region + '_gene_' + outcome + '_vs_mute_freq'] = Hist(25, 0., 0.4)
            # whereas these require the *correct* allele
            for outcome in ('right', 'wrong'):
                self.hists[region + '_allele_' + outcome + '_vs_per_gene_support'] = Hist(25, 0., 1.)

        self.subplotdirs = ['gene-call', 'mutation', 'boundaries']

        self.v_3p_exclusion = 3
示例#4
0
def test_from_array(named_hist):
    """Check building a 2D Hist from a pre-filled array passed via the ``data`` keyword."""

    def build(shape):
        # 10 x 7 bins, pre-filled with an all-ones array of the requested shape
        return Hist(
            axis.Regular(10, 1, 2, name="A"),
            axis.Regular(7, 1, 3, name="B"),
            data=np.ones(shape),
        )

    # array with exactly the (10, 7) bin shape
    exact = build((10, 7))
    assert exact.values() == approx(np.ones((10, 7)))
    assert exact.sum() == approx(70)
    assert exact.sum(flow=True) == approx(70)

    # array with two extra entries per axis, visible only with flow=True
    padded = build((12, 9))
    assert padded.values(flow=False) == approx(np.ones((10, 7)))
    assert padded.values(flow=True) == approx(np.ones((12, 9)))
    assert padded.sum() == approx(70)
    assert padded.sum(flow=True) == approx(12 * 9)

    # a shape matching neither layout is rejected
    with pytest.raises(ValueError):
        build((11, 9))
示例#5
0
    def __init__(self, name):
        """Create the empty counters and histograms for this plotter.

        The dictionary-based approach in <self.values> is nice because you can decide your
        hist bounds after filling everything.
        """
        self.name = name
        self.values = {}
        self.hists = {}

        # columns in <bool_columns> get right/wrong counters, everything else starts empty
        for column in utils.index_columns:
            self.values[column] = {'right': 0, 'wrong': 0} if column in bool_columns else {}

        for suffix in ('hamming_to_true_naive', 'muted_bases'):
            for rstr in plotconfig.rstrings:
                self.values[rstr + suffix] = {}

        # only do mutation frequency for the whole sequence
        self.hists['mute_freqs'] = Hist(25, -0.04, 0.04)
        # NOTE the hist bounds here are intended to be super inclusive, whereas in compare-plotdirs.py we apply the more-restrictive ones from plotconfig.py (we still shift overflows here, where appropriate, though)
        for region in utils.regions:
            # correct *up* to allele (i.e. you can get the allele wrong)
            for outcome in ('right', 'wrong'):
                self.hists[region + '_gene_' + outcome + '_vs_mute_freq'] = Hist(25, 0., 0.4)
            # whereas these require the *correct* allele
            for outcome in ('right', 'wrong'):
                self.hists[region + '_allele_' + outcome + '_vs_per_gene_support'] = Hist(25, 0., 1.)

        self.subplotdirs = ['gene-call', 'mutation', 'boundaries']
示例#6
0
    def test_weighted_mean(self):
        """WeightedMean storage: accumulator fields after a fill, and the ways to select it."""
        filled = Hist.new.Reg(10, 0, 1, name="x").WeightedMean()
        filled = filled.fill([0.5, 0.5], weight=[1, 1], sample=[1, 1])

        accumulator = filled[0.5j]
        assert accumulator.sum_of_weights == 2
        assert accumulator.sum_of_weights_squared == 2
        assert accumulator.value == 1
        assert accumulator.variance == 0

        # add storage to existing storage
        with pytest.raises(Exception):
            filled.WeightedMean()

        # storage chosen by positional name, by keyword name, and by instance
        by_position = Hist(axis.Regular(10, 0, 1, name="x"), "WeighTEDMEAn")
        assert by_position._storage_type == storage.WeightedMean

        by_keyword = Hist(axis.Regular(10, 0, 1, name="x"), storage="weightedMean")
        assert by_keyword._storage_type == storage.WeightedMean

        by_instance = Hist(axis.Regular(10, 0, 1, name="x"), storage.WeightedMean())
        assert by_instance._storage_type == storage.WeightedMean
示例#7
0
def test_general_access():
    """
    Test general access -- whether Hist bins can be accessed.
    """

    one_d = Hist(axis.Regular(10, -5, 5, name="X", label="x [units]"))
    one_d = one_d.fill(np.random.normal(size=1000))

    # six index spellings that are expected to read back equal contents
    equivalent_keys = [6, bh.loc(1), 1j, 0j + 1, -3j + 4, bh.loc(1, 0)]
    contents = [one_d[key] for key in equivalent_keys]
    assert all(left == right for left, right in zip(contents, contents[1:]))

    # ...and every one of them must also be usable for assignment
    for key in equivalent_keys:
        one_d[key] = 0

    multi = Hist(
        axis.Regular(50, -5, 5, name="Norm", label="normal distribution"),
        axis.Regular(50, -5, 5, name="Unif", label="uniform distribution"),
        axis.StrCategory(["hi", "hello"], name="Greet"),
        axis.Boolean(name="Yes"),
        axis.Integer(0, 1000, name="Int"),
    )
    multi = multi.fill(
        np.random.normal(size=1000),
        np.random.uniform(size=1000),
        ["hi"] * 800 + ["hello"] * 200,
        [True] * 600 + [False] * 400,
        np.ones(1000),
    )

    assert multi[0j, -0j + 2, "hi", True, 1]

    # mismatch dimension
    with pytest.raises(Exception):
        multi[0j, -0j + 2, "hi", True]
示例#8
0
    def test_double(self):
        """Double storage: bin contents after a fill, and the ways to select it."""
        builder = Hist.new.Reg(10, 0, 1, name="x").Reg(10, 0, 1, name="y")
        filled = builder.Double().fill(x=[0.5, 0.5], y=[0.2, 0.6])

        assert filled[0.5j, 0.2j] == 1
        assert filled[bh.loc(0.5), bh.loc(0.6)] == 1
        assert isinstance(filled[0.5j, 0.5j], float)

        # add storage to existing storage
        with pytest.raises(Exception):
            filled.Double()

        # storage chosen by positional name, by keyword name, and by instance
        by_position = Hist(axis.Regular(10, 0, 1, name="x"), "double")
        assert by_position._storage_type == storage.Double

        by_keyword = Hist(axis.Regular(10, 0, 1, name="x"), storage="DouBle")
        assert by_keyword._storage_type == storage.Double

        by_instance = Hist(axis.Regular(10, 0, 1, name="x"), storage.Double())
        assert by_instance._storage_type == storage.Double
示例#9
0
    def test_mean(self):
        """Mean storage: accumulator fields after a fill, and the ways to select it."""
        filled = Hist.new.Reg(10, 0, 1, name="x").Mean()
        filled = filled.fill([0.5, 0.5], weight=[1, 1], sample=[1, 1])

        accumulator = filled[0.5j]
        assert accumulator.count == 2
        assert accumulator.value == 1
        assert accumulator.variance == 0

        # add storage to existing storage
        with pytest.raises(Exception):
            filled.Mean()

        # storage chosen by positional name, by keyword name, and by instance
        by_position = Hist(axis.Regular(10, 0, 1, name="x"), "MEAn")
        assert by_position._storage_type == storage.Mean

        by_keyword = Hist(axis.Regular(10, 0, 1, name="x"), storage="mean")
        assert by_keyword._storage_type == storage.Mean

        by_instance = Hist(axis.Regular(10, 0, 1, name="x"), storage.Mean())
        assert by_instance._storage_type == storage.Mean
示例#10
0
def test_basic_usage():
    """
    Test basic usage -- whether Hist is properly derived from boost-histogram.
    """

    # Test normal Hist
    single = Hist(axis.Regular(10, 0, 1, name='x'))
    single.fill([0.35, 0.35, 0.45])

    expected_counts = {2: 0, 3: 2, 4: 1, 5: 0}

    # plain integer indexing
    for ibin in (2, 3, 4, 5):
        assert single[ibin] == expected_counts[ibin]

    # dict indexing, keyed by axis number
    for ibin in (2, 3, 4, 5):
        assert single[{0: ibin}] == expected_counts[ibin]

    # Test multi-axis Hist
    multi = Hist(
        axis.Regular(10, 0, 1, name="x"),
        axis.Regular(10, 0, 1, name="y"),
        axis.Integer(0, 2, name="z"),
    )

    x_vals = [0.35, 0.35, 0.35, 0.45, 0.55, 0.55, 0.55]
    y_vals = [0.35, 0.35, 0.45, 0.45, 0.45, 0.45, 0.45]
    z_vals = [0, 0, 1, 1, 1, 1, 1]
    multi.fill(x_vals, y_vals, z_vals)
示例#11
0
    def get_mute_hist(self, mtype):
        """Return a Hist of mutation frequencies for <mtype>.

        If self.args.mutate_from_scratch is set, the hist is built from
        self.args.scratch_mute_freq: a single bin around that value when
        self.args.same_mute_freq_for_all_seqs is set, otherwise a normalized hist filled
        with 500 draws from an exponential distribution with that scale, each capped at
        <max_val>. If it is not set, the hist is read from
        '<mtype>-mean-mute-freqs.csv' under self.parameter_dir.
        """
        if not self.args.mutate_from_scratch:
            return Hist(fname=self.parameter_dir + '/' + mtype + '-mean-mute-freqs.csv')

        mean_mute_val = self.args.scratch_mute_freq
        if self.args.same_mute_freq_for_all_seqs:
            hist = Hist(1, mean_mute_val - utils.eps, mean_mute_val + utils.eps)
            hist.fill(mean_mute_val)
            return hist

        n_entries = 500
        max_val = 0.8  # this is arbitrary, but you shouldn't be calling this with anything that gets a significant number anywhere near there, anyway
        length_vals = list(numpy.random.exponential(mean_mute_val, n_entries))

        # Warn if any draw reaches the cap. This has to be checked *before* clipping and
        # with >=: the previous check counted values exactly equal to <max_val> ahead of
        # the clipping step, which for continuous draws essentially never matched, so the
        # warning could not fire.
        if any(v >= max_val for v in length_vals):
            # single-argument parenthesized print behaves the same on python 2 and 3
            print('%s lots of really high mutation rates treegenerator::get_mute_hist()' % utils.color('yellow', 'warning'))

        hist = Hist(30, 0., max_val)
        for val in length_vals:
            hist.fill(min(val, max_val))
        hist.normalize()

        return hist
示例#12
0
文件: test_plot.py 项目: cranmer/hist
def test_image_plot_ratio_hist():
    """
    Test plot_ratio between two histograms by comparing against a reference
    image generated via `pytest --mpl-generate-path=tests/baseline`
    """

    np.random.seed(42)

    def normal_hist(n_samples):
        # 50 bins on [-5, 5] without flow bins, filled with <n_samples> normal draws
        x_axis = axis.Regular(
            50, -5, 5, name="X", label="x [units]", underflow=False, overflow=False
        )
        return Hist(x_axis).fill(np.random.normal(size=n_samples))

    first = normal_hist(1000)
    second = normal_hist(1700)

    fig = plt.figure()

    ratio_artists = first.plot_ratio(
        second, rp_num_label="numerator", rp_denom_label="denominator"
    )
    assert ratio_artists

    return fig
示例#13
0
def test_general_plot():
    """
    Test general plot -- whether Hist can be plotted properly.
    """

    def flowless_axis(lower, upper, name):
        # 50 regular bins without under/overflow, labelled "<lowercase name> [units]"
        return axis.Regular(
            50,
            lower,
            upper,
            name=name,
            label=name.lower() + " [units]",
            underflow=False,
            overflow=False,
        )

    def sample():
        return np.random.normal(size=10)

    one_d = Hist(flowless_axis(-5, 5, "A")).fill(sample())
    assert one_d.plot(color="green", ls="--", lw=3)

    two_d = Hist(flowless_axis(-5, 5, "A"), flowless_axis(-4, 4, "B"))
    two_d = two_d.fill(sample(), sample())
    assert two_d.plot(cmap="cividis")

    # dimension error
    three_d = Hist(
        flowless_axis(-5, 5, "A"),
        flowless_axis(-4, 4, "B"),
        flowless_axis(-4, 4, "C"),
    ).fill(sample(), sample(), sample())

    with pytest.raises(Exception):
        three_d.plot()

    # (axes to project onto, kwargs that must be rejected)
    rejected_calls = [
        (("A",), {"abc": "red"}),  # wrong kwargs names
        (("A", "C"), {"abc": "red"}),
        (("B",), {"ls": "red"}),  # wrong kwargs type
        (("A", "C"), {"cmap": 0.1}),
    ]
    for kept_axes, bad_kwargs in rejected_calls:
        with pytest.raises(Exception):
            three_d.project(*kept_axes).plot(**bad_kwargs)

    plt.close("all")
 def __init__(self, germline_seqs):
     """Store the germline sequences and create empty count/frequency tables and mean-rate hists."""
     # commented-out params kept from the original signature: base_outdir='', base_plotdir='', write_parameters=True, plot_parameters=True
     self.germline_seqs = germline_seqs
     self.counts = {}
     self.freqs = {}
     self.plotting_info = {}

     hist_args = (100, 0.0, 0.5)  # n_bins, xmin, xmax
     self.mean_rates = {'all': Hist(*hist_args)}
     for region in utils.regions:
         self.mean_rates[region] = Hist(*hist_args)

     self.finalized = False
示例#15
0
def test_general_project():
    """
    Test general project -- whether Hist can be projected properly.
    """

    def six_axis_hist():
        # one axis of each kind used here, named "A" to "F"
        return Hist(
            axis.Regular(
                50, -5, 5, name="A", label="a [units]", underflow=False, overflow=False
            ),
            axis.Boolean(name="B", label="b [units]"),
            axis.Variable(range(11), name="C", label="c [units]"),
            axis.Integer(0, 10, name="D", label="d [units]"),
            axis.IntCategory(range(10), name="E", label="e [units]"),
            axis.StrCategory("FT", name="F", label="f [units]"),
        )

    h = six_axis_hist()

    # via indices
    for by_index in [(), (0, 1), (0, 1, 2, 3, 4, 5)]:
        assert h.project(*by_index)

    # via names
    for by_name in [(), ("A", "B"), ("A", "B", "C", "D", "E", "F")]:
        assert h.project(*by_name)

    h = six_axis_hist()

    # duplicated
    for duplicated in [(0, 0), ("A", "A"), (0, "A")]:
        with pytest.raises(Exception):
            h.project(*duplicated)

    # mixed types
    assert h.project(2, "A")

    # cannot be found
    for missing in [(-1, 9), ("G", "H")]:
        with pytest.raises(Exception):
            h.project(*missing)
示例#16
0
    def test_unlimited(self):
        """Unlimited storage: bin content after a fill, and the ways to select it."""
        filled = Hist.new.Reg(10, 0, 1, name="x").Unlimited()
        filled = filled.fill([0.5, 0.5])
        assert filled[0.5j] == 2

        # add storage to existing storage
        with pytest.raises(Exception):
            filled.Unlimited()

        # storage chosen by positional name, by keyword name, and by instance
        by_position = Hist(axis.Regular(10, 0, 1, name="x"), "unlimited")
        assert by_position._storage_type == storage.Unlimited

        by_keyword = Hist(axis.Regular(10, 0, 1, name="x"), storage="UNLImited")
        assert by_keyword._storage_type == storage.Unlimited

        by_instance = Hist(axis.Regular(10, 0, 1, name="x"), storage.Unlimited())
        assert by_instance._storage_type == storage.Unlimited
示例#17
0
def make_hist_from_dict_of_counts(values, var_type, hist_label, log='', xmin_force=0.0, xmax_force=0.0, normalize=False, sort=False):
    """
    Fill a histogram with values from a dictionary (each key will correspond to one bin).

    <values> maps bin label to count, <var_type> must be 'int' or 'string', and <hist_label> is only used in messages.
    If <xmin_force> and <xmax_force> differ they are used as the hist bounds, otherwise bounds are worked out from the keys.
    Returns the filled Hist, with ytitle set to 'freq' if <normalize> else 'counts'; raises Exception if the first or last bin ends up non-empty.
    """
    assert var_type in ('int', 'string')  # floats should be handled by Hist class in hist.py
    is_string = var_type == 'string'

    if not values:
        print('WARNING no values for %s in make_hist' % hist_label)
        return Hist(1, 0, 1)

    bin_labels = sorted(values)
    if is_string and not sort:  # for strings, sort so most common value is to left side
        bin_labels = sorted(values, key=values.get, reverse=True)

    # one bin per key for strings, one bin per integer between the smallest and largest key otherwise
    n_bins = len(values) if is_string else bin_labels[-1] - bin_labels[0] + 1

    xbins = [0.] * (n_bins + 1)  # NOTE the +1 is 'cause you need the lower edge of the overflow bin
    if xmin_force != xmax_force:  # boundaries were set explicitly
        hist = Hist(n_bins, xmin_force, xmax_force)
    elif is_string:
        set_bins(bin_labels, n_bins, 'x' in log, xbins, var_type)
        hist = Hist(n_bins, xbins[0], xbins[-1], xbins=xbins)
    else:
        hist = Hist(n_bins, bin_labels[0] - 0.5, bin_labels[-1] + 0.5)  # for integers, just go from the first to the last bin label (they're sorted)

    for ival, key in enumerate(bin_labels):
        count = values[key]
        if is_string:
            label, ibin = key, ival + 1
        else:
            label, ibin = '', hist.find_bin(key)
        hist.set_ibin(ibin, count, error=math.sqrt(count), label=label)

    # make sure there's no overflows
    if hist.bin_contents[0] != 0.0 or hist.bin_contents[-1] != 0.0:
        for ibin in range(hist.n_bins + 2):
            print('%d %f %f' % (ibin, hist.low_edges[ibin], hist.bin_contents[ibin]))
        raise Exception('overflows in ' + hist_label)

    if normalize:
        hist.normalize()
    hist.ytitle = 'freq' if normalize else 'counts'

    return hist
示例#18
0
def test_general_plot2d_full():
    """
    Test general plot2d_full -- whether 2d-Hist can be fully plotted properly.
    """

    def filled_2d_hist():
        # two flowless 50-bin axes ("A" on [-5, 5], "B" on [-4, 4]), ten normal draws each
        axis_a = axis.Regular(
            50, -5, 5, name="A", label="a [units]", underflow=False, overflow=False
        )
        axis_b = axis.Regular(
            50, -4, 4, name="B", label="b [units]", underflow=False, overflow=False
        )
        return Hist(axis_a, axis_b).fill(
            np.random.normal(size=10), np.random.normal(size=10)
        )

    h = filled_2d_hist()
    valid_style = dict(
        main_cmap="cividis",
        top_ls="--",
        top_color="orange",
        top_lw=2,
        side_ls="-.",
        side_lw=1,
        side_color="steelblue",
    )
    assert h.plot2d_full(**valid_style)

    # dimension error
    h = filled_2d_hist()
    with pytest.raises(Exception):
        h.project("A").plot2d_full()

    rejected_kwargs = [
        {"abc": "red"},  # wrong kwargs names
        {"color": "red"},
        {"main_cmap": 0.1, "side_lw": "autumn"},  # wrong kwargs type
    ]
    for bad_kwargs in rejected_kwargs:
        with pytest.raises(Exception):
            h.plot2d_full(**bad_kwargs)

    plt.close("all")
示例#19
0
    def test_int64(self):
        """Int64 storage: integer bin content after a fill, and the ways to select it."""
        filled = Hist.new.Reg(10, 0, 1, name="x").Int64()
        filled = filled.fill([0.5, 0.5])
        assert filled[0.5j] == 2
        assert isinstance(filled[0.5j], int)

        # add storage to existing storage
        with pytest.raises(Exception):
            filled.Int64()

        # storage chosen by positional name, by keyword name, and by instance
        by_position = Hist(axis.Regular(10, 0, 1, name="x"), "int64")
        assert by_position._storage_type == storage.Int64

        by_keyword = Hist(axis.Regular(10, 0, 1, name="x"), storage="INT64")
        assert by_keyword._storage_type == storage.Int64

        by_instance = Hist(axis.Regular(10, 0, 1, name="x"), storage.Int64())
        assert by_instance._storage_type == storage.Int64
示例#20
0
    def test_weight(self):
        """Weight storage: value and variance after a fill, and the ways to select it."""
        filled = Hist.new.Reg(10, 0, 1, name="x").Weight()
        filled = filled.fill([0.5, 0.5])
        assert filled[0.5j].variance == 2
        assert filled[0.5j].value == 2

        # add storage to existing storage
        with pytest.raises(Exception):
            filled.Weight()

        # storage chosen by positional name, by keyword name, and by instance
        by_position = Hist(axis.Regular(10, 0, 1, name="x"), "WeighT")
        assert by_position._storage_type == storage.Weight

        by_keyword = Hist(axis.Regular(10, 0, 1, name="x"), storage="weight")
        assert by_keyword._storage_type == storage.Weight

        by_instance = Hist(axis.Regular(10, 0, 1, name="x"), storage.Weight())
        assert by_instance._storage_type == storage.Weight
示例#21
0
    def TEfficiency2data(self, histo):
        """Convert TEfficiency to internal format. No support for re-binning TEfficiencies.

        The binning is read from the x-axis of histo.GetPassedHistogram(); the efficiency
        and its asymmetric errors are read from <histo> itself, bin by bin.

        Returns a Hist with:
          content -- np.array of per-bin efficiencies
          error   -- [lower errors, upper errors], one list each
          bins    -- np.array of bin edges (low edge of bin 1, then each bin's upper edge)
          center  -- list of bin centers
          width   -- list of *half* bin widths
        """
        h_histo = histo.GetPassedHistogram()
        xaxis = h_histo.GetXaxis()
        # ROOT-style numbering: real bins run from 1 to GetNbinsX() inclusive.
        # range (rather than xrange) keeps this working on both python 2 and 3.
        bin_numbers = list(range(1, h_histo.GetNbinsX() + 1))

        results = Hist()
        results.content = np.array([histo.GetEfficiency(i) for i in bin_numbers])
        results.error = [
            [histo.GetEfficiencyErrorLow(i) for i in bin_numbers],
            [histo.GetEfficiencyErrorUp(i) for i in bin_numbers],
        ]
        results.bins = np.array([xaxis.GetBinLowEdge(1)] +
                                [xaxis.GetBinUpEdge(i) for i in bin_numbers])
        results.center = [xaxis.GetBinCenter(i) for i in bin_numbers]
        # Use each bin's own width: this previously took the width of bin 1 for every
        # bin, which is only right when all bins have the same width.
        results.width = [xaxis.GetBinWidth(i) / 2. for i in bin_numbers]

        return results
示例#22
0
def get_unified_bin_hist(hists):
    """
    Unify bins in <hists>.
    Starts from the bins from <hists[0]>, then loops over the rest of 'em adding bins as it goes (with width from <hists[0]>) so we won't have any under/overflows.
    NOTE totally ignores under/overflows in the original hists. That's on purpose, but like everything else in this foolish thing we call life may in fact turn out to be dumb later on.
    Returns a new, empty Hist with the unified binning.
    """
    assert len(hists) > 0
    first_axis = hists[0].GetXaxis()
    # always have at least one bin, in which case this'd be the low edge of the overflow bin minus low edge of the first bin
    dx = first_axis.GetBinLowEdge(2) - first_axis.GetBinLowEdge(1)
    low_edges = [hists[0].GetXaxis().GetBinLowEdge(ib) for ib in range(1, hists[0].GetNbinsX() + 1)]

    for other in hists[1:]:
        for ib in range(1, other.GetNbinsX() + 1):
            bincenter = other.GetXaxis().GetBinCenter(ib)
            # as long as <bincenter> is outside of the current bounds, keep adding bins on the left...
            while bincenter <= low_edges[0]:
                low_edges.insert(0, low_edges[0] - dx)
            # ...and same thing on the right
            while bincenter >= low_edges[-1] + dx:
                low_edges.append(low_edges[-1] + dx)

    n_bins = len(low_edges)
    return Hist(n_bins, low_edges[0], low_edges[-1] + dx)
示例#23
0
def add_bin_labels_not_in_all_hists(hists):
    """ find the OR of all bin labels present in <hists>, and return a remade copy of each hist in <hists> with zero-content bins for any labels it didn't already have """
    # first convert each hist to a map from bin label to a (content, error) 2-tuple
    # (ignore under/over flows, they're kinda useless for bin-labelled hists)
    histmaps = [
        {hist.bin_labels[ibin]: (hist.bin_contents[ibin], hist.errors[ibin]) for ibin in range(1, hist.n_bins + 1)}
        for hist in hists
    ]
    all_labels = sorted(set(label for hmap in histmaps for label in hmap))

    # then go through and make new histograms for everybody
    finalhists = []
    for original_hist, hmap in zip(hists, histmaps):
        newhist = Hist(len(all_labels), 0.5, len(all_labels) + 0.5, title=original_hist.title)
        for ibin, label in enumerate(all_labels, 1):  # root conventions: the first real bin is 1
            newhist.bin_labels[ibin] = label
            # labels this hist never had get zero content and zero error
            newhist.bin_contents[ibin], newhist.errors[ibin] = hmap.get(label, (0.0, 0.0))
        finalhists.append(newhist)

    return finalhists
示例#24
0
    def convert(self, data):
        """Convert ROOT/Numpy data into uniform format.

        Sets self._isHistogram (and always clears self._isEfficiency), then dispatches to
        hist2data / hist2data2D for histograms, or convert_array for anything else.
        """
        # objects without a <_classname> attribute are identified by their python type instead
        classname = getattr(data, '_classname', str(type(data)))
        self._isHistogram = any(tag in classname for tag in ('TH1', 'TH2'))
        self._isEfficiency = False
        # TEfficiency currently unsupported in uproot '3.2.5' and uproot-methods '0.2.5'
        # - throws NotImplementedError (/.../uproot/rootio.py", line 645)

        h_data = Hist()
        if not self._isHistogram:
            # others, e.g., numpy data (may or may not need to be put into a histogram)
            # no support for TEfficiencies in uproot right now
            h_data = self.convert_array(data)
        else:
            converter = self.hist2data if self.dimensions == 1 else self.hist2data2D
            h_data = converter(data, reBin=self.rebin, normed=self.normed)

        return h_data
示例#25
0
文件: test_plot.py 项目: cranmer/hist
def test_image_plot_ratio_callable():
    """
    Test plot_ratio against a callable model by comparing against a reference
    image generated via `pytest --mpl-generate-path=tests/baseline`
    """

    np.random.seed(42)

    x_axis = axis.Regular(
        50, -5, 5, name="X", label="x [units]", underflow=False, overflow=False
    )
    sampled = Hist(x_axis).fill(np.random.normal(size=1000))

    def model(x, a=1 / np.sqrt(2 * np.pi), x0=0, sigma=1, offset=0):
        return a * np.exp(-((x - x0)**2) / (2 * sigma**2)) + offset

    fig = plt.figure()

    colors = dict(eb_color="black", fp_color="blue", ub_color="lightblue")
    assert sampled.plot_ratio(model, **colors)

    return fig
示例#26
0
文件: test_plot.py 项目: cranmer/hist
def test_image_plot_pull():
    """
    Test plot_pull by comparing against a reference image generated via
    `pytest --mpl-generate-path=tests/baseline`
    """

    np.random.seed(42)

    s_axis = axis.Regular(
        50, -4, 4, name="S", label="s [units]", underflow=False, overflow=False
    )
    sampled = Hist(s_axis).fill(np.random.normal(size=100))

    def pdf(x, a=1 / np.sqrt(2 * np.pi), x0=0, sigma=1, offset=0):
        return a * np.exp(-((x - x0)**2) / (2 * sigma**2)) + offset

    fig = plt.figure()

    pull_options = dict(
        eb_color="black",
        fp_color="blue",
        ub_color="lightblue",
        fit_fmt=r"{name} = {value:.3g} $\pm$ {error:.3g}",
    )
    assert sampled.plot_pull(pdf, **pull_options)

    return fig
示例#27
0
def make_mean_hist(hists, debug=False):
    """
    Return a hist combining <hists> bin by bin.

    Bins from the different hists are matched by low edge: the contents of all bins sharing a low edge are summed
    (they are not divided by len(hists)), and normalize() is then called on the resulting hist.
    If <debug> is set, the per-hist and combined (low edge, content) pairs are printed.
    """
    # map from bin low edge to the summed content over all hists
    binvals = {}
    for hist in hists:
        if debug:
            print '    sub',
        # n_bins + 2 entries per hist -- presumably the real bins plus under/overflow (TODO confirm against Hist)
        for ib in range(0, hist.n_bins + 2):
            low_edge = hist.low_edges[ib]
            if low_edge not in binvals:
                binvals[low_edge] = 0.
            binvals[low_edge] += hist.bin_contents[ib]
            if debug:
                print '   ', low_edge, hist.bin_contents[ib],
        if debug:
            print ''
    binlist = sorted(binvals.keys())
    # the first and last low edges are excluded from the bin count and from the edge list passed as the fourth argument
    # NOTE(review): the fourth positional argument looks like explicit bin edges (xbins) -- confirm against the Hist constructor
    meanhist = Hist(len(binlist) - 2, binlist[1], binlist[-1], binlist[1:-1])
    if debug:
        print '   mean',
    # copy the summed contents over, index by index in low-edge order
    for ib in range(len(binlist)):
        meanhist.set_ibin(ib, binvals[binlist[ib]])
        if debug:
            print '   ', meanhist.low_edges[ib], meanhist.bin_contents[ib],
    if debug:
        print ''

    meanhist.normalize()
    return meanhist
示例#28
0
    def array2data2D(self,
                     data,
                     weights=None,
                     normed=False,
                     binning=1,
                     reBin=None):
        """
        Convert array of data to internal format
        - Designed for arrays of raw, un-binned data.
        - If you pass values here from an existing histogram ('weights' is not None
          and the 'data' param is just bin centers), it is possible to re-bin
          this histogram using the 'reBin' keyword

        Args:
            data: either indexable by 'x' and 'y', or a sequence whose first two
                entries are the x and y values.
            weights: optional per-entry weights, passed on to np.histogram2d.
            normed: if true, ask np.histogram2d for a density, and normalize again
                after any re-binning.
            binning: bin specification passed to np.histogram2d as `bins`.
            reBin: if not None, passed to Rebin2D once the histogram is built.

        Returns:
            Hist with content, bins, center, width and error set, plus the per-axis
            aliases xbins/ybins, xcenter/ycenter and xwidth/ywidth.
        """
        try:
            x = data['x']
            y = data['y']
        except (TypeError, IndexError):
            # lists/tuples raise TypeError for a string index, plain (non-structured)
            # numpy arrays raise IndexError -- both mean "positional, not named"
            x = data[0]
            y = data[1]

        # `density` replaces the `normed` keyword, which current numpy no longer accepts
        counts, bins_x, bins_y = np.histogram2d(x,
                                                y,
                                                bins=binning,
                                                density=normed,
                                                weights=weights)

        results = Hist()
        results.content = counts.flatten()  # counts is a ndarray (nxbins,nybins)
        results.bins = {'x': bins_x, 'y': bins_y}

        xcenter, ycenter = tools.dummy_bins2D(tools.midpoints(bins_x),
                                              tools.midpoints(bins_y))
        xwidth, ywidth = tools.dummy_bins2D(tools.widths(bins_x),
                                            tools.widths(bins_y))

        results.center = {'x': xcenter, 'y': ycenter}
        results.width = {'x': xwidth, 'y': ywidth}

        # NOTE(review): this error array keeps the 2D shape while <content> is flattened -- confirm downstream code expects that
        results.error = np.sqrt(counts)
        if weights is not None:
            # scipy.stats to get sumw2 (x,y,weights should have the same shape)
            results.error = results.sumw2_2D(xdata=x, ydata=y, values=weights)

        if reBin is not None:
            # re-binning after making data from array, likely that the user
            # passed in binned data and wants to re-bin.
            results.Rebin2D(reBin)
            if normed:
                results.normalize()  # normalize after re-binning

        results.xbins = results.bins['x']
        results.ybins = results.bins['y']
        results.xcenter = results.center['x']
        results.ycenter = results.center['y']
        results.xwidth = results.width['x']
        results.ywidth = results.width['y']

        return results


## THE END ##
示例#29
0
    def array2data(self,
                   data,
                   weights=None,
                   normed=False,
                   binning=1,
                   reBin=None):
        """
        Convert array of data to internal format
        - Designed for arrays of raw, un-binned data.
        - If you pass values here from an existing histogram ('weights' is not None
          and the 'data' param is just bin centers), it is possible to re-bin
          this histogram using the 'reBin' keyword

        Args:
            data: the values to histogram.
            weights: optional per-entry weights, passed on to np.histogram.
            normed: if true, ask np.histogram for a density, and normalize again
                after any re-binning.
            binning: bin specification passed to np.histogram as `bins`.
            reBin: if not None, passed to Rebin once the histogram is built.

        Returns:
            Hist with content, bins, center, width and error set.
        """
        # Keep the histogram output in its own name: it used to overwrite <data>, so
        # the bin contents (not the raw values) were handed to sumw2_1D below.
        # `density` replaces the `normed` keyword, which current numpy no longer accepts.
        content, bins = np.histogram(data,
                                     bins=binning,
                                     weights=weights,
                                     density=normed)

        results = Hist()
        results.content = content
        results.bins = bins
        results.center = tools.midpoints(bins)
        results.width = tools.widths(bins)

        results.error = np.sqrt(content)
        if weights is not None:
            # numpy digitize to get sumw2 (needs the raw values, one per weight)
            results.error = results.sumw2_1D(xdata=data, values=weights)

        if reBin is not None:
            results.Rebin(reBin)
            if normed:
                results.normalize()  # normalize after re-binning

        return results
示例#30
0
    def convert(self, data):
        """Convert ROOT data into uniform format for framework.

        Sets self._isHistogram / self._isEfficiency from the type of <data>, then dispatches
        to the 1D or 2D converter for that type (chosen by self.dimensions), or to
        convert_array for anything that is neither.
        """
        self._isHistogram = isinstance(data, ROOT.TH1)
        self._isEfficiency = isinstance(data, ROOT.TEfficiency)

        h_data = Hist()
        if self._isHistogram:
            # TH1/TH2
            converter = self.hist2data if self.dimensions == 1 else self.hist2data2D
            h_data = converter(data, reBin=self.rebin, normed=self.normed)
        elif self._isEfficiency:
            # TEfficiency
            converter = self.TEfficiency2data if self.dimensions == 1 else self.TEfficiency2data2D
            h_data = converter(data)
        else:
            # assume the data is stored in an array
            h_data = self.convert_array(data)

        return h_data