Пример #1
0
    def test_init_retvalue(self):
        """Check which constructor argument sets ``Binning`` accepts or rejects.

        The first group of calls must construct without error; every entry
        in the second group must raise ``ValueError``.
        """
        edges = (10, 20, 30, 40, 50)

        # Argument sets that are expected to construct cleanly.
        for accepted in ({}, {'retvalue': 'number'}, {'retvalue': 'lowedge'}):
            Binning(boundaries=edges, **accepted)

        # Argument sets that are expected to be refused with ValueError.
        rejected = (
            {'retvalue': 'center'},
            {'retvalue': 'yyy'},
            {'retvalue': 'lowedge', 'bins': (1, 2, 3, 4)},
            {'retvalue': 'lowedge', 'underflow_bin': -1},
            {'retvalue': 'lowedge', 'overflow_bin': -1},
        )
        for extra in rejected:
            with self.assertRaises(ValueError):
                Binning(boundaries=edges, **extra)
Пример #2
0
def dataframe_config():
    '''
        Build the definition/config of the data frame (DF).

        :return a list holding a single DF config dictionary
    '''
    # Categorical binnings: HT and N_jet use fixed bin boundaries.
    ht_binning = Binning(boundaries=[200, 400, 600, 900, 1200])
    njet_binning = Binning(boundaries=[1, 2, 3, 4, 5, 6])
    # Echo with its default arguments; per the original author's note it
    # simply returns the value it gets, the explicit equivalent being
    # Echo(nextFunc = lambda x: x+1, valid = lambda x: True)
    nbjet_binning = Echo()
    # HEPPY component names are strings, so no rule for producing the next
    # bin is supplied.
    component_binning = Echo(nextFunc=None)

    config = {
        # tree branches to read in
        'keyAttrNames': ('componentName', 'ht40', 'nJet40', 'nBJet40'),
        # DF columns those branches are mapped to
        'keyOutColumnNames': ('component', 'htbin', 'njetbin', 'nbjetbin'),
        # one binning per category, in the same order as the columns
        'binnings': (
            component_binning,
            ht_binning,
            njet_binning,
            nbjet_binning,
        ),
        # weight branches that are multiplied together for the final
        # event weight
        'weight': WeightCalculatorProduct(['genWeight']),
    }

    return [config]
Пример #3
0
def dataframe_config():
    '''
        Build the definition/config of the data frame (DF).

        :return a list holding a single DF config dictionary
    '''
    # Categorical binnings: HT and N_jet use fixed bin boundaries.
    ht_binning = Binning(boundaries=[200, 400, 600, 900, 1200])
    njet_binning = Binning(boundaries=[1, 2, 3, 4, 5, 6])
    # Echo with its default arguments; per the original author's note it
    # simply returns the value it gets, the explicit equivalent being
    # Echo(nextFunc = lambda x: x+1, valid = lambda x: True)
    nbjet_binning = Echo()

    config = {
        # tree branches to read in
        'keyAttrNames': ('ht40', 'nJet40', 'nBJet40'),
        # DF columns those branches are mapped to
        'keyOutColumnNames': ('htbin', 'njetbin', 'nbjetbin'),
        # one binning per category, in the same order as the columns
        'binnings': (ht_binning, njet_binning, nbjet_binning),
    }

    return [config]
Пример #4
0
 def test_valid(self):
     """Check that values failing the ``valid`` predicate yield ``None``."""
     def at_least_ten(x):
         return x >= 10

     binning = Binning(
         boundaries=(30, 40, 50),
         retvalue='number',
         valid=at_least_ten,
     )

     # (input value, expected bin number) for values passing the predicate
     for value, expected in ((33, 1), (45, 2)):
         self.assertEqual(expected, binning(value))

     # 9 does not satisfy x >= 10
     self.assertIsNone(binning(9))
Пример #5
0
def _create_one_dimension(stage_name, _in, _out, _bins=None, _index=None):
    """Validate one binning specification and build its binning object.

    :param stage_name: label placed at the start of every error message
    :param _in: must be a string
    :param _out: must be a string
    :param _bins: ``None`` (an ``Echo(nextFunc=None)`` is used) or a dict
        holding either ``nbins``/``low``/``high`` or ``edges``
    :param _index: returned unchanged; when truthy it must be a string
    :return: the tuple ``(str(_in), str(_out), bin_obj, _index)``
    :raises BadBinnedDataframeConfig: if any argument fails validation or
        no bin edges can be derived from ``_bins``
    """
    text_types = six.string_types

    if not isinstance(_in, text_types):
        raise BadBinnedDataframeConfig(
            "{}: binning dictionary contains non-string value for 'in'".format(stage_name))
    if not isinstance(_out, text_types):
        raise BadBinnedDataframeConfig(
            "{}: binning dictionary contains non-string value for 'out'".format(stage_name))
    # NOTE(review): the message mentions "non-integer", but only strings pass
    # this check, and falsy values (None, 0, "") skip it entirely -- confirm
    # which behaviour is intended.
    if _index and not isinstance(_index, text_types):
        raise BadBinnedDataframeConfig(
            "{}: binning dictionary contains non-string and non-integer value for 'index'".format(stage_name))

    if _bins is None:
        binning = Echo(nextFunc=None)
    elif not isinstance(_bins, dict):
        raise BadBinnedDataframeConfig(
            "{}: bins is neither None nor a dictionary for in={}".format(stage_name, _in))
    else:
        # Two dictionary layouts are recognised (other keys, such as
        # 'overflow' in these examples, are not read here):
        # - bins: {nbins: 6 , low: 1  , high: 5 , overflow: True}
        # - bins: {edges: [0, 200., 900], overflow: True}
        if all(key in _bins for key in ("nbins", "low", "high")):
            # nbins equal-width bins need nbins + 1 edges
            edges = np.linspace(_bins["low"], _bins["high"], _bins["nbins"] + 1)
        elif "edges" in _bins:
            edges = _bins["edges"]
        else:
            raise BadBinnedDataframeConfig(
                "{}: No way to infer binning edges for in={}".format(stage_name, _in))
        binning = Binning(boundaries=edges)

    return (str(_in), str(_out), binning, _index)
Пример #6
0
def test_lowedge():
    """Check the low-edge return values, with and without explicit ``retvalue``."""
    lows = (10.0, 20.0, 30.0, 40.0)
    ups = (20.0, 30.0, 40.0, 50.0)

    # (input value, expected low edge)
    cases = (
        (15, 10),
        (21, 20),
        (20, 20),
        (5, float("-inf")),
        (55, 50),
    )

    # First with retvalue given explicitly, then relying on the default
    # (the original test notes that 'lowedge' is the default).
    for extra in ({'retvalue': 'lowedge'}, {}):
        obj = Binning(lows=lows, ups=ups, **extra)
        for value, expected in cases:
            assert expected == obj(value)
Пример #7
0
def test_valid():
    """Check that values failing the ``valid`` predicate yield ``None``."""
    settings = {
        'boundaries': (30, 40, 50),
        'retvalue': 'number',
        'valid': lambda x: x >= 10,
    }
    binning = Binning(**settings)

    # (input value, expected bin number) for values passing the predicate
    for value, expected in ((33, 1), (45, 2)):
        assert expected == binning(value)

    # 9 does not satisfy x >= 10
    assert binning(9) is None
Пример #8
0
    def test_lowedge(self):
        """Check the low-edge return values, with and without explicit ``retvalue``."""
        lows = (10.0, 20.0, 30.0, 40.0)
        ups = (20.0, 30.0, 40.0, 50.0)

        # (input value, expected low edge)
        cases = (
            (15, 10),
            (21, 20),
            (20, 20),
            (5, float("-inf")),
            (55, 50),
        )

        # First with retvalue given explicitly, then relying on the default
        # (the original test notes that 'lowedge' is the default).
        for extra in ({'retvalue': 'lowedge'}, {}):
            obj = Binning(lows=lows, ups=ups, **extra)
            for value, expected in cases:
                self.assertEqual(expected, obj(value))
Пример #9
0
def test_next_number():
    """Check ``next`` when bins are identified by their number."""
    obj = Binning(boundaries=(10, 20, 30, 40, 50), retvalue='number')

    # (current bin, expected next bin); the final pair deliberately repeats
    # the overflow-bin check, which is expected to return the same bin.
    steps = ((0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 5), (5, 5))
    for current, expected in steps:
        assert expected == obj.next(current)

    # Arguments for which next() is expected to raise ValueError.
    for invalid in (2.5, 6):
        with pytest.raises(ValueError):
            obj.next(invalid)
Пример #10
0
def test_init_with_lows_ups():
    """Check ``bins`` and ``boundaries`` when constructing from lows and ups."""
    obj = Binning(
        lows=(10.0, 20.0, 30.0, 40.0),
        ups=(20.0, 30.0, 40.0, 50.0),
        retvalue='number',
    )

    expected_bins = (1, 2, 3, 4)
    expected_boundaries = (10, 20, 30, 40, 50)
    assert expected_bins == obj.bins
    assert expected_boundaries == obj.boundaries
Пример #11
0
 def test_init_with_lows_ups(self):
     """Check ``bins`` and ``boundaries`` when constructing from lows and ups."""
     obj = Binning(
         lows=(10.0, 20.0, 30.0, 40.0),
         ups=(20.0, 30.0, 40.0, 50.0),
         retvalue='number',
     )

     expected_bins = (1, 2, 3, 4)
     expected_boundaries = (10, 20, 30, 40, 50)
     self.assertEqual(expected_bins, obj.bins)
     self.assertEqual(expected_boundaries, obj.boundaries)
Пример #12
0
def test_init_with_boundaries():
    """Check ``bins``, ``lows`` and ``ups`` when constructing from boundaries."""
    obj = Binning(boundaries=(10, 20, 30, 40, 50), retvalue='number')

    expected_bins = (1, 2, 3, 4)
    expected_lows = (10.0, 20.0, 30.0, 40.0)
    expected_ups = (20.0, 30.0, 40.0, 50.0)
    assert expected_bins == obj.bins
    assert expected_lows == obj.lows
    assert expected_ups == obj.ups
Пример #13
0
def test_call():
    """Check the bin number returned when calling a ``Binning`` object."""
    obj = Binning(
        bins=(1, 2, 3, 4),
        lows=(10.0, 20.0, 30.0, 40.0),
        ups=(20.0, 30.0, 40.0, 50.0),
        retvalue='number',
    )

    # (input value, expected bin number)
    cases = (
        (15, 1),
        (21, 2),
        (20, 2),  # on the low edge
        (5, 0),   # underflow
        (55, 5),  # overflow
    )
    for value, expected in cases:
        assert expected == obj(value)
Пример #14
0
 def test_call(self):
     """Check the bin number returned when calling a ``Binning`` object."""
     obj = Binning(
         bins=(1, 2, 3, 4),
         lows=(10.0, 20.0, 30.0, 40.0),
         ups=(20.0, 30.0, 40.0, 50.0),
         retvalue='number',
     )

     # (input value, expected bin number)
     cases = (
         (15, 1),
         (21, 2),
         (20, 2),  # on the low edge
         (5, 0),   # underflow
         (55, 5),  # overflow
     )
     for value, expected in cases:
         self.assertEqual(expected, obj(value))
Пример #15
0
def test_next_number():
    """Check ``next`` when bins are identified by their number."""
    edges = (10, 20, 30, 40, 50)
    obj = Binning(boundaries=edges, retvalue='number')

    # Bins 0..4 each step to the following bin number.
    for current in range(5):
        assert current + 1 == obj.next(current)

    # Bin 5 maps to itself; the original test checks this twice and notes
    # that the overflow bin returns the same bin.
    assert 5 == obj.next(5)
    assert 5 == obj.next(5)

    # Arguments for which next() is expected to raise ValueError.
    for invalid in (2.5, 6):
        with pytest.raises(ValueError):
            obj.next(invalid)
Пример #16
0
def test_next_lowedge():
    """Check ``next`` when bins are identified by their low edge."""
    int_edges = (10, 20, 30, 40, 50)
    obj = Binning(boundaries=int_edges, retvalue='lowedge')

    # On the boundaries: each edge steps to the following one, and the last
    # edge maps to itself.
    for edge, following in zip(int_edges, int_edges[1:] + int_edges[-1:]):
        assert following == obj.next(edge)

    # underflow_bin
    assert 10 == obj.next(float('-inf'))

    # Same stepping check with small floating-point boundaries.
    float_edges = (0.001, 0.002, 0.003, 0.004, 0.005)
    obj = Binning(boundaries=float_edges, retvalue='lowedge')
    for edge, following in zip(float_edges, float_edges[1:] + float_edges[-1:]):
        assert following == obj.next(edge)
Пример #17
0
def test_init_retvalue():
    """Check which constructor argument sets ``Binning`` accepts or rejects.

    The first group of calls must construct without error; every entry in
    the second group must raise ``ValueError``.
    """
    edges = (10, 20, 30, 40, 50)

    # Argument sets that are expected to construct cleanly.
    for accepted in ({}, {'retvalue': 'number'}, {'retvalue': 'lowedge'}):
        Binning(boundaries=edges, **accepted)

    # Argument sets that are expected to be refused with ValueError.
    rejected = (
        {'retvalue': 'center'},
        {'retvalue': 'yyy'},
        {'retvalue': 'lowedge', 'bins': (1, 2, 3, 4)},
        {'retvalue': 'lowedge', 'underflow_bin': -1},
        {'retvalue': 'lowedge', 'overflow_bin': -1},
    )
    for extra in rejected:
        with pytest.raises(ValueError):
            Binning(boundaries=edges, **extra)
Пример #18
0
def prepare_dataframe_configs(weights=None):
    '''
        Creates the definition/config of the data frame (DF).

        :param weights: names of weight branches. Currently unused: the code
                        that would consume it is disabled (see the
                        commented-out block below). Defaults to None rather
                        than a shared mutable list.
        :return a dict mapping the name "data" to the DF config
    '''
    # Set up categorical binning
    jetpt_bin = Binning(boundaries=range(0, 1000, 20))
    njetbin = Echo()

    # the DF config shared by every entry of the returned dict
    base = dict(
        keyAttrNames=('Jet_pt', 'nJet'),
        keyOutColumnNames=('jetpt', 'njet'),
        binnings=(jetpt_bin, njetbin),
    )

    df_configs = {"data": base}

    # NOTE: the block below is disabled and kept for reference only. It was
    # the "else" branch that built one config per weight combination.
    #
    #    # List of weight branches that are multiplied together for the final event weight
    #    #
    #    # TODO: Storing the product of these weights as weight_nominal for each
    #    # event would be more efficient, but this needs communication or a
    #    # common interface between a nominal weight scribbler and the DF
    #    # configuration
    #
    #    # Build a dictionary for all the combinations of weights we need to check for systematics
    #    weight_combinations = {"nominal": weights, "unweighted": []}
    #    for weight in weights:
    #        for variation in ["up", "down"]:
    #            variation_name = "{}_{}".format(weight, variation)
    #            weight_combinations[variation_name] = weights + [variation_name]
    #
    #    df_configs = {}
    #    for name, weight_list in weight_combinations.items():
    #        config = copy.copy(base)
    #        if weight_list:
    #            config["weight"] = WeightCalculatorProduct(weight_list)
    #        df_configs[name] = config

    return df_configs
Пример #19
0
    def test_next_number(self):
        """Check ``next`` when bins are identified by their number."""
        obj = Binning(boundaries=(10, 20, 30, 40, 50), retvalue='number')

        # (current bin, expected next bin); the final pair deliberately
        # repeats the overflow-bin check, which is expected to return the
        # same bin.
        steps = ((0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 5), (5, 5))
        for current, expected in steps:
            self.assertEqual(expected, obj.next(current))

        # Arguments for which next() is expected to raise ValueError.
        for invalid in (2.5, 6):
            with self.assertRaises(ValueError):
                obj.next(invalid)
Пример #20
0
def test_init_exceptions():
    """Check that inconsistent constructor arguments raise ``ValueError``."""
    invalid_argument_sets = (
        {},
        {'lows': 1},
        {'ups': 1},
        {'boundaries': 1, 'lows': 1},
        {'boundaries': 1, 'ups': 1},
        {'boundaries': 1, 'lows': 1, 'ups': 1},
        # Here the last low edge (45.0) differs from the preceding upper
        # edge (40.0) -- presumably the reason this pair is rejected.
        {
            'lows': (10.0, 20.0, 30.0, 45.0),
            'ups': (20.0, 30.0, 40.0, 50.0),
        },
    )

    for kwargs in invalid_argument_sets:
        with pytest.raises(ValueError):
            Binning(**kwargs)
Пример #21
0
def test_repr():
    """Smoke test: ``repr`` of a ``Binning`` object must not raise."""
    binning = Binning(boundaries=(10, 20, 30, 40, 50))
    # Only the absence of an exception is checked; the text is discarded.
    repr(binning)
Пример #22
0
def test_next_lowedge():
    """Check ``next`` when bins are identified by their low edge."""
    obj = Binning(boundaries=(10, 20, 30, 40, 50), retvalue='lowedge')

    # On the boundaries: (current low edge, expected next low edge). The
    # last edge maps to itself.
    for current, expected in ((10, 20), (20, 30), (30, 40), (40, 50), (50, 50)):
        assert expected == obj.next(current)

    # underflow_bin
    minus_infinity = float('-inf')
    assert 10 == obj.next(minus_infinity)

    # Same stepping check with small floating-point boundaries.
    obj = Binning(
        boundaries=(0.001, 0.002, 0.003, 0.004, 0.005),
        retvalue='lowedge',
    )
    float_steps = (
        (0.001, 0.002),
        (0.002, 0.003),
        (0.003, 0.004),
        (0.004, 0.005),
        (0.005, 0.005),
    )
    for current, expected in float_steps:
        assert expected == obj.next(current)
Пример #23
0
    def test_next_lowedge(self):
        """Check ``next`` when bins are identified by their low edge."""
        obj = Binning(boundaries=(10, 20, 30, 40, 50), retvalue='lowedge')

        # On the boundaries: (current low edge, expected next low edge).
        # The last edge maps to itself.
        int_steps = ((10, 20), (20, 30), (30, 40), (40, 50), (50, 50))
        for current, expected in int_steps:
            self.assertEqual(expected, obj.next(current))

        # underflow_bin
        self.assertEqual(10, obj.next(float('-inf')))

        # Same stepping check with small floating-point boundaries.
        obj = Binning(
            boundaries=(0.001, 0.002, 0.003, 0.004, 0.005),
            retvalue='lowedge',
        )
        float_steps = (
            (0.001, 0.002),
            (0.002, 0.003),
            (0.003, 0.004),
            (0.004, 0.005),
            (0.005, 0.005),
        )
        for current, expected in float_steps:
            self.assertEqual(expected, obj.next(current))
Пример #24
0
def test_onBoundary():
    """Check bin numbers for values lying exactly on small float boundaries."""
    edges = (0.000001, 0.00001, 0.0001)
    obj = Binning(boundaries=edges, retvalue='number')

    # The i-th boundary (counting from 1) is expected to fall into bin i.
    for number, edge in enumerate(edges, 1):
        assert number == obj(edge)
Пример #25
0
def test_init_exceptions_nobin():
    """Check that a single boundary is rejected with ``ValueError``."""
    single_edge = (10, )
    settings = {'boundaries': single_edge}

    with pytest.raises(ValueError):
        Binning(**settings)
Пример #26
0
 def test_onBoundary(self):
     """Check bin numbers for values lying exactly on small float boundaries."""
     edges = (0.000001, 0.00001, 0.0001)
     obj = Binning(boundaries=edges, retvalue='number')

     # The i-th boundary (counting from 1) is expected to fall into bin i.
     for number, edge in enumerate(edges, 1):
         self.assertEqual(number, obj(edge))