Example #1
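These snippets read like pytest cases for flamedisx's LogLikelihood. A minimal sketch of the preamble they assume follows; the exact import path of DEFAULT_DSETNAME and the fixture body are assumptions (though Example #12 implies the fixture holds exactly two events):

import numpy as np
import pandas as pd
import pytest
import tensorflow as tf

import flamedisx as fd
from flamedisx.likelihood import DEFAULT_DSETNAME  # import path is an assumption


@pytest.fixture
def xes():
    # Two hand-picked events; the column values are illustrative only
    data = pd.DataFrame([
        dict(s1=56., s2=2905., drift_time=143465.,
             x=2., y=0.4, z=-20., r=2.1, theta=0.1),
        dict(s1=23., s2=1080., drift_time=445622.,
             x=1.12, y=0.35, z=-59., r=1., theta=0.3)])
    return fd.ERSource(data.copy(), batch_size=2)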
def test_multisource(xes: fd.ERSource):
    lf = fd.LogLikelihood(sources=dict(er=xes.__class__),
                          elife=(100e3, 500e3, 5),
                          free_rates='er',
                          data=xes.data)
    l1 = lf.log_likelihood(er_rate_multiplier=2.)

    lf2 = fd.LogLikelihood(sources=dict(er=xes.__class__, er2=xes.__class__),
                           elife=(100e3, 500e3, 5),
                           data=xes.data)
    # Prevent jitter from the mu interpolator simulation from failing the test
    itp = lf.mu_itps['er']
    lf2.mu_itps = dict(er=itp, er2=itp)
    assert lf2.log_likelihood()[0] == l1[0]
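This equality is expected: two identical sources at their default unit rate multipliers contribute the same total expected events and the same summed differential rate as a single source with er_rate_multiplier=2, so the two log likelihoods must agree.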
Example #2
def test_constraint(xes: fd.ERSource):
    lf = fd.LogLikelihood(sources=dict(er=fd.ERSource), data=xes.data.copy())
    ll1 = lf()

    lf2 = fd.LogLikelihood(sources=dict(er=fd.ERSource),
                           log_constraint=lambda **kwargs: 100.,
                           data=xes.data.copy())

    # Fix interpolator nondeterminism
    itp = lf.mu_itps['er']
    lf2.mu_itps = dict(er=itp)

    ll2 = lf2()

    np.testing.assert_almost_equal(ll1 + 100., ll2)
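A constant log_constraint only shifts the total, as the assertion shows. A more realistic use, sketched below under the assumption (suggested by the lambda **kwargs signature above) that the constraint receives the current fit parameters as keyword arguments, is a Gaussian penalty on a shape parameter:

def gaussian_constraint(**kwargs):
    # Illustrative Gaussian log-penalty on elife; the centre and width
    # are made-up numbers, not taken from the test suite
    return -0.5 * ((kwargs['elife'] - 300e3) / 50e3) ** 2

lf3 = fd.LogLikelihood(sources=dict(er=fd.ERSource),
                       elife=(100e3, 500e3, 5),
                       log_constraint=gaussian_constraint,
                       data=xes.data.copy())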
Example #3
def test_hessian_rateonly(xes: fd.ERSource):
    class Bla(xes.__class__):
        """ER source with slightly different elife
        to prevent a singular matrix
        """
        @staticmethod
        def electron_detection_eff(drift_time,
                                   *,
                                   different_elife=333e3,
                                   extraction_eff=0.96):
            return extraction_eff * tf.exp(-drift_time / different_elife)

    # Test the hessian at the guess position
    lf = fd.LogLikelihood(sources=dict(er=xes.__class__, er2=Bla),
                          free_rates=['er', 'er2'],
                          data=xes.data)

    guess = lf.guess()
    assert len(guess) == 2

    print(guess)
    print(lf.log_likelihood(second_order=True, **guess))

    inv_hess = lf.inverse_hessian(guess)
    assert inv_hess.shape == (2, 2)
    assert inv_hess.dtype == np.float64
    # Check symmetry of hessian
    # The hessian is explicitly symmetrized before being passed to
    # the optimizer in bestfit
    a = inv_hess[0, 1]
    b = inv_hess[1, 0]
    assert abs(a - b) / (a + b) < 1e-3
Example #4
def test_inference(xes: fd.ERSource):
    lf = fd.LogLikelihood(sources=dict(er=xes.__class__),
                          elife=(100e3, 500e3, 5),
                          data=xes.data)

    # Test single-batch likelihood
    x, x_grad, _ = lf._log_likelihood(
        i_batch=tf.constant(0),
        dsetname=DEFAULT_DSETNAME,
        data_tensor=lf.data_tensors[DEFAULT_DSETNAME][0],
        batch_info=lf.batch_info,
        elife=tf.constant(200e3))
    assert isinstance(x, tf.Tensor)
    assert x.dtype == fd.float_type()
    assert x.numpy() < 0

    assert isinstance(x_grad, tf.Tensor)
    assert x_grad.dtype == fd.float_type()
    assert x_grad.numpy().shape == (1,)

    # Test a different parameter gives a different likelihood
    x2, x2_grad, _ = lf._log_likelihood(
        i_batch=tf.constant(0),
        dsetname=DEFAULT_DSETNAME,
        data_tensor=lf.data_tensors[DEFAULT_DSETNAME][0],
        batch_info=lf.batch_info,
        elife=tf.constant(300e3))
    assert (x - x2).numpy() != 0
    assert (x_grad - x2_grad).numpy().sum() != 0

    # Test batching
    l1 = lf.log_likelihood()
    l2 = lf()
    lf.log_likelihood(elife=tf.constant(200e3))
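A note, not from the test suite: the l1[0] indexing in Example #1 suggests log_likelihood returns a tuple whose first element is the summed log likelihood (with the gradient second), while calling the likelihood object directly returns the scalar alone:

ll, grad = lf.log_likelihood(elife=tf.constant(200e3))[:2]
scalar = lf(elife=tf.constant(200e3))  # just the log likelihood value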
Example #5
def test_multisource_er_nr(xes: fd.ERSource):
    lf = fd.LogLikelihood(
        sources=dict(er=xes.__class__, nr=fd.NRSource),
        elife=(100e3, 500e3, 5),
        data=xes.data)

    lf()
Example #6
def test_inference(xes: fd.ERSource):
    lf = fd.LogLikelihood(
        sources=dict(er=xes.__class__),
        elife=(100e3, 500e3, 5),
        data=xes.data)

    ##
    # Test non-autograph version
    ##
    x, x_grad = lf._log_likelihood(i_batch=tf.constant(0),
                                   dsetname=DEFAULT_DSETNAME,
                                   autograph=False,
                                   elife=tf.constant(200e3))
    assert isinstance(x, tf.Tensor)
    assert x.dtype == fd.float_type()
    assert x.numpy() < 0

    assert isinstance(x_grad, tf.Tensor)
    assert x_grad.dtype == fd.float_type()
    assert x_grad.numpy().shape == (1,)

    # Test a different parameter gives a different likelihood
    x2, x2_grad = lf._log_likelihood(i_batch=tf.constant(0),
                                     dsetname=DEFAULT_DSETNAME,
                                     autograph=False,
                                     elife=tf.constant(300e3))
    assert (x - x2).numpy() != 0
    assert (x_grad - x2_grad).numpy().sum() != 0

    ##
    # Test batching
    ##
    l1 = lf.log_likelihood(autograph=False)
    l2 = lf(autograph=False)
    lf.log_likelihood(elife=tf.constant(200e3), autograph=False)
Example #7
def test_multi_dset(xes: fd.ERSource):
    lf = fd.LogLikelihood(sources=dict(er=fd.ERSource), data=xes.data.copy())
    ll1 = lf()

    lf2 = fd.LogLikelihood(sources=dict(data1=dict(er1=fd.ERSource),
                                        data2=dict(er2=fd.ERSource)),
                           data=dict(data1=xes.data.copy(),
                                     data2=xes.data.copy()))

    # Fix interpolator nondeterminism
    itp = lf.mu_itps['er']
    lf2.mu_itps = dict(er1=itp, er2=itp)

    ll2 = lf2()

    np.testing.assert_almost_equal(2 * ll1, ll2, decimal=2)
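The factor of two follows from datasets entering the total as independent terms: with identical sources and identical data, ln L_total = ln L_data1 + ln L_data2 = 2 ln L_single.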
Example #8
def test_wimp_SR1_source(xes):
    # Test the KeyError 't' issue caused by the add_extra_columns bug
    lf = fd.LogLikelihood(sources=dict(er=fd.SR1ERSource,
                                       wimp=fd.SR1WIMPSource),
                          free_rates=('er', 'wimp'))

    d = lf.simulate(er_rate_multiplier=1.0, wimp_rate_multiplier=0.)
    lf.set_data(d)
Example #9
def test_simulate_column(xes):
    # Test for issue #47: check that this doesn't crash even though
    # ColumnSource has no simulator
    lf = fd.LogLikelihood(sources=dict(er=fd.ERSource, muur=fd.ColumnSource),
                          data=None)

    events = lf.simulate()
    events = lf.simulate(er_rate_multiplier=2.)
    events = lf.simulate(fix_truth=dict(x=0., y=0., z=-50.))
Example #10
def test_columnsource(xes: fd.ERSource):
    class myColumnSource(fd.ColumnSource):
        column = "diffrate"
        mu = 3.14

    xes.data['diffrate'] = 5.

    lf = fd.LogLikelihood(sources=dict(muur=myColumnSource), data=xes.data)

    np.testing.assert_almost_equal(lf(), -3.14 + len(xes.data) * np.log(5.))
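The asserted value is the extended unbinned log likelihood, ln L = -mu + sum_i ln r(x_i): with mu = 3.14 and a constant differential rate r = 5 for each of the N events, this is -3.14 + N * ln 5.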
Example #11
def test_retrace_set_data(xes: fd.ERSource):
    # Test issue #53
    lf = fd.LogLikelihood(sources=dict(er=fd.ERSource), data=xes.data.copy())
    ll1 = lf()

    new_data = xes.data.copy()
    new_data['s2'] *= 2
    lf.set_data(new_data)

    ll2 = lf()

    # Under issue #53, ll would not have been retraced, so lf() would
    # have returned an unchanged value
    assert ll1 != ll2
Example #12
def test_set_data_on_no_dset(xes: fd.ERSource):
    lf = fd.LogLikelihood(sources=dict(er=fd.ERSource),
                          data=None,
                          batch_size=4)
    # The batch_size can be at most 2 * len(data), or padding won't work,
    # which is why it is set explicitly in this test with only 2 events.
    # Usually, when constructing the likelihood with a very small dataset,
    # the batch_size is set accordingly, but in this test with data=None
    # that is not possible (an assert has been put in Source._init_padding).

    lf.set_data(xes.data.copy())

    assert lf.sources['er'].batch_size == 4
    assert lf.sources['er'].n_batches == 1
    assert lf.sources['er'].n_padding == 2
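    # i.e. n_padding = n_batches * batch_size - len(data) = 1 * 4 - 2 = 2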
    ll1 = lf()

    lf2 = fd.LogLikelihood(sources=dict(data1=dict(er1=fd.ERSource),
                                        data2=dict(er2=fd.ERSource)),
                           data=dict(data1=None, data2=None),
                           batch_size=4)

    lf2.set_data(dict(data1=xes.data.copy(), data2=xes.data.copy()))
    ll2 = lf2()
Example #13
def test_set_data(xes: fd.ERSource):
    data1 = xes.data
    data2 = pd.concat([data1.copy(), data1.iloc[:1].copy()])
    data2['s1'] *= 1.3

    data3 = pd.concat([data2, data2.iloc[:1]])

    data1.reset_index(drop=True, inplace=True)
    data2.reset_index(drop=True, inplace=True)
    data3.reset_index(drop=True, inplace=True)

    lf = fd.LogLikelihood(
        sources=dict(data1=dict(er1=fd.ERSource),
                     data2=dict(er2=fd.ERSource)),
        data=dict(data1=data1,
                  data2=data2))

    def internal_data(sname, col):
        series = lf.sources[sname].data[col]
        n_padding = lf.sources[sname].n_padding
        return series.iloc[:len(series)-n_padding]

    # Test S1 columns are the same (DFs are annotated)
    # Here we don't have any padding since batch_size is n_events
    pd.testing.assert_series_equal(internal_data('er1', 's1'), data1['s1'])
    pd.testing.assert_series_equal(internal_data('er2', 's1'), data2['s1'])

    # Set new data for only one dataset
    lf.set_data(dict(data1=data2))

    # Test S1 columns are the same (DFs are annotated)
    # Here we might have padding
    pd.testing.assert_series_equal(internal_data('er1', 's1'), data2['s1'])
    pd.testing.assert_series_equal(internal_data('er2', 's1'), data2['s1'])

    # Set new data for both datasets
    lf.set_data(dict(data1=data1,
                     data2=data3))

    # Test S1 columns are the same (DFs are annotated)
    pd.testing.assert_series_equal(internal_data('er1', 's1'), data1['s1'])
    pd.testing.assert_series_equal(internal_data('er2', 's1'), data3['s1'])

    # Test padding for smaller dsets
    lf.set_data(dict(data2=data1))

    pd.testing.assert_series_equal(internal_data('er2', 's1'), data1['s1'])
Example #14
def test_hessian_rate_and_shape(xes: fd.ERSource):
    lf = fd.LogLikelihood(sources=dict(er=xes.__class__),
                          elife=(100e3, 500e3, 5),
                          free_rates='er',
                          data=xes.data)

    guess = lf.guess()
    assert len(guess) == 2

    print(guess)
    print(lf.log_likelihood(second_order=True, **guess))

    inv_hess = lf.inverse_hessian(guess)
    assert inv_hess.shape == (2, 2)
    assert inv_hess.dtype == np.float64
    a = inv_hess[0, 1]
    b = inv_hess[1, 0]
    assert abs(a - b) / (a + b) < 1e-3
Example #15
def test_bestfit_scipy(xes):
    # Test bestfit (including hessian)
    lf = fd.LogLikelihood(
        sources=dict(er=xes.__class__),
        elife=(100e3, 500e3, 5),
        free_rates='er',
        data=xes.data)

    guess = lf.guess()
    # Set a reasonable rate guess by evaluating the likelihood
    # curve around the minimum
    xs_er = np.linspace(0.001, 0.004, 20)  # ER source range
    xs_nr = np.linspace(0.04, 0.1, 20)  # NR source range
    xs = list(xs_er) + list(xs_nr)
    ys = np.array([-lf(er_rate_multiplier=x) for x in xs])
    guess['er_rate_multiplier'] = xs[np.argmin(ys)]
    assert len(guess) == 2

    bestfit = lf.bestfit(guess, optimizer='scipy')
    assert isinstance(bestfit, dict)
    assert len(bestfit) == 2
Example #16
def test_hessian(xes: fd.ERSource):
    # Test the hessian at the guess position
    lf = fd.LogLikelihood(
        sources=dict(er=xes.__class__),
        elife=(100e3, 500e3, 5),
        free_rates='er',
        data=xes.data)

    guess = lf.guess()
    assert len(guess) == 2

    inv_hess = lf.inverse_hessian(guess)
    inv_hess_np = inv_hess.numpy()
    assert inv_hess_np.shape == (2, 2)
    assert inv_hess.dtype == fd.float_type()
    # Check symmetry of hessian
    # The hessian is explicitly symmetrized before being passed to
    # the optimizer in bestfit
    a = inv_hess_np[0, 1]
    b = inv_hess_np[1, 0]
    assert abs(a - b) / (a + b) < 1e-3
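A common follow-up, not part of the test suite: read one-sigma parameter uncertainties off the diagonal of the inverse Hessian (the standard asymptotic result), assuming its rows follow the parameter order of guess:

param_errors = {name: float(np.sqrt(inv_hess_np[i, i]))
                for i, name in enumerate(guess)}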
Example #17
def test_one_parameter_interval(xes):
    lf = fd.LogLikelihood(
        sources=dict(er=xes.__class__),
        elife=(100e3, 500e3, 5),
        free_rates='er',
        data=xes.data)

    guess = lf.guess()
    # Set a reasonable rate guess by evaluating the likelihood
    # curve around the minimum
    xs_er = np.linspace(0.001, 0.004, 20)  # ER source range
    xs_nr = np.linspace(0.04, 0.1, 20)  # NR source range
    xs = list(xs_er) + list(xs_nr)
    ys = np.array([-lf(er_rate_multiplier=x) for x in xs])
    guess['er_rate_multiplier'] = xs[np.argmin(ys)]
    assert len(guess) == 2

    # First find global best so we can check intervals
    bestfit = lf.bestfit(guess,
                         optimizer='scipy')

    ul = lf.limit('er_rate_multiplier', bestfit,
                  confidence_level=0.9, kind='upper')
    assert ul > bestfit['er_rate_multiplier']

    ll = lf.limit('er_rate_multiplier', bestfit,
                  confidence_level=0.9, kind='lower')
    assert ll < bestfit['er_rate_multiplier']

    ll, ul = lf.limit('er_rate_multiplier', bestfit,
                      confidence_level=0.9, kind='central')
    assert ll < bestfit['er_rate_multiplier'] < ul

    # Test fixed parameter
    fix = dict(elife=bestfit['elife'])

    ul = lf.limit('er_rate_multiplier', bestfit, fix=fix,
                  confidence_level=0.9, kind='upper')
    assert bestfit['er_rate_multiplier'] < ul
Example #18
def test_no_dset(xes: fd.ERSource):
    lf = fd.LogLikelihood(sources=dict(er=fd.ERSource), data=None)

    lf2 = fd.LogLikelihood(sources=dict(data1=dict(er1=fd.ERSource),
                                        data2=dict(er2=fd.ERSource)),
                           data=dict(data1=None, data2=None))
Example #19
def test_simulate(xes):
    lf = fd.LogLikelihood(sources=dict(er=fd.ERSource), data=None)

    events = lf.simulate()
    events = lf.simulate(er_rate_multiplier=2.)
    events = lf.simulate(fix_truth=dict(x=0., y=0., z=-50.))