Example #1
def neg2LogL(pars,const_pars,analyses,data,transform=None):
    """General -2logL function to optimise
       TODO: parameter 'transform' feature not currently in use, probably doesn't work correctly
    """
    #print("In neg2LogL:")
    #print("pars:", c.print_with_id(pars,id_only))
    #print("const_pars:", c.print_with_id(const_pars,id_only))
    if transform is not None:
        pars_t = transform(pars)
    else:
        pars_t = pars
    if const_pars is None:
        all_pars = pars_t
    else:
        all_pars = c.deep_merge(const_pars,pars_t)

    # Sanity check: make sure parameters haven't become nan somehow
    anynan = False
    nanpar = ""
    for a,par_dict in pars.items():
        for p, val in par_dict.items():
            if tf.math.reduce_any(tf.math.is_nan(val)):
                anynan = True
                nanpar += "\n    {0}::{1}".format(a,p)
    if anynan:
        msg = "NaNs detected in parameter arrays during optimization! The fit may have become unstable and wandering into an invalid region of parameter space; please check your analysis setup. Parameter arrays containing NaNs were:{0}".format(nanpar)
        raise ValueError(msg)

    # Parameters will enter this function pre-scaled such that MLEs have variance ~1
    # So we need to set the pre-scaled flag for the JointDistribution constructor to
    # avoid applying the scaling a second time.
    joint = JointDistribution(analyses.values(),all_pars,pre_scaled_pars=True)
    q = -2*joint.log_prob(data)
    #print("q:", q)
    #print("all_pars:", all_pars)
    #print("logL parts:", joint.log_prob_parts(data))

    if tf.math.reduce_any(tf.math.is_nan(q)):
        # Attempt to locate components generating the nans
        component_logprobs = joint.log_prob_parts(data)
        nan_components = ""
        for comp,val in component_logprobs.items():
            if tf.math.reduce_any(tf.math.is_nan(val)):
                nan_components += "\n    {0}".format(comp)
        msg = "NaNs detected in result of neg2LogL calculation! Please check that your input parameters are valid for the distributions you are investigating, and that the fit is stable! Components of the joint distribution whose log_prob contained NaNs were:" + nan_components
        raise ValueError(msg)
    total_loss = tf.math.reduce_sum(q)
    return total_loss, q, None, None
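
In use, only the scalar total_loss is needed by a gradient-based optimiser. A minimal wrapper sketch (the closure below and the tf.GradientTape usage are assumptions about the calling code, not part of the source):

def make_loss_fn(const_pars, analyses, data):
    """Close over the fixed arguments so an optimiser only sees 'pars'."""
    def loss(pars):
        total_loss, q, _, _ = neg2LogL(pars, const_pars, analyses, data)
        return total_loss
    return loss

# Usage sketch, assuming 'pars' is a dict of dicts of tf.Variable arrays:
# loss_fn = make_loss_fn(const_pars, analyses, data)
# with tf.GradientTape() as tape:
#     total = loss_fn(pars)
# grads = tape.gradient(total, [v for d in pars.values() for v in d.values()])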
Example #2
def test_trivial_alternate(fresh_lee, params1):
    """Tests that same log_prob values are obtained for
       null and alternate fits when the only alternate
       hypothesis *is* the null.
       Also checks that results match 'vanilla' results
       from JointDistribution"""
    # Need a largish number of events here to detect some rare problems
    fresh_lee.add_events(1000)
    fresh_lee.process_null()
    fresh_lee.process_alternate(get_hyp_gen_1(params1))
    df_null, df_null_obs = fresh_lee.load_results(fresh_lee.null_table,
                                                  ['log_prob'],
                                                  get_observed=True)
    df_prof, df_prof_obs = fresh_lee.load_results(fresh_lee.profiled_table,
                                                  ['log_prob_quad', 'logw'],
                                                  get_observed=True)
    print("df_null:", df_null)
    print("df_null['log_prob']:", df_null['log_prob'])
    print("df_prof:", df_prof)
    print("df_prof['log_prob_quad']:", df_prof['log_prob_quad'])
    print("df_null_obs:", df_null_obs)
    print("df_prof_obs:", df_prof_obs)
    # Get 'vanilla' JointDistribution results
    samples = fresh_lee.load_all_events()  # Loads all events currently on disk
    joint = JointDistribution(fresh_lee.analyses, params1)
    log_prob, joint_fitted, par_dict = joint.fit_nuisance(samples, params1)
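    # (log_prob is assumed to carry singleton hypothesis batch dimensions,
    #  i.e. shape (n_events, 1, 1), hence the [:, 0, 0] indexing below)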
    print("log_prob:", log_prob)
    print("df_null['log_prob'] - log_prob:",
          df_null['log_prob'] - log_prob[:, 0, 0])
    print("df_prof['log_prob_quad'] - log_prob:",
          df_prof['log_prob_quad'] - log_prob[:, 0, 0])
    print("df_null['log_prob'] - df_prof['log_prob_quad']:",
          df_null['log_prob'] - df_prof['log_prob_quad'])
    tol = 1e-6
    assert ((df_null['log_prob'] - log_prob[:, 0, 0]).abs() <
            tol).all()  # LEE null vs JointDistribution
    assert ((df_prof['log_prob_quad'] - log_prob[:, 0, 0]).abs() <
            tol).all()  # LEE (quad) alternate vs JointDistribution
    assert ((df_null['log_prob'] - df_prof['log_prob_quad']).abs() < tol).all(
    )  # LEE: null vs (quad) alternate (redundant but why not do it)
    assert ((df_null_obs['log_prob'] - df_prof_obs['log_prob_quad']).abs() <
            tol).all()  # LEE: null vs alternate (obs)
Example #3
def joint(analysis, parameters):
    joint = JointDistribution([analysis], parameters)
    return joint
Example #4
def test_JointDistribution_init_basic(analysis):
    """Test that JointDistribution objects can be instantiated from the test analysis objects
       No parmeter version"""
    joint = JointDistribution([analysis])
Example #5
def jointA(leeAnalysis):
    joint = JointDistribution([leeAnalysis.analysis])
    return joint
Example #6
def test_plot_quad_logl(analysis, pars, samples, curve_par, test_name):
    """Create plots of test logl curves, comparing direct fits with 'quad' versions"""
    print("pars:", pars)
    print("samples:", samples)

    # We want to fit all samples for all parameters, so we need to make sure the batch shapes
    # can be broadcast against each other. Easiest way is to insert some extra dimensions into
    # both (since they are both 1D batches to start with)
    pars_batch = c.deep_expand_dims(pars, axis=0)
    samples_batch = c.deep_expand_dims(samples, axis=1)
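    # e.g. 2 samples and 20 hypotheses (the case described in the comments below):
    # pars_batch gets batch shape (1,20), samples_batch gets (2,1), and the two
    # broadcast together to the (2,20) fit shape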

    joint = JointDistribution([analysis], pars_batch)
    log_prob, joint_fitted_nuisance, fitted_pars_nuisance = joint.fit_nuisance(
        samples_batch)

    print("log_prob:", log_prob)
    print("pars_batch:", pars_batch)
    print("fitted_pars_nuisance['fitted']:", fitted_pars_nuisance['fitted'])
    print("fitted_pars_nuisance['fixed']:", fitted_pars_nuisance['fixed'])
    print("pars_batch:", pars_batch)

    # The 'quad' log_prob, expanding about every nuisance BF point for every hypothesis
    # (i.e. NOT what is done in lee-correction).
    # So the following happens:
    # 1. (2,20) shape nuisance parameter fits obtained (2 samples * 20 hypotheses, broadcast against each other)
    #    These become the expansion points in the log_prob_quad evaluation
    # 2. (2,1) shaped samples are provided to create the log_prob_quad_f function
    # 3. (1,20) parameters are provided for log-likelihood evaluation
    #    These are the same parameters used as input to the fits, so should cause evaluation to occur exactly at the expansion points
    # 4. Result has shape (2,20)
    f = joint_fitted_nuisance.log_prob_quad_f(samples_batch)
    log_prob_quad = f(pars_batch)
    # (passing fitted_pars_nuisance['fixed'] here would be equivalent: the 'fixed'
    # parameters include the 'signal' ones, so it is the same thing as pars_batch)

    # The 'quad' log_prob, expanding just once about the global BF point amongst input
    # hypotheses, per sample (i.e. what IS done in lee-correction, more or less; actually
    # we use a null-hypothesis point rather than the BF, but the point is that there is
    # just one expansion point per sample).
    # So the following happens:
    # 1. (2,1) shape nuisance parameter fits obtained (2 samples * 1 hypothesis)
    #    These become the expansion points in the log_prob_quad evaluation
    # 2. (2,1) shaped samples are provided to create the log_prob_quad_f function
    # 3. (1,20) parameters are provided for log-likelihood evaluation
    #    These are DIFFERENT parameters from those used as input to the fits, so should cause more non-trivial
    #    evaluation of the log_prob_quad function to occur. Results will be less accurate of course, but this
    #    is the "real-world" use case. Will make plots to check accuracy.
    log_prob_g, joint_fitted_all, fitted_pars_all = joint.fit_all(
        samples_batch)
    print("log_prob_g:", log_prob_g)
    print("fitted_pars_all['fitted']:", fitted_pars_all['fitted'])
    print("fitted_pars_all['fixed']:", fitted_pars_all['fixed'])

    f2 = joint_fitted_all.log_prob_quad_f(samples_batch)
    log_prob_quad_2 = f2(pars_batch)

    print("log_prob:", log_prob)
    print("log_prob_quad   (expanded from exact signal points):",
          log_prob_quad)
    print("log_prob_quad_2 (global BF expansion):", log_prob_quad_2)

    # Ok let's make some plots!

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Plot curve for each sample (0th axis of batch)
    if isinstance(curve_par, str):
        cpar, index = (curve_par, None)
    else:
        try:
            cpar, index = curve_par
        except ValueError as e:
            msg = "Failed to interpret curve 'parameter' specification! Needs to be either a string, or a (string,index) tuple indicating which parameter (and which index if multivariate) is the one that varies for this test!"
            raise ValueError(msg) from e

    if index is None:
        x = pars[analysis.name][cpar]
    else:
        x = pars[analysis.name][cpar][:, index]

    first = True
    for y, y_quad_1, y_quad_2 in zip(log_prob, log_prob_quad, log_prob_quad_2):
        if first:
            ax.plot(x, y, c='k', label="Full numerical profiling")
            ax.plot(x, y_quad_1, c='g', ls='--',
                    label='"quad" expansion at profiled points (i.e. no real expansion done)')
            ax.plot(x, y_quad_2, c='r', ls='--',
                    label='"quad" expansion around single global best fit per sample')
            first = False
        else:
            # No labels this time
            ax.plot(x, y, c='k')
            ax.plot(x, y_quad_1, c='g', ls='--')
            ax.plot(x, y_quad_2, c='r', ls='--')

    ax.set_ylabel("log_prob")
    ax.set_xlabel(str(curve_par))  # str() in case curve_par is a (name, index) tuple
    ax.set_title(
        "log_prob_quad curve test for analysis {0}, parameter {1}".format(
            analysis.name, curve_par))
    ax.legend(loc=0, frameon=False, framealpha=0, prop={'size': 10}, ncol=1)
    plt.tight_layout()
    fig.savefig(
        "unit_test_output/log_prob_quad_comparison_{0}.png".format(test_name))
Example #7
import numpy as np
import tensorflow as tf
from jmctf import NormalAnalysis, BinnedAnalysis, JointDistribution

verb = True

# make_norm
sigma = 2.
norm = NormalAnalysis("Test normal", 5, sigma)
# make_binned
# (name, n, b, sigma_b)
bins = [("SR1", 10, 9, 2), ("SR2", 50, 55, 4)]
binned = BinnedAnalysis("Test binned", bins)
# make_joint
joint = JointDistribution([norm, binned])
#joint = JointDistribution([binned])
#joint = JointDistribution([norm])
#DOF = 1
DOF = 3

sig_t = 1.

# get_structure
print("Sample structure:", joint.get_sample_structure())
# >> {'Test normal': {'x': 1, 'x_theta': 1}, 'Test binned': {'n': 2, 'x': 2}}
# fit
my_sample = {
    'Test normal::x': 4.3,
    'Test normal::x_theta': 0,
    'Test binned::n': [9, 53],
    'Test binned::x': [0, 0],  # key from the sample structure above; values assumed (snippet truncated here)
}
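
# A usage sketch (assumed continuation of this truncated snippet, following the
# fit_all call pattern seen in the other examples): maximise the joint likelihood
# over all free parameters for this sample.
log_prob, joint_fitted, par_dict = joint.fit_all(my_sample)
print("log_prob:", log_prob)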
Example #8
# (imports assumed; this example is excerpted from a longer script)
import numpy as np
import tensorflow as tf
from jmctf import NormalAnalysis, BinnedAnalysis, JointDistribution


# Will use the same JointDistribution as in the quickstart examples.

# make_norm
sigma = 2.
norm = NormalAnalysis("Test normal", 5, sigma)
# make_binned
# (name, n, b, sigma_b)
bins = [("SR1", 10, 9, 2),
        ("SR2", 50, 55, 4)]
binned = BinnedAnalysis("Test binned", bins)
# make_joint
joint = JointDistribution([norm,binned])

# JointDistribution here is a child class of the tfp class 
# JointDistributionNamed, so it should work pretty well with
# other tfp tools.
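
# For instance (a sketch, not from the original script; availability of these
# methods without explicitly setting/fitting parameters is an assumption):
# samples = joint.sample(4)        # dict of simulated events, one entry per variable
# print(joint.log_prob(samples))   # joint log-density of those events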

np.random.seed(12345)

dim = 100
batches = 500
minimum = np.random.randn(batches, dim)
scales = np.exp(np.random.randn(batches, dim))
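
# 'make_val_and_grad_fn' is not defined in this excerpt. A minimal definition
# (assumed, mirroring the standard TFP optimizer tutorial that this snippet
# follows) wraps a plain value function into the (value, gradient) form
# expected by tfp.optimizer routines:
import functools
import tensorflow_probability as tfp

def make_val_and_grad_fn(value_fn):
  @functools.wraps(value_fn)
  def val_and_grad(x):
    return tfp.math.value_and_gradient(value_fn, x)
  return val_and_grad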

@make_val_and_grad_fn
def quadratic(x):
  return tf.reduce_sum(scales * (x - minimum)**2, axis=-1)
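
# Usage sketch (assumed continuation, following the usual TFP L-BFGS pattern):
# minimise all 500 batched quadratics in one call.
start = tf.zeros((batches, dim), dtype=tf.float64)  # match the float64 numpy arrays above
results = tfp.optimizer.lbfgs_minimize(quadratic, initial_position=start, tolerance=1e-8)
print('All converged:', np.all(results.converged.numpy()))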