示例#1
0
文件: experiment.py 项目: bjfar/jpvc
 def create_jointmodel(cls,parmodels,common_pars=None):
     """Create a single giant ParameterModel out of a list of
     ParameterModels.

     The parameters of every input model are renamed with an 'Exp{i}_'
     prefix (i being the position of the model in `parmodels`) to avoid
     name collisions, except for the parameters listed in `common_pars`.

     Args:
         parmodels: iterable of objects providing `model.submodels`,
             `model.dims` and `submodel_deps`.
         common_pars: names of parameters that are shared between the
             models and therefore must not be renamed. Defaults to no
             common parameters.

     Returns:
         A tuple (joint ParameterModel, renaming maps). The second element
         holds one list of 'Exp{i}_{par} -> {par}' strings per input model.
     """
     # None stands in for "no common parameters"; this avoids using a
     # mutable list as the default argument value.
     if common_pars is None:
         common_pars = ()
     print("In create_joint_model")
     print("parmodels:", parmodels)
     all_submodels = []
     all_fargs = []
     all_dims = []
     all_renaming = []
     for i,m in enumerate(parmodels):
         # Collect submodels and perform parameter renaming to avoid
         # collisions, except where parameters are explicitly set
         # as being common.
         model_renaming = []
         all_renaming.append(model_renaming)
         for submodel in m.model.submodels:
             # Throwaway object, only needed to figure out the parameter names
             temp = jtd.TransDist(submodel)
             renaming = ['Exp{0}_{1} -> {1}'.format(i,par)
                         for par in temp.args if par not in common_pars]
             model_renaming.extend(renaming)
             all_submodels.append(jtd.TransDist(submodel,renaming_map=renaming))
         all_dims.extend(m.model.dims)
         all_fargs.extend(m.submodel_deps)
         print("m:",m)
         print("all_dims", m.model.dims, all_dims)
     # Pair every submodel with its dimension to build the joint distribution
     new_joint = jtd.JointDist(list(zip(all_submodels,all_dims)))
     return jtm.ParameterModel(new_joint,all_fargs), all_renaming
示例#2
0
def make_mu_model(s):
    """Build a ParameterModel whose two normal components depend only on 'mu'.

    The 'mu1' and 'mu2' arguments of the parameter mapping functions pars1
    and pars2 are frozen to s[0] and s[1] with functools.partial.
    """
    fixed_mappings = (partial(pars1, mu1=s[0]), partial(pars2, mu2=s[1]))
    components = [jtd.TransDist(sps.norm, mapping) for mapping in fixed_mappings]
    dependencies = [['mu'], ['mu']]
    return jtm.ParameterModel(components, dependencies)
def custpois(func, rename):
    """Construct a transformed Poisson distribution whose log-pmf
       function is replaced by a version that can be evaluated for
       non-integer data (needed for Asimov calculations)."""
    # A TransDist without any transformation is created first, purely so
    # that its pdf calculation can be swapped out.
    base_poisson = jtd.TransDist(sps.poisson)
    base_poisson.set_logpdf(smooth_poisson)

    # Wrap the patched object with the requested transformation and renaming.
    transformed = jtd.TransDist(base_poisson, func, rename)
    return transformed
    def make_experiment_cov(self):
        """Build an Experiment from all signal regions with correlated
        nuisance parameters.

        Every signal region contributes one Poisson component, and the
        nuisance parameters theta_i are tied together by a single
        multivariate normal component built from self.cov. 'gof' and 'mu'
        tests are then defined on the resulting Experiment.

        Returns:
            The configured Experiment object.
        """
        region_ids = range(self.N_SR)
        signal_names = ['s_{0}'.format(i) for i in region_ids]
        theta_names = ['theta_{0}'.format(i) for i in region_ids]

        # Transformed pdf functions. Renaming is required because the same
        # underlying mapping function is reused for every signal region.
        poisson_part = []
        for i in region_ids:
            mapping = partial(poisson_f_add, b=self.SR_b[i])
            renaming = ['s_{0} -> s'.format(i), 'theta_{0} -> theta'.format(i)]
            poisson_part.append(jtd.TransDist(sps.poisson, mapping, renaming))
        corr_dist = jtd.TransDist(sps.multivariate_normal,
                                  partial(func_nuis_corr, cov=self.cov),
                                  func_args=list(theta_names))

        # Joint PDF: Poisson components plus one N_SR-dimensional component
        joint = jtd.JointDist(poisson_part + [(corr_dist, self.N_SR)])

        # Fit options. Step sizes are taken from the diagonal of the
        # covariance matrix; zero is used as starting value throughout.
        step_sizes = [0.1*np.sqrt(self.cov[i][i]) for i in region_ids]
        theta_opt = {name: 0 for name in theta_names}
        theta_steps = {'error_' + name: step
                       for name, step in zip(theta_names, step_sizes)}
        s_opt = {name: 0 for name in signal_names}
        s_steps = {'error_' + name: step
                   for name, step in zip(signal_names, step_sizes)}
        nuis_options = {**theta_opt, **theta_steps}
        general_options = {**s_opt, **s_steps, **nuis_options}

        # Full data list: SR_n followed by zeros for the nuisance measurements
        observed_data = np.concatenate([np.array(self.SR_n),np.zeros(self.N_SR)],axis=-1)

        # Experiment object and the options used during statistical tests
        experiment = Experiment(self.name,joint,observed_data,DOF=self.N_SR)

        gof_settings = dict(
            nuisance_par_null=theta_opt,
            test_pars={**s_opt,**theta_opt}, # Just for testing purposes
            null_options=nuis_options,
            full_options=general_options,
            null_seeds=self.seeds_null_f(),
            full_seeds=self.seeds_full_f_add(),
            diagnostics=[self.make_dfull(s_opt,theta_opt),
                         self.make_dnull(theta_opt)],
        )
        experiment.define_gof_test(**gof_settings)

        mu_settings = dict(
            nuisance_par_null=theta_opt,
            null_options=nuis_options,
            null_seeds=self.seeds_null_f(),
            scale_with_mu=list(signal_names),
            test_signal=self.test_signal,
        )
        experiment.define_mu_test(**mu_settings)
        return experiment
示例#5
0
    def make_mu_model(self, signal):
        """Create ParameterModel object for fitting with mu_test.

        Args:
            signal: mapping from parameter name to value; it is indexed with
                the arguments listed in self.tests['mu'].scale_with_mu.

        Returns:
            A ParameterModel built from the transformed submodels.

        Raises:
            ValueError: if no 'mu' test has been defined for this experiment.
        """
        if 'mu' not in self.tests.keys():
            message = "Options for 'mu' test have not been defined for experiment {0}!"
            raise ValueError(message.format(self.name))

        # The transform function cannot currently be applied directly to the
        # JointDist object, so it is taken apart, each submodel is
        # transformed on its own, and the pieces are reassembled afterwards.
        rebuilt = []
        for submodel, dim in zip(self.joint_pdf.submodels,
                                 self.joint_pdf.dims):
            args = c.get_dist_args(submodel)
            scaled = self.tests['mu'].scale_with_mu
            # Arguments that are not scaled by mu are kept, and 'mu' itself
            # is appended as a new argument.
            new_args = [a for a in args if a not in scaled]
            new_args.append('mu')
            # Only the part of the signal belonging to this submodel is
            # supplied; the remaining signal arguments are for other submodels.
            my_signal = {a: signal[a] for a in args if a in scaled}
            transform_func = partial(mu_parameter_mapping,
                                     scale_with_mu=scaled,
                                     **my_signal)
            trans_submodel = jtd.TransDist(submodel,
                                           transform_func,
                                           func_args=new_args)
            rebuilt.append((trans_submodel, dim))
        return jtm.ParameterModel(jtd.JointDist(rebuilt))
示例#6
0
        "mean": [gamma_inv_BSM + gamma_inv_SM, 0],
        "cov": [gamma_inv_sigma**2, sigma_err**2]
    }


def prof_loglike(Z, mean, cov):
    """Log-pdf of the difference of the two components of Z.

    The last axis of Z should hold two components, the second being the
    nuisance parameter measurement. Their difference is evaluated under a
    normal distribution centred on mean[0] whose variance is the sum of the
    entries of cov.

    The result is only proportional to the true value: the normalisation is
    wrong, but it cancels in likelihood ratios.
    """
    first_component = Z[..., 0]
    nuisance_measurement = Z[..., 1]
    difference = first_component - nuisance_measurement
    combined_width = np.sqrt(np.sum(cov))
    return sps.norm.logpdf(difference, loc=mean[0], scale=combined_width)


# Build the distribution function object. The TransDist is first created
# with no transformation at all, just so that an object exists whose pdf
# calculation can be replaced.
mynorm = jtd.TransDist(
    sps.multivariate_normal)
# Replace the pdf calculation with the profiled version (prof_loglike).
mynorm.set_logpdf(
    prof_loglike)
# Now build the joint pdf object. The 2 in the tuple lets JointDist know
# that this component is a multivariate distribution with two entries.
joint = jtd.JointDist(
    [(jtd.TransDist(mynorm, parfunc), 2)]
)


def get_seeds_full(samples, signal):
    print("samples.shape:", samples.shape)
    Inv = samples[..., 0]  # invisible width measurements
    X = samples[...,
                1]  # Nuisance measurements (well, theory pseudo-measurements)
    gamma_inv_SM = signal["gamma_inv_SM"]
    return {
示例#7
0
    l[m] = 0  #Poisson cannot have negative mean
    return {'mu': l}


# Parameter mapping function for nuisance parameter constraints
def func_nuis(**thetas):
    """Map the nuisance parameters onto the 'mean' and 'cov' arguments.

    The values of 'theta_0' ... 'theta_{N_regions-1}' are collected in
    order and flattened into the mean vector; the covariance is always
    CMS_cov.
    """
    ordered_thetas = []
    for i in range(N_regions):
        ordered_thetas.append(thetas['theta_{0}'.format(i)])
    flat_means = np.array(ordered_thetas).flatten()
    return dict(mean=flat_means, cov=CMS_cov)


# Create the transformed pdf functions
# Also requires some parameter renaming since we use the
# same underlying function repeatedly
poisson_part = [
    jtd.TransDist(sps.poisson, partial(poisson_f, i),
                  ['s_{0} -> s'.format(i), 'theta_{0} -> theta'.format(i)])
    for i in range(N_regions)
]
corr_dist = jtd.TransDist(
    sps.multivariate_normal,
    func_nuis,
    func_args=["theta_{0}".format(i) for i in range(N_regions)])
# The second tuple entry is the dimension of the multivariate component.
# func_nuis builds a mean vector with N_regions entries, so the dimension
# is tied to N_regions instead of being hard-coded.
correlations = [(corr_dist, N_regions)]

# Create the joint PDF object
joint = jtd.JointDist(poisson_part + correlations)

# Set options for parameter fitting
# Every nuisance parameter theta_i starts at zero.
theta_opt = {'theta_{0}'.format(i): 0 for i in range(N_regions)}
theta_opt2 = {
    'error_theta_{0}'.format(i): 1. * np.sqrt(CMS_cov[i][i])
示例#8
0
# Starting point for the least-squares fit; the fitted vector is unpacked
# below as (hatBF, sigma, K).
x0 = np.array([0, 0.1, 0])
r = least_squares(res, x0)
hatBF, sigma, K = r.x

# Evaluate chi2f on a grid of BF values in [0, 1) with step 0.001
BF = np.arange(0, 1, 0.001)
chi2 = chi2f(BF, hatBF, sigma, K)
chi2_min = np.min(chi2)  # Minimum over the scanned grid (1 itself is not included)
# chi2 relative to its minimum on the grid
dchi2 = chi2 - chi2_min


# Ok now build the probabilistic model for the MLE
def pars(BF):
    """Map BF onto 'loc'/'scale' arguments: loc is BF itself, scale is the
    module-level sigma."""
    return dict(loc=BF, scale=sigma)


# Joint distribution with a single component: a normal distribution whose
# arguments are produced by pars
joint = jtd.JointDist([jtd.TransDist(sps.norm, pars)])


def get_seeds_full(samples, signal):
    """Return starting guesses for 'BF', taken from the first entry along the
    last axis of samples.

    The MLEs are sampled directly, so this is trivial. `signal` is not used.
    """
    seeds = {}
    seeds['BF'] = samples[..., 0]
    return seeds


def get_seeds_null(samples, signal):
    """Return an empty seed dictionary: there are no nuisance parameters, so
    there are no nuisance parameter seeds. Both arguments are ignored."""
    no_seeds = dict()
    return no_seeds


def get_asimov(mu, signal=None):
    # Need to return data for which mu=1 or mu=0 is the MLE
    BF = signal['BF']
    nA = mu * BF  # I guess it is just this
示例#9
0
def null_seeds(samples, signal):
    """Return an empty seed dictionary, since there are no nuisance
    parameters. Both arguments are ignored."""
    empty = dict()
    return empty


def full_seeds(samples, signal, b):
    """Return the seed for 's': samples[:, 0, 0] with b subtracted.

    This is the exact MLE for s. `signal` is not used.
    """
    observed = samples[:, 0, 0]
    s_hat = observed - b
    return dict(s=s_hat)


N = 10  # Number of Gaussian experiments to construct
for i in range(N):
    # We will set the "background" differently for each piece so we can tell them apart easier
    b = 20 + 5 * i
    gauss = jtd.TransDist(sps.norm, partial(pars, b=b))

    # Create the "joint" PDF object (not very interesting since just one component)
    joint = jtd.JointDist([gauss])

    # Set options for parameter fitting
    s_opt = {
        's': 0,
        'error_s': 1
    }  # Will actually use seeds to obtain better starting guesses than this (actually, exact "guesses")

    nuis_options = {}  # No nuisance parameters (for now)
    general_options = {**s_opt}

    # Full observed data list, included observed values of nuisance measurements
    observed_data = [b + 5]  # let's try a slight excess
示例#10
0
def get_seeds_full(samples, signal):
    """Return starting guesses for 'loc', taken from the first entry along
    the last axis of samples.

    The MLEs are sampled directly, so this is trivial. `signal` is not used.
    """
    seeds = {}
    seeds['loc'] = samples[..., 0]
    return seeds


def get_seeds_null(samples, signal):
    """Return an empty seed dictionary: there are no nuisance parameters, so
    there are no nuisance parameter seeds. Both arguments are ignored."""
    no_seeds = dict()
    return no_seeds


nuis_options = {}  # None, no nuisance fit necessary

experiments = []
for n, o, s in zip(name, obs, sigma):
    joint = jtd.JointDist([jtd.TransDist(sps.norm, partial(pars, scale=s))])

    # Define the experiment object and options for fitting during statistical tests
    e = Experiment(n, joint, [o], DOF=1)

    general_options = {
        'loc': o,
        'error_loc': s
    }  # No real need for this either since seeds give exact MLE already.

    # For now we only define a 'gof' test, since there is no clear notion of a BSM contribution for these observables. At least not one that we can extract from our scan output.
    e.define_gof_test(
        null_options=nuis_options,
        full_options=general_options,
        null_seeds=(get_seeds_null,
                    True),  # extra flag indicates that seeds are exact
示例#11
0
    def make_experiment_nocov(self, signal):
        """Build an Experiment that uses a single, pre-selected signal region.

        No correlation information between signal regions is used here. For
        every signal region a two-component model (a Poisson component plus a
        normal constraint on an additive systematic parameter) is evaluated
        on rounded background-only data, and the region with the largest
        resulting log-likelihood ratio for `signal` is selected. The returned
        Experiment is built from that region alone, and 'gof' and 'mu' tests
        are defined on it.

        Args:
            signal: mapping of signal parameters. It is passed to the null
                seed function and unpacked into the parameter dictionary of
                the signal hypothesis. Presumably keyed 's_0', 's_1', ...
                like `zero_signal` below -- TODO confirm against callers.

        Returns:
            The configured Experiment object.
        """
        # Create the transformed pdf functions
        # Also requires some parameter renaming since we use the
        # same underlying function repeatedly
        # (Disabled multiplicative variant, kept for reference:)
        # poisson_part_mult = [jtd.TransDist(sps.poisson,partial(poisson_f_mult,b=self.SR_b[i]),
        #                        ['s_{0} -> s'.format(i),
        #                         'theta_{0} -> theta'.format(i)])
        #                  for i in range(self.N_SR)]

        poisson_part_add = [
            jtd.TransDist(
                sps.poisson, partial(poisson_f_add, b=self.SR_b[i]),
                ['s_{0} -> s'.format(i), 'theta_{0} -> theta'.format(i)])
            for i in range(self.N_SR)
        ]

        # Using lognormal constraint on multiplicative systematic parameter
        # (disabled, kept for reference:)
        # sys_dist_mult = [jtd.TransDist(sps.lognorm,
        #                           partial(func_nuis_lognorm_mult,
        #                                   theta_std=self.SR_b_sys[i]/self.SR_b[i]),
        #                           ['theta_{0} -> theta'.format(i)])
        #               for i in range(self.N_SR)]

        # Using normal constraint on additive systematic parameter
        sys_dist_add = [
            jtd.TransDist(
                sps.norm,
                partial(func_nuis_norm_add, theta_std=self.SR_b_sys[i]),
                ['theta_{0} -> theta'.format(i)]) for i in range(self.N_SR)
        ]

        # Median data under background-only hypothesis: the rounded
        # backgrounds followed by zeros for the nuisance measurements
        expected_data = np.concatenate(
            [np.round(self.SR_b), np.zeros(self.N_SR)], axis=-1)
        expected_data = expected_data[
            np.newaxis, np.newaxis, :]  # Add required extra axes.

        #print("fractional systematic uncertainties:")
        #print([self.SR_b_sys[i]/self.SR_b[i] for i in range(self.N_SR)])
        #quit()

        # This next part is a little tricky. We DON'T know the correlations
        # between signal regions here, so we follow the method used in
        # ColliderBit and choose just one signal region to use in our test,
        # by picking, in advance, the region with the best sensitivity to
        # the signal that we are interested in.
        # That is, the signal region with the highest value of
        # Delta LogL = LogL(n=b|s,b) - LogL(n=b|s=0,b)
        # is selected.
        #
        # So, we need to compute this for all signal regions.
        seedf = self.seeds_null_f_gof()
        # Seeds obtained with the signal parameters (null hypothesis fits
        # depend on signal parameters) ...
        seedb = seedf(
            expected_data,
            signal)
        zero_signal = {'s_{0}'.format(i): 0 for i in range(self.N_SR)}
        # ... and seeds obtained with all signal parameters set to zero
        seed = seedf(expected_data, zero_signal)
        LLR = []
        for i in range(self.N_SR):
            # Stand-alone model for region i only: Poisson + constraint
            model = jtm.ParameterModel([poisson_part_add[i]] +
                                       [sys_dist_add[i]])

            odata = np.array([np.round(self.SR_b[i])] +
                             [0])  # median expected background-only data
            si = 's_{0}'.format(i)  # NOTE(review): `si` is not used below
            ti = 'theta_{0}'.format(i)
            # NOTE(review): parsb combines the theta seed obtained WITH
            # `signal` with zero signal, while pars combines the theta seed
            # obtained with ZERO signal with `signal` -- confirm that this
            # cross-pairing is intended.
            parsb = {ti: seedb[ti], **zero_signal}
            pars = {ti: seed[ti], **signal}

            Lmaxb = model.logpdf(parsb, odata)
            Lmax = model.logpdf(pars, odata)

            LLR += [-2 * (Lmax - Lmaxb)]

        # Select region with largest expected (background-only) LLR for this signal
        selected = np.argmax(LLR)

        print("Selected signal region {0} ({1}) in analysis {2}".format(
            selected, self.SR_names[selected], self.name))

        # Create the joint PDF object (selected region only)
        #joint = jtd.JointDist(poisson_part_mult + sys_dist_mult)
        joint = jtd.JointDist([poisson_part_add[selected]] +
                              [sys_dist_add[selected]])

        # Fit options for the selected region only
        theta_opt = {'theta_{0}'.format(selected): 0}  # additive
        theta_opt2 = {
            'error_theta_{0}'.format(selected): 1. * self.SR_b_sys[selected]
        }  # Get good step sizes from systematic error estimate
        s_opt = {
            's_{0}'.format(selected): 0
        }  # Maybe zero is a good starting guess? Should use seeds that guess based on data.
        s_opt2 = {
            'error_s_{0}'.format(selected): 0.1 * self.SR_b_sys[selected]
        }  # Get good step sizes from systematic error estimate
        s_options = {**s_opt, **s_opt2}

        nuis_options = {**theta_opt, **theta_opt2}  #, 'print_level':1}
        general_options = {**s_options, **nuis_options}

        # (Disabled all-regions option setup and seed debugging output, kept
        # for reference:)
        # # Set options for parameter fitting
        # #theta_opt  = {'theta_{0}'.format(i) : 1 for i in range(self.N_SR)} # multiplicative
        # theta_opt  = {'theta_{0}'.format(i) : 0 for i in range(self.N_SR)} # additive
        # theta_opt2 = {'error_theta_{0}'.format(i) : 1.*self.SR_b_sys[i] for i in range(self.N_SR)} # Get good step sizes from systematic error estimate
        # s_opt  = {'s_{0}'.format(i): 0 for i in range(self.N_SR)} # Maybe zero is a good starting guess? Should use seeds that guess based on data.
        # s_opt2 = {'error_s_{0}'.format(i) :  0.1*self.SR_b_sys[i] for i in range(self.N_SR)} # Get good step sizes from systematic error estimate
        # s_options = {**s_opt, **s_opt2}

        # nuis_options = {**theta_opt, **theta_opt2} #, 'print_level':1}
        # general_options = {**s_options, **nuis_options}

        # print("Setup for experiment {0}".format(self.name))
        # #print("general_options:", general_options)
        # #print("s_MLE:", self.s_MLE)
        # #print("N_SR:", self.N_SR)
        # #print("observed_data:", observed_data.shape)
        # oseed = self.seeds_full_f_mult()(np.array(observed_data)[np.newaxis,np.newaxis,:])
        # print("parameter, MLE, data, seed")
        # for i in range(self.N_SR):
        #     par = "s_{0}".format(i)
        #     print("{0}, {1}, {2}, {3}".format(par, self.s_MLE[i], observed_data[i], oseed[par]))
        # for i in range(self.N_SR):
        #     par = "theta_{0}".format(i)
        #     print("{0}, {1}, {2}, {3}".format(par, 1, observed_data[i+self.N_SR], oseed[par]))
        # quit()

        # Define the experiment object and options for fitting during statistical tests
        # Data vector for the selected region: SR_n[selected] followed by 0
        # (unlike the loop above, this uses SR_n rather than the rounded
        # background; presumably the observed count plus the nominal
        # nuisance measurement -- confirm)
        odata = np.array([self.SR_n[selected]] +
                         [0])
        e = Experiment(self.name, joint, odata, DOF=1)

        e.define_gof_test(
            test_pars={
                **s_opt,
                **theta_opt
            },  # Just for testing purposes
            null_options=nuis_options,
            full_options=general_options,
            null_seeds=(self.seeds_null_f_gof(selected), True),
            full_seeds=(
                self.seeds_full_f_add(selected), True
            ),  # Extra flag indicates that the "seeds" are actually the analytically exact MLEs, so no numerical minimisation needed
            diagnostics=[
                self.make_dfull(s_opt, theta_opt, selected),
                self.make_dnull(theta_opt, selected),
            ])
        #             self.make_seedcheck(),
        #             self.make_checkpdf()]
        #)

        # Only the signal parameter of the selected region is scaled with mu
        e.define_mu_test(nuisance_par_null=theta_opt,
                         null_options=nuis_options,
                         null_seeds=self.seeds_null_f_gof(selected),
                         scale_with_mu=['s_{0}'.format(selected)],
                         test_signal=self.test_signal)

        # (Disabled sanity check of the pdf calculation, kept for reference:)
        # Just check that pdf calculation gives expected answer:
        # pars = {**s_opt,**theta_opt}
        # x = np.zeros(self.N_SR)
        # logpdf = e.general_model.logpdf(pars,e.observed_data)
        # expected_logpdf = [sps.poisson.logpmf(self.SR_n[i],self.SR_b[i]+pars['s_{0}'.format(i)]+pars['theta_{0}'.format(i)]) for i in range(self.N_SR)] \
        #                   + [sps.norm.logpdf(x[i],loc=pars['theta_{0}'.format(i)],scale=self.SR_b_sys[i]) for i in range(self.N_SR)]
        # print('logpdf         :',logpdf)
        # print('expected logpdf:', np.sum(expected_logpdf))

        # print("Components:")
        # for l, el in zip(e.general_model.logpdf_list(pars,e.observed_data), expected_logpdf):
        #     print('   logpdf:{0},  exp:{1}'.format(l[0][0],el))

        return e
示例#12
0
# Simple model for testing
def pars2_A(mu1):
    """Map mu1 onto 'loc'/'scale' arguments: loc is mu1, scale is fixed to 1."""
    return dict(loc=mu1, scale=1)


def pars2_B(mu2):
    """Map mu2 onto 'loc'/'scale' arguments: loc is mu2, scale is fixed to 1."""
    return dict(loc=mu2, scale=1)


def pars2_C(mu3):
    """Map mu3 onto 'loc'/'scale' arguments: loc is mu3, scale is fixed to 1."""
    return dict(loc=mu3, scale=1)


# Three independent normal components, one per parameter mu1, mu2, mu3.
# NOTE(review): other code built on the same library constructs joint
# objects with jtd.JointDist; confirm that jtd.JointModel exists in the
# library version this script targets.
jointmodel = jtd.JointModel([
    jtd.TransDist(sps.norm, pars2_A),
    jtd.TransDist(sps.norm, pars2_B),
    jtd.TransDist(sps.norm, pars2_C)
])
# The second argument lists, per submodel, the parameters it depends on
# (presumably -- confirm against the ParameterModel documentation)
parmodel = jtm.ParameterModel(jointmodel, [['mu1'], ['mu2'], ['mu3']])

# Define the null hypothesis
# NOTE(review): 'mu4' does not appear among the parameters of the model
# defined above (mu1, mu2, mu3) -- confirm whether it is intentional.
null_parameters = {'mu1': 0, 'mu2': 0, 'mu3': 0, 'mu4': 0}

# Get some test data (will be stored internally)
parmodel.simulate(10000, null_parameters)

# Set ranges for parameter "scan": every null parameter gets (-5, 5)
ranges = {}
for p in null_parameters.keys():
    ranges[p] = (-5, 5)
示例#13
0
    def make_experiment_cov(self):
        """Build the Experiment that uses every signal region together with
        the covariance matrix of the nuisance parameters.

        'gof', 'mu' and 'musb' tests are defined on the Experiment before it
        is returned.

        Returns:
            A tuple (experiment, selected), where `selected` is
            slice(0, self.N_SR) to let the calling function know that all
            signal regions are used.
        """
        region_ids = range(self.N_SR)
        signal_names = ['s_{0}'.format(i) for i in region_ids]
        theta_names = ['theta_{0}'.format(i) for i in region_ids]

        # Transformed pdf functions. Renaming is required because the same
        # underlying mapping function is reused for every signal region.
        poisson_part = []
        for i in region_ids:
            mapping = partial(poisson_f_add, b=self.SR_b[i])
            renaming = ['s_{0} -> s'.format(i), 'theta_{0} -> theta'.format(i)]
            poisson_part.append(custpois(mapping, renaming))
        corr_dist = jtd.TransDist(
            sps.multivariate_normal,
            partial(func_nuis_corr, cov=self.cov),
            func_args=list(theta_names))

        # Joint PDF: Poisson components plus one N_SR-dimensional component
        joint = jtd.JointDist(poisson_part + [(corr_dist, self.N_SR)])

        # Fit options. Step sizes are taken from the diagonal of the
        # covariance matrix; zero is used as starting value throughout.
        step_sizes = [0.1 * np.sqrt(self.cov[i][i]) for i in region_ids]
        theta_opt = {name: 0 for name in theta_names}
        theta_steps = {'error_' + name: step
                       for name, step in zip(theta_names, step_sizes)}
        s_opt = {name: 0 for name in signal_names}
        s_steps = {'error_' + name: step
                   for name, step in zip(signal_names, step_sizes)}
        nuis_options = {**theta_opt, **theta_steps}
        general_options = {**s_opt, **s_steps, **nuis_options}

        # Full data list: SR_n joined with zeros for the nuisance measurements
        observed_data = ljoin(self.SR_n, np.zeros(self.N_SR))

        # Experiment object and the options used during statistical tests
        experiment = Experiment(self.name, joint, observed_data, DOF=self.N_SR)

        # The False flags mark the seeds as NOT exact, which is the case
        # once the covariance matrix is used.
        gof_settings = dict(
            null_options=nuis_options,
            full_options=general_options,
            null_seeds=(self.seeds_null_f_gof(), False),
            full_seeds=(self.seeds_full_f_add(), False),
            diagnostics=[
                self.make_dfull(s_opt, theta_opt),
                self.make_dnull(theta_opt),
            ],
        )
        experiment.define_gof_test(**gof_settings)

        mu_settings = dict(
            null_options=nuis_options,
            null_seeds=(self.seeds_null_f_gof(), False),
            scale_with_mu=list(s_opt),
        )
        experiment.define_mu_test(**mu_settings)

        # mu1_seeds / mu0_seeds are the null seeds evaluated at mu=1 and mu=0
        musb_settings = dict(
            null_options=nuis_options,
            mu1_seeds=(self.seeds_null_f_gof(mu=1), False),
            mu0_seeds=(self.seeds_null_f_gof(mu=0), False),
            scale_with_mu=list(s_opt),
            # pretty sure Asimov data is the same regardless of correlations
            asimov=self.make_get_asimov_nocov(),
        )
        experiment.define_musb_test(**musb_settings)

        all_regions = slice(0, self.N_SR)
        return experiment, all_regions
示例#14
0
s_MLE = [1.5, 1.5]


# Create parameter mappings
# Proxy for 'signal strength' parameter added
def pars1(mu, mu1):
    """Map (mu, mu1) onto 'loc'/'scale' arguments: loc is b[0] + mu * mu1,
    scale is fixed to 1."""
    centre = b[0] + mu * mu1
    return dict(loc=centre, scale=1)


def pars2(mu, mu2):
    """Map (mu, mu2) onto 'loc'/'scale' arguments: loc is b[1] + mu * mu2,
    scale is fixed to 1."""
    centre = b[1] + mu * mu2
    return dict(loc=centre, scale=1)


# Create the joint PDF object: two normal components whose arguments are
# produced by pars1 and pars2 respectively
general_model = jtm.ParameterModel(
    [jtd.TransDist(sps.norm, pars1),
     jtd.TransDist(sps.norm, pars2)])
# Create the "observed" data
# Need extra axes for matching shape of many simulated datasets; the
# two leading np.newaxis entries turn the 2-vector into shape (1, 1, 2)
observed_data = np.array([6.5, 7.5])[np.newaxis, np.newaxis, :]

# Define the null hypothesis: all three parameters set to zero
null_parameters = {'mu': 0, 'mu1': 0, 'mu2': 0}


# Define functions to get good starting guesses for fitting simulated data
def get_seeds(samples):
    """Return starting guesses for 'mu1' and 'mu2'.

    The first and second entries along the last axis of samples are taken
    and b[0] and b[1] are subtracted from them respectively.
    """
    first, second = samples[..., 0], samples[..., 1]
    return dict(mu1=first - b[0], mu2=second - b[1])