Example #1
def setup(dm, key, data_list, rate_stoch):
    """ Generate the PyMC variables for a log-normal model of
    a function of age

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
      
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the log-normal likelihood function

    rate_stoch : pymc.Stochastic
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      log-normal model.  vars['rate_stoch'] is of particular
      relevance, for details see the beta_binomial_model
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    vars['rate_stoch'] = rate_stoch

    # set up priors and observed data
    prior_str = dm.get_priors(key)
    generate_prior_potentials(vars, prior_str, est_mesh)

    vars['observed_rates'] = []
    for d in data_list:
        age_indices = indices_for_range(est_mesh, d['age_start'], d['age_end'])
        age_weights = d.get('age_weights', np.ones(len(age_indices)) / len(age_indices))

        lb, ub = dm.bounds_per_1(d)
        se = (np.log(ub) - np.log(lb)) / (2. * 1.96)
        if np.isnan(se) or se <= 0.:
            se = 1.
        print('data %d: log(value) = %f, se = %f' % (d['id'], np.log(dm.value_per_1(d)), se))
        
        @mc.observed
        @mc.stochastic(name='obs_%d' % d['id'])
        def obs(f=vars['rate_stoch'],
                age_indices=age_indices,
                age_weights=age_weights,
                value=np.log(dm.value_per_1(d)),
                tau=se**-2, data=d):
            f_i = rate_for_range(f, age_indices, age_weights)
            return mc.normal_like(value, np.log(f_i), tau)
        vars['observed_rates'].append(obs)
        
    return vars
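The standard-error conversion above reads the bounds from dm.bounds_per_1(d) as a 95% interval that is symmetric on the log scale. A minimal check of that conversion, using hypothetical bounds independent of dismod3:

import numpy as np

lb, ub = .01, .04                             # assumed 95% interval on the rate
se = (np.log(ub) - np.log(lb)) / (2. * 1.96)  # same formula as in setup above
mu = (np.log(ub) + np.log(lb)) / 2.
# the log-scale interval implied by (mu, se) recovers the original bounds
assert np.allclose([np.exp(mu - 1.96*se), np.exp(mu + 1.96*se)], [lb, ub])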
Example #2
def setup(dm, key, data_list=[], rate_stoch=None, emp_prior={}, lower_bound_data=[]):
    """ Generate the PyMC variables for a negative-binomial model of
    a single rate function

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
      
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the negative-binomial likelihood function

    rate_stoch : pymc.Stochastic, optional
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).
      This is used to link rate stochs into a larger model,
      for example.

    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

          >>> t, r, y, s = type_region_year_sex_from_key(key)
          >>> emp_prior = dm.get_empirical_prior(t)

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      rate model.  vars['rate_stoch'] is of particular
      relevance; this is what is used to link the rate model
      into more complicated models, like the generic disease model.
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    param_mesh = dm.get_param_age_mesh()

    if np.any(np.diff(est_mesh) != 1):
        raise ValueError('Gaps in estimation age mesh must all equal 1')

    # calculate effective sample size for all data and lower bound data
    dm.calc_effective_sample_size(data_list)
    dm.calc_effective_sample_size(lower_bound_data)

    # generate regional covariates
    covariate_dict = dm.get_covariates()
    X_region, X_study = regional_covariates(key, covariate_dict)

    # use confidence prior from prior_str
    mu_delta = 100.
    sigma_delta = 1.
    from dismod3.settings import PRIOR_SEP_STR
    for line in dm.get_priors(key).split(PRIOR_SEP_STR):
        prior = line.strip().split()
        if len(prior) == 0:
            continue
        if prior[0] == 'heterogeneity':
            mu_delta = float(prior[1])
            sigma_delta = float(prior[2])

    # use the empirical prior mean if it is available
    if len(set(emp_prior.keys()) & set(['alpha', 'beta', 'gamma'])) == 3:
        mu_alpha = np.array(emp_prior['alpha'])
        sigma_alpha = np.maximum(.1, emp_prior['sigma_alpha'])
        alpha = np.array(emp_prior['alpha'])
        vars.update(region_coeffs=alpha)

        beta = np.array(emp_prior['beta'])
        sigma_beta = np.maximum(.1, emp_prior['sigma_beta'])
        vars.update(study_coeffs=beta)

        mu_gamma = np.array(emp_prior['gamma'])
        sigma_gamma = np.maximum(.1, emp_prior['sigma_gamma'])

        # leave mu_delta and sigma_delta as they were set in the expert prior
    else:
        import dismod3.regional_similarity_matrices as similarity_matrices
        
        n = len(X_region)
        mu_alpha = np.zeros(n)
        sigma_alpha = .01
        C_alpha = similarity_matrices.regions_nested_in_superregions(n, sigma_alpha)
        #C_alpha = similarity_matrices.all_related_equally(n, sigma_alpha)
        alpha = mc.MvNormalCov('region_coeffs_%s' % key, mu=mu_alpha,
                            C=C_alpha,
                            value=mu_alpha)
        vars.update(region_coeffs=alpha)

        mu_beta = np.zeros(len(X_study))
        sigma_beta = .1
        beta = mc.Normal('study_coeffs_%s' % key, mu=mu_beta, tau=sigma_beta**-2., value=mu_beta)
        vars.update(study_coeffs=beta)

        mu_gamma = -5.*np.ones(len(est_mesh))
        sigma_gamma = 10.*np.ones(len(est_mesh))

    if mu_delta != 0.:
        log_delta = mc.Uninformative('log_dispersion_%s' % key, value=np.log(mu_delta-1))
        delta = mc.Lambda('dispersion_%s' % key, lambda x=log_delta: 1. + np.exp(x))
        @mc.potential(name='potential_dispersion_%s' % key)
        def delta_pot(delta=delta, mu=mu_delta, tau=sigma_delta**-2):
            return mc.normal_like(delta, mu, tau)
        
        vars.update(dispersion=delta, log_dispersion=log_delta, dispersion_potential=delta_pot, dispersion_step_sd=.1*sigma_delta)

    if len(sigma_gamma) == 1:
        sigma_gamma = sigma_gamma[0]*np.ones(len(est_mesh))

    # create variable for the interpolated rate;
    # also create variable for age-specific rate function, if it does not yet exist
    if rate_stoch:
        # if the rate_stoch already exists, for example prevalence in the generic model,
        # we use it to back-calculate mu and eventually gamma
        mu = rate_stoch

        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(mu=mu, Xa=X_region, Xb=X_study, alpha=alpha, beta=beta):
            return np.log(1.e-8 + mu) - np.dot(alpha, Xa) - np.dot(beta, Xb)

        @mc.potential(name='age_coeffs_potential_%s' % key)
        def gamma_potential(gamma=gamma, mu_gamma=mu_gamma, tau_gamma=1./sigma_gamma[param_mesh]**2, param_mesh=param_mesh):
            return mc.normal_like(gamma[param_mesh], mu_gamma[param_mesh], tau_gamma)

        vars.update(rate_stoch=mu, age_coeffs=gamma, age_coeffs_potential=gamma_potential)
    else:
        # if the rate_stoch does not yet exist, we make gamma a stoch, and use it to calculate mu
        # for computational efficiency, gamma is a linearly interpolated version of gamma_mesh
        initial_gamma = mu_gamma

        # FOR TEST: use a linear age pattern for remission, since there is not sufficient data for more complicated fit
        #if key.find('remission') == 0:
        #    param_mesh = [0., 100.]
        #param_mesh = est_mesh # try full mesh; how much does this slow things down, really?  answer: a lot

        gamma_mesh = mc.Normal('age_coeffs_mesh_%s' % key, mu=mu_gamma[param_mesh], tau=sigma_gamma[param_mesh]**-2, value=initial_gamma[param_mesh])

        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(gamma_mesh=gamma_mesh, param_mesh=param_mesh, est_mesh=est_mesh):
            return interpolate(param_mesh, gamma_mesh, est_mesh)

        @mc.deterministic(name=key)
        def mu(Xa=X_region, Xb=X_study, alpha=alpha, beta=beta, gamma=gamma):
            return predict_rate([Xa, Xb], alpha, beta, gamma, lambda f, age: f, est_mesh)

        # Create a guess at the covariance matrix for MCMC proposals to update gamma_mesh
        from pymc.gp.cov_funs import matern
        a = np.atleast_2d(param_mesh).T
        C = matern.euclidean(a, a, diff_degree = 2, amp = 1.**2, scale = 10.)

        vars.update(age_coeffs_mesh=gamma_mesh, age_coeffs=gamma, rate_stoch=mu, age_coeffs_mesh_step_cov=.005*np.array(C))

        # adjust value of gamma_mesh based on priors, if necessary
        # TODO: implement more adjustments, currently only adjusted based on at_least priors
        for line in dm.get_priors(key).split(PRIOR_SEP_STR):
            prior = line.strip().split()
            if len(prior) == 0:
                continue
            if prior[0] == 'at_least':
                delta_gamma = np.log(np.maximum(mu.value, float(prior[1]))) - np.log(mu.value)
                gamma_mesh.value = gamma_mesh.value + delta_gamma[param_mesh]

    # create potentials for priors
    generate_prior_potentials(vars, dm.get_priors(key), est_mesh)


    # create effect coefficient eta to explain overdispersion; note that the
    # likelihoods below receive eta as the constant 0., so the bias covariate
    # Z is currently inert
    eta = mc.Laplace('eta_%s' % key, mu=0., tau=1., value=0.)
    vars['eta'] = eta
    
    # create observed stochastics for data
    vars['data'] = []

    if mu_delta != 0.:  
        value = []
        N = []
        Xa = []
        Xb = []
        ai = []
        aw = []

        # overdispersion-explaining covariates
        Z = []
    
        for d in data_list:
            try:
                age_indices, age_weights, Y_i, N_i = values_from(dm, d)
            except ValueError:
                debug('WARNING: could not calculate likelihood for data %d' % d['id'])
                continue

            value.append(Y_i*N_i)
            N.append(N_i)
            Xa.append(covariates(d, covariate_dict)[0])
            Xb.append(covariates(d, covariate_dict)[1])
            ai.append(age_indices)
            aw.append(age_weights)

            Z.append(float(d.get('bias', 0.)))

            vars['data'].append(d)

        N = np.array(N)
        Z = np.array(Z)
        vars['effective_sample_size'] = list(N)
        
    if len(vars['data']) > 0:
        @mc.deterministic(name='rate_%s' % key)
        def rates(N=N,
                Xa=Xa, Xb=Xb,
                alpha=alpha, beta=beta, gamma=gamma,
                bounds_func=vars['bounds_func'],
                age_indices=ai,
                age_weights=aw):

            # calculate study-specific rate function
            shifts = np.exp(np.dot(Xa, alpha) + np.dot(Xb, np.atleast_1d(beta)))
            exp_gamma = np.exp(gamma)
            mu_i = [np.dot(weights, bounds_func(s_i * exp_gamma[ages], ages)) for s_i, ages, weights in zip(shifts, age_indices, age_weights)]  # TODO: try vectorizing this loop to increase speed

            return mu_i
        vars['expected_rates'] = rates
        
        @mc.observed
        @mc.stochastic(name='data_%s' % key)
        def obs(value=value, N=N,
                mu_i=rates,
                delta=delta,
                Z=Z, eta=0.):
            logp = mc.negative_binomial_like(value, N*mu_i, delta + eta*Z)
            return logp

        vars['observed_counts'] = obs

        @mc.deterministic(name='predicted_data_%s' % key)
        def predictions(value=value, N=N,
                        mu_i=rates,
                        delta=delta,
                        Z=Z, eta=0.):
            return mc.rnegative_binomial(N*mu_i, delta + eta*Z)/N

        vars['predicted_rates'] = predictions
        debug('likelihood of %s contains %d rates' % (key, len(vars['data'])))

    # now do the same thing for the lower bound data
    # TODO: refactor to remove duplicated code
    vars['lower_bound_data'] = []
    value = []
    N = []
    Xa = []
    Xb = []
    ai = []
    aw = []
    for d in lower_bound_data:
        try:
            age_indices, age_weights, Y_i, N_i = values_from(dm, d)
        except ValueError:
            debug('WARNING: could not calculate likelihood for data %d' % d['id'])
            continue

        value.append(Y_i*N_i)
        N.append(N_i)
        Xa.append(covariates(d, covariate_dict)[0])
        Xb.append(covariates(d, covariate_dict)[1])
        ai.append(age_indices)
        aw.append(age_weights)

        vars['lower_bound_data'].append(d)

    N = np.array(N)
    value = np.array(value)

    if len(vars['lower_bound_data']) > 0:
        @mc.observed
        @mc.stochastic(name='lower_bound_data_%s' % key)
        def obs_lb(value=value, N=N,
                   Xa=Xa, Xb=Xb,
                   alpha=alpha, beta=beta, gamma=gamma,
                   bounds_func=vars['bounds_func'],
                   delta=delta,
                   age_indices=ai,
                   age_weights=aw):

            # calculate study-specific rate function
            shifts = np.exp(np.dot(Xa, alpha) + np.dot(Xb, np.atleast_1d(beta)))
            exp_gamma = np.exp(gamma)
            mu_i = [np.dot(weights, bounds_func(s_i * exp_gamma[ages], ages)) for s_i, ages, weights in zip(shifts, age_indices, age_weights)]  # TODO: try vectorizing this loop to increase speed
            rate_param = mu_i*N
            violated_bounds = np.nonzero(rate_param < value)
            logp = mc.negative_binomial_like(value[violated_bounds], rate_param[violated_bounds], delta)
            return logp

        vars['observed_lower_bounds'] = obs_lb
        debug('likelihood of %s contains %d lowerbounds' % (key, len(vars['lower_bound_data'])))

    return vars
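In PyMC 2's parameterization, negative_binomial_like(x, mu, alpha) has mean mu and variance mu + mu**2/alpha, so the dispersion delta above interpolates between strong overdispersion and the Poisson limit as it grows. A quick simulation sketch of that reading, with hypothetical values:

import numpy as np
import pymc as mc

mu, delta = 50., 10.
draws = np.array([mc.rnegative_binomial(mu, delta) for _ in range(20000)])
print(draws.mean())                     # close to mu = 50
print(draws.var(), mu + mu**2 / delta)  # both close to 300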
Example #3
def setup(dm, key, data_list, rate_stoch=None, emp_prior={}):
    """ Generate the PyMC variables for a beta binomial model of
    a single rate function

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
      
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the beta-binomial likelihood function

    rate_stoch : pymc.Stochastic, optional
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).
      This is used to link beta-binomial stochs into a larger model,
      for example.

    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

          >>> t, r, y, s = type_region_year_sex_from_key(key)
          >>> emp_prior = dm.get_empirical_prior(t)
      

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      beta binomial model.  vars['rate_stoch'] is of particular
      relevance; this is what is used to link the beta-binomial model
      into more complicated models, like the generic disease model.

    Details
    -------
    The beta binomial model parameters are the following:
      * the mean age-specific rate function
      * dispersion of this mean
      * the p_i value for each data observation that has a standard
        error (data observations that do not have standard errors
        recorded are fit as observations of the beta r.v., while
        observations with standard errors recorded have a latent
        variable for the beta, and an observed binomial r.v.).
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    if np.any(np.diff(est_mesh) != 1):
        raise ValueError, "ERROR: Gaps in estimation age mesh must all equal 1"

    # set up age-specific rate function, if it does not yet exist
    if not rate_stoch:
        param_mesh = dm.get_param_age_mesh()

        if "mu" in emp_prior:
            initial_value = emp_prior["mu"]
        else:
            initial_value = dm.get_initial_value(key)

        # find the logit of the initial values, which is a little bit
        # of work because initial values are sampled from the est_mesh,
        # but the logit_initial_values are needed on the param_mesh
        logit_initial_value = mc.logit(interpolate(est_mesh, initial_value, param_mesh))

        logit_rate = mc.Normal(
            "logit(%s)" % key, mu=-5.0 * np.ones(len(param_mesh)), tau=1.0e-2, value=logit_initial_value
        )
        # logit_rate = [mc.Normal('logit(%s)_%d' % (key, a), mu=-5., tau=1.e-2) for a in param_mesh]
        vars["logit_rate"] = logit_rate

        @mc.deterministic(name=key)
        def rate_stoch(logit_rate=logit_rate):
            return interpolate(param_mesh, mc.invlogit(logit_rate), est_mesh)

    if "mu" in emp_prior:

        @mc.potential(name="empirical_prior_%s" % key)
        def emp_prior_potential(f=rate_stoch, mu=emp_prior["mu"], tau=1.0 / np.array(emp_prior["se"]) ** 2):
            return mc.normal_like(f, mu, tau)

        vars["empirical_prior"] = emp_prior_potential

    vars["rate_stoch"] = rate_stoch

    # create stochastic variable for over-dispersion "random effect"
    mu_od = emp_prior.get("dispersion", 0.001)
    dispersion = mc.Gamma("dispersion_%s" % key, alpha=10.0, beta=10.0 / mu_od)
    vars["dispersion"] = dispersion

    @mc.deterministic(name="alpha_%s" % key)
    def alpha(rate=rate_stoch, dispersion=dispersion):
        return rate / dispersion ** 2

    @mc.deterministic(name="beta_%s" % key)
    def beta(rate=rate_stoch, dispersion=dispersion):
        return (1.0 - rate) / dispersion ** 2

    vars["alpha"] = alpha
    vars["beta"] = beta

    # create potentials for priors
    vars["priors"] = generate_prior_potentials(dm.get_priors(key), est_mesh, rate_stoch, dispersion)

    # create latent and observed stochastics for data
    vars["data"] = data_list
    vars["ab"] = []
    vars["latent_p"] = []
    vars["observations"] = []

    for d in data_list:
        # set up observed stochs for all relevant data
        id = d["id"]

        if d["value"] == MISSING:
            print "WARNING: data %d missing value" % id
            continue

        # ensure all rate data is valid
        d_val = dm.value_per_1(d)
        d_se = dm.se_per_1(d)

        if d_val < 0 or d_val > 1:
            print "WARNING: data %d not in range [0,1]" % id
            continue

        if d["age_start"] < est_mesh[0] or d["age_end"] > est_mesh[-1]:
            raise ValueError, "Data %d is outside of estimation range---([%d, %d] is not inside [%d, %d])" % (
                d["id"],
                d["age_start"],
                d["age_end"],
                est_mesh[0],
                est_mesh[-1],
            )

        age_indices = indices_for_range(est_mesh, d["age_start"], d["age_end"])
        age_weights = d["age_weights"]

        @mc.deterministic(name="a_%d^%s" % (id, key))
        def a_i(alpha=alpha, age_indices=age_indices, age_weights=age_weights):
            return rate_for_range(alpha, age_indices, age_weights)

        @mc.deterministic(name="b_%d^%s" % (id, key))
        def b_i(beta=beta, age_indices=age_indices, age_weights=age_weights):
            return rate_for_range(beta, age_indices, age_weights)

        vars["ab"] += [a_i, b_i]

        if d_se > 0:
            # if the data has a standard error, model it as a realization
            # of a beta binomial r.v.
            latent_p_i = mc.Beta(
                "latent_p_%d^%s" % (id, key), alpha=a_i, beta=b_i, value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO)
            )
            vars["latent_p"].append(latent_p_i)

            denominator = d_val * (1 - d_val) / d_se ** 2.0
            numerator = d_val * denominator
            obs_binomial = mc.Binomial(
                "data_%d^%s" % (id, key), value=numerator, n=denominator, p=latent_p_i, observed=True
            )
            vars["observations"].append(obs_binomial)
        else:
            # if the data is a point estimate with no uncertainty
            # recorded, model it as a realization of a beta r.v.
            obs_p_i = mc.Beta(
                "latent_p_%d" % id, value=trim(d_val, NEARLY_ZERO, 1 - NEARLY_ZERO), alpha=a_i, beta=b_i, observed=True
            )
            vars["observations"].append(obs_p_i)

    return vars
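The numerator/denominator pair above is a moment-matching device: a binomial with n = p*(1-p)/se**2 trials has standard error exactly se on its proportion, so a value and its standard error are recoded as x successes out of n. A worked check with hypothetical numbers:

import numpy as np

d_val, d_se = .2, .02
n = d_val * (1. - d_val) / d_se**2  # "denominator" in the code above
x = d_val * n                       # "numerator" in the code above
print(x, n)                         # 80.0 400.0
assert np.allclose(np.sqrt(d_val * (1. - d_val) / n), d_se)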
Example #4
def setup(dm, key, data_list, rate_stoch=None, emp_prior={}):
    """ Generate the PyMC variables for a logit-normal model of
    a single rate function

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
      
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the logit-normal likelihood function

    rate_stoch : pymc.Stochastic, optional
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).
      This is used to link rate stochs into a larger model,
      for example.

    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

          >>> t, r, y, s = type_region_year_sex_from_key(key)
          >>> emp_prior = dm.get_empirical_prior(t)

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      rate model.  vars['rate_stoch'] is of particular
      relevance; this is what is used to link the rate model
      into more complicated models, like the generic disease model.
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    param_mesh = dm.get_param_age_mesh()

    if np.any(np.diff(est_mesh) != 1):
        raise ValueError('Gaps in estimation age mesh must all equal 1')

    # for debugging
    #if key == 'incidence+asia_southeast+1990+female':
    #    import pdb; pdb.set_trace()

    # generate regional covariates
    X_region, X_study = regional_covariates(key)

    # use the empirical prior mean if it is available
    if set(emp_prior.keys()) == set(['alpha', 'beta', 'gamma', 'sigma']):
        mu_alpha = np.array(emp_prior['alpha'])
        sigma_alpha = .01

        beta = np.array(emp_prior['beta'])

        mu_gamma = np.array(emp_prior['gamma'])
        sigma_gamma = emp_prior['sigma']

        mu_sigma = .01
        conf_sigma = 1000.
    else:
        mu_alpha = np.zeros(len(X_region))
        sigma_alpha = 1.

        mu_beta = np.zeros(len(X_study))
        sigma_beta = .01
        beta = mc.Normal('study_coeffs_%s' % key, mu=mu_beta, tau=1/sigma_beta**2, value=mu_beta)
        vars.update(study_coeffs=beta)

        mu_gamma = -5.*np.ones(len(est_mesh))
        sigma_gamma = 1.

        mu_sigma = .1
        conf_sigma = 10.

    alpha = mc.Normal('region_coeffs_%s' % key, mu=mu_alpha, tau=1/sigma_alpha**2, value=mu_alpha)
    vars.update(region_coeffs=alpha)


    log_sigma = mc.Uninformative('log(dispersion_%s)' % key, value=np.log(mu_sigma))
    @mc.deterministic(name='dispersion_%s' % key)
    def sigma(log_sigma=log_sigma):
        return np.exp(log_sigma)
    # TODO: replace this potential in the generate_prior_potentials function if confidence is set
    @mc.potential(name='dispersion_potential_%s' % key)
    def sigma_potential(sigma=sigma, alpha=conf_sigma, beta=conf_sigma/mu_sigma):
        return mc.gamma_like(sigma, alpha, beta)
    vars.update(log_dispersion=log_sigma,
                dispersion=sigma,
                dispersion_potential=sigma_potential)


    # create variable for the interpolated logit rate;
    # also create variable for age-specific rate function, if it does not yet exist
    if rate_stoch:
        # if the rate_stoch already exists, for example prevalence in the generic model,
        # we use it to back-calculate mu and eventually gamma
        @mc.deterministic(name='logit_%s' % key)
        def mu(invlogit_mu=rate_stoch):
            return mc.logit(invlogit_mu)

        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(mu=mu, Xa=X_region, Xb=X_study, alpha=alpha, beta=beta):
            return mu - np.dot(alpha, Xa) - np.dot(beta, Xb)

        @mc.potential(name='age_coeffs_potential_%s' % key)
        def gamma_potential(gamma=gamma, mu_gamma=mu_gamma, tau_gamma=1./sigma_gamma**2, param_mesh=param_mesh):
            return mc.normal_like(gamma[param_mesh], mu_gamma[param_mesh], tau_gamma)

        vars.update(rate_stoch=rate_stoch, logit_rate_stoch=mu, age_coeffs=gamma, age_coeffs_potential=gamma_potential)
        
    else:
        # if the rate_stoch does not yet exist, we make gamma a stoch, and use it to calculate mu
        # for computational efficiency, gamma is a linearly interpolated version of gamma_mesh
        initial_gamma = mu_gamma
        gamma_mesh = mc.Normal('age_coeffs_mesh_%s' % key, mu=mu_gamma[param_mesh], tau=1/sigma_gamma**2, value=initial_gamma[param_mesh])
        
        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(gamma_mesh=gamma_mesh, param_mesh=param_mesh, est_mesh=est_mesh):
            return interpolate(param_mesh, gamma_mesh, est_mesh)

        @mc.deterministic(name='logit_%s' % key)
        def mu(Xa=X_region, Xb=X_study, alpha=alpha, beta=beta, gamma=gamma):
            return np.dot(alpha, Xa) + np.dot(beta, Xb) + gamma

        @mc.deterministic(name=key)
        def rate_stoch(mu=mu):
            return mc.invlogit(mu)

        vars.update(age_coeffs_mesh=gamma_mesh, age_coeffs=gamma, logit_rate_stoch=mu, rate_stoch=rate_stoch)


    # create potentials for priors
    vars['priors'] = generate_prior_potentials(dm.get_priors(key), est_mesh, rate_stoch)
    
    
    # create observed stochastics for data
    vars['data'] = data_list
    vars['observed_rates'] = []

    min_val = min([1.e-9] + [dm.value_per_1(d) for d in data_list if dm.value_per_1(d) > 0]) # TODO: assess validity of this minimum value
    max_se = max([.000001] + [dm.se_per_1(d) for d in data_list if dm.se_per_1(d) > 0])  # TODO: assess validity of this maximum std err

    #import pdb; pdb.set_trace()
    for d in data_list:
        try:
            age_indices, age_weights, logit_val, logit_se = values_from(dm, d, min_val, max_se)
        except ValueError:
            continue

        @mc.observed
        @mc.stochastic(name='data_%d' % d['id'])
        def obs(value=logit_val, logit_se=logit_se,
                X=covariates(d),
                alpha=alpha, beta=beta, gamma=gamma, sigma=sigma,
                age_indices=age_indices,
                age_weights=age_weights):

            # calculate study-specific rate function
            mu = predict_logit_rate(X, alpha, beta, gamma)
            mu_i = rate_for_range(mu, age_indices, age_weights)
            
            tau_i = 1. / (sigma**2 + logit_se**2)
            logp = mc.normal_like(x=value, mu=mu_i, tau=tau_i)
            return logp
            
        vars['observed_rates'].append(obs)
        
    return vars
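In the observed stochastic above, the model-level dispersion sigma and the study-level sampling error logit_se act as independent normal noise on the logit scale, so their variances add before being inverted into a precision. With hypothetical values:

import numpy as np

sigma, logit_se = .3, .4
tau_i = 1. / (sigma**2 + logit_se**2)
print(tau_i)                # 4.0, the combined precision
print(1. / np.sqrt(tau_i))  # 0.5, the combined standard deviation on the logit scale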
Example #5
def setup(dm, key, data_list, rate_stoch):
    """ Generate the PyMC variables for a normal model of
    a function of age

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
      
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the normal likelihood function

    rate_stoch : pymc.Stochastic
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimate_age_mesh()).

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      normal model.  vars['rate_stoch'] is of particular
      relevance, for details see the beta_binomial_model
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    if np.any(np.diff(est_mesh) != 1):
        raise ValueError('Gaps in estimation age mesh must all equal 1')

    vars['rate_stoch'] = rate_stoch

    # set up priors and observed data
    prior_str = dm.get_priors(key)
    generate_prior_potentials(vars, prior_str, est_mesh)

    vars['observed_rates'] = []
    for d in data_list:
        # set up observed stochs for all relevant data
        id = d['id']
        
        if d['value'] == MISSING:
            print('WARNING: data %d missing value' % id)
            continue

        # ensure all rate data is valid
        d_val = dm.value_per_1(d)
        d_se = dm.se_per_1(d)

        if d['age_start'] < est_mesh[0] or d['age_end'] > est_mesh[-1]:
            raise ValueError('Data %d is outside of estimation range---([%d, %d] is not inside [%d, %d])'
                             % (d['id'], d['age_start'], d['age_end'], est_mesh[0], est_mesh[-1]))

        age_indices = indices_for_range(est_mesh, d['age_start'], d['age_end'])
        age_weights = d.get('age_weights', np.ones(len(age_indices)) / len(age_indices))

        # data must have standard error to use normal model
        if d_se == 0:
            raise ValueError('Data %d has invalid standard error' % d['id'])

        @mc.observed
        @mc.stochastic(name='obs_%d' % id)
        def obs(f=rate_stoch,
                age_indices=age_indices,
                age_weights=age_weights,
                value=d_val,
                tau=1./(d_se)**2):
            f_i = rate_for_range(f, age_indices, age_weights)
            return mc.normal_like(value, f_i, tau)
        vars['observed_rates'].append(obs)
        
    return vars
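rate_for_range is assumed here to collapse the age-specific rate function to one value per data point by averaging over the observation's age interval; a minimal stand-in with hypothetical numbers illustrates the weighting:

import numpy as np

f = np.linspace(.01, .05, 101)   # rate function on an age mesh 0..100
age_indices = np.arange(20, 31)  # a data point covering ages 20-30
age_weights = np.ones(11) / 11.  # uniform weights, the default in setup above
f_i = np.dot(age_weights, f[age_indices])
print(f_i)                       # ~0.02, the average rate over ages 20-30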