Example #1
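A unit test of expert_prior_model.level_constraints: it builds a spline age pattern, samples it briefly with no priors, then rebuilds it with level_value and level_bound expert priors and samples again. The snippets on this page assume roughly the following imports (a sketch using the module names that appear in the code; exact packaging may differ between projects):

import pylab as pl         # numpy + matplotlib helpers, aliased pl throughout
import pymc as mc          # PyMC 2.x (this is Python 2 era code)
import data                # ModelData / ModelVars containers
import age_pattern         # spline age-pattern model
import expert_prior_model  # level and derivative constraint priors
# Example #3 additionally uses covariate_model, rate_model, similarity_prior_model,
# age_integrating_model, and dismod3 from the same codebase.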
def test_expert_model_level_value():
    d = data.ModelData()
    ages = pl.arange(101)

    # create model with no priors
    vars = {}
    vars.update(
        age_pattern.age_pattern('test',
                                ages,
                                knots=pl.arange(0, 101, 5),
                                smoothing=.01))
    vars.update(
        expert_prior_model.level_constraints('test', {}, vars['mu_age'], ages))

    # fit model
    m = mc.MCMC(vars)
    m.sample(3)

    # create model with expert priors
    parameters = {}
    parameters['level_value'] = dict(value=.1, age_below=15, age_above=95)
    parameters['level_bound'] = dict(upper=.01, lower=.001)
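    # pin the level to .1 for ages below 15 and above 95, and bound it
    # within [.001, .01] (values chosen to exercise both priors at once)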
    vars = {}
    vars.update(
        age_pattern.age_pattern('test',
                                ages,
                                knots=pl.arange(0, 101, 5),
                                smoothing=.01))
    vars.update(
        expert_prior_model.level_constraints('test', parameters,
                                             vars['mu_age'], ages))

    # fit model
    m = mc.MCMC(vars)
    m.sample(3)
Example #2
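A cell from one of the book-figure scripts: it fits the same spline model under three different level_value priors, starting each MAP fit from the true knot values, and stores the fitted levels at the knots for plotting. It relies on names defined in earlier cells (ages, X, Y, Y_true, tau, book_graphics); a minimal sketch of that setup, with illustrative values that are assumptions rather than the book's actual data:

import pylab as pl
import pymc as mc

ages = pl.arange(101)
Y_true = .1 + .002 * ages                # assumed true age pattern
X = pl.arange(0, 101, 5)                 # assumed observation ages
Y = Y_true[X] + .01 * pl.randn(len(X))   # assumed noisy observations
tau = .01**-2                            # assumed observation precision
# book_graphics is the book's plotting-helper module (figure-size params, etc.)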
# <codecell>

### @export 'level_value-spline_fig'

knots = [0, 15, 60, 100]
#knots = range(0,101,10)

fig2_data = {}
for i, params in enumerate([dict(label='$h(a) = .1$ for $a<15$', subt='(a)', value=.1),
                            dict(label='$h(a) = .5$ for $a<15$', subt='(b)', value=.5),
                            dict(label='$h(a) = 1$ for $a<15$', subt='(c)', value=1.)]):
    
    vars = age_pattern.age_pattern('t', ages=ages, knots=knots, smoothing=pl.inf)
    vars.update(expert_prior_model.level_constraints('t',
                                                     dict(level_value=dict(age_before=15, age_after=101, value=params.pop('value')),
                                                          level_bounds=dict(upper=pl.inf, lower=-pl.inf)),
                                                     vars['mu_age'], ages))
    vars['mu_pred'] = mc.Lambda('mu_pred', lambda mu_age=vars['mu_age'], X=X : mu_age[X])
    vars['Y'] = mc.Normal('Y', mu=vars['mu_pred'], tau=tau, value=Y, observed=True)

    for j, k_i in enumerate(knots):
        vars['gamma'][j].value = Y_true[k_i]
    mc.MAP(vars).fit(method='fmin_powell', tol=.00001, verbose=0)
    
    fig2_data[params['subt']] = vars['mu_age'].value[knots]
    
fig2 = pl.figure(**book_graphics.full_minus_page_params)

ax21 = fig2.add_subplot(3,1,1)
ax21.plot(X, Y, 'ks', ms=4, mew=2)
ax21.plot(ages[knots], fig2_data['(a)'], 'k-', linewidth=2)
Example #3
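The full model-construction routine (ism.py in the gbd codebase): it builds or reuses a spline age pattern, applies expert level and derivative constraints, optionally ties the estimate to its parent in the area hierarchy, age-standardizes predictions over each data row's age interval, attaches covariate models, and selects a likelihood according to rate_type.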
def age_specific_rate(model,
                      data_type,
                      reference_area='all',
                      reference_sex='total',
                      reference_year='all',
                      mu_age=None,
                      mu_age_parent=None,
                      sigma_age_parent=None,
                      rate_type='neg_binom',
                      lower_bound=None,
                      interpolation_method='linear',
                      include_covariates=True,
                      zero_re=False):
    # TODO: expose (and document) interface for alternative rate_type as well as other options,
    # record reference values in the model
    """ Generate PyMC objects for model of epidemological age-interval data

    :Parameters:
      - `model` : data.ModelData
      - `data_type` : str, one of 'i', 'r', 'f', 'p', or 'pf'
      - `reference_area, reference_sex, reference_year` : the node of the model to fit consistently
      - `mu_age` : pymc.Node, will be used as the age pattern, set to None if not needed
      - `mu_age_parent` : pymc.Node, will be used as the age pattern of the parent of the root area, set to None if not needed
      - `sigma_age_parent` : pymc.Node, will be used as the standard deviation of the age pattern, set to None if not needed
      - `rate_type` : str, optional. One of 'beta_binom', 'binom', 'log_normal', 'neg_binom', 'normal', 'offset_log_normal', or 'poisson'
      - `lower_bound` : str, optional. Data type whose rows serve as lower bounds on the rate, fit with a negative-binomial lower-bound model
      - `interpolation_method` : str, optional, one of 'linear', 'nearest', 'zero', 'slinear', 'quadratic', or 'cubic'
      - `include_covariates` : boolean
      - `zero_re` : boolean, change one stoch from each set of siblings in area hierarchy to a 'sum to zero' deterministic

    :Results:
      - Returns dict of PyMC objects, including 'pi', the covariate adjusted predicted values for each row of data

    """
    name = data_type
    import data
    result = data.ModelVars()

    if (mu_age_parent != None and pl.any(pl.isnan(mu_age_parent))) \
           or (sigma_age_parent != None and pl.any(pl.isnan(sigma_age_parent))):
        mu_age_parent = None
        sigma_age_parent = None
        print 'WARNING: nan found in parent mu/sigma.  Ignoring'

    ages = pl.array(model.parameters['ages'])
    data = model.get_data(data_type)
    if lower_bound:
        lb_data = model.get_data(lower_bound)
    parameters = model.parameters.get(data_type, {})
    area_hierarchy = model.hierarchy

    vars = dismod3.data.ModelVars()
    vars += dict(data=data)

    if 'parameter_age_mesh' in parameters:
        knots = pl.array(parameters['parameter_age_mesh'])
    else:
        knots = pl.arange(ages[0], ages[-1] + 1, 5)

    smoothing_dict = {
        'No Prior': pl.inf,
        'Slightly': .5,
        'Moderately': .05,
        'Very': .005
    }
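    # smoothing sets the scale of the spline smoothness penalty: pl.inf
    # disables the prior entirely, smaller values enforce smoother age patterns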
    if 'smoothness' in parameters:
        smoothing = smoothing_dict[parameters['smoothness']['amount']]
    else:
        smoothing = 0.

    if mu_age == None:
        vars.update(
            age_pattern.age_pattern(name,
                                    ages=ages,
                                    knots=knots,
                                    smoothing=smoothing,
                                    interpolation_method=interpolation_method))
    else:
        vars.update(dict(mu_age=mu_age, ages=ages))

    vars.update(
        expert_prior_model.level_constraints(name, parameters, vars['mu_age'],
                                             ages))
    vars.update(
        expert_prior_model.derivative_constraints(name, parameters,
                                                  vars['mu_age'], ages))

    if mu_age_parent != None:
        # set up a hierarchical prior on the similarity between the
        # consistent estimate here and the (inconsistent) estimate for its
        # parent in the area hierarchy
        #weight_dict = {'Unusable': 10., 'Slightly': 10., 'Moderately': 1., 'Very': .1}
        #weight = weight_dict[parameters['heterogeneity']]
        vars.update(
            similarity_prior_model.similar('parent_similarity_%s' % name,
                                           vars['mu_age'], mu_age_parent,
                                           sigma_age_parent, 0.))

        # also use this as the initial value for the age pattern, if it is not already specified
        if mu_age == None:
            if isinstance(mu_age_parent, mc.Node):  # TODO: test this code
                initial_mu = mu_age_parent.value
            else:
                initial_mu = mu_age_parent

            for i, k_i in enumerate(knots):
                vars['gamma'][i].value = (pl.log(
                    initial_mu[k_i - ages[0]])).clip(-12, 6)

    age_weights = pl.ones_like(
        vars['mu_age'].value
    )  # TODO: use age pattern appropriate to the rate type
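    # with uniform weights, age_standardize_approx simply averages mu_age over
    # each row's [age_start, age_end] interval, yielding mu_interval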
    if len(data) > 0:
        vars.update(
            age_integrating_model.age_standardize_approx(
                name, age_weights, vars['mu_age'], data['age_start'],
                data['age_end'], ages))

        # uncomment the following to effectively remove all effects
        #if 'random_effects' in parameters:
        #    for i in range(5):
        #        effect = 'sigma_alpha_%s_%d' % (name, i)
        #        parameters['random_effects'][effect] = dict(dist='TruncatedNormal', mu=.0001, sigma=.00001, lower=.00009, upper=.00011)
        #if 'fixed_effects' in parameters:
        #    for effect in ['x_sex', 'x_LDI_id_Updated_7July2011']:
        #        parameters['fixed_effects'][effect] = dict(dist='normal', mu=.0001, sigma=.00001)

        if include_covariates:
            vars.update(
                covariate_model.mean_covariate_model(name,
                                                     vars['mu_interval'],
                                                     data,
                                                     parameters,
                                                     model,
                                                     reference_area,
                                                     reference_sex,
                                                     reference_year,
                                                     zero_re=zero_re))
        else:
            vars.update({'pi': vars['mu_interval']})

        ## ensure that all data has uncertainty quantified appropriately
        # first replace all missing se from ci
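        # a symmetric 95% interval has width 2*1.96*se, so se = (upper - lower)/(2*1.96);
        # e.g. a CI of [.01, .05] implies se = .04/3.92 ~= .0102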
        missing_se = pl.isnan(
            data['standard_error']) | (data['standard_error'] < 0)
        data['standard_error'][missing_se] = (data['upper_ci'][missing_se] -
                                              data['lower_ci'][missing_se]) / (
                                                  2 * 1.96)

        # then replace all missing ess with se
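        # inverts the binomial approximation se = sqrt(p*(1-p)/n), giving n = p*(1-p)/se**2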
        missing_ess = pl.isnan(data['effective_sample_size'])
        data['effective_sample_size'][missing_ess] = data['value'][
            missing_ess] * (1 - data['value'][missing_ess]
                            ) / data['standard_error'][missing_ess]**2

        if rate_type == 'neg_binom':

            # warn and drop data that doesn't have effective sample size quantified, or is non-positive
            missing_ess = pl.isnan(data['effective_sample_size']) | (
                data['effective_sample_size'] < 0)
            if sum(missing_ess) > 0:
                print 'WARNING: %d rows of %s data have invalid quantification of uncertainty.' % (
                    sum(missing_ess), name)
                data['effective_sample_size'][missing_ess] = 0.0

            # warn and change data where ess is unreasonably huge
            large_ess = data['effective_sample_size'] >= 1.e10
            if sum(large_ess) > 0:
                print 'WARNING: %d rows of %s data have effective sample size exceeding 10 billion.' % (
                    sum(large_ess), name)
                data['effective_sample_size'][large_ess] = 1.e10

            if 'heterogeneity' in parameters:
                lower_dict = {'Slightly': 9., 'Moderately': 3., 'Very': 1.}
                lower = lower_dict[parameters['heterogeneity']]
            else:
                lower = 1.

            # special case, treat pf data as poisson
            if data_type == 'pf':
                lower = 1.e12
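                # a dispersion bounded far above the data scale makes the
                # negative binomial likelihood effectively Poisson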

            vars.update(
                covariate_model.dispersion_covariate_model(
                    name, data, lower, lower * 9.))

            vars.update(
                rate_model.neg_binom_model(name, vars['pi'], vars['delta'],
                                           data['value'],
                                           data['effective_sample_size']))
        elif rate_type == 'log_normal':

            # warn and drop data that doesn't have standard error quantified
            missing = pl.isnan(
                data['standard_error']) | (data['standard_error'] < 0)
            if sum(missing) > 0:
                print 'WARNING: %d rows of %s data have no quantification of uncertainty.' % (
                    sum(missing), name)
                data['standard_error'][missing] = 1.e6

            # TODO: allow options for alternative priors for sigma
            vars['sigma'] = mc.Uniform('sigma_%s' % name,
                                       lower=.0001,
                                       upper=1.,
                                       value=.01)
            #vars['sigma'] = mc.Exponential('sigma_%s'%name, beta=100., value=.01)
            vars.update(
                rate_model.log_normal_model(name, vars['pi'], vars['sigma'],
                                            data['value'],
                                            data['standard_error']))
        elif rate_type == 'normal':

            # warn and drop data that doesn't have standard error quantified
            missing = pl.isnan(
                data['standard_error']) | (data['standard_error'] < 0)
            if sum(missing) > 0:
                print 'WARNING: %d rows of %s data have no quantification of uncertainty.' % (
                    sum(missing), name)
                data['standard_error'][missing] = 1.e6

            vars['sigma'] = mc.Uniform('sigma_%s' % name,
                                       lower=.0001,
                                       upper=.1,
                                       value=.01)
            vars.update(
                rate_model.normal_model(name, vars['pi'], vars['sigma'],
                                        data['value'], data['standard_error']))
        elif rate_type == 'binom':
            missing_ess = pl.isnan(data['effective_sample_size']) | (
                data['effective_sample_size'] < 0)
            if sum(missing_ess) > 0:
                print 'WARNING: %d rows of %s data have invalid quantification of uncertainty.' % (
                    sum(missing_ess), name)
                data['effective_sample_size'][missing_ess] = 0.0
            vars += rate_model.binom(name, vars['pi'], data['value'],
                                     data['effective_sample_size'])
        elif rate_type == 'beta_binom':
            vars += rate_model.beta_binom(name, vars['pi'], data['value'],
                                          data['effective_sample_size'])
        elif rate_type == 'poisson':
            missing_ess = pl.isnan(data['effective_sample_size']) | (
                data['effective_sample_size'] < 0)
            if sum(missing_ess) > 0:
                print 'WARNING: %d rows of %s data have invalid quantification of uncertainty.' % (
                    sum(missing_ess), name)
                data['effective_sample_size'][missing_ess] = 0.0

            vars += rate_model.poisson(name, vars['pi'], data['value'],
                                       data['effective_sample_size'])
        elif rate_type == 'offset_log_normal':
            vars['sigma'] = mc.Uniform('sigma_%s' % name,
                                       lower=.0001,
                                       upper=10.,
                                       value=.01)
            vars += rate_model.offset_log_normal(name, vars['pi'],
                                                 vars['sigma'], data['value'],
                                                 data['standard_error'])
        else:
            raise Exception, 'rate_type "%s" not implemented' % rate_type
    else:
        if include_covariates:
            vars.update(
                covariate_model.mean_covariate_model(name, [],
                                                     data,
                                                     parameters,
                                                     model,
                                                     reference_area,
                                                     reference_sex,
                                                     reference_year,
                                                     zero_re=zero_re))
    if include_covariates:
        vars.update(
            expert_prior_model.covariate_level_constraints(
                name, model, vars, ages))

    if lower_bound and len(lb_data) > 0:
        vars['lb'] = age_integrating_model.age_standardize_approx(
            'lb_%s' % name, age_weights, vars['mu_age'], lb_data['age_start'],
            lb_data['age_end'], ages)

        if include_covariates:

            vars['lb'].update(
                covariate_model.mean_covariate_model('lb_%s' % name,
                                                     vars['lb']['mu_interval'],
                                                     lb_data,
                                                     parameters,
                                                     model,
                                                     reference_area,
                                                     reference_sex,
                                                     reference_year,
                                                     zero_re=zero_re))
        else:
            vars['lb'].update({'pi': vars['lb']['mu_interval']})

        vars['lb'].update(
            covariate_model.dispersion_covariate_model(
                'lb_%s' % name, lb_data, 1e12, 1e13)  # treat like poisson
        )

        ## ensure that all data has uncertainty quantified appropriately
        # first replace all missing se from ci
        missing_se = pl.isnan(
            lb_data['standard_error']) | (lb_data['standard_error'] <= 0)
        lb_data['standard_error'][missing_se] = (
            lb_data['upper_ci'][missing_se] -
            lb_data['lower_ci'][missing_se]) / (2 * 1.96)

        # then replace all missing ess with se
        missing_ess = pl.isnan(lb_data['effective_sample_size'])
        lb_data['effective_sample_size'][missing_ess] = lb_data['value'][
            missing_ess] * (1 - lb_data['value'][missing_ess]
                            ) / lb_data['standard_error'][missing_ess]**2

        # warn and drop lb_data that doesn't have effective sample size quantified
        missing_ess = pl.isnan(lb_data['effective_sample_size']) | (
            lb_data['effective_sample_size'] <= 0)
        if sum(missing_ess) > 0:
            print 'WARNING: %d rows of %s lower bound data have no quantification of uncertainty.' % (
                sum(missing_ess), name)
            lb_data['effective_sample_size'][missing_ess] = 1.0

        vars['lb'].update(
            rate_model.neg_binom_lower_bound_model(
                'lb_%s' % name, vars['lb']['pi'], vars['lb']['delta'],
                lb_data['value'], lb_data['effective_sample_size']))

    result[data_type] = vars
    return result
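A hedged usage sketch of fitting one rate type with this function; the loader call and model path are assumptions for illustration, and the indexing follows the return value above (result[data_type] is the vars dict):

import pymc as mc
import dismod3

model = dismod3.data.load('/path/to/model_dir')  # hypothetical loader and path
vars = age_specific_rate(model, 'p', rate_type='neg_binom')['p']

m = mc.MCMC(vars)  # same fitting pattern as the test in Example #1
m.sample(3)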