def test_fixed_effect_priors():
    """Check that a user-specified fixed-effect prior reaches the model.

    Simulates normally distributed data whose mean depends on sex, requests
    a TruncatedNormal prior on the ``x_sex`` coefficient via ``parameters``,
    and asserts that the first ``beta`` node returned by
    ``covariate_model.mean_covariate_model`` has parent ``mu == 1``.
    """
    model = data.ModelData()

    # set prior on sex
    parameters = dict(fixed_effects={'x_sex': dict(dist='TruncatedNormal', mu=1., sigma=.5, lower=-10, upper=10)})

    # simulate normal data
    n = 32  # sample count; an int, since it is passed as a size argument
    sex_list = pl.array(['male', 'female', 'total'])
    sex = sex_list[mc.rcategorical([.3, .3, .4], n)]
    beta_true = dict(male=-1., total=0., female=1.)
    pi_true = pl.exp([beta_true[s] for s in sex])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1./sigma_true**2.)

    model.input_data = pandas.DataFrame(dict(value=p, sex=sex))
    model.input_data['area'] = 'all'
    model.input_data['year_start'] = 2010
    # the original assigned 'year_start' twice and never set 'year_end'
    model.input_data['year_end'] = 2010

    # create model and priors
    model_vars = {}
    model_vars.update(covariate_model.mean_covariate_model('test', 1, model.input_data, parameters, model,
                                                           'all', 'total', 'all'))

    # single-argument call form prints identically on Python 2 and 3
    print(model_vars['beta'])
    assert model_vars['beta'][0].parents['mu'] == 1.
def test_covariate_model_sim_no_hierarchy():
    """Smoke-test the mean covariate model on simulated data with three covariates.

    Builds a model whose data all belong to area 'all' / sex 'total', attaches
    a normal rate model, and runs two MCMC iterations to check that the pieces
    fit together.
    """
    # simulate normal data
    model = data.ModelData()
    small = data_simulation.small_output()
    model.hierarchy, model.output_template = small

    X = mc.rnormal(0., 1.**2, size=(128, 3))
    beta_true = [-.1, .1, .2]
    pi_true = pl.exp(pl.dot(X, beta_true))
    sigma_true = .01 * pl.ones_like(pi_true)
    inverse_variance = 1. / sigma_true**2.
    p = mc.rnormal(pi_true, inverse_variance)

    # one column per simulated covariate, plus the observed value
    columns = dict(value=p)
    for j in range(3):
        columns['x_%d' % j] = X[:, j]
    model.input_data = pandas.DataFrame(columns)

    # constant descriptor columns shared by every row
    for column, constant in [('area', 'all'), ('sex', 'total'), ('year_start', 2000), ('year_end', 2000)]:
        model.input_data[column] = constant

    # create model and priors
    model_vars = {}
    mean_vars = covariate_model.mean_covariate_model('test', 1, model.input_data, {}, model, 'all', 'total', 'all')
    model_vars.update(mean_vars)
    model_vars.update(rate_model.normal_model('test', model_vars['pi'], 0., p, sigma_true))

    # fit model
    sampler = mc.MCMC(model_vars)
    sampler.sample(2)
def test_random_effect_priors():
    """Check that a user-specified random-effect prior reaches the model.

    Simulates normally distributed data whose mean depends on area, requests
    a TruncatedNormal prior on the ``USA`` random effect via ``parameters``,
    and asserts that the second ``alpha`` node returned by
    ``covariate_model.mean_covariate_model`` has parent ``mu == .1``.
    """
    model = data.ModelData()

    # set prior on the USA area random effect
    parameters = dict(random_effects={'USA': dict(dist='TruncatedNormal', mu=.1, sigma=.5, lower=-10, upper=10)})

    # simulate normal data
    n = 32  # sample count; an int, since it is passed as a size argument
    area_list = pl.array(['all', 'USA', 'CAN'])
    area = area_list[mc.rcategorical([.3, .3, .4], n)]
    alpha_true = dict(all=0., USA=.1, CAN=-.2)
    pi_true = pl.exp([alpha_true[a] for a in area])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1./sigma_true**2.)

    model.input_data = pandas.DataFrame(dict(value=p, area=area))
    model.input_data['sex'] = 'male'
    model.input_data['year_start'] = 2010
    model.input_data['year_end'] = 2010

    # two-level hierarchy: 'all' is the parent of both countries
    model.hierarchy.add_edge('all', 'USA')
    model.hierarchy.add_edge('all', 'CAN')

    # create model and priors
    model_vars = {}
    model_vars.update(covariate_model.mean_covariate_model('test', 1, model.input_data, parameters, model,
                                                           'all', 'total', 'all'))

    # single-argument call form prints identically on Python 2 and 3
    print(model_vars['alpha'])
    print(model_vars['alpha'][1].parents['mu'])
    assert model_vars['alpha'][1].parents['mu'] == .1
def test_covariate_model_dispersion():
    """Smoke-test the dispersion covariate model on simulated count data.

    Draws negative-binomial observations whose dispersion depends on a binary
    covariate ``z_0``, builds the mean, dispersion and negative-binomial rate
    models, and runs two MCMC iterations to check that the pieces fit together.
    """
    # simulate negative binomial data
    n = 100

    model = data.ModelData()
    model.hierarchy, model.output_template = data_simulation.small_output()

    # binary covariate; the original weights were [.5, 5.], which do not sum to one
    Z = mc.rcategorical([.5, .5], n)
    zeta_true = -.2

    pi_true = .1
    ess = 10000.*pl.ones(n)
    eta_true = pl.log(50)
    delta_true = 50 + pl.exp(eta_true)

    p = mc.rnegative_binomial(pi_true*ess, delta_true*pl.exp(Z*zeta_true)) / ess

    model.input_data = pandas.DataFrame(dict(value=p, z_0=Z))
    model.input_data['area'] = 'all'
    model.input_data['sex'] = 'total'
    model.input_data['year_start'] = 2000
    model.input_data['year_end'] = 2000

    # create model and priors
    model_vars = dict(mu=mc.Uninformative('mu_test', value=pi_true))
    model_vars.update(covariate_model.mean_covariate_model('test', model_vars['mu'], model.input_data, {}, model,
                                                           'all', 'total', 'all'))
    model_vars.update(covariate_model.dispersion_covariate_model('test', model.input_data, .1, 10.))
    model_vars.update(rate_model.neg_binom_model('test', model_vars['pi'], model_vars['delta'], p, ess))

    # fit model
    m = mc.MCMC(model_vars)
    m.sample(2)
def test_covariate_model_sim_no_hierarchy():
    """Smoke-test the mean covariate model on simulated data with three covariates.

    Builds a model whose data all belong to area 'all' / sex 'total', attaches
    a normal rate model, and runs two MCMC iterations to check that the pieces
    fit together.
    """
    # simulate normal data
    model = data.ModelData()
    small = data_simulation.small_output()
    model.hierarchy, model.output_template = small

    X = mc.rnormal(0., 1.**2, size=(128, 3))
    beta_true = [-.1, .1, .2]
    pi_true = pl.exp(pl.dot(X, beta_true))
    sigma_true = .01 * pl.ones_like(pi_true)
    inverse_variance = 1. / sigma_true**2.
    p = mc.rnormal(pi_true, inverse_variance)

    # one column per simulated covariate, plus the observed value
    columns = dict(value=p)
    for j in range(3):
        columns['x_%d' % j] = X[:, j]
    model.input_data = pandas.DataFrame(columns)

    # constant descriptor columns shared by every row
    for column, constant in [('area', 'all'), ('sex', 'total'), ('year_start', 2000), ('year_end', 2000)]:
        model.input_data[column] = constant

    # create model and priors
    model_vars = {}
    mean_vars = covariate_model.mean_covariate_model('test', 1, model.input_data, {}, model, 'all', 'total', 'all')
    model_vars.update(mean_vars)
    model_vars.update(rate_model.normal_model('test', model_vars['pi'], 0., p, sigma_true))

    # fit model
    sampler = mc.MCMC(model_vars)
    sampler.sample(2)
def test_fixed_effect_priors():
    """Check that a user-specified fixed-effect prior reaches the model.

    Simulates normally distributed data whose mean depends on sex, requests
    a TruncatedNormal prior on the ``x_sex`` coefficient via ``parameters``,
    and asserts that the first ``beta`` node returned by
    ``covariate_model.mean_covariate_model`` has parent ``mu == 1``.
    """
    model = data.ModelData()

    # set prior on sex
    parameters = dict(
        fixed_effects={
            'x_sex': dict(dist='TruncatedNormal', mu=1., sigma=.5, lower=-10, upper=10)
        })

    # simulate normal data
    n = 32  # sample count; an int, since it is passed as a size argument
    sex_list = pl.array(['male', 'female', 'total'])
    sex = sex_list[mc.rcategorical([.3, .3, .4], n)]
    beta_true = dict(male=-1., total=0., female=1.)
    pi_true = pl.exp([beta_true[s] for s in sex])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    model.input_data = pandas.DataFrame(dict(value=p, sex=sex))
    model.input_data['area'] = 'all'
    model.input_data['year_start'] = 2010
    # the original assigned 'year_start' twice and never set 'year_end'
    model.input_data['year_end'] = 2010

    # create model and priors
    model_vars = {}
    model_vars.update(
        covariate_model.mean_covariate_model('test', 1, model.input_data, parameters, model,
                                             'all', 'total', 'all'))

    # single-argument call form prints identically on Python 2 and 3
    print(model_vars['beta'])
    assert model_vars['beta'][0].parents['mu'] == 1.
def test_covariate_model_sim_w_hierarchy():
    """Smoke-test the mean covariate model on simulated data spread over areas and sexes.

    After two MCMC iterations, checks that sex is represented as the fixed
    effect column 'x_sex' (and not as a random-effect column in 'U') and that
    exactly one beta was created.
    """
    n = 50

    # setup hierarchy
    hierarchy, output_template = data_simulation.small_output()

    # simulate normal data (the order of the random draws is kept: area, sex, year, value)
    area_names = pl.array(['all', 'USA', 'CAN'])
    area = area_names[mc.rcategorical([.3, .3, .4], n)]

    sex_names = pl.array(['male', 'female', 'total'])
    sex = sex_names[mc.rcategorical([.3, .3, .4], n)]

    year = pl.array(mc.runiform(1990, 2010, n), dtype=int)

    alpha_true = dict(all=0., USA=.1, CAN=-.2)
    pi_true = pl.exp([alpha_true[a] for a in area])
    sigma_true = .05 * pl.ones_like(pi_true)
    inverse_variance = 1. / sigma_true**2.
    p = mc.rnormal(pi_true, inverse_variance)

    model = data.ModelData()
    columns = dict(value=p, area=area, sex=sex, year_start=year, year_end=year)
    model.input_data = pandas.DataFrame(columns)
    model.hierarchy, model.output_template = hierarchy, output_template

    # create model and priors
    model_vars = {}
    mean_vars = covariate_model.mean_covariate_model('test', 1, model.input_data, {}, model, 'all', 'total', 'all')
    model_vars.update(mean_vars)
    model_vars.update(rate_model.normal_model('test', model_vars['pi'], 0., p, sigma_true))

    # fit model
    sampler = mc.MCMC(model_vars)
    sampler.sample(2)

    assert 'sex' not in model_vars['U']
    assert 'x_sex' in model_vars['X']
    assert len(model_vars['beta']) == 1
def test_covariate_model_sim_w_hierarchy():
    """Smoke-test the mean covariate model on simulated data spread over areas and sexes.

    After two MCMC iterations, checks that sex is represented as the fixed
    effect column 'x_sex' (and not as a random-effect column in 'U') and that
    exactly one beta was created.
    """
    n = 50

    # setup hierarchy
    hierarchy, output_template = data_simulation.small_output()

    # simulate normal data (the order of the random draws is kept: area, sex, year, value)
    area_names = pl.array(['all', 'USA', 'CAN'])
    area = area_names[mc.rcategorical([.3, .3, .4], n)]

    sex_names = pl.array(['male', 'female', 'total'])
    sex = sex_names[mc.rcategorical([.3, .3, .4], n)]

    year = pl.array(mc.runiform(1990, 2010, n), dtype=int)

    alpha_true = dict(all=0., USA=.1, CAN=-.2)
    pi_true = pl.exp([alpha_true[a] for a in area])
    sigma_true = .05*pl.ones_like(pi_true)
    inverse_variance = 1./sigma_true**2.
    p = mc.rnormal(pi_true, inverse_variance)

    model = data.ModelData()
    columns = dict(value=p, area=area, sex=sex, year_start=year, year_end=year)
    model.input_data = pandas.DataFrame(columns)
    model.hierarchy, model.output_template = hierarchy, output_template

    # create model and priors
    model_vars = {}
    mean_vars = covariate_model.mean_covariate_model('test', 1, model.input_data, {}, model, 'all', 'total', 'all')
    model_vars.update(mean_vars)
    model_vars.update(rate_model.normal_model('test', model_vars['pi'], 0., p, sigma_true))

    # fit model
    sampler = mc.MCMC(model_vars)
    sampler.sample(2)

    assert 'sex' not in model_vars['U']
    assert 'x_sex' in model_vars['X']
    assert len(model_vars['beta']) == 1
def test_covariate_model_dispersion():
    """Smoke-test the dispersion covariate model on simulated count data.

    Draws negative-binomial observations whose dispersion depends on a binary
    covariate ``z_0``, builds the mean, dispersion and negative-binomial rate
    models, and runs two MCMC iterations to check that the pieces fit together.
    """
    # simulate negative binomial data
    n = 100

    model = data.ModelData()
    model.hierarchy, model.output_template = data_simulation.small_output()

    # binary covariate; the original weights were [.5, 5.], which do not sum to one
    Z = mc.rcategorical([.5, .5], n)
    zeta_true = -.2

    pi_true = .1
    ess = 10000. * pl.ones(n)
    eta_true = pl.log(50)
    delta_true = 50 + pl.exp(eta_true)

    p = mc.rnegative_binomial(pi_true * ess, delta_true * pl.exp(Z * zeta_true)) / ess

    model.input_data = pandas.DataFrame(dict(value=p, z_0=Z))
    model.input_data['area'] = 'all'
    model.input_data['sex'] = 'total'
    model.input_data['year_start'] = 2000
    model.input_data['year_end'] = 2000

    # create model and priors
    model_vars = dict(mu=mc.Uninformative('mu_test', value=pi_true))
    model_vars.update(
        covariate_model.mean_covariate_model('test', model_vars['mu'], model.input_data, {}, model,
                                             'all', 'total', 'all'))
    model_vars.update(
        covariate_model.dispersion_covariate_model('test', model.input_data, .1, 10.))
    model_vars.update(
        rate_model.neg_binom_model('test', model_vars['pi'], model_vars['delta'], p, ess))

    # fit model
    m = mc.MCMC(model_vars)
    m.sample(2)
def test_random_effect_priors():
    """Check that a user-specified random-effect prior reaches the model.

    Simulates normally distributed data whose mean depends on area, requests
    a TruncatedNormal prior on the ``USA`` random effect via ``parameters``,
    and asserts that the second ``alpha`` node returned by
    ``covariate_model.mean_covariate_model`` has parent ``mu == .1``.
    """
    model = data.ModelData()

    # set prior on the USA area random effect
    parameters = dict(random_effects={
        'USA': dict(dist='TruncatedNormal', mu=.1, sigma=.5, lower=-10, upper=10)
    })

    # simulate normal data
    n = 32  # sample count; an int, since it is passed as a size argument
    area_list = pl.array(['all', 'USA', 'CAN'])
    area = area_list[mc.rcategorical([.3, .3, .4], n)]
    alpha_true = dict(all=0., USA=.1, CAN=-.2)
    pi_true = pl.exp([alpha_true[a] for a in area])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    model.input_data = pandas.DataFrame(dict(value=p, area=area))
    model.input_data['sex'] = 'male'
    model.input_data['year_start'] = 2010
    model.input_data['year_end'] = 2010

    # two-level hierarchy: 'all' is the parent of both countries
    model.hierarchy.add_edge('all', 'USA')
    model.hierarchy.add_edge('all', 'CAN')

    # create model and priors
    model_vars = {}
    model_vars.update(
        covariate_model.mean_covariate_model('test', 1, model.input_data, parameters, model,
                                             'all', 'total', 'all'))

    # single-argument call form prints identically on Python 2 and 3
    print(model_vars['alpha'])
    print(model_vars['alpha'][1].parents['mu'])
    assert model_vars['alpha'][1].parents['mu'] == .1
def age_specific_rate(model, data_type, reference_area='all', reference_sex='total', reference_year='all',
                      mu_age=None, mu_age_parent=None, sigma_age_parent=None,
                      rate_type='neg_binom', lower_bound=None, interpolation_method='linear',
                      include_covariates=True, zero_re=False):
    # TODO: expose (and document) interface for alternative rate_type as well as other options,
    # record reference values in the model
    """ Generate PyMC objects for model of epidemiological age-interval data

    :Parameters:
      - `model` : data.ModelData
      - `data_type` : str, one of 'i', 'r', 'f', 'p', or 'pf'
      - `reference_area, reference_sex, reference_year` : the node of the model to fit consistently
      - `mu_age` : pymc.Node, will be used as the age pattern, set to None if not needed
      - `mu_age_parent` : pymc.Node, will be used as the age pattern of the parent of the root area, set to None if not needed
      - `sigma_age_parent` : pymc.Node, will be used as the standard deviation of the age pattern, set to None if not needed
      - `rate_type` : str, optional. One of 'neg_binom', 'log_normal', 'normal', 'binom', 'beta_binom', 'poisson', or 'offset_log_normal'
      - `lower_bound` : optional; when set, it is passed to `model.get_data` and the rows returned are modeled as lower-bound data
      - `interpolation_method` : str, optional, one of 'linear', 'nearest', 'zero', 'slinear', 'quadratic', or 'cubic'
      - `include_covariates` : boolean
      - `zero_re` : boolean, change one stoch from each set of siblings in area hierarchy to a 'sum to zero' deterministic

    :Results:
      - Returns a ModelVars whose `data_type` entry holds the PyMC objects built here; when there are
        rows of data this includes 'pi', the covariate adjusted predicted values for each row of data

    :Raises:
      - Exception if there are rows of data and `rate_type` is not one of the values listed above

    :Notes:
      - The 'standard_error' and 'effective_sample_size' columns of the tables returned by
        `model.get_data` are filled in and clipped in place.
    """
    name = data_type
    # imported under an alias so that the local `data` table below does not shadow the module
    import data as data_module
    result = data_module.ModelVars()

    # `is not None` rather than `!= None`: comparing a numpy array with `!=` can be
    # elementwise, which makes the truth value of the result ambiguous
    if (mu_age_parent is not None and pl.any(pl.isnan(mu_age_parent))) \
           or (sigma_age_parent is not None and pl.any(pl.isnan(sigma_age_parent))):
        mu_age_parent = None
        sigma_age_parent = None
        print('WARNING: nan found in parent mu/sigma.  Ignoring')

    ages = pl.array(model.parameters['ages'])
    data = model.get_data(data_type)
    if lower_bound:
        lb_data = model.get_data(lower_bound)
    parameters = model.parameters.get(data_type, {})

    vars = dismod3.data.ModelVars()
    vars += dict(data=data)

    # knots of the age pattern: user supplied mesh, or every 5 years across the age range
    if 'parameter_age_mesh' in parameters:
        knots = pl.array(parameters['parameter_age_mesh'])
    else:
        knots = pl.arange(ages[0], ages[-1] + 1, 5)

    smoothing_dict = {'No Prior': pl.inf, 'Slightly': .5, 'Moderately': .05, 'Very': .005}
    if 'smoothness' in parameters:
        smoothing = smoothing_dict[parameters['smoothness']['amount']]
    else:
        smoothing = 0.

    if mu_age is None:
        vars.update(
            age_pattern.age_pattern(name, ages=ages, knots=knots, smoothing=smoothing,
                                    interpolation_method=interpolation_method)
            )
    else:
        vars.update(dict(mu_age=mu_age, ages=ages))

    vars.update(expert_prior_model.level_constraints(name, parameters, vars['mu_age'], ages))
    vars.update(expert_prior_model.derivative_constraints(name, parameters, vars['mu_age'], ages))

    if mu_age_parent is not None:
        # setup a hierarchical prior on the similarity between the
        # consistent estimate here and (inconsistent) estimate for its
        # parent in the areas hierarchy
        #weight_dict = {'Unusable': 10., 'Slightly': 10., 'Moderately': 1., 'Very': .1}
        #weight = weight_dict[parameters['heterogeneity']]
        vars.update(
            similarity_prior_model.similar('parent_similarity_%s' % name, vars['mu_age'],
                                           mu_age_parent, sigma_age_parent, 0.)
            )

        # also use this as the initial value for the age pattern, if it is not already specified
        if mu_age is None:
            if isinstance(mu_age_parent, mc.Node):  # TODO: test this code
                initial_mu = mu_age_parent.value
            else:
                initial_mu = mu_age_parent

            # start each knot at the log of the parent value for that age, clipped to [-12, 6]
            for i, k_i in enumerate(knots):
                vars['gamma'][i].value = (pl.log(initial_mu[k_i - ages[0]])).clip(-12, 6)

    age_weights = pl.ones_like(vars['mu_age'].value)  # TODO: use age pattern appropriate to the rate type
    if len(data) > 0:
        vars.update(
            age_integrating_model.age_standardize_approx(name, age_weights, vars['mu_age'],
                                                         data['age_start'], data['age_end'], ages)
            )

        # uncomment the following to effectively remove all effects
        #if 'random_effects' in parameters:
        #    for i in range(5):
        #        effect = 'sigma_alpha_%s_%d' % (name, i)
        #        parameters['random_effects'][effect] = dict(dist='TruncatedNormal', mu=.0001, sigma=.00001, lower=.00009, upper=.00011)
        #if 'fixed_effects' in parameters:
        #    for effect in ['x_sex', 'x_LDI_id_Updated_7July2011']:
        #        parameters['fixed_effects'][effect] = dict(dist='normal', mu=.0001, sigma=.00001)

        if include_covariates:
            vars.update(
                covariate_model.mean_covariate_model(name, vars['mu_interval'], data, parameters, model,
                                                     reference_area, reference_sex, reference_year,
                                                     zero_re=zero_re)
                )
        else:
            vars.update({'pi': vars['mu_interval']})

        ## ensure that all data has uncertainty quantified appropriately
        # first replace all missing se from ci
        missing_se = pl.isnan(data['standard_error']) | (data['standard_error'] < 0)
        data['standard_error'][missing_se] = (data['upper_ci'][missing_se] - data['lower_ci'][missing_se]) / (2 * 1.96)

        # then replace all missing ess with se
        missing_ess = pl.isnan(data['effective_sample_size'])
        data['effective_sample_size'][missing_ess] = \
            data['value'][missing_ess] * (1 - data['value'][missing_ess]) / data['standard_error'][missing_ess]**2

        if rate_type == 'neg_binom':
            # warn about data that doesn't have effective sample size quantified, or is negative,
            # and set its effective sample size to zero
            missing_ess = pl.isnan(data['effective_sample_size']) | (data['effective_sample_size'] < 0)
            if sum(missing_ess) > 0:
                print('WARNING: %d rows of %s data has invalid quantification of uncertainty.' % (sum(missing_ess), name))
                data['effective_sample_size'][missing_ess] = 0.0

            # warn and change data where ess is unreasonably huge
            large_ess = data['effective_sample_size'] >= 1.e10
            if sum(large_ess) > 0:
                print('WARNING: %d rows of %s data have effective sample size exceeding 10 billion.' % (sum(large_ess), name))
                data['effective_sample_size'][large_ess] = 1.e10

            if 'heterogeneity' in parameters:
                lower_dict = {'Slightly': 9., 'Moderately': 3., 'Very': 1.}
                lower = lower_dict[parameters['heterogeneity']]
            else:
                lower = 1.

            # special case, treat pf data as poisson
            if data_type == 'pf':
                lower = 1.e12

            vars.update(
                covariate_model.dispersion_covariate_model(name, data, lower, lower * 9.)
                )

            vars.update(
                rate_model.neg_binom_model(name, vars['pi'], vars['delta'],
                                           data['value'], data['effective_sample_size'])
                )
        elif rate_type == 'log_normal':
            # warn about data that doesn't have standard error quantified, and give it a huge one
            missing = pl.isnan(data['standard_error']) | (data['standard_error'] < 0)
            if sum(missing) > 0:
                print('WARNING: %d rows of %s data has no quantification of uncertainty.' % (sum(missing), name))
                data['standard_error'][missing] = 1.e6

            # TODO: allow options for alternative priors for sigma
            vars['sigma'] = mc.Uniform('sigma_%s' % name, lower=.0001, upper=1., value=.01)
            #vars['sigma'] = mc.Exponential('sigma_%s'%name, beta=100., value=.01)
            vars.update(
                rate_model.log_normal_model(name, vars['pi'], vars['sigma'],
                                            data['value'], data['standard_error'])
                )
        elif rate_type == 'normal':
            # warn about data that doesn't have standard error quantified, and give it a huge one
            missing = pl.isnan(data['standard_error']) | (data['standard_error'] < 0)
            if sum(missing) > 0:
                print('WARNING: %d rows of %s data has no quantification of uncertainty.' % (sum(missing), name))
                data['standard_error'][missing] = 1.e6

            vars['sigma'] = mc.Uniform('sigma_%s' % name, lower=.0001, upper=.1, value=.01)
            vars.update(
                rate_model.normal_model(name, vars['pi'], vars['sigma'],
                                        data['value'], data['standard_error'])
                )
        elif rate_type == 'binom':
            missing_ess = pl.isnan(data['effective_sample_size']) | (data['effective_sample_size'] < 0)
            if sum(missing_ess) > 0:
                print('WARNING: %d rows of %s data has invalid quantification of uncertainty.' % (sum(missing_ess), name))
                data['effective_sample_size'][missing_ess] = 0.0
            vars += rate_model.binom(name, vars['pi'], data['value'], data['effective_sample_size'])
        elif rate_type == 'beta_binom':
            vars += rate_model.beta_binom(name, vars['pi'], data['value'], data['effective_sample_size'])
        elif rate_type == 'poisson':
            missing_ess = pl.isnan(data['effective_sample_size']) | (data['effective_sample_size'] < 0)
            if sum(missing_ess) > 0:
                print('WARNING: %d rows of %s data has invalid quantification of uncertainty.' % (sum(missing_ess), name))
                data['effective_sample_size'][missing_ess] = 0.0
            vars += rate_model.poisson(name, vars['pi'], data['value'], data['effective_sample_size'])
        elif rate_type == 'offset_log_normal':
            vars['sigma'] = mc.Uniform('sigma_%s' % name, lower=.0001, upper=10., value=.01)
            vars += rate_model.offset_log_normal(name, vars['pi'], vars['sigma'],
                                                 data['value'], data['standard_error'])
        else:
            # call form of raise is valid on both Python 2 and Python 3
            raise Exception('rate_model "%s" not implemented' % rate_type)
    else:
        # no rows of data: still build the covariate model, with an empty mean
        if include_covariates:
            vars.update(
                covariate_model.mean_covariate_model(name, [], data, parameters, model,
                                                     reference_area, reference_sex, reference_year,
                                                     zero_re=zero_re)
                )

    if include_covariates:
        vars.update(expert_prior_model.covariate_level_constraints(name, model, vars, ages))

    if lower_bound and len(lb_data) > 0:
        vars['lb'] = age_integrating_model.age_standardize_approx('lb_%s' % name, age_weights, vars['mu_age'],
                                                                  lb_data['age_start'], lb_data['age_end'], ages)

        if include_covariates:
            vars['lb'].update(
                covariate_model.mean_covariate_model('lb_%s' % name, vars['lb']['mu_interval'], lb_data,
                                                     parameters, model,
                                                     reference_area, reference_sex, reference_year,
                                                     zero_re=zero_re)
                )
        else:
            vars['lb'].update({'pi': vars['lb']['mu_interval']})

        vars['lb'].update(
            covariate_model.dispersion_covariate_model('lb_%s' % name, lb_data, 1e12, 1e13)  # treat like poisson
            )

        ## ensure that all data has uncertainty quantified appropriately
        # first replace all missing se from ci
        missing_se = pl.isnan(lb_data['standard_error']) | (lb_data['standard_error'] <= 0)
        lb_data['standard_error'][missing_se] = \
            (lb_data['upper_ci'][missing_se] - lb_data['lower_ci'][missing_se]) / (2 * 1.96)

        # then replace all missing ess with se
        missing_ess = pl.isnan(lb_data['effective_sample_size'])
        lb_data['effective_sample_size'][missing_ess] = \
            lb_data['value'][missing_ess] * (1 - lb_data['value'][missing_ess]) / lb_data['standard_error'][missing_ess]**2

        # warn about lb_data that doesn't have a positive effective sample size, and set it to one
        missing_ess = pl.isnan(lb_data['effective_sample_size']) | (lb_data['effective_sample_size'] <= 0)
        if sum(missing_ess) > 0:
            print('WARNING: %d rows of %s lower bound data has no quantification of uncertainty.' % (sum(missing_ess), name))
            lb_data['effective_sample_size'][missing_ess] = 1.0

        vars['lb'].update(
            rate_model.neg_binom_lower_bound_model('lb_%s' % name, vars['lb']['pi'], vars['lb']['delta'],
                                                   lb_data['value'], lb_data['effective_sample_size'])
            )

    result[data_type] = vars
    return result
def age_specific_rate(
    model,
    data_type,
    reference_area="all",
    reference_sex="total",
    reference_year="all",
    mu_age=None,
    mu_age_parent=None,
    sigma_age_parent=None,
    rate_type="neg_binom",
    lower_bound=None,
    interpolation_method="linear",
    include_covariates=True,
    zero_re=False,
):
    # TODO: expose (and document) interface for alternative rate_type as well as other options,
    # record reference values in the model
    """Generate PyMC objects for model of epidemiological age-interval data

    :Parameters:
      - `model` : data.ModelData
      - `data_type` : str, one of 'i', 'r', 'f', 'p', or 'pf'
      - `reference_area, reference_sex, reference_year` : the node of the model to fit consistently
      - `mu_age` : pymc.Node, will be used as the age pattern, set to None if not needed
      - `mu_age_parent` : pymc.Node, will be used as the age pattern of the parent of the root area, set to None if not needed
      - `sigma_age_parent` : pymc.Node, will be used as the standard deviation of the age pattern, set to None if not needed
      - `rate_type` : str, optional. One of 'neg_binom', 'log_normal', 'normal', 'binom', 'beta_binom', 'poisson', or 'offset_log_normal'
      - `lower_bound` : optional; when set, it is passed to `model.get_data` and the rows returned are modeled as lower-bound data
      - `interpolation_method` : str, optional, one of 'linear', 'nearest', 'zero', 'slinear', 'quadratic', or 'cubic'
      - `include_covariates` : boolean
      - `zero_re` : boolean, change one stoch from each set of siblings in area hierarchy to a 'sum to zero' deterministic

    :Results:
      - Returns a ModelVars whose `data_type` entry holds the PyMC objects built here; when there are
        rows of data this includes 'pi', the covariate adjusted predicted values for each row of data

    :Raises:
      - Exception if there are rows of data and `rate_type` is not one of the values listed above

    :Notes:
      - The 'standard_error' and 'effective_sample_size' columns of the tables returned by
        `model.get_data` are filled in and clipped in place.
    """
    name = data_type
    # imported under an alias so that the local `data` table below does not shadow the module
    import data as data_module

    result = data_module.ModelVars()

    # `is not None` rather than `!= None`: comparing a numpy array with `!=` can be
    # elementwise, which makes the truth value of the result ambiguous
    if (mu_age_parent is not None and pl.any(pl.isnan(mu_age_parent))) or (
        sigma_age_parent is not None and pl.any(pl.isnan(sigma_age_parent))
    ):
        mu_age_parent = None
        sigma_age_parent = None
        print("WARNING: nan found in parent mu/sigma.  Ignoring")

    ages = pl.array(model.parameters["ages"])
    data = model.get_data(data_type)
    if lower_bound:
        lb_data = model.get_data(lower_bound)
    parameters = model.parameters.get(data_type, {})

    vars = dismod3.data.ModelVars()
    vars += dict(data=data)

    # knots of the age pattern: user supplied mesh, or every 5 years across the age range
    if "parameter_age_mesh" in parameters:
        knots = pl.array(parameters["parameter_age_mesh"])
    else:
        knots = pl.arange(ages[0], ages[-1] + 1, 5)

    smoothing_dict = {"No Prior": pl.inf, "Slightly": 0.5, "Moderately": 0.05, "Very": 0.005}
    if "smoothness" in parameters:
        smoothing = smoothing_dict[parameters["smoothness"]["amount"]]
    else:
        smoothing = 0.0

    if mu_age is None:
        vars.update(
            age_pattern.age_pattern(
                name, ages=ages, knots=knots, smoothing=smoothing, interpolation_method=interpolation_method
            )
        )
    else:
        vars.update(dict(mu_age=mu_age, ages=ages))

    vars.update(expert_prior_model.level_constraints(name, parameters, vars["mu_age"], ages))
    vars.update(expert_prior_model.derivative_constraints(name, parameters, vars["mu_age"], ages))

    if mu_age_parent is not None:
        # setup a hierarchical prior on the similarity between the
        # consistent estimate here and (inconsistent) estimate for its
        # parent in the areas hierarchy
        # weight_dict = {'Unusable': 10., 'Slightly': 10., 'Moderately': 1., 'Very': .1}
        # weight = weight_dict[parameters['heterogeneity']]
        vars.update(
            similarity_prior_model.similar(
                "parent_similarity_%s" % name, vars["mu_age"], mu_age_parent, sigma_age_parent, 0.0
            )
        )

        # also use this as the initial value for the age pattern, if it is not already specified
        if mu_age is None:
            if isinstance(mu_age_parent, mc.Node):  # TODO: test this code
                initial_mu = mu_age_parent.value
            else:
                initial_mu = mu_age_parent

            # start each knot at the log of the parent value for that age, clipped to [-12, 6]
            for i, k_i in enumerate(knots):
                vars["gamma"][i].value = (pl.log(initial_mu[k_i - ages[0]])).clip(-12, 6)

    age_weights = pl.ones_like(vars["mu_age"].value)  # TODO: use age pattern appropriate to the rate type
    if len(data) > 0:
        vars.update(
            age_integrating_model.age_standardize_approx(
                name, age_weights, vars["mu_age"], data["age_start"], data["age_end"], ages
            )
        )

        # uncomment the following to effectively remove all effects
        # if 'random_effects' in parameters:
        #    for i in range(5):
        #        effect = 'sigma_alpha_%s_%d' % (name, i)
        #        parameters['random_effects'][effect] = dict(dist='TruncatedNormal', mu=.0001, sigma=.00001, lower=.00009, upper=.00011)
        # if 'fixed_effects' in parameters:
        #    for effect in ['x_sex', 'x_LDI_id_Updated_7July2011']:
        #        parameters['fixed_effects'][effect] = dict(dist='normal', mu=.0001, sigma=.00001)

        if include_covariates:
            vars.update(
                covariate_model.mean_covariate_model(
                    name,
                    vars["mu_interval"],
                    data,
                    parameters,
                    model,
                    reference_area,
                    reference_sex,
                    reference_year,
                    zero_re=zero_re,
                )
            )
        else:
            vars.update({"pi": vars["mu_interval"]})

        ## ensure that all data has uncertainty quantified appropriately
        # first replace all missing se from ci
        missing_se = pl.isnan(data["standard_error"]) | (data["standard_error"] < 0)
        data["standard_error"][missing_se] = (data["upper_ci"][missing_se] - data["lower_ci"][missing_se]) / (2 * 1.96)

        # then replace all missing ess with se
        missing_ess = pl.isnan(data["effective_sample_size"])
        data["effective_sample_size"][missing_ess] = (
            data["value"][missing_ess] * (1 - data["value"][missing_ess]) / data["standard_error"][missing_ess] ** 2
        )

        if rate_type == "neg_binom":
            # warn about data that doesn't have effective sample size quantified, or is negative,
            # and set its effective sample size to zero
            missing_ess = pl.isnan(data["effective_sample_size"]) | (data["effective_sample_size"] < 0)
            if sum(missing_ess) > 0:
                print(
                    "WARNING: %d rows of %s data has invalid quantification of uncertainty."
                    % (sum(missing_ess), name)
                )
                data["effective_sample_size"][missing_ess] = 0.0

            # warn and change data where ess is unreasonably huge
            large_ess = data["effective_sample_size"] >= 1.0e10
            if sum(large_ess) > 0:
                print(
                    "WARNING: %d rows of %s data have effective sample size exceeding 10 billion."
                    % (sum(large_ess), name)
                )
                data["effective_sample_size"][large_ess] = 1.0e10

            if "heterogeneity" in parameters:
                lower_dict = {"Slightly": 9.0, "Moderately": 3.0, "Very": 1.0}
                lower = lower_dict[parameters["heterogeneity"]]
            else:
                lower = 1.0

            # special case, treat pf data as poisson
            if data_type == "pf":
                lower = 1.0e12

            vars.update(covariate_model.dispersion_covariate_model(name, data, lower, lower * 9.0))

            vars.update(
                rate_model.neg_binom_model(
                    name, vars["pi"], vars["delta"], data["value"], data["effective_sample_size"]
                )
            )
        elif rate_type == "log_normal":
            # warn about data that doesn't have standard error quantified, and give it a huge one
            missing = pl.isnan(data["standard_error"]) | (data["standard_error"] < 0)
            if sum(missing) > 0:
                print("WARNING: %d rows of %s data has no quantification of uncertainty." % (sum(missing), name))
                data["standard_error"][missing] = 1.0e6

            # TODO: allow options for alternative priors for sigma
            vars["sigma"] = mc.Uniform("sigma_%s" % name, lower=0.0001, upper=1.0, value=0.01)
            # vars['sigma'] = mc.Exponential('sigma_%s'%name, beta=100., value=.01)
            vars.update(
                rate_model.log_normal_model(name, vars["pi"], vars["sigma"], data["value"], data["standard_error"])
            )
        elif rate_type == "normal":
            # warn about data that doesn't have standard error quantified, and give it a huge one
            missing = pl.isnan(data["standard_error"]) | (data["standard_error"] < 0)
            if sum(missing) > 0:
                print("WARNING: %d rows of %s data has no quantification of uncertainty." % (sum(missing), name))
                data["standard_error"][missing] = 1.0e6

            vars["sigma"] = mc.Uniform("sigma_%s" % name, lower=0.0001, upper=0.1, value=0.01)
            vars.update(rate_model.normal_model(name, vars["pi"], vars["sigma"], data["value"], data["standard_error"]))
        elif rate_type == "binom":
            missing_ess = pl.isnan(data["effective_sample_size"]) | (data["effective_sample_size"] < 0)
            if sum(missing_ess) > 0:
                print(
                    "WARNING: %d rows of %s data has invalid quantification of uncertainty."
                    % (sum(missing_ess), name)
                )
                data["effective_sample_size"][missing_ess] = 0.0
            vars += rate_model.binom(name, vars["pi"], data["value"], data["effective_sample_size"])
        elif rate_type == "beta_binom":
            vars += rate_model.beta_binom(name, vars["pi"], data["value"], data["effective_sample_size"])
        elif rate_type == "poisson":
            missing_ess = pl.isnan(data["effective_sample_size"]) | (data["effective_sample_size"] < 0)
            if sum(missing_ess) > 0:
                print(
                    "WARNING: %d rows of %s data has invalid quantification of uncertainty."
                    % (sum(missing_ess), name)
                )
                data["effective_sample_size"][missing_ess] = 0.0
            vars += rate_model.poisson(name, vars["pi"], data["value"], data["effective_sample_size"])
        elif rate_type == "offset_log_normal":
            vars["sigma"] = mc.Uniform("sigma_%s" % name, lower=0.0001, upper=10.0, value=0.01)
            vars += rate_model.offset_log_normal(name, vars["pi"], vars["sigma"], data["value"], data["standard_error"])
        else:
            # call form of raise is valid on both Python 2 and Python 3
            raise Exception('rate_model "%s" not implemented' % rate_type)
    else:
        # no rows of data: still build the covariate model, with an empty mean
        if include_covariates:
            vars.update(
                covariate_model.mean_covariate_model(
                    name, [], data, parameters, model, reference_area, reference_sex, reference_year, zero_re=zero_re
                )
            )

    if include_covariates:
        vars.update(expert_prior_model.covariate_level_constraints(name, model, vars, ages))

    if lower_bound and len(lb_data) > 0:
        vars["lb"] = age_integrating_model.age_standardize_approx(
            "lb_%s" % name, age_weights, vars["mu_age"], lb_data["age_start"], lb_data["age_end"], ages
        )

        if include_covariates:
            vars["lb"].update(
                covariate_model.mean_covariate_model(
                    "lb_%s" % name,
                    vars["lb"]["mu_interval"],
                    lb_data,
                    parameters,
                    model,
                    reference_area,
                    reference_sex,
                    reference_year,
                    zero_re=zero_re,
                )
            )
        else:
            vars["lb"].update({"pi": vars["lb"]["mu_interval"]})

        vars["lb"].update(
            covariate_model.dispersion_covariate_model("lb_%s" % name, lb_data, 1e12, 1e13)  # treat like poisson
        )

        ## ensure that all data has uncertainty quantified appropriately
        # first replace all missing se from ci
        missing_se = pl.isnan(lb_data["standard_error"]) | (lb_data["standard_error"] <= 0)
        lb_data["standard_error"][missing_se] = (lb_data["upper_ci"][missing_se] - lb_data["lower_ci"][missing_se]) / (
            2 * 1.96
        )

        # then replace all missing ess with se
        missing_ess = pl.isnan(lb_data["effective_sample_size"])
        lb_data["effective_sample_size"][missing_ess] = (
            lb_data["value"][missing_ess]
            * (1 - lb_data["value"][missing_ess])
            / lb_data["standard_error"][missing_ess] ** 2
        )

        # warn about lb_data that doesn't have a positive effective sample size, and set it to one
        missing_ess = pl.isnan(lb_data["effective_sample_size"]) | (lb_data["effective_sample_size"] <= 0)
        if sum(missing_ess) > 0:
            print(
                "WARNING: %d rows of %s lower bound data has no quantification of uncertainty."
                % (sum(missing_ess), name)
            )
            lb_data["effective_sample_size"][missing_ess] = 1.0

        vars["lb"].update(
            rate_model.neg_binom_lower_bound_model(
                "lb_%s" % name,
                vars["lb"]["pi"],
                vars["lb"]["delta"],
                lb_data["value"],
                lb_data["effective_sample_size"],
            )
        )

    result[data_type] = vars
    return result