def initialize_model():
    """Load disease model 19807 and apply expert priors for its global fit.

    Sets level bounds and monotonicity priors, disables every covariate
    effect, nearly fixes the sex/time/region effects on prevalence, and
    clears any previous fit results before returning the model.
    """
    ### @export 'load model'
    dm = dismod3.load_disease_model(19807)

    ### @export 'initialize model data'
    priors = dm.params['global_priors']
    priors['level_bounds']['excess_mortality'] = dict(lower=.1, upper=100.)
    priors['increasing']['excess_mortality'] = dict(age_start=0, age_end=0)
    priors['level_bounds']['relative_risk'] = dict(lower=0., upper=10000.)

    # turn off every study- and country-level covariate effect
    for group in ('Study_level', 'Country_level'):
        cov_group = dm.params['covariates'][group]
        for name in cov_group:
            cov_group[name]['rate']['value'] = 0

    # very tight priors on the sex/time/region effects for prevalence
    level = .001
    upper = pl.exp(level * 1.96)
    lower = pl.exp(-level * 1.96)
    dm.params['sex_effect_prevalence'] = dict(mean=1, upper_ci=upper, lower_ci=lower)
    dm.params['time_effect_prevalence'] = dict(mean=1, upper_ci=upper, lower_ci=lower)
    dm.params['region_effect_prevalence'] = dict(std=level)

    # start from a clean slate
    dm.clear_fit()
    dm.clear_empirical_prior()
    dismod3.neg_binom_model.covariate_hash = {}

    return dm
def download_model(id):
    """ Copy model from web to j drive

    Loads the model from the local working directory if it is already
    there; otherwise downloads it from the dismod server, converting from
    the old covariate format if necessary, and saves it locally.

    Parameters
    ----------
    id : int
      The model id number to copy
    """
    # renamed from `dir` to avoid shadowing the builtin
    dir_name = dismod3.settings.JOB_WORKING_DIR % id  # TODO: refactor into a function
    try:
        model = dismod3.data.ModelData.load(dir_name)
        print('model already on j drive in %s' % dir_name)
    except (IOError, AssertionError):
        print('downloading disease model')
        dm = dismod3.load_disease_model(id)
        import simplejson as json
        try:
            model = dismod3.data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        except Exception as e:
            print(e)
            print('attempting to use old covariate format')
            import old_cov_data
            model = old_cov_data.from_gbd_jsons(json.loads(dm.to_json()))
        model.save(dir_name)
        print('loaded data from json, saved in new format for next time in %s' % dir_name)
def initialize_model():
    """Load model 16391 with expert priors for a prevalence-only regional fit.

    Keeps only prevalence data relevant to the module-level ``region``
    (all years, all sexes) and clears previous fit results.
    """
    ### @export 'load model'
    dm = dismod3.load_disease_model(16391)

    ### @export 'set expert priors'
    dm.set_param_age_mesh(pl.arange(0, 101, 10))
    gp = dm.params['global_priors']
    gp['smoothness']['prevalence']['amount'] = 'Moderately'
    gp['heterogeneity']['prevalence'] = 'Slightly'
    gp['level_value']['prevalence'] = dict(value=0., age_before=0, age_after=100)
    gp['level_bounds']['prevalence'] = dict(lower=0., upper=.1)
    gp['increasing']['prevalence'] = dict(age_start=0, age_end=0)
    gp['decreasing']['prevalence'] = dict(age_start=100, age_end=100)

    # keep only the bias study-level covariate; drop country-level effects
    covs = dm.params['covariates']
    covs['Study_level']['bias']['rate']['value'] = 1
    for name in covs['Country_level']:
        covs['Country_level'][name]['rate']['value'] = 0

    ### @export 'initialize model data'
    dm.data = [row for row in dm.data
               if dm.relevant_to(row, 'prevalence', region, 'all', 'all')]

    # fit model
    dm.clear_fit()
    dm.clear_empirical_prior()
    dismod3.neg_binom_model.covariate_hash = {}
    return dm
def initialize_model():
    """Load model 16391 with slight smoothing for a region/year prevalence fit.

    Keeps only prevalence data relevant to the module-level ``region`` and
    ``year`` (all sexes) and clears previous fit results.
    """
    ### @export 'load model'
    dm = dismod3.load_disease_model(16391)

    ### @export 'set expert priors'
    dm.set_param_age_mesh(pl.arange(0, 101, 10))
    gp = dm.params['global_priors']
    gp['smoothness']['prevalence']['amount'] = 'Slightly'
    gp['heterogeneity']['prevalence'] = 'Slightly'
    gp['level_value']['prevalence'] = dict(value=0., age_before=0, age_after=100)
    gp['level_bounds']['prevalence'] = dict(lower=0., upper=.1)
    gp['increasing']['prevalence'] = dict(age_start=0, age_end=0)
    gp['decreasing']['prevalence'] = dict(age_start=100, age_end=100)

    # keep only the bias study-level covariate; drop country-level effects
    covs = dm.params['covariates']
    covs['Study_level']['bias']['rate']['value'] = 1
    for name in covs['Country_level']:
        covs['Country_level'][name]['rate']['value'] = 0

    ### @export 'initialize model data'
    dm.data = [row for row in dm.data
               if dm.relevant_to(row, 'prevalence', region, year, 'all')]

    # fit model
    dm.clear_fit()
    dm.clear_empirical_prior()
    dismod3.neg_binom_model.covariate_hash = {}
    return dm
def validate_prior_similarity():
    """Refit one regional rate under each heterogeneity setting and plot results.

    Loads model 20928, restricts its data to the sub-Saharan Africa
    (central) / male / 2005 subset, then fits prevalence ('p') once for
    each heterogeneity level and shows convergence and fit plots for each.
    Returns the DiseaseJson wrapper after the last fit.
    """
    # alternate test case, kept for reference:
    #dm = dismod3.load_disease_model(20945)
    #dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    #t = 'i'
    #area, sex, year = 'europe_eastern', 'male', 2005

    dm = dismod3.load_disease_model(20928)
    dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    t = 'p'  # rate type under validation
    area, sex, year = 'sub-saharan_africa_central', 'male', 2005

    # select data that is about areas in this region, recent years, and sex of male or total only
    model = dm.model
    subtree = nx.traversal.bfs_tree(model.hierarchy, area)
    relevant_rows = [i for i, r in model.input_data.T.iteritems() \
                     if (r['area'] in subtree or r['area'] == 'all') \
                     and ((year == 2005 and r['year_end'] >= 1997) or r['year_start'] <= 1997) \
                     and r['sex'] in [sex, 'total']]
    model.input_data = model.input_data.ix[relevant_rows]

    # replace area 'all' with area, so global rows inform the regional fit
    model.input_data['area'][model.input_data['area'] == 'all'] = area

    for het in 'Slightly Moderately Very'.split():
        dm.model.parameters[t]['parameter_age_mesh'] = [0, 15, 20, 25, 35, 45, 55, 65, 75, 100]
        dm.model.parameters[t]['heterogeneity'] = het
        # NOTE(review): setup_regional_model is a module-level helper;
        # presumably it populates dm.emp_priors used below -- confirm there
        setup_regional_model(dm, area, sex, year)

        dm.vars = {}
        dm.vars[t] = data_model.data_model(t, dm.model, t,
                                           root_area=area, root_sex=sex, root_year=year,
                                           mu_age=None,
                                           mu_age_parent=dm.emp_priors[t, 'mu'],
                                           sigma_age_parent=dm.emp_priors[t, 'sigma'],
                                           # 'rr' gets a log-normal likelihood; all else negative binomial
                                           rate_type=(t == 'rr') and 'log_normal' or 'neg_binom')

        fit_model.fit_data_model(dm.vars[t], iter=10050, burn=5000, thin=50, tune_interval=100)

        #2graphics.plot_one_effects(dm.vars[t], t, dm.model.hierarchy)
        #pl.title(het)
        graphics.plot_convergence_diag(dm.vars[t])
        pl.title(het)
        #graphics.plot_one_ppc(dm.vars[t], t)
        #pl.title(het)
        graphics.plot_one_type(dm.model, dm.vars[t], dm.emp_priors, t)
        pl.title(het)
        pl.show()

    return dm
def initialize_model():
    """Load model 19271 and rebuild standard errors from stored sd columns.

    NOTE(review): relies on module-level ``type``, ``region``, ``year`` and
    ``sex`` being defined elsewhere in this file (``type`` shadows the
    builtin) -- confirm at the call site.
    """
    ### @export 'load model'
    dm = dismod3.load_disease_model(19271)

    ### @export 'initialize model data'
    dm.data = [row for row in dm.data
               if dm.relevant_to(row, type, region, year, sex)]
    for row in dm.data:
        # fall back to parameter_value_old when sd_1enadj is empty/zero;
        # value is per 10,000, and dividing by sqrt(ESS) yields the SE
        raw_sd = float(row['sd_1enadj'] or row['parameter_value_old'])
        row['standard_error'] = raw_sd / 10000. / pl.sqrt(row['effective_sample_size'])
        row.pop('effective_sample_size')

    # fit model
    dm.clear_fit()
    dm.clear_empirical_prior()
    dismod3.neg_binom_model.covariate_hash = {}
    return dm
def fit_emp_prior(id, param_type): """ Fit empirical prior of specified type for specified model Parameters ---------- id : int The model id number for the job to fit param_type : str, one of incidence, prevalence, remission, excess-mortality The disease parameter to generate empirical priors for Example ------- >>> import fit_emp_prior >>> fit_emp_prior.fit_emp_prior(2552, 'incidence') """ #dismod3.log_job_status(id, 'empirical_priors', param_type, 'Running') # load disease model dm = dismod3.load_disease_model(id) #dm.data = [] # remove all data to speed up computation, for test import dismod3.neg_binom_model as model dir = dismod3.settings.JOB_WORKING_DIR % id model.fit_emp_prior(dm, param_type, dbname='%s/empirical_priors/pickle/dm-%d-emp_prior-%s.pickle' % (dir, id, param_type)) # generate empirical prior plots from pylab import subplot for sex in dismod3.settings.gbd_sexes: for year in dismod3.settings.gbd_years: keys = dismod3.utils.gbd_keys(region_list=['all'], year_list=[year], sex_list=[sex], type_list=[param_type]) dismod3.tile_plot_disease_model(dm, keys, defaults={}) dm.savefig('dm-%d-emp_prior-%s-%s-%s.png' % (id, param_type, sex, year)) # TODO: put this in a separate script, which runs after all empirical priors are computed for effect in ['alpha', 'beta', 'gamma', 'delta']: dismod3.plotting.plot_empirical_prior_effects([dm], effect) dm.savefig('dm-%d-emp-prior-%s-%s.png' % (id, param_type, effect)) # summarize fit quality graphically, as well as parameter posteriors k0 = keys[0] dm.vars = {k0: dm.vars} # hack to make posterior predictions plot dismod3.plotting.plot_posterior_predicted_checks(dm, k0) dm.savefig('dm-%d-emp-prior-check-%s.png' % (dm.id, param_type)) dm.vars = dm.vars[k0] # undo hack to make posterior predictions plot # save results (do this last, because it removes things from the disease model that plotting function, etc, might need dm.save('dm-%d-prior-%s.json' % (id, param_type)) dismod3.try_posting_disease_model(dm, ntries=5) 
#dismod3.log_job_status(id, 'empirical_priors', param_type, 'Completed') return dm
def find_fnrfx(model, disease, data_type, country, sex, year):
    '''add fixed and random effects from GBD as priors to new model

    Reads posterior effect-coefficient csv files from the j drive and
    stores them as Constant priors in ``model.parameters[data_type]``;
    falls back to a weakly-informative TruncatedNormal prior for each
    dispersion term when no empirical prior is available.
    '''
    # create dummy model to get appropriate Model.vars fields
    dummy = load_new_model(disease, country, sex)
    dummy.vars += dismod3.ism.age_specific_rate(dummy, data_type)
    vars = dummy.vars[data_type]

    # save random effects
    try:
        emp_re = pandas.read_csv(
            '/home/j/Project/dismod/output/dm-%s/posterior/re-%s-%s+%s+%s.csv'
            % (disease, data_type, geo_info(country, disease), sex, year),
            index_col=0)
        for col in emp_re.index:
            model.parameters[data_type]['random_effects'][col] = dict(
                dist='Constant',
                mu=emp_re.ix[col, 'mu_coeff'],
                sigma=emp_re.ix[col, 'sigma_coeff'])
    except Exception:
        # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are
        # no longer swallowed; a missing csv simply means no priors to apply
        pass

    # also save empirical prior on sigma_alpha, the dispersion of the random effects
    dm = dismod3.load_disease_model(disease)
    for n in vars['sigma_alpha']:
        try:
            dm_na = dm.get_empirical_prior(full_name[data_type])['new_alpha']
            model.parameters[data_type]['random_effects'][n.__name__] = dict(
                dist=dm_na[n.__name__]['dist'],
                mu=dm_na[n.__name__]['mu'],
                sigma=dm_na[n.__name__]['sigma'],
                lower=dm_na[n.__name__]['lower'],
                upper=dm_na[n.__name__]['upper'])
        except Exception:
            # narrowed from a bare `except:`; use a weak default prior on dispersion
            model.parameters[data_type]['random_effects'][n.__name__] = dict(
                dist='TruncatedNormal', mu=.05, sigma=.03**-2,
                lower=0.01, upper=0.5)

    # save fixed effects
    emp_fe = pandas.read_csv(
        '/home/j/Project/dismod/output/dm-%s/posterior/fe-%s-%s+%s+%s.csv'
        % (disease, data_type, geo_info(country, disease), sex, year),
        index_col=0)
    for n, col in zip(vars['beta'], vars['X'].columns):
        model.parameters[data_type]['fixed_effects'][col] = dict(
            dist='Constant',
            mu=emp_fe.ix[col, 'mu_coeff'],
            sigma=emp_fe.ix[col, 'sigma_coeff'])
def fetch_disease_model_if_necessary(id, dir_name):
    """Load model data from dir_name, downloading and converting on a cache miss.

    Parameters
    ----------
    id : int
      dismod model id to download if no local copy exists
    dir_name : str
      local directory holding (or to hold) the new-format model data

    Returns the loaded ModelData instance.
    """
    try:
        model = ModelData.load(dir_name)
        print('loaded data from new format from %s' % dir_name)
    except (IOError, AssertionError):
        import os
        # guard makedirs: load can also fail (AssertionError) when the
        # directory already exists but holds incomplete data, in which case
        # an unconditional makedirs would raise OSError
        if not os.path.isdir(dir_name):
            os.makedirs(dir_name)
        import dismod3.disease_json
        dm = dismod3.load_disease_model(id)
        import simplejson as json
        model = ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        model.save(dir_name)
        print('loaded data from json, saved in new format for next time in %s' % dir_name)
    print('model has %d rows of input data' % len(model.input_data.index))
    return model
def main():
    """Parse command-line options and fit the posterior for one disease model."""
    import optparse

    parser = optparse.OptionParser('usage: %prog [options] disease_model_id')
    # (short flag, long flag, default, help) for every supported option
    option_specs = [
        ('-s', '--sex', 'male', 'only estimate given sex (valid settings ``male``, ``female``, ``all``)'),
        ('-y', '--year', '2005', 'only estimate given year (valid settings ``1990``, ``2005``, ``2010``)'),
        ('-r', '--region', 'australasia', 'only estimate given GBD Region'),
        ('-f', '--fast', 'False', 'use MAP only'),
        ('-i', '--inconsistent', 'False', 'use inconsistent model for posteriors'),
        ('-t', '--types', 'p i r', 'with rate types to fit (only used if inconsistent=true)'),
        ('-z', '--zerore', 'true', 'enforce zero constraint on random effects'),
        ('-o', '--onlyposterior', 'False', 'skip empirical prior phase'),
    ]
    for short_flag, long_flag, default, help_text in option_specs:
        parser.add_option(short_flag, long_flag, default=default, help=help_text)

    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('incorrect number of arguments')
    try:
        id = int(args[0])
    except ValueError:
        parser.error('disease_model_id must be an integer')

    dm = dismod3.load_disease_model(id)

    # set model id to passed-in id (should not be necessary)
    dm.id = id
    assert id == dm.id, 'model id should equal parameter id'

    def truthy(flag):
        # option values are strings; case-insensitive 'true' means True
        return flag.lower() == 'true'

    dm = fit_posterior(dm, options.region, options.sex, options.year,
                       fast_fit=truthy(options.fast),
                       inconsistent_fit=truthy(options.inconsistent),
                       params_to_fit=options.types.split(),
                       posteriors_only=truthy(options.onlyposterior),
                       zero_re=truthy(options.zerore))
    return dm
def fit_posterior(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years

    Example
    -------
    >>> import fit_posterior
    >>> fit_posterior.fit_posterior(2552, 'asia_east', 'male', '2005')
    """
    dm = dismod3.load_disease_model(id)
    #dm.data = []  # for testing, remove all data

    # computed once here; the original recomputed the identical key list a
    # second time just before saving, and assigned an unused `dir` local
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year],
                                  sex_list=[sex], type_list=['prevalence'])

    # fit the model
    import dismod3.neg_binom_model as model
    k0 = keys[0]
    dm.vars = {}
    dm.vars[k0] = model.setup(dm, k0, dm.data)

    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=50000, burn=25000, thin=50, verbose=1)
    dm.map = mc.MAP(dm.vars)
    dm.map.fit()
    model.store_mcmc_fit(dm, k0, dm.vars[k0])

    # save results (do this last, because it removes things from the disease
    # model that plotting functions, etc, might need)
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (id, region, sex, year),
            keys_to_save=keys)
    return dm
def upload_fits(id):
    """ Send results of cluster fits to dismod server

    Parameters
    ----------
    id : int
      The model id number

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    >>> import upload_fits
    >>> upload_fits.upload_fits(2552)
    """
    # loading merges together results from all fits; then post them upstream
    merged_model = dismod3.load_disease_model(id)
    dismod3.try_posting_disease_model(merged_model, ntries=5)
def find_fnrfx(model, disease, data_type, country, sex, year):
    '''add fixed and random effects from GBD as priors to new model

    Effect coefficients are read from posterior csv files on the j drive
    and stored as Constant priors; dispersion terms fall back to a weak
    TruncatedNormal prior when no empirical prior can be loaded.
    '''
    # create dummy model to get appropriate Model.vars fields
    dummy = load_new_model(disease, country, sex)
    dummy.vars += dismod3.ism.age_specific_rate(dummy, data_type)
    vars = dummy.vars[data_type]

    # save random effects
    try:
        emp_re = pandas.read_csv('/home/j/Project/dismod/output/dm-%s/posterior/re-%s-%s+%s+%s.csv'%(disease, data_type, geo_info(country,disease), sex, year), index_col=0)
        for col in emp_re.index:
            model.parameters[data_type]['random_effects'][col] = dict(dist='Constant',
                                                                      mu=emp_re.ix[col, 'mu_coeff'],
                                                                      sigma=emp_re.ix[col, 'sigma_coeff'])
    except Exception:
        # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # pass through; a missing csv simply means no priors to apply
        pass

    # also save empirical prior on sigma_alpha, the dispersion of the random effects
    dm = dismod3.load_disease_model(disease)
    for n in vars['sigma_alpha']:
        try:
            dm_na = dm.get_empirical_prior(full_name[data_type])['new_alpha']
            model.parameters[data_type]['random_effects'][n.__name__] = dict(dist = dm_na[n.__name__]['dist'],
                                                                             mu = dm_na[n.__name__]['mu'],
                                                                             sigma = dm_na[n.__name__]['sigma'],
                                                                             lower = dm_na[n.__name__]['lower'],
                                                                             upper = dm_na[n.__name__]['upper'])
        except Exception:
            # narrowed from a bare `except:`; weak default prior on dispersion
            model.parameters[data_type]['random_effects'][n.__name__] = dict(dist = 'TruncatedNormal',
                                                                             mu = .05,
                                                                             sigma = .03**-2,
                                                                             lower = 0.01,
                                                                             upper = 0.5)

    # save fixed effects
    emp_fe = pandas.read_csv('/home/j/Project/dismod/output/dm-%s/posterior/fe-%s-%s+%s+%s.csv'%(disease, data_type, geo_info(country,disease), sex, year), index_col=0)
    for n, col in zip(vars['beta'], vars['X'].columns):
        model.parameters[data_type]['fixed_effects'][col] = dict(dist = 'Constant',
                                                                 mu = emp_fe.ix[col, 'mu_coeff'],
                                                                 sigma = emp_fe.ix[col, 'sigma_coeff'])
def initialize_model():
    """Prepare disease model 19807 for fitting.

    Applies level-bound and monotonicity priors, zeroes out all covariate
    effects, nearly fixes the sex/time/region effects on prevalence, and
    clears any previous fit before returning the model.
    """
    ### @export 'load model'
    dm = dismod3.load_disease_model(19807)

    ### @export 'initialize model data'
    global_priors = dm.params['global_priors']
    global_priors['level_bounds']['excess_mortality'] = dict(lower=.1, upper=100.)
    global_priors['increasing']['excess_mortality'] = dict(age_start=0, age_end=0)
    global_priors['level_bounds']['relative_risk'] = dict(lower=0., upper=10000.)

    # disable all covariate effects
    study_covs = dm.params['covariates']['Study_level']
    for name in study_covs:
        study_covs[name]['rate']['value'] = 0
    country_covs = dm.params['covariates']['Country_level']
    for name in country_covs:
        country_covs[name]['rate']['value'] = 0

    # tight (std .001) priors on sex/time/region effects for prevalence
    level = .001
    hi_ci = pl.exp(level * 1.96)
    lo_ci = pl.exp(-level * 1.96)
    dm.params['sex_effect_prevalence'] = dict(mean=1, upper_ci=hi_ci, lower_ci=lo_ci)
    dm.params['time_effect_prevalence'] = dict(mean=1, upper_ci=hi_ci, lower_ci=lo_ci)
    dm.params['region_effect_prevalence'] = dict(std=level)

    dm.clear_fit()
    dm.clear_empirical_prior()
    dismod3.neg_binom_model.covariate_hash = {}
    return dm
def upload_fits(id):
    """ Send results of cluster fits to dismod server

    Also plots the empirical-prior effects and writes table output,
    tolerating (and reporting) failures in either step.

    Parameters
    ----------
    id : int
      The model id number

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    >>> import upload_fits
    >>> upload_fits.upload_fits(2552)
    """
    # load disease model
    dm = dismod3.load_disease_model(id)  # this merges together results from all fits

    # save dta output; renamed from `dir` to avoid shadowing the builtin
    dir_name = dismod3.settings.JOB_WORKING_DIR % id  # TODO: refactor into a function
    #dm_to_dta(dm, '%s/regional_predictions' % dir_name)

    # plot empirical priors (in a separate script, to run after all empirical priors are computed)
    for effect in ['alpha', 'beta', 'gamma', 'delta']:
        try:
            dismod3.plotting.plot_empirical_prior_effects([dm], effect)
            dm.savefig('dm-%d-emp-prior-%s.png' % (id, effect))
        except Exception:
            print('failed to plot %s' % effect)

    # save table output
    try:
        dismod3.table.make_tables(dm)
    except Exception as e:  # `except Exception, e` modernized for consistency
        print('Failed to make table')
        print(e)
def upload_fits(id):
    """ Send results of cluster fits to dismod server

    Also plots the empirical-prior effects and writes table output,
    tolerating (and reporting) failures in either step.

    Parameters
    ----------
    id : int
      The model id number

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    >>> import upload_fits
    >>> upload_fits.upload_fits(2552)
    """
    # load disease model
    dm = dismod3.load_disease_model(id)  # this merges together results from all fits

    # save dta output; renamed from `dir` to avoid shadowing the builtin
    dir_name = dismod3.settings.JOB_WORKING_DIR % id  # TODO: refactor into a function
    #dm_to_dta(dm, '%s/regional_predictions' % dir_name)

    # plot empirical priors (in a separate script, to run after all empirical priors are computed)
    for effect in ['alpha', 'beta', 'gamma', 'delta']:
        try:
            dismod3.plotting.plot_empirical_prior_effects([dm], effect)
            dm.savefig('dm-%d-emp-prior-%s.png' % (id, effect))
        except Exception:
            print('failed to plot %s' % effect)

    # save table output
    try:
        dismod3.table.make_tables(dm)
    except Exception as e:  # `except Exception, e` modernized for consistency
        print('Failed to make table')
        print(e)
def fit_without_confrontation(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model
    without trying to integrate conflicting sources of data

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years
    """
    ## load model
    dm = dismod3.load_disease_model(id)

    ## separate out prevalence and relative-risk data
    prev_data = [d for d in dm.data if dm.relevant_to(d, 'prevalence', region, year, sex)]
    rr_data = [d for d in dm.data if dm.relevant_to(d, 'relative-risk', region, year, sex)]
    # fit on everything except the withheld prevalence / relative-risk rows
    dm.data = [d for d in dm.data if not d in prev_data and not d in rr_data]

    ### setup the generic disease model (without prevalence data)
    import dismod3.gbd_disease_model as model
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])
    dm.calc_effective_sample_size(dm.data)
    dm.vars = model.setup(dm, keys)

    ## override the birth prevalence prior, based on the withheld prevalence data
    logit_C_0 = dm.vars[dismod3.utils.gbd_key_for('bins', region, year, sex)]['initial']['logit_C_0']
    assert len(prev_data) == 1, 'should be a single prevalance datum'
    d = prev_data[0]
    # center the prior at the observed prevalence (on the logit scale)
    mu_logit_C_0 = mc.logit(dm.value_per_1(d) + dismod3.settings.NEARLY_ZERO)
    lb, ub = dm.bounds_per_1(d)
    # width derived from the datum's uncertainty interval (95% => 2*1.96 sd)
    sigma_logit_C_0 = (mc.logit(ub + dismod3.settings.NEARLY_ZERO) - mc.logit(lb + dismod3.settings.NEARLY_ZERO)) / (2 * 1.96)
    print 'mu_C_0_pri:', mc.invlogit(mu_logit_C_0)
    print 'ui_C_0_pri:', lb, ub

    # override the excess-mortality, based on the relative-risk data
    mu_rr = 1.01 * np.ones(dismod3.settings.MAX_AGE)
    sigma_rr = .01 * np.ones(dismod3.settings.MAX_AGE)
    for d in rr_data:
        # fill the full age range covered by each relative-risk datum
        mu_rr[d['age_start']:(d['age_end'] + 1)] = dm.value_per_1(d)
        sigma_rr[d['age_start']:(d['age_end'] + 1)] = dm.se_per_1(d)
    print 'mu_rr:', mu_rr.round(2)
    #print 'sigma_rr:', sigma_rr.round(2)

    log_f = dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['age_coeffs']
    log_f_mesh = log_f.parents['gamma_mesh']
    param_mesh = log_f.parents['param_mesh']

    m_all = dm.vars[dismod3.utils.gbd_key_for('all-cause_mortality', region, year, sex)]

    # excess mortality f implied by relative risk: f = (rr - 1) * m_all
    mu_log_f = np.log((mu_rr - 1) * m_all)
    # delta-method-style propagation of sigma_rr onto the log scale
    sigma_log_f = 1 / ((mu_rr - 1) * m_all) * sigma_rr * m_all
    print 'mu_log_f:', mu_log_f.round(2)[param_mesh]
    print 'sigma_log_f:', sigma_log_f.round(2)[param_mesh]

    ### fit the model using Monte Carlo simulation (shoehorned into the MCMC framework of PyMC)
    dm.mcmc = mc.MCMC(dm.vars)
    # these two stochastics are sampled directly from their override priors...
    dm.mcmc.use_step_method(SampleFromNormal, logit_C_0, mu=mu_logit_C_0, tau=sigma_logit_C_0**-2)
    dm.mcmc.use_step_method(SampleFromNormal, log_f_mesh, mu=mu_log_f[param_mesh], tau=sigma_log_f[param_mesh]**-2)
    # ...while every other stochastic is frozen at its current value
    for stoch in dm.mcmc.stochastics:
        dm.mcmc.use_step_method(mc.NoStepper, stoch)
    dm.mcmc.sample(1000, verbose=dismod3.settings.ON_SGE)

    #print 'mu_C_0_post:', mc.invlogit(logit_C_0.stats()['mean']).round(2)
    #print 'ui_C_0_post:', mc.invlogit(logit_C_0.stats()['95% HPD interval']).round(2)
    #print 'mu_rr_post:', dm.vars[dismod3.utils.gbd_key_for('relative-risk', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)
    print 'mu_log_f_mesh_post:', log_f_mesh.stats()['mean'].round(2)
    print 'mu_f_post:', dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)

    # store the sampled posteriors, dispatching on rate type
    for k in keys:
        t, r, y, s = dismod3.utils.type_region_year_sex_from_key(k)
        if t in ['incidence', 'prevalence', 'remission', 'excess-mortality', 'mortality', 'prevalence_x_excess-mortality']:
            dismod3.neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])
        elif t in ['relative-risk', 'duration', 'incidence_x_duration']:
            dismod3.normal_model.store_mcmc_fit(dm, k, dm.vars[k])

    from fit_posterior import save_country_level_posterior
    if str(year) == '2005':  # also generate 2010 estimates
        save_country_level_posterior(dm, region, 2010, sex, ['prevalence', 'remission'])
    save_country_level_posterior(dm, region, year, sex, ['prevalence', 'remission'])  #'prevalence incidence remission excess-mortality duration mortality relative-risk'.split())

    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (dm.id, region, sex, year), keys_to_save=keys)
    return dm
def fit_emp_prior(
    id,
    param_type,
    fast_fit=False,
    generate_emp_priors=True,
    zero_re=True,
    alt_prior=False,
    global_heterogeneity="Slightly",
):
    """ Fit empirical prior of specified type for specified model

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    param_type : str, one of incidence, prevalence, remission, excess-mortality, prevalence_x_excess-mortality
      The disease parameter to generate empirical priors for

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    """
    dir = dismod3.settings.JOB_WORKING_DIR % id  # NOTE(review): shadows the builtin `dir`

    ## load the model from disk or from web
    import simplejson as json
    import data
    reload(data)

    dm = dismod3.load_disease_model(id)

    try:
        model = data.ModelData.load(dir)
        print "loaded data from new format from %s" % dir
    except (IOError, AssertionError):
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        # model.save(dir)
        print "loaded data from json, saved in new format for next time in %s" % dir

    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith("x_"):
            model.input_data[col] = model.input_data[col].fillna(0.0)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)

    # set all heterogeneity priors to Slightly for the global fit
    for t in model.parameters:
        if "heterogeneity" in model.parameters[t]:
            model.parameters[t]["heterogeneity"] = global_heterogeneity

    # map the long parameter name to its one/two-letter rate-type code
    t = {
        "incidence": "i",
        "prevalence": "p",
        "remission": "r",
        "excess-mortality": "f",
        "prevalence_x_excess-mortality": "pf",
    }[param_type]

    model.input_data = model.get_data(t)
    if len(model.input_data) == 0:
        print "No data for type %s, exiting" % param_type
        return dm

    ### For testing:
    ## speed up computation by reducing number of knots
    ## model.parameters[t]['parameter_age_mesh'] = [0, 10, 20, 40, 60, 100]
    ## smooth Slightly, Moderately, or Very
    ## model.parameters[t]['smoothness'] = dict(age_start=0, age_end=100, amount='Very')
    ## speed up computation be reducing data size
    ## predict_area = 'super-region_0'
    ## predict_year=2005
    ## predict_sex='total'
    ## subtree = nx.traversal.bfs_tree(model.hierarchy, predict_area)
    ## relevant_rows = [i for i, r in model.input_data.T.iteritems() \
    ##                  if (r['area'] in subtree or r['area'] == 'all')\
    ##                  and (r['year_end'] >= 1997) \
    ##                  and r['sex'] in [predict_sex, 'total']]
    ## model.input_data = model.input_data.ix[relevant_rows]

    # testing changes
    # model.input_data['effective_sample_size'] = pl.minimum(1.e3, model.input_data['effective_sample_size'])
    # missing_ess = pl.isnan(model.input_data['effective_sample_size'])
    # model.input_data['effective_sample_size'][missing_ess] = 1.
    # model.input_data['z_overdisperse'] = 1.
    # print model.describe(t)
    # model.input_data = model.input_data[model.input_data['area'].map(lambda x: x in nx.bfs_tree(model.hierarchy, 'super-region_5'))]
    # model.input_data = model.input_data = model.input_data.drop(['x_LDI_id_Updated_7July2011'], axis=1)
    # model.input_data = model.input_data.filter([model.input_data['x_nottroponinuse'] == 0.]
    # model.input_data = model.input_data[:100]

    ## speed up output by not making predictions for empirical priors
    # generate_emp_priors = False

    print "fitting", t
    # build the age-specific-rate submodel at the reference (global) level
    model.vars += ism.age_specific_rate(
        model,
        t,
        reference_area="all",
        reference_sex="total",
        reference_year="all",
        mu_age=None,
        mu_age_parent=None,
        sigma_age_parent=None,
        rate_type=(t == "rr") and "log_normal" or "neg_binom",
        zero_re=zero_re,
    )

    # for backwards compatibility, should be removed eventually
    dm.model = model
    dm.vars = model.vars[t]
    vars = dm.vars

    if fast_fit:
        dm.map, dm.mcmc = dismod3.fit.fit_asr(model, t, iter=101, burn=0, thin=1, tune_interval=100)
    else:
        dm.map, dm.mcmc = dismod3.fit.fit_asr(
            model, t, iter=50000, burn=10000, thin=40, tune_interval=1000, verbose=True
        )

    # record posterior-predictive summaries and residuals alongside the data
    stats = dm.vars["p_pred"].stats(batches=5)
    dm.vars["data"]["mu_pred"] = stats["mean"]
    dm.vars["data"]["sigma_pred"] = stats["standard deviation"]

    stats = dm.vars["pi"].stats(batches=5)
    dm.vars["data"]["mc_error"] = stats["mc error"]

    dm.vars["data"]["residual"] = dm.vars["data"]["value"] - dm.vars["data"]["mu_pred"]
    dm.vars["data"]["abs_residual"] = pl.absolute(dm.vars["data"]["residual"])

    graphics.plot_fit(model, data_types=[t], ylab=["PY"], plot_config=(1, 1), fig_size=(8, 8))
    if generate_emp_priors:
        # predict the empirical prior curve for every region/sex/year cell
        for a in [dismod3.utils.clean(a) for a in dismod3.settings.gbd_regions]:
            print "generating empirical prior for %s" % a
            for s in dismod3.settings.gbd_sexes:
                for y in dismod3.settings.gbd_years:
                    key = dismod3.utils.gbd_key_for(param_type, a, y, s)
                    if t in model.parameters and "level_bounds" in model.parameters[t]:
                        lower = model.parameters[t]["level_bounds"]["lower"]
                        upper = model.parameters[t]["level_bounds"]["upper"]
                    else:
                        lower = 0
                        upper = pl.inf
                    emp_priors = covariate_model.predict_for(
                        model,
                        model.parameters[t],
                        "all",
                        "total",
                        "all",
                        a,
                        dismod3.utils.clean(s),
                        int(y),
                        alt_prior,
                        vars,
                        lower,
                        upper,
                    )
                    dm.set_mcmc("emp_prior_mean", key, emp_priors.mean(0))
                    if "eta" in vars:
                        N, A = emp_priors.shape  # N samples, for A age groups
                        delta_trace = pl.transpose(
                            [pl.exp(vars["eta"].trace()) for _ in range(A)]
                        )  # shape delta matrix to match prediction matrix
                        # inflate the std to account for negative-binomial overdispersion
                        emp_prior_std = pl.sqrt(emp_priors.var(0) + (emp_priors ** 2 / delta_trace).mean(0))
                    else:
                        emp_prior_std = emp_priors.std(0)
                    dm.set_mcmc("emp_prior_std", key, emp_prior_std)

                    pl.plot(
                        model.parameters["ages"],
                        dm.get_mcmc("emp_prior_mean", key),
                        color="grey",
                        label=a,
                        zorder=-10,
                        alpha=0.5,
                    )
    pl.savefig(dir + "/prior-%s.png" % param_type)

    store_effect_coefficients(dm, vars, param_type)

    # graphics.plot_one_ppc(vars, t)
    # pl.savefig(dir + '/prior-%s-ppc.png'%param_type)
    graphics.plot_acorr(model)
    pl.savefig(dir + "/prior-%s-convergence.png" % param_type)
    graphics.plot_trace(model)
    pl.savefig(dir + "/prior-%s-trace.png" % param_type)

    graphics.plot_one_effects(model, t)
    pl.savefig(dir + "/prior-%s-effects.png" % param_type)

    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    try:
        dm.save("dm-%d-prior-%s.json" % (id, param_type))
    except IOError, e:
        print e
def validate_prior_similarity():
    """Fit one regional rate under each heterogeneity setting and plot results.

    Loads model 20928, restricts the data to the sub-Saharan Africa
    (central) / male / 2005 subset, then fits prevalence ('p') for each
    heterogeneity level and displays convergence and fit plots.
    Returns the DiseaseJson wrapper after the last fit.
    """
    # alternate test case, kept for reference:
    #dm = dismod3.load_disease_model(20945)
    #dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    #t = 'i'
    #area, sex, year = 'europe_eastern', 'male', 2005

    dm = dismod3.load_disease_model(20928)
    dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    t = 'p'  # rate type under validation
    area, sex, year = 'sub-saharan_africa_central', 'male', 2005

    # select data that is about areas in this region, recent years, and sex of male or total only
    model = dm.model
    subtree = nx.traversal.bfs_tree(model.hierarchy, area)
    relevant_rows = [i for i, r in model.input_data.T.iteritems() \
                     if (r['area'] in subtree or r['area'] == 'all') \
                     and ((year == 2005 and r['year_end'] >= 1997) or r['year_start'] <= 1997) \
                     and r['sex'] in [sex, 'total']]
    model.input_data = model.input_data.ix[relevant_rows]

    # replace area 'all' with area, so global rows inform the regional fit
    model.input_data['area'][model.input_data['area'] == 'all'] = area

    for het in 'Slightly Moderately Very'.split():
        dm.model.parameters[t]['parameter_age_mesh'] = [
            0, 15, 20, 25, 35, 45, 55, 65, 75, 100
        ]
        dm.model.parameters[t]['heterogeneity'] = het
        # NOTE(review): setup_regional_model is a module-level helper;
        # presumably it populates dm.emp_priors used below -- confirm there
        setup_regional_model(dm, area, sex, year)

        dm.vars = {}
        dm.vars[t] = data_model.data_model(
            t,
            dm.model,
            t,
            root_area=area,
            root_sex=sex,
            root_year=year,
            mu_age=None,
            mu_age_parent=dm.emp_priors[t, 'mu'],
            sigma_age_parent=dm.emp_priors[t, 'sigma'],
            # 'rr' gets a log-normal likelihood; all else negative binomial
            rate_type=(t == 'rr') and 'log_normal' or 'neg_binom')

        fit_model.fit_data_model(dm.vars[t], iter=10050, burn=5000, thin=50, tune_interval=100)

        #2graphics.plot_one_effects(dm.vars[t], t, dm.model.hierarchy)
        #pl.title(het)
        graphics.plot_convergence_diag(dm.vars[t])
        pl.title(het)
        #graphics.plot_one_ppc(dm.vars[t], t)
        #pl.title(het)
        graphics.plot_one_type(dm.model, dm.vars[t], dm.emp_priors, t)
        pl.title(het)
        pl.show()

    return dm
# Compare country-level prevalence draws (Y, from an earlier step) against a
# fresh MAP-only fit of model 19807 for north_africa_middle_east males, 2005.
# NOTE(review): depends on module-level `Y`, `weighted_age`, `pl`, and `pandas`
# being defined earlier in the file.
pl.figure()
for iso in list(pl.unique(Y["Iso3"])):
    # mean across posterior draws, one curve per country
    pl.plot(Y[(Y["Iso3"] == iso) & (Y["Rate type"] == "prevalence")].filter(like="Draw").mean(1).__array__(), label=iso)
pl.semilogy([1], [1])
# age-weighted regional aggregate of the prevalence draws
Z = Y[Y["Rate type"] == "prevalence"].groupby("Age").apply(weighted_age)
pl.plot(Z.mean(1).__array__(), color="red", linewidth=3, alpha=0.5, label="Inconsistent NA/ME")
pl.legend()
pl.axis([-5, 130, 1e-6, 2])

import dismod3

dm = dismod3.load_disease_model(19807)

import fit_posterior

fit_posterior.fit_posterior(dm, "north_africa_middle_east", "male", "2005", map_only=True)
X = pandas.read_csv(
    "/var/tmp/dismod_working/test/dm-19807/posterior/dm-19807-north_africa_middle_east-male-2005.csv", index_col=None
)

pl.figure()
for iso in list(pl.unique(X["Iso3"])):
    pl.plot(X[(X["Iso3"] == iso)].filter(like="Draw").mean(1).__array__(), label=iso)
pl.semilogy([1], [1])
Z = X.groupby("Age").apply(weighted_age)
# NOTE(review): bare `plot` (not `pl.plot`) -- works only if pylab was
# star-imported at module level; confirm, otherwise this raises NameError
plot(Z.mean(1).__array__(), color="red", linewidth=3, alpha=0.5, label="Inconsistent NA/ME")
# Variant of the comparison above: plot per-country prevalence draws from Y,
# refit model 19807 (MAP only) for north_africa_middle_east males 2005, and
# overlay the consistent-fit posterior mean.
# NOTE(review): depends on module-level `Y`, `weighted_age`, `pl`, `pandas`.
for iso in list(pl.unique(Y['Iso3'])):
    # mean across posterior draws, one curve per country
    pl.plot(Y[(Y['Iso3']==iso)&(Y['Rate type']=='prevalence')].filter(like='Draw').mean(1).__array__(), label=iso)
pl.semilogy([1],[1])
# age-weighted regional aggregate of the prevalence draws
Z = Y[Y['Rate type'] == 'prevalence'].groupby('Age').apply(weighted_age)
pl.plot(Z.mean(1).__array__(), color='red', linewidth=3, alpha=.5, label='Inconsistent NA/ME')
pl.legend()
pl.axis([-5,130,1e-6,2])

import dismod3
dm = dismod3.load_disease_model(19807)
import fit_posterior
fit_posterior.fit_posterior(dm, 'north_africa_middle_east', 'male', '2005', map_only=True)
X = pandas.read_csv('/var/tmp/dismod_working/test/dm-19807/posterior/dm-19807-north_africa_middle_east-male-2005.csv', index_col=None)

pl.figure()
for iso in list(pl.unique(X['Iso3'])):
    pl.plot(X[(X['Iso3']==iso)].filter(like='Draw').mean(1).__array__(), label=iso)
pl.semilogy([1],[1])
Z = X.groupby('Age').apply(weighted_age)
# NOTE(review): bare `plot` (not `pl.plot`) -- works only if pylab was
# star-imported at module level; confirm, otherwise this raises NameError
plot(Z.mean(1).__array__(), color='red', linewidth=3, alpha=.5, label='Inconsistent NA/ME')
plot(dm.vars['prevalence+north_africa_middle_east+2005+male']['rate_stoch'].stats()['mean'], color='red', linewidth=3, alpha=.5, label='Mean of Consistent NA/ME')
def fit_all(id, consistent_empirical_prior=True, consistent_posterior=True, posteriors_only=False, posterior_types='p i r', fast=False, zero_re=True, alt_prior=True, global_heterogeneity='Slightly'):
    """ Enqueues all jobs necessary to fit specified model to the cluster

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    consistent_empirical_prior : bool, optional
      if True, run one consistent world fit (fit_world.py) for the empirical
      prior; otherwise run fit_emp_prior.py once per rate type
    consistent_posterior : bool, optional
      if False, pass --inconsistent=True with `posterior_types` to each
      region/sex/year posterior job
    posteriors_only : bool, optional
      if True, skip the empirical-prior phase and any SGE hold on it
    posterior_types : str, optional
      space-separated rate types, used only when consistent_posterior=False
    fast : bool, optional
      forwarded to sub-jobs as --fast
    zero_re : bool, optional
      forwarded to sub-jobs as --zerore
    alt_prior : bool, optional
      forwarded to sub-jobs as --altprior
    global_heterogeneity : str, optional
      forwarded to sub-jobs as --globalheterogeneity

    Example
    -------
    >>> import fit_all
    >>> fit_all.fit_all(2552)
    """
    dir = dismod3.settings.JOB_WORKING_DIR % id # TODO: refactor into a function

    try:
        model = data.ModelData.load(dir)
        print 'loaded data from new format from %s' % dir

        # if we make it here, this model has already been run, so clean out the stdout/stderr dirs to make room for fresh messages
        call_str = 'rm -rf %s/empirical_priors/stdout/* %s/empirical_priors/stderr/* %s/posterior/stdout/* %s/posterior/stderr/* %s/json/dm-*-*.json' % (dir, dir, dir, dir, dir)
        print call_str
        # NOTE(review): shell=True with interpolated paths; dir comes from
        # settings + an int id, not user input, so injection risk is low
        subprocess.call(call_str, shell=True)

        # now load just the model, all previous fits are deleted
        dm = dismod3.load_disease_model(id)
    except (IOError, AssertionError):
        # nothing on disk yet (or it failed validation): download and convert
        print 'downloading disease model'
        dm = dismod3.load_disease_model(id)
        import simplejson as json
        try:
            model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        except Exception as e:
            # fall back to the legacy covariate format converter
            print e
            print 'attempting to use old covariate format'
            import old_cov_data
            model = old_cov_data.from_gbd_jsons(json.loads(dm.to_json()))
        model.save(dir)
        print 'loaded data from json, saved in new format for next time in %s' % dir

    def options(fast, zero_re, alt_prior, global_heterogeneity):
        # build the command-line options shared by every sub-job
        call_str = ''
        call_str += ' --fast=%s'%fast
        call_str += ' --zerore=%s'%zero_re
        call_str += ' --altprior=%s'%alt_prior
        call_str += ' --globalheterogeneity=%s'%global_heterogeneity
        return call_str

    # record what was enqueued, and when
    o = '%s/empirical_priors/stdout/%d_running.txt' % (dir, id)
    f = open(o, 'w')
    import time
    f.write('./run_on_cluster.sh fit_all.py --priorconsistent=%s --posteriorconsistent=%s %s\n' % (consistent_empirical_prior, consistent_posterior, options(fast, zero_re, alt_prior, global_heterogeneity)))
    f.write('Enqueued model %d on cluster at %s' % (id, time.strftime('%c')))
    f.close()

    # fit empirical priors (by pooling data from all regions)
    emp_names = []
    if not posteriors_only:
        if consistent_empirical_prior:
            t = 'all'
            o = '%s/empirical_priors/stdout/dismod_log_%s' % (dir, t)
            e = '%s/empirical_priors/stderr/dismod_log_%s' % (dir, t)
            name_str = '%s-%d' %(t[0], id)
            emp_names.append(name_str)
            if dismod3.settings.ON_SGE:
                call_str = 'qsub -cwd -o %s -e %s ' % (o, e) \
                    + '-N %s ' % name_str \
                    + 'run_on_cluster.sh '
            else:
                call_str = 'python '
            call_str += 'fit_world.py %d' % id
            call_str += options(fast, zero_re, alt_prior, global_heterogeneity)
            subprocess.call(call_str, shell=True)
        else:
            # one empirical-prior job per rate type
            for t in ['excess-mortality', 'remission', 'incidence', 'prevalence']:
                o = '%s/empirical_priors/stdout/dismod_log_%s' % (dir, t)
                e = '%s/empirical_priors/stderr/dismod_log_%s' % (dir, t)
                name_str = '%s-%d' %(t[0], id)
                emp_names.append(name_str)
                if dismod3.settings.ON_SGE:
                    call_str = 'qsub -cwd -o %s -e %s ' % (o, e) \
                        + '-N %s ' % name_str \
                        + 'run_on_cluster.sh '
                else:
                    call_str = 'python '
                call_str += 'fit_emp_prior.py %d -t %s' % (id, t)
                call_str += options(fast, zero_re, alt_prior, global_heterogeneity)
                subprocess.call(call_str, shell=True)

    # directory to save the country level posterior csv files
    # NOTE(review): temp_dir is computed but not used in this function
    temp_dir = dir + '/posterior/country_level_posterior_dm-' + str(id) + '/'

    #fit each region/year/sex individually for this model
    # SGE -hold_jid makes the posterior jobs wait for the empirical priors
    hold_str = '-hold_jid %s ' % ','.join(emp_names)
    if posteriors_only:
        hold_str = ''
    post_names = []
    for ii, r in enumerate(dismod3.gbd_regions):
        for s in dismod3.gbd_sexes:
            for y in dismod3.gbd_years:
                k = '%s+%s+%s' % (dismod3.utils.clean(r), dismod3.utils.clean(s), y)
                o = '%s/posterior/stdout/dismod_log_%s' % (dir, k)
                e = '%s/posterior/stderr/dismod_log_%s' % (dir, k)
                name_str = '%s%d%s%s%d' % (r[0], ii+1, s[0], str(y)[-1], id)
                post_names.append(name_str)
                if dismod3.settings.ON_SGE:
                    call_str = 'qsub -cwd -o %s -e %s ' % (o,e) \
                        + hold_str \
                        + '-N %s ' % name_str \
                        + 'run_on_cluster.sh '
                else:
                    call_str = 'python '
                call_str += 'fit_posterior.py %d -r %s -s %s -y %s' % (id, dismod3.utils.clean(r), dismod3.utils.clean(s), y)
                if not consistent_posterior:
                    call_str += ' --inconsistent=True --types="%s"' % posterior_types
                if posteriors_only:
                    call_str += ' --onlyposterior=True'
                if fast:
                    call_str += ' --fast=true'
                call_str += ' --zerore=%s'%zero_re
                subprocess.call(call_str, shell=True)

    # after all posteriors have finished running, upload disease model json
    hold_str = '-hold_jid %s ' % ','.join(post_names)
    o = '%s/empirical_priors/stdout/%d_upload.txt' % (dir, id)
    e = '%s/empirical_priors/stderr/%d_upload.txt' % (dir, id)
    if dismod3.settings.ON_SGE:
        call_str = 'qsub -cwd -o %s -e %s ' % (o,e) \
            + hold_str \
            + '-N upld-%s ' % id \
            + 'run_on_cluster.sh '
    else:
        call_str = 'python '
    call_str += 'upload_fits.py %d' % id
    subprocess.call(call_str, shell=True)

    return dm
import dismod3
import book_graphics
reload(book_graphics)

results = {}

# MCMC settings: posterior predictions, iterations, burn-in, thinning
# NOTE(review): `iter` shadows the builtin
n_pred = 10000
iter = 20000
burn = 10000
thin = 10

# set font
book_graphics.set_font()

### @export 'data'
# TODO: migrate data into a csv, load with pandas
dm = dismod3.load_disease_model(15630)
dm.calc_effective_sample_size(dm.data)

# keep male prevalence rows that start in [15, 20), end at age 99,
# and have a usable effective sample size
some_data = ([d for d in dm.data if d['data_type'] == 'prevalence data' and d['sex'] == 'male' and 15 <= d['age_start'] < 20 and d['age_end'] == 99 and d['effective_sample_size'] > 1])
countries = pl.unique([s['region'] for s in some_data])
min_year = min([s['year_start'] for s in some_data])
max_year = max([s['year_end'] for s in some_data])
cy = ['%s-%d'%(s['region'], s['year_start']) for s in some_data]  # region-year labels

n = pl.array([s['effective_sample_size'] for s in some_data])
r = pl.array([dm.value_per_1(s) for s in some_data])
import pylab as pl
import pymc as mc

import dismod3

import book_graphics
reload(book_graphics)

# set font
book_graphics.set_font()

results = {}

### @export 'data'
#dm = dismod3.load_disease_model(15596) # epilipsy
dm = dismod3.load_disease_model(16240) # af

data = dm.filter_data('prevalence+all+all+all')

# 2-d histogram of (age_start, age_end) pairs over ages 0..100
hist = pl.zeros((101,101))
for d in data:
    hist[d['age_start'], d['age_end']] += 1
most_freq_cnt = hist.max()
rows_total = len(data)

### @export 'scatter-prevalence-age-groups'
pl.figure(**book_graphics.half_page_params)
#pl.subplot(1,2,2)
# NOTE(review): the body of this loop continues beyond this excerpt
for a_0 in range(101):
def measure_fit(id, condition):
    """ Determine the RMSE, MAE, and Coverage of the fit stored in model specified by id

    Parameters
    ----------
    id : int
      model id whose stored fit will be scored
    condition : str
      condition name; gold-standard values are read from
      OUTPUT_PATH + '<condition>_gold.tsv' (excel-tab dialect)

    Returns
    -------
    median absolute relative error (percent) for 'incidence_x_duration'
    """
    print 'downloading model %d' % id
    sys.stdout.flush()
    dm = dismod3.load_disease_model(id)

    #print 'loading gold-standard data'
    gold_data = [ d for d in csv.DictReader(open(OUTPUT_PATH + '%s_gold.tsv' % condition), dialect='excel-tab') ]

    #print 'comparing values'
    abs_err = dict(incidence=[], prevalence=[], remission=[], duration=[], incidence_x_duration=[])
    rel_err = dict(incidence=[], prevalence=[], remission=[], duration=[], incidence_x_duration=[])
    coverage = dict(incidence=[], prevalence=[], remission=[], duration=[], incidence_x_duration=[])
    for metric in [abs_err, rel_err, coverage]:
        metric['excess mortality'] = []

    for d in gold_data:
        est = predict('mean', dm, d)
        lb = predict('lower_ui', dm, d)
        ub = predict('upper_ui', dm, d)
        # a negative estimate signals a failed prediction; skip the row
        if est < 0:
            continue

        val = float(d['Truth'])
        err = val - est
        #if d['Age Start'] <= 50:
        #    continue

        # 'Parameter' values end in ' data'; strip that to get the rate type
        t = d['Parameter'].replace(' data', '')
        abs_err[t].append(err)
        if val > 0.:
            rel_err[t].append(100 * err / val)
        coverage[t].append(val >= lb and val <= ub)

    for k in abs_err:
        print '%s abs RMSE = %f' % (k, np.sqrt(np.mean( np.array(abs_err[k])**2)))
        print '%s abs MAE = %f' % (k, np.median(np.abs(abs_err[k])))
    print
    for k in rel_err:
        print '%s rel pct RMSE = %f' % ( k, np.sqrt(np.mean(np.array(rel_err[k])**2)))
        print '%s rel pct MAE = %f' % (k, np.median(np.abs(rel_err[k])))
    print
    for k in coverage:
        print '%s coverage = %f' % (k, np.sum(coverage[k]) * 100. / len(coverage[k]))
    print

    k = 'incidence_x_duration'
    print '%s rel pct MAE =\t%f' % (k, np.median(np.abs(rel_err[k])))
    return np.median(np.abs(rel_err[k]))

    # NOTE(review): everything below is unreachable (it follows the return
    # above). It looks like a post-processing step that was disabled by
    # adding the early return -- confirm before deleting or re-enabling.
    # add estimate value as a column in the gold data tsv, for looking
    # in more detail with a spreadsheet or different code
    col_names = sorted(set(gold_data[0].keys()) | set(['Estimate Value']))

    f_file = open(OUTPUT_PATH + '%s_gold.tsv' % condition, 'w')
    csv_f = csv.writer(f_file, dialect='excel-tab')
    csv_f.writerow(col_names)

    csv_f = csv.DictWriter(f_file, col_names, dialect='excel-tab')
    for d in gold_data:
        csv_f.writerow(d)
    f_file.close()
def fit_posterior(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model

    Runs a MAP fit to get initial conditions, then MCMC; saves tile plots,
    posterior-predictive check plots, the fitted model json, and the
    country-level posterior csv.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years

    Example
    -------
    >>> import fit_posterior
    >>> fit_posterior.fit_posterior(2552, 'asia_east', 'male', '2005')
    """
    #print 'updating job status on server'
    #dismod3.log_job_status(id, 'posterior', '%s--%s--%s' % (region, sex, year), 'Running')

    dm = dismod3.load_disease_model(id)
    #dm.data = []  # for testing, remove all data

    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])

    # fit the model
    dir = dismod3.settings.JOB_WORKING_DIR % id

    import dismod3.gbd_disease_model as model
    ## first generate decent initial conditions
    model.fit(dm, method='map', keys=keys, verbose=1)
    ## then sample the posterior via MCMC
    model.fit(dm, method='mcmc', keys=keys, iter=50000, thin=25, burn=25000, verbose=1,
              dbname='%s/posterior/pickle/dm-%d-posterior-%s-%s-%s.pickle' % (dir, id, region, sex, year))

    # generate plots of results
    dismod3.tile_plot_disease_model(dm, keys, defaults={})
    dm.savefig('dm-%d-posterior-%s.png' % (id, '+'.join(['all', region, sex, year]))) # TODO: refactor naming into its own function (disease_json.save_image perhaps)

    # one tile plot per output data type
    for param_type in dismod3.settings.output_data_types:
        keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex], type_list=[param_type])
        dismod3.tile_plot_disease_model(dm, keys, defaults={})
        dm.savefig('dm-%d-posterior-%s-%s-%s-%s.png' % (id, dismod3.utils.clean(param_type), region, sex, year)) # TODO: refactor naming into its own function

    # summarize fit quality graphically, as well as parameter posteriors
    for k in dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex]):
        if dm.vars[k].get('data'):
            dismod3.plotting.plot_posterior_predicted_checks(dm, k)
            dm.savefig('dm-%d-check-%s.png' % (dm.id, k))

    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (id, region, sex, year), keys_to_save=keys)

    # make a rate_type_list
    # NOTE(review): rate_type_list is never used below -- confirm before removing
    rate_type_list = ['incidence', 'prevalence', 'remission', 'excess-mortality', 'mortality', 'relative-risk', 'duration', 'incidence_x_duration']

    # save country level posterior
    save_country_level_posterior(dm, region, year, sex)

    # update job status file
    #print 'updating job status on server'
    #dismod3.log_job_status(id, 'posterior',
    #                       '%s--%s--%s' % (region, sex, year), 'Completed')
    return dm
def main():
    """ Command-line entry point: parse options, then fit one
    region/sex/year posterior for the given disease_model_id.

    String-valued flags ('true'/'false') are converted to booleans before
    being forwarded to fit_posterior.
    """
    import optparse

    usage = 'usage: %prog [options] disease_model_id'
    parser = optparse.OptionParser(usage)
    parser.add_option('-s', '--sex', default='male',
                      help='only estimate given sex (valid settings ``male``, ``female``, ``all``)')
    parser.add_option('-y', '--year', default='2005',
                      help='only estimate given year (valid settings ``1990``, ``2005``, ``2010``)')
    parser.add_option('-r', '--region', default='australasia',
                      help='only estimate given GBD Region')
    parser.add_option('-f', '--fast', default='False',
                      help='use MAP only')
    parser.add_option('-i', '--inconsistent', default='False',
                      help='use inconsistent model for posteriors')
    parser.add_option('-t', '--types', default='p i r',
                      help='with rate types to fit (only used if inconsistent=true)')
    parser.add_option('-z', '--zerore', default='true',
                      help='enforce zero constraint on random effects')
    parser.add_option('-o', '--onlyposterior', default='False',
                      help='skip empirical prior phase')

    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('incorrect number of arguments')

    try:
        id = int(args[0])
    except ValueError:
        parser.error('disease_model_id must be an integer')

    dm = dismod3.load_disease_model(id)

    # set model id to passed-in id (should not be necessary)
    dm.id = id
    assert id == dm.id, 'model id should equal parameter id'

    # NOTE(review): this passes the loaded dm (not the int id) as the first
    # argument, so it targets a fit_posterior signature that accepts a model
    # object plus these keyword flags -- confirm against the sibling module
    dm = fit_posterior(dm, options.region, options.sex, options.year,
                       fast_fit=options.fast.lower() == 'true',
                       inconsistent_fit=options.inconsistent.lower() == 'true',
                       params_to_fit=options.types.split(),
                       posteriors_only=(options.onlyposterior.lower() == 'true'),
                       zero_re=options.zerore.lower() == 'true')

    return dm
def fit_emp_prior(id, param_type, fast_fit=False, generate_emp_priors=True, zero_re=True, alt_prior=False, global_heterogeneity='Slightly'):
    """ Fit empirical prior of specified type for specified model

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    param_type : str, one of incidence, prevalence, remission, excess-mortality, prevalence_x_excess-mortality
      The disease parameter to generate empirical priors for
    fast_fit : bool, optional
      if True, run a very short chain (101 iterations; for testing)
    generate_emp_priors : bool, optional
      if True, predict empirical priors for every region/sex/year
    zero_re : bool, optional
      enforce the zero-sum constraint on random effects
    alt_prior : bool, optional
      forwarded to covariate_model.predict_for
    global_heterogeneity : str, optional
      heterogeneity prior applied to every rate type for this global fit

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    """
    dir = dismod3.settings.JOB_WORKING_DIR % id

    ## load the model from disk or from web
    import simplejson as json
    import data
    reload(data)

    dm = dismod3.load_disease_model(id)

    try:
        model = data.ModelData.load(dir)
        print 'loaded data from new format from %s' % dir
    except (IOError, AssertionError):
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        #model.save(dir)
        print 'loaded data from json, saved in new format for next time in %s' % dir

    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith('x_'):
            model.input_data[col] = model.input_data[col].fillna(0.)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)

    # set all heterogeneity priors to Slightly for the global fit
    for t in model.parameters:
        if 'heterogeneity' in model.parameters[t]:
            model.parameters[t]['heterogeneity'] = global_heterogeneity

    # map the long parameter name to the short rate-type code
    t = {'incidence': 'i', 'prevalence': 'p', 'remission': 'r', 'excess-mortality': 'f', 'prevalence_x_excess-mortality': 'pf'}[param_type]
    model.input_data = model.get_data(t)
    if len(model.input_data) == 0:
        print 'No data for type %s, exiting' % param_type
        return dm

    ### For testing:
    ## speed up computation by reducing number of knots
    ## model.parameters[t]['parameter_age_mesh'] = [0, 10, 20, 40, 60, 100]

    ## smooth Slightly, Moderately, or Very
    ## model.parameters[t]['smoothness'] = dict(age_start=0, age_end=100, amount='Very')

    ## speed up computation be reducing data size
    ## predict_area = 'super-region_0'
    ## predict_year=2005
    ## predict_sex='total'
    ## subtree = nx.traversal.bfs_tree(model.hierarchy, predict_area)
    ## relevant_rows = [i for i, r in model.input_data.T.iteritems() \
    ##                  if (r['area'] in subtree or r['area'] == 'all')\
    ##                  and (r['year_end'] >= 1997) \
    ##                  and r['sex'] in [predict_sex, 'total']]
    ## model.input_data = model.input_data.ix[relevant_rows]

    # testing changes
    #model.input_data['effective_sample_size'] = pl.minimum(1.e3, model.input_data['effective_sample_size'])
    #missing_ess = pl.isnan(model.input_data['effective_sample_size'])
    #model.input_data['effective_sample_size'][missing_ess] = 1.

    #model.input_data['z_overdisperse'] = 1.
    #print model.describe(t)
    #model.input_data = model.input_data[model.input_data['area'].map(lambda x: x in nx.bfs_tree(model.hierarchy, 'super-region_5'))]
    #model.input_data = model.input_data = model.input_data.drop(['x_LDI_id_Updated_7July2011'], axis=1)
    #model.input_data = model.input_data.filter([model.input_data['x_nottroponinuse'] == 0.]
    #model.input_data = model.input_data[:100]

    ## speed up output by not making predictions for empirical priors
    #generate_emp_priors = False

    print 'fitting', t
    # NOTE(review): with the param_type mapping above, t is never 'rr', so
    # the rate_type expression always yields 'neg_binom' here -- confirm intent
    model.vars += ism.age_specific_rate(model, t, reference_area='all', reference_sex='total', reference_year='all', mu_age=None, mu_age_parent=None, sigma_age_parent=None, rate_type=(t == 'rr') and 'log_normal' or 'neg_binom', zero_re=zero_re)

    # for backwards compatibility, should be removed eventually
    dm.model = model
    dm.vars = model.vars[t]
    vars = dm.vars

    if fast_fit:
        dm.map, dm.mcmc = dismod3.fit.fit_asr(model, t, iter=101, burn=0, thin=1, tune_interval=100)
    else:
        dm.map, dm.mcmc = dismod3.fit.fit_asr(model, t, iter=50000, burn=10000, thin=40, tune_interval=1000, verbose=True)

    # attach posterior-predictive statistics to the input data table
    stats = dm.vars['p_pred'].stats(batches=5)
    dm.vars['data']['mu_pred'] = stats['mean']
    dm.vars['data']['sigma_pred'] = stats['standard deviation']

    stats = dm.vars['pi'].stats(batches=5)
    dm.vars['data']['mc_error'] = stats['mc error']

    dm.vars['data']['residual'] = dm.vars['data']['value'] - dm.vars['data']['mu_pred']
    dm.vars['data']['abs_residual'] = pl.absolute(dm.vars['data']['residual'])

    graphics.plot_fit(model, data_types=[t], ylab=['PY'], plot_config=(1, 1), fig_size=(8, 8))

    if generate_emp_priors:
        # predict the empirical prior curve for every region/sex/year
        for a in [dismod3.utils.clean(a) for a in dismod3.settings.gbd_regions]:
            print 'generating empirical prior for %s' % a
            for s in dismod3.settings.gbd_sexes:
                for y in dismod3.settings.gbd_years:
                    key = dismod3.utils.gbd_key_for(param_type, a, y, s)
                    if t in model.parameters and 'level_bounds' in model.parameters[t]:
                        lower = model.parameters[t]['level_bounds']['lower']
                        upper = model.parameters[t]['level_bounds']['upper']
                    else:
                        lower = 0
                        upper = pl.inf
                    emp_priors = covariate_model.predict_for(model, model.parameters[t], 'all', 'total', 'all', a, dismod3.utils.clean(s), int(y), alt_prior, vars, lower, upper)
                    dm.set_mcmc('emp_prior_mean', key, emp_priors.mean(0))

                    # inflate the prior std by the negative-binomial
                    # overdispersion when an eta node is present
                    if 'eta' in vars:
                        N, A = emp_priors.shape # N samples, for A age groups
                        delta_trace = pl.transpose([pl.exp(vars['eta'].trace()) for _ in range(A)]) # shape delta matrix to match prediction matrix
                        emp_prior_std = pl.sqrt(emp_priors.var(0) + (emp_priors**2 / delta_trace).mean(0))
                    else:
                        emp_prior_std = emp_priors.std(0)
                    dm.set_mcmc('emp_prior_std', key, emp_prior_std)

                    pl.plot(model.parameters['ages'], dm.get_mcmc('emp_prior_mean', key), color='grey', label=a, zorder=-10, alpha=.5)
    pl.savefig(dir + '/prior-%s.png' % param_type)

    store_effect_coefficients(dm, vars, param_type)

    #graphics.plot_one_ppc(vars, t)
    #pl.savefig(dir + '/prior-%s-ppc.png'%param_type)

    graphics.plot_acorr(model)
    pl.savefig(dir + '/prior-%s-convergence.png' % param_type)
    graphics.plot_trace(model)
    pl.savefig(dir + '/prior-%s-trace.png' % param_type)

    graphics.plot_one_effects(model, t)
    pl.savefig(dir + '/prior-%s-effects.png' % param_type)

    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    try:
        dm.save('dm-%d-prior-%s.json' % (id, param_type))
    except IOError, e:
        print e
def measure_fit(id, condition):
    """ Determine the RMSE, MAE, and Coverage of the fit stored in model specified by id

    Parameters
    ----------
    id : int
      model id whose stored fit will be scored
    condition : str
      condition name; gold-standard values are read from
      OUTPUT_PATH + '<condition>_gold.tsv' (excel-tab dialect)

    Returns
    -------
    median absolute relative error (percent) for 'incidence_x_duration'
    """
    print 'downloading model %d' % id
    sys.stdout.flush()
    dm = dismod3.load_disease_model(id)

    #print 'loading gold-standard data'
    gold_data = [d for d in csv.DictReader(open(OUTPUT_PATH + '%s_gold.tsv' % condition), dialect='excel-tab')]

    #print 'comparing values'
    abs_err = dict(incidence=[], prevalence=[], remission=[], duration=[], incidence_x_duration=[])
    rel_err = dict(incidence=[], prevalence=[], remission=[], duration=[], incidence_x_duration=[])
    coverage = dict(incidence=[], prevalence=[], remission=[], duration=[], incidence_x_duration=[])
    for metric in [abs_err, rel_err, coverage]:
        metric['excess mortality'] = []

    for d in gold_data:
        est = predict('mean', dm, d)
        lb = predict('lower_ui', dm, d)
        ub = predict('upper_ui', dm, d)
        # a negative estimate signals a failed prediction; skip the row
        if est < 0:
            continue

        val = float(d['Truth'])
        err = val - est
        #if d['Age Start'] <= 50:
        #    continue

        # 'Parameter' values end in ' data'; strip that to get the rate type
        t = d['Parameter'].replace(' data', '')
        abs_err[t].append(err)
        if val > 0.:
            rel_err[t].append(100 * err / val)
        coverage[t].append(val >= lb and val <= ub)

    for k in abs_err:
        print '%s abs RMSE = %f' % (k, np.sqrt(np.mean(np.array(abs_err[k])**2)))
        print '%s abs MAE = %f' % (k, np.median(np.abs(abs_err[k])))
    print
    for k in rel_err:
        print '%s rel pct RMSE = %f' % (k, np.sqrt(np.mean(np.array(rel_err[k])**2)))
        print '%s rel pct MAE = %f' % (k, np.median(np.abs(rel_err[k])))
    print
    for k in coverage:
        print '%s coverage = %f' % (k, np.sum(coverage[k]) * 100. / len(coverage[k]))
    print

    k = 'incidence_x_duration'
    print '%s rel pct MAE =\t%f' % (k, np.median(np.abs(rel_err[k])))
    return np.median(np.abs(rel_err[k]))

    # NOTE(review): everything below is unreachable (it follows the return
    # above). It looks like a post-processing step that was disabled by
    # adding the early return -- confirm before deleting or re-enabling.
    # add estimate value as a column in the gold data tsv, for looking
    # in more detail with a spreadsheet or different code
    col_names = sorted(set(gold_data[0].keys()) | set(['Estimate Value']))

    f_file = open(OUTPUT_PATH + '%s_gold.tsv' % condition, 'w')
    csv_f = csv.writer(f_file, dialect='excel-tab')
    csv_f.writerow(col_names)

    csv_f = csv.DictWriter(f_file, col_names, dialect='excel-tab')
    for d in gold_data:
        csv_f.writerow(d)
    f_file.close()
# NOTE(review): this chunk begins mid-function -- the lines down to
# `return model` are the tail of a function whose def is above this excerpt.
    print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.mu['abs_err'].mean(), pl.median(pl.absolute(model.mu['rel_err'].dropna())), model.mu['covered?'].mean())
    print

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'mu')
    data_simulation.finalize_results(model)

    print model.results

    return model


if __name__ == '__main__':
    region, sex, year = 'north_america_high_income', 'male', 1990

    import fit_posterior, upload_fits
    import data
    import simplejson as json

    ## load the model from disk or from web
    dm = dismod3.load_disease_model(24842)
    dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
    # NOTE(review): this rebinding shadows the `data` module imported above;
    # the module is not used again afterwards, but rename if it ever is
    data = upload_fits.merge_data_csvs(24842)
    dm.model.input_data['mu_pred'] = data['mu_pred']

    # simulate, refit, and store results for the chosen region/sex/year
    simulate_data(dm, region, sex, year)
    fit_simulated(dm, region, sex, year)
    store_results(dm, region, sex, year)
import pylab as pl
import pymc as mc

import dismod3

import book_graphics
reload(book_graphics)

# set font
book_graphics.set_font()

results = {}

### @export 'data'
#dm = dismod3.load_disease_model(15596) # epilipsy
dm = dismod3.load_disease_model(16240) # af

data = dm.filter_data('prevalence+all+all+all')

# 2-d histogram of (age_start, age_end) pairs over ages 0..100
hist = pl.zeros((101, 101))
for d in data:
    hist[d['age_start'], d['age_end']] += 1
most_freq_cnt = hist.max()
rows_total = len(data)

### @export 'scatter-prevalence-age-groups'
pl.figure(**book_graphics.half_page_params)
#pl.subplot(1,2,2)
# NOTE(review): the body of this loop continues beyond this excerpt
for a_0 in range(101):
def fit_world(id, fast_fit=False, zero_re=True, alt_prior=False, global_heterogeneity='Slightly'):
    """ Fit consistent for all data in world

    Fits one consistent model pooling all regions, then predicts empirical
    prior curves for every region/sex/year, saves effect coefficients,
    per-type data tables, and diagnostic plots.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    fast_fit : bool, optional
      if True, run a very short chain (105 iterations; for testing)
    zero_re : bool, optional
      enforce the zero-sum constraint on random effects
    alt_prior : bool, optional
      forwarded to covariate_model.predict_for
    global_heterogeneity : str, optional
      heterogeneity prior applied to every rate type for this global fit

    Example
    -------
    >>> import fit_world
    >>> dm = fit_world.dismod3.load_disease_model(1234)
    >>> fit_world.fit_world(dm)
    """
    dir = dismod3.settings.JOB_WORKING_DIR % id

    ## load the model from disk or from web
    import simplejson as json
    import data
    reload(data)

    try:
        model = data.ModelData.load(dir)
        print 'loaded data from new format from %s' % dir
        dm = dismod3.load_disease_model(id)
    except (IOError, AssertionError):
        dm = dismod3.load_disease_model(id)
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        try:
            model.save(dir)
            print 'loaded data from json, saved in new format for next time in %s' % dir
        except IOError:
            # saving is best-effort; continue with the in-memory model
            print 'loaded data from json, failed to save in new format'

    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith('x_'):
            model.input_data[col] = model.input_data[col].fillna(0.)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)

    # set all heterogeneity priors to Slightly for the global fit
    for t in model.parameters:
        if 'heterogeneity' in model.parameters[t]:
            model.parameters[t]['heterogeneity'] = global_heterogeneity

    ### For testing:
    ## speed up computation by reducing number of knots
    ## for t in 'irf':
    ##     model.parameters[t]['parameter_age_mesh'] = [0, 100]

    model.vars += dismod3.ism.consistent(model, reference_area='all', reference_sex='total', reference_year='all', priors={}, zero_re=zero_re)

    ## fit model to data
    if fast_fit:
        dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, 105, 0, 1, 100)
    else:
        dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, iter=50000, burn=10000, thin=40, tune_interval=1000, verbose=True)

    dm.model = model

    # borrow strength to inform sigma_alpha between rate types post-hoc
    # NOTE(review): `type` shadows the builtin in the loops below
    types_with_re = ['rr', 'f', 'i', 'm', 'smr', 'p', 'r', 'pf', 'm_with', 'X']

    ## first calculate sigma_alpha_bar from posterior draws from each alpha
    alpha_vals = []
    for type in types_with_re:
        if 'alpha' in model.vars[type]:
            for alpha_i in model.vars[type]['alpha']:
                alpha_vals += [a for a in alpha_i.trace() if a != 0]  # remove zeros because areas with no siblings are included for convenience but are pinned to zero
    ## then blend sigma_alpha_i and sigma_alpha_bar for each sigma_alpha_i
    if len(alpha_vals) > 0:
        sigma_alpha_bar = pl.std(alpha_vals)
        for type in types_with_re:
            if 'sigma_alpha' in model.vars[type]:
                for sigma_alpha_i in model.vars[type]['sigma_alpha']:
                    # overwrite the stored trace in place with the blended value
                    cur_val = sigma_alpha_i.trace()
                    sigma_alpha_i.trace._trace[0] = (cur_val + sigma_alpha_bar) * pl.ones_like(sigma_alpha_i.trace._trace[0])

    for t in 'p i r f rr pf m_with'.split():
        param_type = dict(i='incidence', r='remission', f='excess-mortality', p='prevalence', rr='relative-risk', pf='prevalence_x_excess-mortality', m_with='mortality')[t]
        #graphics.plot_one_type(model, model.vars[t], {}, t)
        # predict the empirical prior curve for every region/sex/year
        for a in [dismod3.utils.clean(a) for a in dismod3.settings.gbd_regions]:
            print 'generating empirical prior for %s' % a
            for s in dismod3.settings.gbd_sexes:
                for y in dismod3.settings.gbd_years:
                    key = dismod3.utils.gbd_key_for(param_type, a, y, s)
                    if t in model.parameters and 'level_bounds' in model.parameters[t]:
                        lower=model.parameters[t]['level_bounds']['lower']
                        upper=model.parameters[t]['level_bounds']['upper']
                    else:
                        lower=0
                        upper=pl.inf
                    emp_priors = covariate_model.predict_for(model,
                                                             model.parameters.get(t, {}),
                                                             'all', 'total', 'all',
                                                             a, dismod3.utils.clean(s), int(y),
                                                             alt_prior,
                                                             model.vars[t], lower, upper)
                    dm.set_mcmc('emp_prior_mean', key, emp_priors.mean(0))

                    # inflate the prior std by the negative-binomial
                    # overdispersion when an eta node is present
                    if 'eta' in model.vars[t]:
                        N,A = emp_priors.shape # N samples, for A age groups
                        delta_trace = pl.transpose([pl.exp(model.vars[t]['eta'].trace()) for _ in range(A)]) # shape delta matrix to match prediction matrix
                        emp_prior_std = pl.sqrt(emp_priors.var(0) + (emp_priors**2 / delta_trace).mean(0))
                    else:
                        emp_prior_std = emp_priors.std(0)
                    dm.set_mcmc('emp_prior_std', key, emp_prior_std)

        from fit_emp_prior import store_effect_coefficients
        store_effect_coefficients(dm, model.vars[t], param_type)

        if 'p_pred' in model.vars[t]:
            graphics.plot_one_ppc(model, t)
            pl.savefig(dir + '/prior-%s-ppc.png'%param_type)

        if 'p_pred' in model.vars[t] or 'lb' in model.vars[t]:
            graphics.plot_one_effects(model, t)
            pl.savefig(dir + '/prior-%s-effects.png'%param_type)

    # dump posterior-predictive tables for every rate type with data
    for t in 'i r f p rr pf X m_with smr'.split():
        fname = dir + '/empirical_priors/data-%s.csv'%t
        print 'saving tables for', t, 'to', fname
        if 'data' in model.vars[t] and 'p_pred' in model.vars[t]:
            stats = model.vars[t]['p_pred'].stats(batches=5)
            model.vars[t]['data']['mu_pred'] = stats['mean']
            model.vars[t]['data']['sigma_pred'] = stats['standard deviation']

            stats = model.vars[t]['pi'].stats(batches=5)
            model.vars[t]['data']['mc_error'] = stats['mc error']

            model.vars[t]['data']['residual'] = model.vars[t]['data']['value'] - model.vars[t]['data']['mu_pred']
            model.vars[t]['data']['abs_residual'] = pl.absolute(model.vars[t]['data']['residual'])
            #if 'delta' in model.vars[t]:
            #    model.vars[t]['data']['logp'] = [mc.negative_binomial_like(n*p_obs, n*p_pred, n*p_pred*d) for n, p_obs, p_pred, d \
            #                                     in zip(model.vars[t]['data']['effective_sample_size'],
            #                                            model.vars[t]['data']['value'],
            #                                            model.vars[t]['data']['mu_pred'],
            #                                            pl.atleast_1d(model.vars[t]['delta'].stats()['mean']))]
            model.vars[t]['data'].to_csv(fname)

    graphics.plot_fit(model)
    pl.savefig(dir + '/prior.png')

    graphics.plot_acorr(model)
    pl.savefig(dir + '/prior-convergence.png')
    graphics.plot_trace(model)
    pl.savefig(dir + '/prior-trace.png')

    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    try:
        dm.save('dm-%d-prior-%s.json' % (dm.id, 'all'))
    except IOError, e:
        print e
import book_graphics
reload(book_graphics)

results = {}

### @export 'data'
region = 'north_america_high_income'
sex = 'female'
year = '2005'

# fit the same model under two heterogeneity settings for comparison
heterogeneity = ['Slightly', 'Very']
for ii in range(2):
    # load model
    dm = dismod3.load_disease_model(16370)

    # set expert priors and other model parameters
    dm.set_param_age_mesh([0, 15, 20, 25, 30, 35, 40, 45, 50, 100])
    dm.params['global_priors']['level_value']['incidence']['age_before'] = 15
    dm.params['global_priors']['level_value']['incidence']['age_after'] = 50
    dm.params['global_priors']['smoothness']['incidence']['age_start'] = 15

    dm.params['global_priors']['level_value']['remission']['age_before'] = 40
    dm.params['global_priors']['level_bounds']['remission']['upper'] = 10.

    dm.params['global_priors']['level_value']['excess_mortality']['age_before'] = 101

    # NOTE(review): the body of this loop continues beyond this excerpt
    dm.params['global_priors']['level_value']['prevalence']['age_before'] = 15
def fit_without_confrontation(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model
    without trying to integrate conflicting sources of data

    The prevalence and relative-risk data are withheld from the generic
    model: the single prevalence datum pins the birth-prevalence prior,
    and the relative-risk data pin the excess-mortality prior.  All other
    stochastics are frozen, so the sampling below is a Monte Carlo
    simulation shoehorned into PyMC's MCMC machinery.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years
    """
    ## load model
    dm = dismod3.load_disease_model(id)

    ## separate out prevalence and relative-risk data
    prev_data = [d for d in dm.data if dm.relevant_to(d, 'prevalence', region, year, sex)]
    rr_data = [d for d in dm.data if dm.relevant_to(d, 'relative-risk', region, year, sex)]
    # keep only the data that is neither withheld prevalence nor relative risk
    dm.data = [d for d in dm.data if not d in prev_data and not d in rr_data]

    ### setup the generic disease model (without prevalence data)
    import dismod3.gbd_disease_model as model
    keys = dismod3.utils.gbd_keys(region_list=[region],
                                  year_list=[year],
                                  sex_list=[sex])
    dm.calc_effective_sample_size(dm.data)
    dm.vars = model.setup(dm, keys)

    ## override the birth prevalence prior, based on the withheld prevalence data
    logit_C_0 = dm.vars[dismod3.utils.gbd_key_for('bins', region, year, sex)]['initial']['logit_C_0']
    assert len(prev_data) == 1, 'should be a single prevalance datum'
    d = prev_data[0]

    # NEARLY_ZERO keeps logit() finite when the datum or its bounds are exactly 0
    mu_logit_C_0 = mc.logit(dm.value_per_1(d)+dismod3.settings.NEARLY_ZERO)
    lb, ub = dm.bounds_per_1(d)
    # treat the reported bounds as a 95% interval: half-width / 1.96, on the logit scale
    sigma_logit_C_0 = (mc.logit(ub+dismod3.settings.NEARLY_ZERO) - mc.logit(lb+dismod3.settings.NEARLY_ZERO)) / (2 * 1.96)
    print 'mu_C_0_pri:', mc.invlogit(mu_logit_C_0)
    print 'ui_C_0_pri:', lb, ub

    # override the excess-mortality, based on the relative-risk data
    # default for ages with no rr datum: rr barely above 1, with tight uncertainty
    mu_rr = 1.01*np.ones(dismod3.settings.MAX_AGE)
    sigma_rr = .01*np.ones(dismod3.settings.MAX_AGE)
    for d in rr_data:
        # paint each datum's value/se across its (inclusive) age interval
        mu_rr[d['age_start']:(d['age_end']+1)] = dm.value_per_1(d)
        sigma_rr[d['age_start']:(d['age_end']+1)] = dm.se_per_1(d)
    print 'mu_rr:', mu_rr.round(2)
    #print 'sigma_rr:', sigma_rr.round(2)

    log_f = dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['age_coeffs']
    log_f_mesh = log_f.parents['gamma_mesh']
    param_mesh = log_f.parents['param_mesh']

    m_all = dm.vars[dismod3.utils.gbd_key_for('all-cause_mortality', region, year, sex)]

    # excess mortality implied by relative risk: f = (rr - 1) * m_all, on the log scale
    mu_log_f = np.log((mu_rr-1) * m_all)
    # first-order (delta-method) sd of log f; algebraically sigma_rr / (mu_rr - 1)
    sigma_log_f = 1 / ((mu_rr-1) * m_all) * sigma_rr * m_all
    print 'mu_log_f:', mu_log_f.round(2)[param_mesh]
    print 'sigma_log_f:', sigma_log_f.round(2)[param_mesh]

    ### fit the model using Monte Carlo simulation (shoehorned into the MCMC framework of PyMC)
    dm.mcmc = mc.MCMC(dm.vars)
    # SampleFromNormal is defined elsewhere in this project; presumably it
    # draws independent normal samples instead of taking Metropolis steps --
    # TODO confirm at its definition
    dm.mcmc.use_step_method(SampleFromNormal, logit_C_0, mu=mu_logit_C_0, tau=sigma_logit_C_0**-2)
    dm.mcmc.use_step_method(SampleFromNormal, log_f_mesh, mu=mu_log_f[param_mesh], tau=sigma_log_f[param_mesh]**-2)
    # freeze every other stochastic at its current value
    # NOTE(review): this loop also visits logit_C_0 and log_f_mesh; it assumes
    # the SampleFromNormal assignments above take precedence over NoStepper --
    # verify against PyMC's step-method assignment rules
    for stoch in dm.mcmc.stochastics:
        dm.mcmc.use_step_method(mc.NoStepper, stoch)
    dm.mcmc.sample(1000, verbose=dismod3.settings.ON_SGE)

    #print 'mu_C_0_post:', mc.invlogit(logit_C_0.stats()['mean']).round(2)
    #print 'ui_C_0_post:', mc.invlogit(logit_C_0.stats()['95% HPD interval']).round(2)
    #print 'mu_rr_post:', dm.vars[dismod3.utils.gbd_key_for('relative-risk', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)
    print 'mu_log_f_mesh_post:', log_f_mesh.stats()['mean'].round(2)
    print 'mu_f_post:', dm.vars[dismod3.utils.gbd_key_for('excess-mortality', region, year, sex)]['rate_stoch'].stats()['mean'].round(2)

    # store the sampled traces for each output type, dispatching to the model
    # family appropriate to that type
    for k in keys:
        t,r,y,s = dismod3.utils.type_region_year_sex_from_key(k)
        if t in ['incidence', 'prevalence', 'remission', 'excess-mortality', 'mortality', 'prevalence_x_excess-mortality']:
            dismod3.neg_binom_model.store_mcmc_fit(dm, k, dm.vars[k])
        elif t in ['relative-risk', 'duration', 'incidence_x_duration']:
            dismod3.normal_model.store_mcmc_fit(dm, k, dm.vars[k])

    from fit_posterior import save_country_level_posterior
    if str(year) == '2005':  # also generate 2010 estimates
        save_country_level_posterior(dm, region, 2010, sex, ['prevalence', 'remission'])
    save_country_level_posterior(dm, region, year, sex, ['prevalence', 'remission'])  #'prevalence incidence remission excess-mortality duration mortality relative-risk'.split())

    # save results (do this last, because it removes things from the disease model that plotting function, etc, might need
    # NOTE(review): keys is recomputed here although nothing visible changed it
    # since the computation above
    keys = dismod3.utils.gbd_keys(region_list=[region],
                                  year_list=[year],
                                  sex_list=[sex])
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (dm.id, region, sex, year), keys_to_save=keys)

    return dm
# -- Example script fragment: repeated fits of disease model 16391 with
# fixed effects pinned near their null values --
sys.path += ['..']  # make sibling project modules importable

import pylab as pl
import pymc as mc

import dismod3
import book_graphics
reload(book_graphics)  # Python 2 builtin: re-import after edits during interactive work

# presumably filled later in the loop with fit summaries and fitted models --
# not visible in this excerpt
results = {}
models = {}

for ii in range(2):
    ### @export 'load model'
    dm = dismod3.load_disease_model(16391)

    ### @export 'set expert priors'
    # level value 0 applied before age 1 and after age 99 (dismod3
    # expert-prior convention: value/age_before/age_after)
    dm.params['global_priors']['level_value']['incidence'] = dict(value=0., age_before=1., age_after=99)
    dm.params['global_priors']['smoothness']['prevalence']['amount'] = 'Slightly'
    dm.params['global_priors']['heterogeneity']['prevalence'] = 'Slightly'
    dm.params['global_priors']['level_value']['prevalence'] = dict(value=0., age_before=0, age_after=100)
    dm.params['global_priors']['level_bounds']['prevalence'] = dict(lower=0., upper =.05)
    # zero-length age ranges -- presumably this disables the monotonicity
    # constraints; same pattern used elsewhere in these scripts
    dm.params['global_priors']['increasing']['prevalence'] = dict(age_start=0, age_end=0)
    dm.params['global_priors']['decreasing']['prevalence'] = dict(age_start=100, age_end=100)

    # fixed effects pinned to 1 with CIs of width ~1e-4, so sex/time/region
    # are effectively removed from the prevalence model
    dm.params['sex_effect_prevalence'] = dict(mean=1, upper_ci=1.0001, lower_ci=.9999)
    dm.params['time_effect_prevalence'] = dict(mean=1, upper_ci=1.0001, lower_ci=.9999)
    dm.params['region_effect_prevalence'] = dict(std=.0001)

    # study-level bias covariate switched off
    dm.params['covariates']['Study_level']['bias']['rate']['value'] = 0
    # NOTE(review): the body of the loop below continues beyond this excerpt
    for cv in dm.params['covariates']['Country_level']:
model.mu['abs_err'].mean(), pl.median(pl.absolute( model.mu['rel_err'].dropna())), model.mu['covered?'].mean()) print data_simulation.initialize_results(model) data_simulation.add_to_results(model, 'mu') data_simulation.finalize_results(model) print model.results return model if __name__ == '__main__': region, sex, year = 'north_america_high_income', 'male', 1990 import fit_posterior, upload_fits import data import simplejson as json ## load the model from disk or from web dm = dismod3.load_disease_model(24842) dm.model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json())) data = upload_fits.merge_data_csvs(24842) dm.model.input_data['mu_pred'] = data['mu_pred'] simulate_data(dm, region, sex, year) fit_simulated(dm, region, sex, year) store_results(dm, region, sex, year)