def fit_emp_prior(id, param_type):
    """ Fit empirical prior of specified type for specified model

    Loads the disease model, fits the empirical prior through
    dismod3.neg_binom_model.fit_emp_prior, writes diagnostic plots,
    then saves the model and tries to post it.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    param_type : str, one of incidence, prevalence, remission, excess-mortality
      The disease parameter to generate empirical priors for

    Returns
    -------
    dm
      The disease model object obtained from dismod3.load_disease_model(id)

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    """
    #dismod3.log_job_status(id, 'empirical_priors', param_type, 'Running')

    # load disease model
    dm = dismod3.load_disease_model(id)
    #dm.data = []  # remove all data to speed up computation, for test

    import dismod3.neg_binom_model as model
    # named job_dir (not dir) so the builtin dir() is not shadowed
    job_dir = dismod3.settings.JOB_WORKING_DIR % id
    model.fit_emp_prior(
        dm, param_type,
        dbname='%s/empirical_priors/pickle/dm-%d-emp_prior-%s.pickle' % (job_dir, id, param_type))

    # generate empirical prior plots
    # NOTE(review): `subplot` is not used in this function; the import is kept
    # in case loading pylab here is relied on by the plotting calls -- confirm
    # and drop if not.
    from pylab import subplot

    # initialised up front so the check plot below does not depend on the
    # loop variable leaking out of a loop that may never have run
    keys = []
    for sex in dismod3.settings.gbd_sexes:
        for year in dismod3.settings.gbd_years:
            keys = dismod3.utils.gbd_keys(region_list=['all'], year_list=[year],
                                          sex_list=[sex], type_list=[param_type])
            dismod3.tile_plot_disease_model(dm, keys, defaults={})
            dm.savefig('dm-%d-emp_prior-%s-%s-%s.png' % (id, param_type, sex, year))

    # TODO: put this in a separate script, which runs after all empirical priors are computed
    for effect in ['alpha', 'beta', 'gamma', 'delta']:
        dismod3.plotting.plot_empirical_prior_effects([dm], effect)
        dm.savefig('dm-%d-emp-prior-%s-%s.png' % (id, param_type, effect))

    # summarize fit quality graphically, as well as parameter posteriors
    # (uses the first key of the last sex/year combination plotted above;
    # skipped when no keys were generated)
    if keys:
        k0 = keys[0]
        dm.vars = {k0: dm.vars}   # hack to make posterior predictions plot
        try:
            dismod3.plotting.plot_posterior_predicted_checks(dm, k0)
            dm.savefig('dm-%d-emp-prior-check-%s.png' % (dm.id, param_type))
        finally:
            # undo the hack even when plotting fails, so dm.vars is never
            # left wrapped in the temporary dict
            dm.vars = dm.vars[k0]

    # save results (do this last, because it removes things from the disease
    # model that plotting function, etc, might need)
    dm.save('dm-%d-prior-%s.json' % (id, param_type))
    dismod3.try_posting_disease_model(dm, ntries=5)

    #dismod3.log_job_status(id, 'empirical_priors', param_type, 'Completed')
    return dm
def hep_c_fit(regions, prediction_years, data_year_start=-inf, data_year_end=inf, egypt_flag=False):
    """ Fit prevalence for regions and years specified

    Fetches disease model 8788, adjusts its expert priors and the
    study-level 'bias' covariate, restricts dm.data to the requested
    rows, fits one negative-binomial rate model by MCMC and stores that
    fit under every prediction key.  Plots are saved along the way and
    the model is posted at the end.

    Parameters
    ----------
    regions : list of str
      Region names; compared against dismod3.utils.clean(d['gbd_region'])
    prediction_years : list
      Passed as year_list to dismod3.utils.gbd_keys
    data_year_start, data_year_end : float
      A row is kept only if year_end >= data_year_start and
      year_start <= data_year_end (ignored when egypt_flag is set)
    egypt_flag : bool
      If true, keep only rows with country_iso3_code 'EGY'; otherwise
      rows with that code are excluded

    Returns
    -------
    dm
      The fitted disease model
    """
    banner = "\n***************************\nfitting %s for %s (using data from years %f to %f)"
    print(banner % (regions, prediction_years, data_year_start, data_year_end))

    ## load model to fit
    # dm = DiseaseJson(file('tests/hep_c.json').read())
    model_id = 8788
    dismod3.disease_json.create_disease_model_dir(model_id)
    dm = dismod3.fetch_disease_model(model_id)

    ## adjust the expert priors
    global_priors = dm.params["global_priors"]
    global_priors["heterogeneity"]["prevalence"] = "Very"
    global_priors["smoothness"]["prevalence"]["amount"] = "Slightly"
    # TODO: construct examples of adjusting other covariates
    # ipdb> dm.params['global_priors'].keys()
    # [u'increasing', u'unimodal', u'level_bounds', u'y_maximum', u'note', u'level_value', u'decreasing', u'parameter_age_mesh', u'heterogeneity', u'smoothness']
    # ipdb> dm.params['global_priors']['smoothness']['prevalence']
    # {u'age_start': 0, u'amount': u'Moderately', u'age_end': 100}

    # include a study-level covariate for 'bias'
    covariates = dm.get_covariates()
    covariates["Study_level"]["bias"]["rate"]["value"] = 1
    # TODO: construct additional examples of adjusting covariates

    ## select relevant prevalence data
    # TODO: streamline data selection functions
    def _is_egypt_row(row):
        return row["country_iso3_code"] == "EGY"

    def _is_regional_row(row):
        # tests run in the same order as the original and-chain; the
        # comparisons are kept in >= / <= form so NaN years are excluded
        if dismod3.utils.clean(row["gbd_region"]) not in regions:
            return False
        if not float(row["year_end"]) >= data_year_start:
            return False
        if not float(row["year_start"]) <= data_year_end:
            return False
        return row["country_iso3_code"] != "EGY"

    if egypt_flag:
        wanted = _is_egypt_row
    else:
        wanted = _is_regional_row
    dm.data = [row for row in dm.data if wanted(row)]

    ## create, fit, and save rate model
    dm.vars = {}
    keys = dismod3.utils.gbd_keys(
        type_list=["prevalence"],
        region_list=regions,
        year_list=prediction_years,
    )
    # TODO: consider how to do this for models that use the complete disease model
    # TODO: consider adding hierarchical similarity priors for the male and female models
    first_key = keys[0]  # looks like 'prevalence+asia_south+1990+male'
    dm.vars[first_key] = neg_binom_model.setup(dm, first_key, dm.data)

    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=50000, burn=25000, thin=50, verbose=1)

    # make map object so we can compute AIC and BIC
    dm.map = mc.MAP(dm.vars)
    dm.map.fit()

    for key in keys:
        # every prediction key shares the single fitted model
        dm.vars[key] = dm.vars[first_key]
        neg_binom_model.store_mcmc_fit(dm, key, dm.vars[key])

        # check autocorrelation to confirm chain has mixed
        rate_trace = dm.vars[key]["rate_stoch"].trace()
        test_model.summarize_acorr(rate_trace)

        # generate plots of results
        dismod3.tile_plot_disease_model(dm, [key], defaults={"ymax": 0.15, "alpha": 0.5})
        posterior_png = "dm-%d-posterior-%s.%f.png" % (dm.id, key, random())
        dm.savefig(posterior_png)

    # summarize fit quality graphically, as well as parameter posteriors
    dismod3.plotting.plot_posterior_predicted_checks(dm, first_key)
    check_png = "dm-%d-check-%s.%f.png" % (dm.id, first_key, random())
    dm.savefig(check_png)

    dismod3.post_disease_model(dm)
    return dm
for y in [1990, 2005]: for s in ["male", "female"]: key = "prevalence+egypt+%d+%s" % (y, s) prev_1 = neg_binom_model.calc_rate_trace(dm_egypt, key, dm_egypt.vars[key]) pop_1 = neg_binom_model.population_by_age[("EGY", str(y), s)] key = "prevalence+north_africa_middle_east+%d+%s" % (y, s) prev_0 = neg_binom_model.calc_rate_trace(dm_na_me, key, dm_na_me.vars[key]) pop_0 = neg_binom_model.regional_population(key) # generate population weighted average prev = (prev_0 * (pop_0 - pop_1) + prev_1 * pop_1) / pop_0 neg_binom_model.store_mcmc_fit(dm_na_me, key, None, prev) # generate plots of results dismod3.tile_plot_disease_model(dm_na_me, [key], defaults={"ymax": 0.15, "alpha": 0.5}) dm_na_me.savefig("dm-%d-posterior-na_me_w_egypt.%f.png" % (dm_na_me.id, random())) # save results dismod3.post_disease_model(dm_na_me) dm = hep_c_fit( "caribbean latin_america_tropical latin_america_andean latin_america_central latin_america_southern".split(), [1990, 2005], ) dm = hep_c_fit( "sub-saharan_africa_central sub-saharan_africa_southern sub-saharan_africa_west".split(), [1990, 2005] ) for ( r
['excess-mortality', 'excess'], ['incidence', 'incidence'], ['mrr', 'risk'], ['prevalence', 'prevalence'], ]: x = [0] y = [0] for age in age_mesh: x.append(age) y.append(measure_out.model[index_dict[(dm4_type, year, age)]]) key = dismod3.gbd_key_for(dm3_type, r, year, sex) est = dismod3.utils.interpolate(x, y, dm.get_estimate_age_mesh()) dm.set_truth(key, est) dismod3.tile_plot_disease_model(dm, [key], defaults={}) try: pl.savefig(dismod3.settings.JOB_WORKING_DIR % id + '/dm-%d-posterior-%s-%s-%s.png' % (id, dm3_type, sex, year)) # TODO: refactor naming into its own function except IOError, e: print 'Warning: could not create png. Maybe it exists already?\n%s' % e # save results (do this last, because it removes things from the disease model that plotting function, etc, might need dismod3.try_posting_disease_model(dm, ntries=5) print print '********************' print 'computation complete' print '********************' def main(): import optparse
def fit_posterior(id, region, sex, year):
    """ Fit posterior of specified region/sex/year for specified model

    Loads the disease model, fits it first with method='map' and then
    with method='mcmc', writes posterior and check plots, saves the
    fitted keys to json and finally calls save_country_level_posterior.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years

    Returns
    -------
    dm
      The disease model object obtained from dismod3.load_disease_model(id)

    Example
    -------
    >>> import fit_posterior
    >>> fit_posterior.fit_posterior(2552, 'asia_east', 'male', '2005')
    """
    #print 'updating job status on server'
    #dismod3.log_job_status(id, 'posterior', '%s--%s--%s' % (region, sex, year), 'Running')

    dm = dismod3.load_disease_model(id)
    #dm.data = []  # for testing, remove all data

    # all keys for this region/sex/year; computed once and reused for the
    # fit, the overview plot, the check plots and the final save
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])

    # fit the model
    # named job_dir (not dir) so the builtin dir() is not shadowed
    job_dir = dismod3.settings.JOB_WORKING_DIR % id
    import dismod3.gbd_disease_model as model
    ## first generate decent initial conditions
    model.fit(dm, method='map', keys=keys, verbose=1)
    ## then sample the posterior via MCMC
    model.fit(dm, method='mcmc', keys=keys, iter=50000, thin=25, burn=25000, verbose=1,
              dbname='%s/posterior/pickle/dm-%d-posterior-%s-%s-%s.pickle' % (job_dir, id, region, sex, year))

    # generate plots of results
    dismod3.tile_plot_disease_model(dm, keys, defaults={})
    # TODO: refactor naming into its own function (disease_json.save_image perhaps)
    dm.savefig('dm-%d-posterior-%s.png' % (id, '+'.join(['all', region, sex, year])))
    for param_type in dismod3.settings.output_data_types:
        # separate name, so the full key list above is not overwritten
        type_keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year],
                                           sex_list=[sex], type_list=[param_type])
        dismod3.tile_plot_disease_model(dm, type_keys, defaults={})
        # TODO: refactor naming into its own function
        dm.savefig('dm-%d-posterior-%s-%s-%s-%s.png'
                   % (id, dismod3.utils.clean(param_type), region, sex, year))

    # summarize fit quality graphically, as well as parameter posteriors
    # (only for keys whose model variables have a non-empty 'data' entry)
    for k in keys:
        if dm.vars[k].get('data'):
            dismod3.plotting.plot_posterior_predicted_checks(dm, k)
            dm.savefig('dm-%d-check-%s.png' % (dm.id, k))

    # save results (do this last, because it removes things from the disease
    # model that plotting function, etc, might need)
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (id, region, sex, year), keys_to_save=keys)

    # save country level posterior
    save_country_level_posterior(dm, region, year, sex)

    # update job status file
    #print 'updating job status on server'
    #dismod3.log_job_status(id, 'posterior',
    #                       '%s--%s--%s' % (region, sex, year), 'Completed')
    return dm