def test_simulated_disease():
    """ Fit simulated disease data after adding noise, then score the fit

    Only rows whose cleaned gbd_region is north_america_high_income are
    kept.  Kept rows that are not all-cause mortality data have their
    value replaced by a truncated-normal draw (standard error equal to
    cov * value) and their age interval moved to start 5 years earlier
    and cover 10 ages with uniform age weights.  Empirical priors and the
    posterior are then fit, and the posterior is checked first against
    the noisy values and then against the stored 'truth' values.

    Returns
    -------
    dm : the fitted disease model
    """
    # load model to test fitting
    model_json = file('tests/test_disease_1.json').read()
    dm = DiseaseJson(model_json)

    # filter and noise up data
    cov = .5
    region = 'north_america_high_income'

    kept = []
    for row in dm.data:
        # the noise-free value is recorded on every row, kept or not
        row['truth'] = row['value']

        if dismod3.utils.clean(row['gbd_region']) != region:
            continue

        if row['data_type'] != 'all-cause mortality data':
            se = (cov * row['value'])
            row['value'] = mc.rtruncnorm(row['truth'], se**-2, 0, np.inf)

            # start the interval 5 years earlier and make it span 10 ages
            row['age_start'] -= 5
            row['age_end'] = row['age_start'] + 9

            # uniform weight for each age in the interval
            weights = np.ones(row['age_end'] - row['age_start'] + 1)
            weights /= float(len(weights))
            row['age_weights'] = weights

            row['standard_error'] = se

        kept.append(row)
    dm.data = kept

    # fit empirical priors and compare fit to data
    from dismod3 import neg_binom_model
    for rate_type in ['prevalence', 'incidence', 'remission', 'excess-mortality']:
        neg_binom_model.fit_emp_prior(dm, rate_type, '/dev/null')
        check_emp_prior_fits(dm)

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[1990], sex_list=['male'])
    # first generate decent initial conditions
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    # then sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5, burn=5000,
                          verbose=1, dbname='/dev/null')

    print('error compared to the noisy data (coefficient of variation = %.2f)' % cov)
    check_posterior_fits(dm)

    # put the noise-free values back and collapse each interval to one age
    # NOTE(review): this loop also visits the all-cause mortality rows,
    # whose age_start was never lowered above -- confirm that shifting
    # them by 5 here is intended.
    for row in dm.data:
        row['value'] = row['truth']
        row['age_start'] += 5
        row['age_end'] = row['age_start']
        weights = np.ones(row['age_end'] - row['age_start'] + 1)
        weights /= float(len(weights))
        row['age_weights'] = weights

    print('error compared to the truth')
    check_posterior_fits(dm)

    return dm
def test_triangle_pattern():
    """ Test fit to data with a triangular age pattern

    One data row is taken from the single_low_noise model and copied to
    ages 10, 30, 50, 70 and 90 with value .01 * min(age, 100 - age).
    The prevalence empirical prior and then the posterior for
    asia_southeast / 1990 / male are fit and compared to the data.
    """
    # load model to test fitting
    model_json = file('tests/single_low_noise.json').read()
    dm = DiseaseJson(model_json)

    # create triangular age pattern data
    import copy
    point = dm.data.pop()
    for age in range(10, 100, 20):
        level = .01 * min(age, 100 - age)
        point = copy.copy(point)
        point['age_start'] = age
        point['age_end'] = age
        point['parameter_value'] = level
        point['value'] = level
        dm.data.append(point)

    # fit empirical priors
    from dismod3 import neg_binom_model
    neg_binom_model.fit_emp_prior(dm, 'prevalence', '/dev/null')

    # compare fit to data
    check_emp_prior_fits(dm)

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['asia_southeast'], year_list=[1990], sex_list=['male'])
    # first generate decent initial conditions
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    # then sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5, burn=5000,
                          verbose=1, dbname='/dev/null')

    # compare fit to data
    check_posterior_fits(dm)
def test_dismoditis_wo_prevalence():
    """ Test fit of the dismoditis example with the prevalence data removed

    Incidence and excess-mortality empirical priors are fit and checked,
    then the posterior for asia_southeast / 1990 / male is sampled by
    MCMC (the MAP initialization step is disabled here) and checked.
    """
    # load model to test fitting
    model_json = file('tests/dismoditis.json').read()
    dm = DiseaseJson(model_json)

    # remove all prevalence data
    # NOTE(review): rows are filtered on the 'parameter' key, while other
    # code in this file reads 'data_type' -- confirm the key name.
    without_prevalence = []
    for row in dm.data:
        if row['parameter'] != 'prevalence data':
            without_prevalence.append(row)
    dm.data = without_prevalence

    # fit empirical priors, checking each one right after it is fit
    for rate_type in ['incidence', 'excess-mortality']:
        neg_binom_model.fit_emp_prior(dm, rate_type, '/dev/null')
        check_emp_prior_fits(dm)

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['asia_southeast'], year_list=[1990], sex_list=['male'])
    #gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)  ## first generate decent initial conditions
    # sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5, burn=5000,
                          verbose=1, dbname='/dev/null')

    # compare fit to data
    check_posterior_fits(dm)
def test_hep_c():
    """ Test fit for subset of hep_c data

    data is filtered to include only prevalence with
    region == 'europe_western' and sex == 'all'

    The prevalence empirical prior is fit, then the posterior for
    europe_western / 1990 / male is fit by MAP followed by MCMC.  The
    posterior mean prevalence is printed and its entry at index 100 is
    required to stay below .1.

    Returns
    -------
    dm : the fitted disease model
    """
    # load model to test fitting
    dm = DiseaseJson(file('tests/hep_c_europe_western.json').read())

    # fit empirical priors
    neg_binom_model.fit_emp_prior(dm, 'prevalence', '/dev/null')

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['europe_western'], year_list=[1990], sex_list=['male'])
    # first generate decent initial conditions
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    # then sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5, burn=5000,
                          verbose=1, dbname='/dev/null')

    # check that prevalence does not shoot up in the oldest ages
    prediction = dm.get_mcmc('mean', 'prevalence+europe_western+1990+male')
    print(prediction)

    # the check must run before returning; placed after the return
    # statement it could never execute
    assert prediction[100] < .1, 'prediction should not shoot up in oldest ages'

    return dm
def test_increasing_prior():
    """ Test that the prevalence empirical prior is non-decreasing in age

    One data row from the single_low_noise model is copied to ages
    10, 20, ..., 90 with value .01 * age.  After fitting the prevalence
    empirical prior, the fit is compared to the data and consecutive
    differences of the prior mean for asia_southeast / 1990 / male are
    required to be >= 0.
    """
    # load model to test fitting
    model_json = file('tests/single_low_noise.json').read()
    dm = DiseaseJson(model_json)

    # NOTE(review): the increasing prior is extended for 'incidence',
    # while the fit and the assertion below concern 'prevalence' --
    # confirm this is the intended rate type.
    dm.params['global_priors']['increasing']['incidence']['age_end'] = 100

    # create linear age pattern data
    import copy
    point = dm.data.pop()
    for age in range(10, 100, 10):
        level = .01 * age
        point = copy.copy(point)
        point['age_start'] = age
        point['age_end'] = age
        point['parameter_value'] = level
        point['value'] = level
        dm.data.append(point)

    # fit empirical priors
    from dismod3 import neg_binom_model
    neg_binom_model.fit_emp_prior(dm, 'prevalence', '/dev/null')

    # compare fit to data, and check that it is increasing
    check_emp_prior_fits(dm)
    key = dismod3.utils.gbd_key_for('prevalence', 'asia_southeast', 1990, 'male')
    prior_mean = dm.get_mcmc('emp_prior_mean', key)
    steps = np.diff(prior_mean)
    assert np.all(steps >= 0), 'expert prior says increasing'
def test_single_rate():
    """ Test the prevalence empirical prior fit on the single_low_noise model

    Loads tests/single_low_noise.json, fits the prevalence empirical
    prior without saving a trace (dbname is /dev/null) and compares the
    fit to the data.
    """
    # load model to test fitting
    model_json = file('tests/single_low_noise.json').read()
    dm = DiseaseJson(model_json)

    # fit empirical priors
    neg_binom_model.fit_emp_prior(dm, 'prevalence', '/dev/null')

    # compare fit to data
    check_emp_prior_fits(dm)
def fit_emp_prior(id, param_type):
    """ Fit empirical prior of specified type for specified model

    Loads the model, fits the empirical prior, writes one tile plot per
    sex/year, one plot per effect (alpha, beta, gamma, delta) and one
    posterior predictive check plot, then saves the model as json and
    tries to post it.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    param_type : str, one of incidence, prevalence, remission, excess-mortality
      The disease parameter to generate empirical priors for

    Returns
    -------
    dm : the disease model with the empirical prior fit

    Example
    -------
    >>> import fit_emp_prior
    >>> fit_emp_prior.fit_emp_prior(2552, 'incidence')
    """
    #dismod3.log_job_status(id, 'empirical_priors', param_type, 'Running')

    # load disease model
    dm = dismod3.load_disease_model(id)
    #dm.data = []  # remove all data to speed up computation, for test

    import dismod3.neg_binom_model as model
    job_dir = dismod3.settings.JOB_WORKING_DIR % id
    pickle_name = '%s/empirical_priors/pickle/dm-%d-emp_prior-%s.pickle' % (job_dir, id, param_type)
    model.fit_emp_prior(dm, param_type, dbname=pickle_name)

    # generate empirical prior plots
    from pylab import subplot
    for sex in dismod3.settings.gbd_sexes:
        for year in dismod3.settings.gbd_years:
            keys = dismod3.utils.gbd_keys(region_list=['all'], year_list=[year],
                                          sex_list=[sex], type_list=[param_type])
            dismod3.tile_plot_disease_model(dm, keys, defaults={})
            tile_name = 'dm-%d-emp_prior-%s-%s-%s.png' % (id, param_type, sex, year)
            dm.savefig(tile_name)

    # TODO: put this in a separate script, which runs after all empirical priors are computed
    for effect in ('alpha', 'beta', 'gamma', 'delta'):
        dismod3.plotting.plot_empirical_prior_effects([dm], effect)
        effect_name = 'dm-%d-emp-prior-%s-%s.png' % (id, param_type, effect)
        dm.savefig(effect_name)

    # summarize fit quality graphically, as well as parameter posteriors
    # (keys still holds the list built in the last sex/year iteration)
    first_key = keys[0]
    dm.vars = {first_key: dm.vars}  # hack to make posterior predictions plot
    dismod3.plotting.plot_posterior_predicted_checks(dm, first_key)
    # NOTE(review): this filename uses dm.id while the others use the id
    # argument -- presumably the same value; confirm.
    check_name = 'dm-%d-emp-prior-check-%s.png' % (dm.id, param_type)
    dm.savefig(check_name)
    dm.vars = dm.vars[first_key]  # undo hack to make posterior predictions plot

    # save results (do this last, because it removes things from the
    # disease model that plotting function, etc, might need
    json_name = 'dm-%d-prior-%s.json' % (id, param_type)
    dm.save(json_name)
    dismod3.try_posting_disease_model(dm, ntries=5)

    #dismod3.log_job_status(id, 'empirical_priors', param_type, 'Completed')
    return dm
def fit_model(dm, region, year, sex):
    """ Fit all four empirical priors, then the posterior for one region/year/sex

    Parameters
    ----------
    dm : the disease model to fit; it is modified in place
    region, year, sex : passed as single-element lists to
      dismod3.utils.gbd_keys to select what the posterior is fit for
    """
    # fit empirical priors
    rate_types = ['prevalence', 'incidence', 'remission', 'excess-mortality']
    for rate_type in rate_types:
        neg_binom_model.fit_emp_prior(dm, rate_type, '/dev/null')

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year], sex_list=[sex])
    # first generate decent initial conditions
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    # then sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5, burn=5000,
                          verbose=1, dbname='/dev/null')
def test_mesh_refinement():
    """ Compare fit for coarse and fine age mesh

    The single_low_noise model is loaded twice and its prevalence
    empirical prior is fit once with an age mesh of step 20 and once
    with step 5 (both over 0..100).  Every 20th entry of the two prior
    means must agree to within a relative difference of .05.
    """
    from dismod3 import neg_binom_model

    # load one copy per mesh and fit it, coarse mesh first
    fitted = []
    for mesh_step in (20, 5):
        dm = DiseaseJson(file('tests/single_low_noise.json').read())
        dm.set_param_age_mesh(arange(0, 101, mesh_step))
        neg_binom_model.fit_emp_prior(dm, 'prevalence', '/dev/null')
        fitted.append(dm)

    # compare fits
    p1, p2 = [dm.get_mcmc('emp_prior_mean',
                          dismod3.utils.gbd_key_for('prevalence', 'asia_southeast', 1990, 'male'))
              for dm in fitted]
    coarse = p1[::20]
    fine = p2[::20]
    print(coarse)
    print(fine)

    rel_diff = abs(coarse / fine - 1.)
    assert np.all(rel_diff < .05), 'Prediction should be closer to data'
def test_dismoditis():
    """ Test fit of the dismoditis example with small standard errors

    Every data row gets standard_error .01.  Prevalence, incidence and
    excess-mortality empirical priors are fit and checked one after the
    other, then the posterior for north_america_high_income / 1990 /
    male is fit and checked.  Finally the first two entries of the
    posterior mean prevalence must differ by less than .01.
    """
    # load model to test fitting
    model_json = file('tests/dismoditis.json').read()
    dm = DiseaseJson(model_json)
    for row in dm.data:
        row['standard_error'] = .01

    # fit empirical priors, checking each one right after it is fit
    for rate_type in ['prevalence', 'incidence', 'excess-mortality']:
        neg_binom_model.fit_emp_prior(dm, rate_type, '/dev/null')
        check_emp_prior_fits(dm)

    # fit posterior where there is no data
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['north_america_high_income'],
                                  year_list=[1990], sex_list=['male'])
    # first generate decent initial conditions
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    # then sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=5, burn=5000,
                          verbose=1, dbname='/dev/null')
    check_posterior_fits(dm)

    # check that prevalence is smooth near age zero
    prediction = dm.get_mcmc('mean', 'prevalence+north_america_high_income+1990+male')
    first_step = prediction[1] - prediction[0]
    assert first_step < .01, 'prediction should be smooth near zero'
def test_linear_pattern():
    """ Test empirical prior fit to data with a linearly increasing age pattern

    One data row from the single_low_noise model is copied to ages
    10, 30, 50, 70 and 90 with value .01 * age; the prevalence empirical
    prior is then fit and compared to the data.
    """
    # load model to test fitting
    model_json = file('tests/single_low_noise.json').read()
    dm = DiseaseJson(model_json)

    # create linear age pattern data
    import copy
    point = dm.data.pop()
    for age in range(10, 100, 20):
        level = .01 * age
        point = copy.copy(point)
        point['age_start'] = age
        point['age_end'] = age
        point['parameter_value'] = level
        point['value'] = level
        dm.data.append(point)

    # fit empirical priors
    from dismod3 import neg_binom_model
    neg_binom_model.fit_emp_prior(dm, 'prevalence', '/dev/null')

    # compare fit to data
    check_emp_prior_fits(dm)
def fit_simulated_disease(n=300, cv=2.):
    """ Test fit for simulated disease data with noise and missingness

    Loads the gold-standard simulation, adds truncated-normal noise to
    every positive non-mortality value, keeps a random sample of n of
    the non-mortality rows (plus all all-cause mortality rows), fits the
    empirical priors and the posterior, and scores the posterior against
    both the noisy data and the truth.

    Parameters
    ----------
    n : int
      number of non-mortality data rows to sample for fitting
    cv : float
      noise level in percent; the standard error of each noisy value is
      (cv / 100) * value

    Returns
    -------
    dm : the fitted disease model; dm.data holds the sampled (noisy)
      rows and dm.all_data holds every non-mortality row

    Side effects
    ------------
    Appends "median absolute relative error,mean coverage" to the file
    score_<n>_<cv>.txt and calls generate_figure(dm, n, cv).
    """
    # load model to test fitting
    dm = DiseaseJson(file('tests/simulation_gold_standard.json').read())

    # adjust any priors and covariates as desired
    dm.set_param_age_mesh(arange(0, 101, 2))
    # (loop variable renamed so that it no longer shadows the builtin type)
    for rate_name in 'incidence prevalence remission excess_mortality'.split():
        dm.params['global_priors']['heterogeneity'][rate_name] = 'Very'
    dm.params['covariates']['Country_level']['LDI_id']['rate']['value'] = 0

    # filter and noise up data
    mort_data = []
    all_data = []
    for d in dm.data:
        d['truth'] = d['value']
        d['age_weights'] = array([1.])
        if d['data_type'] == 'all-cause mortality data':
            mort_data.append(d)
        else:
            # only positive values are noised; se would be zero otherwise
            if d['value'] > 0:
                se = (cv / 100.) * d['value']
                Y_i = mc.rtruncnorm(d['truth'], se**-2, 0, np.inf)
                d['value'] = Y_i
                d['standard_error'] = se
                d['effective_sample_size'] = Y_i * (1-Y_i) / se**2

            all_data.append(d)
    sampled_data = random.sample(all_data, n) + mort_data
    dm.data = sampled_data

    # fit empirical priors and compare fit to data
    from dismod3 import neg_binom_model
    for rate_type in 'prevalence incidence remission excess-mortality'.split():
        #neg_binom_model.fit_emp_prior(dm, rate_type, iter=1000, thin=1, burn=0, dbname='/dev/null')
        neg_binom_model.fit_emp_prior(dm, rate_type, iter=30000, thin=15, burn=15000, dbname='/dev/null')
        check_emp_prior_fits(dm)

    # fit posterior
    delattr(dm, 'vars')  # remove vars so that gbd_disease_model creates its own version
    from dismod3 import gbd_disease_model
    keys = dismod3.utils.gbd_keys(region_list=['north_america_high_income'],
                                  year_list=[1990], sex_list=['male'])
    # first generate decent initial conditions
    gbd_disease_model.fit(dm, method='map', keys=keys, verbose=1)
    # then sample the posterior via MCMC
    gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=30000, thin=15, burn=15000,
                          verbose=1, dbname='/dev/null')
    #gbd_disease_model.fit(dm, method='mcmc', keys=keys, iter=1000, thin=1, burn=0, verbose=1, dbname='/dev/null')  ## fast for dev

    print('error compared to the noisy data (coefficient of variation = %.2f)' % cv)
    check_posterior_fits(dm)

    # score against the truth on every non-mortality row, remembering the
    # noisy value so that it can be restored afterwards
    dm.data = all_data
    for d in dm.data:
        if d['data_type'] != 'all-cause mortality data':
            d['noisy_data'] = d['value']
            d['value'] = d['truth']

    print('error compared to the truth')
    are, coverage = check_posterior_fits(dm)
    median_are = median(are)
    mean_coverage = mean(coverage)
    print('')
    print('Median Absolute Relative Error of Posterior Predictions: %s' % (median_are,))
    print('Pct coverage: %s' % (100*mean_coverage,))

    # the with-statement closes the score file even if the write fails
    with open('score_%d_%f.txt' % (n, cv), 'a') as score_file:
        score_file.write('%10.10f,%10.10f\n' % (median_are, mean_coverage))

    # restore the noisy values on the rows that were used for fitting
    dm.all_data = all_data
    dm.data = sampled_data
    for d in dm.data:
        if d['data_type'] != 'all-cause mortality data':
            d['value'] = d['noisy_data']

    generate_figure(dm, n, cv)

    return dm
def fit(id, opts):
    """ Fit a disease model and post the results to the data server

    If opts.type is set, the empirical prior for that type is fit;
    otherwise all-cause mortality data is merged into the model and a
    consistent fit of all parameters is made (MAP, then MCMC).  The
    model's dict-valued params are then pruned to the keys that were
    fit, and the model is posted, retrying on failure.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    opts : object with attributes region, sex, year, type and log
      region, sex, year restrict what is fit (a false value means "all"
      of dismod3.gbd_regions / gbd_sexes / gbd_years); type selects an
      empirical prior fit; log turns on job status logging

    Raises
    ------
    Whatever dismod3.post_disease_model raises on the fourth and final
    attempt to post the results.
    """
    fit_str = '(%d) %s %s %s' % (id, opts.region or '', opts.sex or '', opts.year or '')
    #tweet('fitting disease model %s' % fit_str)
    sys.stdout.flush()

    # a job is an empirical prior job when only the type is given, and a
    # posterior job when region, sex and year are all given without a type
    has_rsy = opts.region and opts.sex and opts.year

    # update job status file
    if opts.log:
        if opts.type and not has_rsy:
            dismod3.log_job_status(id, 'empirical_priors', opts.type, 'Running')
        elif has_rsy and not opts.type:
            dismod3.log_job_status(id, 'posterior',
                                   '%s--%s--%s' % (opts.region, opts.sex, opts.year), 'Running')

    dm = dismod3.get_disease_model(id)
    fit_str = '%s %s' % (dm.params['condition'], fit_str)

    sex_list = [opts.sex] if opts.sex else dismod3.gbd_sexes
    year_list = [opts.year] if opts.year else dismod3.gbd_years
    region_list = [opts.region] if opts.region else dismod3.gbd_regions
    keys = gbd_keys(region_list=region_list, year_list=year_list, sex_list=sex_list)

    job_dir = dismod3.settings.JOB_WORKING_DIR % id

    # fit empirical priors, if type is specified
    if opts.type:
        fit_str += ' emp prior for %s' % opts.type
        #print 'beginning ', fit_str
        import dismod3.neg_binom_model as model
        model.fit_emp_prior(dm, opts.type,
                            dbname='%s/empirical_priors/pickle/dm-%d-emp_prior-%s.pickle'
                            % (job_dir, id, opts.type))

    # if type is not specified, find consistent fit of all parameters
    else:
        import dismod3.gbd_disease_model as model

        # get the all-cause mortality data, and merge it into the model
        mort = dismod3.get_disease_model('all-cause_mortality')
        dm.data += mort.data

        # fit individually, if sex, year, and region are specified
        if opts.sex and opts.year and opts.region:
            dm.params['estimate_type'] = 'fit individually'

        # fit the model
        #print 'beginning ', fit_str
        model.fit(dm, method='map', keys=keys, verbose=1)
        model.fit(dm, method='mcmc', keys=keys, iter=10000, thin=5, burn=5000, verbose=1,
                  dbname='%s/posterior/pickle/dm-%d-posterior-%s-%s-%s.pickle'
                  % (job_dir, id, opts.region, opts.sex, opts.year))
        #model.fit(dm, method='mcmc', keys=keys, iter=1, thin=1, burn=0, verbose=1)

    # remove all keys that have not been changed by running this model
    for k in dm.params.keys():
        sub_params = dm.params[k]
        if isinstance(sub_params, dict):
            # iterate over a snapshot of the keys, since entries are popped
            for j in list(sub_params):
                if j not in keys:
                    sub_params.pop(j)

    # post results to dismod_data_server
    # "dumb" error handling, in case post fails: sleep a random time and
    # try again, giving up (and letting the error propagate) on the 4th try
    from twill.errors import TwillAssertionError
    from urllib2 import URLError
    import random
    import time

    # must be a tuple: an except clause given a list never matches
    possible_exceptions = (TwillAssertionError, URLError)
    max_tries = 4
    for attempt in range(max_tries):
        try:
            url = dismod3.post_disease_model(dm)
            break
        except possible_exceptions:
            if attempt == max_tries - 1:
                raise
            time.sleep(random.random()*30)

    # form url to view results
    #if opts.sex and opts.year and opts.region:
    #    url += '/%s/%s/%s' % (opts.region, opts.year, opts.sex)
    #elif opts.region:
    #    url += '/%s' % opts.region

    # announce completion, and url to view results
    #tweet('%s fit complete %s' % (fit_str, url))
    sys.stdout.flush()

    # update job status file
    if opts.log:
        if opts.type and not has_rsy:
            dismod3.log_job_status(id, 'empirical_priors', opts.type, 'Completed')
        elif has_rsy and not opts.type:
            dismod3.log_job_status(id, 'posterior',
                                   '%s--%s--%s' % (opts.region, opts.sex, opts.year), 'Completed')