def check_emp_prior_fits(dm):
    """ Compare empirical-prior predictions against the observed data in dm

    For every datum in ``dm.vars['data']`` a country-level rate is predicted
    from the empirical prior of the datum's rate type, collapsed to the
    datum's age range, and compared with ``dm.value_per_1(d)``.  One line is
    printed per datum, followed by the median over all data.

    Parameters:
    -----------
      dm : DiseaseJson object
        disease model; must provide vars['data'], get_empirical_prior,
        get_covariates and value_per_1

    Returns:
    --------
      list of absolute relative errors, in percent, one per datum
      (in the order of dm.vars['data'])
    """
    abs_rel_errors = []

    # banner naming the function that called this check
    print('********************* %s' % (inspect.stack()[1][3],))

    # compare fit to data
    for d in dm.vars['data']:
        rate_type = d['data_type'].replace(' data', '')
        prior = dm.get_empirical_prior(rate_type)

        # data starting before 1997 is compared with the 1990 estimate,
        # everything else with the 2005 estimate
        year = 1990 if d['year_start'] < 1997 else 2005
        key = dismod3.utils.gbd_key_for(rate_type, d['gbd_region'], year, d['sex'])

        # the bounds function is the identity: predictions are not clipped
        prediction = neg_binom_model.predict_country_rate(
            key, d['country_iso3_code'],
            prior['alpha'], prior['beta'], prior['gamma'],
            dm.get_covariates(),
            lambda f, age: f,
            arange(101))
        data_prediction = dismod3.utils.rate_for_range(
            prediction,
            arange(d['age_start'], d['age_end'] + 1),
            d['age_weights'])

        # test distance of predicted data value from observed data value
        # NOTE(review): no guard for an observed value of zero -- confirm
        # that dm.value_per_1 can never return 0 for the data passed here
        observed = dm.value_per_1(d)
        abs_rel_errors.append(abs(100 * (data_prediction / observed - 1.)))

        print('%s %s %s %s %s' % (rate_type, d['age_start'], observed,
                                  data_prediction, abs_rel_errors[-1]))

    print('median absolue relative error: %s' % (median(abs_rel_errors),))
    print('*********************\n\n\n\n\n')

    return abs_rel_errors
def save_country_level_posterior(dm, region, year, sex, rate_type_list):
    """ Save country level posterior in a csv file, and put the file in the
    directory job_working_directory/posterior/country_level_posterior_dm-'id'

    For each country in ``countries_for[region]`` and each rate type, the
    predicted rate is averaged over all draws in the trace of the model's
    age coefficients, and one row per age is written with the columns
    Iso3, Rate type, Age, Value.

    Writing is best-effort: if anything fails, a message is printed and the
    function returns normally.

    NOTE(review): a second function with this same name is defined later in
    this file and replaces this one when the module is loaded.

    Parameters:
    -----------
      dm : DiseaseJson object
        disease model
      region : str
      year : str
        1990 or 2005
      sex : str
        male or female
      rate_type_list : list
        list of rate types
    """
    import csv

    # get covariate dict from dm
    covariates_dict = dm.get_covariates()

    # job working directory
    job_wd = dismod3.settings.JOB_WORKING_DIR % dm.id

    # directory to save the file
    out_dir = job_wd + '/posterior/country_level_posterior_dm-' + str(dm.id) + '/'

    # make an output file
    filename = 'dm-%s-%s-%s-%s.csv' % (str(dm.id), region, sex, year)

    # names written to the csv for rate types that are relabeled; the model
    # key is always built from the original rate type name
    output_names = {'mortality': 'with-condition mortality',
                    'relative-risk': 'rr mortality'}

    try:
        # the with-block closes the file even if a row fails to compute
        with open(out_dir + filename, 'w') as f_file:
            csv_f = csv.writer(f_file)
            print('writing csv file %s' % filename)

            # write header
            csv_f.writerow(['Iso3', 'Rate type', 'Age', 'Value'])

            # the age mesh does not depend on country, rate type, or draw
            age_mesh = dm.get_estimate_age_mesh()

            # loop over countries and rate_types
            for iso3 in countries_for[region]:
                for rate_type in rate_type_list:
                    key = '%s+%s+%s+%s' % (rate_type, region, year, sex)
                    label = output_names.get(rate_type, rate_type)

                    # get coeffs from dm.vars by the key
                    model_vars = dm.vars[key]
                    alpha = model_vars['region_coeffs']
                    beta = model_vars['study_coeffs']
                    gamma_trace = model_vars['age_coeffs'].trace()
                    sample_size = len(gamma_trace)

                    # accumulate the predicted rate at each age over all draws
                    value_list = [0] * dismod3.MAX_AGE
                    for gamma in gamma_trace:
                        # NOTE(review): the other predict_country_rate calls
                        # in this file pass (key, iso3, ...); the (iso3, key)
                        # order here is kept as found -- confirm against the
                        # function's signature
                        value_trace = nbm.predict_country_rate(
                            iso3, key, alpha, beta, gamma,
                            covariates_dict,
                            model_vars['bounds_func'],
                            age_mesh)
                        for age in range(dismod3.MAX_AGE):
                            value_list[age] += value_trace[age]

                    # write one row per age holding the mean over draws
                    for age, total in enumerate(value_list):
                        csv_f.writerow([iso3, label, str(age),
                                        total / sample_size])
    except Exception as err:
        # stay best-effort, but report the cause and let KeyboardInterrupt
        # and SystemExit propagate
        print("couldn't write file: %s" % (err,))
def save_country_level_posterior(dm, region, year, sex, rate_type_list):
    """ Save country level posterior in a csv file, and put the file in the
    directory job_working_directory/posterior/

    For each country in ``countries_for[region]`` and each rate type, the
    posterior draws of the rate at every age are collected, sorted, and
    summarized.  One row per age is written with the columns Iso3,
    Rate type, Age, Value, Lower UI, Upper UI, where Value is the draw at
    position 0.5 * n of the sorted draws, and Lower UI / Upper UI are the
    draws at positions 0.025 * n and 0.975 * n (positions truncated to
    integers, n = number of draws).

    Parameters:
    -----------
      dm : DiseaseJson object
        disease model
      region : str
      year : str
        1990 or 2005
      sex : str
        male or female
      rate_type_list : list
        list of rate types
    """
    import csv

    # get covariate dict from dm
    covariates_dict = dm.get_covariates()
    derived_covariate = dm.get_derived_covariate_values()

    # job working directory
    job_wd = dismod3.settings.JOB_WORKING_DIR % dm.id

    # directory to save the file
    out_dir = job_wd + '/posterior/'

    # make an output file
    filename = 'dm-%s-%s-%s-%s.csv' % (str(dm.id), region, sex, year)

    # the with-block closes the file even if a row fails to compute
    with open(out_dir + filename, 'w') as f_file:
        csv_f = csv.writer(f_file)
        print('writing csv file %s' % filename)

        # write header
        csv_f.writerow(['Iso3', 'Rate type', 'Age', 'Value', 'Lower UI', 'Upper UI'])

        # loop over countries and rate_types
        for iso3 in countries_for[region]:
            for rate_type in rate_type_list:
                # the key uses the original rate type name and the cleaned sex
                key = '%s+%s+%s+%s' % (rate_type, region, year, dismod3.utils.clean(sex))

                # modify rate type names
                if rate_type == 'mortality':
                    rate_type = 'm_with'

                # get dm.vars by the key
                model_vars = dm.vars[key]

                if rate_type == 'duration':
                    # duration draws are read directly from the rate
                    # stochastic; the number of draws is taken from this
                    # trace instead of a value left over from an earlier
                    # iteration
                    draws = model_vars['rate_stoch'].trace()
                    sample_size = len(draws)
                    value_list = np.zeros((dismod3.MAX_AGE, sample_size))
                    for i, value_trace in enumerate(draws):
                        value_list[:, i] = value_trace
                else:
                    # get coeffs from dm.vars
                    alpha = model_vars['region_coeffs']
                    beta = model_vars['study_coeffs']
                    gamma_trace = model_vars['age_coeffs'].trace()
                    sample_size = len(gamma_trace)

                    # one column of predicted rates per draw of gamma
                    # NOTE(review): ages are hard-coded as range(101) while
                    # the array has dismod3.MAX_AGE rows -- the assignment
                    # below assumes the two agree; confirm
                    value_list = np.zeros((dismod3.MAX_AGE, sample_size))
                    for i, gamma in enumerate(gamma_trace):
                        value_trace = nbm.predict_country_rate(
                            key, iso3, alpha, beta, gamma,
                            covariates_dict, derived_covariate,
                            model_vars['bounds_func'],
                            range(101))
                        value_list[:, i] = value_trace

                # sort the draws of each age once, instead of once per row
                sorted_values = np.sort(value_list, axis=1)

                # array indices must be integers; truncate the fractional
                # positions of the median and of the interval bounds
                median_idx = int(.5 * sample_size)
                summary_idx = [median_idx,
                               int(.025 * sample_size),
                               int(.975 * sample_size)]

                if rate_type == 'prevalence':
                    # diagnostic output: covariates and median at age 5
                    print('%s %s %s %s' % (
                        key, iso3,
                        nbm.country_covariates(key, iso3, covariates_dict, derived_covariate)[1],
                        sorted_values[5, median_idx]))

                # write a row for each age
                for age in range(dismod3.MAX_AGE):
                    csv_f.writerow([iso3, rate_type, str(age)]
                                   + list(sorted_values[age, summary_idx]))