forked from aflaxman/gbd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fit_posterior.py
208 lines (165 loc) · 7.35 KB
/
fit_posterior.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#!/usr/bin/python2.5
""" Generate a posterior estimate for a specific region, sex, and year -- PREVALENCE ONLY
Examples
--------
$ python fit_posterior.py 3828 -r australasia -s male -y 2005
>>> # ipython example
>>> from fit_posterior import *
>>> dm = dismod3.get_disease_model(3828)
>>> mort = dismod3.get_disease_model('all-cause_mortality')
>>> dm.data += mort.data
>>> import dismod3.gbd_disease_model as model
>>> model.fit(dm, method='map', keys=keys)
>>> model.fit(dm, method='mcmc', keys=keys, iter=10000, thin=5, burn=5000, verbose=1)
>>> dismod3.post_disease_model(dm)
"""
# matplotlib backend setup
import matplotlib
matplotlib.use("AGG")
from dismod3.neg_binom_model import countries_for
import dismod3.neg_binom_model as nbm
import numpy as np
import dismod3
import pymc as mc
def fit_posterior(id, region, sex, year, mcmc_iter=50000, mcmc_burn=25000, mcmc_thin=50):
    """ Fit the prevalence posterior of one region/sex/year for the specified model

    Loads the disease model, sets up the negative-binomial model for the
    single prevalence key matching region/sex/year, samples it with MCMC,
    fits a MAP estimate, stores the fit on the disease model, and saves the
    result with dm.save.

    Parameters
    ----------
    id : int
      The model id number for the job to fit
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years
    mcmc_iter : int, optional
      passed to MCMC.sample as ``iter``
    mcmc_burn : int, optional
      passed to MCMC.sample as ``burn``
    mcmc_thin : int, optional
      passed to MCMC.sample as ``thin``

    The three mcmc_* defaults reproduce the values that used to be
    hard-coded here, so existing callers are unaffected.

    Returns
    -------
    dm : the object returned by dismod3.load_disease_model(id), with its
      ``vars``, ``mcmc`` and ``map`` attributes set by this function

    Example
    -------
    >>> import fit_posterior
    >>> fit_posterior.fit_posterior(2552, 'asia_east', 'male', '2005')
    """
    # NOTE: reporting job status to the server (dismod3.log_job_status) is
    # currently disabled in this script.
    dm = dismod3.load_disease_model(id)

    # the same key list selects what to fit and what to save
    keys = dismod3.utils.gbd_keys(region_list=[region], year_list=[year],
                                  sex_list=[sex], type_list=['prevalence'])
    k0 = keys[0]

    # fit the model; dm.vars is reset before setup() is called, as before
    dm.vars = {}
    dm.vars[k0] = nbm.setup(dm, k0, dm.data)

    dm.mcmc = mc.MCMC(dm.vars)
    dm.mcmc.sample(iter=mcmc_iter, burn=mcmc_burn, thin=mcmc_thin, verbose=1)

    # NOTE(review): the MAP fit runs after sampling rather than before it;
    # the order is kept as-is -- confirm that it is intentional.
    dm.map = mc.MAP(dm.vars)
    dm.map.fit()

    nbm.store_mcmc_fit(dm, k0, dm.vars[k0])

    # save results (do this last, because it removes things from the disease
    # model that plotting functions, etc, might need)
    dm.save('dm-%d-posterior-%s-%s-%s.json' % (id, region, sex, year), keys_to_save=keys)

    return dm
def save_country_level_posterior(dm, region, year, sex, rate_type_list):
    """ Save country level posterior in a csv file

    The file is named dm-<id>-<region>-<sex>-<year>.csv and is written to
    the directory <job working directory>/posterior/ .  For every country
    of the region and every rate type it holds one row per age with the
    median and the 2.5% / 97.5% order statistics of the posterior draws.

    Parameters:
    -----------
      dm : DiseaseJson object
        disease model; dm.vars must already hold the fitted variables for
        each '<rate_type>+<region>+<year>+<sex>' key that is requested
      region : str
      year : str
        1990 or 2005
      sex : str
        male or female
      rate_type_list : list
        list of rate types
    """
    import csv

    # get covariate dict from dm
    covariates_dict = dm.get_covariates()
    derived_covariate = dm.get_derived_covariate_values()

    # directory to save the file, inside the job working directory
    job_wd = dismod3.settings.JOB_WORKING_DIR % dm.id
    out_dir = job_wd + '/posterior/'

    # make an output file
    filename = 'dm-%s-%s-%s-%s.csv' % (str(dm.id), region, sex, year)
    f_file = open(out_dir + filename, 'w')

    # try/finally guarantees the file is closed even if a key is missing
    # from dm.vars or a prediction fails part-way through
    try:
        csv_f = csv.writer(f_file)
        print('writing csv file %s' % filename)

        # write header
        csv_f.writerow(['Iso3', 'Rate type', 'Age', 'Value', 'Lower UI', 'Upper UI'])

        # loop over countries and rate_types
        for iso3 in countries_for[region]:
            for rate_type in rate_type_list:
                # the key always uses the rate type name as given by the caller
                key = '%s+%s+%s+%s' % (rate_type, region, year, dismod3.utils.clean(sex))

                # mortality is reported under the name 'm_with' in the csv
                rate_label = rate_type
                if rate_label == 'mortality':
                    rate_label = 'm_with'

                # get dm.vars by the key
                model_vars = dm.vars[key]

                if rate_label == 'duration':
                    # duration draws are taken directly from the trace of
                    # rate_stoch; the sample size has to come from that
                    # trace (it was previously read before being defined)
                    draws = model_vars['rate_stoch'].trace()
                    sample_size = len(draws)

                    # one column per draw
                    # assumes each draw has dismod3.MAX_AGE entries -- TODO confirm
                    value_list = np.zeros((dismod3.MAX_AGE, sample_size))
                    for i, value_trace in enumerate(draws):
                        value_list[:, i] = value_trace
                else:
                    # get coeffs from dm.vars
                    alpha = model_vars['region_coeffs']
                    beta = model_vars['study_coeffs']
                    gamma_trace = model_vars['age_coeffs'].trace()

                    # get sample size
                    sample_size = len(gamma_trace)

                    # one column per draw: predict the country rate for each
                    # sampled set of age coefficients
                    # NOTE(review): range(101) presumably equals
                    # range(dismod3.MAX_AGE) -- confirm
                    value_list = np.zeros((dismod3.MAX_AGE, sample_size))
                    for i, gamma in enumerate(gamma_trace):
                        value_trace = nbm.predict_country_rate(key, iso3, alpha, beta, gamma,
                                                               covariates_dict, derived_covariate,
                                                               model_vars['bounds_func'],
                                                               range(101))
                        value_list[:, i] = value_trace

                # sort the draws of every age once, and turn the quantile
                # positions into integers (array indices must be integers)
                sorted_values = np.sort(value_list, axis=1)
                median_i = int(.5 * sample_size)
                lower_i = int(.025 * sample_size)
                upper_i = int(.975 * sample_size)

                if rate_label == 'prevalence':
                    # progress/debug output: median prevalence at age 5
                    print('%s %s %s %s' % (
                        key, iso3,
                        nbm.country_covariates(key, iso3, covariates_dict, derived_covariate)[1],
                        sorted_values[5, median_i]))

                # write a row per age: median, lower and upper bound
                for age in range(dismod3.MAX_AGE):
                    csv_f.writerow([iso3, rate_label, str(age)]
                                   + list(sorted_values[age, [median_i, lower_i, upper_i]]))
    finally:
        # close the file
        f_file.close()
def main():
    """ Command-line entry point

    Reads exactly one positional argument, the disease model id, plus the
    optional --sex, --year and --region settings, waits a random interval
    of up to 30 seconds, and then calls fit_posterior.

    Returns
    -------
    dm : whatever fit_posterior returns for the requested id/region/sex/year
    """
    import optparse
    import random
    import time

    parser = optparse.OptionParser('usage: %prog [options] disease_model_id')
    parser.add_option('-s', '--sex', default='male',
                      help='only estimate given sex (valid settings ``male``, ``female``, ``all``)')
    parser.add_option('-y', '--year', default='2005',
                      help='only estimate given year (valid settings ``1990``, ``2005``)')
    parser.add_option('-r', '--region', default='australasia',
                      help='only estimate given GBD Region')

    opts, positional = parser.parse_args()

    # parser.error() prints the usage message and exits the program
    if len(positional) != 1:
        parser.error('incorrect number of arguments')

    try:
        model_id = int(positional[0])
    except ValueError:
        parser.error('disease_model_id must be an integer')

    # sleep random interval before start to distribute load
    time.sleep(random.random()*30)

    return fit_posterior(model_id, opts.region, opts.sex, opts.year)
# When run as a script, call main() and bind the returned disease model to
# the module-level name dm.
if __name__ == '__main__':
    dm = main()