forked from aflaxman/gbd
/
fit_continuous_spm.py
180 lines (130 loc) · 5.22 KB
/
fit_continuous_spm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#!/usr/bin/python2.5
""" Fit continuous single parameter model using gp_re_a model
Example
-------
$ python fit_continuous_spm.py 4773
"""
import matplotlib
matplotlib.use("AGG")
import optparse
import os
import subprocess
import pylab as pl
import dismod3
from dismod3.utils import clean, gbd_keys, type_region_year_sex_from_key
def _spm_row(region, country, year, age, y, se):
    """ Build one row of the csv table handed to the space-time model

    The covariates are derived from the year: x0 is a constant 1., and
    x1 and w0 both hold the same linear time trend, .1 * (year - 1997.).

    Parameters
    ----------
    region, country : str
      Values for the 'region' and 'country' columns
    year, age : number
      Values for the 'year' and 'age' columns
    y, se : float
      Observed value and its standard error (nan / inf on rows that
      exist only to request a prediction)

    Returns
    -------
    dict keyed by the csv column names
    """
    time_trend = .1 * (year - 1997.)
    return {
        'region': region,
        'country': country,
        'year': year,
        'age': age,
        'y': y,
        'se': se,
        'x0': 1.,
        'x1': time_trend,
        'w0': time_trend,
    }


def fit_continuous_spm(id):
    """ Fit continuous single parameter model

    Loads the disease model, writes its 'continuous single parameter'
    data plus one placeholder row per (region, year, age) to predict
    into data.csv in the job working directory, fits
    space_time_model.model.gp_re_a to that table by MCMC, stores the
    interpolated mean and 95% HPD bounds back into the disease model,
    saves one posterior plot per region/year, and finally posts the
    disease model.

    Parameters
    ----------
    id : int
      The model id number for the job to fit

    Example
    -------
    >>> import fit_continuous_spm
    >>> fit_continuous_spm.fit_continuous_spm(4773)
    """
    # local imports, in the same style as the original function-scope csv import
    import csv
    import sys

    dm = dismod3.get_disease_model(id)

    ## convert model to csv file
    column_names = ['region', 'country', 'year', 'age', 'y', 'se', 'x0', 'x1', 'w0']
    data_list = []

    # add all the model data to the data list; year and age are the
    # midpoints of their intervals, rounded to the nearest ten
    param_type = 'continuous single parameter'
    for d in dm.filter_data(data_type=param_type):
        data_list.append(_spm_row(
            dismod3.utils.clean(d['gbd_region']),
            d['country_iso3_code'],
            round(.5 * (d['year_start'] + d['year_end']), -1),
            round(.5 * (d['age_start'] + d['age_end']), -1),
            d['parameter_value'] * float(d['units']),
            d['standard_error'] * float(d['units'])))

    # add the time/age/regions that we want to predict to the data list as well
    prediction_regions = dismod3.gbd_regions  # every region; shorten this list for fast testing
    prediction_years = [1990, 2005]  # shared by the prediction rows and the summary loop
    age_mesh = [0, 20, 40, 60, 80, 100]

    # index_dict maps (clean region, year, age) -> position of that
    # prediction row in data_list, used below to index the fitted stats
    index_dict = {}
    for r in prediction_regions:
        region = dismod3.utils.clean(r)
        for y in prediction_years:
            for a in age_mesh:
                index_dict[(region, y, a)] = len(data_list)
                # y = nan and se = inf mark a row with no observation
                data_list.append(_spm_row(region, region + '_all', y, a, pl.nan, pl.inf))

    # save the csv file
    fname = dismod3.settings.JOB_WORKING_DIR % id + '/data.csv'
    try:
        f = open(fname, 'w')
        # try/finally (rather than `with`, which python2.5 only offers via
        # a __future__ import) so the handle is closed even if a write fails
        try:
            csv.writer(f).writerow(column_names)
            csv.DictWriter(f, column_names).writerows(data_list)
        finally:
            f.close()
    except IOError:
        # best effort: warn and carry on with whatever file is already there
        e = sys.exc_info()[1]
        print('Warning: could not create data csv. Maybe it exists already?\n%s' % e)

    ## fit the model
    data = pl.csv2rec(fname)

    print('generating model')
    from space_time_model import model
    reload(model)  # for development, automatically reload in case model.py has changed
    mod_mc = model.gp_re_a(data)

    print('fitting model with mcmc')
    n_iter = 10000  # use e.g. 100 for a quick test run
    # positional arguments are presumably (iter, burn, thin) -- TODO confirm against the sampler
    mod_mc.sample(n_iter, n_iter // 2, 1 + n_iter // 2000, verbose=1)

    # generate plots of results
    print('summarizing results')
    param_predicted_stats = mod_mc.param_predicted.stats()
    predicted_mean = param_predicted_stats['mean']
    predicted_hpd = param_predicted_stats['95% HPD interval']

    for r in prediction_regions:
        r = dismod3.utils.clean(r)
        for t in prediction_years:
            x = list(age_mesh)
            rows = [index_dict[(r, t, a)] for a in age_mesh]
            y = [predicted_mean[i] for i in rows]
            yl = [predicted_hpd[i, 0] for i in rows]
            yu = [predicted_hpd[i, 1] for i in rows]
            print('%s %s %s' % (r, t, list(zip(x, y))))

            # interpolate from the coarse age_mesh onto the model's
            # estimate age mesh and store each summary in the disease model
            key = dismod3.gbd_key_for(param_type, r, t, 'all')
            for stat_name, values in [('mean', y), ('lower_ui', yl), ('upper_ui', yu)]:
                est = dismod3.utils.interpolate(x, values, dm.get_estimate_age_mesh())
                dm.set_mcmc(stat_name, key, est)

            dismod3.tile_plot_disease_model(dm, [key], defaults={})
            try:
                # r was already cleaned above; the extra clean() is kept from the original
                pl.savefig(dismod3.settings.JOB_WORKING_DIR % id + '/dm-%d-posterior-%s-%s-%s.png' % (id, dismod3.utils.clean(r), 'all', t))  # TODO: refactor naming into its own function
            except IOError:
                e = sys.exc_info()[1]
                print('Warning: could not create png. Maybe it exists already?\n%s' % e)

    # save results (do this last, because it removes things from the
    # disease model that plotting function, etc, might need)
    dismod3.try_posting_disease_model(dm, ntries=5)

    print('')
    print('********************')
    print('computation complete')
    print('********************')
def main(argv=None):
    """ Parse the command line and fit the requested disease model

    Parameters
    ----------
    argv : list of str, optional
      Command-line arguments without the program name.  When None
      (the default), optparse reads them from sys.argv[1:].

    Notes
    -----
    Unless exactly one argument is given and it parses as an integer,
    parser.error reports the problem and exits the program.
    """
    usage = 'usage: %prog [options] disease_model_id'
    parser = optparse.OptionParser(usage)
    (_options, args) = parser.parse_args(argv)

    if len(args) != 1:
        parser.error('incorrect number of arguments')

    try:
        model_id = int(args[0])
    except ValueError:
        # parser.error does not return, so model_id is always bound below
        parser.error('disease_model_id must be an integer')

    fit_continuous_spm(model_id)
# Run the fit only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()