# grmEvaluation.py
# forked from policyLab/grmEstimatorToolbox
'''Calculate ATE, ATT and ATU by using the estimated parameters and simulated data
'''
import json
import grmReader
import numpy as np
from mpi4py import MPI
# MPI.COMM_WORLD is the default communicator spanning all processes of the MPI job.
comm = MPI.COMM_WORLD
# total number of processes in the communicator
size = comm.Get_size()
# rank (id) of this process; evaluate() aggregates and prints results on rank 0 only
rank = comm.Get_rank()
def evaluate():
    '''Simulate the treatment effects implied by the estimated model and print them.

    The estimated parameters are read from grmRslt.json, the simulated
    covariates from the data file and the sample size / random seed from the
    initialization file. The unobservables (U1, U0, V) are drawn from a joint
    normal distribution built from the estimated variances and correlations,
    the treatment decision D is simulated, and three averages of
    (Y1_level + U1) - (Y0_level + U0) are computed:

        ATE -- over all agents,
        ATT -- over the agents with D = 1,
        ATU -- over the agents with D = 0.

    An effect is reported as nan when its group contains no agents.

    Every MPI process performs the simulation; the results are gathered on
    rank 0, averaged across processes and printed there. Returns None.
    '''
    # read grmRslt.json and get the dictionary
    para = _getpara()

    # get the estimated parameters from the dictionary
    Y1_beta = para['Y1_beta']
    Y0_beta = para['Y0_beta']
    D_gamma = para['D_gamma']
    U1_var = para['U1_var']
    U0_var = para['U0_var']
    U1V_rho = para['U1V_rho']
    U0V_rho = para['U0V_rho']

    # normalization
    V_var = 1.0

    # covariances of U1 and U0 with V, recovered from the correlations
    U1V_cov = U1V_rho * np.sqrt(U1_var) * np.sqrt(V_var)
    U0V_cov = U0V_rho * np.sqrt(U0_var) * np.sqrt(V_var)

    # get the data from the .dat file
    data_ = _getdata()

    # get numAgents, numCovarsOut, numCovarsCost and randomSeed from the ini file
    initDict = grmReader.read()
    numAgents = initDict['numAgents']
    numCovarsOut = np.array(initDict['Y1_beta']).shape[0]
    numCovarsCost = np.array(initDict['D_gamma']).shape[0]
    randomSeed = initDict['randomSeed']

    # Offset the seed by the MPI rank: with an identical seed every process
    # would draw identical unobservables and averaging across processes would
    # be meaningless. A single-process run (rank 0) is unaffected.
    np.random.seed(randomSeed + rank)

    # simulated X-covariates: numCovarsOut columns starting at column 2
    # (columns 0 and 1 presumably hold the outcome and treatment -- TODO confirm)
    X = data_[:, 2:(numCovarsOut + 2)]
    # simulated Z-covariates: the last numCovarsCost columns
    Z = data_[:, -numCovarsCost:]

    # levels of Y_1, Y_0 and D implied by the estimated coefficients
    Y1_level = np.dot(Y1_beta, X.T)
    Y0_level = np.dot(Y0_beta, X.T)
    D_level = np.dot(D_gamma, Z.T)

    # Simulate the unobservables based on the estimated distributions. The
    # matrix is forced to float: np.diag of an all-integer list is an integer
    # array and would silently truncate the covariances assigned below.
    cov = np.diag(np.array([U1_var, U0_var, V_var], dtype=float))
    cov[0, 2] = cov[2, 0] = U1V_cov
    cov[1, 2] = cov[2, 1] = U0V_cov
    U = np.random.multivariate_normal(np.zeros(3), cov, numAgents)
    U1 = U[:, 0]
    U0 = U[:, 1]
    V = U[:, 2]

    # simulate people's decisions
    D = (Y1_level - Y0_level + U1 - U0 - D_level - V) > 0

    # number of treated (D=1) and untreated (D=0) agents
    numTreated = int(np.sum(D))
    numUntreated = numAgents - numTreated

    # checks
    assert _checkdata(X, Y1_level, Y0_level, numAgents)

    # calculate ATE
    ATE = _averageEffect(Y1_level, Y0_level, U1, U0, numAgents)

    # calculate ATT
    # index of the agents who are treated (D=1)
    index_tr = np.where(D)[0]
    X_tr = X[index_tr, :]
    # U1 and U0 are one-dimensional, so a single index is required here
    U1_tr = U1[index_tr]
    U0_tr = U0[index_tr]
    # levels of Y_1 and Y_0 for the treated agents
    Y1_level_tr = np.dot(Y1_beta, X_tr.T)
    Y0_level_tr = np.dot(Y0_beta, X_tr.T)
    # checks
    assert _checkdata(X_tr, Y1_level_tr, Y0_level_tr, numTreated)
    ATT = _averageEffect(Y1_level_tr, Y0_level_tr, U1_tr, U0_tr, numTreated)

    # calculate ATU
    # index of the agents who are untreated (D=0)
    index_utr = np.where(~D)[0]
    X_utr = X[index_utr, :]
    U1_utr = U1[index_utr]
    U0_utr = U0[index_utr]
    # levels of Y_1 and Y_0 for the untreated agents
    Y1_level_utr = np.dot(Y1_beta, X_utr.T)
    Y0_level_utr = np.dot(Y0_beta, X_utr.T)
    # checks
    assert _checkdata(X_utr, Y1_level_utr, Y0_level_utr, numUntreated)
    ATU = _averageEffect(Y1_level_utr, Y0_level_utr, U1_utr, U0_utr,
                         numUntreated)

    # MPI: collect the three effects of every process on the root process
    gathered = comm.gather([ATE, ATT, ATU], root=0)

    if rank == 0:
        # average each effect across processes (rows = processes)
        TreatmentEffects = np.mean(np.array(gathered), axis=0)

        Treatments = {}
        Treatments['ATE'] = TreatmentEffects[0]
        Treatments['ATT'] = TreatmentEffects[1]
        Treatments['ATU'] = TreatmentEffects[2]

        # single-argument print(...) behaves identically in Python 2 and 3
        print("ATE = %s" % Treatments['ATE'])
        print("ATT = %s" % Treatments['ATT'])
        print("ATU = %s" % Treatments['ATU'])


def _averageEffect(Y1_level, Y0_level, U1, U0, numObs):
    '''Return the average of (Y1_level + U1) - (Y0_level + U0) over numObs agents.

    Returns nan when numObs is zero (empty group) instead of dividing by zero.
    '''
    if numObs == 0:
        return np.nan
    total = np.sum(Y1_level) - np.sum(Y0_level) + np.sum(U1) - np.sum(U0)
    return total / float(numObs)
def _getpara():
    '''Read grmRslt.json from the working directory and return the saved dictionary.

    Raises an AssertionError when the top-level JSON value is not an object
    (i.e. does not decode to a dict).
    '''
    # the with-block guarantees the file handle is closed, even on a parse error
    with open('grmRslt.json') as json_file:
        para = json.load(json_file)
    # check
    assert isinstance(para, dict)
    return para
def _getdata():
    '''Load the simulated data set and return it as a float array.

    The location of the data file is taken from the 'fileName' entry of the
    initialization dictionary returned by grmReader.read().
    '''
    # the initialization file tells us which file holds the data
    dataFile = grmReader.read()['fileName']
    # parse the whole file into a numeric array
    return np.genfromtxt(dataFile, dtype = 'float')
def _checkdata(X, Y1_level, Y0_level, numAgents):
    '''Sanity-check the covariates and outcome levels of a group of agents.

    X must be a finite ndarray of shape (numAgents, numCovarsOut), where
    numCovarsOut is the length of 'Y1_beta' in the initialization file.
    Y1_level and Y0_level must each be a finite ndarray of shape (numAgents, ).

    Returns True when every check passes; a failing check raises an
    AssertionError.
    '''
    # expected number of X columns, taken from the ini file
    numCovarsOut = np.array(grmReader.read()['Y1_beta']).shape[0]

    # check X
    assert isinstance(X, np.ndarray)
    assert X.shape == (numAgents, numCovarsOut)
    assert np.isfinite(X).all()

    # check Y1_level first, then Y0_level, with identical requirements
    for level in (Y1_level, Y0_level):
        assert isinstance(level, np.ndarray)
        assert level.shape == (numAgents, )
        assert np.isfinite(level).all()

    return True
# Executable: run the evaluation when this file is invoked as a script.
if __name__ == '__main__':
    evaluate()