-
Notifications
You must be signed in to change notification settings - Fork 1
/
demc_class.py
358 lines (324 loc) · 19.3 KB
/
demc_class.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# -*- coding: utf-8 -*-
import numpy
import numpy.linalg
import scipy.stats
import sys, pdb
from time import time
MAX_ITERATIONS = 3000000
class jeffreys (object):
def __init__ ( self ):
return
def pdf ( self, x ):
return 1./x
def rvs ( self, size=1 ):
return numpy.abs(numpy.random.randn(size))
def full_gauss_den(x, mu, va, log):
""" This function is the actual implementation
of gaussian pdf in full matrix case.
It assumes all args are conformant, so it should
not be used directly Call gauss_den instead
Does not check if va is definite positive (on inversible
for that matter), so the inverse computation and/or determinant
would throw an exception."""
d = mu.size
inva = numpy.linalg.inv(va)
fac = 1 / numpy.sqrt( (2*numpy.pi) ** d * numpy.fabs(numpy.linalg.det(va)))
# we are using a trick with sum to "emulate"
# the matrix multiplication inva * x without any explicit loop
#y = -0.5 * N.sum(N.dot((x-mu), inva) * (x-mu), 1)
y = -0.5 * numpy.dot(numpy.dot((x-mu), inva) * (x-mu),
numpy.ones((mu.size, 1), x.dtype))[:, 0]
if not log:
y = fac * numpy.exp(y)
else:
y = y + numpy.log(fac)
return y
class DEMC_sampler(object):
"""
=======================
Python DEMC sampler
=======================
A python implementation of the Differential Evoluation MCMC sampler of ter Braak (2008) (ter Braak C. J. F., and Vrugt J. A. (2008). Differential Evolution Markov Chain
with snooker updater and fewer chains. Statistics and Computing http://dx.doi.org/10.1007/s11222-008-9104-9). It appears that this implementation (and its daughter, DREAM) provide a really nice and efficient MCMC sampling scheme. An additional plus is that the algorithm can be easily parallelised.
The DEMC_sampler object *should not be used directly* (only for testing purposes), but it should be subclassed. Subclassing implies adding your own likelihood function, rather than the provided likelihoood function. The following python packages are required
- numpy ( & linalg, usually comes as standard)
- scipy (we need the stats objects from there)
An example run
~~~~~~~~~~~~~~
Class usage is very simple. The following example defines a DEMC sampler
code-block:: python
DEMC = DEMC_sampler ( 10, n_generations=1000 )
parameter_list=[['x1', 'scipy.stats.uniform(-20, 40)'], ['x2', 'scipy.stats.uniform(-20, 40)']]
parameters = ['x1','x2']
DEMC.prior_distributions ( parameter_list, parameters )
Z = DEMC.ProposeStartingMatrix ( 150 )
(Z_out, accept_rate) = DEMC.demc_zs ( Z )
The first call instantiates the DEMC_sampler class. It is initialised with population size (10 in this example), and with the number of generations (or iterations). The second line defines a list of pairs of parameter names, distributions. The distributions are distribution objects from scipy.stats, or code that implements the same methods that the user provides. Note that we do not provide a reference to these objects, but write the reference as a string (the reference is obtained internally). We also provide a list with the parameter names in the same order they are required in the M{\theta} parameter vector. These two lists are then included in the object by calling the prior_distributions() method. The sampler is initialised with a M{Z} matrix (num_params x [...]), sampled from the prior distributions. Or, you can provide your own. Finally, the sampler is started and it returns the Z_out matrix (the samples for the different parameters) and the accepctance rate (try to get it ~0.2-0.4)
"""
def __init__ ( self, num_population, logger=True, CR=1.0, F=2.38, pSnooker=0.1, pGamma1=0.1, n_generations=1000, n_thin=5, n_burnin=200, eps_mult=0.1, eps_add=0) :
"""
The class creator. You should pass it the populations (say between 10 or 100), and the sampler parameters, such as n_generations, n_thing and n_burnin. The other parameters are DEMC-related parameters and shouldn't need changing accroding to Vrugt and ter Braak. CR is a crossover rate, and at present, is not used in this implementation of the code (but could be used in a future implementation that does DREAM)
"""
self.num_population = num_population
self.CR = CR
self.F = F
self.pSnooker = pSnooker
self.pGamma1 = pGamma1
self.n_generations = n_generations
self.n_thin = n_thin
self.n_burnin =n_burnin
self.eps_mult = eps_mult
self.eps_add = eps_add
if logger==True:
print "Logging by default to ./DEMC_ZS.log"
logFile = "./DEMC_ZS.log"
self.logger = open(logFile,"w")
if type(logger)==str:
logFile = logger
self.logger = open(logFile,"w")
if logger==None:
self.logger = None
self._tweet ("Started simulation!")
def _tweet ( self, message ):
if self.logger<>None:
import time
self.logger.write ("%s - [ %s ]\n"%(time.asctime(), message ))
self.logger.flush()
def choose_without_replacement(self, m,n,repeats=None):
"""Choose n nonnegative integers less than m without replacement
Returns an array of shape n, or (n,repeats).
This code is from Anne Archiebald, as numpy/scipy don't seem to have this facility.... It looks like it might not be the most efficient code in the world
"""
if repeats is None:
r = 1
else:
r = repeats
if n>m:
raise ValueError, "Cannot find %d nonnegative integers less than %d" % (n,m)
if n>m/2:
res = numpy.sort(numpy.random.rand(m,r).argsort(axis=0)[:n,:],axis=0)
else:
res = numpy.random.random_integers(m,size=(n,r))
while True:
res = numpy.sort(res,axis=0)
w = numpy.nonzero(numpy.diff(res,axis=0)==0)
nr = len(w[0])
if nr==0:
break
res[w] = numpy.random.random_integers(m,size=nr)
if repeats is None:
return res[:,0]
else:
return res
def prior_distributions ( self, parameter_list, parameters ):
"""This function defines the prior distributions from a (python) list. Useful to quickly add/update these parameters. The parameters list (2nd argument) is there to have the parameter names in the same order that vector M{\Theta} will have. So if theta required by the model is [ par1, par2, par3], then parameters=['par1','par2','par3']
Example parameter list:
parameter_list = [['fuel_1hr',"scipy.stats.uniform ( 0,100.)"],\
['fuel_10hr', "scipy.stats.uniform ( 0,100.)"],\
['fuel_100hr', "scipy.stats.uniform ( 0,100.)"] ]
The way this method works is by using setattr to add the prior methods to the self object. Note that in this particular implementation, the parameter distributions are independent (i.e., you can't have correlation between parameters). If this is an issue, you'll need to change the code.
In Bayesian statistics, the prior distributions encapsulate whatever knowledge we have about the distribution of a parameter prior to doing any experiment. On the one hand, they add a degree of subjectiveness, as different people may translate their knowledge in different distribution shapes. Also, it is sometimes useful to use non-informative priors to indicate ignorance or "objectivity" (yeah, right...). In practice, broad uniform distributions, truncated normals, lognormal distributions are often used. In sequential applications of a calibration exercise, the posterior of the previous run can be used as the prior for the current state.
"""
self.parameters = parameters # List is in the order of the parameter vector
for [k,v] in parameter_list:
setattr(self, k, eval( v ))
self._tweet("Parameter: %s. Dist: %s"%(k,v))
def prior_probabilities ( self, theta ):
""" The method that calculates the prior (log) probabilities. This is based on the prior distributions given in prior_distributions, and assumes independence, so we just add them up.
"""
p = numpy.array([ numpy.log ( getattr ( self, self.parameters[i]).pdf ( theta[i])) for i in xrange(len(self.parameters)) ]).sum()
#if numpy.isneginf(p):
#p = numpy.log(1.0E-300)
return p
def likelihood_function ( self, theta ):
"""For example! This function ought to be overridden by the user, and maybe extended with whatever extra parameters you need to get hold of your observations, or model driver parameters.
This function method calculates the likelihood function for a vector M{\theta}. Usually, you have a model you run with these parameters as inputs (+ some driver data), and some observations that go with the output of the forward model output. These two sets of values are combined in some sort of cost function/likelihood function. A common criterion is to assume that the model is able to perfectly replicate the observations (given a proper parametrisation). The only mismatch between model output and observations is then given by the uncertainty with which the measurement is performed, and we can encode this as a zero-mean Normal distribution. The variance of this distribution is then related to the observational error. If different measurements are used, a multivariate normal is useful, and correlation between observations can also be included, if needs be.
"""
means = numpy.matrix([-3.0, 2.8])
means = numpy.matrix([-5.0, 5])
sigma1 = 1.0
sigma2 = 2#0.5
rho = -0.5#-0.1
covar = numpy.matrix([[sigma1*sigma1,rho*sigma1*sigma2],[rho*sigma1*sigma2,sigma2*sigma2]])
inv_covar = numpy.linalg.inv ( covar ) # numpy.matrix([[ 5.26315789, 9.47368421],\
#[ 9.47368421, 21.05263158]])
det_covar = numpy.linalg.det( covar ) #0.047499999999999987
N = means.shape[0]
X = numpy.matrix(means- theta)
#X = theta
#p = full_gauss_den(X, means, covar, True)
#This is just lazy... Using libraries to invert a 2x2 matrix & calc. its determinant....
#Also, the log calculations could be done more efficiently and stored, but...
p = pow(1.0/(2*numpy.pi), N/2.)
p = p / numpy.sqrt ( numpy.linalg.det (covar))
#p = 0.73025296137109341 # Precalc'ed
#p = p * numpy.exp (-0.5*X*inv_covar*X.transpose())
a = X*inv_covar*X.T
p = p*numpy.exp(-0.5*a)
#pdb.set_trace()
p = numpy.log(p)
if numpy.isneginf(p):
p = numpy.log(1.0E-300)
return p
def fitness ( self, theta ):
"""
The new posterior probability in log. Convenience, really
"""
prior = self.prior_probabilities ( theta )
if numpy.isneginf( prior ):
return numpy.log(1.0E-300)
else:
return self.likelihood_function ( theta ) + prior
def MonitorChains ( self, X ):
(Npar, I, T2) = X.shape # (number of parameters, number of chains, number of iterations)
T = T2/2 # Only use the latter half of the itarations. Deals with burn-in issues
#We do it for each parameter, I guess?
rhat = 0.
quantiles = numpy.zeros ((Npar, 5))
param_r = numpy.zeros(Npar)
for par in xrange(Npar):
x = X[par, :, (T+1):] # For convenience, subset per parameter. x is now (chains, iterations)
psi_i = numpy.mean ( x,axis=1) # Check axis!
psi = numpy.mean ( psi_i )
B = numpy.sum ( (psi_i-psi)*(psi_i-psi))*(1/(I-1.))
S = numpy.mean( (x-psi_i[:,numpy.newaxis])**2,axis=1)
W = numpy.mean(S)
V_hat = ((T-.1)/T)*W + (1+1./I)*B
#pdb.set_trace()
if rhat<numpy.sqrt(V_hat/W):
rhat = numpy.sqrt(V_hat/W)
quantiles_p =[ scipy.stats.scoreatpercentile ( x.flatten(1), percentile) for percentile in [2.5, 25., 50., 75., 97.5] ]
quantiles[par,:] = numpy.array ( quantiles_p )
param_r[par] = numpy.sqrt(V_hat/W)
return ( rhat, param_r, quantiles )
def _dump_diags ( self, rhat, param_r, quantiles, iteration ):
self._tweet ("Convergence diagnostics @ iteration %s"%iteration)
self._tweet ("Total rhat: %f"%rhat)
for par in xrange(len(self.parameters)):
self._tweet ("Param: %s; rhat: %f"%(self.parameters[par],param_r[par]))
self._tweet("+----------------------------------------------------------------+")
self._tweet("|Parameter| 2.5% | 25% | 50% | 75 % | 97.5% |")
for par in xrange(len(self.parameters)):
self._tweet ("|%s|%10.2g|%10.2g|%10.2g|%10.2g|%10.2g|"%(self.parameters[par].rjust(9),quantiles[par,0], quantiles[par,1], quantiles[par,2],quantiles[par,3], quantiles[par,4]) )
self._tweet("+----------------------------------------------------------------+")
def ProposeStartingMatrix ( self, m0):
"""
The proposed starting matrix. We initialise the chain with this matrix. Usually, a draw from the prior distribution is suitable
"""
if m0<=(self.num_population+len(self.parameters)):
print "The size of the Z matrix needs to be larger than the population size + the number of parameters"
sys.exit()
Z = []
for i in xrange(len(self.parameters)):
Z.append(getattr ( self, self.parameters[i]).rvs ( size = m0))
return numpy.array(Z)
def demc_zs ( self, Z):
#CR=self.CR, F=self.F, pSnooker=0.1, pGamma1=0.1, n_generations=10000, n_thin=5, n_burnin=2000, eps_mult=0.1, eps_add=0
#Z = numpy.zeros ( d, m0)
#X = numpy.zeros (d, num_population)
rhat_prev = 2.
d = 2
npass = 0
X = Z[:,:self.num_population]
m0 = Z.shape[1]
self.discard = int(m0+self.num_population*numpy.floor(self.n_burnin/self.n_thin))
mZ = Z.shape[1]
Npar = X.shape[0]
Npar12 = (Npar-1)/2. # Factor for Metropolis ratio DE snooker update
F2 = self.F/numpy.sqrt ( 2.*Npar)
F1 = 1.0
accept = numpy.zeros ( self.n_generations )
#iseq = numpy.arange(1, num_population)
rr = 0.0 ; r_extra = 0
#print int(m0+num_population*numpy.floor(n_burnin/n_thin))
#Calculate the starting posteriors...
Z_diagnostic = numpy.zeros ( (Npar, self.num_population, self.n_generations))
logfitness_x = [ self.fitness(X[:,i]) for i in xrange(self.num_population) ]
#Start of main loop
iteration = -1
T0 = time()
while True:
# We clear the acceptance counter
accepti = 0
for i in xrange(self.num_population):
#Start of different chains loop
#First decide whether this is a snooker update or not
if (numpy.random.random()<self.pSnooker):
#Snooker update
#Select three chains
rr = self.choose_without_replacement(mZ-1, 3, repeats=None)
z = Z[:,rr[2]]
x_z = X[:,i] - z
#Difference between the current point and one of the 3 chains
D2 = max(numpy.sum(x_z*x_z), 1.0e-300) # This is the distance. Could do it with dot?
gamma_snooker = numpy.random.random()+1.2 # Snooker stochastic
proj_diff = numpy.sum((Z[:,rr[0]] - Z[:,rr[1]])*x_z)/D2 # Project the difference onto x_z. Normalize by x_z's norm
x_prop = X[:,i] + (gamma_snooker * proj_diff) * x_z # Proposed point
x_z = x_prop - z # update x_z
#pdb.set_trace()
D2prop = max( numpy.dot(x_z, x_z), 1.0e-30) # Calculate D2prop
r_extra = Npar12*(numpy.log(D2prop) - numpy.log(D2))
else:
if (numpy.random.random()<self.pGamma1):
gamma_par = F1
else:
gamma_par = F2 * numpy.random.uniform( low=1-self.eps_mult, high=1+self.eps_mult, size=Npar)
rr = self.choose_without_replacement(mZ-1, 2, repeats=None)
if (self.eps_add==0):
x_prop = X[:,i] + gamma_par * ( Z[:,rr[0]] - Z[:,rr[1]])
else:
x_prop = X[:,i] + gamma_par * ( Z[:,rr[0]] - Z[:,rr[1]]) + self.eps_add*numpy.random.randn(Npar)
r_extra = 0
logfitness_x_prop = self.fitness ( x_prop )
logr = logfitness_x_prop - logfitness_x[i]
if (logr + r_extra)>numpy.log ( numpy.random.random()):
accepti += 1
X[:,i] = x_prop
logfitness_x[i] = logfitness_x_prop
Z_diagnostic [:, i, iteration] = X[:,i]
accept[iteration] = accepti
iteration += 1
if iteration%100==0:
T1 = time()
self._tweet("Run 100 iterations more. Now at %d"%iteration)
self._tweet("Speed: %f sims/s"%( 100./(T1-T0)))
T0 = time()
#print logfitness_x[i], logfitness_x_prop ,logr,r_extra,accepti,x_prop
if iteration%self.n_thin==0:
Z = numpy.c_[X,Z]
mZ = Z.shape[1]
#print "iteration ",iteration
#print "\t",X[0,:]
#print "\t",X[1,:]
if (iteration% (self.n_generations)==0) and iteration>0:
( rhat, param_r, quantiles ) = self.MonitorChains ( Z_diagnostic)
self._dump_diags ( rhat, param_r, quantiles, iteration )
new_pop = iteration
if ( rhat <= 1.05 ):
npass += 1
rhat_prev = rhat
else:
if npass > 0:
npass = 0
if npass > 3: break
if iteration>MAX_ITERATIONS:
self._tweet ( "Maximum number of iterations exceeded. Bailing out...")
break
accept = numpy.append ( accept, numpy.zeros (self.n_generations+1), 0)
Z_diagnostic = numpy.append(Z_diagnostic, numpy.zeros ((Npar, self.num_population, self.n_generations+1)), 2)
self._tweet ("Finished simulation!")
self._tweet ( "Returned %d samples"%int(m0+iteration*numpy.floor(self.n_burnin/self.n_thin)))
return ( Z, accept/self.num_population )
# return (Z[:,:int(m0+iteration*numpy.floor(self.n_burnin/self.n_thin))], accept/self.num_population)
if __name__=="__main__":
DEMC = DEMC_sampler ( 4, n_generations=400, n_burnin=1, n_thin=1, logger="test_me.log")
parameter_list=[['x1', 'scipy.stats.norm(-100, 20)'], ['x2', 'scipy.stats.norm(-100, 20)']]
parameters = ['x1','x2']
DEMC.prior_distributions ( parameter_list, parameters )
Z = DEMC.ProposeStartingMatrix ( 150 )
(Z_out, accept_rate) = DEMC.demc_zs ( Z )
import pylab
pylab.figure();pylab.plot ( Z_out[0,:], Z_out[1,:], 'k,')
pylab.figure();pylab.hist(Z_out[0,:],bins=10);pylab.hist(Z_out[1,:],bins=10);
pylab.figure();pylab.hexbin(Z_out[0,:], Z_out[1,:], bins='log')
pylab.show()