-
Notifications
You must be signed in to change notification settings - Fork 0
/
HMC_mutiple_samples.py
233 lines (200 loc) · 9.2 KB
/
HMC_mutiple_samples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 18 10:12:19 2015
@author: user
"""
import numpy
import theano
import theano.tensor as T
import hmc_sampling
import scipy.linalg as linalg
import scipy.optimize
import matplotlib.pyplot as plt
import timeit
def test_hmc(n_sample, n_dim, mu, cov, cov_inv, rng, seed):
    """Fit ``n_sample`` representative particles of a Gaussian and return E[exp(x)].

    The particle positions are the optimisation variables.  They are tuned
    with L-BFGS-B so that an HMC move started from them (with a fixed random
    momentum) leaves the acceptance-weighted statistic ``exp(x)`` unchanged,
    and so that the acceptance-weighted Hamiltonian difference reported by
    ``hmc_sampling.hmc_move`` sums to zero.

    Parameters
    ----------
    n_sample : int
        Number of particles (rows of the position matrix).
    n_dim : int
        Dimensionality of each particle.
    mu : numpy.ndarray
        Ground-truth mean used in the Gaussian energy.
    cov : numpy.ndarray
        Ground-truth covariance.  Not used by this function; kept so that
        existing callers do not have to change.
    cov_inv : numpy.ndarray
        Inverse of the ground-truth covariance (precision matrix) used in the
        Gaussian energy ``0.5 * (x - mu) cov_inv (x - mu)^T``.
    rng : numpy.random.RandomState
        Generator for the optimiser's starting point ``x0``.  Its state is
        advanced by ``n_sample * n_dim`` normal draws.
    seed : int
        Seed for the generator that draws the initial momentum.

    Returns
    -------
    numpy.ndarray
        Mean of ``exp(x)`` over the optimised particles, one entry per
        dimension.  The same value is also printed.
    """
    float_x = theano.config.floatX

    # All particles live in one flat shared vector (what the optimiser sees);
    # the reshaped view puts one particle per row.
    params = theano.shared(value=numpy.zeros(n_sample*n_dim, dtype=float_x),
                           name='params', borrow=True)
    initial_pos = params[0:n_sample*n_dim].reshape((n_sample, n_dim))

    def gaussian_energy(x):
        """Return the Gaussian energy of every row of ``x``."""
        return 0.5 * (T.dot((x - mu), cov_inv) * (x - mu)).sum(axis=1)

    # Symbolic inputs of the compiled functions:
    #   initial_vel: random initial momentum
    #   n_step:      number of leapfrog steps
    #   stepsizes:   one step size per particle
    initial_vel = T.matrix('initial_vel')
    n_step = T.iscalar('n_step')
    stepsizes = T.vector('stepsizes')

    # Return values of hmc_move, as described by the original author's notes
    # (hmc_sampling itself is not visible here -- confirm against that module):
    #   1st: accept prob. associated with all trajectories
    #   2nd: accept prob. associated with only the end of the trajectory
    #   3rd: final positions associated with all trajectories
    #   4th: final positions associated with only the end of the trajectory
    #   5th: difference of Hamiltonian energy
    #_,accept,_, final_pos= hmc_sampling.hmc_move(initial_vel,initial_pos, gaussian_energy, 0.1,50)
    accept, _, final_pos, _, ndeltaH = hmc_sampling.hmc_move(
        initial_vel, initial_pos, gaussian_energy, stepsizes, n_step)
    # Append a broadcastable axis so the 2-D acceptance weights multiply the
    # per-dimension position statistics.
    accept_matrix = accept.dimshuffle(0, 1, 'x')

    # Objective, part 1: match one specific statistic.  Uncomment the proper
    # line and assign it to total_cost1.  A sum over particles is used
    # instead of a mean to avoid large sample fluctuation.
    #sampler_cost_first = accept_matrix*(initial_pos-final_pos)
    #sampler_cost_second = accept_matrix*(initial_pos**2-final_pos**2)
    #sampler_cost_third = accept_matrix*(initial_pos**3-final_pos**3)
    #sampler_cost_sin = accept_matrix*(T.sin(initial_pos)-T.sin(final_pos))
    sampler_cost_exp = accept_matrix*(T.exp(initial_pos)-T.exp(final_pos))
    total_cost1 = sampler_cost_exp

    # 1) To average samples along all the trajectories, use the first two
    #    lines (mean over the leading axis, then sum over particles).
    # 2) To penalise each sampler along the trajectories, use the last two
    #    (commented) lines instead.
    total_cost1 = T.mean(total_cost1, axis=0)
    total_cost1 = T.sum(total_cost1, axis=0)
    #total_cost = (total_cost**2).sum(axis=1)
    #total_cost = T.mean(total_cost)

    # Objective, part 2: match the average Hamiltonian energy.  To ignore
    # this term, drop total_cost2 from total_cost below.
    total_cost2 = T.mean(accept*ndeltaH, axis=0)
    total_cost2 = T.sum(total_cost2)

    total_cost = T.mean(total_cost1**2)+total_cost2**2
    func_eval = theano.function([initial_vel, stepsizes, n_step], total_cost,
                                name='func_eval', allow_input_downcast=True)
    func_grad = theano.function([initial_vel, stepsizes, n_step],
                                T.grad(total_cost, params),
                                name='func_grad', allow_input_downcast=True)

    # Per-particle step sizes: baseline * noise_level ** u with u drawn
    # uniformly from [-1, 0.5), using a fixed seed so runs are repeatable.
    rng_stepsize = numpy.random.RandomState(353)
    random_stepsizes = numpy.array(rng_stepsize.rand(n_sample), dtype=float_x)
    random_interval = 1.5*random_stepsizes-1
    stepsize_baseline = 0.2
    noise_level = 2
    stepsizes0 = stepsize_baseline*noise_level**random_interval

    # The momentum must be identical on every cost/gradient evaluation so the
    # objective is deterministic.  Re-seeding per call always produced this
    # same array, so it is drawn once here.
    initial_v = numpy.array(numpy.random.RandomState(seed).randn(n_sample, n_dim),
                            dtype=float_x)
    n_leapfrog = 30

    def load_params(param_new):
        """Copy the optimiser's current iterate into the shared variable."""
        params.set_value(param_new.astype(float_x), borrow=True)

    # Cost and gradient callbacks for scipy.optimize.  Results are handed
    # back as float64 because L-BFGS-B works in double precision, whatever
    # floatX is configured to.
    def train_fn(param_new):
        load_params(param_new)
        return float(func_eval(initial_v, stepsizes0, n_leapfrog))

    def train_fn_grad(param_new):
        load_params(param_new)
        return numpy.asarray(func_grad(initial_v, stepsizes0, n_leapfrog),
                             dtype=numpy.float64)

    n_epoch = 5000
    x0 = numpy.array(rng.randn(n_sample*n_dim), dtype=float_x)
    #x0 = numpy.array((1.0/cov_inv)*rng.randn(n_sample*n_dim)+mu, dtype=theano.config.floatX)
    #x0 = samples_true.flatten()
    best_samples_params = scipy.optimize.fmin_l_bfgs_b(func=train_fn,
                                                       x0=x0,
                                                       fprime=train_fn_grad,
                                                       maxiter=n_epoch)
    # fmin_l_bfgs_b returns (x, f, info); only the optimised positions are used.
    res = (best_samples_params[0]).reshape((n_sample, n_dim))

    # Report the estimate.  Swap in the matching line for another statistic:
    #   res.mean(axis=0)                  vs. true mean   mu
    #   (res**2).mean(axis=0)             vs. true second moment   mu**2 + 1./cov_inv
    #   (res**3).mean(axis=0)
    #   (numpy.sin(res)).mean(axis=0)
    estimate = (numpy.exp(res)).mean(axis=0)
    # Single-argument print works on Python 2 and 3 and keeps the original
    # output text (label, two spaces, value).
    print("estimated exp(x) from representative samples=  %s" % (estimate,))

    # Return value for the chosen statistic (keep in sync with the cost above).
    #return res.mean(axis=0)
    #return (res**2).mean(axis=0)
    #return (res**3).mean(axis=0)
    #return (numpy.sin(res)).mean(axis=0)
    return estimate
def multiple_hmc():
    """Compare representative-sample and i.i.d. estimates over many sample sizes.

    For every sample size in 1..9, 10..90, ..., 10000..90000 this runs
    ``test_hmc`` to estimate the mean of ``exp(x)`` under a randomly chosen
    Gaussian, computes the same statistic from independent Gaussian draws,
    prints the total optimisation time, and shows two log-x plots: both
    estimates, and the absolute difference between them.

    Takes no arguments and returns nothing.
    """
    float_x = theano.config.floatX

    # Sample sizes [1,2,...,9,10,20,...,90,100,200,...]: every decade base
    # 10**k multiplied by 1..9, in increasing order.
    nIter = 5
    n_samples = numpy.outer(10 ** numpy.arange(nIter), numpy.arange(1, 10)).ravel()

    # Ground-truth mean and covariance of the Gaussian (fixed seeds).
    n_dim = 1
    rng = numpy.random.RandomState(123)
    mu = numpy.array(rng.rand(n_dim)*1, dtype=float_x)
    rng1 = numpy.random.RandomState(444)
    cov = numpy.array(rng1.rand(n_dim, n_dim), dtype=float_x)
    cov = (cov+cov.T)/2.
    cov[numpy.arange(n_dim), numpy.arange(n_dim)] = 1.0*rng1.rand(1)
    cov_inv = linalg.inv(cov)
    #cov = numpy.eye(n_dim, dtype=theano.config.floatX)*rng1.rand(1)
    #cov = (rng1.rand(1)).astype(theano.config.floatX)
    #cov_inv = 1./(cov)

    # Estimated value for each sample size; the sample size doubles as the
    # momentum seed passed to test_hmc.
    start_time = timeit.default_timer()
    y_estimated_temp = [test_hmc(n_sample, n_dim, mu, cov, cov_inv, rng, n_sample)
                        for n_sample in n_samples]
    end_time = timeit.default_timer()
    # Single-argument print works on Python 2 and 3 and keeps the original
    # output text (label, two spaces, value).
    print("computing time=  %s" % (end_time-start_time,))
    y_estimated = numpy.array(y_estimated_temp)

    # Reference values from independent samples x = z L^T + mu, where L is
    # the lower Cholesky factor of cov.  For n_dim == 1 this is exactly
    # sqrt(cov) * z + mu; unlike an elementwise sqrt it stays correct when
    # n_dim > 1.  NOTE: cholesky raises LinAlgError if cov is not positive
    # definite, which the random construction above does not guarantee for
    # n_dim > 1.
    chol_factor = linalg.cholesky(cov, lower=True)
    y_independent = []
    for n_sample in n_samples:
        noise = numpy.array(rng.randn(n_sample, n_dim), dtype=float_x)
        independent_samples = noise.dot(chol_factor.T)+mu
        # Use the line matching the statistic chosen in test_hmc.
        #y_independent.append(independent_samples.mean(axis=0))
        #y_independent.append((independent_samples**2).mean(axis=0))
        #y_independent.append((independent_samples**3).mean(axis=0))
        y_independent.append((numpy.exp(independent_samples).mean(axis=0)))
        #y_independent.append(numpy.sin(independent_samples).mean(axis=0))
    y_independent = numpy.array(y_independent)

    # Top panel: both estimates against the number of samples (log x-axis).
    plt.subplot(2, 1, 1)
    plt.xscale('log')
    #plt.yscale('log')
    # plt.plot(n_samples, numpy.sqrt(((y_estimated-mu)**2).mean(axis=1)), color='blue', lw=2, label='rep. samples error curve.')
    # plt.plot(n_samples, numpy.sqrt(((y_independent-mu)**2).mean(axis=1)), color='green', lw=2, label='independent error curve')
    plt.plot(n_samples, y_estimated, color='blue', lw=2, label='rep. samples with all traj.')
    plt.plot(n_samples, y_independent, color='green', lw=2, label='independent')
    plt.xlabel('number of samples')
    plt.ylabel('value of the matching statistics')
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode='expand', borderaxespad=0.)

    # Bottom panel: absolute gap between the two estimates.
    plt.subplot(2, 1, 2)
    plt.xscale('log')
    plt.plot(n_samples, numpy.abs(y_estimated-y_independent), color='red')
    plt.xlabel('number of samples')
    plt.ylabel('error')
    plt.show()
# Run the sample-size sweep only when this file is executed as a script,
# not when it is imported as a module.
if __name__=="__main__":
    multiple_hmc()