def sample(self, T, g, g0=None):
    if g0 is None:
        g0 = g
    v, h = self.v, self.h
    VH, HH, b_init = self
    V = zeros((T, v))
    H = zeros((T, h))
    B = zeros((T, h))

    ## weights of the RBM at t=0: the usual vis-hid weights plus the initial hidden bias.
    VH_t = 1 * VH
    VH_t[2] = VH[2] + b_init
    V[[0]], H[[0]] = rbm.sample(VH_t, g0, 1, self.vis_gauss)
    ## mean-field-ize the output:
    if self.vis_gauss:
        V[[0]] = VH_t.T() * H[[0]]
    else:
        V[[0]] = sigmoid(VH_t.T() * H[[0]])

    for t in range(1, T):
        ## dynamic hidden bias coming from the previous hidden state.
        B[[t]] = HH * H[[t - 1]]
        VH_t[2] = VH[2] + B[t]
        V[[t]], H[[t]] = rbm.sample(VH_t, g, 1, self.vis_gauss)
        ## mean-field-ize the output:
        if self.vis_gauss:
            V[[t]] = VH_t.T() * H[[t]]
        else:
            V[[t]] = sigmoid(VH_t.T() * H[[t]])
    return V

def sample(self, T, g, g0=None):
    if g0 is None:
        g0 = g
    v, h = self.v, self.h
    VH, HH, b_init = self
    V = zeros((T, v))
    H = zeros((T, h))
    B = zeros((T, h))

    VH_t = 1 * VH
    VH_t[2] = VH[2] + b_init
    V[[0]], H_t_stoch = rbm.sample(VH_t, g0, 1, self.vis_gauss)
    ## the hidden state carried to the next timestep is the mean-field hidden
    ## activity given the sampled visibles, not the stochastic hidden sample.
    H[[0]] = sigmoid(VH_t * V[[0]])
    ## mean-field-ize the output:
    if self.vis_gauss:
        V[[0]] = VH_t.T() * H_t_stoch
    else:
        V[[0]] = sigmoid(VH_t.T() * H_t_stoch)

    for t in range(1, T):
        B[[t]] = HH * H[[t - 1]]
        VH_t[2] = VH[2] + B[t]
        V[[t]], H_t_stoch = rbm.sample(VH_t, g, 1, self.vis_gauss)
        H[[t]] = sigmoid(VH_t * V[[t]])
        ## mean-field-ize the output:
        if self.vis_gauss:
            V[[t]] = VH_t.T() * H_t_stoch
        else:
            V[[t]] = sigmoid(VH_t.T() * H_t_stoch)
    return V

def grad(self, V):
    T, v = V.shape
    assert v == self.v
    h = self.h
    VH, HH, b_init = self
    G = 0 * self
    d_VH, d_HH, d_b_init = G

    ## forward pass: infer the mean-field hidden states and dynamic biases.
    H = zeros((T, h))
    B = zeros((T, h))
    H[0] = sigmoid(VH * V[[0]] + b_init[newaxis, :])
    for t in range(1, T):
        B[[t]] = HH * H[[t - 1]]
        H[[t]] = sigmoid(VH * V[[t]] + B[[t]])

    ## backward pass: backpropagate through time, accumulating the
    ## per-timestep RBM gradients along the way.
    dB = zeros((T, h))
    dBL = zeros((T, h))
    F_t = zeros(h)
    loss = 0
    VH_t = 1 * VH
    for t in reversed(range(T)):
        dB[t] = H[t] * (1 - H[t]) * F_t
        VH_t[2] = B[t] + VH[2]
        if self.CD_n > 0:
            dVH_t, dict_loss = rbm.rbm_grad_cd(VH_t, V[[t]], self.CD_n, self.vis_gauss)
        else:
            dVH_t, dict_loss = rbm.rbm_grad_exact(VH_t, V[[t]], self.vis_gauss)
        loss += dict_loss['loss']
        dBL[t] = dVH_t[2]
        d_VH += dVH_t
        HH.direction = up
        F_t[:] = HH.T() * (dB[[t]] + dBL[[t]])
        HH.direction = None
    d_b_init += dB[0]

    ## accumulate the recurrent and visible-hidden weight gradients.
    HH.direction = up
    VH.direction = up
    for t in range(1, T):
        d_HH += HH.outp_up(H[[t - 1]], dB[[t]] + dBL[[t]])
        d_VH += VH.outp_up(V[[t]], dB[[t]])
    d_VH += VH.outp_up(V[[0]], dB[[0]])
    HH.direction = None
    VH.direction = None
    return G, dict(loss=loss)

def rbm_grad_exact(W, x, vis_gauss=False):
    batch_size = float(len(x))
    v, h = W.v, W.h
    G = 0 * W

    ## positive phase: data-dependent statistics.
    H = sigmoid(W * x)
    G += 1. / batch_size * W.outp(x, H)

    if not vis_gauss:
        ## negative phase: enumerate all 2**v visible configurations.
        Z = brute_force_Z(W)

        def prob(V):
            return exp(free_energy(W, V)[0] - Z)

        for i in xrange(2 ** v):
            V = int_to_bin(i, v)
            H = sigmoid(W * V)
            G -= prob(V) * W.outp(V, H)
        loss = -(free_energy(W, x).mean(0) - Z)

    if vis_gauss:
        ## gaussian visibles: enumerate all 2**h hidden configurations instead.
        Z = brute_force_Z_vis_gauss(W)

        def prob(H):
            b = W.T() * H
            z = .5 * dot(b, b.T) + dot(H, W[2])
            return float(exp(z - Z))

        for i in xrange(2 ** h):
            H = int_to_bin(i, h)
            b = W.T() * H
            G -= prob(H) * W.outp(b, H)
        loss = -(-.5 * amap(dot, x, x).mean() + free_energy(W, x).mean(0) - Z)

    return batch_size * G, dict(loss=batch_size * loss)

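## The exact gradient above depends on brute_force_Z, free_energy and
## int_to_bin, which are not shown in this listing (brute_force_Z_vis_gauss is
## likewise omitted here).  The sketch below is an *assumption* about what
## they compute for a binary-binary RBM with weight matrix w (h x v), visible
## bias bv and hidden bias bh: int_to_bin turns an integer into a bit vector,
## free_energy is the log unnormalised probability of a visible vector, and
## brute_force_Z is the log partition function obtained by enumerating all
## 2**v visible configurations.  The real code works on the packed weight
## object W instead of plain (w, bv, bh) arrays.
import numpy as np

def int_to_bin_sketch(i, v):
    ## bit j of integer i, for j = 0 .. v-1
    return np.array([(i >> j) & 1 for j in range(v)], dtype=float)

def free_energy_sketch(w, bv, bh, V):
    ## log p*(V) = bv.V + sum_j softplus(bh_j + w_j.V), computed stably.
    return np.dot(V, bv) + np.logaddexp(0.0, bh + np.dot(V, w.T)).sum(axis=-1)

def brute_force_Z_sketch(w, bv, bh):
    ## log partition function: log-sum-exp of the free energy over all visible states.
    v = w.shape[1]
    F = np.array([free_energy_sketch(w, bv, bh, int_to_bin_sketch(i, v))
                  for i in range(2 ** v)])
    m = F.max()
    return m + np.log(np.exp(F - m).sum())
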
def grad(self, V):
    T, v = V.shape
    assert v == self.v
    h = self.h
    VH, HH, b_init = self
    G = 0 * self
    d_VH, d_HH, d_b_init = G

    ## forward pass: mean-field hidden states and dynamic biases.
    H = zeros((T, h))
    B = zeros((T, h))
    H[0] = sigmoid(VH * V[[0]] + b_init[newaxis, :])
    for t in range(1, T):
        B[[t]] = HH * H[[t - 1]]
        H[[t]] = sigmoid(VH * V[[t]] + B[[t]])

    dB = zeros((T, h))
    dBL = zeros((T, h))
    F_t = zeros(h)
    loss = 0
    VH_t = 1 * VH
    for t in reversed(range(T)):
        VH_t[2] = B[t] + VH[2]
        if self.CD_n > 0:
            dVH_t, dict_loss = rbm.rbm_grad_cd(VH_t, V[[t]], self.CD_n, self.vis_gauss)
        else:
            dVH_t, dict_loss = rbm.rbm_grad_exact(VH_t, V[[t]], self.vis_gauss)
        loss += dict_loss['loss']
        d_VH += dVH_t
        if t > 0:
            ## the dynamic-bias gradient flows into the recurrent weights...
            HH.direction = up
            d_HH += HH.outp(H[[t - 1]], dVH_t[2][newaxis, :])
            HH.direction = None
        else:
            ## ...and, at t=0, into the initial hidden bias.
            d_b_init += dVH_t[2]
    return G, dict(loss=loss)

def rbm_grad_cd(W, x, cd, vis_gauss=False):
    batch_size = float(len(x))
    v, h = W.v, W.h

    ## positive phase.
    V = x
    H = sigmoid(W * x)
    G = W.outp(V, H)

    ## negative phase: cd steps of alternating Gibbs sampling.
    for g in range(cd):
        H = stochastic(H)
        if vis_gauss:
            V = W.T() * H + randn(len(x), v)
        else:
            V = Rsigmoid(W.T() * H)
        H = sigmoid(W * V)
    G -= W.outp(V, H)

    loss = abs(V - x).sum()
    return G, dict(loss=loss)

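## sigmoid, Rsigmoid and stochastic are used throughout this listing but are
## defined elsewhere.  The sketch below states the assumed semantics: sigmoid
## is the logistic function, stochastic draws binary samples from a matrix of
## Bernoulli probabilities, and Rsigmoid composes the two (a "random sigmoid",
## i.e. a sampled logistic unit).  These definitions are assumptions inferred
## from how the functions are used, not copies of the originals.
import numpy as np

def sigmoid_sketch(x):
    return 1.0 / (1.0 + np.exp(-x))

def stochastic_sketch(p):
    ## sample each unit independently: 1 with probability p, else 0
    return (np.random.rand(*p.shape) < p).astype(float)

def Rsigmoid_sketch(x):
    ## logistic mean followed by a Bernoulli draw
    return stochastic_sketch(sigmoid_sketch(x))
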
def grad(self, V1):
    batch_size, v = V1.shape
    assert v == self.v
    self.V1 = V1
    W = self.w[0]
    G = 0 * self
    dW = G[0]

    ## positive phase.
    H1 = sigmoid(W * V1)
    dW += W.outp(V1, H1)

    ## negative phase: one step of Gibbs sampling (CD-1).
    H1 = stochastic(H1)
    V2 = Rsigmoid(W.T() * H1)
    H2 = sigmoid(W * V2)
    dW -= W.outp(V2, H2)

    recon = abs(V1 - V2).sum(1).sum()
    return G, dict(loss=recon)

def sample_last_mf(W, g, batch_size, vis_gauss=False):
    v, h = W.v, W.h
    V = rand(batch_size, v)
    ## g sweeps of alternating Gibbs sampling...
    for gg in range(g):
        H = Rsigmoid(W * V)
        if vis_gauss:
            V = W.T() * H + randn(batch_size, v)
        else:
            V = Rsigmoid(W.T() * H)
    ## ...followed by a final mean-field visible update (no sampling noise).
    if vis_gauss:
        V = W.T() * H
    else:
        V = sigmoid(W.T() * H)
    return V, H

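## For reference, this is roughly what the sampler above computes, written for
## a plain binary-binary RBM with weight matrix w (h x v), visible bias bv and
## hidden bias bh rather than the packed weight object W.  It is a hedged
## sketch of the technique (alternating Gibbs sweeps with a deterministic,
## mean-field final visible update); the gaussian-visible branch is omitted,
## and this is not the project's actual API.
import numpy as np

def gibbs_sample_last_mf_sketch(w, bv, bh, g, batch_size):
    def sigm(x):
        return 1.0 / (1.0 + np.exp(-x))
    h, v = w.shape
    V = np.random.rand(batch_size, v)
    H = np.zeros((batch_size, h))
    for _ in range(g):
        ## sample hiddens given visibles, then visibles given hiddens
        H = (np.random.rand(batch_size, h) < sigm(np.dot(V, w.T) + bh)).astype(float)
        V = (np.random.rand(batch_size, v) < sigm(np.dot(H, w) + bv)).astype(float)
    ## final step: keep the mean-field visible probabilities instead of sampling
    V = sigm(np.dot(H, w) + bv)
    return V, H
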
def empirically_evaluate(t):
    """
    basically, what do we want to do here?  I want to run inference (be it
    mean field or not) and then predict the next timestep.  This is kind of
    nice because the way in which we do the approximate prediction is
    identical in both the TRBM and the RTRBM.  In particular, the RTRBM uses
    mean-field for inference.
    """
    V = t.valid_data_fn()
    W = t.W

    ## step 1: do the approximate mean-field inference (note: that's also the
    ## way the TRBM does inference -- the TRBM does not sample the hiddens,
    ## but only computes their "mean-field" values, which is identical to the
    ## inference procedure of the RTRBM).
    T, v = V.shape
    h = W.h
    assert W.v == v
    VH, HH, b_init = W

    from pylab import zeros, sigmoid, Rsigmoid, newaxis, sqrt

    H = zeros((T, h))
    B = zeros((T, h))
    inp_from_past = zeros((T, h))

    ## STEP 1: INFER THE HIDDEN UNITS
    H[0] = sigmoid(VH * V[[0]] + b_init[newaxis, :])
    for s in range(1, T):
        B[[s]] = HH * H[[s - 1]]
        H[[s]] = sigmoid(VH * V[[s]] + B[[s]])

    ## STEP 2: PREDICT EACH TIMESTEP FROM THE HIDDENS OF THE PREVIOUS ONE
    loss = 0
    for s in range(2, T):
        target = V[s]
        hid_bias = HH * H[[s - 1]]

        ## VH_t is the set of weights of the "RBM" at time s: the same as the
        ## normal vis-hid weights, except that the hidden bias also gets the
        ## dynamic contribution from the previous timestep.
        VH_t = 1 * VH
        VH_t[2] = VH[2] + hid_bias  ## original bias + extra, dynamic bias.

        ## the point of sampling the last step with mean-field is that, in the
        ## gaussian case, the prediction is the conditional mean rather than a
        ## noisy sample.  g_gen is the number of Gibbs steps used for the
        ## prediction (defined outside this function).
        pred, hid = rbm.sample_last_mf(VH_t, g_gen, 1, W.vis_gauss)
        loss += ((target - pred) ** 2).sum()

    return (float(loss) / T) / v