def get_loss_func(self, x, C=1.0):
    batchsize = len(self.encode(x)[0])
    # Pre-drawn samples (not used below; each pass of the loop re-samples z)
    z = list()
    mu, ln_var = self.encode(x)
    for l in six.moves.range(self.sampling_number):
        z.append(F.gaussian(mu, ln_var))

    for iii in range(self.sampling_number):
        if iii == 0:
            rec_loss = 0
            z = F.gaussian(mu, ln_var)
            rec_loss += F.sum(
                F.bernoulli_nll(x, self.decode(z, sigmoid=False), reduce='no'),
                axis=1) / batchsize
            loss = rec_loss + F.sum(
                C * gaussian_kl_divergence(mu, ln_var, reduce='no'),
                axis=1) / batchsize
            loss = F.reshape(loss, [batchsize, 1])
        else:
            rec_loss = 0
            z = F.gaussian(mu, ln_var)
            rec_loss += F.sum(
                F.bernoulli_nll(x, self.decode(z, sigmoid=False), reduce='no'),
                axis=1) / batchsize
            tmp_loss = rec_loss + F.sum(
                C * gaussian_kl_divergence(mu, ln_var, reduce='no'),
                axis=1) / batchsize
            tmp_loss = F.reshape(tmp_loss, [batchsize, 1])
            loss = F.concat((loss, tmp_loss), axis=1)

    importance_weight = F.softmax(loss)
    self.total_loss = F.sum(importance_weight * loss)
    return self.total_loss

def encode_x_z(self, x, test=False, argmax_y=True):
    x = self.to_variable(x)
    mean, ln_var = self.q_a_x(x, test=test)
    a = F.gaussian(mean, ln_var)
    y = self.sample_x_y(x, argmax=argmax_y, test=test)
    mean, ln_var = self.q_z_axy(a, x, y, test=test)
    return F.gaussian(mean, ln_var)

def plot_all_imgs(self, index=0):
    # 2x3 grid: originals in column 0, reconstructions in column 2
    f, ax = plt.subplots(2, 3)
    img = self.data[index, 0, :, :]
    img2 = self.data[index, 1, :, :]
    ax[0, 0].imshow(np.reshape(img, (32, 32)))
    ax[0, 0].set_title('Original')
    ax[1, 0].imshow(np.reshape(img2, (32, 32)))
    ax[1, 0].set_title('Original2')
    m, s, m2, s2 = self.autoencoder.encode(
        np.reshape(img, (1, 1, 32, 32)), np.reshape(img2, (1, 1, 32, 32)))
    sample1 = F.gaussian(m, s)
    sample2 = F.gaussian(m2, s2)
    sample = F.concat((sample1, sample2))
    mean = F.concat((m, m2))

    # Reconstruct using sample given m, s
    decoding = np.reshape(
        self.autoencoder.decode(sample, for_plot=False).data, (32, 32))
    give_stats(decoding, 'Decoding')
    im = ax[0, 2].imshow(decoding)
    ax[0, 2].set_title('Reconstruct with sampling')
    data_utils.colorbar(im)

    decoding = np.reshape(
        self.autoencoder.decode(sample, for_plot=True).data, (32, 32))
    give_stats(decoding, 'Decoding Sig')
    im = ax[1, 2].imshow(decoding)
    ax[1, 2].set_title('Sig(Reconstruct with sampling)')
    data_utils.colorbar(im)

def reverse_step(self, out, gaussian_eps, squeeze_factor, sampling=True):
    sum_logdet = 0

    if self.split_output:
        if sampling:
            z_distribution = self.prior(out)
            mean, ln_var = split_channel(z_distribution)
            zi = cf.gaussian(mean, ln_var, eps=gaussian_eps)
        else:
            zi = gaussian_eps
        out = cf.concat((zi, out), axis=1)
    else:
        if sampling:
            zeros = zeros_like(gaussian_eps)
            z_distribution = self.prior(zeros)
            mean, ln_var = split_channel(z_distribution)
            out = cf.gaussian(mean, ln_var, eps=gaussian_eps)
        else:
            out = gaussian_eps

    for flow in self.flows[::-1]:
        out, logdet = flow.reverse_step(out)
        sum_logdet += logdet

    out = unsqueeze(out, factor=squeeze_factor)
    return out, sum_logdet

def plot_all_imgs(self, index=0):
    '''
    Plotting procedure for the target, reconstruction pre-sigmoid and reconstruction
    index: index of image in data matrix (because of seeding use same idx for same img)
    '''
    f, ax = plt.subplots(1, 3)
    img = self.data[index, 0, :, :]
    img2 = self.data[index, 1, :, :]
    target = self.data[index, 2, :, :]
    ax[0].imshow(np.reshape(target, (32, 32)), interpolation="nearest")
    ax[0].set_title('Target')
    m, s, m2, s2 = self.autoencoder.encode(
        np.reshape(img, (1, 1, 32, 32)), np.reshape(img2, (1, 1, 32, 32)))
    sample1 = F.gaussian(m, s)
    sample2 = F.gaussian(m2, s2)
    sample = F.concat((sample1, sample2))

    # Reconstruct using sample given m, s
    decoding = np.reshape(self.autoencoder.decode(sample, for_plot=False).data, (32, 32))
    give_stats(decoding, 'Decoding')
    im = ax[1].imshow(
        decoding,
        cmap=data_utils.shiftedColorMap(
            matplotlib.cm.jet,
            midpoint=data_utils.calcMidpointForCM(decoding),
            name='shifted'),
        interpolation="nearest")
    ax[1].set_title('Reconstruction with sampling')
    data_utils.colorbar(im)

    result = np.reshape(self.autoencoder.decode(sample, for_plot=True).data, (32, 32))
    give_stats(result, 'Decoding Sig')
    im = ax[2].imshow(result, interpolation="nearest")
    ax[2].set_title('Sig(Reconstruction with sampling)')
    data_utils.colorbar(im)

    # Plot MSE in title
    MSE = chainer.functions.mean_squared_error(target, result).data
    f.suptitle("MSE of {:02.10f}".format(float(MSE)))

def __call__(self, x, seed):
    if seed not in self.calledValues:
        w = F.gaussian(self.muW, self.lnSigmaW)
        b = F.gaussian(self.muB, self.lnSigmaB)
        self.calledValues[seed] = (w, b)
    else:
        w, b = self.calledValues[seed]
    return F.linear(x, w, b)

def _check(self):
    eps = self.eps if self.specify_eps else None
    out, out_eps = functions.gaussian(
        self.m, self.v, eps=eps, return_eps=True)
    assert isinstance(out_eps, type(out.array))
    if eps is None:
        assert out_eps.shape == out.array.shape
    else:
        assert out_eps is eps
    out2 = functions.gaussian(self.m, self.v, eps=out_eps)
    testing.assert_allclose(out.array, out2.array)

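# Hedged aside (a minimal, self-contained sketch of my own, not one of the
# project snippets): the check above relies on two call patterns of
# chainer.functions.gaussian. A plain call draws z = mean + exp(ln_var / 2) * eps
# with fresh standard-normal noise; passing return_eps=True hands that noise
# back, and feeding it in again via eps= reproduces the exact same sample.
# Assumes a Chainer version whose F.gaussian supports these keyword arguments.
import numpy as np
import chainer.functions as F

mean = np.zeros((2, 3), dtype=np.float32)
ln_var = np.zeros((2, 3), dtype=np.float32)  # log-variance, not std

z1, eps = F.gaussian(mean, ln_var, return_eps=True)
z2 = F.gaussian(mean, ln_var, eps=eps)  # reuses the stored noise
assert np.allclose(z1.array, z2.array)
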
def generate_latent(self, batch, center=None, sd=1.0):
    zeros = broadcast_to(self.zero, (batch, self.z_size))
    ones = broadcast_to(self.one, (batch, self.z_size))
    zeros.unchain_backward()
    ones.unchain_backward()
    ln_var = log(sd**2) * ones
    if center is None:
        return gaussian(zeros, ln_var)
    else:
        mean_z = broadcast_to(center, (batch, self.z_size))
        return gaussian(mean_z, ln_var)

def pretrain_step_vrae_tag(self, x_input, tag, word_drop_ratio=0.0, train=True):
    """
    Maximum likelihood estimation
    :param x_input:
    :return: loss
    """
    batch_size = len(x_input)
    _, mu_z, ln_var_z = self.encoder.encode_with_tag(x_input, tag, train)
    self.reset_state()
    if self.latent_dim:
        z = F.gaussian(mu_z, ln_var_z)
    else:
        latent = F.gaussian(mu_z, ln_var_z)
        tag_ = self.tag_embed(
            chainer.Variable(self.xp.array(tag, 'int32'), volatile=not train))
        self.lstm1.h = self.dec_input(F.concat((latent, tag_)))
        z = None

    accum_loss = 0
    for i in range(self.sequence_length):
        if i == 0:
            x = chainer.Variable(
                self.xp.asanyarray([self.start_token] * batch_size, 'int32'),
                volatile=not train)
        else:
            if np.random.random() < word_drop_ratio and train:
                x = chainer.Variable(
                    self.xp.asanyarray([self.start_token] * batch_size, 'int32'),
                    volatile=not train)
            else:
                x = chainer.Variable(
                    self.xp.asanyarray(x_input[:, i - 1], 'int32'),
                    volatile=not train)
        scores = self.decode_one_step(x, z=z)
        loss = F.softmax_cross_entropy(
            scores,
            chainer.Variable(self.xp.asanyarray(x_input[:, i], 'int32'),
                             volatile=not train))
        accum_loss += loss

    dec_loss = accum_loss
    kl_loss = F.gaussian_kl_divergence(mu_z, ln_var_z) / batch_size
    return dec_loss, kl_loss

def generate(self, N, sampling_x=False):
    z_dim = self['dec_l1'].W.shape[1]
    if isinstance(self['dec_l1'].W, numpy.ndarray):
        zero_mat = Variable(numpy.zeros((N, z_dim), 'float32'))
        z = F.gaussian(zero_mat, zero_mat)
    else:
        raise NotImplementedError()
    dec_mu, dec_log_sigma_2 = self.decode(z)
    if sampling_x:
        x = F.gaussian(dec_mu, dec_log_sigma_2)
    else:
        x = dec_mu
    return x

def __call__(self, x, top_down=None):
    if top_down is not None:
        x = self.xp.concatenate((x, top_down))
    (z_mu, z_var, state) = self.encoder(x)
    z = F.gaussian(z_mu, z_var)
    (x_mu, x_var) = self.decoder(z)
    (a_mu, a_var) = self.action(z, state)
    a = F.gaussian(a_mu, a_var)
    self.a = a
    self.x_hat = x_mu
    return a, (x_mu, x_var), (z_mu, z_var), (a_mu, a_var)

def generate(self, N, sampling_x=False):
    z_dim = self['dec_l1'].W.shape[1]
    if isinstance(self['dec_l1'].W, np.ndarray):
        zero_mat = Variable(np.zeros((N, z_dim), 'float32'))
        z = F.gaussian(zero_mat, zero_mat)
    else:
        raise NotImplementedError()
    dec_mu, dec_log_sigma_2 = self.decode(z)
    if sampling_x:
        x = F.gaussian(dec_mu, dec_log_sigma_2)
    else:
        x = dec_mu
    return x

def forward_down(self, x, sample=False):
    h = F.elu(x)
    h = self.down1(h)
    sections = [self.z_dim, self.z_dim * 2, self.z_dim * 3,
                self.z_dim * 4, self.z_dim * 4 + self.h_dim]
    pz_mean, pz_logv, rz_mean, rz_logv, down_context, h_det = \
        F.split_axis(h, sections, axis=1)

    prior = F.gaussian(pz_mean, 2 * pz_logv)
    logps = self.gaussian_diag_logps(pz_mean, 2 * pz_logv, prior)

    if sample:
        z = prior
        context = 0
        logqs = chainer.Variable(
            self.xp.zeros(logps.shape, dtype="float32"), name="logqs")
    else:
        post_mean = rz_mean + self.qz_mean
        post_logv = 2 * (rz_logv + self.qz_logv)
        posterior = F.gaussian(post_mean, post_logv)
        context = self.up_context + down_context
        logqs = self.gaussian_diag_logps(post_mean, post_logv, posterior)
        z = posterior

    # autoregressive nn
    h = self.ar1(z)
    h = h + context
    h = self.ar2(h)
    sections = [self.z_dim]
    arw_mean, arw_logv = F.split_axis(h, sections, axis=1)
    # arw_mean, arw_logv = h[0] * 0.1, h[1] * 0.1  # ??
    z = (z - 0.1 * arw_mean) / F.exp(F.clip(0.1 * arw_logv, -100., 100.))
    logqs += arw_logv

    kl_cost = logqs - logps
    kl_cost, kl_obj = self.kl_sum(kl_cost)

    z = F.concat([z, h_det])
    z = F.elu(z)
    z = self.down2(z)

    if self.downsample:
        output_shape = z.shape[2:]
        x = F.resize_images(x, output_shape)

    z = x + 0.1 * z
    return z, kl_obj, kl_cost

def generate_canvas_states(self, v, r, xp):
    batch_size = v.shape[0]
    h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
        batch_size, xp)
    v = cf.reshape(v, v.shape[:2] + (1, 1))

    u_t_array = []
    for t in range(self.num_layers):
        generation_core = self.get_generation_core(t)

        mean_z_p, ln_var_z_p = self.z_prior_distribution.compute_parameter(
            h_t_gen)
        z_t = cf.gaussian(mean_z_p, ln_var_z_p)

        h_next_gen, c_next_gen, u_next = generation_core(
            h_t_gen, c_t_gen, z_t, v, r, u_t)

        u_t = u_next
        h_t_gen = h_next_gen
        c_t_gen = c_next_gen

        u_t_array.append(u_t)

    return u_t_array

def __call__(self, x, test=False):
    if test:
        return x
    xp = cuda.get_array_module(x.data)
    ln_var = math.log(self.std ** 2)
    noise = F.gaussian(Variable(xp.zeros_like(x.data)),
                       Variable(xp.full_like(x.data, ln_var)))
    return x + noise

def __call__(self, x):
    if not chainer.config.train:
        return x
    xp = cuda.get_array_module(x.data)
    ln_var = math.log(self.std ** 2)
    noise = functions.gaussian(chainer.Variable(xp.zeros_like(x.data)),
                               chainer.Variable(xp.full_like(x.data, ln_var)))
    return x + noise

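# Hedged, self-contained sketch (my own, not from the project above) of the same
# train-only noise pattern as the layer above: zero-mean Gaussian noise is added
# only while chainer.config.train is True, by feeding a constant log-variance
# map to F.gaussian. The class name GaussianNoiseSketch is mine.
import math
import numpy as np
import chainer
import chainer.functions as F


class GaussianNoiseSketch(chainer.Link):
    """Illustrative noise layer; `std` is the noise standard deviation."""

    def __init__(self, std=0.1):
        super(GaussianNoiseSketch, self).__init__()
        self.std = std

    def __call__(self, x):
        if not chainer.config.train:
            return x
        data = x.array if isinstance(x, chainer.Variable) else x
        ln_var = math.log(self.std ** 2)  # F.gaussian expects log-variance
        noise = F.gaussian(self.xp.zeros_like(data),
                           self.xp.full_like(data, ln_var))
        return x + noise


x = np.zeros((2, 3), dtype=np.float32)
layer = GaussianNoiseSketch(std=0.1)
with chainer.using_config('train', False):
    print(layer(x))              # passthrough: all zeros
with chainer.using_config('train', True):
    print(layer(x).array.std())  # roughly 0.1
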
def train(self, x, L=1, test=False):
    batchsize = x.data.shape[0]
    z_mean, z_ln_var = self.encoder(x, test=test, apply_f=False)
    loss = 0
    for l in xrange(L):
        # Sample z
        z = F.gaussian(z_mean, z_ln_var)
        # Compute lower bound
        log_px_z = self.log_px_z(x, z, test=test)
        log_pz = self.log_pz(z, z_mean, z_ln_var)
        log_qz_x = self.log_qz_x(z, z_mean, z_ln_var)
        lower_bound = log_px_z + log_pz - log_qz_x
        loss += -lower_bound
    loss = F.sum(loss) / L / batchsize

    self.zero_grads()
    loss.backward()
    self.update()

    if self.gpu:
        loss.to_cpu()
    return loss.data

def _infer_z(mu, ln_var):
    batch_size = mu.data.shape[0]
    var = F.exp(ln_var)
    z = F.gaussian(mu, ln_var)
    kl = -F.sum(1 + ln_var - mu**2 - var) / 2
    kl /= batch_size
    return z, kl

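# Hedged aside (a small sketch of my own, not part of the surrounding project):
# the hand-written term above is the closed-form KL divergence
#     KL(N(mu, exp(ln_var)) || N(0, I)) = -0.5 * sum(1 + ln_var - mu^2 - exp(ln_var)),
# which is also what chainer.functions.gaussian_kl_divergence computes, so the
# two formulations should agree numerically.
import numpy as np
import chainer.functions as F

mu = np.random.randn(4, 8).astype(np.float32)
ln_var = np.random.randn(4, 8).astype(np.float32)

kl_manual = -F.sum(1 + ln_var - mu ** 2 - np.exp(ln_var)) / 2
kl_builtin = F.gaussian_kl_divergence(mu, ln_var)
assert np.allclose(kl_manual.array, kl_builtin.array, atol=1e-4)
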
def update_core(self):
    batch = self._iterators['main'].next()
    x = Variable(self.converter(batch, self.device))
    xp = cuda.get_array_module(x.data)

    enc = self.enc
    opt_enc = self._optimizers['enc']
    dec = self.dec
    opt_dec = self._optimizers['dec']

    mu, ln_var = enc(x)
    batchsize = len(mu.data)
    rec_loss = 0
    k = 10
    for l in range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, dec(z, sigmoid=False)) / (k * batchsize)
    loss = rec_loss + 1.0 * F.gaussian_kl_divergence(mu, ln_var) / batchsize

    enc.cleargrads()
    dec.cleargrads()
    loss.backward()
    opt_enc.update()
    opt_dec.update()

    chainer.report({'rec_loss': rec_loss})
    chainer.report({'loss': loss})

def lf(frames):
    mu, ln_var = self.encode(frames)
    z = F.gaussian(mu, ln_var)
    frames_flat = F.reshape(
        frames, (-1, frames.shape[1] * frames.shape[2] * frames.shape[3]))
    variational_flat = F.reshape(
        self.decode(z), (-1, frames.shape[1] * frames.shape[2] * frames.shape[3]))

    # L2 reconstruction loss
    rec_loss = F.sum(F.square(frames_flat - variational_flat), axis=1)
    rec_loss = F.mean(rec_loss)

    kl_loss = F.sum(F.gaussian_kl_divergence(mu, ln_var, reduce="no"), axis=1)
    if self._cpu:
        kl_tolerance = np.asarray(self.kl_tolerance * self.n_latent).astype(np.float32)
    else:
        kl_tolerance = cp.asarray(self.kl_tolerance * self.n_latent).astype(cp.float32)
    kl_loss = F.maximum(kl_loss, F.broadcast_to(kl_tolerance, kl_loss.shape))
    kl_loss = F.mean(kl_loss)

    loss = rec_loss + kl_loss
    chainer.report({'loss': loss}, observer=self)
    chainer.report({'kl_loss': kl_loss}, observer=self)
    chainer.report({'rec_loss': rec_loss}, observer=self)
    return loss

def sample_with_log_prob(self):
    x = F.gaussian(self.mean, self.ln_var)
    normal_log_prob = _eltwise_gaussian_log_likelihood(
        x, self.mean, self.var, self.ln_var)
    log_probs = normal_log_prob - _tanh_forward_log_det_jacobian(x)
    y = F.tanh(x)
    return y, F.sum(log_probs, axis=1)

def reconst(self, data, unregular=False):
    if data.ndim == 1:
        data = data.reshape(1, -1)

    with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):
        e_mu, e_var = self.encode(Variable(data))
        feat = F.gaussian(e_mu, e_var).data
        d_out = self.decode(Variable(feat))

    if self.is_gauss_dist:
        rec = d_out[0].data
        if unregular:  # the non-regularized term
            d_mu, d_var = d_out
            D_VAE = F.gaussian_kl_divergence(e_mu, e_var)
            A_VAE = 0.5 * (np.log(2 * np.pi) + d_var)
            M_VAE = 0.5 * (data - d_mu)**2 * F.exp(-d_var)
            return feat, rec, M_VAE.data
    else:
        rec = F.sigmoid(d_out).data

    mse = np.mean((rec - data)**2, axis=1)
    # lat_loss = F.gaussian_kl_divergence(e_mu, e_var)
    # rec_loss = F.bernoulli_nll(Variable(data), d_out)
    # vae_err = (lat_loss + rec_loss).data
    return feat, rec, mse

def _encode(self, s, a):
    mu, ln_var = self._latent_distribution(s, a)
    # 2 * ln_std = ln_var
    # original code is written in ln_std form
    # Clip for numerical stability
    ln_var = F.clip(ln_var, x_min=-8, x_max=30)
    return F.gaussian(mu, ln_var), mu, ln_var

def __call__(self, x, y):
    """
    Parameters
    -----------------
    x: Variable
        Feature of unlabeled samples.
    y: Variable
        Feature of unlabeled samples.
    """
    g = F.broadcast_to(
        F.gaussian(
            np.array([0], dtype=np.float32),
            np.array([np.exp(1)], dtype=np.float32)),
        x.shape)
    x_g = x * g
    y_g = y * g

    x_g_norm = F.sum(x_g**2, axis=1)
    y_g_norm = F.sum(y_g**2, axis=1)
    x_g_y_g = F.linear(x_g, y_g)
    x_g_norm, x_g_y_g, y_g_norm = \
        F.broadcast(*[x_g_norm, x_g_y_g, F.expand_dims(y_g_norm, 1)])
    # F.exp(- (x_g_norm - 2 * x_g_y_g + y_g_norm))
    return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)

def generate_image(self, v, r, xp):
    batch_size = v.shape[0]
    h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
        batch_size, xp)
    v_broadcast_shape = (
        h_t_gen.shape[0],
        v.shape[1],
    ) + h_t_gen.shape[2:]
    v = xp.reshape(v, v.shape + (1, 1))
    v = xp.broadcast_to(v, shape=v_broadcast_shape)

    for t in range(self.generation_steps):
        generation_core = self.get_generation_core(t)
        generation_prior = self.get_generation_prior(t)
        generation_upsampler = self.get_generation_upsampler(t)

        mean_z_p, ln_var_z_p = generation_prior.compute_parameter(h_t_gen)
        z_t = cf.gaussian(mean_z_p, ln_var_z_p)

        h_next_gen, c_next_gen = generation_core(h_t_gen, c_t_gen, z_t, v, r)

        u_t = u_t + generation_upsampler(h_next_gen)
        h_t_gen = h_next_gen
        c_t_gen = c_next_gen

    mean_x = self.map_u_x(u_t)
    return mean_x.data

def generate_canvas_states(self, v, r, xp):
    batch_size = v.shape[0]
    h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
        batch_size, xp)
    v = cf.reshape(v, v.shape[:2] + (1, 1))

    u_t_array = []
    for t in range(self.generation_steps):
        generation_core = self.get_generation_core(t)
        generation_prior = self.get_generation_prior(t)
        generation_upsampler = self.get_generation_upsampler(t)

        mean_z_p, ln_var_z_p = generation_prior.compute_parameter(h_t_gen)
        z_t = cf.gaussian(mean_z_p, ln_var_z_p)

        h_next_gen, c_next_gen = generation_core(h_t_gen, c_t_gen, z_t, v, r, u_t)

        u_t = u_t + generation_upsampler(h_next_gen)
        h_t_gen = h_next_gen
        c_t_gen = c_next_gen

        u_t_array.append(u_t)

    return u_t_array

def lf(x):
    mu, ln_var = self.encode(x)
    mean_mu, mean_sigma = calculate_means(mu, ln_var)
    batchsize = len(mu.data)
    std_mu, std_ln_var = generate_std_params(mu)

    # reconstruction loss
    rec_loss = 0
    kl_loss = 0
    for l in six.moves.range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False)) / (k * batchsize)
        kl_loss += -F.gaussian_nll(z, mu, ln_var) / (k * batchsize)
        kl_loss += F.gaussian_nll(z, std_mu, std_ln_var) / (k * batchsize)

    self.rec_loss = rec_loss
    self.kl_loss = kl_loss
    self.loss = self.rec_loss + C * self.kl_loss
    chainer.report(
        {
            'rec_loss': rec_loss,
            'kl': self.kl_loss,
            'loss': self.loss,
            'mu': mean_mu,
            'sigma': mean_sigma,
        },
        observer=self)
    return self.loss

def _infer_z(mu, ln_var):
    batch_size = mu.data.shape[0]
    var = F.exp(ln_var)
    z = F.gaussian(mu, ln_var)
    kl = -F.sum(1 + ln_var - mu ** 2 - var) / 2
    kl /= batch_size
    return z, kl

def __call__(self, in_data):
    """in_data: (B, N, C, H, W)"""
    assert in_data.ndim == 5  # BNCHW
    xp = self.xp
    batch_size, nframes, nchannels = in_data.shape[:3]
    in_size = in_data.shape[3:]
    assert nframes == self.num_episodes, "%s != %s" % (self.num_episodes, nframes)

    self.reset_state()
    x = resize_seq_images(in_data, (128, 128))
    hidden = self.encoder(x)

    # add gaussian noise
    if chainer.config.train:
        noise_sigma = xp.log(self.noise_sigma**2, dtype=hidden.dtype)
        ln_var = xp.ones_like(hidden, dtype=hidden.dtype) * noise_sigma
        hidden = F.gaussian(hidden, ln_var)

    reconst = self.decoder_reconst(hidden)
    pred = self.decoder_pred(hidden)
    reconst = resize_seq_images(reconst, in_size)
    pred = resize_seq_images(pred, in_size)
    assert reconst.shape == in_data.shape
    assert pred.shape == in_data.shape
    return reconst, pred, hidden

def generate_onestep(self):  # ,h2,aa,bb):
    """
    generate from middle layer
    # call reset() first, but no relation between img_array[input] and generated image[output]
    [input]
    x    : BxHxW[mono] 3BxHxW[color] matrix (Variable)
    errx : BxHxW[mono] 3BxHxW[color] matrix (Variable)
    [output]
    y    : BxHxW[mono] 3BxHxW[color] matrix (Variable)
    [normal]:sigmoid,relu  [whitened]:tanh
    """
    zero_mat = XP.fzeros((self.B, self.z_dim))
    z = F.gaussian(zero_mat, zero_mat)  # F.gaussian(mean, ln_var)

    self.c2, self.h2, inc_canvas, Wmean_x, Wmean_y, Wln_var, Wln_stride, Wln_gamma, \
        Rmean_x, Rmean_y, Rln_var, Rln_stride, Rln_gamma = self.decode(
            self.c2, self.h2, z)  # ,aa,bb)

    self.W_filter.mkFilter(Wmean_x, Wmean_y, Wln_var, Wln_stride, Wln_gamma)
    inc_canvas = F.reshape(
        inc_canvas, (self.B * self.C, self.Write_patch, self.Write_patch))
    inc_canvas = self.W_filter.InvFilter(inc_canvas)
    self.canvas += inc_canvas

    # [normal]:sigmoid, [whitened]:tanh
    y = F.relu(self.canvas + 0.5) - F.relu(self.canvas - 0.5)  # F.sigmoid(self.canvas)

    self.errx = self.x - y
    self.t += 1
    return y  # ,h2

def sample_g0(self, zs):
    mu = self.l_g0_mu(zs)
    ln_var = self.l_g0_ln_var(zs)
    g_0 = F.gaussian(mu, ln_var)
    batchsize = len(mu.data)
    kl_g0 = gaussian_kl_divergence(mu, ln_var) / batchsize
    return g_0, kl_g0

def __call__(self, z, c, test=False):
    ### text augmentation
    hc_mu = F.leaky_relu(self.lc_mu(c))
    hc_var = F.leaky_relu(self.lc_var(c))
    h_c = F.gaussian(hc_mu, hc_var)

    ### concatenate z and c
    h = F.concat((z, h_c))

    ### generate image
    h1_0 = F.reshape(self.bn0(self.l0(h), test=test),
                     (h.data.shape[0], self.gf_dim * 8, self.s16, self.s16))
    h1_1 = self.dc1_1(h1_0, test=test)
    h1_1 = self.dc1_2(h1_1, test=test)
    h1_1 = self.dc1_3(h1_1, test=test)
    h = F.relu(h1_0 + h1_1)

    h2_0 = self.dc2(h, test=test)
    h2_1 = self.dc2_1(h2_0, test=test)
    h2_1 = self.dc2_2(h2_1, test=test)
    h2_1 = self.dc2_3(h2_1, test=test)
    h = F.relu(h2_0 + h2_1)

    h = self.dc3(h, test=test)
    h = self.dc4(h, test=test)
    x = F.tanh(self.dc5(h, test=test))

    if test:
        return x
    else:
        return x, hc_mu, hc_var

def term_slop(self, loc, val, bs, nf, train=True):
    """ Compute the slope for each active feature. """
    shape = (bs, nf)

    # Reshape all of our constants
    pr_mu = F.broadcast_to(self.slop_mu.b, shape)
    pr_lv = F.broadcast_to(self.slop_lv.b, shape)
    # This is either zero or a very negative number
    # indicating to sample N(mean, logvar) or just draw
    # the mean precisely
    if not train:
        pr_lv += self.lv_floor

    # The feature slopes are grouped together so that they
    # all share a common mean. Then individual features slop_delta_lv
    # are shrunk towards zero, which effectively sets features to fall
    # back on the group mean.
    sl_mu = F.reshape(self.slop_delta_mu(loc), shape) + pr_mu
    sl_lv = F.reshape(self.slop_delta_lv(loc), shape) + pr_lv
    coef = F.gaussian(sl_mu, sl_lv)
    slop = F.sum(coef * val, axis=1)

    # Calculate divergence between group mean and N(0, 1)
    kld1 = F.gaussian_kl_divergence(self.slop_mu.b, self.slop_lv.b)
    # Calculate divergence of individual delta means and delta vars
    args = (self.slop_delta_mu.W, self.slop_delta_lv.W)
    kld2 = F.gaussian_kl_divergence(*args)
    return slop, kld1 + kld2

def generate_image(self, v, r):
    xp = cuda.get_array_module(v)
    batch_size = v.shape[0]
    h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
        batch_size, xp)
    v = cf.reshape(v, v.shape[:2] + (1, 1))

    for t in range(self.generation_steps):
        generation_core = self.get_generation_core(t)
        generation_prior = self.get_generation_prior(t)
        generation_upsampler = self.get_generation_upsampler(t)

        mean_z_p, ln_var_z_p = generation_prior.compute_parameter(h_t_gen)
        z_t = cf.gaussian(mean_z_p, ln_var_z_p)

        h_next_gen, c_next_gen = generation_core(h_t_gen, c_t_gen, z_t, v, r, u_t)

        u_t = u_t + generation_upsampler(h_next_gen)
        h_t_gen = h_next_gen
        c_t_gen = c_next_gen

    mean_x = self.map_u_x(u_t)
    return mean_x.data

def encode_x_y_distribution(self, x, test=False, softmax=True):
    x = self.to_variable(x)
    mean, ln_var = self.q_a_x(x, test=test)
    a = F.gaussian(mean, ln_var)
    y = self.q_y_ax(a, x, test=test)
    if softmax:
        return F.softmax(y)
    return y

def free_energy(self, x):
    # return -(free energy)
    enc_mu, enc_log_sigma_2 = self.encode(x)
    kl = F.gaussian_kl_divergence(enc_mu, enc_log_sigma_2)
    z = F.gaussian(enc_mu, enc_log_sigma_2)
    dec_mu = self.decode(z)
    nll = F.bernoulli_nll(x, dec_mu)
    return nll + kl

def check_forward(self, m_data, v_data):
    m = chainer.Variable(m_data)
    v = chainer.Variable(v_data)
    n = functions.gaussian(m, v)

    # Only checks dtype and shape because its result contains noise
    self.assertEqual(n.dtype, numpy.float32)
    self.assertEqual(n.shape, m.shape)

def __call__(self, x):
    if not chainer.config.train:
        return x
    data = x.data if isinstance(x, chainer.Variable) else x
    xp = cuda.get_array_module(data)
    ln_var = math.log(self.std ** 2)
    noise = functions.gaussian(xp.full_like(data, self.mean),
                               xp.full_like(data, ln_var))
    return x + noise

def encode(self, bow):
    """ Convert the bag of words vector of shape (n_docs, n_vocab)
    into latent mean and log-variance vectors.
    """
    lam = F.relu(self.l1(bow))
    pi = F.relu(self.l2(lam))
    mu, log_sigma = F.split_axis(self.mu_logsigma(pi), 2, 1)
    sample = F.gaussian(mu, log_sigma)
    loss = F.gaussian_kl_divergence(mu, log_sigma)
    return sample, loss

def predict(self, x):
    a_mean, a_ln_var = split(self.q_a_given_x(x))
    y_pred = chainer.Variable(
        self.xp.zeros((len(x.data), self.y_dim), dtype=np.float32),
        volatile='auto')
    for _ in six.moves.range(self.sampling_predict):
        a = F.gaussian(a_mean, a_ln_var)
        y_pred += F.softmax(self.q_y_given_a_x(a, x))
    y_pred /= self.sampling_predict
    return y_pred

def __call__(self, x):
    xp = self.encoder.xp
    x = Variable(xp.asarray(x))
    zm, zv = self.encoder((x,))
    z = F.gaussian(zm, zv)
    mean, ln_var = self.decoder((z,))
    kl_loss = F.gaussian_kl_divergence(zm, zv)
    nll_loss = F.gaussian_nll(x, mean, ln_var)
    loss = kl_loss + nll_loss
    return loss

def __call__(self, x):
    x = Variable(x)
    start = time.time()
    zm, zv = self.encoder((x,))
    z = F.gaussian(zm, zv)
    y = self.decoder((z,))[0]
    kl_loss = F.gaussian_kl_divergence(zm, zv)
    nll_loss = F.bernoulli_nll(x, y)
    loss = kl_loss + nll_loss
    return loss

def lf(x):
    mu, ln_var = self.encode(x)
    batchsize = len(mu.data)
    # reconstruction loss
    rec_loss = 0
    for l in six.moves.range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False)) \
            / (k * batchsize)
    self.rec_loss = rec_loss
    self.loss = self.rec_loss + \
        C * gaussian_kl_divergence(mu, ln_var) / batchsize
    return self.loss

def forward(self, x, l, train, action):
    if self.xp == np:
        loc = l.data
    else:
        loc = self.xp.asnumpy(l.data)
    margin = self.g_size / 2
    loc = (loc + 1) * 0.5 * (self.in_size - self.g_size + 1) + margin
    loc = np.clip(loc, margin, self.in_size - margin)
    loc = np.floor(loc).astype(np.int32)

    # Retina Encoding
    hx = crop(x, loc=loc, size=self.g_size)
    hx = F.relu(self.emb_x(hx))

    # Location Encoding
    hl = F.relu(self.emb_l(l))

    # Glimpse Net
    g = F.relu(self.fc_lg(hl) + self.fc_xg(hx))

    # Core Net
    h = self.core_lstm(g)  # LSTM(g + h_t-1)

    # Location Net
    l = F.tanh(self.fc_hl(h))

    if train:
        # sampling location l
        s = F.gaussian(mean=l, ln_var=self.ln_var)
        s = F.clip(s, -1., 1.)

        # location policy
        l1, l2 = F.split_axis(l, indices_or_sections=2, axis=1)
        s1, s2 = F.split_axis(s, indices_or_sections=2, axis=1)
        norm = (s1 - l1) * (s1 - l1) + (s2 - l2) * (s2 - l2)
        ln_p = 0.5 * norm / self.var
        ln_p = F.reshape(ln_p, (-1,))

    if action:
        # Action Net
        y = self.fc_ha(h)
        if train:
            return s, ln_p, y
        else:
            return l, None, y
    else:
        if train:
            return s, ln_p, None
        else:
            return l, None, None

def check_backward(self, m_data, v_data, y_grad):
    m = chainer.Variable(m_data)
    v = chainer.Variable(v_data)
    y = functions.gaussian(m, v)
    self.assertEqual(y.data.dtype, numpy.float32)
    y.grad = y_grad
    y.backward()

    func = y.creator
    f = lambda: func.forward((m.data, v.data))
    gm, gv = gradient_check.numerical_grad(f, (m.data, v.data), (y.grad,))

    gradient_check.assert_allclose(gm, m.grad, atol=1e-4, rtol=1e-3)
    gradient_check.assert_allclose(gv, v.grad, atol=1e-4, rtol=1e-3)

def test_forward(self, backend_config):
    # TODO(niboshi): Support it
    if backend_config.use_chainerx and self.dtype == numpy.float16:
        raise unittest.SkipTest('ChainerX does not support float16')

    m_data, v_data = backend_config.get_array((self.m, self.v))
    m = chainer.Variable(m_data)
    v = chainer.Variable(v_data)

    # Call forward without eps and retrieve it
    n1, eps = functions.gaussian(m, v, return_eps=True)

    self.assertIsInstance(eps, backend_config.xp.ndarray)
    self.assertEqual(n1.dtype, self.dtype)
    self.assertEqual(n1.shape, m.shape)
    self.assertEqual(eps.dtype, self.dtype)
    self.assertEqual(eps.shape, m.shape)

    # Call again with retrieved eps
    n2 = functions.gaussian(m, v, eps=eps)

    self.assertEqual(n2.dtype, self.dtype)
    self.assertEqual(n2.shape, m.shape)
    testing.assert_allclose(n1.array, n2.array)

def lf(x):
    mu, ln_var = self.encode(x)
    batchsize = len(mu)
    # reconstruction loss
    rec_loss = 0
    for l in six.moves.range(k):
        z = F.gaussian(mu, ln_var)
        rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False)) \
            / (k * batchsize)
    self.rec_loss = rec_loss
    self.loss = self.rec_loss + \
        beta * gaussian_kl_divergence(mu, ln_var) / batchsize
    chainer.report(
        {'rec_loss': rec_loss, 'loss': self.loss}, observer=self)
    return self.loss

def forward_one_step_gaussian(self, x, test):
    activate = activations[self.activation_type]

    chain_mean = [x]
    chain_variance = [x]

    # Hidden
    for i in range(self.n_layers - 1):
        u = getattr(self, "layer_mean_%i" % i)(chain_mean[-1])
        if self.apply_batchnorm:
            u = getattr(self, "batchnorm_mean_%i" % i)(u, test=test)
        output = activate(u)
        if self.enable_dropout:
            output = F.dropout(output, train=not test)
        chain_mean.append(output)

        u = getattr(self, "layer_variance_%i" % i)(chain_variance[-1])
        if self.apply_batchnorm:
            u = getattr(self, "batchnorm_variance_%i" % i)(u, test=test)
        output = activate(u)
        if self.enable_dropout:
            output = F.dropout(output, train=not test)
        chain_variance.append(output)

    # Output
    u = getattr(self, "layer_mean_%i" % (self.n_layers - 1))(chain_mean[-1])
    if self.apply_batchnorm and self.apply_batchnorm_to_output:
        u = getattr(self, "batchnorm_mean_%i" % (self.n_layers - 1))(u, test=test)
    if self.output_activation_type is None:
        chain_mean.append(u)
    else:
        chain_mean.append(activations[self.output_activation_type](u))

    u = getattr(self, "layer_variance_%i" % (self.n_layers - 1))(chain_variance[-1])
    if self.apply_batchnorm and self.apply_batchnorm_to_output:
        u = getattr(self, "batchnorm_variance_%i" % (self.n_layers - 1))(u, test=test)
    if self.output_activation_type is None:
        chain_variance.append(u)
    else:
        chain_variance.append(activations[self.output_activation_type](u))

    mean = chain_mean[-1]

    # log(sigma^2)
    ln_var = chain_variance[-1]

    return F.gaussian(mean, ln_var)

def loss_z_dep(self, x, y, a):
    def to_onehot(y, T):
        ret = np.zeros((len(y), T), dtype=np.float32)
        ret[:, y.get()] = 1.0
        return chainer.Variable(self.xp.asarray(ret), volatile='auto')

    y = to_onehot(y.data, self.y_dim)
    z_mean, z_ln_var = split(self.q_z_given_a_y_x(a, y, x))
    z = F.gaussian(z_mean, z_ln_var)
    a_mean, a_ln_var = split(self.p_a_given_z_y_x(z, y, x))
    x_mean, _ = split(self.p_x_given_z_y(z, y))

    zero = chainer.Variable(self.xp.zeros_like(z.data), volatile='auto')
    nll_p_z = F.sum(l.gaussian_nll(z, zero, zero), axis=1)
    nll_p_x_given_z_y = F.sum(l.bernoulli_nll(x, x_mean), axis=1)
    nll_p_a_given_z_y_x = F.sum(
        l.gaussian_nll(a, a_mean, a_ln_var), axis=1)
    nll_q_z_given_a_y_x = F.sum(
        l.gaussian_nll(z, z_mean, z_ln_var), axis=1)
    return (nll_p_z + nll_p_x_given_z_y + nll_p_a_given_z_y_x
            - nll_q_z_given_a_y_x)

def loss_one(self, x, y=None):
    a_mean, a_ln_var = split(self.q_a_given_x(x))
    a = F.gaussian(a_mean, a_ln_var)
    loss = -F.sum(l.gaussian_nll(a, a_mean, a_ln_var))  # nll(q(a|x))
    loss += np.log(self.y_dim)  # nll(p(y))
    if y is None:
        losses_z_dep = []
        for i in six.moves.range(self.y_dim):
            y_ = chainer.Variable(
                self.xp.full((len(x.data), self.y_dim), i, dtype=np.int32),
                volatile='auto')
            loss_z_dep = self.loss_z_dep(x, y_, a)
            loss_z_dep = F.reshape(loss_z_dep, (-1, 1))
            losses_z_dep.append(loss_z_dep)
        q_y_given_a_x = F.softmax(self.q_y_given_a_x(a, x))
        loss -= entropy(q_y_given_a_x)  # nll(q(y|a,x))
        # nll(p(z)) + nll(p(x|z, y)) + nll(p(a|x, y, z)) - nll(q(z|a, y, x))
        loss += F.sum(F.concat(losses_z_dep) * q_y_given_a_x)
    else:
        # nll(p(z)) + nll(p(x|z, y)) + nll(p(a|x, y, z)) - nll(q(z|a, y, x))
        loss += F.sum(self.loss_z_dep(x, y, a))
        loss += self.gamma * self.classification_loss(x, y)  # cls loss
    return loss

def train(self, x, L=1, test=False):
    batchsize = x.data.shape[0]
    z_mean, z_ln_var = self.encoder(x, test=test, apply_f=False)
    loss = 0
    for l in xrange(L):
        # Sample z
        z = F.gaussian(z_mean, z_ln_var)
        # Decode
        x_expectation = self.decoder(z, test=test, apply_f=False)
        # E_q(z|x)[log(p(x|z))]
        loss += self.bernoulli_nll_keepbatch(x, x_expectation)
    if L > 1:
        loss /= L
    # KL divergence
    loss += self.gaussian_kl_divergence_keepbatch(z_mean, z_ln_var)
    loss = F.sum(loss) / batchsize

    self.zero_grads()
    loss.backward()
    self.update()

    if self.gpu:
        loss.to_cpu()
    return loss.data

def encode_x_a(self, x, test=False):
    x = self.to_variable(x)
    mean, ln_var = self.q_a_x(x, test=test)
    return F.gaussian(mean, ln_var)

def f(m, v):
    # In case numerical gradient computation is held in more precise
    # dtype than that of backward computation, cast the eps to reuse
    # before the numerical computation.
    eps_ = eps.astype(m.dtype)
    return functions.gaussian(m, v, eps=eps_)

def compute_lower_bound_loss(self, labeled_x, labeled_y, label_ids, unlabeled_x, test=False):

    def lower_bound(log_px_zy, log_py, log_pz, log_qz_xy):
        lb = log_px_zy + log_py + log_pz - log_qz_xy
        return lb

    # _l: labeled
    # _u: unlabeled
    batchsize_l = labeled_x.data.shape[0]
    batchsize_u = unlabeled_x.data.shape[0]
    num_types_of_label = labeled_y.data.shape[1]
    xp = self.xp

    ### Lower bound for labeled data ###
    # Compute eq.6 -L(x,y)
    z_mean_l, z_ln_var_l = self.encoder_xy_z(labeled_x, labeled_y, test=test, apply_f=False)
    z_l = F.gaussian(z_mean_l, z_ln_var_l)
    log_px_zy_l = self.log_px_zy(labeled_x, z_l, labeled_y, test=test)
    log_py_l = self.log_py(labeled_y, test=test)
    if False:
        log_pz_l = self.log_pz(z_l, z_mean_l, z_ln_var_l, test=test)
        log_qz_xy_l = self.log_qz_xy(z_l, z_mean_l, z_ln_var_l, test=test)
        lower_bound_l = lower_bound(log_px_zy_l, log_py_l, log_pz_l, log_qz_xy_l)
    else:
        lower_bound_l = log_px_zy_l + log_py_l \
            - self.gaussian_kl_divergence_keepbatch(z_mean_l, z_ln_var_l)

    if batchsize_u > 0:
        ### Lower bound for unlabeled data ###
        # To marginalize y, we repeat unlabeled x, and construct a target
        # (batchsize_u * num_types_of_label) x num_types_of_label.
        # Example of n-dimensional x and target matrix for a 3 class problem and batch_size=2.
        #   unlabeled_x_ext                  y_ext
        # [[x0[0], x0[1], ..., x0[n]]     [[1, 0, 0]
        #  [x1[0], x1[1], ..., x1[n]]      [1, 0, 0]
        #  [x0[0], x0[1], ..., x0[n]]      [0, 1, 0]
        #  [x1[0], x1[1], ..., x1[n]]      [0, 1, 0]
        #  [x0[0], x0[1], ..., x0[n]]      [0, 0, 1]
        #  [x1[0], x1[1], ..., x1[n]]]     [0, 0, 1]]
        unlabeled_x_ext = xp.zeros((batchsize_u * num_types_of_label, unlabeled_x.data.shape[1]), dtype=xp.float32)
        y_ext = xp.zeros((batchsize_u * num_types_of_label, num_types_of_label), dtype=xp.float32)
        for n in xrange(num_types_of_label):
            y_ext[n * batchsize_u:(n + 1) * batchsize_u, n] = 1
            unlabeled_x_ext[n * batchsize_u:(n + 1) * batchsize_u] = unlabeled_x.data
        y_ext = Variable(y_ext)
        unlabeled_x_ext = Variable(unlabeled_x_ext)

        # Compute eq.6 -L(x,y) for unlabeled data
        z_mean_u_ext, z_mean_ln_var_u_ext = self.encoder_xy_z(unlabeled_x_ext, y_ext, test=test, apply_f=False)
        z_u_ext = F.gaussian(z_mean_u_ext, z_mean_ln_var_u_ext)
        log_px_zy_u = self.log_px_zy(unlabeled_x_ext, z_u_ext, y_ext, test=test)
        log_py_u = self.log_py(y_ext, test=test)
        if False:
            log_pz_u = self.log_pz(z_u_ext, z_mean_u_ext, z_mean_ln_var_u_ext, test=test)
            log_qz_xy_u = self.log_qz_xy(z_u_ext, z_mean_u_ext, z_mean_ln_var_u_ext, test=test)
            lower_bound_u = lower_bound(log_px_zy_u, log_py_u, log_pz_u, log_qz_xy_u)
        else:
            lower_bound_u = log_px_zy_u + log_py_u \
                - self.gaussian_kl_divergence_keepbatch(z_mean_u_ext, z_mean_ln_var_u_ext)

        # Compute eq.7 sum_y{q(y|x){-L(x,y) + H(q(y|x))}}
        # Let LB(xn, y) be the lower bound for an input image xn and a label y (y = 0, 1, ..., 9).
        # Let bs be the batchsize.
        #
        # lower_bound_u is a vector and it looks like...
        # [LB(x0,0), LB(x1,0), ..., LB(x_bs,0), LB(x0,1), LB(x1,1), ..., LB(x_bs,1), ..., LB(x0,9), LB(x1,9), ..., LB(x_bs,9)]
        #
        # After reshaping. (axis 1 corresponds to label, axis 2 corresponds to batch)
        # [[LB(x0,0), LB(x1,0), ..., LB(x_bs,0)],
        #  [LB(x0,1), LB(x1,1), ..., LB(x_bs,1)],
        #  ...
        #  [LB(x0,9), LB(x1,9), ..., LB(x_bs,9)]]
        #
        # After transposing. (axis 1 corresponds to batch)
        # [[LB(x0,0), LB(x0,1), ..., LB(x0,9)],
        #  [LB(x1,0), LB(x1,1), ..., LB(x1,9)],
        #  ...
        #  [LB(x_bs,0), LB(x_bs,1), ..., LB(x_bs,9)]]
        lower_bound_u = F.transpose(F.reshape(lower_bound_u, (num_types_of_label, batchsize_u)))

        y_distribution = self.encoder_x_y(unlabeled_x, test=test, softmax=True)
        lower_bound_u = y_distribution * (lower_bound_u - F.log(y_distribution + 1e-6))

        loss_labeled = -F.sum(lower_bound_l) / batchsize_l
        loss_unlabeled = -F.sum(lower_bound_u) / batchsize_u
        loss = loss_labeled + loss_unlabeled
    else:
        loss_unlabeled = None
        loss_labeled = -F.sum(lower_bound_l) / batchsize_l
        loss = loss_labeled

    return loss, loss_labeled, loss_unlabeled

def __call__(self, z, y, test=False, apply_f=False):
    mean, ln_var = self.forward_one_step(z, y, test=test, apply_f=False)
    if apply_f:
        return F.gaussian(mean, ln_var)
    return mean, ln_var

def decode_yz_a(self, y, z, test=False):
    y = self.to_variable(y)
    z = self.to_variable(z)
    mean, ln_var = self.p_a_yz(y, z, test=test)
    return F.gaussian(mean, ln_var)