import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX
pi = np.pi
e = np.e
# `_slice(p, i, dim)` (take the i-th block of width `dim` along the last
# axis) and `concatenate` are helpers assumed from the surrounding module.


def e_step(self, y, qs, *params):
    # Variational E-step: refine the approximate posteriors `qs` (one per
    # layer, each packed as [mu, log_sigma]) by gradient descent on the
    # variational cost, holding the data and the prior fixed.
    prior = concatenate(params[:1], axis=0)
    consider_constant = [y, prior]
    cost = T.constant(0.).astype(floatX)

    for l in range(self.n_layers):
        q = qs[l]
        mu_q = _slice(q, 0, self.dim_h)
        log_sigma_q = _slice(q, 1, self.dim_h)

        # KL between this layer's posterior and the prior, batch-averaged.
        kl_term = self.kl_divergence(q, prior).mean(axis=0)

        # Reparameterized samples: h = mu + sigma * epsilon.
        epsilon = self.trng.normal(
            avg=0, std=1.0,
            size=(self.n_inference_samples, mu_q.shape[0], mu_q.shape[1]))
        h = mu_q + epsilon * T.exp(log_sigma_q)
        p = self.p_y_given_h(h, *params)

        if l == 0:
            # Bottom layer: reconstruction term, averaged over samples.
            cond_term = self.conditional.neg_log_prob(
                y[None, :, :], p).mean(axis=0)
        else:
            # Higher layers: match the posterior of the layer below,
            # averaged over samples.
            cond_term = self.kl_divergence(
                qs[l - 1][None, :, :], p).mean(axis=0)
        cost += (kl_term + cond_term).sum(axis=0)

    grads = theano.grad(cost, wrt=qs, consider_constant=consider_constant)
    # Return only the (last layer's) KL for monitoring; the gradients were
    # taken with respect to the full accumulated cost above.
    cost = kl_term.mean()
    return cost, grads
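
# For reference, the objective minimized with respect to each q above is
# (roughly) the per-layer variational free energy,
#
#   F(q) = sum_l [ KL(q_l || prior) + E_{h ~ q_l}[ -log p(target_l | h) ] ],
#
# where target_0 = y and target_l = q_{l-1} for l > 0, and the expectation
# is estimated with `n_inference_samples` reparameterized samples.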

def _normal(trng, p, size=None):
    # Draw samples from a diagonal Gaussian whose parameters are packed as
    # p = [mu, log_sigma] along the last axis.
    dim = p.shape[p.ndim - 1] // 2
    mu = _slice(p, 0, dim)
    log_sigma = _slice(p, 1, dim)
    if size is None:
        size = mu.shape
    return trng.normal(avg=mu, std=T.exp(log_sigma), size=size, dtype=floatX)
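
# Note (an assumption about Theano's behavior, worth verifying): sampling
# through `trng.normal(avg=mu, ...)` does not give a differentiable path
# from the sample back to `mu`/`log_sigma`. When gradients through the
# sample are needed, the reparameterized form in `step_sample` below
# (mu + epsilon * exp(log_sigma), epsilon ~ N(0, 1)) is used instead.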

def neg_log_prob(self, x, p=None):
    # Negative log-likelihood of x under the Gaussian parameterized by p,
    # with the mean clipped into [self.min, self.max] for stability.
    if p is None:
        p = self.get_prob(*self.get_params())
    dim = p.shape[p.ndim - 1] // 2
    mu = _slice(p, 0, dim)
    log_sigma = _slice(p, 1, dim)
    mu = T.clip(mu, self.min, self.max)
    p = concatenate([mu, log_sigma], axis=mu.ndim - 1)
    return self.f_neg_log_prob(x, p)

def _neg_normal_log_prob(x, p, clip=None):
    # Elementwise Gaussian negative log-density, summed over the last axis.
    dim = p.shape[p.ndim - 1] // 2
    mu = _slice(p, 0, dim)
    log_sigma = _slice(p, 1, dim)
    if clip is not None:
        # Clip log_sigma from below to avoid a vanishing variance.
        log_sigma = T.maximum(log_sigma, clip)
    energy = 0.5 * (
        (x - mu) ** 2 / T.exp(2 * log_sigma) + 2 * log_sigma + T.log(2 * pi))
    return energy.sum(axis=energy.ndim - 1)
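
# For reference, the expression above is the standard closed form
#   -log N(x; mu, sigma^2) = 0.5 * [ (x - mu)^2 / sigma^2
#                                    + log sigma^2 + log(2 * pi) ],
# with sigma = exp(log_sigma), so that log sigma^2 = 2 * log_sigma.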

def step_kl_divergence(self, q, mu, log_sigma):
    # KL(q || N(mu, sigma^2)) for diagonal Gaussians, with both means
    # clipped into [self.min, self.max].
    mu_q = _slice(q, 0, self.dim)
    log_sigma_q = _slice(q, 1, self.dim)
    mu = T.clip(mu, self.min, self.max)
    mu_q = T.clip(mu_q, self.min, self.max)
    kl = log_sigma - log_sigma_q + 0.5 * (
        (T.exp(2 * log_sigma_q) + (mu - mu_q) ** 2)
        / T.exp(2 * log_sigma) - 1)
    return kl.sum(axis=kl.ndim - 1)

def kl_divergence(self, p, q, entropy_scale=1.0):
    # KL(p || q) for diagonal Gaussians packed as [mu, log_sigma].
    # (`entropy_scale` is accepted but unused here.)
    dim = self.dim_h
    mu_p = _slice(p, 0, dim)
    log_sigma_p = _slice(p, 1, dim)
    mu_q = _slice(q, 0, dim)
    log_sigma_q = _slice(q, 1, dim)
    kl = log_sigma_q - log_sigma_p + 0.5 * (
        (T.exp(2 * log_sigma_p) + (mu_p - mu_q) ** 2)
        / T.exp(2 * log_sigma_q) - 1)
    return kl.sum(axis=kl.ndim - 1)
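
# Both KL functions above implement the closed form for diagonal Gaussians,
#   KL(N(mu_p, sigma_p^2) || N(mu_q, sigma_q^2))
#     = sum_i [ log sigma_q - log sigma_p
#               + (sigma_p^2 + (mu_p - mu_q)^2) / (2 * sigma_q^2) - 1/2 ].
# A minimal numpy sanity check (a hypothetical helper, not part of the
# model), comparing the closed form against a Monte Carlo estimate of
# E_p[log p(h) - log q(h)]:
def _check_gaussian_kl(mu_p=0.3, ls_p=-0.2, mu_q=-0.1, ls_q=0.4, n=200000):
    rng = np.random.RandomState(0)
    h = mu_p + np.exp(ls_p) * rng.randn(n)
    log_p = -0.5 * ((h - mu_p) ** 2 / np.exp(2 * ls_p)
                    + 2 * ls_p + np.log(2 * np.pi))
    log_q = -0.5 * ((h - mu_q) ** 2 / np.exp(2 * ls_q)
                    + 2 * ls_q + np.log(2 * np.pi))
    mc = (log_p - log_q).mean()
    closed = ls_q - ls_p + 0.5 * (
        (np.exp(2 * ls_p) + (mu_p - mu_q) ** 2) / np.exp(2 * ls_q) - 1)
    assert abs(mc - closed) < 1e-2, (mc, closed)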

def _normal_entropy(p, clip=None):
    # Differential entropy of a diagonal Gaussian, summed over the last axis.
    dim = p.shape[p.ndim - 1] // 2
    log_sigma = _slice(p, 1, dim)
    if clip is not None:
        log_sigma = T.maximum(log_sigma, clip)
    entropy = 0.5 * T.log(2 * pi * e) + log_sigma
    return entropy.sum(axis=entropy.ndim - 1)
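
# For reference: H[N(mu, sigma^2)] = 0.5 * log(2 * pi * e * sigma^2)
#                                  = 0.5 * log(2 * pi * e) + log_sigma,
# which is why only log_sigma, and not mu, enters the entropy above.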

def _normal_prob(p):
    # The expected value of the distribution: for a Gaussian, the mean.
    dim = p.shape[p.ndim - 1] // 2
    mu = _slice(p, 0, dim)
    return mu

def step_neg_log_prob(self, x, p):
    # Same as `neg_log_prob`, but `p` must be passed explicitly
    # (suitable for use inside a scan step).
    dim = p.shape[p.ndim - 1] // 2
    mu = _slice(p, 0, dim)
    log_sigma = _slice(p, 1, dim)
    mu = T.clip(mu, self.min, self.max)
    p = concatenate([mu, log_sigma], axis=mu.ndim - 1)
    return self.f_neg_log_prob(x, p)

def __call__(self, p):
    # Repack the parameters with the mean clipped; log_sigma is unchanged.
    dim = p.shape[p.ndim - 1] // 2
    mu = _slice(p, 0, dim)
    log_sigma = _slice(p, 1, dim)
    mu = T.clip(mu, self.min, self.max)
    return concatenate([mu, log_sigma], axis=mu.ndim - 1)

def step_sample(self, epsilon, p):
    # Reparameterized sample: a deterministic transform of external noise.
    dim = p.shape[p.ndim - 1] // self.scale
    mu = _slice(p, 0, dim)
    log_sigma = _slice(p, 1, dim)
    return mu + epsilon * T.exp(log_sigma)
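
# This is the reparameterization trick: with epsilon ~ N(0, 1) supplied from
# outside, mu + exp(log_sigma) * epsilon is distributed as
# N(mu, exp(log_sigma)^2), and gradients flow through mu and log_sigma.
# Usage sketch (`dist` and `p` are hypothetical names for a distribution
# object and its 2 * dim-wide packed parameter tensor):
#
#   epsilon = trng.normal(avg=0., std=1., size=(n_samples, dim), dtype=floatX)
#   h = dist.step_sample(epsilon, p)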

def split_prob(self, p):
    # Unpack mu and log_sigma from the concatenated parameter tensor.
    dim = p.shape[p.ndim - 1] // self.scale
    mu = _slice(p, 0, dim)
    log_sigma = _slice(p, 1, dim)
    return mu, log_sigma

def get_center(self, p):
    # The center of the distribution is the mean.
    dim = p.shape[p.ndim - 1] // self.scale
    mu = _slice(p, 0, dim)
    return mu