def update(self):
    """Run one PPO update over the transitions stored in ``self.buffer``.

    The buffer fields ``'s'``, ``'a'``, ``'r'``, ``'s_'`` and ``'a_logp'``
    are converted to double tensors on the module-level ``device``.
    One-step TD targets and advantages are computed once without gradient
    tracking, then ``self.ppo_epoch`` passes of shuffled minibatches
    optimise the clipped surrogate objective plus twice the smooth-L1
    value loss.

    The network is evaluated once per minibatch and both heads (index 0:
    Beta parameters, index 1: state value) are read from that single
    result, instead of running a second identical forward pass for the
    value loss.
    """
    self.training_step += 1

    s = torch.tensor(self.buffer['s'], dtype=torch.double).to(device)
    a = torch.tensor(self.buffer['a'], dtype=torch.double).to(device)
    r = torch.tensor(self.buffer['r'], dtype=torch.double).to(device).view(-1, 1)
    s_ = torch.tensor(self.buffer['s_'], dtype=torch.double).to(device)
    old_a_logp = torch.tensor(
        self.buffer['a_logp'], dtype=torch.double).to(device).view(-1, 1)

    # Targets and advantages are fixed for the whole update.
    with torch.no_grad():
        target_v = r + args.gamma * self.net(s_, actual_obs=False)[1]
        adv = target_v - self.net(s, actual_obs=False)[1]
        # Advantage normalisation is intentionally disabled here:
        # adv = (adv - adv.mean()) / (adv.std() + 1e-8)

    for _ in range(self.ppo_epoch):
        sampler = BatchSampler(
            SubsetRandomSampler(range(self.buffer_capacity)),
            self.batch_size, False)
        for index in sampler:
            # Single forward pass; both heads come from the same output.
            net_out = self.net(s[index], actual_obs=False)
            alpha, beta = net_out[0]
            value = net_out[1]

            dist = Beta(alpha, beta)
            a_logp = dist.log_prob(a[index]).sum(dim=1, keepdim=True)
            ratio = torch.exp(a_logp - old_a_logp[index])

            surr1 = ratio * adv[index]
            surr2 = torch.clamp(
                ratio, 1.0 - self.clip_param, 1.0 + self.clip_param) * adv[index]
            action_loss = -torch.min(surr1, surr2).mean()
            value_loss = F.smooth_l1_loss(value, target_v[index])
            loss = action_loss + 2. * value_loss

            self.optimizer.zero_grad()
            loss.backward()
            # Gradient clipping is intentionally disabled here:
            # nn.utils.clip_grad_norm_(self.net.parameters(), self.max_grad_norm)
            self.optimizer.step()
def forward(self, observation, reparameterize=True, deterministic=False, return_log_prob=False):
    """
    Forward pass of a Beta-distributed policy. Assumes input is a torch tensor.

    The hidden layers in ``self.fcs`` are applied with
    ``self.hidden_activation``; the first two columns of the activated
    output layer (plus ``EPSILON``) are used as the Beta concentration
    parameters. Samples in (0, 1) are mapped affinely with ``self.min``
    and ``self.max_min_difference``.

    :type observation: torch.Tensor
    :param reparameterize: accepted for interface compatibility; not read
        by this implementation (stochastic draws always use ``rsample``).
    :param deterministic: if True the action is the distribution mean,
        otherwise a reparameterised sample is drawn. The original code had
        these two branches swapped.
    :param return_log_prob: accepted for interface compatibility; the log
        probability is always computed and returned.
    :return: tuple ``(action, mean, log_std, log_prob, entropy, std,
        mean_action_log_prob, pre_tanh_value)``; the last two are always
        ``None``. ``log_prob`` and ``entropy`` refer to the untransformed
        Beta variable on (0, 1).
    """
    hidden = observation
    for fc in self.fcs:
        hidden = self.hidden_activation(fc(hidden))
    network_output = self.output_activation(self.last_fc(hidden))

    alpha = network_output[:, 0].unsqueeze(1) + EPSILON
    beta = network_output[:, 1].unsqueeze(1) + EPSILON
    distribution = Beta(alpha, beta)
    distribution_mean = distribution.mean

    if deterministic:
        # Deterministic evaluation acts on the mean of the distribution.
        sample = distribution_mean
    else:
        sample = distribution.rsample()

    # transform to range (min, max)
    action = self.min + self.max_min_difference * sample
    mean = self.min + self.max_min_difference * distribution_mean
    variance = self.max_min_difference_squared * distribution.variance
    std = torch.sqrt(variance)
    log_std = torch.log(std)

    log_prob = distribution.log_prob(sample)
    entropy = distribution.entropy()

    mean_action_log_prob = None
    pre_tanh_value = None
    return action, mean, log_std, log_prob, entropy, std, mean_action_log_prob, pre_tanh_value
def log_prob_density(x, dist_args, args):
    """Return the per-sample log density of ``x``, summed over dimension 1.

    :param x: tensor of actions; dimension 1 is reduced by the final sum.
    :param dist_args: pair of distribution parameters. For ``"Gaussian"``
        they are used as (mean, std); for ``"Beta"`` as the two
        concentration parameters.
    :param args: object exposing ``stat_policy`` (``"Gaussian"`` or
        ``"Beta"``).
    :return: tensor with dimension 1 kept at size 1.
    :raises ValueError: if ``args.stat_policy`` is not a supported name
        (previously this surfaced as an UnboundLocalError).
    """
    if args.stat_policy == "Gaussian":
        # NOTE(review): the ``-log(std)`` normalisation term is not part of
        # this expression; that only cancels in ratios when std is shared
        # between the compared policies -- confirm against callers.
        density = -(x - dist_args[0]).pow(2) / (2 * dist_args[1].pow(2)) \
            - 0.5 * math.log(2 * math.pi)
    elif args.stat_policy == "Beta":
        density = Beta(dist_args[0], dist_args[1]).log_prob(x)
    else:
        raise ValueError(
            "Unsupported stat_policy: {!r}".format(args.stat_policy))
    return density.sum(1, keepdim=True)
def kldivergence(self, datas1, datas2):
    """Return KL(Beta(datas1) || Beta(datas2)) as a float tensor.

    Each argument is unpacked into the two Beta concentration parameters.
    The result is moved to the device returned by
    ``set_device(self.use_gpu)``.
    """
    first_alpha, first_beta = datas1
    second_alpha, second_beta = datas2
    divergence = kl_divergence(
        Beta(first_alpha, first_beta),
        Beta(second_alpha, second_beta),
    )
    return divergence.float().to(set_device(self.use_gpu))
def _sampler(self, samples=1000):
    """Draw ``samples`` Monte-Carlo draws of the cost components.

    Returns the tuple ``(d_, dist, schools, sz, az, sf, fa, dt)``, each a
    tensor produced with ``samples`` draws.

    NOTE(review): ``d`` is read but never assigned in this method; it is
    presumably a module-level or enclosing-scope scenario index -- confirm
    where it is defined.
    """
    n = [samples]
    d_ = torch.ones(samples)

    if d == 1:
        # If SZ is adopted, then some Districts and Schools buy in
        dist = Poisson(self.n_districts).sample(n).reshape(n)
        per_district = NegativeBinomial(tensor([3.]), tensor([0.8]))
        schools = per_district.sample([samples, self.n_districts.int()])
        schools = schools.sum(dim=1).reshape(n)
        sz = 15000. * dist + 2430 * schools
    else:
        dist = torch.zeros(samples)
        schools = torch.zeros(samples)
        sz = torch.zeros(samples)

    if d < 2:
        sf_params = self._lognormal_params(300000., 10000.)
        sf = LogNormal(*sf_params).sample(n)
    else:
        sf = torch.zeros(samples)

    # System & Infrastructure
    az = LogNormal(self.az_means[d], self.az_sds[d]).sample(n)
    # Drawn but not returned; kept so the random stream stays unchanged.
    salary_estimate = Normal(70000., 5000.).sample(n)
    fa = Beta(self.fa_ms[d], self.fa_ks[d]).sample(n)
    dt = Beta(self.dt_ms[d], self.dt_ks[d]).sample(n)

    return d_, dist, schools, sz, az, sf, fa, dt
def __init__(self, N_side_in, **kwargs):
    """Set up the canvas size, the operation table and the noise model.

    :param N_side_in: side length of the square canvas.
    :param kwargs: optional ``canv_dist`` (``'bernoulli'`` by default, or
        ``'beta'``) selecting which noise method name is stored in
        ``self.noise_method``.
    """
    # Maps each supported canvas distribution to its noise method name.
    noise_methods = {
        'bernoulli': 'bern',
        'beta': 'peaky_blur',
    }

    self.N_side = N_side_in
    self.canv_shape = (self.N_side, self.N_side)

    # Available operations, keyed by name.
    self.op_dict = {
        'union': union,
        'rect': self.primitive_rect,
    }
    self.op_str_list = list(self.op_dict)
    self.N_ops = len(self.op_str_list)
    self.N_non_primitive_ops = 1
    self.N_params = 4

    self.zero_pad = ZeroPad2d(1)

    # Element-wise Beta(0.05, 0.45) noise over the whole canvas.
    canvas_ones = torch.ones(self.canv_shape)
    self.peaky_noise = Beta(0.05 * canvas_ones, 0.45 * canvas_ones)

    self.canv_dist = kwargs.get('canv_dist', 'bernoulli')
    assert self.canv_dist in noise_methods, 'Canv dist must be either bernoulli or beta!'
    self.noise_method = noise_methods[self.canv_dist]
def test_beta_sample_grad(self):
    """Check ``Beta.rsample`` gradients w.r.t. alpha against finite differences.

    The expected gradient is dx/dalpha along a constant value of
    cdf(x, alpha, beta), estimated with a central difference of the SciPy
    CDF divided by the SciPy PDF.
    """
    self._set_rng_seed()
    num_samples = 20
    grid = [1e-2, 1e0, 1e2]
    beta_cdf = scipy.stats.beta.cdf
    beta_pdf = scipy.stats.beta.pdf

    for alpha, beta in product(grid, grid):
        alphas = Variable(torch.Tensor([alpha] * num_samples), requires_grad=True)
        betas = Variable(torch.Tensor([beta] * num_samples))
        draws = Beta(alphas, betas).rsample()
        draws.sum().backward()

        sorted_draws, ind = draws.data.sort()
        x = sorted_draws.numpy()
        actual_grad = alphas.grad.data[ind].numpy()

        # Compare with expected gradient dx/dalpha along constant cdf(x,alpha,beta).
        eps = 0.02 * alpha / (1.0 + np.sqrt(alpha))
        upper = beta_cdf(x, alpha + eps, beta)
        lower = beta_cdf(x, alpha - eps, beta)
        cdf_alpha = (upper - lower) / (2 * eps)
        cdf_x = beta_pdf(x, alpha, beta)
        expected_grad = -cdf_alpha / cdf_x
        rel_error = np.abs(actual_grad - expected_grad) / (expected_grad + 1e-100)

        report = [
            'Bad gradients for Beta({}, {})'.format(alpha, beta),
            'x {}'.format(x),
            'expected {}'.format(expected_grad),
            'actual {}'.format(actual_grad),
            'rel error {}'.format(rel_error),
            'max error {}'.format(rel_error.max()),
        ]
        self.assertLess(np.max(rel_error), 0.01, '\n'.join(report))
def chooseActionTrain(self, state):
    """
    Choose an action during training mode

    Parameters
    -------
    state:
        The current state of the car (a NumPy array).

    Returns
    -------
    action : np.ndarray
        The actions to run on the track
    coefficient : float
        The logarithmic probability for an action

    Notes
    -------
    This function is only called when the --train flag IS provided.
    """
    # Add a leading batch dimension and move to the configured device.
    observation = torch.from_numpy(state).double()
    observation = observation.to(self.hardwareDevice).unsqueeze(0)

    with torch.no_grad():
        alpha, beta = self.nn(observation)[0]

    policy = Beta(alpha, beta)
    sampled = policy.sample()
    # Joint log-probability: sum over the action components.
    log_prob = policy.log_prob(sampled).sum(dim=1)

    return sampled.squeeze().cpu().numpy(), log_prob.item()
def forward(self, x=None, warmup=1., inf_net=None):
    """Encode ``x``, decode it and return ELBO-related quantities.

    The decoder output is squashed with a sigmoid and used as the mean of
    a per-element Beta likelihood with total concentration
    ``self.beta_scale``. Uniform noise in [0, 1/256) is added to ``x`` and
    the result is clamped to [1e-5, 1 - 1e-5] before scoring.

    The noise tensor is created on ``x.device`` rather than through a
    hard-coded ``.cuda()`` call, so the method also works on CPU and on a
    non-default GPU.

    :param x: input batch (original comments indicate ``[B, 3, 112, 112]``).
    :param warmup: weight on ``logpz - logqz`` in the ``'welbo'`` output.
    :param inf_net: optional object whose ``inference_net`` replaces
        ``self.inference_net``.
    :return: dict with keys ``'logpx'``, ``'x_recon'``, ``'welbo'``,
        ``'elbo'``, ``'logws'``, ``'z'``, ``'logpz'``, ``'logqz'`` and
        ``'logvar'``.
    """
    outputs = {}

    encoder = self.inference_net if inf_net is None else inf_net.inference_net
    mu, logvar = encoder(x)

    z, logpz, logqz = self.sample(mu, logvar)
    z_dec = self.z_to_dec(z)
    B = z_dec.shape[0]

    # Decode image: sigmoid output is the Beta mean.
    x_hat = self.image_decoder(z_dec)
    alpha = torch.sigmoid(x_hat)
    beta = Beta(alpha * self.beta_scale, (1. - alpha) * self.beta_scale)

    # Dequantisation noise, allocated on the same device as the input.
    noise = torch.empty(
        x.shape, dtype=torch.float32, device=x.device).uniform_(0., 1. / 256.)
    x_noise = torch.clamp(x + noise, min=1e-5, max=1 - 1e-5)

    logpx = beta.log_prob(x_noise)
    logpx = torch.sum(logpx.view(B, -1), 1)  # one value per batch element

    log_ws = logpx + logpz - logqz

    outputs['logpx'] = torch.mean(logpx)
    outputs['x_recon'] = alpha
    outputs['welbo'] = torch.mean(logpx + warmup * (logpz - logqz))
    outputs['elbo'] = torch.mean(log_ws)
    outputs['logws'] = log_ws
    outputs['z'] = z
    outputs['logpz'] = torch.mean(logpz)
    outputs['logqz'] = torch.mean(logqz)
    outputs['logvar'] = logvar
    return outputs
def test_beta_likelihood(concentration1: float, concentration0: float) -> None:
    """
    Test to check that maximizing the likelihood recovers the parameters
    """
    # Generate one sample per entry from a Beta with constant parameters.
    true_c1 = torch.zeros((NUM_SAMPLES, )) + concentration1
    true_c0 = torch.zeros((NUM_SAMPLES, )) + concentration0
    samples = Beta(true_c1, true_c0).sample()

    # Start the optimiser slightly below the true values.
    shrink = START_TOL_MULTIPLE * TOL
    init_biases = [
        inv_softplus(concentration1 - shrink * concentration1),
        inv_softplus(concentration0 - shrink * concentration0),
    ]

    estimates = maximum_likelihood_estimate_sgd(
        BetaOutput(),
        samples,
        init_biases=init_biases,
        learning_rate=PositiveFloat(0.05),
        num_epochs=PositiveInt(10),
    )
    concentration1_hat, concentration0_hat = estimates

    assert (
        np.abs(concentration1_hat - concentration1) < TOL * concentration1
    ), f"concentration1 did not match: concentration1 = {concentration1}, concentration1_hat = {concentration1_hat}"
    assert (
        np.abs(concentration0_hat - concentration0) < TOL * concentration0
    ), f"concentration0 did not match: concentration0 = {concentration0}, concentration0_hat = {concentration0_hat}"
def test_e_log_stick():
    """
    This test DOES NOT PASS, and maybe should not

    Exploratory comparison of three estimates of E[log(1 - prod v)]: the
    model's ``_E_log_stick``, the reference ``compute_q_Elogstick`` and a
    Monte-Carlo estimate from Beta samples. It prints all three and then
    stops in the debugger; it makes no assertions.
    """
    model = InfiniteIBP(4., 10, 0.1, 0.5, 36)
    model.init_z(10)
    K = model.K

    # take a lot of samples to get something working
    tau = model.tau.detach()
    samples = Beta(tau[:, 0], tau[:, 1]).sample((100000, ))
    f = (1. - samples.cumprod(1)).log().mean(0)

    log_stick, q = model._E_log_stick(model.tau, model.K)

    jeffrey_q = np.zeros((K, K))
    jeffrey_log_stick = np.zeros((K, ))
    for k in range(K):
        row, value = compute_q_Elogstick(model.tau.detach().numpy().T, k)
        jeffrey_q[k, :k + 1] = row
        jeffrey_log_stick[k] = value

    print("old:     {}".format(jeffrey_log_stick))
    print("new:     {}".format(log_stick.detach().numpy()))
    print("samples: {}".format(f.detach().numpy()))

    # NOTE(review): interactive breakpoint; this blocks non-interactive runs.
    import ipdb
    ipdb.set_trace()
def sample(self, datas):
    """Draw an action from ``Beta(alpha, beta)``.

    ``datas`` is unpacked into the two concentration parameters. The
    sample is cast to float and moved to the device returned by
    ``set_device(self.use_gpu)``.
    """
    alpha, beta = datas
    drawn = Beta(alpha, beta).sample()
    return drawn.float().to(set_device(self.use_gpu))
def get_kl(self):
    """Return the summed KL divergence of both variational posteriors.

    The Gamma posterior (parameters ``exp(self.logalpha)`` and
    ``exp(self.logbeta)``) is compared with a Gamma(0.1, 0.3) prior; the
    Beta posterior (``exp(self.logtheta)``, ``exp(self.logeta)``) with a
    Beta(1, 1) prior. Both element-wise divergences are summed.
    """
    gamma_ones = torch.ones_like(self.logalpha)
    beta_ones = torch.ones_like(self.logtheta)

    gamma_posterior = Gamma(
        concentration=self.logalpha.exp(), rate=self.logbeta.exp())
    gamma_prior = Gamma(0.1 * gamma_ones, 0.3 * gamma_ones)

    beta_posterior = Beta(self.logtheta.exp(), self.logeta.exp())
    beta_prior = Beta(beta_ones, torch.ones_like(self.logtheta))

    beta_term = kl_divergence(beta_posterior, beta_prior).sum()
    gamma_term = kl_divergence(gamma_posterior, gamma_prior).sum()
    return beta_term + gamma_term
def kl_bernoulli(pi, step, args):
    """Capacity-constrained KL loss between a Bernoulli posterior and prior.

    The prior probabilities are the cumulative product (last dimension) of
    one draw from ``Beta(args.alpha_0, 1)``. The KL is summed over
    dimension 1, averaged, and its absolute distance to a capacity that
    grows linearly with ``step`` (capped at ``args.h_cap``) is scaled by
    ``args.gamma_h``.
    """
    linear_cap = step * args.h_cap / args.total_steps
    cap = min(args.h_cap, linear_cap)

    ones = torch.ones_like(pi)
    stick_draw = Beta(ones * args.alpha_0, torch.ones_like(pi)).sample()
    prior = Bernoulli(torch.cumprod(stick_draw, dim=-1))
    posterior = Bernoulli(pi)

    klh_loss = kl_divergence(posterior, prior).sum(dim=1).mean()
    return args.gamma_h * (klh_loss - cap).abs()
def compute_global_kl_divergence(self) -> torch.Tensor:
    """Return the summed KL divergence from the Beta posterior to the Beta prior.

    Parameters are read from ``self.get_alphas_betas(as_numpy=False)``
    under the keys ``alpha_posterior``, ``beta_posterior``,
    ``alpha_prior`` and ``beta_prior``.
    """
    params = self.get_alphas_betas(as_numpy=False)
    posterior = Beta(params["alpha_posterior"], params["beta_posterior"])
    prior = Beta(params["alpha_prior"], params["beta_prior"])
    return kl(posterior, prior).sum()
def update(self):
    """Run one PPO update over the transitions stored in ``self.buffer``.

    The buffer fields ``'s'``, ``'a'``, ``'r'``, ``'s_'`` and ``'a_logp'``
    are converted to double tensors. One-step TD targets and advantages
    are computed once without gradient tracking, then ``self.ppo_epoch``
    passes of shuffled minibatches optimise the clipped surrogate
    objective plus twice the smooth-L1 value loss.

    The network is evaluated once per minibatch and both heads (index 0:
    Beta parameters, index 1: state value) are read from that single
    result, instead of running a second identical forward pass for the
    value loss.
    """
    self.training_step += 1

    s = torch.tensor(self.buffer['s'], dtype=torch.double)
    a = torch.tensor(self.buffer['a'], dtype=torch.double)
    r = torch.tensor(self.buffer['r'], dtype=torch.double).view(-1, 1)
    s_ = torch.tensor(self.buffer['s_'], dtype=torch.double)
    old_a_logp = torch.tensor(
        self.buffer['a_logp'], dtype=torch.double).view(-1, 1)

    # Targets and advantages are fixed for the whole update.
    with torch.no_grad():
        target_v = r + self.gamma * self.net(s_)[1]
        adv = target_v - self.net(s)[1]

    for _ in range(self.ppo_epoch):
        sampler = BatchSampler(
            SubsetRandomSampler(range(self.buffer_capacity)),
            self.batch_size, False)
        for index in sampler:
            # Single forward pass; both heads come from the same output.
            net_out = self.net(s[index])
            alpha, beta = net_out[0]
            value = net_out[1]

            dist = Beta(alpha, beta)
            a_logp = dist.log_prob(a[index]).sum(dim=1, keepdim=True)
            ratio = torch.exp(a_logp - old_a_logp[index])

            surr1 = ratio * adv[index]
            surr2 = torch.clamp(
                ratio, 1.0 - self.clip_param, 1.0 + self.clip_param) * adv[index]
            action_loss = -torch.min(surr1, surr2).mean()
            value_loss = F.smooth_l1_loss(value, target_v[index])
            loss = action_loss + 2. * value_loss

            self.optimizer.zero_grad()
            loss.backward()
            # Author's notes on why minibatch SGD is compatible with clipping:
            # - Intuition says to do this step differently, i.e. compute the
            #   loss using minibatches and take multiple SGD steps.
            # - The shape of the objective is what keeps the parameters theta
            #   from moving into a region where the ratio exceeds 1 + epsilon:
            #   the gradient norm near the 'ceiling' approaches 0, so we do
            #   not move far into that territory. This works with multiple
            #   SGD steps, but it is unclear how one step of grad * lr behaves.
            # - Within an update theta_k is constant, so we always move in
            #   the same space. If we move with too big a gradient, the
            #   gradient becomes 0 and we have finished early.
            # - Epsilon applies to each individual action: while an action is
            #   not yet at the ceiling, each step moves it closer. PPO just
            #   limits the adjustment of each action under the policy (given
            #   the state); the objective must be maximised for each action.
            # - When adjusting theta for another transition, a different
            #   ratio can exceed epsilon; this is fine as long as the
            #   optimizer does not act greedily with respect to it.
            self.optimizer.step()
def test_elbo_components(inputs=None):
    """
    Test that various KL divergences are positive, and in the case of the
    approximate posterior q(v), compute it exactly in two ways and check
    that both give the same result.

    :param inputs: optional ``(model, X)`` pair; when omitted a small
        ``InfiniteIBP`` model and random data are created.

    The final check used to sit inside a bare ``except`` that dropped into
    a debugger, so a failing assertion was swallowed and the test could
    never fail on it. The assertion is now raised normally.
    """
    from torch.distributions import Beta, kl_divergence

    if inputs is None:
        model = InfiniteIBP(4., 6, 0.1, 0.5, 36)
        model.init_z(10)
        model.train()
        X = torch.randn(10, 36)
    else:
        model, X = inputs

    a = model._1_feature_prob(model.tau).sum()
    b = model._2_feature_assign(model.nu, model.tau).sum()
    c = model._3_feature_prob(model.phi_var, model.phi).sum()
    d = model._4_likelihood(X, model.nu, model.phi_var, model.phi).sum()
    e = model._5_entropy(model.tau, model.phi_var, model.nu).sum()

    entropy_q_v = InfiniteIBP._entropy_q_v(model.tau)
    entropy_q_A = InfiniteIBP._entropy_q_A(model.phi_var)
    entropy_q_z = InfiniteIBP._entropy_q_z(model.nu)

    try:
        assert (a + b + c + d + e).item() not in (np.inf, -np.inf), "ELBO is inf"
    except AssertionError:
        # Dump every component to make the infinite term easy to spot.
        print("a: ", a)
        print("b: ", b)
        print("c: ", c)
        print("d: ", d)
        print("e: ", e)
        print("entropy_q_v: ", entropy_q_v)
        print("entropy_q_A: ", entropy_q_A)
        print("entropy_q_z: ", entropy_q_z)
        raise

    # check the sign of the various KL divergences (summed, so less powerful than it could be)
    assert (a + entropy_q_v).item() <= 0, "KL(q(pi) || p(pi)) is negative"
    # assert (b + entropy_q_z).item() <= 10, "KL(q(z) || p(z)) is negative" # we give this one some tolerance
    assert (c + entropy_q_A).item() <= 0, "KL(q(A) || p(A)) is negative"
    assert (a + b + c + e).item() <= 0, "KL divergence between q(...) || p(...) is negative"

    # check the empirical value of the component KL divergences (this is a very strong test)
    p_pi = Beta(model.alpha, 1.)
    q_pi = Beta(model.tau[:, 0], model.tau[:, 1])
    assert (
        kl_divergence(q_pi, p_pi).sum() + (a + entropy_q_v)
    ).abs() < 1e-3, "KL(q(pi) || p(pi)) is incorrect"
def test_beta_shape_tensor_params(self):
    """Check batch, event, sample and log_prob shapes for tensor parameters."""
    values = [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]
    dist = Beta(torch.Tensor(values), torch.Tensor(values))

    self.assertEqual(dist._batch_shape, torch.Size((3, 2)))
    self.assertEqual(dist._event_shape, torch.Size(()))

    single = dist.sample()
    self.assertEqual(single.size(), torch.Size((3, 2)))
    stacked = dist.sample((3, 2))
    self.assertEqual(stacked.size(), torch.Size((3, 2, 3, 2)))

    log_prob_1 = dist.log_prob(self.tensor_sample_1)
    self.assertEqual(log_prob_1.size(), torch.Size((3, 2)))
    # The second fixture is expected to be rejected by log_prob.
    self.assertRaises(ValueError, dist.log_prob, self.tensor_sample_2)
def test_beta_shape_scalar_params(self):
    """Check batch, event, sample and log_prob shapes for scalar parameters."""
    dist = Beta(0.1, 0.1)

    self.assertEqual(dist._batch_shape, torch.Size())
    self.assertEqual(dist._event_shape, torch.Size())

    single = dist.sample()
    self.assertEqual(single.size(), torch.Size((1,)))
    grid = dist.sample((3, 2))
    self.assertEqual(grid.size(), torch.Size((3, 2)))

    # The scalar fixture is expected to be rejected by log_prob.
    self.assertRaises(ValueError, dist.log_prob, self.scalar_sample)

    log_prob_1 = dist.log_prob(self.tensor_sample_1)
    self.assertEqual(log_prob_1.size(), torch.Size((3, 2)))
    log_prob_2 = dist.log_prob(self.tensor_sample_2)
    self.assertEqual(log_prob_2.size(), torch.Size((3, 2, 3)))
def test_beta_log_prob(self):
    """Compare ``Beta.log_prob`` with SciPy on 100 random parameter pairs."""
    trials = 100
    for _ in range(trials):
        # Log-normal draws keep both parameters strictly positive.
        alpha = np.exp(np.random.normal())
        beta = np.exp(np.random.normal())
        dist = Beta(alpha, beta)
        x = dist.sample()
        actual_log_prob = dist.log_prob(x).sum()
        reference = scipy.stats.beta.logpdf(x, alpha, beta)
        expected_log_prob = reference[0]
        self.assertAlmostEqual(
            actual_log_prob, expected_log_prob, places=3, allow_inf=True)
def forward(self, x=None, warmup=1., inf_net=None):
    """Encode ``x``, decode it and return ELBO-related quantities.

    The decoder output is squashed with a sigmoid and used as the mean of
    a per-element Beta likelihood with total concentration
    ``self.beta_scale``. Uniform noise in [0, 1/256) is added to ``x`` and
    the result is clamped to [1e-5, 1 - 1e-5] before scoring.

    The noise tensor is created on ``x.device`` rather than through a
    hard-coded ``.cuda()`` call, so the method also works on CPU and on a
    non-default GPU.

    :param x: input batch (original comments indicate ``[B, 3, 112, 112]``).
    :param warmup: weight on ``logpz - logqz`` in the ``'welbo'`` output.
    :param inf_net: optional object whose ``inference_net`` replaces
        ``self.inference_net``.
    :return: dict with keys ``'logpx'``, ``'x_recon'``, ``'welbo'``,
        ``'elbo'``, ``'logws'``, ``'z'``, ``'logpz'``, ``'logqz'`` and
        ``'logvar'``.
    """
    outputs = {}

    if inf_net is None:
        mu, logvar = self.inference_net(x)
    else:
        mu, logvar = inf_net.inference_net(x)

    z, logpz, logqz = self.sample(mu, logvar)
    z_dec = self.z_to_dec(z)
    B = z_dec.shape[0]

    # Decode image: sigmoid output is the Beta mean.
    alpha = torch.sigmoid(self.image_decoder(z_dec))
    beta = Beta(alpha * self.beta_scale, (1. - alpha) * self.beta_scale)

    # Dequantisation noise, allocated on the same device as the input.
    noise = torch.empty(
        x.shape, dtype=torch.float32, device=x.device).uniform_(0., 1. / 256.)
    x_noise = torch.clamp(x + noise, min=1e-5, max=1 - 1e-5)

    # Per-element log-likelihood, summed to one value per batch element.
    logpx = torch.sum(beta.log_prob(x_noise).view(B, -1), 1)

    log_ws = logpx + logpz - logqz

    outputs['logpx'] = torch.mean(logpx)
    outputs['x_recon'] = alpha
    outputs['welbo'] = torch.mean(logpx + warmup * (logpz - logqz))
    outputs['elbo'] = torch.mean(log_ws)
    outputs['logws'] = log_ws
    outputs['z'] = z
    outputs['logpz'] = torch.mean(logpz)
    outputs['logqz'] = torch.mean(logqz)
    outputs['logvar'] = logvar
    return outputs
def sample(self, device, epoch, num=64):
    """Generate ``num`` images from the prior and save them as a PNG grid.

    Latents are drawn from a standard normal, decoded into Beta
    parameters, a success probability is sampled from that Beta, and pixel
    counts are sampled from ``Binomial(255, p)`` and rescaled to [0, 1].
    The result is written to ``results/epoch_<epoch>_samples.png``.
    """
    latent = torch.randn(num, self.latent_dim).to(device)
    x_alpha, x_beta = self.decode(latent)

    success_prob = Beta(x_alpha, x_beta).sample()
    counts = Binomial(255, success_prob).sample()
    images = counts.float() / 255.

    target = 'results/epoch_{}_samples.png'.format(epoch)
    save_image(images.view(num, 1, 28, 28), target)
def test_beta_sample(self):
    """Compare Beta samples with SciPy and check tiny parameters stay finite."""
    set_rng_seed(1)  # see Note [Randomized statistical tests]
    grid = [0.1, 1.0, 10.0]
    for alpha, beta in product(grid, grid):
        label = 'Beta(alpha={}, beta={})'.format(alpha, beta)
        self._check_sampler_sampler(
            Beta(alpha, beta), scipy.stats.beta(alpha, beta), label)

    # Check that small alphas do not cause NANs.
    for Tensor in [torch.FloatTensor, torch.DoubleTensor]:
        tiny = Beta(Tensor([1e-6]), Tensor([1e-6]))
        x = tiny.sample()[0]
        self.assertTrue(np.isfinite(x) and x > 0,
                        'Invalid Beta.sample(): {}'.format(x))
def forward(self, x=None, warmup=1., inf_net=None):
    """Encode ``x`` into Bernoulli latents, decode and return ELBO terms.

    The analytic KL between the posterior ``sigmoid(logits)`` (clamped
    away from 0 and 1) and a Bernoulli(0.5) prior is summed over dimension
    1. The image likelihood is a per-element Beta with mean
    ``sigmoid(generator(z))`` and total concentration ``self.beta_scale``,
    scored on ``x`` plus uniform noise in [0, 1/256), clamped to
    [1e-5, 1 - 1e-5].

    Fixes relative to the previous version:

    * ``logits`` is now bound when ``inf_net`` is supplied (it used to be
      undefined on that path, raising NameError).
    * ``'welbo'`` subtracts the warm-up weighted KL, so it equals
      ``'elbo'`` when ``warmup == 1`` (it used to add the KL).
    * the noise and prior tensors follow the device of ``x`` / the
      posterior instead of a hard-coded ``.cuda()``.

    :param x: input batch; ``x.shape[0]`` is the batch size.
    :param warmup: weight on the KL term in ``'welbo'``.
    :param inf_net: optional object whose ``sample`` replaces
        ``self.q.sample``. NOTE(review): assumed to return ``(z, logits)``
        like ``self.q.sample`` -- confirm.
    :return: dict with keys ``'logpx'``, ``'x_recon'``, ``'welbo'``,
        ``'elbo'``, ``'logws'``, ``'z'``, ``'logpz'`` (a zero placeholder)
        and ``'logqz'`` (the mean KL).
    """
    outputs = {}
    B = x.shape[0]

    sampler = self.q if inf_net is None else inf_net
    z, logits = sampler.sample(x)

    # Analytic Bernoulli KL against a uniform (p = 0.5) prior.
    probs_q = torch.sigmoid(logits)
    probs_q = torch.clamp(probs_q, min=.00000001, max=.9999999)
    probs_p = torch.full(
        (B, self.z_size), .5, dtype=probs_q.dtype, device=probs_q.device)
    KL = probs_q * torch.log(probs_q / probs_p) \
        + (1 - probs_q) * torch.log((1 - probs_q) / (1 - probs_p))
    KL = torch.sum(KL, dim=1)

    # Decode image: sigmoid output is the Beta mean.
    x_hat = self.generator.forward(z)
    alpha = torch.sigmoid(x_hat)
    beta = Beta(alpha * self.beta_scale, (1. - alpha) * self.beta_scale)

    # Dequantisation noise, allocated on the same device as the input.
    noise = torch.empty(
        x.shape, dtype=torch.float32, device=x.device).uniform_(0., 1. / 256.)
    x_noise = torch.clamp(x + noise, min=1e-5, max=1 - 1e-5)
    logpx = beta.log_prob(x_noise)
    logpx = torch.sum(logpx.view(B, -1), 1)  # one value per batch element

    log_ws = logpx - KL

    outputs['logpx'] = torch.mean(logpx)
    outputs['x_recon'] = alpha
    outputs['welbo'] = torch.mean(logpx - warmup * KL)
    outputs['elbo'] = torch.mean(log_ws)
    outputs['logws'] = log_ws
    outputs['z'] = z
    outputs['logpz'] = torch.zeros(1)
    outputs['logqz'] = torch.mean(KL)
    return outputs
def select_action(self, state):
    """Sample an action from the Beta policy for a single state.

    :param state: NumPy array; a leading batch dimension is added here.
    :return: ``(action, a_logp, rcrc_s)`` -- the sampled action as a NumPy
        array, the log-probability summed over dimension 1 as a float, and
        the third output of ``self.net`` passed through unchanged.
    """
    observation = torch.from_numpy(state).double().to(device).unsqueeze(0)

    with torch.no_grad():
        (alpha, beta), _, rcrc_s = self.net(observation)

    policy = Beta(alpha, beta)
    sampled = policy.sample()
    log_prob = policy.log_prob(sampled).sum(dim=1)

    action = sampled.squeeze().cpu().numpy()
    a_logp = log_prob.item()
    return action, a_logp, rcrc_s
def trainmodel(self):
    """Run a PPO update with GAE targets and save the network weights.

    Advantages and value targets come from ``self.getgae``, bootstrapped
    with the value of the last stored next-state. Advantages are
    standardised; each minibatch optimises the clipped surrogate loss,
    twice the smooth-L1 value loss and an entropy term, with gradient-norm
    clipping. The state dict is written to ``self.path_t7`` at the end.

    Changes relative to the previous version:

    * the bootstrap value is computed under ``torch.no_grad()`` (only its
      Python float is used, so no graph is needed);
    * the network runs once per minibatch and both heads are read from
      that output instead of a second identical forward pass;
    * the local that shadowed the builtin ``input`` is renamed.
    """
    s = torch.tensor(self.memory.buffer['s'], dtype=torch.double).to(device)
    a = torch.tensor(self.memory.buffer['a'], dtype=torch.double).to(device)
    s_ = torch.tensor(self.memory.buffer['s_'], dtype=torch.double).to(device)

    # Bootstrap from the last next-state; the shape is fixed to (1, 4, 28, 28).
    last_next_state = s_[-1].view(1, 4, 28, 28)
    with torch.no_grad():
        future_value = self.net(last_next_state)[1].item()

    adv, target_v = self.getgae(future_value)
    adv = torch.tensor(np.array(adv), dtype=torch.double).to(device).view(-1, 1)
    target_v = torch.tensor(target_v, dtype=torch.double).to(device).view(-1, 1)
    adv = (adv - adv.mean()) / (adv.std() + 1e-5)

    old_a_logp = torch.tensor(
        self.memory.buffer['a_logp'], dtype=torch.double).to(device).view(-1, 1)

    for _ in range(self.PPOepoch):
        sampler = BatchSampler(
            SubsetRandomSampler(range(self.memory.buffer_capacity)),
            self.memory.batch_size, False)
        for index in sampler:
            # Single forward pass; both heads come from the same output.
            net_out = self.net(s[index])
            alpha, beta = net_out[0]
            value = net_out[1]

            dist = Beta(alpha, beta)
            a_logp = dist.log_prob(a[index]).sum(dim=1).reshape(-1, 1)
            ratio = torch.exp(a_logp - old_a_logp[index])

            # NOTE(review): the entropy is computed without gradient
            # tracking, so the entropy term below shifts the loss value but
            # contributes no gradient -- confirm this is intended.
            with torch.no_grad():
                entrop = dist.entropy()

            surr1 = ratio * adv[index]
            surr2 = torch.clamp(
                ratio, 1.0 - self.clip_param, 1.0 + self.clip_param) * adv[index]
            action_loss = -torch.min(surr1, surr2).mean()
            value_loss = F.smooth_l1_loss(value, target_v[index])

            # Log the raw losses before they are clamped.
            self.storeloss(action_loss, value_loss)
            action_loss = torch.clamp(action_loss, 0, 10)
            value_loss = torch.clamp(value_loss, 0, 10)
            loss = action_loss + 2. * value_loss - args.bound * entrop.mean()

            self.optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(self.net.parameters(), self.max_grad_norm)
            self.optimizer.step()

    torch.save(self.net.state_dict(), self.path_t7)
def select_action(self, state):
    """Sample an action from the Beta policy for a single state.

    :param state: NumPy array; a leading batch dimension is added here.
    :return: ``(action, a_logp)`` -- the sampled action (values in [0, 1])
        as a NumPy array and its log-probability, summed over dimension 1,
        as a float for use in the PPO ratio.
    """
    observation = torch.from_numpy(state).double().to(device).unsqueeze(0)

    with torch.no_grad():
        alpha, beta = self.net(observation)[0]

    policy = Beta(alpha, beta)
    sampled = policy.sample()
    log_prob = policy.log_prob(sampled).sum(dim=1)

    return sampled.squeeze().cpu().numpy(), log_prob.item()
def mixup(self, input: torch.Tensor, target: torch.Tensor):
    """Apply mixup to a batch in place and return ``(input, target)``.

    One coefficient per batch element is drawn from
    ``Beta(self.beta, self.beta)``; each sample is blended with a randomly
    permuted partner. ``self.beta`` is converted to a tensor on
    ``self.device`` the first time this runs. Both ``input`` and
    ``target`` are modified in place.
    """
    if not torch.is_tensor(self.beta):
        self.beta = torch.tensor(self.beta).to(self.device)

    batch = input.size(0)
    lam = Beta(self.beta, self.beta).sample((batch, 1, 1, 1))
    order = torch.randperm(batch)

    # Advanced indexing copies, so the partners are safe to scale in place.
    partner_input = input[order]
    partner_target = target[order]

    partner_input.mul_(1 - lam)
    input.mul_(lam).add_(partner_input)

    # Same coefficients, reshaped to one column for the targets.
    lam = lam.view(-1, 1)
    partner_target.mul_(1 - lam)
    target.mul_(lam).add_(partner_target)

    return input, target
def select_action(self, state):
    """Sample an action from the Beta policy for a single state.

    :param state: NumPy array; it is converted to a double tensor and
        given a leading batch dimension.
    :return: ``(action, a_logp)`` -- the sampled action in the interval
        (0, 1) as a NumPy array, and the sum over dimension 1 of the
        per-component log probability densities as a float.
    """
    # deal with datatype of state and transform it
    observation = torch.from_numpy(state).double().unsqueeze(0)

    with torch.no_grad():
        alpha, beta = self.net(observation)[0]

    policy = Beta(alpha, beta)
    sampled = policy.sample()
    log_prob = policy.log_prob(sampled).sum(dim=1)

    return sampled.squeeze().numpy(), log_prob.item()
def gnll_loss_beta(y, param_1, param_2):
    """Negative Beta log-likelihood summed over the batch, offset by 200.

    For each batch element ``i`` the targets ``y[i]`` are flattened to a
    column and scored under ``Beta(param_1[i], param_2[i])``; the mean
    log-likelihood of each element is subtracted from the running loss.

    Targets exactly equal to 0 or 1 are moved inwards by 1e-3, because the
    Beta log-density is infinite there and would turn the loss into NaN.
    This is done with a vectorised ``torch.where`` on a fresh tensor, so
    the caller's ``y`` is not modified; the previous per-element loop
    edited ``y`` in place through views.

    :param y: target tensor; the first dimension is the batch.
    :param param_1: first Beta concentration per batch element.
    :param param_2: second Beta concentration per batch element.
    :return: ``200`` plus the accumulated negative mean log-likelihoods
        (plain ``200`` for an empty batch).
    """
    batch_size = y.shape[0]
    loss = 0
    for i in range(batch_size):
        beta = Beta(param_1[i], param_2[i])
        sample = y[i].reshape(-1, 1)
        # Only the exact boundary values are shifted; everything else is kept.
        sample = torch.where(sample == 0, sample + 1.0e-3, sample)
        sample = torch.where(sample == 1, sample - 1.0e-3, sample)
        log_likelihood = beta.log_prob(sample)
        loss -= torch.mean(log_likelihood)
    return loss + 200
def mixup(x, y, num_classes, gamma=0.2, smooth_eps=0.1):
    """Apply label smoothing and mixup to a batch.

    Labels are one-hot encoded with ``onehot`` and smoothed so the true
    class gets ``1 - smooth_eps`` and every other class
    ``smooth_eps / (num_classes - 1)``. One mixing coefficient per batch
    element is drawn from ``Beta(gamma, gamma)`` and used to blend each
    sample (and its smoothed label) with a randomly permuted partner.

    Fixes relative to the previous version:

    * the label blend uses the coefficients reshaped to ``(B, 1)``; the
      ``(B, 1, 1, 1)`` tensor used before broadcast ``(B, C)`` labels to
      ``(B, 1, B, C)``;
    * ``gamma == 0`` with ``smooth_eps != 0`` now returns the smoothed
      labels without mixing instead of building an invalid ``Beta(0, 0)``;
    * the image coefficients are reshaped to match the rank of ``x``
      (identical to before for 4-D input).

    :param x: input batch; dimension 0 is the batch.
    :param y: class labels accepted by ``onehot``.
    :param num_classes: number of classes.
    :param gamma: Beta concentration; 0 disables mixing.
    :param smooth_eps: label-smoothing strength; 0 disables smoothing.
    :return: ``(x, y)`` unchanged when both options are disabled,
        otherwise ``(mixed_x, mixed_soft_labels)``.
    """
    if gamma == 0 and smooth_eps == 0:
        return x, y

    batch = x.size(0)
    lambdas = None
    if gamma != 0:
        concentration = torch.tensor([float(gamma)])
        m = Beta(concentration, concentration)
        lambdas = m.sample([batch, 1, 1]).to(x)

    my = onehot(y, num_classes).to(x)
    true_class = 1. - smooth_eps * num_classes / (num_classes - 1)
    false_class = smooth_eps / (num_classes - 1)
    my = my * true_class + torch.ones_like(my) * false_class

    if lambdas is None:
        # Smoothing only: there is nothing to mix.
        return x, my

    perm = torch.randperm(batch)
    x2 = x[perm]
    y2 = my[perm]

    # One coefficient per sample, broadcast over the remaining dimensions.
    x_lambdas = lambdas.view(batch, *([1] * (x.dim() - 1)))
    y_lambdas = lambdas.view(batch, *([1] * (my.dim() - 1)))
    mixed_x = x * (1 - x_lambdas) + x2 * x_lambdas
    mixed_y = my * (1 - y_lambdas) + y2 * y_lambdas
    return mixed_x, mixed_y
def reconstruct(self, x, device, epoch):
    """Reconstruct ``x`` through the VAE and save inputs next to outputs.

    ``x`` is flattened to 784 features, encoded, a latent is drawn with
    the reparameterisation helper and decoded into Beta parameters. A
    success probability is sampled from that Beta and pixel counts from
    ``Binomial(255, p)``, rescaled to [0, 1]. Inputs and reconstructions
    are concatenated and written to ``results/epoch_<epoch>_recon.png``.
    """
    x = x.view(-1, 784).float().to(device)

    z_mu, z_logvar = self.encode(x)
    latent = self.reparameterize(z_mu, z_logvar)  # sample zs
    x_alpha, x_beta = self.decode(latent)

    success_prob = Beta(x_alpha, x_beta).sample()
    counts = Binomial(255, success_prob).sample()
    x_recon = counts.float() / 255.

    side_by_side = torch.cat((x, x_recon))
    target = 'results/epoch_{}_recon.png'.format(epoch)
    save_image(side_by_side.view(64, 1, 28, 28), target)
def forward(self, x=None, warmup=1., inf_net=None):
    """Sample a latent for ``x``, decode it and return ELBO terms.

    The latent and its log-density come from ``self.q.sample`` (or
    ``inf_net.sample`` when given); the prior log-density from
    ``self.prior.logprob``. The image likelihood is a per-element Beta
    with mean ``sigmoid(image_decoder(z))`` and total concentration
    ``self.beta_scale``, scored on ``x`` plus uniform noise in
    [0, 1/256), clamped to [1e-5, 1 - 1e-5].

    The noise tensor is created on ``x.device`` rather than through a
    hard-coded ``.cuda()`` call, so the method also works on CPU and on a
    non-default GPU.

    :param x: input batch.
    :param warmup: weight on ``logpz - logqz`` in the ``'welbo'`` output.
    :param inf_net: optional object whose ``sample`` replaces
        ``self.q.sample``.
    :return: dict with keys ``'logpx'``, ``'x_recon'``, ``'welbo'``,
        ``'elbo'``, ``'logws'``, ``'z'``, ``'logpz'`` and ``'logqz'``.
    """
    outputs = {}

    sampler = self.q if inf_net is None else inf_net
    z, logqz = sampler.sample(x)
    logpz = self.prior.logprob(z)

    # Decode image: sigmoid output is the Beta mean.
    x_hat = self.image_decoder(z)
    alpha = torch.sigmoid(x_hat)
    beta = Beta(alpha * self.beta_scale, (1. - alpha) * self.beta_scale)

    # Dequantisation noise, allocated on the same device as the input.
    noise = torch.empty(
        x.shape, dtype=torch.float32, device=x.device).uniform_(0., 1. / 256.)
    x_noise = torch.clamp(x + noise, min=1e-5, max=1 - 1e-5)
    logpx = beta.log_prob(x_noise)

    B = z.shape[0]
    logpx = torch.sum(logpx.view(B, -1), 1)  # one value per batch element

    log_ws = logpx + logpz - logqz

    outputs['logpx'] = torch.mean(logpx)
    outputs['x_recon'] = alpha
    outputs['welbo'] = torch.mean(logpx + warmup * (logpz - logqz))
    outputs['elbo'] = torch.mean(log_ws)
    outputs['logws'] = log_ws
    outputs['z'] = z
    outputs['logpz'] = torch.mean(logpz)
    outputs['logqz'] = torch.mean(logqz)
    return outputs
def f(self, x, z, logits, hard=False):
    """Return the image log-likelihood, the latent log-density and the reconstruction.

    The image likelihood is a per-element Beta with mean
    ``sigmoid(generator(z))`` and total concentration ``self.beta_scale``,
    scored on ``x`` plus uniform noise in [0, 1/256), clamped to
    [1e-5, 1 - 1e-5]. The latent is scored (detached) under a Bernoulli
    with the given logits when ``hard`` is True, otherwise under a
    RelaxedBernoulli with temperature 1.

    The noise and temperature tensors follow the device of ``x`` and
    ``logits`` instead of a hard-coded ``.cuda()``, so the method also
    works on CPU and on a non-default GPU.

    :param x: input batch; ``x.shape[0]`` is the batch size.
    :param z: latent sample fed to the generator.
    :param logits: posterior logits for the latent.
    :param hard: choose the discrete (True) or relaxed (False) posterior.
    :return: ``(logpx, logqb, alpha)`` -- per-sample image log-likelihood,
        per-sample latent log-density summed over dimension 1, and the
        sigmoid reconstruction.
    """
    B = x.shape[0]

    # Image likelihood given the latent.
    x_hat = self.generator.forward(z)
    alpha = torch.sigmoid(x_hat)
    beta = Beta(alpha * self.beta_scale, (1. - alpha) * self.beta_scale)

    # Dequantisation noise, allocated on the same device as the input.
    noise = torch.empty(
        x.shape, dtype=torch.float32, device=x.device).uniform_(0., 1. / 256.)
    x_noise = torch.clamp(x + noise, min=1e-5, max=1 - 1e-5)
    logpx = beta.log_prob(x_noise)
    logpx = torch.sum(logpx.view(B, -1), 1)  # one value per batch element

    # The prior is treated as constant; only q(b|x) is scored here.
    if hard:
        dist = Bernoulli(logits=logits)
    else:
        temperature = torch.ones(1, dtype=torch.float32, device=logits.device)
        dist = RelaxedBernoulli(temperature, logits=logits)

    logqb = dist.log_prob(z.detach())
    logqb = torch.sum(logqb, 1)

    return logpx, logqb, alpha