def update(self):
        """Run one round of clipped-surrogate (PPO-style) updates on the buffer.

        Reads the columns 's', 'a', 'r', 's_' and 'a_logp' from
        ``self.buffer``, builds one-step TD targets and advantages with
        gradients disabled, then performs ``self.ppo_epoch`` passes of
        shuffled mini-batch optimiser steps.  ``self.training_step`` is
        incremented once per call.  Nothing is returned.
        """
        self.training_step += 1

        def _load(key):
            # Buffer column -> double-precision tensor on the training device.
            return torch.tensor(self.buffer[key], dtype=torch.double).to(device)

        states = _load('s')
        actions = _load('a')
        rewards = _load('r').view(-1, 1)
        next_states = _load('s_')
        logp_old = _load('a_logp').view(-1, 1)

        # Targets and advantages stay fixed for the whole update.
        with torch.no_grad():
            value_target = rewards + args.gamma * self.net(next_states, actual_obs=False)[1]
            advantage = value_target - self.net(states, actual_obs=False)[1]
            # Advantage normalisation is left disabled, as in the original:
            # advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-8)

        for _ in range(self.ppo_epoch):
            batches = BatchSampler(
                SubsetRandomSampler(range(self.buffer_capacity)),
                self.batch_size, False)
            for batch in batches:
                # Policy head: element 0 of the network output is (alpha, beta).
                alpha, beta = self.net(states[batch], actual_obs=False)[0]
                logp_new = Beta(alpha, beta).log_prob(actions[batch]).sum(dim=1, keepdim=True)
                ratio = torch.exp(logp_new - logp_old[batch])

                unclipped = ratio * advantage[batch]
                clipped = torch.clamp(
                    ratio, 1.0 - self.clip_param, 1.0 + self.clip_param
                ) * advantage[batch]
                policy_loss = -torch.min(unclipped, clipped).mean()

                # Value head: element 1 of a second forward pass.
                predicted_value = self.net(states[batch], actual_obs=False)[1]
                critic_loss = F.smooth_l1_loss(predicted_value, value_target[batch])

                total_loss = policy_loss + 2. * critic_loss

                self.optimizer.zero_grad()
                total_loss.backward()
                # Gradient clipping is left disabled, as in the original:
                # nn.utils.clip_grad_norm_(self.net.parameters(), self.max_grad_norm)
                self.optimizer.step()
Пример #2
0
    def forward(self,
                observation,
                reparameterize=True,
                deterministic=False,
                return_log_prob=False):
        """
        Forward pass of the Beta policy.
        Assumes input is a torch tensor.

        The first two columns of the network output (plus ``EPSILON``) are
        used as the Beta concentrations.  The unit-interval sample is then
        mapped affinely via ``self.min`` and ``self.max_min_difference``.

        ``reparameterize`` and ``return_log_prob`` are accepted for interface
        compatibility but are not read by this method.

        :type observation: torch.Tensor
        :param deterministic: if True the distribution mean is used as the
            action; otherwise a reparameterised sample is drawn.
        :return: tuple ``(action, mean, log_std, log_prob, entropy, std,
            mean_action_log_prob, pre_tanh_value)``; the last two are always
            ``None``.  ``log_prob`` and ``entropy`` refer to the
            unit-interval Beta variable, not to the rescaled action.
        """
        hidden = observation
        for fc in self.fcs:
            hidden = self.hidden_activation(fc(hidden))
        network_output = self.output_activation(self.last_fc(hidden))

        alpha = network_output[:, 0].unsqueeze(1) + EPSILON
        beta = network_output[:, 1].unsqueeze(1) + EPSILON
        distribution = Beta(alpha, beta)
        distribution_mean = distribution.mean
        # Fix: the branches were swapped, so deterministic=True used to draw
        # a random sample and deterministic=False returned the mean.
        if deterministic:
            sample = distribution_mean
        else:
            sample = distribution.rsample()
        # transform to range (min, max)
        action = self.min + self.max_min_difference * sample
        mean = self.min + self.max_min_difference * distribution_mean
        variance = self.max_min_difference_squared * distribution.variance
        std = torch.sqrt(variance)
        log_std = torch.log(std)
        log_prob = distribution.log_prob(sample)
        entropy = distribution.entropy()
        mean_action_log_prob = None
        pre_tanh_value = None
        return action, mean, log_std, log_prob, entropy, std, mean_action_log_prob, pre_tanh_value
Пример #3
0
def log_prob_density(x, dist_args, args):
    """Return per-row log-density of ``x`` under the configured policy.

    :param x: tensor of actions; densities are summed over dimension 1.
    :param dist_args: indexable of distribution parameters.  For
        ``"Gaussian"`` element 0 is used as the mean and element 1 as the
        standard deviation; for ``"Beta"`` elements 0 and 1 are the two
        concentrations.
    :param args: object exposing ``stat_policy`` (``"Gaussian"`` or
        ``"Beta"``).
    :return: tensor with dimension 1 reduced to size 1 (``keepdim=True``).
    :raises ValueError: if ``args.stat_policy`` is not a supported name
        (previously this surfaced as an UnboundLocalError).
    """
    if args.stat_policy == "Gaussian":
        mean, std = dist_args[0], dist_args[1]
        # NOTE(review): the -log(std) normalising term is not included, as in
        # the original; that is only harmless where std cancels (e.g. ratios
        # under a fixed std) -- confirm against callers.
        density = -(x - mean).pow(2) / (2 * std.pow(2)) \
                  - 0.5 * math.log(2 * math.pi)
    elif args.stat_policy == "Beta":
        density = Beta(dist_args[0], dist_args[1]).log_prob(x)
    else:
        raise ValueError(
            "Unsupported stat_policy: {!r}".format(args.stat_policy))
    return density.sum(1, keepdim=True)
    def kldivergence(self, datas1, datas2):
        """Return KL(Beta(datas1) || Beta(datas2)) as a float32 tensor.

        Each argument must unpack into exactly two concentration values.
        The result is moved to the device chosen by
        ``set_device(self.use_gpu)``.
        """
        (alpha1, beta1), (alpha2, beta2) = datas1, datas2
        divergence = kl_divergence(Beta(alpha1, beta1), Beta(alpha2, beta2))
        return divergence.float().to(set_device(self.use_gpu))
Пример #5
0
 def _sampler(self, samples=1000):
     """Draw ``samples`` random draws of each quantity returned below.

     NOTE(review): ``d`` is read throughout but is neither a parameter nor
     assigned in this function, so it must come from an enclosing or global
     scope -- confirm.  It is compared with 1 and 2 and also used as an index
     into ``self.az_means``, ``self.az_sds``, ``self.fa_ms``, ``self.fa_ks``,
     ``self.dt_ms`` and ``self.dt_ks``.

     Returns the tuple ``(d_, dist, schools, sz, az, sf, fa, dt)`` where
     ``d_`` is a tensor of ones of length ``samples``.
     """
     d_ = torch.ones(samples)
     if d == 1:
         # If SZ is adopted, then some Districts and Schools buy in
         dist = Poisson(self.n_districts)\
                 .sample([samples])\
                 .reshape([samples])
         schools = NegativeBinomial(tensor([3.]),
                                    tensor([0.8]))\
                     .sample([samples, self.n_districts.int()])\
                     .sum(dim=1)\
                     .reshape([samples])
         # Linear combination of the two draws with fixed coefficients.
         sz = 15000. * dist + 2430 * schools
     else:
         dist, schools, sz = torch.zeros(samples),\
                             torch.zeros(samples),\
                             torch.zeros(samples)
     # sf is only sampled when d < 2; otherwise it is all zeros.
     if d < 2:
         sf = LogNormal(
                 *self._lognormal_params(300000., 10000.))\
                     .sample([samples])
     else:
         sf = torch.zeros(samples)
     # System & Infrastructure
     az = LogNormal(self.az_means[d], self.az_sds[d]).sample([samples])
     # NOTE(review): salary_estimate is sampled but never used or returned;
     # it still consumes random numbers, so removing it would change the
     # values drawn for fa and dt under a fixed seed.
     salary_estimate = Normal(70000., 5000.).sample([samples])
     fa = Beta(self.fa_ms[d], self.fa_ks[d]).sample([samples])
     dt = Beta(self.dt_ms[d], self.dt_ks[d]).sample([samples])
     return d_, dist, schools, sz, az, sf, fa, dt
Пример #6
0
	def __init__(self, N_side_in, **kwargs):
		"""Set up canvas geometry, the operation table and the noise model.

		N_side_in is the side length of the square canvas.  The optional
		keyword 'canv_dist' (default 'bernoulli') must be 'bernoulli' or
		'beta' and selects the noise method name stored on the instance.
		"""
		self.N_side = N_side_in
		self.canv_shape = (self.N_side, self.N_side)

		# Operation name -> callable; insertion order fixes op_str_list.
		# ('subtract' is not registered.)
		self.op_dict = dict(union=union, rect=self.primitive_rect)
		self.op_str_list = list(self.op_dict.keys())
		self.N_ops = len(self.op_str_list)
		self.N_non_primitive_ops = 1
		self.N_params = 4

		self.zero_pad = ZeroPad2d(1)

		# Per-pixel Beta noise with concentrations 0.05 and 0.45.
		canvas_ones = torch.ones(self.canv_shape)
		self.peaky_noise = Beta(0.05*canvas_ones, 0.45*canvas_ones)

		self.canv_dist = kwargs.get('canv_dist', 'bernoulli')
		assert self.canv_dist in ['bernoulli', 'beta'], 'Canv dist must be either bernoulli or beta!'

		self.noise_method = {
			'bernoulli' : 'bern',
			'beta' : 'peaky_blur',
		}[self.canv_dist]
Пример #7
0
 def test_beta_sample_grad(self):
     """Check d(sample)/d(alpha) of ``Beta.rsample`` against a numeric estimate.

     For each (alpha, beta) pair on a 3x3 grid, the autograd gradient of the
     reparameterised samples with respect to alpha is compared with
     ``-(dCDF/dalpha) / pdf``, where the alpha-derivative of the CDF is
     estimated by central finite differences using scipy.
     """
     self._set_rng_seed()
     num_samples = 20
     for alpha, beta in product([1e-2, 1e0, 1e2], [1e-2, 1e0, 1e2]):
         # Only alpha requires grad; betas are treated as constants.
         alphas = Variable(torch.Tensor([alpha] * num_samples),
                           requires_grad=True)
         betas = Variable(torch.Tensor([beta] * num_samples))
         x = Beta(alphas, betas).rsample()
         x.sum().backward()
         # Sort the samples and reorder the gradients to match.
         x, ind = x.data.sort()
         x = x.numpy()
         actual_grad = alphas.grad.data[ind].numpy()
         # Compare with expected gradient dx/dalpha along constant cdf(x,alpha,beta).
         cdf = scipy.stats.beta.cdf
         pdf = scipy.stats.beta.pdf
         # Finite-difference step, scaled with alpha.
         eps = 0.02 * alpha / (1.0 + np.sqrt(alpha))
         cdf_alpha = (cdf(x, alpha + eps, beta) -
                      cdf(x, alpha - eps, beta)) / (2 * eps)
         cdf_x = pdf(x, alpha, beta)
         expected_grad = -cdf_alpha / cdf_x
         # The tiny constant in the denominator guards against division by zero.
         rel_error = np.abs(actual_grad - expected_grad) / (expected_grad +
                                                            1e-100)
         self.assertLess(
             np.max(rel_error), 0.01, '\n'.join([
                 'Bad gradients for Beta({}, {})'.format(alpha, beta),
                 'x {}'.format(x), 'expected {}'.format(expected_grad),
                 'actual {}'.format(actual_grad),
                 'rel error {}'.format(rel_error),
                 'max error {}'.format(rel_error.max())
             ]))
Пример #8
0
    def chooseActionTrain(self, state):
        """Sample an action from the current Beta policy.

            Parameters
            -------
            state : np.ndarray
                Observation; it is converted to a double tensor, moved to
                ``self.hardwareDevice`` and given a leading batch dimension.

            Returns
            -------
            action : np.ndarray
                The sampled action with singleton dimensions squeezed out.
            coefficient : float
                Log-probability of the sampled action, summed over dim 1.

            Notes
            -------
                The network is evaluated with gradients disabled; element 0
                of its output is unpacked as (alpha, beta).
        """
        batched_state = torch.from_numpy(state).double()
        batched_state = batched_state.to(self.hardwareDevice).unsqueeze(0)

        with torch.no_grad():
            policy_params = self.nn(batched_state)[0]
        alpha, beta = policy_params

        policy = Beta(alpha, beta)
        sampled = policy.sample()
        log_prob = policy.log_prob(sampled).sum(dim=1)

        return sampled.squeeze().cpu().numpy(), log_prob.item()
Пример #9
0
    def forward(self,
                x=None,
                warmup=1.,
                inf_net=None):
        """Encode ``x``, decode it and return ELBO statistics.

        :param x: image batch; the values are dequantised with uniform noise
            in [0, 1/256) and clamped to [1e-5, 1 - 1e-5] before scoring.
        :param warmup: weight applied to ``logpz - logqz`` in ``'welbo'``.
        :param inf_net: optional object whose ``inference_net`` replaces
            ``self.inference_net`` for computing ``mu, logvar``.
        :return: dict with keys ``'logpx'``, ``'x_recon'``, ``'welbo'``,
            ``'elbo'``, ``'logws'``, ``'z'``, ``'logpz'``, ``'logqz'`` and
            ``'logvar'``.
        """
        outputs = {}

        encoder = self if inf_net is None else inf_net
        mu, logvar = encoder.inference_net(x)

        z, logpz, logqz = self.sample(mu, logvar)

        z_dec = self.z_to_dec(z)

        B = z_dec.shape[0]

        # Decode Image: the sigmoid output is the Beta mean, beta_scale the
        # total concentration.
        x_hat = self.image_decoder(z_dec)
        alpha = torch.sigmoid(x_hat)
        beta = Beta(alpha * self.beta_scale, (1. - alpha) * self.beta_scale)

        # Noise is created directly on x's device instead of on the CPU
        # followed by a hard-coded .cuda(), so the method also runs without
        # a GPU.
        noise = torch.empty_like(x).uniform_(0., 1. / 256.)
        x_noise = torch.clamp(x + noise, min=1e-5, max=1 - 1e-5)
        logpx = beta.log_prob(x_noise)

        # Sum the per-pixel log-likelihood into one value per example.
        logpx = torch.sum(logpx.view(B, -1), 1)

        log_ws = logpx + logpz - logqz

        outputs['logpx'] = torch.mean(logpx)
        outputs['x_recon'] = alpha
        outputs['welbo'] = torch.mean(logpx + warmup * (logpz - logqz))
        outputs['elbo'] = torch.mean(log_ws)
        outputs['logws'] = log_ws
        outputs['z'] = z
        outputs['logpz'] = torch.mean(logpz)
        outputs['logqz'] = torch.mean(logqz)
        outputs['logvar'] = logvar

        return outputs
Пример #10
0
def test_beta_likelihood(concentration1: float, concentration0: float) -> None:
    """
    Check that fitting by maximum likelihood recovers the Beta parameters.

    Samples are drawn from Beta(concentration1, concentration0), the
    estimator is started slightly below the true values, and both estimates
    must land within a relative tolerance of ``TOL``.
    """
    true_params = (concentration1, concentration0)

    # generate samples
    zeros = torch.zeros((NUM_SAMPLES, ))
    samples = Beta(zeros + concentration1, zeros + concentration0).sample()

    # Start each bias START_TOL_MULTIPLE * TOL (relative) below the truth.
    init_biases = [
        inv_softplus(value - START_TOL_MULTIPLE * TOL * value)
        for value in true_params
    ]

    estimates = maximum_likelihood_estimate_sgd(
        BetaOutput(),
        samples,
        init_biases=init_biases,
        learning_rate=PositiveFloat(0.05),
        num_epochs=PositiveInt(10),
    )
    concentration1_hat, concentration0_hat = estimates

    error1 = np.abs(concentration1_hat - concentration1)
    assert (
        error1 < TOL * concentration1
    ), f"concentration1 did not match: concentration1 = {concentration1}, concentration1_hat = {concentration1_hat}"

    error0 = np.abs(concentration0_hat - concentration0)
    assert (
        error0 < TOL * concentration0
    ), f"concentration0 did not match: concentration0 = {concentration0}, concentration0_hat = {concentration0_hat}"
Пример #11
0
def test_e_log_stick():
    """
    Compare three estimates of the expected log stick quantity.

    The model's ``_E_log_stick`` output is printed next to a reference
    implementation (``compute_q_Elogstick``) and a Monte Carlo estimate.
    This test DOES NOT PASS, and maybe should not: it makes no assertions
    and ends in an interactive debugger breakpoint.
    """
    model = InfiniteIBP(4., 10, 0.1, 0.5, 36)
    model.init_z(10)

    K = model.K

    # take a lot of samples to get something working
    dist = Beta(model.tau.detach()[:, 0], model.tau.detach()[:, 1])
    samples = dist.sample((100000, ))
    # Monte Carlo estimate: mean over samples of log(1 - cumulative product).
    f = (1. - samples.cumprod(1)).log().mean(0)
    log_stick, q = model._E_log_stick(model.tau, model.K)

    # Reference values, computed one index k at a time.
    jeffrey_q = np.zeros((K, K))
    jeffrey_log_stick = np.zeros((K, ))
    for k in range(K):
        a, b = compute_q_Elogstick(model.tau.detach().numpy().T, k)
        jeffrey_q[k, :k + 1] = a
        jeffrey_log_stick[k] = b

    print("old:     {}".format(jeffrey_log_stick))
    print("new:     {}".format(log_stick.detach().numpy()))
    print("samples: {}".format(f.detach().numpy()))

    # NOTE(review): unconditional breakpoint -- this blocks any
    # non-interactive test run and requires ipdb to be installed.
    import ipdb
    ipdb.set_trace()
    def sample(self, datas):
        """Draw one action from Beta(alpha, beta).

        ``datas`` must unpack into exactly (alpha, beta).  The sample is
        cast to float32 and moved to the device returned by
        ``set_device(self.use_gpu)``.
        """
        alpha, beta = datas
        drawn = Beta(alpha, beta).sample()
        return drawn.float().to(set_device(self.use_gpu))
Пример #13
0
 def get_kl(self):
     """Return the summed KL divergence of both posteriors from their priors.

     The Beta posterior uses ``exp(logtheta)`` and ``exp(logeta)`` as
     concentrations and is compared with Beta(1, 1).  The Gamma posterior
     uses ``exp(logalpha)`` as concentration and ``exp(logbeta)`` as rate and
     is compared with Gamma(0.1, 0.3).  Both terms are summed over all
     elements and added.
     """
     beta_ones = torch.ones_like(self.logtheta)
     beta_term = kl_divergence(
         Beta(self.logtheta.exp(), self.logeta.exp()),
         Beta(beta_ones, beta_ones),
     ).sum()

     gamma_ones = torch.ones_like(self.logalpha)
     gamma_term = kl_divergence(
         Gamma(concentration=self.logalpha.exp(), rate=self.logbeta.exp()),
         Gamma(0.1*gamma_ones, 0.3*gamma_ones),
     ).sum()

     return beta_term + gamma_term
Пример #14
0
def kl_bernoulli(pi, step, args):
    """Return the capped KL penalty between Bernoulli(pi) and a sampled prior.

    The prior probabilities are the cumulative product (last dimension) of
    one draw from Beta(args.alpha_0, 1) per element of ``pi``.  The KL is
    summed over dimension 1, averaged over the batch, and its absolute
    distance to a cap that grows linearly with ``step`` (up to
    ``args.h_cap``) is scaled by ``args.gamma_h``.
    """
    ones = torch.ones_like(pi)
    stick_fractions = Beta(ones * args.alpha_0, torch.ones_like(pi)).sample()
    prior = Bernoulli(torch.cumprod(stick_fractions, dim=-1))
    posterior = Bernoulli(pi)

    mean_kl = kl_divergence(posterior, prior).sum(dim=1).mean()

    # Linearly annealed capacity, saturating at args.h_cap.
    annealed_cap = min(args.h_cap, step * args.h_cap / args.total_steps)
    return args.gamma_h * (mean_kl - annealed_cap).abs()
Пример #15
0
 def compute_global_kl_divergence(self) -> torch.Tensor:
     """Return the summed KL divergence from the Beta posterior to its prior.

     The four parameter tensors are read from
     ``self.get_alphas_betas(as_numpy=False)`` under the keys
     ``alpha_posterior``, ``beta_posterior``, ``alpha_prior`` and
     ``beta_prior``.
     """
     params = self.get_alphas_betas(as_numpy=False)
     posterior = Beta(params["alpha_posterior"], params["beta_posterior"])
     prior = Beta(params["alpha_prior"], params["beta_prior"])
     return kl(posterior, prior).sum()
Пример #16
0
    def update(self):
        """Run one round of clipped-surrogate (PPO-style) updates on the buffer.

        Reads the columns 's', 'a', 'r', 's_' and 'a_logp' from
        ``self.buffer`` as double tensors, builds one-step TD targets and
        advantages with gradients disabled, then performs ``self.ppo_epoch``
        passes of shuffled mini-batch optimiser steps.
        ``self.training_step`` is incremented once per call.  Nothing is
        returned.
        """
        self.training_step += 1

        def _load(key):
            return torch.tensor(self.buffer[key], dtype=torch.double)

        states = _load('s')
        actions = _load('a')
        rewards = _load('r').view(-1, 1)
        next_states = _load('s_')
        logp_old = _load('a_logp').view(-1, 1)

        # Targets and advantages stay fixed for the whole update.
        with torch.no_grad():
            value_target = rewards + self.gamma * self.net(next_states)[1]
            advantage = value_target - self.net(states)[1]

        for _ in range(self.ppo_epoch):
            batches = BatchSampler(
                SubsetRandomSampler(range(self.buffer_capacity)),
                self.batch_size, False)
            for batch in batches:
                # Policy head: element 0 of the network output is (alpha, beta).
                alpha, beta = self.net(states[batch])[0]
                logp_new = Beta(alpha, beta).log_prob(
                    actions[batch]).sum(dim=1, keepdim=True)
                ratio = torch.exp(logp_new - logp_old[batch])

                unclipped = ratio * advantage[batch]
                clipped = torch.clamp(
                    ratio, 1.0 - self.clip_param, 1.0 + self.clip_param
                ) * advantage[batch]
                policy_loss = -torch.min(unclipped, clipped).mean()

                # Value head: element 1 of a second forward pass.
                predicted_value = self.net(states[batch])[1]
                critic_loss = F.smooth_l1_loss(predicted_value,
                                               value_target[batch])
                total_loss = policy_loss + 2. * critic_loss

                self.optimizer.zero_grad()
                total_loss.backward()

                # Author's notes on why several mini-batch steps are taken
                # against a fixed old policy:
                # - the clipped objective flattens once a ratio leaves
                #   [1 - eps, 1 + eps], so its gradient there is zero and
                #   the parameters are not pushed further in that direction;
                # - the old policy is constant during an update, so every
                #   step works against the same reference; a step that
                #   overshoots simply ends with zero gradient for that
                #   action;
                # - the clip applies per action: each transition moves
                #   towards its own ceiling, and a later step on another
                #   transition may push a different ratio past eps, which is
                #   acceptable as long as the optimiser is not greedy about
                #   it.
                self.optimizer.step()
Пример #17
0
def test_elbo_components(inputs=None):
    """
    Test that various KL divergences are positive, and in the case of the
    approximate posterior q(v), compute it exactly in two ways and check
    that both give the same result.

    :param inputs: optional ``(model, X)`` pair; when None a small
        ``InfiniteIBP`` model and a random 10x36 data matrix are created.
    """
    if inputs is None:
        model = InfiniteIBP(4., 6, 0.1, 0.5, 36)
        model.init_z(10)
        model.train()

        X = torch.randn(10, 36)
    else:
        model, X = inputs

    # The five summed ELBO terms, in the model's own numbering.
    a = model._1_feature_prob(model.tau).sum()
    b = model._2_feature_assign(model.nu, model.tau).sum()
    c = model._3_feature_prob(model.phi_var, model.phi).sum()
    d = model._4_likelihood(X, model.nu, model.phi_var, model.phi).sum()
    e = model._5_entropy(model.tau, model.phi_var, model.nu).sum()

    entropy_q_v = InfiniteIBP._entropy_q_v(model.tau)
    entropy_q_A = InfiniteIBP._entropy_q_A(model.phi_var)
    entropy_q_z = InfiniteIBP._entropy_q_z(model.nu)

    # On an infinite ELBO, dump every component before re-raising.
    try:
        assert (a + b + c + d + e).item() not in (np.inf,
                                                  -np.inf), "ELBO is inf"
    except AssertionError:
        print("a: ", a)
        print("b: ", b)
        print("c: ", c)
        print("d: ", d)
        print("e: ", e)
        print("entropy_q_v: ", entropy_q_v)
        print("entropy_q_A: ", entropy_q_A)
        print("entropy_q_z: ", entropy_q_z)
        raise

    # check the sign of the various KL divergences (summed, so less powerful than it could be)
    assert (a + entropy_q_v).item() <= 0, "KL(q(pi) || p(pi)) is negative"
    # assert (b + entropy_q_z).item() <= 10, "KL(q(z) || p(z)) is negative" # we give this one some tolerance
    assert (c + entropy_q_A).item() <= 0, "KL(q(A) || p(A)) is negative"
    assert (a + b + c + e
            ).item() <= 0, "KL divergence between q(...) || p(...) is negative"

    # check the empirical value of the component KL divergences (this is a very strong test)
    from torch.distributions import Beta, kl_divergence
    p_pi = Beta(model.alpha, 1.)
    q_pi = Beta(model.tau[:, 0], model.tau[:, 1])

    # NOTE(review): the bare except below swallows the assertion failure and
    # drops into an interactive debugger instead, so in a non-interactive
    # run this check cannot report a failure normally.
    try:
        assert (
            kl_divergence(q_pi, p_pi).sum() +
            (a + entropy_q_v)).abs() < 1e-3, "KL(q(pi) || p(pi)) is incorrect"
    except:
        import ipdb
        ipdb.set_trace()
Пример #18
0
 def test_beta_shape_tensor_params(self):
     """Check batch/event/sample/log_prob shapes for 3x2 tensor parameters."""
     concentrations = [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]
     dist = Beta(torch.Tensor(concentrations), torch.Tensor(concentrations))
     batch_shape = torch.Size((3, 2))

     self.assertEqual(dist._batch_shape, batch_shape)
     self.assertEqual(dist._event_shape, torch.Size(()))

     # Sample shape is prepended to the batch shape.
     self.assertEqual(dist.sample().size(), batch_shape)
     self.assertEqual(dist.sample((3, 2)).size(), torch.Size((3, 2, 3, 2)))

     self.assertEqual(dist.log_prob(self.tensor_sample_1).size(), batch_shape)
     # The second fixture is expected to be rejected with ValueError.
     self.assertRaises(ValueError, dist.log_prob, self.tensor_sample_2)
Пример #19
0
 def test_beta_shape_scalar_params(self):
     """Check batch/event/sample/log_prob shapes for scalar parameters."""
     dist = Beta(0.1, 0.1)
     no_shape = torch.Size()

     self.assertEqual(dist._batch_shape, no_shape)
     self.assertEqual(dist._event_shape, no_shape)

     self.assertEqual(dist.sample().size(), torch.Size((1,)))
     self.assertEqual(dist.sample((3, 2)).size(), torch.Size((3, 2)))

     # The scalar fixture is expected to be rejected; tensor fixtures keep
     # their own shape.
     self.assertRaises(ValueError, dist.log_prob, self.scalar_sample)
     for fixture, shape in ((self.tensor_sample_1, (3, 2)),
                            (self.tensor_sample_2, (3, 2, 3))):
         self.assertEqual(dist.log_prob(fixture).size(), torch.Size(shape))
Пример #20
0
 def test_beta_log_prob(self):
     """Compare ``Beta.log_prob`` with scipy for 100 random parameter pairs."""
     def _random_positive():
         # Log-normal draw, so the concentration is always positive.
         return np.exp(np.random.normal())

     for _ in range(100):
         alpha, beta = _random_positive(), _random_positive()
         dist = Beta(alpha, beta)
         x = dist.sample()
         actual = dist.log_prob(x).sum()
         expected = scipy.stats.beta.logpdf(x, alpha, beta)[0]
         self.assertAlmostEqual(actual, expected, places=3, allow_inf=True)
Пример #21
0
    def forward(self, x=None, warmup=1., inf_net=None):
        """Encode ``x``, decode it and return ELBO statistics.

        :param x: image batch; the values are dequantised with uniform noise
            in [0, 1/256) and clamped to [1e-5, 1 - 1e-5] before scoring.
        :param warmup: weight applied to ``logpz - logqz`` in ``'welbo'``.
        :param inf_net: optional object whose ``inference_net`` replaces
            ``self.inference_net`` for computing ``mu, logvar``.
        :return: dict with keys ``'logpx'``, ``'x_recon'``, ``'welbo'``,
            ``'elbo'``, ``'logws'``, ``'z'``, ``'logpz'``, ``'logqz'`` and
            ``'logvar'``.
        """
        outputs = {}

        encoder = self if inf_net is None else inf_net
        mu, logvar = encoder.inference_net(x)

        z, logpz, logqz = self.sample(mu, logvar)

        z_dec = self.z_to_dec(z)

        B = z_dec.shape[0]

        # Decode Image: the sigmoid output is the Beta mean, beta_scale the
        # total concentration.
        x_hat = self.image_decoder(z_dec)
        alpha = torch.sigmoid(x_hat)
        beta = Beta(alpha*self.beta_scale, (1.-alpha)*self.beta_scale)

        # Noise is created directly on x's device instead of on the CPU
        # followed by a hard-coded .cuda(), so the method also runs without
        # a GPU.
        noise = torch.empty_like(x).uniform_(0., 1./256.)
        x_noise = torch.clamp(x + noise, min=1e-5, max=1-1e-5)
        logpx = beta.log_prob(x_noise)

        # Sum the per-pixel log-likelihood into one value per example.
        logpx = torch.sum(logpx.view(B, -1), 1)

        log_ws = logpx + logpz - logqz

        outputs['logpx'] = torch.mean(logpx)
        outputs['x_recon'] = alpha
        outputs['welbo'] = torch.mean(logpx + warmup*(logpz - logqz))
        outputs['elbo'] = torch.mean(log_ws)
        outputs['logws'] = log_ws
        outputs['z'] = z
        outputs['logpz'] = torch.mean(logpz)
        outputs['logqz'] = torch.mean(logqz)
        outputs['logvar'] = logvar

        return outputs
Пример #22
0
 def sample(self, device, epoch, num=64):
     """Decode ``num`` random latents and save the sampled images to disk.

     Latents are drawn from a standard normal of width ``self.latent_dim``.
     The decoder output parameterises a Beta over per-pixel probabilities,
     which in turn drives a Binomial(255, p) draw that is rescaled to
     [0, 1].  The images are written as ``num`` 1x28x28 tiles to
     'results/epoch_<epoch>_samples.png'.
     """
     latent = torch.randn(num, self.latent_dim).to(device)
     x_alpha, x_beta = self.decode(latent)

     success_prob = Beta(x_alpha, x_beta).sample()
     pixel_counts = Binomial(255, success_prob).sample()
     images = pixel_counts.float() / 255.

     target_path = 'results/epoch_{}_samples.png'.format(epoch)
     save_image(images.view(num, 1, 28, 28), target_path)
Пример #23
0
 def test_beta_sample(self):
     """Check Beta sampling against scipy and for tiny concentrations."""
     set_rng_seed(1)  # see Note [Randomized statistical tests]
     grid = [0.1, 1.0, 10.0]
     for alpha, beta in product(grid, grid):
         label = 'Beta(alpha={}, beta={})'.format(alpha, beta)
         self._check_sampler_sampler(Beta(alpha, beta),
                                     scipy.stats.beta(alpha, beta),
                                     label)
     # Check that small alphas do not cause NANs.
     for Tensor in [torch.FloatTensor, torch.DoubleTensor]:
         tiny = Beta(Tensor([1e-6]), Tensor([1e-6]))
         x = tiny.sample()[0]
         self.assertTrue(np.isfinite(x) and x > 0, 'Invalid Beta.sample(): {}'.format(x))
Пример #24
0
    def forward(self, x=None, warmup=1., inf_net=None):
        """Encode ``x`` into binary-latent logits, decode, and return ELBO terms.

        :param x: image batch; the values are dequantised with uniform noise
            in [0, 1/256) and clamped to [1e-5, 1 - 1e-5] before scoring.
        :param warmup: weight of the KL term in ``'welbo'``.
        :param inf_net: optional replacement for ``self.q``; its
            ``sample(x)`` is assumed to return ``(z, logits)`` like
            ``self.q.sample`` -- TODO confirm against callers.
        :return: dict with keys ``'logpx'``, ``'x_recon'``, ``'welbo'``,
            ``'elbo'``, ``'logws'``, ``'z'``, ``'logpz'`` (always a zero
            tensor) and ``'logqz'`` (the mean KL).
        """
        outputs = {}
        B = x.shape[0]

        # Fix: the inf_net branch used to bind its second return value to an
        # unused name, leaving `logits` undefined and raising NameError below.
        if inf_net is None:
            z, logits = self.q.sample(x)
        else:
            z, logits = inf_net.sample(x)

        # Analytic KL(q || p) between Bernoulli(probs_q) and a Bernoulli(0.5)
        # prior; the clamp keeps both logarithms finite.
        probs_q = torch.sigmoid(logits)
        probs_q = torch.clamp(probs_q, min=.00000001, max=.9999999)
        probs_p = torch.full((B, self.z_size), .5,
                             dtype=probs_q.dtype, device=probs_q.device)
        KL = probs_q*torch.log(probs_q/probs_p) + (1-probs_q)*torch.log((1-probs_q)/(1-probs_p))
        KL = torch.sum(KL, dim=1)

        # Decode Image: the sigmoid output is the Beta mean, beta_scale the
        # total concentration.
        x_hat = self.generator.forward(z)
        alpha = torch.sigmoid(x_hat)
        beta = Beta(alpha*self.beta_scale, (1.-alpha)*self.beta_scale)

        # Noise is created on x's device rather than via a hard-coded .cuda().
        noise = torch.empty_like(x).uniform_(0., 1./256.)
        x_noise = torch.clamp(x + noise, min=1e-5, max=1-1e-5)
        logpx = beta.log_prob(x_noise)

        # Sum the per-pixel log-likelihood into one value per example.
        logpx = torch.sum(logpx.view(B, -1), 1)

        log_ws = logpx - KL

        outputs['logpx'] = torch.mean(logpx)
        outputs['x_recon'] = alpha
        # Fix: the KL is subtracted (it was added), so that 'welbo' equals
        # 'elbo' at warmup=1, matching the logpz - logqz form it replaced.
        outputs['welbo'] = torch.mean(logpx - warmup*KL)
        outputs['elbo'] = torch.mean(log_ws)
        outputs['logws'] = log_ws
        outputs['z'] = z
        outputs['logpz'] = torch.zeros(1)
        outputs['logqz'] = torch.mean(KL)

        return outputs
    def select_action(self, state):
        """Sample an action and return it with its log-probability and state.

        ``state`` is a NumPy array; it is converted to a double tensor on
        ``device`` with a leading batch dimension.  The network output is
        unpacked as ``((alpha, beta), _, rcrc_s)`` with gradients disabled.

        Returns ``(action, a_logp, rcrc_s)``: the squeezed action as a NumPy
        array, the log-probability summed over dim 1 as a float, and the
        third network output unchanged.
        """
        batched_state = torch.from_numpy(state).double().to(device).unsqueeze(0)
        with torch.no_grad():
            policy_params, _, rcrc_s = self.net(batched_state)
        alpha, beta = policy_params

        policy = Beta(alpha, beta)
        sampled = policy.sample()
        log_prob = policy.log_prob(sampled).sum(dim=1)

        return sampled.squeeze().cpu().numpy(), log_prob.item(), rcrc_s
Пример #26
0
    def trainmodel(self):
        """Run the PPO update over the stored memory and save the network.

        States, actions, next states and old log-probabilities are read from
        ``self.memory.buffer``.  Advantages and value targets come from
        ``self.getgae`` bootstrapped with the value of the last next-state,
        and the advantages are standardised.  After ``self.PPOepoch`` passes
        of shuffled mini-batch steps the state dict is written to
        ``self.path_t7``.
        """
        def _double(values):
            # Convert to a double-precision tensor on the training device.
            return torch.tensor(values, dtype=torch.double).to(device)

        states = _double(self.memory.buffer['s'])
        actions = _double(self.memory.buffer['a'])
        next_states = _double(self.memory.buffer['s_'])

        # Bootstrap value: critic output for the final next-state, reshaped
        # to a single 4x28x28 observation.
        last_obs = next_states[-1].view(1, 4, 28, 28)
        bootstrap_value = self.net(last_obs)[1].item()
        raw_adv, raw_target = self.getgae(bootstrap_value)

        advantage = _double(np.array(raw_adv)).view(-1, 1)
        value_target = _double(raw_target).view(-1, 1)
        advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-5)
        logp_old = _double(self.memory.buffer['a_logp']).view(-1, 1)

        for _ in range(self.PPOepoch):
            batches = BatchSampler(
                SubsetRandomSampler(range(self.memory.buffer_capacity)),
                self.memory.batch_size, False)
            for batch in batches:
                alpha, beta = self.net(states[batch])[0]
                policy = Beta(alpha, beta)
                logp_new = policy.log_prob(actions[batch]).sum(dim=1).reshape(-1, 1)
                ratio = torch.exp(logp_new - logp_old[batch])

                # NOTE(review): the entropy is evaluated under no_grad, so
                # the bonus below shifts the loss value but contributes no
                # gradient -- confirm this is intended.
                with torch.no_grad():
                    entropy = policy.entropy()

                unclipped = ratio * advantage[batch]
                clipped = torch.clamp(
                    ratio, 1.0 - self.clip_param, 1.0 + self.clip_param
                ) * advantage[batch]
                action_loss = -torch.min(unclipped, clipped).mean()
                value_loss = F.smooth_l1_loss(
                    self.net(states[batch])[1], value_target[batch])

                # Losses are recorded before clamping.
                self.storeloss(action_loss, value_loss)

                # NOTE(review): clamping to [0, 10] zeroes the gradient of a
                # loss outside that range (e.g. a negative policy loss).
                total_loss = (torch.clamp(action_loss, 0, 10)
                              + 2. * torch.clamp(value_loss, 0, 10)
                              - args.bound * entropy.mean())

                self.optimizer.zero_grad()
                total_loss.backward()
                nn.utils.clip_grad_norm_(self.net.parameters(),
                                         self.max_grad_norm)
                self.optimizer.step()

        torch.save(self.net.state_dict(), self.path_t7)
Пример #27
0
    def select_action(self, state):
        """Sample an action from the Beta policy for a single observation.

        ``state`` is a NumPy array; it is converted to a double tensor on
        ``device`` with a leading batch dimension.  Element 0 of the network
        output is unpacked as (alpha, beta) with gradients disabled.

        Returns ``(action, a_logp)``: the squeezed action as a NumPy array
        (components in [0, 1]) and its log-probability, summed over dim 1,
        as a float for use by PPO.
        """
        batched_state = torch.from_numpy(state).double().to(device).unsqueeze(0)
        with torch.no_grad():
            policy_params = self.net(batched_state)[0]
        alpha, beta = policy_params

        policy = Beta(alpha, beta)
        sampled = policy.sample()
        log_prob = policy.log_prob(sampled).sum(dim=1)

        return sampled.squeeze().cpu().numpy(), log_prob.item()
Пример #28
0
 def mixup(self, input: torch.Tensor, target: torch.Tensor):
     """Mix each example with a randomly chosen partner, in place.

     One mixing weight per example is drawn from Beta(self.beta, self.beta)
     and a random permutation picks the partners.  ``input`` and ``target``
     are both overwritten with ``w * own + (1 - w) * partner`` and the same
     (modified) tensor objects are returned.  On first use a non-tensor
     ``self.beta`` is replaced by a tensor on ``self.device``.
     """
     if not torch.is_tensor(self.beta):
         self.beta = torch.tensor(self.beta).to(self.device)

     batch = input.size(0)
     weight = Beta(self.beta, self.beta).sample((batch, 1, 1, 1))
     order = torch.randperm(batch)

     # Indexing with a permutation copies, so the partners are unaffected by
     # the in-place updates below.
     partner_input = input[order]
     partner_target = target[order]

     input.mul_(weight).add_(partner_input.mul_(1 - weight))

     weight = weight.view(-1, 1)
     target.mul_(weight).add_(partner_target.mul_(1 - weight))
     return input, target
Пример #29
0
 def select_action(self, state):
     """Sample an action from the Beta policy for a single observation.

     ``state`` is a NumPy array; it is converted to a double tensor with a
     leading batch dimension.  Element 0 of the network output is unpacked
     as (alpha, beta) with gradients disabled.

     Returns ``(action, a_logp)``: the squeezed action as a NumPy array with
     components in (0, 1), and the log-probability densities summed over
     dim 1 as a float.
     """
     batched_state = torch.from_numpy(state).double().unsqueeze(0)
     with torch.no_grad():
         policy_params = self.net(batched_state)[0]
     alpha, beta = policy_params

     policy = Beta(alpha, beta)
     sampled = policy.sample()
     log_prob = policy.log_prob(sampled).sum(dim=1)

     return sampled.squeeze().numpy(), log_prob.item()
Пример #30
0
 def gnll_loss_beta(y, param_1, param_2):
     """Return the Beta negative log-likelihood of ``y`` plus a constant.

     For every batch element ``i`` the targets ``y[i]`` are flattened to a
     column and scored under ``Beta(param_1[i], param_2[i])``; the mean
     log-likelihood of each element is subtracted from the running loss.

     Targets exactly equal to 0 or 1 are moved inwards by 1e-3, because the
     Beta log-density is infinite there and would turn the loss into nan.
     This is now done on a copy, so the caller's ``y`` is no longer edited
     in place.

     :return: accumulated loss plus 200 (offset kept from the original; its
         purpose is not visible here).  For an empty batch this is the
         integer 200.
     """
     edge_shift = 1.0e-3
     batch_size = y.shape[0]
     loss = 0
     for i in range(batch_size):
         beta = Beta(param_1[i], param_2[i])
         sample = y[i].reshape(-1, 1)
         # Vectorised replacement for the per-element Python loop.
         sample = torch.where(sample == 0, sample + edge_shift, sample)
         sample = torch.where(sample == 1, sample - edge_shift, sample)
         log_likelihood = beta.log_prob(sample)
         loss -= torch.mean(log_likelihood)
     return loss + 200
Пример #31
0
def mixup(x, y, num_classes, gamma=0.2, smooth_eps=0.1):
    """Apply label smoothing and mixup to a batch.

    When both ``gamma`` and ``smooth_eps`` are 0 the inputs are returned
    unchanged (``y`` is then NOT one-hot encoded).  Otherwise the labels are
    one-hot encoded and smoothed, one mixing weight per example is drawn
    from Beta(gamma, gamma), and inputs and labels are blended with a
    randomly permuted copy of the batch.

    :return: ``(mixed_x, mixed_y)``.
    """
    if gamma == 0 and smooth_eps == 0:
        return x, y

    batch = x.size(0)
    mix_dist = Beta(torch.tensor([gamma]), torch.tensor([gamma]))
    lambdas = mix_dist.sample([batch, 1, 1]).to(x)

    # Smoothed one-hot labels: a scaled one-hot vector plus a constant.
    soft = onehot(y, num_classes).to(x)
    on_scale = 1. - smooth_eps * num_classes / (num_classes - 1)
    off_value = smooth_eps / (num_classes - 1)
    soft = soft * on_scale + torch.ones_like(soft) * off_value

    order = torch.randperm(batch)
    partner_x = x[order]
    partner_y = soft[order]

    mixed_x = x * (1 - lambdas) + partner_x * lambdas
    mixed_y = soft * (1 - lambdas) + partner_y * lambdas
    return mixed_x, mixed_y
Пример #32
0
 def reconstruct(self, x, device, epoch):
     """Reconstruct ``x`` through the model and save inputs next to outputs.

     ``x`` is flattened to rows of 784 floats on ``device``, encoded, and a
     latent is drawn via ``self.reparameterize``.  The decoder output
     parameterises a Beta over per-pixel probabilities, which drives a
     Binomial(255, p) draw rescaled to [0, 1].  Inputs followed by
     reconstructions are written as 64 1x28x28 tiles to
     'results/epoch_<epoch>_recon.png'.
     """
     flat = x.view(-1, 784).float().to(device)
     z_mu, z_logvar = self.encode(flat)
     latent = self.reparameterize(z_mu, z_logvar)  # sample zs
     x_alpha, x_beta = self.decode(latent)

     success_prob = Beta(x_alpha, x_beta).sample()
     pixel_counts = Binomial(255, success_prob).sample()
     recon = pixel_counts.float() / 255.

     side_by_side = torch.cat((flat, recon))
     target_path = 'results/epoch_{}_recon.png'.format(epoch)
     save_image(side_by_side.view(64, 1, 28, 28), target_path)
Пример #33
0
    def forward(self, x=None, warmup=1., inf_net=None):
        """Sample a latent for ``x``, decode it and return ELBO statistics.

        :param x: image batch; the values are dequantised with uniform noise
            in [0, 1/256) and clamped to [1e-5, 1 - 1e-5] before scoring.
        :param warmup: weight applied to ``logpz - logqz`` in ``'welbo'``.
        :param inf_net: optional object whose ``sample(x)`` replaces
            ``self.q.sample(x)``; both return ``(z, logqz)``.
        :return: dict with keys ``'logpx'``, ``'x_recon'``, ``'welbo'``,
            ``'elbo'``, ``'logws'``, ``'z'``, ``'logpz'`` and ``'logqz'``.
        """
        outputs = {}

        sampler = self.q if inf_net is None else inf_net
        z, logqz = sampler.sample(x)

        logpz = self.prior.logprob(z)

        # Decode Image: the sigmoid output is the Beta mean, beta_scale the
        # total concentration.
        x_hat = self.image_decoder(z)
        alpha = torch.sigmoid(x_hat)
        beta = Beta(alpha*self.beta_scale, (1.-alpha)*self.beta_scale)

        # Noise is created directly on x's device instead of on the CPU
        # followed by a hard-coded .cuda(), so the method also runs without
        # a GPU.
        noise = torch.empty_like(x).uniform_(0., 1./256.)
        x_noise = torch.clamp(x + noise, min=1e-5, max=1-1e-5)
        logpx = beta.log_prob(x_noise)

        # Sum the per-pixel log-likelihood into one value per example.
        B = z.shape[0]
        logpx = torch.sum(logpx.view(B, -1), 1)

        log_ws = logpx + logpz - logqz

        outputs['logpx'] = torch.mean(logpx)
        outputs['x_recon'] = alpha
        outputs['welbo'] = torch.mean(logpx + warmup*(logpz - logqz))
        outputs['elbo'] = torch.mean(log_ws)
        outputs['logws'] = log_ws
        outputs['z'] = z
        outputs['logpz'] = torch.mean(logpz)
        outputs['logqz'] = torch.mean(logqz)

        return outputs
Пример #34
0
    def f(self, x, z, logits, hard=False):
        """Score ``x`` under the decoder and ``z`` under the posterior.

        :param x: image batch; the values are dequantised with uniform noise
            in [0, 1/256) and clamped to [1e-5, 1 - 1e-5] before scoring.
        :param z: latent code fed to ``self.generator``; it is detached
            before being scored under the posterior.
        :param logits: posterior logits for ``z``.
        :param hard: if True ``z`` is scored under ``Bernoulli(logits)``,
            otherwise under ``RelaxedBernoulli`` with temperature 1.
        :return: ``(logpx, logqb, alpha)`` -- per-example image
            log-likelihood, per-example posterior log-probability (summed
            over dim 1), and the decoder mean image.
        """
        B = x.shape[0]

        # image likelihood given z: the sigmoid output is the Beta mean,
        # beta_scale the total concentration.
        x_hat = self.generator.forward(z)
        alpha = torch.sigmoid(x_hat)
        beta = Beta(alpha*self.beta_scale, (1.-alpha)*self.beta_scale)

        # Noise is created on x's device rather than via a hard-coded .cuda().
        noise = torch.empty_like(x).uniform_(0., 1./256.)
        x_noise = torch.clamp(x + noise, min=1e-5, max=1-1e-5)
        logpx = beta.log_prob(x_noise)
        logpx = torch.sum(logpx.view(B, -1), 1)

        if hard:
            dist = Bernoulli(logits=logits)
        else:
            # Temperature lives on the same device as the logits.
            temperature = torch.tensor([1.], device=logits.device)
            dist = RelaxedBernoulli(temperature, logits=logits)

        logqb = dist.log_prob(z.detach())
        logqb = torch.sum(logqb, 1)

        return logpx, logqb, alpha