def test_exponential_shape_tensor_param(self):
    expon = Exponential(torch.Tensor([1, 1]))
    self.assertEqual(expon._batch_shape, torch.Size((2,)))
    self.assertEqual(expon._event_shape, torch.Size(()))
    self.assertEqual(expon.sample().size(), torch.Size((2,)))
    self.assertEqual(expon.sample((3, 2)).size(), torch.Size((3, 2, 2)))
    self.assertEqual(expon.log_prob(self.tensor_sample_1).size(), torch.Size((3, 2)))
    self.assertRaises(ValueError, expon.log_prob, self.tensor_sample_2)
def test_exponential_shape_scalar_param(self):
    expon = Exponential(1.)
    self.assertEqual(expon._batch_shape, torch.Size())
    self.assertEqual(expon._event_shape, torch.Size())
    self.assertEqual(expon.sample().size(), torch.Size((1,)))
    self.assertEqual(expon.sample((3, 2)).size(), torch.Size((3, 2)))
    self.assertRaises(ValueError, expon.log_prob, self.scalar_sample)
    self.assertEqual(expon.log_prob(self.tensor_sample_1).size(), torch.Size((3, 2)))
    self.assertEqual(expon.log_prob(self.tensor_sample_2).size(), torch.Size((3, 2, 3)))
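# The two shape tests above rely on fixtures prepared in the test's setUp();
# their exact definitions live elsewhere in the test class, so the following
# is a sketch of what they would need to be for the asserted shapes to hold
# (an assumption, derived from the expected log_prob sizes): tensor_sample_1
# must broadcast against a batch_shape of (2,), while tensor_sample_2's
# trailing dimension of 3 must not.
def setUp(self):
    self.scalar_sample = 1                       # plain Python number
    self.tensor_sample_1 = torch.ones(3, 2)      # broadcasts with batch_shape (2,)
    self.tensor_sample_2 = torch.ones(3, 2, 3)   # trailing 3 clashes with (2,)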
def _make_ma_mdp(self):
    joint_action_shape = self.joint_action_shape
    n_states = self.n_states
    n_agents = len(joint_action_shape)
    rand = self.rand

    # Reward perturbation
    perturbation = mu.unsqueeze(
        self.reward_perturbation, -1,
        n_states + 3 - self.reward_perturbation.dim())
    # Generate transition probability tensor
    trans_prob = th.rand(n_states, *joint_action_shape, n_states, generator=rand)
    # Acyclic (episodic) MDP: forbid transitions to the current or any earlier state
    if self.acyclic:
        states_idx, next_states_idx = th.tril_indices(n_states, n_states)
        trans_prob[states_idx, ..., next_states_idx] = 0
    # Normalize transition probability matrix
    trans_prob /= trans_prob.sum(dim=-1, keepdim=True)
    trans_prob[th.isnan(trans_prob)] = 0

    # Generate random rewards (the following method ensures enough variance in rewards)
    # 1) Generate a rewards "core" per state, joint action and agent
    rewards = th.randn(n_states, *joint_action_shape, 1, n_agents, generator=rand)
    # 2) Multiply the "core" by random scales to generate different rewards per next state
    scales_dist = Exponential(th.tensor(1.))
    with mu.use_rand(rand):
        rewards *= scales_dist.sample(
            (n_states, *joint_action_shape, n_states, n_agents))
    # 3) Correlate rewards across agents
    rewards = rewards @ self.reward_correlation

    ## Transition probability
    self._trans_prob = trans_prob
    ## Rewards for state-joint actions
    self._rewards = rewards
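# A standalone sketch (hypothetical sizes, not part of the class) checking the
# acyclic masking used above: tril_indices includes the diagonal, so a state
# may only transition to strictly later states, and the last state ends up
# with an all-zero row after the NaN fixup.
import torch as th

n_states, n_actions = 4, 2
trans_prob = th.rand(n_states, n_actions, n_states)
rows, cols = th.tril_indices(n_states, n_states)
trans_prob[rows, ..., cols] = 0                     # zero self- and backward-transitions
trans_prob /= trans_prob.sum(dim=-1, keepdim=True)
trans_prob[th.isnan(trans_prob)] = 0                # rows that summed to 0 become all-zero
assert th.all(trans_prob[-1] == 0)                  # terminal state has no successors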
def forward(self, theta, n_samp=1):
    n_exp = theta.shape[0]
    n_samp = torch.Size([n_samp, 1])
    unit = torch.ones(n_exp).to(self.device)
    with torch.no_grad():
        # z0 ~ N(theta_0, 1)
        d0 = Normal(theta[:, 0], unit)
        z0 = d0.sample(n_samp).permute(2, 0, 1)
        # z1 ~ N(3, exp(theta_1 / 3))
        d1 = Normal(3 * unit, torch.exp(theta[:, 1] / 3))
        z1 = d1.sample(n_samp).permute(2, 0, 1)
        # z2: Gaussian mixture, 0.5 * N(-2, 1) + 0.5 * N(2, 0.5)
        d2_1 = Normal(-2 * unit, unit)
        d2_2 = Normal(2 * unit, .5 * unit)
        d2_b = Bernoulli(.5 * unit)
        z2_b = d2_b.sample(n_samp).float()
        z2 = ((z2_b * d2_1.sample(n_samp)
               + (1 - z2_b) * d2_2.sample(n_samp)).permute(2, 0, 1))
        # z3 ~ U(-5, theta_2)
        d3 = Uniform(-5 * unit, theta[:, 2])
        z3 = d3.sample(n_samp).permute(2, 0, 1)
        # z4 ~ Exponential(rate=0.5)
        d4 = Exponential(.5 * unit)
        z4 = d4.sample(n_samp).permute(2, 0, 1)
        # Stack the five coordinates, then mix them with the fixed matrix R
        z = torch.cat((z0, z1, z2, z3, z4), 2)
        X = torch.matmul(self.R, z.view(-1, 5).unsqueeze(2)).view(n_exp, -1, 5)
    return X
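# Hypothetical usage of forward() above (assumes a module instance named
# `simulator` with self.R a fixed 5x5 mixing matrix and self.device set; the
# instance name is an assumption, not from the source). For theta of shape
# (n_exp, 3) the output has shape (n_exp, n_samp, 5): one 5-dimensional draw
# per parameter vector and per sample.
theta = torch.rand(16, 3)               # keeps theta[:, 2] > -5 so the Uniform is valid
X = simulator.forward(theta, n_samp=10)
assert X.shape == (16, 10, 5)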
def sample(self, size):
    m = Exponential(torch.tensor([1.0]))
    return m.sample(size)
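# Shape note for sample() above: because the rate is the 1-element tensor
# [1.0], the Exponential has batch_shape (1,), so sample(size) returns a
# tensor of shape size + (1,). A quick standalone check:
m = Exponential(torch.tensor([1.0]))
assert m.sample(torch.Size([3])).shape == (3, 1)
# Passing a 0-dim rate, Exponential(torch.tensor(1.0)), would drop the trailing 1.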