Example #1
def test_categorical():
    probs = torch.as_tensor([0.2, 0.6, 0.2])
    dist = torch.distributions.Categorical(probs=probs)
    mode = mode_of_distribution(dist)
    assert mode.tolist() == 1

    probs = torch.as_tensor([0.6, 0.2, 0.2])
    dist = torch.distributions.Categorical(probs=probs)
    mode = mode_of_distribution(dist)
    assert mode.tolist() == 0

    probs = torch.as_tensor([[0.6, 0.2, 0.2], [0.2, 0.2, 0.6]])
    dist = torch.distributions.Categorical(probs)
    mode = mode_of_distribution(dist)
    assert mode.tolist() == [0, 2]
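
The batched assertion at the end follows from taking the argmax over the last probability dimension; a quick standalone sanity check in plain PyTorch (no pfrl required):

import torch

# The mode of a Categorical distribution is the index of the largest
# probability; argmax over the last dimension handles the batch case.
probs = torch.as_tensor([[0.6, 0.2, 0.2], [0.2, 0.2, 0.6]])
assert probs.argmax(dim=-1).tolist() == [0, 2]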
Example #2
def test_independent_normal():
    loc = torch.as_tensor([[0.3, 0.7], [0.2, 0.4]])
    scale = torch.as_tensor([[0.1, 0.2], [0.3, 0.8]])
    dist = torch.distributions.Independent(
        torch.distributions.Normal(loc, scale), 1)
    mode = mode_of_distribution(dist)
    torch_assert_allclose(mode, loc)
Example #3
    def _batch_act_train(self, batch_obs):
        assert self.training

        statevar = self.batch_states(batch_obs, self.device, self.phi)

        if self.t == 0:
            with torch.no_grad():
                pout, _ = self.model(statevar)
                action = pout.sample()
            self._flush_storage(statevar.shape, action)

        self.states[self.t - self.t_start] = statevar

        # NOTE: update the model once 2 * update_steps steps have accumulated
        if self.t - self.t_start == self.update_steps * 2:
            self.update()

        # NOTE: inference during training
        with torch.no_grad():
            pout, value = self.model(statevar)
            # epsilon-greedy
            epsilon = self.compute_epsilon(self.t)
            if np.random.rand() < epsilon:
                # random
                action = pout.sample()
            else:
                # greedy
                action = mode_of_distribution(pout)

        self.actions[self.t - self.t_start] = action.reshape(
            -1, *self.action_shape)
        self.value_preds[self.t - self.t_start] = value[:, 0]

        return action.cpu().numpy()
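
compute_epsilon is not shown in this excerpt. A common choice is a linear schedule; the sketch below is illustrative only, and the parameters start_epsilon, end_epsilon, and decay_steps are hypothetical rather than taken from the source:

def compute_epsilon(t, start_epsilon=1.0, end_epsilon=0.1, decay_steps=10000):
    # Hypothetical linear schedule: anneal epsilon from start_epsilon to
    # end_epsilon over the first decay_steps environment steps, then hold.
    if t >= decay_steps:
        return end_epsilon
    return start_epsilon + (t / decay_steps) * (end_epsilon - start_epsilon)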
Example #4
    def batch_select_greedy_action(self, batch_obs, deterministic=False):
        with torch.no_grad(), pfrl.utils.evaluating(self.policy):
            batch_xs = self.batch_states(batch_obs, self.device, self.phi)
            policy_out = self.policy(batch_xs)
            if deterministic:
                batch_action = mode_of_distribution(policy_out).cpu().numpy()
            else:
                batch_action = policy_out.sample().cpu().numpy()
        return batch_action
Example #5
    def _batch_act_eval(self, batch_obs):
        assert not self.training
        statevar = self.batch_states(batch_obs, self.device, self.phi)
        with torch.no_grad():
            pout, _ = self.model(statevar)
            if self.act_deterministically:
                action = mode_of_distribution(pout)
            else:
                action = pout.sample()
        return action.cpu().numpy()
Example #6
File: a3c.py Project: xylee95/pfrl
    def _act_eval(self, obs):
        # Use the process-local model for acting
        with torch.no_grad(), pfrl.utils.evaluating(self.model):
            statevar = self.batch_states([obs], self.device, self.phi)
            if self.recurrent:
                (pout, _), self.test_recurrent_states = one_step_forward(
                    self.model, statevar, self.test_recurrent_states)
            else:
                pout, _ = self.model(statevar)
            if self.act_deterministically:
                return mode_of_distribution(pout).cpu().numpy()[0]
            else:
                return pout.sample().cpu().numpy()[0]
Example #7
    def _act_eval(self, obs):
        with torch.no_grad():
            batch_obs = self.batch_states([obs], self.device, self.phi)
            if self.recurrent:
                action_distrib, self.test_recurrent_states = one_step_forward(
                    self.model, batch_obs, self.test_recurrent_states
                )
            else:
                action_distrib = self.model(batch_obs)
            if self.act_deterministically:
                return mode_of_distribution(action_distrib).cpu().numpy()[0]
            else:
                return action_distrib.sample().cpu().numpy()[0]
Example #8
    def _act_eval(self, obs):
        # Use the process-local model for acting
        with torch.no_grad():
            statevar = batch_states([obs], self.device, self.phi)
            if self.recurrent:
                (action_distrib, _,
                 _), self.test_recurrent_states = one_step_forward(
                     self.model, statevar, self.test_recurrent_states)
            else:
                action_distrib, _, _ = self.model(statevar)
            if self.act_deterministically:
                return mode_of_distribution(action_distrib).cpu().numpy()[0]
            else:
                return action_distrib.sample().cpu().numpy()[0]
Example #9
    def batch_select_onpolicy_action(self, batch_obs, deterministic=False):
        with torch.no_grad(), pfrl.utils.evaluating(self.policy):
            batch_xs = self.batch_states(batch_obs, self.device, self.phi)
            batch_distribution = self.policy(batch_xs)

            if deterministic and self.add_entropy:
                batch_action = mode_of_distribution(batch_distribution)
            else:
                batch_action = batch_distribution.sample()

            batch_action = self.scale * batch_action
            batch_action = batch_action.cpu().numpy()

        return list(batch_action)
Example #10
    def _batch_act_eval(self, batch_obs):
        assert not self.training
        b_state = self.batch_states(batch_obs, self.device, self.phi)

        if self.obs_normalizer:
            b_state = self.obs_normalizer(b_state, update=False)

        with torch.no_grad(), pfrl.utils.evaluating(self.model):
            if self.recurrent:
                (action_distrib, _), self.test_recurrent_states = one_step_forward(
                    self.model, b_state, self.test_recurrent_states
                )
            else:
                action_distrib, _ = self.model(b_state)
            if self.act_deterministically:
                action = mode_of_distribution(action_distrib).cpu().numpy()
            else:
                action = action_distrib.sample().cpu().numpy()

        return action
Example #11
def test_transform():
    base_dist = torch.distributions.Normal(loc=2, scale=1)
    dist = torch.distributions.TransformedDistribution(
        base_dist, [torch.distributions.transforms.TanhTransform()])
    mode = mode_of_distribution(dist)
    torch_assert_allclose(mode.tolist(), math.tanh(2))
Example #12
def test_multivariate_normal():
    loc = torch.as_tensor([0.3, 0.7])
    cov = torch.as_tensor([[0.1, 0.0], [0.0, 0.9]])
    dist = torch.distributions.MultivariateNormal(loc, cov)
    mode = mode_of_distribution(dist)
    torch_assert_allclose(mode, loc)
Example #13
def test_normal():
    loc = torch.as_tensor([0.3, 0.5])
    scale = torch.as_tensor([0.1, 0.9])
    dist = torch.distributions.Normal(loc, scale)
    mode = mode_of_distribution(dist)
    torch_assert_allclose(mode, loc)
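
Taken together, these tests pin down the expected behavior of mode_of_distribution for each distribution family: the argmax of the probabilities for Categorical, the mean for Normal and MultivariateNormal, recursion into the base distribution for Independent, and the base mode pushed through the transforms for TransformedDistribution. Below is a minimal sketch consistent with the tests above; the actual implementation in pfrl may differ in coverage and detail:

import torch

def mode_of_distribution(dist):
    # Minimal sketch: dispatch on the distribution type.
    if isinstance(dist, torch.distributions.Categorical):
        # Mode of a categorical: index of the largest probability.
        return dist.probs.argmax(dim=-1)
    if isinstance(dist, (torch.distributions.Normal,
                         torch.distributions.MultivariateNormal)):
        # Gaussians are unimodal with the mode at the mean.
        return dist.mean
    if isinstance(dist, torch.distributions.Independent):
        # Reinterpreting batch dims as event dims does not move the mode.
        return mode_of_distribution(dist.base_dist)
    if isinstance(dist, torch.distributions.TransformedDistribution):
        # Push the base mode through the transforms, as asserted in
        # test_transform above.
        x = mode_of_distribution(dist.base_dist)
        for transform in dist.transforms:
            x = transform(x)
        return x
    raise NotImplementedError(type(dist))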