Пример #1
0
 def __init__(self, obs_size, action_space,
              n_hidden_layers=2, n_hidden_channels=64,
              bound_mean=None, normalize_obs=None):
     """Create the policy `pi`, the value function `v` and, optionally,
     an observation normalizer `obs_filter`.

     Args:
         obs_size: Observation size; used as the input size of both
             networks and as the shape of the optional normalizer.
         action_space: Object exposing `low` and `high`. `low.size` is
             used as the action dimensionality, and `low`/`high` are
             forwarded to the policy as `min_action`/`max_action`.
         n_hidden_layers: Number of hidden layers of both networks.
         n_hidden_channels: Width of every hidden layer.
         bound_mean: Forwarded to the policy as `bound_mean`.
         normalize_obs: Stored on the instance; when true, an
             `EmpiricalNormalization` link is created as `obs_filter`.

     Note:
         `bound_mean` and `normalize_obs` must each compare equal to
         `True` or `False`. Their `None` defaults fail the assertions
         below, so callers have to pass both flags explicitly.
     """
     # Checked in declaration order: bound_mean first, then normalize_obs.
     for flag in (bound_mean, normalize_obs):
         assert flag in (False, True)

     super().__init__()
     self.normalize_obs = normalize_obs

     # The value network takes an explicit per-layer width tuple,
     # whereas the policy takes the layer count and width separately.
     v_hidden_sizes = tuple([n_hidden_channels] * n_hidden_layers)
     action_low = action_space.low

     with self.init_scope():
         self.pi = policies.FCGaussianPolicyWithStateIndependentCovariance(
             obs_size,
             action_low.size,
             n_hidden_layers,
             n_hidden_channels,
             var_type='diagonal',
             nonlinearity=F.tanh,
             bound_mean=bound_mean,
             min_action=action_low,
             max_action=action_space.high,
             mean_wscale=1e-2,
         )
         self.v = links.MLP(obs_size, 1, hidden_sizes=v_hidden_sizes)
         if normalize_obs:
             self.obs_filter = links.EmpiricalNormalization(shape=obs_size)
Пример #2
0
    def make_model(self, env):
        """Build a small policy / value-function pair for `env`.

        The value function is always an `FCVFunction`. The policy is an
        `FCSoftmaxPolicy` when `self.discrete` is true, and otherwise an
        `FCGaussianPolicyWithStateIndependentCovariance` with a diagonal
        variance. All networks use one hidden layer of 20 tanh units.

        Before returning, the policy is evaluated on an all-zero
        observation and the KL divergence of the resulting distribution
        with itself is inspected by `trpo._find_old_style_function`. If
        that reports any functions, `self.skipTest` is called with a
        message listing them.

        Args:
            env: Environment exposing `observation_space.low` and an
                `action_space` with `n` (discrete case) or `low`
                (continuous case).

        Returns:
            Tuple `(pi, v)` of the policy and the value function.
        """
        hidden_width = 20
        obs_dim = env.observation_space.low.size

        # Architecture settings common to every network built here.
        shared = dict(
            n_hidden_layers=1,
            n_hidden_channels=hidden_width,
            nonlinearity=F.tanh,
        )

        v = v_functions.FCVFunction(obs_dim, last_wscale=0.01, **shared)

        if self.discrete:
            pi = policies.FCSoftmaxPolicy(
                obs_dim,
                env.action_space.n,
                last_wscale=0.01,
                **shared
            )
        else:
            pi = policies.FCGaussianPolicyWithStateIndependentCovariance(
                obs_dim,
                env.action_space.low.size,
                mean_wscale=0.01,
                var_type='diagonal',
                **shared
            )

        # Check if KL div supports double-backprop
        probe_obs = np.zeros_like(env.observation_space.low, dtype=np.float32)
        # Indexing with None prepends an axis, giving a batch of one.
        distrib = pi(probe_obs[None])
        unsupported = trpo._find_old_style_function([distrib.kl(distrib)])
        if unsupported:
            message = (
                "Chainer v{} does not support double backprop "
                "of these functions: {}."
            )
            self.skipTest(message.format(chainer.__version__, unsupported))

        return pi, v