Exemplo n.º 1
0
    def _make_agent_modules(self):
        """Create the policy and critic networks along with their targets.

        Sets four attributes on ``self``: ``policy``/``p_target`` (both
        ``Policy``) and ``q_module``/``q_target`` (both ``Critic``). Every
        network is built from ``self._dummy_env`` with a size argument of
        128, and each target has the state dict of its online counterpart
        loaded into it right after construction.
        """

        def _online_and_target(net_cls):
            # Build the online network first, then the target, and finally
            # overwrite the target's state with the online network's state.
            online = net_cls(self._dummy_env, 128)
            target = net_cls(self._dummy_env, 128)
            target.load_state_dict(online.state_dict())
            return online, target

        # Tuple targets are assigned left to right, so the attributes are
        # set in the order policy, p_target, q_module, q_target.
        self.policy, self.p_target = _online_and_target(Policy)
        self.q_module, self.q_target = _online_and_target(Critic)
Exemplo n.º 2
0
    def _make_agent_modules(self):
        """Create the agent's networks and attach them to ``self``.

        ``policy``/``p_target`` (``Policy``) and ``q_module``/``q_target``
        (``Critic``) are built from ``self._dummy_env`` with a size argument
        of 128 and the keyword arguments ``a_range=5, action_size=2``; each
        target has the state dict of its online counterpart loaded into it.

        ``policy_lo`` (``StochasticPolicy``) and ``v_module_lo`` (``Value``)
        are built with a size argument of 256 and ``goal_size=2``; no target
        copies are created for them here.
        NOTE(review): the ``_lo`` suffix presumably marks a lower-level
        controller -- confirm against the enclosing class.
        """
        # Keyword arguments shared by the four Policy/Critic constructors.
        shared_kwargs = dict(a_range=5, action_size=2)

        def _online_and_target(net_cls):
            # Build the online network first, then the target, and finally
            # overwrite the target's state with the online network's state.
            online = net_cls(self._dummy_env, 128, **shared_kwargs)
            target = net_cls(self._dummy_env, 128, **shared_kwargs)
            target.load_state_dict(online.state_dict())
            return online, target

        # Tuple targets are assigned left to right, so the attributes are
        # set in the order policy, p_target, q_module, q_target.
        self.policy, self.p_target = _online_and_target(Policy)
        self.q_module, self.q_target = _online_and_target(Critic)

        self.policy_lo = StochasticPolicy(self._dummy_env, 256, goal_size=2)
        self.v_module_lo = Value(
            self._dummy_env, 256, goal_size=2, use_antigoal=False
        )