def _make_agent_modules(self):
    """Instantiate the policy/value modules for this agent.

    Builds a high-level stochastic policy (action size taken from the
    env's goal size) with its value module, plus a second ("lo") policy
    and value pair.  NOTE(review): presumably a hierarchical hi/lo agent
    — confirm against the enclosing class.
    """
    env = self._dummy_env
    # High-level pair: actions live in the goal space of the env.
    self.policy = StochasticPolicy(env, 256, a_range=5, action_size=env.goal_size)
    self.v_module = Value(env, 256, use_antigoal=False)
    # Low-level pair uses the module defaults.
    self.policy_lo = StochasticPolicy(env, 256)
    self.v_module_lo = Value(env, 256, use_antigoal=False)
def _make_agent_modules(self):
    """Instantiate the VQ-VAE discriminator and the policy/value modules.

    The VAE is restored from ``self.vae_checkpoint_path``; its code size
    is then used as the goal size for both the policy and the value
    module.
    """
    self.vae = VQVAEDiscriminator(
        state_size=self._dummy_env.state_size, **self.vae_args
    )
    # Load pretrained weights before reading vae.code_size below.
    self.vae.load_checkpoint(self.vae_checkpoint_path)

    shared = dict(
        env=self._dummy_env,
        hidden_size=self.hidden_size,
        num_layers=self.num_layers,
        goal_size=self.vae.code_size,
        normalize_inputs=self.normalize_inputs,
    )
    self.policy = StochasticPolicy(**shared)
    self.v_module = Value(use_antigoal=self.use_antigoal, **shared)
def _make_agent_modules(self):
    """Build the skill embedding, then the policy and value modules.

    Goal size is the number of skills (``self.skill_n``); the value
    module never uses an antigoal in this variant.
    """
    self._make_skill_embedding()

    shared = dict(
        env=self._dummy_env,
        hidden_size=self.hidden_size,
        num_layers=self.num_layers,
        goal_size=self.skill_n,
        normalize_inputs=self.normalize_inputs,
    )
    self.policy = StochasticPolicy(**shared)
    self.v_module = Value(use_antigoal=False, **shared)
def _make_agent_modules(self):
    """Instantiate actor/critic pairs with target copies, plus a lo-level pair.

    The target networks start as exact copies of their online
    counterparts (state dicts are loaded immediately after creation).
    NOTE(review): looks like a DDPG-style hi-level with a stochastic
    lo-level — confirm against the training loop.
    """
    env = self._dummy_env
    actor_critic_args = dict(a_range=5, action_size=2)

    # Online + target actor; target initialized from online weights.
    self.policy = Policy(env, 128, **actor_critic_args)
    self.p_target = Policy(env, 128, **actor_critic_args)
    self.p_target.load_state_dict(self.policy.state_dict())

    # Online + target critic; same initialization scheme.
    self.q_module = Critic(env, 128, **actor_critic_args)
    self.q_target = Critic(env, 128, **actor_critic_args)
    self.q_target.load_state_dict(self.q_module.state_dict())

    # Low-level stochastic policy and value module (2-D goals).
    self.policy_lo = StochasticPolicy(env, 256, goal_size=2)
    self.v_module_lo = Value(env, 256, goal_size=2, use_antigoal=False)
def _make_agent_modules(self):
    """Create a minimal agent: one stochastic policy and one value module.

    Both use a hidden size of 128; the value module has no antigoal.
    """
    env = self._dummy_env
    self.policy = StochasticPolicy(env, 128)
    self.v_module = Value(env, 128, use_antigoal=False)
def _make_agent_modules(self):
    """Create the stochastic policy and value module (hidden size 256).

    Whether the value module uses an antigoal is controlled by
    ``self.use_antigoal``.
    """
    env = self._dummy_env
    self.policy = StochasticPolicy(env, 256)
    self.v_module = Value(env, 256, use_antigoal=self.use_antigoal)