def make_agent(agent_type, env, policy_hparams, policy_dir, sampling_temp,
               sim_env_kwargs=None, frame_stack_size=None,
               planning_horizon=None, rollout_agent_type=None):
  """Factory function for Agents.

  Builds one of three agent kinds ("random", "policy", "planner").
  Construction is deferred through local factories so that only the
  requested agent is ever instantiated; an unknown agent_type raises
  KeyError, exactly as a dict lookup would.
  """
  def _make_random():
    # The random agent ignores every policy-related argument.
    return rl_utils.RandomAgent(
        env.batch_size, env.observation_space, env.action_space)

  def _make_policy():
    return rl_utils.PolicyAgent(
        env.batch_size, env.observation_space, env.action_space,
        policy_hparams, policy_dir, sampling_temp)

  def _make_planner():
    # The planner plans over a simulated env (frames stacked to
    # frame_stack_size) using a recursively-built rollout agent.
    rollout_agent = make_agent(
        rollout_agent_type, env, policy_hparams, policy_dir, sampling_temp)
    # assumes sim_env_kwargs is provided when agent_type == "planner" —
    # TODO confirm with callers; None would raise TypeError here.
    sim_env = rl_utils.SimulatedBatchGymEnvWithFixedInitialFrames(
        **sim_env_kwargs)
    wrap_fn = lambda inner_env: rl_utils.BatchStackWrapper(  # pylint: disable=g-long-lambda
        inner_env, frame_stack_size)
    return rl_utils.PlannerAgent(
        env.batch_size, rollout_agent, sim_env, wrap_fn, planning_horizon)

  factories = {
      "random": _make_random,
      "policy": _make_policy,
      "planner": _make_planner,
  }
  return factories[agent_type]()
def make_agent(
    agent_type, env, policy_hparams, policy_dir, sampling_temp,
    sim_env_kwargs_fn=None, frame_stack_size=None, rollout_agent_type=None,
    batch_size=None, inner_batch_size=None, env_type=None, **planner_kwargs
):
  """Factory function for Agents.

  Dispatches on agent_type ("random", "policy", "planner") through local
  factories, so only the chosen agent is constructed. batch_size defaults
  to the env's own batch size. Extra keyword arguments are forwarded to
  PlannerAgent only. Unknown agent_type raises KeyError.
  """
  if batch_size is None:
    batch_size = env.batch_size

  def _make_random():
    return rl_utils.RandomAgent(
        batch_size, env.observation_space, env.action_space)

  def _make_policy():
    return rl_utils.PolicyAgent(
        batch_size, env.observation_space, env.action_space,
        policy_hparams, policy_dir, sampling_temp)

  def _make_planner():
    # The inner rollout agent may run with its own (typically larger)
    # batch size; sim_env_kwargs_fn is only called when actually needed.
    rollout_agent = make_agent(
        rollout_agent_type, env, policy_hparams, policy_dir, sampling_temp,
        batch_size=inner_batch_size)
    sim_env = make_env(env_type, env.env, sim_env_kwargs_fn())
    wrap_fn = lambda inner_env: rl_utils.BatchStackWrapper(  # pylint: disable=g-long-lambda
        inner_env, frame_stack_size)
    return rl_utils.PlannerAgent(
        batch_size, rollout_agent, sim_env, wrap_fn,
        discount_factor=policy_hparams.gae_gamma, **planner_kwargs)

  factories = {
      "random": _make_random,
      "policy": _make_policy,
      "planner": _make_planner,
  }
  return factories[agent_type]()
def make_agent(
    agent_type, env, policy_hparams, policy_dir, sampling_temp,
    sim_env_kwargs=None, frame_stack_size=None, planning_horizon=None,
    rollout_agent_type=None, batch_size=None, num_rollouts=None,
    inner_batch_size=None, video_writer=None, env_type=None, uct_const=None,
    uct_std_normalization=None, uniform_first_action=None
):
  """Factory function for Agents.

  Dispatches on agent_type ("random", "policy", "planner") via local
  factories so only the requested agent is instantiated. batch_size
  defaults to env.batch_size. The UCT-related arguments, num_rollouts,
  planning_horizon and video_writer apply to the planner only. Unknown
  agent_type raises KeyError.
  """
  if batch_size is None:
    batch_size = env.batch_size

  def _make_random():
    return rl_utils.RandomAgent(
        batch_size, env.observation_space, env.action_space)

  def _make_policy():
    return rl_utils.PolicyAgent(
        batch_size, env.observation_space, env.action_space,
        policy_hparams, policy_dir, sampling_temp)

  def _make_planner():
    # Recursively build the agent used for rollouts inside the planner,
    # possibly with a different (inner) batch size.
    rollout_agent = make_agent(
        rollout_agent_type, env, policy_hparams, policy_dir, sampling_temp,
        batch_size=inner_batch_size)
    sim_env = make_env(env_type, env.env, sim_env_kwargs)
    wrap_fn = lambda inner_env: rl_utils.BatchStackWrapper(  # pylint: disable=g-long-lambda
        inner_env, frame_stack_size)
    return rl_utils.PlannerAgent(
        batch_size, rollout_agent, sim_env, wrap_fn,
        num_rollouts, planning_horizon,
        discount_factor=policy_hparams.gae_gamma,
        uct_const=uct_const,
        uct_std_normalization=uct_std_normalization,
        uniform_first_action=uniform_first_action,
        video_writer=video_writer)

  factories = {
      "random": _make_random,
      "policy": _make_policy,
      "planner": _make_planner,
  }
  return factories[agent_type]()
def make_agent(agent_type, env, policy_hparams, policy_dir, sampling_temp,
               sim_env_kwargs=None, frame_stack_size=None,
               planning_horizon=None, rollout_agent_type=None,
               batch_size=None, num_rollouts=None, inner_batch_size=None,
               video_writer=None):
  """Factory function for Agents.

  Selects among "random", "policy" and "planner" agents via local
  factories, so only the requested agent is constructed. batch_size
  defaults to env.batch_size; num_rollouts, planning_horizon and
  video_writer are used by the planner only. Unknown agent_type raises
  KeyError.
  """
  if batch_size is None:
    batch_size = env.batch_size

  def _make_random():
    return rl_utils.RandomAgent(
        batch_size, env.observation_space, env.action_space)

  def _make_policy():
    return rl_utils.PolicyAgent(
        batch_size, env.observation_space, env.action_space,
        policy_hparams, policy_dir, sampling_temp)

  def _make_planner():
    # Rollouts happen inside a simulated env seeded with fixed initial
    # frames; the inner agent may use its own batch size.
    rollout_agent = make_agent(
        rollout_agent_type, env, policy_hparams, policy_dir, sampling_temp,
        batch_size=inner_batch_size)
    # assumes sim_env_kwargs is provided when agent_type == "planner" —
    # TODO confirm with callers; None would raise TypeError here.
    sim_env = rl_utils.SimulatedBatchGymEnvWithFixedInitialFrames(
        **sim_env_kwargs)
    wrap_fn = lambda inner_env: rl_utils.BatchStackWrapper(  # pylint: disable=g-long-lambda
        inner_env, frame_stack_size)
    return rl_utils.PlannerAgent(
        batch_size, rollout_agent, sim_env, wrap_fn,
        num_rollouts, planning_horizon,
        discount_factor=policy_hparams.gae_gamma,
        video_writer=video_writer)

  factories = {
      "random": _make_random,
      "policy": _make_policy,
      "planner": _make_planner,
  }
  return factories[agent_type]()