Example #1
def Train(_):
    env = screen_learning.ScreenGymEnvironment(gym.make('SpaceInvaders-v0'))
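    # One asynchronous runner per worker, each with its own environment and
    # 10-step returns.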
    async_env_runners = []  # type: t.List[async_runner_impl.AsyncEnvRunner]
    for _ in range(10):
        async_env_runners.append(
            async_runner_impl.AsyncEnvRunner(
                env=screen_learning.ScreenGymEnvironment(
                    gym.make('SpaceInvaders-v0')),
                runner=runner_impl.NStepExperienceRunner(n_step_return=10),
            ))
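    # A single A3C brain shared by all asynchronous workers.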
    brain = async_runner_impl.AsyncBrain(
        a3c_impl.A3C(model=CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
        )))
    brain.Load('saved_models/a3c_invader.weights')  # warm start

    policy = policy_impl.PolicyWithDecreasingRandomness(
        base_policy=policy_impl.PiWeightedPolicy(),
        initial_epsilon=0.2,
        final_epsilon=0.05,
        decay_by_half_after_num_of_episodes=500,
    )
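    # Drive all workers in parallel; attach progress reporting and model saving
    # as callbacks.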
    runner = async_runner_impl.ParallelRunner(async_env_runners)
    runner.AddCallback(
        async_runner_impl.AsyncRunnerExtension(
            runner_extension_impl.ProgressTracer(
                report_every_num_of_episodes=10)))
    runner.AddCallback(
        async_runner_impl.AsyncRunnerExtension(
            runner_extension_impl.ModelSaver(
                save_filepath='saved_models/a3c_invader.weights',
                use_averaged_value_over_num_of_episodes=30)))

    runner.Run(brain=brain, policy=policy, num_of_episodes=200)
Example #2
  def _RunEnv(gym_env):
    env = screen_learning.ScreenGymEnvironment(gym_env)
    qfunc = qfunc_impl.DQN_TargetNetwork(
      model=screen_learning.CreateConvolutionModel(
        action_space_size=env.GetActionSpaceSize()))
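    # epsilon=1.0: actions are chosen uniformly at random, a quick smoke test
    # of the environment/Q-function pipeline.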
    policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

    runner_impl.SimpleRunner().Run(
      env=env, qfunc=qfunc, policy=policy, num_of_episodes=10)
Example #3
def Demo(_):
    env = screen_learning.ScreenGymEnvironment(gym.make('SpaceInvaders-v0'))
    brain = a3c_impl.A3C(model=a3c_impl.CreateModel(
        state_shape=env.GetStateShape(),
        action_space_size=env.GetActionSpaceSize(),
        hidden_layer_sizes=(12, ),
    ))
    brain.Load('saved_models/a3c_invader.weights')
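    # Act greedily with the loaded weights; no exploration during the demo.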
    policy = policy_impl.GreedyPolicy()

    env.StartRecording('a3c_invader.mp4')
    runner = runner_impl.SimpleRunner()
    runner.Run(env=env, brain=brain, policy=policy, num_of_episodes=10)
    env.StopRecording()
Example #4
    def __init__(
        self,
        gym_env_name: t.Text,
        gym_env=None,
        report_every_num_of_episodes: int = 1,
        use_ddqn: bool = True,
        use_large_model: bool = True,
    ):
        """Ctor.

    Args:
      gym_env_name: name of the gym environment that will be created.
      gym_env: Gym environment. If set, use the provided Gym environment and
        gym_env_name is only used as a tag.
      report_every_num_of_episodes: do progress report every this number of
        episodes.
      use_ddqn: whether to use DDQN or DQN_TargetNetwork.
      use_large_model: whether to use the larger model. Without GPU it's very
        slow to use it.
    """
        self._gym_env_name = gym_env_name
        if gym_env:
            env = gym_env
        else:
            env = gym.make(gym_env_name)
        self.env = screen_learning.ScreenGymEnvironment(env)
        # Build two independent convolution models; DDQN uses both (online and
        # target networks), DQN_TargetNetwork only uses the first.
        if use_large_model:
            create_model = screen_learning.CreateOriginalConvolutionModel
        else:
            create_model = screen_learning.CreateConvolutionModel
        model_pair = (
            create_model(action_space_size=self.env.GetActionSpaceSize()),
            create_model(action_space_size=self.env.GetActionSpaceSize()),
        )
        if use_ddqn:
            self.qfunc = qfunc_impl.DDQN(
                model_pair=model_pair,
                training_batch_size=DEFAULT_BATCH_SIZE,
                discount_factor=0.99,
            )
        else:
            self.qfunc = qfunc_impl.DQN_TargetNetwork(
                model=model_pair[0],
                training_batch_size=DEFAULT_BATCH_SIZE,
                discount_factor=0.99)
        logging.printf('Using qfunc implementation: %s',
                       string.GetClassName(self.qfunc))
        self.policy = policy_impl.GreedyPolicyWithDecreasingRandomness(
            initial_epsilon=1.0,
            final_epsilon=0.1,
            decay_by_half_after_num_of_episodes=50)
        logging.printf('Using policy implementation: %s',
                       string.GetClassName(self.policy))

        self.runner = runner_impl.ExperienceReplayRunner(
            experience_capacity=100000,
            experience_sample_batch_size=DEFAULT_BATCH_SIZE)
        logging.printf('Using runner implementation: %s',
                       string.GetClassName(self.runner))

        self._progress_tracer = runner_extension_impl.ProgressTracer(
            report_every_num_of_episodes=report_every_num_of_episodes)
        self._model_saver = runner_extension_impl.ModelSaver(
            self._GetModelWeightsFilepath(),
            use_averaged_value_over_num_of_episodes=report_every_num_of_episodes
        )
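The constructor above only wires up the environment, Q-function, policy, runner, and reporting extensions. A minimal usage sketch follows; the class name `ScreenLearning` and the keyword signature of `ExperienceReplayRunner.Run` (assumed to mirror `SimpleRunner.Run` from Example #2) are illustrative assumptions, not taken from this excerpt.

# Illustrative usage only: the class name and the Run call below are
# assumptions; the excerpt shows just the constructor.
pipeline = ScreenLearning(
    gym_env_name='SpaceInvaders-v0',
    report_every_num_of_episodes=10,
    use_ddqn=True,
    use_large_model=False,  # the smaller model is workable without a GPU
)
pipeline.runner.Run(
    env=pipeline.env,
    qfunc=pipeline.qfunc,
    policy=pipeline.policy,
    num_of_episodes=500)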