Example #1
    def __init__(
        self,
        gym_env_name: t.Text,
        model_shape: t.Iterable[int] = (20, 20, 20),
        report_every_num_of_episodes: int = 100,
    ):
        """Ctor.

    Default implementations are provided for all objects. They can be changed
    by directly setting the public properties after the creation.

    Args:
      gym_env_name: name of the gym environment, like "LunarLander-v2".
      model_shape: a list of number of nodes per hidden layer.
      report_every_num_of_episodes: do progress report every this number of
        episodes.
    """
        self._gym_env_name = gym_env_name
        self._model_shape = tuple(model_shape)

        self.env = environment_impl.GymEnvironment(gym.make(gym_env_name))
        self.qfunc = qfunc_impl.DDQN(
            model_pair=(
                qfunc_impl.CreateModel(
                    state_shape=self.env.GetStateShape(),
                    action_space_size=self.env.GetActionSpaceSize(),
                    hidden_layer_sizes=model_shape),
                qfunc_impl.CreateModel(
                    state_shape=self.env.GetStateShape(),
                    action_space_size=self.env.GetActionSpaceSize(),
                    hidden_layer_sizes=model_shape),
            ),
            training_batch_size=DEFAULT_BATCH_SIZE,
            discount_factor=0.99,
        )
        logging.printf('Using qfunc implementation: %s',
                       string.GetClassName(self.qfunc))
        self.policy = policy_impl.GreedyPolicyWithDecreasingRandomness(
            initial_epsilon=1.0,
            final_epsilon=0.1,
            decay_by_half_after_num_of_episodes=500)
        logging.printf('Using policy implementation: %s',
                       string.GetClassName(self.policy))

        self.runner = runner_impl.ExperienceReplayRunner(
            experience_capacity=100000,
            experience_sample_batch_size=DEFAULT_BATCH_SIZE)
        logging.printf('Using runner implementation: %s',
                       string.GetClassName(self.runner))

        self._progress_tracer = runner_extension_impl.ProgressTracer(
            report_every_num_of_episodes=report_every_num_of_episodes)
        self._model_saver = runner_extension_impl.ModelSaver(
            self._GetModelWeightsFilepath())
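
The constructor above assembles default components (Gym environment, DDQN Q-function, decaying-epsilon greedy policy, experience-replay runner, progress tracing, and model saving) that can be swapped out afterwards. A minimal usage sketch, assuming the enclosing class is called Pipeline (a hypothetical name) and reusing the Run signature seen in Examples #2 and #3:

# Sketch only: Pipeline is a hypothetical name for the class that owns the
# __init__ above; Run(...) is called as in Examples #2 and #3.
pipeline = Pipeline('LunarLander-v2', model_shape=(20, 20, 20))
# Components are public and may be replaced after construction:
pipeline.policy = policy_impl.GreedyPolicyWithRandomness(epsilon=0.1)
pipeline.runner.Run(
    env=pipeline.env,
    brain=pipeline.qfunc,
    policy=pipeline.policy,
    num_of_episodes=500)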
Example #2
def main(_):
    batch_size = 64  # training batch size for the qfunc.
    env = environment_impl.GymEnvironment(gym.make('MountainCar-v0'))
    env.SetGymEnvMaxEpisodeSteps(400)
    qfunc = qfunc_impl.DQN(
        model=qfunc_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(64, )),
        training_batch_size=batch_size,
        discount_factor=0.99,
    )
    qfunc.Load('saved_models/mountaincar_shape_64_rmsprop_gamma_099.weights')
    policy = policy_impl.GreedyPolicy()
    runner = runner_impl.NoOpRunner()

    env.TurnOnRendering(should_render=True, fps=24)
    logging.ENV.debug_verbosity = 9

    env.StartRecording(video_filename='mountaincar_demo.mp4')
    # First 5 runs with random actions:
    rand_qfunc = qfunc_impl.RandomQFunction(env.GetActionSpaceSize())
    runner.Run(env=env, brain=rand_qfunc, policy=policy, num_of_episodes=5)
    # Then 10 runs with trained qfunc:
    runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=10)
    env.StopRecording()
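
The .weights file loaded above has to come from an earlier training run. A hedged sketch of how that file could be produced, reusing the experience-replay training setup from Example #3 and the Save method exercised in Example #4 (the exact hyper-parameters are assumptions):

    # Sketch: train the same DQN with experience replay, then save its weights
    # under the filename that the demo above loads.
    trainer = runner_impl.ExperienceReplayRunner(
        experience_capacity=100000, experience_sample_batch_size=batch_size)
    train_policy = policy_impl.GreedyPolicyWithRandomness(epsilon=0.1)
    trainer.Run(env=env, brain=qfunc, policy=train_policy, num_of_episodes=500)
    qfunc.Save('saved_models/mountaincar_shape_64_rmsprop_gamma_099.weights')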
Example #3
def main(_):
    batch_size = 64  # used in qfunc and runner.
    env = environment_impl.GymEnvironment(gym.make('CartPole-v0'))
    qfunc = qfunc_impl.DQN(
        model=qfunc_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(20, 20, 20)),
        training_batch_size=batch_size,
        discount_factor=0.99,
    )
    runner = runner_impl.ExperienceReplayRunner(
        experience_capacity=100000, experience_sample_batch_size=batch_size)

    # Train 500 episodes.
    logging.ENV.debug_verbosity = 3
    policy = policy_impl.GreedyPolicyWithRandomness(epsilon=0.1)
    runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=500)

    # Test for 100 episodes.
    logging.ENV.debug_verbosity = 4
    policy = policy_impl.GreedyPolicy()
    runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=100)

    # Demo with video.
    env.TurnOnRendering(should_render=True, fps=24)
    # env.StartRecording(video_filename='demo.mp4')  # uncomment to record video.
    # First 5 runs with random actions:
    runner.Run(env=env,
               brain=qfunc_impl.RandomQFunction(env.GetActionSpaceSize()),
               policy=policy,
               num_of_episodes=5)
    # Then 10 runs with trained qfunc:
    runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=10)
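
The CartPole script above trains, tests, and demos in one process but never persists the result. To reuse the trained network later (as the MountainCar demo in Example #2 does), the weights can be saved at the end of main; Save is the call exercised in Example #4, and the path here is only illustrative:

    # Sketch: persist the trained Q-function so a later script can Load() it.
    qfunc.Save('saved_models/cartpole_shape_20-20-20.weights')  # illustrative path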
Example #4
    def test_saveLoad(self):
        tmp_file = '/tmp/DDQNTest_savedata.tmp'
        self.qfunc._SetValues(self.states, self.values)
        self.qfunc.Save(tmp_file)
        qfunc = qfunc_impl.DDQN(model_pair=(
            qfunc_impl.CreateModel(
                state_shape=(3,),
                action_space_size=2,
                hidden_layer_sizes=(3,),
            ),
            qfunc_impl.CreateModel(
                state_shape=(3,),
                action_space_size=2,
                hidden_layer_sizes=(3,),
            ),
        ))
        qfunc.Load(tmp_file)

        numpy_util.TestUtil.AssertModelWeightsEqual(qfunc._q1,
                                                    self.qfunc._model)
        numpy_util.TestUtil.AssertModelWeightsEqual(qfunc._q2,
                                                    self.qfunc._model)
Example #5
    def setUp(self) -> None:
        # State space size is 3; Action space size is 2.
        self.qfunc = qfunc_impl.DDQN(
            model_pair=(
                qfunc_impl.CreateModel(
                    state_shape=(3,),
                    action_space_size=2,
                    hidden_layer_sizes=(3,),
                ),
                qfunc_impl.CreateModel(
                    state_shape=(3,),
                    action_space_size=2,
                    hidden_layer_sizes=(3,),
                ),
            ),
            discount_factor=0.9,
        )
        self.states = numpy.array([
            [1, 2, 3],
            [4, 5, 6],
        ])

        self.values = numpy.array([
            [0.5, 0.5],
            [0.3, 0.7],
        ])
Example #6
    def _RunEnv(gym_env):
        env = environment_impl.GymEnvironment(gym_env)
        env.SetGymEnvMaxEpisodeSteps(10)
        qfunc = qfunc_impl.DQN(model=qfunc_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(4, ),
        ))

        env.Reset()
        policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

        runner_impl.SimpleRunner().Run(env=env,
                                       qfunc=qfunc,
                                       policy=policy,
                                       num_of_episodes=10)
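
_RunEnv is a smoke-test helper: it wraps a Gym environment, caps each episode at 10 steps, and drives it for 10 episodes with a fully random policy (epsilon=1.0). A hedged usage sketch, assuming the helper is reachable as a static method or module-level function:

    # Sketch: exercise an environment end-to-end with random actions.
    _RunEnv(gym.make('CartPole-v0'))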
Example #7
def main(_):
    batch_size = 64  # training batch size for the qfunc.
    env = environment_impl.GymEnvironment(gym.make('Acrobot-v1'))
    qfunc = qfunc_impl.DQN(
        model=qfunc_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(20, 20, 20)),
        training_batch_size=batch_size,
        discount_factor=0.99,
    )
    qfunc.LoadModel(
        'saved_models/acrobot_v1_shape_20-20-20_rmsprop_gamma_0.99.model')
    policy = policy_impl.GreedyPolicy()
    runner = runner_impl.NoOpRunner()

    env.TurnOnRendering(should_render=True, fps=10)
    logging.ENV.debug_verbosity = 9
    runner.Run(env=env, qfunc=qfunc, policy=policy, num_of_episodes=10)
Example #8
    def setUp(self) -> None:
        # State space size is 3; Action space size is 2.
        self.qfunc = qfunc_impl.DQN_TargetNetwork(
            model=qfunc_impl.CreateModel(
                state_shape=(3, ),
                action_space_size=2,
                hidden_layer_sizes=(6, 4),
            ),
            update_target_network_every_num_of_steps=2,
        )
        self.states = numpy.array([
            [1, 2, 3],
            [4, 5, 6],
        ])

        self.values = numpy.array([
            [0.5, 0.5],
            [0.3, 0.7],
        ])