Example #1
def main(_):
    """Demo MountainCar-v0: 5 random episodes, then 10 with a trained DQN."""
    training_batch = 64  # shared by the Q-function and the runner.

    environment = environment_impl.GymEnvironment(gym.make('MountainCar-v0'))
    environment.SetGymEnvMaxEpisodeSteps(400)

    trained_qfunc = qfunc_impl.DQN(
        model=qfunc_impl.CreateModel(
            state_shape=environment.GetStateShape(),
            action_space_size=environment.GetActionSpaceSize(),
            hidden_layer_sizes=(64,)),
        training_batch_size=training_batch,
        discount_factor=0.99,
    )
    trained_qfunc.Load(
        'saved_models/mountaincar_shape_64_rmsprop_gamma_099.weights')

    greedy = policy_impl.GreedyPolicy()
    demo_runner = runner_impl.NoOpRunner()

    environment.TurnOnRendering(should_render=True, fps=24)
    logging.ENV.debug_verbosity = 9

    environment.StartRecording(video_filename='mountaincar_demo.mp4')
    # Random baseline first so the recording contrasts untrained vs trained.
    random_qfunc = qfunc_impl.RandomQFunction(
        environment.GetActionSpaceSize())
    demo_runner.Run(
        env=environment, brain=random_qfunc, policy=greedy,
        num_of_episodes=5)
    # Then the trained Q-function.
    demo_runner.Run(
        env=environment, brain=trained_qfunc, policy=greedy,
        num_of_episodes=10)
    environment.StopRecording()
Example #2
def main(_):
    """Demo Seaquest-v0 with uniformly random actions, rendered at 24 fps."""
    environment = environment_impl.GymEnvironment(gym.make('Seaquest-v0'))
    environment.TurnOnRendering(should_render=True, fps=24)

    # epsilon=1.0 means the policy always explores, so the Q-values from the
    # random-value function are never actually used to pick an action.
    random_qfunc = qfunc_impl.RandomValueQFunction(
        action_space_size=environment.GetActionSpaceSize())
    random_policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

    runner_impl.NoOpRunner().Run(
        environment, random_qfunc, random_policy, num_of_episodes=10)
Example #3
    def Demo(self,
             num_of_episodes: int = 10,
             save_video_to: t.Text = 'demo.mp4'):
        """Starts a demo run and records it to a video file.

        Args:
          num_of_episodes: number of runs to demo.
          save_video_to: saves the demo video for the run to a file of this
            name. It must end with mp4.
        """
        environment = self.env
        environment.TurnOnRendering(should_render=True, fps=24)
        environment.StartRecording(video_filename=save_video_to)
        # Greedy policy (no exploration) so the demo shows learned behavior.
        runner_impl.NoOpRunner().Run(
            env=environment,
            brain=self.qfunc,
            policy=policy_impl.GreedyPolicy(),
            num_of_episodes=num_of_episodes)
        environment.StopRecording()
        # Restore the non-rendering state the object was presumably in.
        environment.TurnOnRendering(should_render=False)
Example #4
def main(_):
    """Demo Acrobot-v1 with a pre-trained DQN for 10 rendered episodes."""
    batch_size = 64  # used in qfunc and runner.
    env = environment_impl.GymEnvironment(gym.make('Acrobot-v1'))
    qfunc = qfunc_impl.DQN(
        model=qfunc_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(20, 20, 20)),
        training_batch_size=batch_size,
        discount_factor=0.99,
    )
    qfunc.LoadModel(
        'saved_models/acrobot_v1_shape_20-20-20_rmsprop_gamma_0.99.model')
    policy = policy_impl.GreedyPolicy()
    runner = runner_impl.NoOpRunner()

    env.TurnOnRendering(should_render=True, fps=10)
    logging.ENV.debug_verbosity = 9
    # Fix: every other Run call site passes the Q-function via the `brain`
    # keyword (not `qfunc=`); with a `brain` parameter the old keyword would
    # raise a TypeError before any episode ran.
    runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=10)
Example #5
  def setUp(self) -> None:
    """Creates mock collaborators and the runner under test."""
    for attr_name in ('env', 'qfunc', 'policy'):
      setattr(self, attr_name, mock.MagicMock())

    self.runner = runner_impl.NoOpRunner()