Example #1
def Train(_):
    env = environment_impl.GymEnvironment(gym.make('CartPole-v0'))
    async_env_runners = []  # type: t.List[async_runner_impl.AsyncEnvRunner]
    for _ in range(10):
        async_env_runners.append(
            async_runner_impl.AsyncEnvRunner(
                env=environment_impl.GymEnvironment(gym.make('CartPole-v0')),
                runner=runner_impl.NStepExperienceRunner(n_step_return=10),
            ))
    brain = async_runner_impl.AsyncBrain(
        a3c_impl.A3C(model=a3c_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(12, ),
        )))

    policy = policy_impl.PolicyWithDecreasingRandomness(
        base_policy=policy_impl.PiWeightedPolicy(),
        initial_epsilon=0.2,
        final_epsilon=0.05,
        decay_by_half_after_num_of_episodes=500,
    )
    runner = async_runner_impl.ParallelRunner(async_env_runners)
    runner.AddCallback(
        async_runner_impl.AsyncRunnerExtension(
            runner_extension_impl.ProgressTracer(
                report_every_num_of_episodes=100)))
    runner.AddCallback(
        async_runner_impl.AsyncRunnerExtension(
            runner_extension_impl.ModelSaver(
                save_filepath='saved_models/a3c_cartpole_12.weights',
                use_averaged_value_over_num_of_episodes=30)))

    runner.Run(brain=brain, policy=policy, num_of_episodes=2000)
Example #2
def main(_):
    batch_size = 64  # used in qfunc and runner.
    env = environment_impl.GymEnvironment(gym.make('CartPole-v0'))
    qfunc = qfunc_impl.DQN(
        model=qfunc_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(20, 20, 20)),
        training_batch_size=batch_size,
        discount_factor=0.99,
    )
    runner = runner_impl.ExperienceReplayRunner(
        experience_capacity=100000, experience_sample_batch_size=batch_size)

    # Train 500 episodes.
    logging.ENV.debug_verbosity = 3
    policy = policy_impl.GreedyPolicyWithRandomness(epsilon=0.1)
    runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=500)

    # Test for 100 episodes.
    logging.ENV.debug_verbosity = 4
    policy = policy_impl.GreedyPolicy()
    runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=100)

    # Demo with video.
    env.TurnOnRendering(should_render=True, fps=24)
    # env.StartRecording(video_filename='demo.mp4')  # uncomment to record video.
    # First 5 runs with random actions:
    runner.Run(env=env,
               brain=qfunc_impl.RandomQFunction(env.GetActionSpaceSize()),
               policy=policy,
               num_of_episodes=5)
    # Then 10 runs with trained qfunc:
    runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=10)
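If the trained network should survive the process (so it can be reloaded later, as in Examples #3 and #13), a ModelSaver callback can be attached before training. The sketch below is an assumption, not part of the original example: it presumes ExperienceReplayRunner exposes the same AddCallback hook used by the runners in Examples #1 and #14, and the save path is hypothetical.

# Sketch only: attach progress reporting and periodic weight saving before
# calling runner.Run(). Assumes AddCallback is available on this runner.
runner.AddCallback(
    runner_extension_impl.ProgressTracer(report_every_num_of_episodes=100))
runner.AddCallback(
    runner_extension_impl.ModelSaver(
        save_filepath='saved_models/dqn_cartpole_20-20-20.weights',
        use_averaged_value_over_num_of_episodes=30))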
Example #3
def main(_):
    batch_size = 64  # used in qfunc and runner.
    env = environment_impl.GymEnvironment(gym.make('MountainCar-v0'))
    env.SetGymEnvMaxEpisodeSteps(400)
    qfunc = qfunc_impl.DQN(
        model=qfunc_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(64, )),
        training_batch_size=batch_size,
        discount_factor=0.99,
    )
    qfunc.Load('saved_models/mountaincar_shape_64_rmsprop_gamma_099.weights')
    policy = policy_impl.GreedyPolicy()
    runner = runner_impl.NoOpRunner()

    env.TurnOnRendering(should_render=True, fps=24)
    logging.ENV.debug_verbosity = 9

    env.StartRecording(video_filename='mountaincar_demo.mp4')
    # First 5 runs with random actions:
    rand_qfunc = qfunc_impl.RandomQFunction(env.GetActionSpaceSize())
    runner.Run(env=env, brain=rand_qfunc, policy=policy, num_of_episodes=5)
    # Then 10 runs with trained qfunc:
    runner.Run(env=env, brain=qfunc, policy=policy, num_of_episodes=10)
    env.StopRecording()
Example #4
    def __init__(self):
        self._env = environment_impl.GymEnvironment(
            gym_env=gym.make('CartPole-v0'))
        self._brain = a3c_impl.A3C(model=a3c_impl.CreateModel(
            state_shape=self._env.GetStateShape(),
            action_space_size=self._env.GetActionSpaceSize(),
            hidden_layer_sizes=(12, ),
        ))
Example #5
def main(_):
    env = environment_impl.GymEnvironment(gym.make('Seaquest-v0'))
    env.TurnOnRendering(should_render=True, fps=24)
    qfunc = qfunc_impl.RandomValueQFunction(
        action_space_size=env.GetActionSpaceSize())
    policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)
    runner = runner_impl.NoOpRunner()

    runner.Run(env, qfunc, policy, num_of_episodes=10)
Example #6
  def _RunEnv(gym_env):
    env = environment_impl.GymEnvironment(gym_env)
    qfunc = qfunc_impl.RandomValueQFunction(
      action_space_size=env.GetActionSpaceSize())
    env.Reset()
    policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

    runner_impl.SimpleRunner().Run(
      env=env, qfunc=qfunc, policy=policy, num_of_episodes=10)
Example #7
    def _RunEnv(gym_env):
        env = environment_impl.GymEnvironment(gym_env)
        env.SetGymEnvMaxEpisodeSteps(10)
        qfunc = qfunc_impl.MemoizationQFunction(
            action_space_size=env.GetActionSpaceSize())
        env.Reset()
        policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

        runner_impl.SimpleRunner().Run(env=env,
                                       brain=qfunc,
                                       policy=policy,
                                       num_of_episodes=1)
Example #8
    def test_takeAction(self):
        env = environment_impl.GymEnvironment(gym.make('CartPole-v1'))
        s = env.Reset()
        self.assertEqual((1, 4), s.shape)

        transition = env.TakeRandomAction()
        self.assertEqual((1, 4), transition.s.shape)
        self.assertEqual((1, 4), transition.sp.shape)

        old_s = transition.sp
        transition = env.TakeRandomAction()
        self.assertArrayEq(old_s, transition.s)
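assertArrayEq is not a standard unittest assertion; it comes from the test base class, which is not shown here. A minimal sketch of such a helper (an assumption, not the library's actual implementation) could simply delegate to numpy's testing utilities:

import numpy as np

class ArrayAssertionsMixin:
    # Hypothetical helper; the real test base class may implement it differently.
    def assertArrayEq(self, expected, actual):
        np.testing.assert_array_equal(expected, actual)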
Example #9
    def __init__(
        self,
        gym_env_name: t.Text,
        model_shape: t.Iterable[int] = (20, 20, 20),
        report_every_num_of_episodes: int = 100,
    ):
        """Ctor.

    Default implementations are provided for all objects. They can be changed
    by directly setting the public properties after the creation.

    Args:
      gym_env_name: name of the gym environment, like "LunarLander-v2".
      model_shape: a list of number of nodes per hidden layer.
      report_every_num_of_episodes: do progress report every this number of
        episodes.
    """
        self._gym_env_name = gym_env_name
        self._model_shape = tuple(model_shape)

        self.env = environment_impl.GymEnvironment(gym.make(gym_env_name))
        self.qfunc = qfunc_impl.DDQN(
            model_pair=(qfunc_impl.CreateModel(
                state_shape=self.env.GetStateShape(),
                action_space_size=self.env.GetActionSpaceSize(),
                hidden_layer_sizes=model_shape),
                        qfunc_impl.CreateModel(
                            state_shape=self.env.GetStateShape(),
                            action_space_size=self.env.GetActionSpaceSize(),
                            hidden_layer_sizes=model_shape)),
            training_batch_size=DEFAULT_BATCH_SIZE,
            discount_factor=0.99,
        )
        logging.printf('Using qfunc implementation: %s',
                       string.GetClassName(self.qfunc))
        self.policy = policy_impl.GreedyPolicyWithDecreasingRandomness(
            initial_epsilon=1.0,
            final_epsilon=0.1,
            decay_by_half_after_num_of_episodes=500)
        logging.printf('Using policy implementation: %s',
                       string.GetClassName(self.policy))

        self.runner = runner_impl.ExperienceReplayRunner(
            experience_capacity=100000,
            experience_sample_batch_size=DEFAULT_BATCH_SIZE)
        logging.printf('Using runner implementation: %s',
                       string.GetClassName(self.runner))

        self._progress_tracer = runner_extension_impl.ProgressTracer(
            report_every_num_of_episodes=report_every_num_of_episodes)
        self._model_saver = runner_extension_impl.ModelSaver(
            self._GetModelWeightsFilepath())
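As the docstring notes, every collaborator gets a default that can be replaced by assigning to the public attribute after construction. A usage sketch (the enclosing class name is not visible in this snippet, so "Agent" below is a hypothetical stand-in):

# Hypothetical usage; "Agent" is a placeholder for the class defined above.
agent = Agent('LunarLander-v2', model_shape=(64, 64))
# Swap the default policy for a fixed-epsilon one after construction.
agent.policy = policy_impl.GreedyPolicyWithRandomness(epsilon=0.1)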
Example #10
    def __init__(
        self,
        position_reward_factor: float = 1.0,
        speed_reward_factor: float = 1.0,
    ):
        self._original_env = environment_impl.GymEnvironment(
            gym.make('MountainCar-v0'))
        super().__init__(
            state_shape=self._original_env.GetStateShape(),
            action_space_size=self._original_env.GetActionSpaceSize())

        self._position_reward_factor = position_reward_factor
        self._reward_factor = speed_reward_factor
Example #11
def Demo(_):
    env = environment_impl.GymEnvironment(gym.make('CartPole-v0'))
    brain = a3c_impl.A3C(model=a3c_impl.CreateModel(
        state_shape=env.GetStateShape(),
        action_space_size=env.GetActionSpaceSize(),
        hidden_layer_sizes=(12, ),
    ))
    brain.Load('saved_models/a3c_cartpole_12.weights')
    policy = policy_impl.GreedyPolicy()

    env.StartRecording('a3c_cartpole.mp4')
    runner = runner_impl.SimpleRunner()
    runner.Run(env=env, brain=brain, policy=policy, num_of_episodes=10)
    env.StopRecording()
Example #12
    def _RunEnv(gym_env):
        env = environment_impl.GymEnvironment(gym_env)
        env.SetGymEnvMaxEpisodeSteps(10)
        qfunc = qfunc_impl.DQN(model=qfunc_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(4, ),
        ))

        env.Reset()
        policy = policy_impl.GreedyPolicyWithRandomness(epsilon=1.0)

        runner_impl.SimpleRunner().Run(env=env,
                                       qfunc=qfunc,
                                       policy=policy,
                                       num_of_episodes=10)
Example #13
def main(_):
    batch_size = 64  # used in qfunc and runner.
    env = environment_impl.GymEnvironment(gym.make('Acrobot-v1'))
    qfunc = qfunc_impl.DQN(
        model=qfunc_impl.CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
            hidden_layer_sizes=(20, 20, 20)),
        training_batch_size=batch_size,
        discount_factor=0.99,
    )
    qfunc.LoadModel(
        'saved_models/acrobot_v1_shape_20-20-20_rmsprop_gamma_0.99.model')
    policy = policy_impl.GreedyPolicy()
    runner = runner_impl.NoOpRunner()

    env.TurnOnRendering(should_render=True, fps=10)
    logging.ENV.debug_verbosity = 9
    runner.Run(env=env, qfunc=qfunc, policy=policy, num_of_episodes=10)
Example #14
def main(_):
    running_environment.ForceCpuForTheRun()

    env = environment_impl.GymEnvironment(gym.make('CartPole-v0'))
    brain = a3c_impl.A3C(model=a3c_impl.CreateModel(
        state_shape=env.GetStateShape(),
        action_space_size=env.GetActionSpaceSize(),
        hidden_layer_sizes=(12, ),
    ))

    policy = policy_impl.PolicyWithDecreasingRandomness(
        base_policy=policy_impl.PiWeightedPolicy(),
        initial_epsilon=0.4,
        final_epsilon=0.05,
        decay_by_half_after_num_of_episodes=500,
    )
    runner = a3c_impl.NStepExperienceRunner()
    # runner = runner_impl.SimpleRunner()
    runner.AddCallback(
        runner_extension_impl.ProgressTracer(report_every_num_of_episodes=100))

    runner.Run(env=env, brain=brain, policy=policy, num_of_episodes=1200)
Example #15
GAMMA_N = GAMMA**N_STEP_RETURN

EPS_START = 0.4
EPS_STOP = .15
EPS_STEPS = 75000

MIN_BATCH = 32
LEARNING_RATE = 5e-3

LOSS_V = .5  # v loss coefficient
LOSS_ENTROPY = .01  # entropy coefficient

count = 0
rewards = []

env = environment_impl.GymEnvironment(gym_env=gym.make('CartPole-v0'))
NUM_STATE = env.GetStateShape()[0]
NUM_ACTIONS = env.GetActionSpaceSize()
NONE_STATE = np.zeros(NUM_STATE)


# ---------
class JBrain:
    train_queue = [[], [], [], [], []]  # s, a, r, s', s' terminal mask
    lock_queue = threading.Lock()

    def __init__(self):
        self.session = tf.Session()
        K.set_session(self.session)
        K.manual_variable_initialization(True)
Example #16
    def _RunEnv(gym_env):
        env = environment_impl.GymEnvironment(gym_env)
        env.Reset()
        for _ in range(10):
            env.TakeRandomAction()
Example #17
 def test_getStateShape(self):
     env = environment_impl.GymEnvironment(gym.make('SpaceInvaders-v4'))
     self.assertEqual((210, 160, 3), env.GetStateShape())