Example #1
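This test trains an A3C agent for a few steps, saves its weights, reloads them into a freshly constructed agent, and asserts that the value estimates for the same state are unchanged.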
    def test_saveLoad(self):
        a3c = a3c_impl.A3C(model=a3c_impl.CreateModel(
            state_shape=(3, ),
            action_space_size=2,
            hidden_layer_sizes=(3, ),
        ), )
        tmp_file = tempfile.NamedTemporaryFile().name  # Only the path is used; Save() writes the weights there.
        s = numpy.array([[1, 2, 3]])
        for _ in range(10):
            a3c.UpdateFromTransitions([
                base.Transition(s=s,
                                a=numpy.array([[1, 0]]),
                                r=1.0,
                                sp=numpy.array([[4, 5, 6]])),
            ])
        a3c.Save(tmp_file)
        saved_values = a3c.GetValues(s)

        a3c = a3c_impl.A3C(model=a3c_impl.CreateModel(
            state_shape=(3, ),
            action_space_size=2,
            hidden_layer_sizes=(3, ),
        ), )
        a3c.Load(tmp_file)

        numpy_util.TestUtil.AssertArrayEqual(saved_values, a3c.GetValues(s))
Example #2
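Train builds ten asynchronous SpaceInvaders environments, shares a single A3C brain among them (warm-started from a previously saved weight file), and runs parallel training with an epsilon-decaying policy, progress tracing, and periodic model saving.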
def Train(_):
    env = screen_learning.ScreenGymEnvironment(gym.make('SpaceInvaders-v0'))
    async_env_runners = []  # type: t.List[async_runner_impl.AsyncEnvRunner]
    for _ in range(10):
        async_env_runners.append(
            async_runner_impl.AsyncEnvRunner(
                env=screen_learning.ScreenGymEnvironment(
                    gym.make('SpaceInvaders-v0')),
                runner=runner_impl.NStepExperienceRunner(n_step_return=10),
            ))
    brain = async_runner_impl.AsyncBrain(
        a3c_impl.A3C(model=CreateModel(
            state_shape=env.GetStateShape(),
            action_space_size=env.GetActionSpaceSize(),
        )))
    brain.Load('saved_models/a3c_invader.weights')  # warm start

    policy = policy_impl.PolicyWithDecreasingRandomness(
        base_policy=policy_impl.PiWeightedPolicy(),
        initial_epsilon=0.2,
        final_epsilon=0.05,
        decay_by_half_after_num_of_episodes=500,
    )
    runner = async_runner_impl.ParallelRunner(async_env_runners)
    runner.AddCallback(
        async_runner_impl.AsyncRunnerExtension(
            runner_extension_impl.ProgressTracer(
                report_every_num_of_episodes=10)))
    runner.AddCallback(
        async_runner_impl.AsyncRunnerExtension(
            runner_extension_impl.ModelSaver(
                save_filepath='saved_models/a3c_invader.weights',
                use_averaged_value_over_num_of_episodes=30)))

    runner.Run(brain=brain, policy=policy, num_of_episodes=200)
Example #3
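This convergence test repeatedly rewards one action and penalizes the other for a fixed state, then checks that one more pair of updates does not decrease the value of the rewarded action.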
    def test_convergence(self):
        a3c = a3c_impl.A3C(
            model=a3c_impl.CreateModel(
                state_shape=(3, ),
                action_space_size=2,
                hidden_layer_sizes=(3, ),
            ),
            # optimizer=a3c_impl.CreateDefaultOptimizer(learning_rate=0.05),
        )
        s = numpy.array([[1, 2, 3]])
        a1 = numpy.array([[1, 0]])
        a2 = numpy.array([[0, 1]])

        for _ in range(10):
            # Train on both actions within a single step; otherwise the values
            # show a "staggering" effect.
            a3c.UpdateFromTransitions([
                base.Transition(s=s, a=a1, r=1.0, sp=None),
            ])
            a3c.UpdateFromTransitions([
                base.Transition(s=s, a=a2, r=-1.0, sp=s),
            ])
            logging.printf('%s', a3c.GetValues(s))
        old_value_a1 = a3c.GetActionValues(a3c.GetValues(s), a1)
        # Trains for one step, for both actions.
        a3c.UpdateFromTransitions([
            base.Transition(s=s, a=a1, r=1.0, sp=None),
        ])
        a3c.UpdateFromTransitions([
            base.Transition(s=s, a=a2, r=-1.0, sp=s),
        ])
        self.assertGreaterEqual(a3c.GetActionValues(a3c.GetValues(s), a1),
                                old_value_a1)
Example #4
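This constructor wires an A3C brain to a CartPole-v0 Gym environment, sizing the model from the environment's state shape and action space.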
    def __init__(self):
        self._env = environment_impl.GymEnvironment(
            gym_env=gym.make('CartPole-v0'))
        self._brain = a3c_impl.A3C(model=a3c_impl.CreateModel(
            state_shape=self._env.GetStateShape(),
            action_space_size=self._env.GetActionSpaceSize(),
            hidden_layer_sizes=(12, ),
        ))
Example #5
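Demo loads previously saved SpaceInvaders weights, plays greedily for ten episodes, and records the run to an MP4 file.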
def Demo(_):
    env = screen_learning.ScreenGymEnvironment(gym.make('SpaceInvaders-v0'))
    brain = a3c_impl.A3C(model=a3c_impl.CreateModel(
        state_shape=env.GetStateShape(),
        action_space_size=env.GetActionSpaceSize(),
        hidden_layer_sizes=(12, ),
    ))
    brain.Load('saved_models/a3c_invader.weights')
    policy = policy_impl.GreedyPolicy()

    env.StartRecording('a3c_invader.mp4')
    runner = runner_impl.SimpleRunner()
    runner.Run(env=env, brain=brain, policy=policy, num_of_episodes=10)
    env.StopRecording()
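
The accompanying main function forces CPU execution and trains the same A3C setup on CartPole-v0, using an n-step experience runner and a policy whose randomness decays over episodes.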
def main(_):
    running_environment.ForceCpuForTheRun()

    env = environment_impl.GymEnvironment(gym.make('CartPole-v0'))
    brain = a3c_impl.A3C(model=a3c_impl.CreateModel(
        state_shape=env.GetStateShape(),
        action_space_size=env.GetActionSpaceSize(),
        hidden_layer_sizes=(12, ),
    ))

    policy = policy_impl.PolicyWithDecreasingRandomness(
        base_policy=policy_impl.PiWeightedPolicy(),
        initial_epsilon=0.4,
        final_epsilon=0.05,
        decay_by_half_after_num_of_episodes=500,
    )
    runner = a3c_impl.NStepExperienceRunner()
    # runner = runner_impl.SimpleRunner()
    runner.AddCallback(
        runner_extension_impl.ProgressTracer(report_every_num_of_episodes=100))

    runner.Run(env=env, brain=brain, policy=policy, num_of_episodes=1200)