def make_sarsa_rl_agent(processor: Processor_56x5, nbr_layers=2):
    """Build and compile a SARSA agent around the processor's network.

    :param processor: Processor_56x5 instance; supplies the Q-network via
        ``create_model`` and translates observations/actions for the agent.
    :param nbr_layers: number of layers forwarded to ``processor.create_model``.
    :return: a compiled :class:`SarsaAgent` ready for ``fit``/``test``.
    """
    q_network = processor.create_model(nbr_layers=nbr_layers)
    # Greedy evaluation policy; training policy is the agent's default.
    agent = SarsaAgent(model=q_network,
                       nb_actions=NBR_TICHU_ACTIONS,
                       nb_steps_warmup=10,
                       gamma=0.99,
                       test_policy=GreedyQPolicy(),
                       processor=processor)
    agent.compile(Adam(lr=1e-3), metrics=['mae'])
    return agent
def test_sarsa():
    """Train SARSA on the deterministic two-round toy environment and
    verify it reaches the optimal mean episode reward of 3."""
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    n_actions = env.action_space.n

    # A tiny network suffices for this two-state problem.
    net = Sequential([
        Dense(16, input_shape=(1,)),
        Activation('relu'),
        Dense(n_actions, activation='linear'),
    ])

    policy = EpsGreedyQPolicy(eps=.1)
    agent = SarsaAgent(model=net, nb_actions=n_actions,
                       nb_steps_warmup=50, policy=policy)
    agent.compile(Adam(lr=1e-3))
    agent.fit(env, nb_steps=20000, visualize=False, verbose=0)

    # Evaluate greedily (epsilon switched off).
    policy.eps = 0.
    history = agent.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(history.history['episode_reward']), 3.)
print('Observations: ', (1, ) + env.observation_space.shape) # create model model = Sequential() model.add(Flatten(input_shape=(1, ) + env.observation_space.shape)) model.add(Dense(64)) model.add(Activation('relu')) model.add(Dense(32)) model.add(Activation('relu')) model.add(Dense(16)) model.add(Activation('relu')) model.add(Dense(nb_actions)) model.add(Activation('linear')) # configure agent policy = BoltzmannQPolicy() dqn = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy) dqn.compile(Adam(lr=1e-5), metrics=['mae']) # run agent history = dqn.fit(env, nb_steps=10000, visualize=False, verbose=1, log_interval=100) plt.plot(history.history['episode_reward']) plt.show()
# Finally, evaluate our algorithm for 5 episodes. dqn.test(env, nb_episodes=5, visualize=False) else: # SARSA # SARSA does not require a memory. policy = BoltzmannQPolicy() model = Sequential() model.add(Flatten(input_shape=(1,) + env.observation_space.shape)) model.add(Dense(state_size/2)) model.add(Activation('relu')) model.add(Dense(state_size/4)) model.add(Activation('relu')) model.add(Dense(state_size/8)) model.add(Activation('relu')) model.add(Dense(nb_actions, activation='linear')) print(model.summary()) sarsa = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy) sarsa.compile(Adam(lr=1e-3), metrics=['mae']) sarsa.fit(env, nb_steps=500000, visualize=False, verbose=1) # After training is done, we save the final weights. sarsa.save_weights('weights.h5f', overwrite=True) # Finally, evaluate our algorithm for 5 episodes. sarsa.test(env, nb_episodes=5, visualize=False)