    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=False)
else:  # SARSA
    # SARSA is on-policy and does not require a replay memory.
    policy = BoltzmannQPolicy()
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    # Dense layer sizes must be integers, so use floor division.
    model.add(Dense(state_size // 2))
    model.add(Activation('relu'))
    model.add(Dense(state_size // 4))
    model.add(Activation('relu'))
    model.add(Dense(state_size // 8))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))
    print(model.summary())

    sarsa = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
    sarsa.compile(Adam(lr=1e-3), metrics=['mae'])
    sarsa.fit(env, nb_steps=500000, visualize=False, verbose=1)

    # After training is done, we save the final weights.
    sarsa.save_weights('weights.h5f', overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    sarsa.test(env, nb_episodes=5, visualize=False)
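# If you want to re-run the evaluation later without retraining, the saved weights can be
# restored onto an identically constructed agent. A minimal sketch, assuming the same
# model, nb_actions, policy, and env as in the SARSA branch above:
sarsa = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
sarsa.compile(Adam(lr=1e-3), metrics=['mae'])
sarsa.load_weights('weights.h5f')  # restore the weights saved by save_weights() above
sarsa.test(env, nb_episodes=5, visualize=False)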
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# SarsaAgent (rl/agents/sarsa.py) updates on-policy from every experience, so no replay memory is used.
policy = BoltzmannQPolicy()
sarsa = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! Visualizing the training is nice for show, but it
# slows training down quite a lot. You can always safely abort the training prematurely
# with Ctrl+C.
sarsa.fit(env, nb_steps=5000, visualize=False, verbose=2)

# After training is done, we save the final weights.
sarsa.save_weights('sarsa_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
sarsa.test(env, nb_episodes=5, visualize=False)
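# For intuition, BoltzmannQPolicy samples actions with probability proportional to
# exp(Q / tau) instead of always taking the greedy argmax, which keeps some exploration
# going during training. A simplified standalone sketch of that selection rule
# (keras-rl's implementation additionally clips the exponent for numerical safety):
import numpy as np

def boltzmann_action(q_values, tau=1.0):
    """Sample an action index with probability proportional to exp(Q / tau)."""
    # Subtract the max Q-value before exponentiating for numerical stability.
    exp_q = np.exp((q_values - np.max(q_values)) / tau)
    probs = exp_q / np.sum(exp_q)
    return np.random.choice(len(q_values), p=probs)

# Example: action 1 has the highest Q-value, so it is picked most often, but not always.
print(boltzmann_action(np.array([1.0, 2.0, 0.5])))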