def test_cem():
    """Smoke-test CEMAgent: train on TwoRoundDeterministicRewardEnv and
    check it reaches the optimal mean episode reward of 3.0."""
    env = TwoRoundDeterministicRewardEnv()
    # Seed every RNG involved so the run is reproducible.
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Minimal network: one hidden ReLU layer, linear output over actions.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = EpisodeParameterMemory(limit=1000, window_length=1)
    agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory)
    agent.compile()
    agent.fit(env, nb_steps=2000, visualize=False, verbose=1)

    # The optimal policy earns exactly 3.0 per episode in this env.
    history = agent.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(history.history['episode_reward']), 3.)
def test_cem():
    """Train a CEM agent on the two-round deterministic reward task and
    assert it attains the optimal average episode reward (3.0).

    NOTE(review): this duplicates an identical ``test_cem`` defined earlier
    in the file; the later definition shadows the earlier one at import
    time, so only one of them actually runs — consider removing one.
    """
    env = TwoRoundDeterministicRewardEnv()
    # Fix all random sources for a deterministic test run.
    random_seed = 123
    np.random.seed(random_seed)
    env.seed(random_seed)
    random.seed(random_seed)
    nb_actions = env.action_space.n

    # Tiny model: 1-dim observation -> 16 ReLU units -> linear action scores.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = EpisodeParameterMemory(limit=1000, window_length=1)
    agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory)
    agent.compile()
    agent.fit(env, nb_steps=2000, visualize=False, verbose=1)

    results = agent.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(results.history['episode_reward']), 3.)
print(model.summary())

# Configure and compile the CEM agent. Any built-in Keras optimizer and
# metrics could be plugged in here as well.
memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               batch_size=1, nb_steps_warmup=10, train_interval=10,
               elite_frac=0.5)
cem.compile()

# Train the agent. Rendering is disabled because it slows training down
# considerably; the run can always be aborted safely with Ctrl + C.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

# Training finished: persist the learned parameters to disk.
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate the trained agent for 10 episodes.
cem.test(env, nb_episodes=10)