Example #1
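# The DDPG `agent` used below is never constructed in this snippet. What
# follows is a minimal sketch of how such an agent is typically assembled
# with keras-rl; the network shapes and hyperparameters are illustrative
# assumptions, not the original author's values.
from keras.layers import Concatenate, Dense, Flatten, Input
from keras.models import Model, Sequential
from keras.optimizers import Adam
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

nb_actions_ddpg = 1  # a single continuous action (assumed)

# Actor: maps an observation to a continuous action.
actor = Sequential([
    Flatten(input_shape=(1, 1)),
    Dense(16, activation='relu'),
    Dense(nb_actions_ddpg, activation='tanh'),
])

# Critic: maps an (observation, action) pair to a scalar Q-value. The
# observation input comes first to match the batch layout used in the
# Q-value sweep further down.
observation_input = Input(shape=(1, 1), name='observation_input')
action_input = Input(shape=(nb_actions_ddpg,), name='action_input')
x = Concatenate()([Flatten()(observation_input), action_input])
x = Dense(16, activation='relu')(x)
x = Dense(1, activation='linear')(x)
critic = Model(inputs=[observation_input, action_input], outputs=x)

agent = DDPGAgent(nb_actions=nb_actions_ddpg, actor=actor, critic=critic,
                  critic_action_input=action_input,
                  memory=SequentialMemory(limit=50000, window_length=1),
                  random_process=OrnsteinUhlenbeckProcess(
                      size=nb_actions_ddpg, theta=.15, mu=0., sigma=.3),
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=1e-3), metrics=['mae'])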
# agent.load_weights('sq_{}_weights.h5f'.format(ENV_NAME))
# Okay, now it's time to learn something! Training is skipped here in favour of
# loading pre-trained weights; the commented-out fit() call below shows how to
# train from scratch. You can always safely abort training prematurely using
# Ctrl + C.
agent.load_weights('ddpg_fixed_weights_{}_weights.h5f'.format(ENV_NAME))

# env.is_train = True
# agent.fit(env, nb_steps=50000, visualize=False, verbose=1, nb_max_episode_steps=20)

# After training is done, we save the final weights.
# agent.save_weights('ddpg_fixed_weights_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate the trained agent. Switch the custom environment to
# evaluation mode and lay out its plots in a 1x5 grid (is_train, plot_row and
# plot_col are attributes of this custom environment).

env.is_train = False
env.plot_row = 1
env.plot_col = 5

# Sweep the target critic over every (state, action) pair: for each discrete
# action, predict Q(s, a) for the whole batch of states and append the result
# as a new column.
q_values = pd.DataFrame()
st = status.reshape([-1, 1])
for action in actions:
    state1_batch_with_action = [
        st,
        np.ones(st.shape).reshape(-1, 1, 1) * action,
    ]
    q_column = pd.DataFrame(
        agent.target_critic.predict_on_batch(state1_batch_with_action))
    q_values = pd.concat([q_values, q_column], axis=1)
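# The loop above yields one column of Q-values per discrete action. A sketch
# of how the table can be used (assuming `actions` holds the discrete action
# values, which this snippet never defines):
q_values.columns = list(actions)
greedy_actions = q_values.idxmax(axis=1)  # highest-Q action for each state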
# Next, configure and compile a DQN agent. You can use any built-in Keras
# optimizer and even the metrics!
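# The original snippet assumes `model` and `nb_actions` already exist. A
# minimal sketch of a typical keras-rl Q-network for a 1-D observation
# (the layer sizes are assumptions, not the author's architecture):
from keras.layers import Dense, Flatten
from keras.models import Sequential

nb_actions = len(actions)  # one Q-value output per discrete action (assumed)
model = Sequential([
    Flatten(input_shape=(1, 1)),  # (window_length, obs_dim), window_length=1
    Dense(16, activation='relu'),
    Dense(16, activation='relu'),
    Dense(nb_actions, activation='linear'),
])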
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=20,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! Visualizing the training slows it
# down quite a lot, so it is disabled below (visualize=False). You can always
# safely abort the training prematurely using Ctrl + C.
env.is_train = True

# Warm-start from previously saved weights, then continue training.
dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))
dqn.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Export the learned greedy policy as an observation -> action lookup table.
with open('dqn_action.json', 'w') as fw:
    observation = status.tolist()
    action = [
        float(actions[dqn.forward(np.array([obs]))]) for obs in observation
    ]
    json.dump({'observation': observation, 'action': action}, fw)
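# Sketch of how the exported table could be consumed later; the nearest-point
# lookup below is an assumption, not part of the original script.
with open('dqn_action.json') as fr:
    table = json.load(fr)

def policy(obs):
    # Return the action recorded for the grid point closest to `obs`.
    i = min(range(len(table['observation'])),
            key=lambda k: abs(table['observation'][k] - obs))
    return table['action'][i]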

# Batch of states shaped (batch, window_length, features) for network prediction.
state_batch = status.reshape([-1, 1, 1])