Example #1
import matplotlib.pyplot as plt  # used for the plots in the evaluation loop below

# Okay, now it's time to learn something! Visualizing the training slows it down quite
# a lot, so it is turned off here (visualize=False). You can always safely abort the
# training prematurely using Ctrl + C.
agent.fit(env,
          nb_steps=50000,
          visualize=False,
          verbose=1,
          nb_max_start_steps=0)
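fit can also take keras-rl callbacks, which is useful for long runs; a minimal sketch, assuming rl.callbacks is available (file names and intervals are illustrative):

from rl.callbacks import ModelIntervalCheckpoint, FileLogger

# Periodically checkpoint the weights and log per-episode metrics to a JSON file.
callbacks = [
    ModelIntervalCheckpoint('ddpg_test_checkpoint_{step}.h5f', interval=10000),
    FileLogger('ddpg_test_log.json', interval=100),
]
agent.fit(env, nb_steps=50000, visualize=False, verbose=1, callbacks=callbacks)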

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format("test"), overwrite=True)
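Instead of the hand-rolled evaluation loop below, keras-rl also provides a built-in test helper; a minimal sketch, assuming the same env (the episode count is illustrative):

# Run a few evaluation episodes; in test mode the agent acts without exploration noise.
agent.test(env, nb_episodes=5, visualize=False)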

observation = env.reset()  # Obtain an initial observation of the environment
while True:
    print(observation)
    action = agent.select_action([observation])
    print(action)
    action = action.argmax()
    observation, reward, done, info = env.step(action)
    graph_stk, graph_holding, graph_liquidasset, graph_staticasset = env.graphing()
    if done:
        fig, axarr = plt.subplots(3, 1)
        fig.suptitle("DDPG Agent", fontsize=10)
        axarr[0].plot(graph_holding)
        axarr[0].set_title('Stocks held')
        axarr[1].plot(graph_stk)
        axarr[1].set_title('Stocks value')
        axarr[2].plot(graph_liquidasset, color='red')
        axarr[2].plot(graph_staticasset, color='blue')
        axarr[2].set_title('Comparison of stagnant and RL-bot asset value')
        plt.show()
        break
Example #2
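This example assumes that nb_actions, actor, critic and action_input were already defined. A minimal sketch of how such networks are typically built for keras-rl's DDPGAgent (layer sizes are illustrative; a Gym-style env with a discrete action space is assumed, matching the argmax in the evaluation loop):

from keras.models import Model, Sequential
from keras.layers import Dense, Flatten, Input, Concatenate

nb_actions = env.action_space.n  # assumed discrete actions, matching the argmax below

# Actor: maps an observation window to one value per action.
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
actor.add(Dense(16, activation='relu'))
actor.add(Dense(16, activation='relu'))
actor.add(Dense(nb_actions, activation='linear'))

# Critic: maps (action, observation) to a single Q-value.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
x = Concatenate()([action_input, Flatten()(observation_input)])
x = Dense(32, activation='relu')(x)
x = Dense(1, activation='linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)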
import matplotlib.pyplot as plt
from keras.optimizers import Adam
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

# Finally, we configure and compile our agent. You can use any of the built-in Keras
# optimizers and even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
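DDPGAgent.compile can also take a pair of optimizers, one for the actor and one for the critic; a hedged alternative to the call above, with illustrative learning rates:

# agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])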

agent.load_weights('ddpg_test_weights.h5f')


observation = env.reset() # Obtain an initial observation of the environment
while True:
    action = agent.select_action([observation]).argmax()
    observation, reward, done, info = env.step(action)
    graph_stk, graph_holding, graph_liquidasset, graph_staticasset = env.graphing()
    if done:
        fig, axarr = plt.subplots(3, 1)
        fig.suptitle("DDPG Agent", fontsize=10)
        axarr[0].plot(graph_holding)
        axarr[0].set_title('Stocks held')
        axarr[1].plot(graph_stk)
        axarr[1].set_title('Stocks value')
        axarr[2].plot(graph_liquidasset, color='red')
        axarr[2].plot(graph_staticasset, color='blue')
        axarr[2].set_title('Comparison of stagnant and RL-bot asset value')
        fig.tight_layout()
        fig.subplots_adjust(top=0.88)
        plt.savefig("training/BOT_4/train.png")
        break