state = train_env.reset()
log_probs = []
rewards = []
profits = []
hold_profits = []

for steps in range(train_env.steps_left):
    if steps % 500 == 0:
        train_env.render()

    action, log_prob = policy_net.get_action(state, device)
    new_state, reward, done, _ = train_env.step(action)

    log_probs.append(log_prob)
    rewards.append(reward)
    profits.append(train_env._get_profit())
    hold_profits.append(train_env._get_hold_profit())

    if done:
        # Evaluate the episode: track the mean reward and a 10-episode moving average
        all_rewards.append(np.mean(rewards))
        avg_rewards.append(np.mean(all_rewards[-10:]))
        # Checkpoint the network whenever the mean episode reward improves
        # (guard against indexing into an empty history on the first episode)
        if len(all_rewards) > 1 and all_rewards[-1] > all_rewards[-2]:
            torch.save(policy_net.state_dict(), 'model/state_dict3.pt')
            print(f'Reward increased ({episode - 1} --> {episode}).\nSaving model ...')
        update_policy(policy_net, rewards, log_probs)
        print(f'\nDone episode {episode}')
        break

    state = new_state
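The training loop hands the collected rewards and log-probabilities to update_policy, which is where the actual learning happens. As a point of reference, here is a minimal REINFORCE-style sketch of what such an update could look like; the discount factor GAMMA and the Adam optimizer are assumptions for illustration, not taken from this code.

# Hypothetical sketch of a REINFORCE update; GAMMA and optimizer
# are assumptions, not part of the post's code.
import torch

GAMMA = 0.99  # assumed discount factor
optimizer = torch.optim.Adam(policy_net.parameters(), lr=1e-3)  # assumed

def update_policy(policy_net, rewards, log_probs):
    # Discounted returns G_t, computed from the end of the episode backwards
    returns = []
    G = 0.0
    for r in reversed(rewards):
        G = r + GAMMA * G
        returns.insert(0, G)
    returns = torch.tensor(returns)
    # Normalizing the returns reduces the variance of the gradient estimate
    returns = (returns - returns.mean()) / (returns.std() + 1e-9)

    # Policy-gradient loss: -sum_t log pi(a_t | s_t) * G_t
    loss = torch.stack([-lp * G for lp, G in zip(log_probs, returns)]).sum()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()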
state = test_env.reset()
log_probs = []
rewards = []
profits = []
hold_profits = []

for steps in range(test_env.steps_left):
    if steps % 500 == 0:
        test_env.render()

    # No gradients are needed at test time
    with torch.no_grad():
        action, log_prob = policy_net.get_action(state, device)
    new_state, reward, done, _ = test_env.step(action)

    log_probs.append(log_prob)
    rewards.append(reward)
    profits.append(test_env._get_profit())
    hold_profits.append(test_env._get_hold_profit())

    if done:
        print('\nTrading session terminated:')
        print(f"total reward: {np.round(np.sum(rewards), decimals=3)}")
        print(f"bot profit: {profits[-1]}")
        print(f"hold profit: {hold_profits[-1]}")
        print(f"steps: {steps}")
        break

    state = new_state

# plot results
fig, axs = plt.subplots(3)
fig.suptitle('Trading results')
axs[0].plot(profits, 'tab:blue')
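Both loops rely on policy_net.get_action to pick an action and return its log-probability. A minimal sketch of the sampling step such a method could perform, using torch.distributions.Categorical; the body here is illustrative and assumes forward() returns a softmax distribution over actions.

# Hypothetical sketch of PolicyNetwork.get_action as called above;
# forward() is assumed to output action probabilities (softmax).
import torch
from torch.distributions import Categorical

def get_action(self, state, device):
    state = torch.as_tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
    probs = self.forward(state)            # shape: (1, n_actions)
    dist = Categorical(probs)
    action = dist.sample()                 # sample one action index
    # log_prob carries the graph needed for the policy-gradient update
    return action.item(), dist.log_prob(action)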