def learn(self, data, episodes, num_features, batch_size, use_existing_model,
          random_action_min=0.1, random_action_decay=0.99995, num_neurons=64,
          future_reward_importance=0.95):
    """Train a fresh ``Agent`` on ``data`` for ``episodes`` passes and save its model.

    Each episode walks ``t`` from ``num_features`` up to ``len(data) - 2``.
    At every step the agent picks an action, ``self.execute_action`` applies
    it, and the transition is stored with ``agent.remember``.

    Args:
        data: Indexable sequence; ``data[t]`` is handed to
            ``self.execute_action`` and ``data`` to ``self.get_state``.
            (Presumably a price series -- confirm against the caller.)
        episodes: Number of passes over ``data``.
        num_features: Forwarded to ``Agent`` and ``self.get_state``; also the
            first time step of every episode.
        batch_size: Replay runs once ``len(agent.memory)`` exceeds this value.
        use_existing_model: Forwarded unchanged to ``Agent``.
        random_action_min: Forwarded unchanged to ``Agent``.
        random_action_decay: Forwarded unchanged to ``Agent``.
        num_neurons: Forwarded unchanged to ``Agent``.
        future_reward_importance: Forwarded unchanged to ``Agent``.

    Returns:
        Tuple ``(profit_vs_episode, trades_vs_episode, epsilon_vs_episode,
        model_name, agent.num_trains, agent.epsilon)``. The three lists get
        one entry per episode that reaches its final step, so they stay empty
        when ``len(data) - 1 <= num_features``.

    Side effects:
        Resets ``self.open_orders`` at the start of every episode, prints one
        line per step plus a per-episode summary, and saves the model to
        ``files/output/model_ep<episodes>``.
    """
    # NOTE(review): another `learn` with renamed parameters appears later in
    # this file; if both live in the same class, that one shadows this one.
    agent = Agent(num_features, use_existing_model, '', random_action_min,
                  random_action_decay, num_neurons, future_reward_importance)
    l = len(data) - 1
    profit_vs_episode = []
    trades_vs_episode = []
    epsilon_vs_episode = []

    for episode in range(1, episodes + 1):
        state = self.get_state(data, num_features, num_features)
        total_profits = 0
        total_holds = 0
        # The episode starts with one order already open at data[0], and it
        # is counted as the first buy.
        total_buys = 1
        total_sells = 0
        self.open_orders = [data[0]]

        for t in range(num_features, l):
            # tradeoff between predicted and random action
            action = agent.choose_best_action(state)
            reward, total_profits, total_holds, total_buys, total_sells = self.execute_action(
                action, data[t], t, total_profits, total_holds, total_buys, total_sells)
            done = t == l - 1
            next_state = self.get_state(data, t + 1, num_features)
            print(f'row #{t} {agent.actions[action]} @{data[t]}, state1={state}, state2={next_state}, reward={reward}')

            # Store the transition in the agent's buffer for later replay.
            agent.remember(state, action, reward, next_state, done)
            state = next_state

            if done:
                eps = np.round(agent.epsilon, 3)
                print(f'Episode {episode}/{episodes} Total Profit: {formatPrice(total_profits)} , Total trades: {total_buys}, probability of random action: {eps}')
                print("---------------------------------------")
                profit_vs_episode.append(np.round(total_profits, 4))
                trades_vs_episode.append(total_buys)
                epsilon_vs_episode.append(eps)

            # NOTE(review): the source formatting was collapsed; this check is
            # reconstructed as running once per time step -- confirm.
            if len(agent.memory) > batch_size:
                agent.experience_replay(batch_size)  # fit
            # TODO: clean memory?

    model_name = "files/output/model_ep" + str(episodes)
    agent.model.save(model_name)
    print(f'{model_name} saved')
    return profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, agent.num_trains, agent.epsilon
def learn(self, data, n_episodes, n_features, batch_size, use_existing_model,
          random_action_min=0.1, random_action_decay=0.99995, n_neurons=64,
          future_reward_importance=0.95):
    """Train a fresh ``Agent`` on ``data`` for ``n_episodes`` passes and save its model.

    Each episode walks ``t`` from ``n_features`` up to ``len(data) - 2``.
    The action is applied through ``self.execute_action`` for bookkeeping,
    but the reward stored for learning is derived from ``next_state[0][-1]``:
    positive for ``'buy'``, negated for ``'sell'``, zero otherwise. On the
    last step of an episode, action ``2`` is executed at ``data[t + 1]``.

    Args:
        data: Indexable sequence; ``data[t]`` is handed to
            ``self.execute_action`` and ``data`` to ``self.get_state``.
            (Presumably a price series -- confirm against the caller.)
        n_episodes: Number of passes over ``data``.
        n_features: Forwarded to ``Agent`` and ``self.get_state``; also the
            first time step of every episode.
        batch_size: Replay runs once ``len(agent.memory)`` reaches this value.
        use_existing_model: Forwarded unchanged to ``Agent``.
        random_action_min: Forwarded unchanged to ``Agent``.
        random_action_decay: Forwarded unchanged to ``Agent``.
        n_neurons: Forwarded unchanged to ``Agent``.
        future_reward_importance: Forwarded unchanged to ``Agent``.

    Returns:
        Tuple ``(profit_vs_episode, trades_vs_episode, epsilon_vs_episode,
        model_name, agent.num_trains, agent.epsilon)``. The three lists get
        one entry per episode that reaches its final step, so they stay empty
        when ``len(data) - 1 <= n_features``.

    Side effects:
        Resets ``self.open_orders`` at the start of every episode, prints a
        per-episode summary, and saves the model to
        ``files/output/model_ep<n_episodes>``.
    """
    # NOTE(review): an earlier `learn` with different parameter names appears
    # in this file; if both live in the same class, this one shadows it.
    agent = Agent(n_features, use_existing_model, '', random_action_min,
                  random_action_decay, n_neurons, future_reward_importance)
    l = len(data) - 1
    profit_vs_episode = []
    trades_vs_episode = []
    epsilon_vs_episode = []

    for episode in range(1, n_episodes + 1):
        state = self.get_state(data, n_features, n_features)
        total_profits = 0
        total_holds = 0
        # The episode starts with one order already open at data[0], and it
        # is counted as the first buy.
        total_buys = 1
        total_sells = 0
        total_notvalid = 0  # add-on buys or sells without previous buy
        self.open_orders = [data[0]]

        for t in range(n_features, l):
            # tradeoff between predicted and random action
            action = agent.choose_best_action(state)
            # The reward returned here is discarded on purpose; only the
            # running totals are kept (see the reward shaping just below).
            _, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
                self.execute_action(action, data[t], t, total_profits, total_holds,
                                    total_buys, total_sells, total_notvalid)
            done = t == l - 1
            next_state = self.get_state(data, t + 1, n_features)

            # Reward shaping: use the last value of the first row of the next
            # state (per the original comment, presumably the next-step
            # return -- confirm), signed by the chosen action.
            action_name = agent.actions[action]
            if action_name == 'buy':
                reward = next_state[0][-1]
            elif action_name == 'sell':
                reward = -next_state[0][-1]
            else:
                reward = 0

            # Store the transition in the agent's buffer for later replay.
            agent.remember(state, action, reward, next_state, done)
            state = next_state

            if done:
                # Sell position at end of episode. Action index 2 is assumed
                # to be 'sell' in agent.actions -- TODO confirm.
                _, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
                    self.execute_action(2, data[t + 1], t + 1, total_profits, total_holds,
                                        total_buys, total_sells, total_notvalid)
                eps = np.round(agent.epsilon, 3)
                print(
                    f'Episode {episode}/{n_episodes} Total Profit: {formatPrice(total_profits * 100)},'
                    f' Total hold/buy/sell/notvalid trades: {total_holds} / {total_buys} / {total_sells} / {total_notvalid},'
                    f' probability of random action: {eps}')
                print("---------------------------------------")
                profit_vs_episode.append(np.round(total_profits, 4))
                trades_vs_episode.append(total_buys)
                epsilon_vs_episode.append(eps)

            # NOTE(review): the source formatting was collapsed; this check is
            # reconstructed as running once per time step -- confirm.
            if len(agent.memory) >= batch_size:  # enough recorded memory available
                agent.experience_replay(batch_size)  # fit
            # TODO: clean memory?

    model_name = "files/output/model_ep" + str(n_episodes)
    agent.model.save(model_name)
    print(f'{model_name} saved')
    return profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, agent.num_trains, agent.epsilon