def validation_run(env, net, episodes=100, device="cpu", epsilon=0.02, commission=0.1):
    """Evaluate a trained net on a trading env and return mean metrics.

    Runs `episodes` episodes with an epsilon-greedy policy over the net's
    argmax action, tracking per-trade profit (percent, commission-adjusted
    on both legs) and holding time for every Buy→Close round trip.

    Args:
        env: trading environment (gym-style `reset`/`step`; exposes
            `_state._cur_close()` for the current close price).
        net: policy network mapping an observation batch to action values.
        episodes: number of evaluation episodes.
        device: torch device string for the observation tensor.
        epsilon: probability of taking a random action instead of argmax.
        commission: percent commission charged on each trade leg.

    Returns:
        dict mapping each metric name in METRICS to its mean value.
    """
    stats = {metric: [] for metric in METRICS}
    for episode in range(episodes):
        obs = env.reset()
        total_reward = 0.0
        position = None        # entry close price of the open long, if any
        position_steps = None  # steps the position has been held
        episode_steps = 0
        while True:
            obs_v = torch.tensor([obs]).to(device)
            out_v = net(obs_v)
            action_idx = out_v.max(dim=1)[1].item()
            if np.random.random() < epsilon:
                action_idx = env.action_space.sample()
            action = environ.Actions(action_idx)
            close_price = env._state._cur_close()
            if action == environ.Actions.Buy and position is None:
                position = close_price
                position_steps = 0
            # BUG FIX: closing requires an OPEN position. The original
            # tested `position is None`, which both skipped every real
            # close and would raise a TypeError (arithmetic on None) the
            # first time Close fired without a position.
            elif action == environ.Actions.Close and position is not None:
                # Percent profit net of commission on entry and exit legs.
                profit = close_price - position - (close_price + position) * commission / 100
                profit = 100.0 * profit / position
                stats["order_profits"].append(profit)
                stats["order_steps"].append(position_steps)
                position = None
                position_steps = None
            obs, reward, done, _ = env.step(action_idx)
            total_reward += reward
            episode_steps += 1
            if position_steps is not None:
                position_steps += 1
            if done:
                if position is not None:
                    # Force-close any position left open at episode end,
                    # valued at the last observed close price.
                    profit = close_price - position - (close_price + position) * commission / 100
                    profit = 100.0 * profit / position
                    stats["order_profits"].append(profit)
                    stats["order_steps"].append(position_steps)
                break
        stats["episode_reward"].append(total_reward)
        stats["episode_steps"].append(episode_steps)
    return {key: np.mean(vals) for key, vals in stats.items()}
def validation_run(env, net, episodes=1000, epsilon=0.02, comission=0.1):
    """Evaluate a trained net on a trading env; return the raw stat lists.

    Unlike the averaged variants, this returns the per-episode/per-order
    lists themselves so callers can aggregate however they want.

    Args:
        env: trading environment (gym-style `reset`/`step`; exposes
            `_state._data['close']` and `_state._offset` for pricing).
        net: policy network; here it is fed `[obs]` directly — assumes the
            net accepts a plain list batch (no tensor conversion) — TODO confirm.
        episodes: number of evaluation episodes.
        epsilon: probability of taking a random action instead of argmax.
        comission: percent commission per trade leg (name kept for
            backward compatibility with existing callers).

    Returns:
        dict of lists: 'episode_reward', 'episode_steps',
        'order_profits', 'order_steps'.
    """
    stats = {
        'episode_reward': [],
        'episode_steps': [],
        'order_profits': [],
        'order_steps': []
    }
    # (removed dead local `res = {}` — it was never read or returned)
    for episode in range(episodes):
        obs = env.reset()
        total_reward = 0.0
        position = None        # entry close price of the open long, if any
        position_steps = None  # steps the position has been held
        episode_steps = 0
        while True:
            obs_v = [obs]
            out_v = net(obs_v)
            action_idx = out_v.max(dim=1)[1].item()
            if np.random.random() < epsilon:
                action_idx = env.action_space.sample()
            action = environ.Actions(action_idx)
            close_price = env._state._data['close'][env._state._offset]  # base_offset = 8308
            if action == environ.Actions.Buy and position is None:
                position = close_price
                position_steps = 0
            elif action == environ.Actions.Close and position is not None:
                # Percent profit net of commission on entry and exit legs.
                profit = close_price - position - (close_price + position) * comission / 100
                profit = 100.0 * profit / position
                stats['order_profits'].append(profit)
                stats['order_steps'].append(position_steps)
                position = None
                position_steps = None
            obs, reward, done, _ = env.step(action_idx)
            total_reward += reward
            episode_steps += 1
            if position_steps is not None:
                position_steps += 1
            if done:
                if position is not None:
                    # Force-close any position left open at episode end.
                    profit = close_price - position - (close_price + position) * comission / 100
                    profit = 100.0 * profit / position
                    stats['order_profits'].append(profit)
                    stats['order_steps'].append(position_steps)
                break
        stats['episode_reward'].append(total_reward)
        stats['episode_steps'].append(episode_steps)
    return stats
def validation_run(env, net, episodes=100, device="cpu", epsilon=0.02, comission=0.1):
    """Evaluate a trained net on a betting env (Lay/Back/Close actions).

    Profit per order is taken directly from the env's step reward at close
    time rather than computed from prices.

    Args:
        env: betting environment (gym-style `reset`/`step`; exposes
            `_state.have_position`).
        net: policy network mapping an observation batch to action values.
        episodes: number of evaluation episodes.
        device: torch device string for the observation tensor.
        epsilon: probability of taking a random action instead of argmax.
        comission: unused here; kept for signature compatibility with the
            other validation_run variants.

    Returns:
        dict mapping each stat name to its mean value.
    """
    stats = {
        'episode_reward': [],
        'episode_steps': [],
        'order_profits': [],
        'order_steps': [],
    }
    for episode in range(episodes):
        obs = env.reset()
        total_reward = 0.0
        have_position = False
        position_steps = 0
        episode_steps = 0
        while True:
            obs_v = torch.tensor([obs]).to(device)
            out_v = net(obs_v)
            action_idx = out_v.max(dim=1)[1].item()
            if np.random.random() < epsilon:
                action_idx = env.action_space.sample()
            action = environ.Actions(action_idx)
            obs, reward, done, _ = env.step(action_idx)
            if reward is None:
                reward = 0
            # BUG FIX: opening a position must require that we do NOT
            # already hold one. The original tested `and have_position`,
            # so with have_position initialised False the flag could never
            # become True and the end-of-episode close was unreachable.
            if (action == environ.Actions.Lay or action == environ.Actions.Back) and not have_position:
                have_position = True
                position_steps = 0
            elif action == environ.Actions.Close:
                # The env returns the realised profit as the step reward.
                profit = reward
                stats['order_profits'].append(profit)
                stats['order_steps'].append(position_steps)
                position_steps = 0
                have_position = False
            elif env._state.have_position:
                position_steps += 1
            total_reward += reward
            episode_steps += 1
            if done:
                if have_position is True:
                    # Position still open at episode end: record the final
                    # step reward as its profit.
                    profit = reward
                    stats['order_profits'].append(profit)
                    stats['order_steps'].append(position_steps)
                    position_steps = 0
                break
        stats['episode_reward'].append(total_reward)
        stats['episode_steps'].append(episode_steps)
    return {
        key: np.mean(vals) for key, vals in stats.items()
    }
def validation_run(env, net, episodes=100, cuda=False, epsilon=0.02, comission=0.1):
    """Run evaluation episodes and return the mean of each tracked metric.

    Plays `episodes` episodes epsilon-greedily with the given net, logging
    episode reward/length plus percent profit (net of commission on both
    legs) and holding time for every Buy→Close round trip.

    Args:
        env: trading environment exposing `reset`/`step` and
            `_state._cur_close()`.
        net: policy network producing action values for a batched obs.
        episodes: number of evaluation episodes.
        cuda: move the observation tensor to the GPU when True.
        epsilon: chance of substituting a random action.
        comission: percent commission per trade leg.

    Returns:
        dict of metric name -> mean value over all episodes/orders.
    """
    stats = {
        'episode_reward': [],
        'episode_steps': [],
        'order_profits': [],
        'order_steps': [],
    }

    def _net_profit(exit_price, entry_price):
        # Percent profit after charging commission on entry and exit.
        raw = exit_price - entry_price - (exit_price + entry_price) * comission / 100
        return 100.0 * raw / entry_price

    for _ in range(episodes):
        obs = env.reset()
        episode_reward = 0.0
        entry_price = None   # close price at which the long was opened
        held_steps = None    # how long the position has been held
        steps_done = 0
        while True:
            obs_v = Variable(torch.from_numpy(np.expand_dims(obs, 0)))
            if cuda:
                obs_v = obs_v.cuda()
            q_vals = net(obs_v)
            action_idx = q_vals.max(dim=1)[1].data.cpu().numpy()[0]
            if np.random.random() < epsilon:
                action_idx = env.action_space.sample()
            action = environ.Actions(action_idx)
            close_price = env._state._cur_close()
            if action == environ.Actions.Buy and entry_price is None:
                entry_price = close_price
                held_steps = 0
            elif action == environ.Actions.Close and entry_price is not None:
                stats['order_profits'].append(_net_profit(close_price, entry_price))
                stats['order_steps'].append(held_steps)
                entry_price = None
                held_steps = None
            obs, reward, done, _ = env.step(action_idx)
            episode_reward += reward
            steps_done += 1
            if held_steps is not None:
                held_steps += 1
            if done:
                if entry_price is not None:
                    # Close out any position still open at episode end.
                    stats['order_profits'].append(_net_profit(close_price, entry_price))
                    stats['order_steps'].append(held_steps)
                break
        stats['episode_reward'].append(episode_reward)
        stats['episode_steps'].append(steps_done)
    return {key: np.mean(vals) for key, vals in stats.items()}
# Roll out one full episode with the trained net (epsilon-greedy over its
# argmax action), then plot and save the cumulative-reward curve.
obs = env.reset()
start_price = env._state._cur_close()
total_reward = 0.0
step_idx = 0
rewards = []
done = False
while not done:
    step_idx += 1
    q_values = net(torch.tensor([obs]))
    action_idx = q_values.max(dim=1)[1].item()
    if np.random.random() < EPSILON:
        action_idx = env.action_space.sample()
    action = environ.Actions(action_idx)
    obs, reward, done, _ = env.step(action_idx)
    total_reward += reward
    rewards.append(total_reward)
    # Periodic progress report.
    if step_idx % 100 == 0:
        print("%d: reward=%.3f" % (step_idx, total_reward))
plt.clf()
plt.plot(rewards)
plt.title("Total reward, data=%s" % args.name)
plt.ylabel("Reward, %")
plt.savefig("rewards-%s.png" % args.name)
def run(self, episodes, step_idx, epsilon):
    """Run evaluation episodes, logging each buy round-trip to a DataFrame.

    For every episode the net is played epsilon-greedily; each Buy opens a
    position (recorded via update_dfrow_open) and each Buy_close — or the
    episode's end while still holding — realises it (cal_profit +
    update_dfrow_close + update_df). Accumulated rows are written to
    self.path_csv and self.stats is returned.

    Args:
        step_idx: passed to self.preparation; presumably the training step
            this checkpoint corresponds to — TODO confirm.
        episodes: number of evaluation episodes.
        epsilon: probability of substituting a random action.

    Returns:
        self.stats with 'episode_reward', 'episode_steps',
        'order_profits', 'order_steps' appended.
    """
    self.preparation(step_idx)
    for episode in range(episodes):
        obs = self.env.reset()
        # Per-episode state lives on self so the dfrow helpers can read it.
        self.total_reward = 0.0
        self.buy_position = None
        self.sell_position = None  # NOTE(review): never written in this method
        self.position_steps = None
        self.time_cost = 0.0
        self.episode_steps = 0
        while True:
            obs_v = [obs]  # assumes self.net accepts a plain list batch — TODO confirm
            out_v = self.net(obs_v)
            action_idx = out_v.max(dim=1)[1].item()
            if np.random.random() < epsilon:
                action_idx = self.env.action_space.sample()
            action = environ.Actions(action_idx)
            self.current_price = self.env._state._price['close'][
                self.env._state._offset]  # base_offset = 8308
            if (action == environ.Actions.Buy) and (self.buy_position is None):
                self.buy_position = self.current_price
                self.position_steps = 0
                # store the data
                self.update_dfrow_open("buy")
            elif action == environ.Actions.Buy_close and self.buy_position is not None:
                self.cal_profit('buy_close')  # sets self.profit
                self.stats['order_profits'].append(self.profit)
                self.stats['order_steps'].append(self.position_steps)
                # store the data
                self.update_dfrow_close('buy', episode=episode)
                # stack into df
                self.update_df()
                # reset the value
                self.buy_position = None
                self.position_steps = None
                self.time_cost = 0
            obs, reward, done, _ = self.env.step(action_idx)
            self.total_reward += reward
            self.episode_steps += 1
            if self.position_steps is not None:
                self.position_steps += 1
                # Holding cost accrues only while a position is open.
                self.time_cost += self.env._state.time_cost(
                    self.position_steps)
            if done:
                if self.buy_position is not None:
                    self.cal_profit('buy_close')
                    self.stats['order_profits'].append(self.profit)
                    self.stats['order_steps'].append(self.position_steps)
                    # store the data (have not sell yet but reached end-date)
                    self.update_dfrow_close('buy', episode=episode)
                    # stack into df and clear the df_row
                    self.update_df()
                break
        self.stats['episode_reward'].append(self.total_reward)
        self.stats['episode_steps'].append(self.episode_steps)
    # export the csv files
    # NOTE(review): indentation reconstructed — export placed after the
    # episode loop (single write); confirm against the original layout.
    self.df.to_csv(self.path_csv, index=False)
    return self.stats