def validation_run(env,
                   net,
                   episodes=100,
                   device="cpu",
                   epsilon=0.02,
                   commission=0.1):
    stats = {metric: [] for metric in METRICS}  # METRICS: module-level tuple of the stat names used below

    for episode in range(episodes):
        obs = env.reset()

        total_reward = 0.0
        position = None
        position_steps = None
        episode_steps = 0

        while True:
            obs_v = torch.tensor([obs]).to(device)
            out_v = net(obs_v)

            action_idx = out_v.max(dim=1)[1].item()

            if np.random.random() < epsilon:
                action_idx = env.action_space.sample()

            action = environ.Actions(action_idx)

            close_price = env._state._cur_close()

            if action == environ.Actions.Buy and position is None:
                position = close_price
                position_steps = 0
            elif action == environ.Actions.Close and position is not None:
                profit = close_price - position - (close_price +
                                                   position) * commission / 100
                profit = 100.0 * profit / position
                stats["order_profits"].append(profit)
                stats["order_steps"].append(position_steps)
                position = None
                position_steps = None

            obs, reward, done, _ = env.step(action_idx)
            total_reward += reward
            episode_steps += 1

            if position_steps is not None:
                position_steps += 1
            if done:
                if position is not None:
                    profit = close_price - position - (
                        close_price + position) * commission / 100
                    profit = 100.0 * profit / position
                    stats["order_profits"].append(profit)
                    stats["order_steps"].append(position_steps)
                break

        stats["episode_reward"].append(total_reward)
        stats["episode_steps"].append(episode_steps)

    return {key: np.mean(vals) for key, vals in stats.items()}
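A hedged usage sketch for this first variant; the METRICS tuple, the environ.StocksEnv
environment and the models.DQNConv1D network below are assumptions about the surrounding
project, not part of the example itself.

METRICS = ('episode_reward', 'episode_steps', 'order_profits', 'order_steps')

# env = environ.StocksEnv(prices, bars_count=10, commission=0.1)   # hypothetical setup
# net = models.DQNConv1D(env.observation_space.shape, env.action_space.n)
# res = validation_run(env, net, episodes=50, device="cpu")
# print(res)   # mean episode reward/steps and mean per-order profit/steps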
Example #2
def validation_run(env, net, episodes=1000, epsilon=0.02, commission=0.1):
    stats = {
        'episode_reward': [],
        'episode_steps': [],
        'order_profits': [],
        'order_steps': []
    }
    res = {}

    for episode in range(episodes):
        obs = env.reset()

        total_reward = 0.0
        position = None
        position_steps = None
        episode_steps = 0

        while True:
            # the raw observation list is passed straight to the network, which is
            # assumed to handle tensor conversion internally in this variant
            obs_v = [obs]
            out_v = net(obs_v)

            action_idx = out_v.max(dim=1)[1].item()
            if np.random.random() < epsilon:
                action_idx = env.action_space.sample()
            action = environ.Actions(action_idx)

            close_price = env._state._data['close'][
                env._state._offset]  # base_offset = 8308

            if action == environ.Actions.Buy and position is None:
                position = close_price
                position_steps = 0
            elif action == environ.Actions.Close and position is not None:
                profit = close_price - position - (close_price +
                                                   position) * commission / 100
                profit = 100.0 * profit / position
                stats['order_profits'].append(profit)
                stats['order_steps'].append(position_steps)
                position = None
                position_steps = None

            obs, reward, done, _ = env.step(action_idx)
            total_reward += reward
            episode_steps += 1
            if position_steps is not None:
                position_steps += 1
            if done:
                if position is not None:
                    profit = close_price - position - (
                        close_price + position) * commission / 100
                    profit = 100.0 * profit / position
                    stats['order_profits'].append(profit)
                    stats['order_steps'].append(position_steps)
                break

        stats['episode_reward'].append(total_reward)
        stats['episode_steps'].append(episode_steps)

    return stats
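Unlike the other variants, this one returns the raw per-episode lists and leaves
aggregation to the caller; an illustrative (not original) way to reduce them:

# stats = validation_run(env, net, episodes=1000)
# means = {key: np.mean(vals) for key, vals in stats.items()}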
Example #3
def validation_run(env, net, episodes=100, device="cpu", epsilon=0.02, commission=0.1):
    stats = {
        'episode_reward': [],
        'episode_steps': [],
        'order_profits': [],
        'order_steps': [],
    }

    for episode in range(episodes):
        obs = env.reset()

        total_reward = 0.0
        have_position = False
        position_steps = 0
        episode_steps = 0

        while True:
            obs_v = torch.tensor([obs]).to(device)
            out_v = net(obs_v)

            action_idx = out_v.max(dim=1)[1].item()
            if np.random.random() < epsilon:
                action_idx = env.action_space.sample()
            action = environ.Actions(action_idx)

            obs, reward, done, _ = env.step(action_idx)

            if reward is None:
                reward = 0

            if (action == environ.Actions.Lay or action == environ.Actions.Back) and not have_position:
                have_position = True
                position_steps = 0
            elif action == environ.Actions.Close:
                profit = reward
                stats['order_profits'].append(profit)
                stats['order_steps'].append(position_steps)
                position_steps = 0
                have_position = False
            elif env._state.have_position:
                position_steps += 1
               
            total_reward += reward
            episode_steps += 1

            if done:
                if have_position:
                    profit = reward
                    stats['order_profits'].append(profit)
                    stats['order_steps'].append(position_steps)
                    position_steps = 0
                break

        stats['episode_reward'].append(total_reward)
        stats['episode_steps'].append(episode_steps)

    return { key: np.mean(vals) for key, vals in stats.items() }
Example #4
def validation_run(env, net, episodes=100, cuda=False, epsilon=0.02, commission=0.1):
    stats = {
        'episode_reward': [],
        'episode_steps': [],
        'order_profits': [],
        'order_steps': [],
    }

    for episode in range(episodes):
        obs = env.reset()

        total_reward = 0.0
        position = None
        position_steps = None
        episode_steps = 0

        while True:
            obs_v = Variable(torch.from_numpy(np.expand_dims(obs, 0)))
            if cuda:
                obs_v = obs_v.cuda()
            out_v = net(obs_v)

            action_idx = out_v.max(dim=1)[1].data.cpu().numpy()[0]
            if np.random.random() < epsilon:
                action_idx = env.action_space.sample()
            action = environ.Actions(action_idx)

            close_price = env._state._cur_close()

            if action == environ.Actions.Buy and position is None:
                position = close_price
                position_steps = 0
            elif action == environ.Actions.Close and position is not None:
                profit = close_price - position - (close_price + position) * commission / 100
                profit = 100.0 * profit / position
                stats['order_profits'].append(profit)
                stats['order_steps'].append(position_steps)
                position = None
                position_steps = None

            obs, reward, done, _ = env.step(action_idx)
            total_reward += reward
            episode_steps += 1
            if position_steps is not None:
                position_steps += 1
            if done:
                if position is not None:
                    profit = close_price - position - (close_price + position) * commission / 100
                    profit = 100.0 * profit / position
                    stats['order_profits'].append(profit)
                    stats['order_steps'].append(position_steps)
                break

        stats['episode_reward'].append(total_reward)
        stats['episode_steps'].append(episode_steps)

    return { key: np.mean(vals) for key, vals in stats.items() }
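This variant targets the old pre-0.4 PyTorch API, wrapping inputs in autograd.Variable
and switching devices through an explicit cuda flag. A modern-PyTorch sketch of the same
action-selection step (an illustration under those assumptions, not the original code):

def select_action(net, obs, cuda=False, epsilon=0.02):
    # torch.tensor plus the device API replaces Variable and the explicit .cuda() branch
    device = torch.device("cuda" if cuda else "cpu")
    obs_v = torch.tensor(np.expand_dims(obs, 0), dtype=torch.float32, device=device)
    out_v = net(obs_v)
    action_idx = out_v.max(dim=1)[1].item()
    if np.random.random() < epsilon:
        # random exploration; env.action_space.sample() could be used instead
        action_idx = np.random.randint(out_v.size(1))
    return action_idx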
Example #5
    obs = env.reset()
    start_price = env._state._cur_close()

    total_reward = 0.0
    step_idx = 0
    rewards = []

    while True:
        step_idx += 1
        obs_v = torch.tensor([obs])
        out_v = net(obs_v)
        action_idx = out_v.max(dim=1)[1].item()
        if np.random.random() < EPSILON:
            action_idx = env.action_space.sample()
        action = environ.Actions(action_idx)

        obs, reward, done, _ = env.step(action_idx)
        total_reward += reward
        rewards.append(total_reward)
        if step_idx % 100 == 0:
            print("%d: reward=%.3f" % (step_idx, total_reward))
        if done:
            break

    plt.clf()
    plt.plot(rewards)
    plt.title("Total reward, data=%s" % args.name)
    plt.ylabel("Reward, %")
    plt.savefig("rewards-%s.png" % args.name)
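This fragment is the tail of an evaluation script, so its setup is not shown. It relies
on a matplotlib pyplot import, an EPSILON exploration constant and an argparse namespace
providing args.name; a sketch of that assumed preamble follows (construction of env and
net is project-specific and omitted):

import argparse
import matplotlib
matplotlib.use("Agg")  # headless backend so savefig works without a display
import matplotlib.pyplot as plt

EPSILON = 0.02  # exploration probability used in the loop above

parser = argparse.ArgumentParser()
parser.add_argument("-n", "--name", required=True,
                    help="run name used in the plot title and output file name")
args = parser.parse_args()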
Example #6
    def run(self, episodes, step_idx, epsilon):
        self.preparation(step_idx)

        for episode in range(episodes):
            obs = self.env.reset()

            self.total_reward = 0.0
            self.buy_position = None
            self.sell_position = None
            self.position_steps = None
            self.time_cost = 0.0
            self.episode_steps = 0

            while True:
                obs_v = [obs]
                out_v = self.net(obs_v)

                action_idx = out_v.max(dim=1)[1].item()
                if np.random.random() < epsilon:
                    action_idx = self.env.action_space.sample()
                action = environ.Actions(action_idx)

                self.current_price = self.env._state._price['close'][
                    self.env._state._offset]  # base_offset = 8308

                if action == environ.Actions.Buy and self.buy_position is None:
                    self.buy_position = self.current_price
                    self.position_steps = 0
                    # store the data
                    self.update_dfrow_open("buy")

                elif action == environ.Actions.Buy_close and self.buy_position is not None:
                    self.cal_profit('buy_close')
                    self.stats['order_profits'].append(self.profit)
                    self.stats['order_steps'].append(self.position_steps)

                    # store the data
                    self.update_dfrow_close('buy', episode=episode)
                    # stack into df
                    self.update_df()

                    # reset the value
                    self.buy_position = None
                    self.position_steps = None
                    self.time_cost = 0

                obs, reward, done, _ = self.env.step(action_idx)
                self.total_reward += reward
                self.episode_steps += 1
                if self.position_steps is not None:
                    self.position_steps += 1
                    self.time_cost += self.env._state.time_cost(
                        self.position_steps)
                if done:
                    if self.buy_position is not None:
                        self.cal_profit('buy_close')
                        self.stats['order_profits'].append(self.profit)
                        self.stats['order_steps'].append(self.position_steps)

                        # store the data (have not sell yet but reached end-date)
                        self.update_dfrow_close('buy', episode=episode)
                        # stack into df and clear the df_row
                        self.update_df()
                    break
            self.stats['episode_reward'].append(self.total_reward)
            self.stats['episode_steps'].append(self.episode_steps)

        # export the csv files
        self.df.to_csv(self.path_csv, index=False)
        return self.stats