Example #1
    def infer(self, train, episode):
        if not train:
            episode_length = self.env.datacontainer.train_length - 1 - self.env.history_length
            tsm = TradingStateModel(
                datacontainer=self.env.datacontainer,
                episode_length=episode_length,
                history_length=self.env.history_length,
                is_training=True,
                commission_percentage=self.env.commission_percentage)
            state = tsm.reset()
            prices = [state.price]  # [episode_length]
            rewards = [0]  # [episode_length]
            allocations = [state.portfolio_allocation]  # [episode_length]

            for _ in range(episode_length):
                batch_asset_features, batch_portfolio = convert_features(
                    features=np.array([state.features]),
                    asset_features_shape=self.actor.asset_features_shape,
                    portfolio_features_shape=[self.actor.a_dim])
                action = self.actor.predict_target(
                    asset_inputs=batch_asset_features,
                    portfolio_inputs=batch_portfolio)[0]
                trans_state, reward, terminal, info = tsm.step(action)
                prices.append(trans_state.price)
                rewards.append(reward)
                allocations.append(trans_state.portfolio_allocation)
                state = trans_state

            prices = np.array(prices)
            rewards = np.array(rewards)
            allocations = np.array(allocations)

            f, axarr = plt.subplots(3, sharex=True)
            axarr[0].set_ylabel('Price')
            for ind in range(self.env.datacontainer.num_assets):
                axarr[0].plot(prices[:, ind])

            axarr[1].set_ylabel('Cumulative Reward')
            axarr[1].plot(np.cumsum(rewards))

            axarr[2].set_ylabel('Action')
            for ind in range(self.env.datacontainer.num_assets):
                axarr[2].plot(allocations[:, ind])

            dataset = 'Train' if train else 'Test'
            title = '{}, Total Reward: {}'.format(dataset, np.sum(rewards))
            f.suptitle(title)  # attach the computed title to the figure
            plt.savefig(
                os.path.join(self.infer_directory,
                             str(episode) + ".png"))
            plt.close(f)  # avoid accumulating figures across episodes
Example #2
    def infer(self, train, episode):
        if not train:
            episode_length = self.datacontainer.test_length - 1
            tsm = TradingStateModel(
                datacontainer=self.datacontainer,
                episode_length=episode_length,
                is_training=train,
                commission_percentage=self.tsm.commission_percentage,
                coin_boundary=self.tsm.coin_boundary)
            state, reward = tsm.initialize()
            prices = [state.price]  # [episode_length]
            rewards = [reward]  # [episode_length]
            coins = [state.coins]  # [episode_length]

            for _ in tqdm(range(episode_length)):
                action = self.actor_target.select_action(
                    inputs=np.array([state.features]))[0][0]
                #action = self.random_action()
                trans_state, reward = tsm.step(action)
                prices.append(trans_state.price)
                rewards.append(reward)
                coins.append(trans_state.coins)
                state = trans_state

            prices = np.array(prices)
            rewards = np.array(rewards)
            coins = np.array(coins)

            f, axarr = plt.subplots(3, sharex=True)
            axarr[0].set_ylabel('Price')
            axarr[0].plot(prices)

            axarr[1].set_ylabel('Cumulative Reward')
            axarr[1].plot(np.cumsum(rewards))

            axarr[2].set_ylabel('Action')
            axarr[2].plot(coins)

            dataset = 'Train' if train else 'Test'
            title = '{}, Total Reward: {}'.format(dataset, np.sum(rewards))
            plt.savefig("./infer" + str(episode) + ".png")
Example #3
    def infer(self, train):
        if not train:
            episode_length = self.datacontainer.test_length - 1
            tsm = TradingStateModel(datacontainer=self.datacontainer,
                                    episode_length=episode_length,
                                    is_training=train,
                                    commission_percentage=0.0)
            state, reward = tsm.initialize()
            prices = [state.prices]  # [episode_length, num_assets]
            rewards = [reward]  # [episode_length]
            allocations = [state.portfolio_allocation]  # [episode_length, num_assets]

            for _ in tqdm(range(episode_length)):
                #action = self.actor_target.select_action(inputs=np.array([state.features]))[0]
                action = DDPG.random_action(
                    num_dimensions=self.datacontainer.num_assets)
                trans_state, reward = tsm.step(action)
                prices.append(trans_state.prices)
                rewards.append(reward)
                allocations.append(action)
                state = trans_state

            prices = np.array(prices)
            rewards = np.array(rewards)
            allocations = np.array(allocations)

            f, axarr = plt.subplots(3, sharex=True)
            axarr[0].set_ylabel('Price')
            for ind in range(prices.shape[1]):
                axarr[0].plot(prices[:, ind])

            axarr[1].set_ylabel('Cumulative Reward')
            axarr[1].plot(np.cumsum(rewards))

            axarr[2].set_ylabel('Action')
            for ind in range(allocations.shape[1]):
                axarr[2].plot(allocations[:, ind])

            dataset = 'Train' if train else 'Test'
            title = '{}, Total Reward: {}'.format(dataset, np.sum(rewards))
            f.suptitle(title)  # attach the computed title to the figure
            plt.show()
Example #4
def infer(env, agent, train, episode, infer_directory):
    print("INFERRING episode:", episode)
    episode_length = env.datacontainer.train_length - 40
    tsm = TradingStateModel(datacontainer=env.datacontainer,
                            episode_length=episode_length,
                            is_training=True,
                            commission_percentage=env.commission_percentage)
    state = tsm.reset()
    prices = [state.price]  # [episode_length]
    rewards = [0]  # [episode_length]
    allocations = [state.portfolio_allocation]  # [episode_length]

    for _ in range(episode_length):
        action, _ = agent.pi(state.features,
                             apply_noise=False,
                             compute_Q=False)
        trans_state, reward, terminal, info = tsm.step(action)
        prices.append(trans_state.price)
        rewards.append(reward)
        allocations.append(trans_state.portfolio_allocation)
        state = trans_state

    prices = np.array(prices)
    rewards = np.array(rewards)
    allocations = np.array(allocations)

    f, axarr = plt.subplots(3, sharex=True)
    axarr[0].set_ylabel('Price')
    for ind in range(env.datacontainer.num_assets):
        axarr[0].plot(prices[:, ind])

    axarr[1].set_ylabel('Cumulative Reward')
    axarr[1].plot(np.cumsum(rewards))

    axarr[2].set_ylabel('Action')
    for ind in range(env.datacontainer.num_assets):
        axarr[2].plot(allocations[:, ind])

    dataset = 'Train' if train else 'Test'
    title = '{}, Total Reward: {}'.format(dataset, np.sum(rewards))
    f.suptitle(title)  # attach the computed title to the figure
    plt.savefig(os.path.join(infer_directory, str(episode) + ".png"))
    plt.close(f)  # avoid accumulating figures across episodes
Example #5
TAU = 0.001
COMMISSION_PERCENTAGE = 0.0

# env = gym.make('Pendulum-v0')
# state_dim = env.observation_space.shape[0]
# action_dim = env.action_space.shape[0]
# boundary = env.action_space.high[0]

# dc = TestContainer(num_assets=3,
#                    num_samples=2000)
dc = EasyContainer(num_samples=2000)
#dc = DataContainer(hdf_file_name='../data/hdfs/poloniex_30m.hf')
#dc = BitcoinTestContainer(csv_file_name='../data/csvs/output.csv')
env = TradingStateModel(datacontainer=dc,
                        episode_length=EPISODE_LENGTH,
                        is_training=True,
                        history_length=HISTORY_LENGTH,
                        commission_percentage=COMMISSION_PERCENTAGE)
asset_features_shape = [dc.num_assets, HISTORY_LENGTH, dc.num_asset_features]
action_dim = dc.num_assets

actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))
# rpb = ReplayBuffer(buffer_size=BUFFER_SIZE)
# conf = {
#   'size': BUFFER_SIZE,
#   'batch_size': BATCH_SIZE,
#   'learn_start': 1000,
#   'steps': NUM_EPISODES * EPISODE_LENGTH
# }
# rpb = Experience(conf)
rpb = PrioritizedReplayBuffer(size=BUFFER_SIZE, alpha=0.6)
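For context, OrnsteinUhlenbeckActionNoise (standard in DDPG setups like the one above) produces temporally correlated exploration noise. The following is a minimal NumPy sketch of the usual Ornstein-Uhlenbeck update, not the project's class; theta, sigma, and dt are illustrative values:

import numpy as np

def ou_noise(mu, theta=0.15, sigma=0.2, dt=1e-2, steps=100):
    # x_{t+1} = x_t + theta * (mu - x_t) * dt + sigma * sqrt(dt) * N(0, 1)
    x = np.zeros_like(mu)
    samples = []
    for _ in range(steps):
        x = x + theta * (mu - x) * dt + sigma * np.sqrt(dt) * np.random.randn(*mu.shape)
        samples.append(x.copy())
    return np.array(samples)  # [steps, action_dim]

noise = ou_noise(mu=np.zeros(3))  # e.g. action_dim = 3 assets
print(noise.shape)                # (100, 3)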
Example #6
from tradingstatemodel import TradingStateModel

NUM_EPISODES = 1000
EPISODE_LENGTH = 50
COMMISSION_PERCENTAGE = 0.0
BATCH_SIZE = 32
BATCH_NORM = True
BUFFER_SIZE = 1000000
COIN_BOUNDARY = 5

#tc = TestContainer(num_assets=1, num_samples=5000)
tc = BitcoinTestContainer(csv_file_name='../data/csvs/output.csv')
tc.plot_prices(train=True)
tsm = TradingStateModel(datacontainer=tc,
                        episode_length=EPISODE_LENGTH,
                        is_training=True,
                        commission_percentage=COMMISSION_PERCENTAGE,
                        coin_boundary=COIN_BOUNDARY)
print("FLATTENED:", tc.num_flattened_features)

sess = tf.Session()
actor_target = ActorNetwork(sess=sess,
                            batch_size=BATCH_SIZE,
                            batch_norm=BATCH_NORM,
                            dropout=0.5,
                            history_length=50,
                            datacontainer=tc,
                            epochs=50,
                            is_target=True,
                            coin_boundary=COIN_BOUNDARY)
actor_trainer = ActorNetwork(sess=sess,
                             batch_size=BATCH_SIZE,
                             batch_norm=BATCH_NORM,
                             dropout=0.5,
                             history_length=50,
                             datacontainer=tc,
                             epochs=50,
                             is_target=False,  # assumed: mirrors the target network's constructor, but as the trainable network
                             coin_boundary=COIN_BOUNDARY)
Example #7
NUM_EPISODES = 10000
EPISODE_LENGTH = 250
GAMMA = 0.99
TAU = 0.001

# env = gym.make('Pendulum-v0')
# state_dim = env.observation_space.shape[0]
# action_dim = env.action_space.shape[0]
# boundary = env.action_space.high[0]

# dc = TestContainer(num_assets=1,
#                    num_samples=2000)
dc = BitcoinTestContainer(csv_file_name='../data/csvs/output.csv')
env = TradingStateModel(datacontainer=dc,
                        episode_length=EPISODE_LENGTH,
                        is_training=True,
                        commission_percentage=0,
                        coin_boundary=5)
state_dim = dc.num_flattened_features
action_dim = 1
boundary = env.coin_boundary

actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))
# rpb = ReplayBuffer(buffer_size=BUFFER_SIZE)
# conf = {
#   'size': BUFFER_SIZE,
#   'batch_size': BATCH_SIZE,
#   'learn_start': 1000,
#   'steps': NUM_EPISODES * EPISODE_LENGTH
# }
# rpb = Experience(conf)
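TAU above is the soft-update rate used for DDPG-style target networks. As a rough, self-contained illustration (NumPy only, not the project's update code), each step blends the trainable weights into the target weights:

import numpy as np

TAU = 0.001

def soft_update(online, target, tau=TAU):
    # Polyak averaging: target <- tau * online + (1 - tau) * target
    return [tau * o + (1.0 - tau) * t for o, t in zip(online, target)]

online_weights = [np.random.randn(4, 4), np.random.randn(4)]
target_weights = [w.copy() for w in online_weights]
target_weights = soft_update(online_weights, target_weights)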
Example #8
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    # Configure things.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Create envs.
    # env = gym.make(env_id)
    # env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))

    if evaluation and rank == 0:
        eval_env = gym.make(env_id)
        eval_env = bench.Monitor(eval_env,
                                 os.path.join(logger.get_dir(), 'gym_eval'))
        # env = bench.Monitor(env, None)  # disabled: the gym env creation above is commented out, so there is no env to wrap here
    else:
        eval_env = None

    #dc = TestContainer(num_assets=3, num_samples=20000)
    dc = BitcoinTestContainer(csv_file_name='../../../data/csvs/output.csv')
    env = TradingStateModel(datacontainer=dc,
                            episode_length=kwargs['nb_rollout_steps'],
                            is_training=True,
                            commission_percentage=COMMISSION_PERCENTAGE)

    # Parse noise_type
    action_noise = None
    param_noise = None
    # nb_actions = env.action_space.shape[-1]
    nb_actions = env.datacontainer.num_assets
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) *
                                             np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError(
                'unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    # memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape)
    memory = Memory(limit=int(1e6),
                    action_shape=env.action_space.shape,
                    observation_shape=env.observation_space.shape)
    critic = Critic(num_asset_features=env.datacontainer.total_asset_features,
                    num_actions=env.datacontainer.num_assets,
                    asset_features_shape=env.asset_features_shape,
                    portfolio_features_shape=env.portfolio_features_shape,
                    layer_norm=layer_norm)
    actor = Actor(nb_actions,
                  num_asset_features=env.datacontainer.total_asset_features,
                  num_actions=env.datacontainer.num_assets,
                  asset_features_shape=env.asset_features_shape,
                  portfolio_features_shape=env.portfolio_features_shape,
                  layer_norm=layer_norm)

    # Seed everything to make things reproducible.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    # env.seed(seed)
    # if eval_env is not None:
    #     eval_env.seed(seed)

    # Disable logging for rank != 0 to avoid noise.
    if rank == 0:
        start_time = time.time()
    training.train(env=env,
                   eval_env=eval_env,
                   param_noise=param_noise,
                   action_noise=action_noise,
                   actor=actor,
                   critic=critic,
                   memory=memory,
                   tensorboard_directory='./tensorboard_' +
                   str(COMMISSION_PERCENTAGE),
                   infer_directory='./infer_ims_' + str(COMMISSION_PERCENTAGE),
                   **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
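The noise-parsing loop in run() expects noise_type to be a comma-separated list of <name>_<stddev> specs. A quick standalone illustration of that format (the values are placeholders):

noise_type = 'ou_0.2,adaptive-param_0.2'

for current_noise_type in noise_type.split(','):
    name, stddev = current_noise_type.strip().split('_')
    print(name, float(stddev))
# ou 0.2
# adaptive-param 0.2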
Example #9
from data.datacontainer import BitcoinTestContainer
from tradingstatemodel import TradingStateModel, QApproximator, ReplayBuffer

csv_file_name = './data/csvs/output.csv'

max_coins = 4

btc = BitcoinTestContainer(csv_file_name=csv_file_name)
rpb = ReplayBuffer()
q_approximator = QApproximator(num_features=btc.num_features + max_coins + 1,
                               num_actions=max_coins + 1)
tsm = TradingStateModel(bitcoin_container=btc,
                        model=q_approximator,
                        episode_length=2000,
                        gamma=0.95,
                        starting_coins=0,
                        max_coins=max_coins,
                        epochs=100,
                        replay_buffer=rpb,
                        batch_size=10)
tsm.train()
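The ReplayBuffer handed to TradingStateModel above stores transitions for mini-batch Q-learning updates. Below is a minimal deque-based sketch of what such a buffer typically provides, not the project's implementation; the method names are assumptions:

import random
from collections import deque

class SimpleReplayBuffer:
    """Fixed-size FIFO store of (state, action, reward, next_state) transitions."""

    def __init__(self, capacity=100000):
        self.buffer = deque(maxlen=capacity)

    def add(self, state, action, reward, next_state):
        self.buffer.append((state, action, reward, next_state))

    def sample(self, batch_size):
        # Uniform sampling; a prioritized buffer would instead weight by TD error.
        return random.sample(self.buffer, min(batch_size, len(self.buffer)))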