def setUp(self):
    self.order_book_id_number = 100
    self.feature_number = 10
    self.toy_data = create_toy_data(order_book_ids_number=self.order_book_id_number,
                                    feature_number=self.feature_number,
                                    start="2019-05-01", end="2019-12-12",
                                    frequency="D")
    # 2-D observations: sequence_window=1
    self.env_2d = PortfolioTradingGym(data_df=self.toy_data, sequence_window=1,
                                      add_cash=False, mode="numpy")

    # 3-D observations: sequence_window=3
    self.sequence_window = 3
    self.env_3d = PortfolioTradingGym(data_df=self.toy_data,
                                      sequence_window=self.sequence_window,
                                      add_cash=False, mode="numpy")
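A minimal sketch of how these two fixtures might be exercised outside of a test class, assuming the numpy-mode environment returns observations shaped (assets, features) when sequence_window=1 and (assets, sequence_window, features) otherwise; the expected shapes are an assumption, not documented behaviour.

import numpy as np
from trading_gym.utils.data.toy import create_toy_data
from trading_gym.envs.portfolio_gym.portfolio_gym import PortfolioTradingGym

toy_data = create_toy_data(order_book_ids_number=100, feature_number=10,
                           start="2019-05-01", end="2019-12-12", frequency="D")

env_2d = PortfolioTradingGym(data_df=toy_data, sequence_window=1,
                             add_cash=False, mode="numpy")
env_3d = PortfolioTradingGym(data_df=toy_data, sequence_window=3,
                             add_cash=False, mode="numpy")

# Inspect the observation layouts the fixtures above are built around.
print(np.asarray(env_2d.reset()).shape)   # expected to be roughly (100, 10)
print(np.asarray(env_3d.reset()).shape)   # expected to be roughly (100, 3, 10)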
Example No. 2
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pdb
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from trading_gym.utils.data.toy import create_toy_data
from trading_gym.envs.portfolio_gym.portfolio_gym import PortfolioTradingGym

order_book_id_number = 100
toy_data = create_toy_data(order_book_ids_number=order_book_id_number,
                           feature_number=10,
                           start="2019-05-01",
                           end="2019-12-12",
                           frequency="D")

env = PortfolioTradingGym(data_df=toy_data, sequence_window=1, add_cash=False)
state = env.reset()

while True:
    next_state, reward, done, info = env.step(action=None)
    label = info["one_step_fwd_returns"]
    print(state)
    print(label)

    # fit a cross-sectional regression of forward returns on the current features
    regressor = LinearRegression()
    regressor.fit(state.values, label.values)

    # display the fitted coefficients
    print(regressor.coef_)

    # advance to the next observation and stop at the end of the data
    state = next_state
    if done:
        break
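The script above refits a fresh cross-sectional regression each day and only prints the coefficients. A hedged sketch of one possible follow-up, reusing `env`, `np`, and `LinearRegression` from above: collect the daily coefficients and average them to gauge how stable the factor loadings are. The aggregation step is an assumption about how the output might be used, not part of the original example.

coef_history = []

state = env.reset()
done = False
while not done:
    next_state, reward, done, info = env.step(action=None)
    label = info["one_step_fwd_returns"]

    # refit the cross-sectional regression and keep the coefficients
    regressor = LinearRegression()
    regressor.fit(state.values, label.values)
    coef_history.append(regressor.coef_)

    state = next_state

# average factor loading per feature across all trading days
print(np.mean(coef_history, axis=0))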
Example No. 3
def main():
    order_book_id_number = 10
    toy_data = create_toy_data(order_book_ids_number=order_book_id_number,
                               feature_number=20,
                               start="2019-05-01",
                               end="2019-12-12",
                               frequency="D")
    env = PortfolioTradingGym(data_df=toy_data,
                              sequence_window=5,
                              add_cash=True)
    env = Numpy(env)
    env = ch.envs.Logger(env, interval=1000)
    env = ch.envs.Torch(env)
    env = ch.envs.Runner(env)

    # create the actor and critic networks
    action_size = env.action_space.shape[0]
    number_asset, seq_window, features_number = env.observation_space.shape

    input_size = features_number

    actor = Actor(input_size=input_size,
                  hidden_size=50,
                  action_size=action_size)
    critic = Critic(input_size=input_size,
                    hidden_size=50,
                    action_size=action_size)

    target_actor = create_target_network(actor)
    target_critic = create_target_network(critic)
    actor_optimiser = optim.Adam(actor.parameters(), lr=LEARNING_RATE_ACTOR)
    critic_optimiser = optim.Adam(critic.parameters(), lr=LEARNING_RATE_CRITIC)
    replay = ch.ExperienceReplay()
    ou_noise = OrnsteinUhlenbeckNoise(mu=np.zeros(action_size))

    def get_action(state):
        action = actor(state)
        action = action + ou_noise()[0]
        return action

    def get_random_action(state):
        action = torch.softmax(torch.randn(action_size), dim=0)
        return action

    for step in range(1, MAX_STEPS + 1):
        with torch.no_grad():

            if step < UPDATE_START:
                replay += env.run(get_random_action, steps=1)
            else:
                replay += env.run(get_action, steps=1)

        replay = replay[-REPLAY_SIZE:]
        if step > UPDATE_START and step % UPDATE_INTERVAL == 0:
            sample = random.sample(replay, BATCH_SIZE)
            batch = ch.ExperienceReplay(sample)

            next_values = target_critic(batch.next_state(),
                                        target_actor(batch.next_state())).view(
                                            -1, 1)
            values = critic(batch.state(), batch.action()).view(-1, 1)
            rewards = ch.normalize(batch.reward())
            # rewards = batch.reward() / 100.0  # this alternative scaling changes convergence a lot
            value_loss = ch.algorithms.ddpg.state_value_loss(
                values, next_values.detach(), rewards, batch.done(), DISCOUNT)
            critic_optimiser.zero_grad()
            value_loss.backward()
            critic_optimiser.step()

            # Update policy by one step of gradient ascent
            policy_loss = -critic(batch.state(), actor(batch.state())).mean()
            actor_optimiser.zero_grad()
            policy_loss.backward()
            actor_optimiser.step()

            # Update target networks
            ch.models.polyak_average(target_critic, critic, POLYAK_FACTOR)
            ch.models.polyak_average(target_actor, actor, POLYAK_FACTOR)
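main() above references several names that come from the rest of the example file: Actor, Critic, create_target_network, OrnsteinUhlenbeckNoise, and the upper-case hyperparameters. A hedged sketch of what the non-network pieces might look like; the constants are plausible DDPG defaults and the helper and class below are assumptions, not the original definitions.

import copy
import numpy as np

# Plausible values only; the original constants are defined elsewhere in the file.
LEARNING_RATE_ACTOR = 1e-4
LEARNING_RATE_CRITIC = 1e-3
MAX_STEPS = 100000
UPDATE_START = 1000
UPDATE_INTERVAL = 50
REPLAY_SIZE = 100000
BATCH_SIZE = 128
DISCOUNT = 0.99
POLYAK_FACTOR = 0.995


def create_target_network(network):
    # frozen copy of a network, updated only through polyak averaging
    target = copy.deepcopy(network)
    for param in target.parameters():
        param.requires_grad_(False)
    return target


class OrnsteinUhlenbeckNoise:
    # temporally correlated exploration noise:
    # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
    def __init__(self, mu, theta=0.15, sigma=0.2, dt=1e-2):
        self.mu = np.asarray(mu, dtype=float)
        self.theta, self.sigma, self.dt = theta, sigma, dt
        self.x = np.zeros_like(self.mu)

    def __call__(self):
        dx = (self.theta * (self.mu - self.x) * self.dt
              + self.sigma * np.sqrt(self.dt) * np.random.randn(*self.mu.shape))
        self.x = self.x + dx
        return self.x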
Example No. 4
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np
from trading_gym.utils.data.toy import create_toy_data
from trading_gym.envs import PortfolioTradingGym
from trading_gym.envs.portfolio_gym.costs import TCostModel
import pdb

np.random.seed(64)

commitment_fee = TCostModel(half_spread=0.01)
mock_data = create_toy_data(order_book_ids_number=2, feature_number=3, start="2019-01-01", end="2019-01-06")
'''
0001.XSHE      2019-01-01    0.0219
               2019-01-02   -0.0103
               2019-01-03    0.0175
               2019-01-04   -0.0017
               2019-01-05   -0.0039
               2019-01-06    0.0059
               2019-01-07   -0.0049
               2019-01-08   -0.0003
               2019-01-09   -0.0136
               2019-01-10    0.0068
               2019-01-11    0.0077
0002.XSHE      2019-01-01    0.0136
               2019-01-02   -0.0022
               2019-01-03   -0.0012
               2019-01-04   -0.0186
               2019-01-05    0.0098
               2019-01-06   -0.0030
               2019-01-07    0.0065
               ...
'''
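The rest of this example is truncated above, so the TCostModel instance is never applied. Conceptually, a half-spread cost model charges half_spread times the traded volume; a standalone sketch of that calculation with plain NumPy, deliberately not using the TCostModel API:

half_spread = 0.01
previous_weights = np.array([0.6, 0.4])
target_weights = np.array([0.5, 0.5])

# turnover = total absolute change in portfolio weights
turnover = np.abs(target_weights - previous_weights).sum()
transaction_cost = half_spread * turnover
print(transaction_cost)   # 0.01 * 0.2 = 0.002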
Example No. 5
def main():
    order_book_id_number = 10
    toy_data = create_toy_data(order_book_ids_number=order_book_id_number,
                               feature_number=20,
                               start="2019-05-01",
                               end="2019-12-12",
                               frequency="D")
    env = PortfolioTradingGym(data_df=toy_data,
                              sequence_window=5,
                              add_cash=True)
    env = Numpy(env)
    env = ch.envs.Logger(env, interval=1000)
    env = ch.envs.Torch(env)
    env = ch.envs.Runner(env)

    # create the actor-critic network
    action_size = env.action_space.shape[0]
    number_asset, seq_window, features_number = env.observation_space.shape
    input_size = features_number

    agent = ActorCritic(input_size=input_size,
                        hidden_size=HIDDEN_SIZE,
                        action_size=action_size)
    actor_optimiser = optim.Adam(agent.actor.parameters(), lr=LEARNING_RATE)
    critic_optimiser = optim.Adam(agent.critic.parameters(), lr=LEARNING_RATE)

    replay = ch.ExperienceReplay()

    for step in range(1, MAX_STEPS + 1):
        replay += env.run(agent, episodes=1)

        if len(replay) >= BATCH_SIZE:
            with torch.no_grad():
                advantages = pg.generalized_advantage(DISCOUNT, TRACE_DECAY,
                                                      replay.reward(),
                                                      replay.done(),
                                                      replay.value(),
                                                      torch.zeros(1))
                advantages = ch.normalize(advantages, epsilon=1e-8)
                returns = td.discount(DISCOUNT, replay.reward(), replay.done())
                old_log_probs = replay.log_prob()

            # initialised from the replay for readability; recomputed from the second PPO epoch onward
            new_values = replay.value()
            new_log_probs = replay.log_prob()
            for epoch in range(PPO_EPOCHS):
                # Recalculate outputs for subsequent iterations
                if epoch > 0:
                    _, infos = agent(replay.state())
                    masses = infos['mass']
                    new_values = infos['value']
                    new_log_probs = masses.log_prob(
                        replay.action()).unsqueeze(-1)

                # Update the policy by maximising the PPO-Clip objective
                policy_loss = ch.algorithms.ppo.policy_loss(
                    new_log_probs,
                    old_log_probs,
                    advantages,
                    clip=PPO_CLIP_RATIO)
                actor_optimiser.zero_grad()
                policy_loss.backward()
                actor_optimiser.step()

                # Fit value function by regression on mean-squared error
                value_loss = ch.algorithms.a2c.state_value_loss(
                    new_values, returns)
                critic_optimiser.zero_grad()
                value_loss.backward()
                critic_optimiser.step()

            replay.empty()
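As in the DDPG example, main() relies on imports and constants defined elsewhere in the file (cherry as ch, its pg and td submodules, torch.optim, the ActorCritic class, and the upper-case settings). A hedged sketch of what that preamble could look like; the values are common PPO defaults, not the original configuration, and the import layout is an assumption.

import torch
import torch.optim as optim
import cherry as ch
import cherry.pg as pg
import cherry.td as td

# plausible defaults only
HIDDEN_SIZE = 64
LEARNING_RATE = 3e-4
MAX_STEPS = 1000
BATCH_SIZE = 2048
DISCOUNT = 0.99
TRACE_DECAY = 0.95
PPO_EPOCHS = 10
PPO_CLIP_RATIO = 0.2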
Example No. 6
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np
from trading_gym.utils.data.toy import create_toy_data
from trading_gym.envs.portfolio_gym.portfolio_gym import PortfolioTradingGym

order_book_id_number = 1
toy_data = create_toy_data(order_book_ids_number=order_book_id_number,
                           feature_number=3,
                           start="2019-05-01",
                           end="2019-07-12",
                           frequency="D",
                           random_seed=123)

env = PortfolioTradingGym(data_df=toy_data, sequence_window=1, add_cash=False)

observation = env.reset()
print(observation)
# single asset and no cash: the action is a constant weight of 1.0
action = np.array([1.0])
total_steps = list(range(2))
for step in total_steps:
    next_state, reward, done, info = env.step(action)
    print(next_state, reward)
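A hedged variant of the loop above with several assets: assuming add_cash=True appends a cash asset, the action becomes a weight vector with one entry per asset plus cash that should sum to one. The equal-weight choice below is illustrative only.

toy_data_multi = create_toy_data(order_book_ids_number=4, feature_number=3,
                                 start="2019-05-01", end="2019-07-12",
                                 frequency="D", random_seed=123)
env_multi = PortfolioTradingGym(data_df=toy_data_multi, sequence_window=1,
                                add_cash=True)
observation = env_multi.reset()

equal_weights = np.ones(5) / 5   # 4 assets + cash, assumed ordering
for step in range(2):
    next_state, reward, done, info = env_multi.step(equal_weights)
    print(reward)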