Example No. 1
def DRL_prediction(df,
                   model,
                   name,
                   last_state,
                   iter_num,
                   unique_trade_date,
                   rebalance_window,
                   turbulence_threshold,
                   initial):
    ### make a prediction based on trained model ###

    ## trading env
    trade_data = data_split(df, start=unique_trade_date[iter_num - rebalance_window], end=unique_trade_date[iter_num])
    env_trade = DummyVecEnv([lambda: StockEnvTrade(trade_data,
                                                   turbulence_threshold=turbulence_threshold,
                                                   initial=initial,
                                                   previous_state=last_state,
                                                   model_name=name,
                                                   iteration=iter_num)])
    obs_trade = env_trade.reset()

    for i in range(len(trade_data.index.unique())):
        action, _states = model.predict(obs_trade)
        obs_trade, rewards, dones, info = env_trade.step(action)
        if i == (len(trade_data.index.unique()) - 2):
            # capture the environment state one step before the end; it seeds the next trading window
            last_state = env_trade.render()

    df_last_state = pd.DataFrame({'last_state': last_state})
    df_last_state.to_csv('results/last_state_{}_{}.csv'.format(name, i), index=False)
    return last_state
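A minimal sketch of how this function might be driven, assuming a rolling rebalance loop in the surrounding script; the window length, model name, and turbulence threshold below are placeholders, not values from the source:

# Hypothetical driver loop: roll the trade window forward and carry last_state between calls.
rebalance_window = 63      # assumed window length in trading days
last_state, initial = [], True
for iter_num in range(rebalance_window, len(unique_trade_date), rebalance_window):
    last_state = DRL_prediction(df, model, name="ppo", last_state=last_state,
                                iter_num=iter_num, unique_trade_date=unique_trade_date,
                                rebalance_window=rebalance_window,
                                turbulence_threshold=140, initial=initial)
    initial = False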
Example No. 2
def DRL_prediction_no_rebalance(df, model, name, unique_trade_date,
                                log_interval):
    ### make a prediction based on trained model ###

    ## trading env
    print("DRL PREDICTION NO REBALANCE")
    all_data = data_split(df,
                          start=unique_trade_date[0],
                          end=unique_trade_date[len(unique_trade_date) - 1])
    last_state = []
    for ticker in all_data["tic"].unique():
        trade_data = all_data[all_data["tic"] == ticker]
        env_trade = DummyVecEnv([
            lambda: StockEnvTrade(trade_data,
                                  initial=True,
                                  model_name=name + "_" + ticker,
                                  log_interval=log_interval)
        ])
        obs_trade = env_trade.reset()
        for i in range(len(trade_data.index.unique())):
            action, _states = model.predict(obs_trade)
            print("action: ", action)
            obs_trade, rewards, dones, info = env_trade.step(action)
            if i == (len(trade_data.index.unique()) - 2):
                # capture the environment state one step before the end of this ticker's window
                last_state = env_trade.render()

    # df_last_state = pd.DataFrame({'last_state': last_state})
    # df_last_state.to_csv('results/last_state_{}_{}.csv'.format(name, i), index=False)
    return last_state
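A minimal usage sketch, assuming df and model come from the same pipeline; the date-column name used to build unique_trade_date is an assumption:

# Hypothetical call: one pass over the whole trade period, ticker by ticker.
unique_trade_date = df["datadate"].unique()   # assumed date column name
last_state = DRL_prediction_no_rebalance(df, model, name="ppo_no_rebalance",
                                         unique_trade_date=unique_trade_date,
                                         log_interval=1)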
Example No. 3
def DRL_prediction(df, model, name, last_state, iter_num, unique_trade_date,
                   rebalance_window, turbulence_threshold, initial):
    ### make a prediction based on trained model ###

    ## trading env
    trade_data = data_split(df,
                            start=unique_trade_date[iter_num - rebalance_window],
                            end=unique_trade_date[iter_num])
    '''
    env_trade = DummyVecEnv([lambda: StockEnvTrade(trade_data,
                                                   turbulence_threshold=turbulence_threshold,
                                                   initial=initial,
                                                   previous_state=last_state,
                                                   model_name=name,
                                                   iteration=iter_num)])
    '''

    from stable_baselines.common import make_vec_env
    env_trade = make_vec_env(
        lambda: StockEnvTrade(trade_data,
                              turbulence_threshold=turbulence_threshold,
                              initial=initial,
                              previous_state=last_state,
                              model_name=name,
                              iteration=iter_num),
        n_envs=1)

    obs_trade = env_trade.env_method('reset')
    np.set_printoptions(formatter={'float_kind': '{:f}'.format})

    fh = open('state_action.txt', 'a+')
    for i in range(len(trade_data.index.unique())):
        action, _states = model.predict(obs_trade)

        fh.write(str(obs_trade[0]) + "\n")
        fh.write(str(action[0]) + "\n")

        obs_trade, rewards, dones, info = env_trade.step(action)
        if i == (len(trade_data.index.unique()) - 2):
            # capture the environment state one step before the end of the window
            last_state = env_trade.env_method('render')
    fh.close()

    print('^' * 20)
    print(last_state)
    print('^' * 20)
    df_last_state = pd.DataFrame(data={'last_state': last_state[0]})
    df_last_state.to_csv('results/last_state_{}_{}.csv'.format(name, i),
                         index=False)
    return last_state[0]
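This variant logs every observation/action pair to state_action.txt via the file handle above; a small sketch of reading that log back for inspection (the two-lines-per-step layout follows from the writes in the loop):

# Each loop iteration appends two lines: the observation vector, then the action vector.
with open('state_action.txt') as fh:
    lines = [line.strip() for line in fh if line.strip()]
observations = lines[0::2]   # even-indexed lines: observations
actions = lines[1::2]        # odd-indexed lines: actions
print(len(actions), "logged steps")

Note that the handle is opened in append mode ('a+'), so repeated prediction windows accumulate in the same file.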
Example No. 4
def train_PPO_models(stocks=['./data/TSLA.csv', './data/FB.csv'],
                     tickers=['TSLA', 'FB'],
                     start_date=20130102,
                     end_date=20180101,
                     model_save_path='/content/DeepRL4Stocks/trained_models/'):
    teachers = []
    envs = []
    for i in range(len(stocks)):
        print(i)
        df = process_yahoo_finance(stocks[i], tickers[i])
        train = data_split(df, start=start_date, end=end_date)

        env_train = DummyVecEnv([lambda: StockEnvTrade(train)])
        model = train_PPO(env_train, model_name='PPO_' + tickers[i])
        model.save(model_save_path + "PPO_" + tickers[i])
        teachers.append(model)
        envs.append(env_train)
    return teachers, envs
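A minimal sketch of invoking the helper above; the CSV paths and dates echo the signature defaults, and process_yahoo_finance / train_PPO are assumed to be defined in the same module:

teachers, envs = train_PPO_models(stocks=['./data/TSLA.csv', './data/FB.csv'],
                                  tickers=['TSLA', 'FB'],
                                  start_date=20130102,
                                  end_date=20180101,
                                  model_save_path='./trained_models/')
# teachers: one PPO model per ticker; envs: the matching DummyVecEnv training environments.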
Example No. 5
def train_multitask(df,
                    unique_trade_date,
                    timesteps=10,
                    policy="MlpPolicy",
                    model_name="multitask"):
    # df holds all tickers intermixed; iterate over individual tickers and rolling date windows
    # timesteps = number of training steps per date window
    start = time.time()
    df = data_split(df,
                    start=unique_trade_date[0],
                    end=unique_trade_date[len(unique_trade_date) - 1])
    last_state, initial = [], True
    model = None
    for i in range(len(unique_trade_date) - 2):
        for ticker in df["tic"].unique():
            # Interval is every two days so we can optimize on the change in account value
            start_date, end_date = unique_trade_date[i], unique_trade_date[i + 2]
            quanta_df = data_split(df, start=start_date, end=end_date)
            quanta_df = quanta_df[quanta_df["tic"] == ticker]
            if len(quanta_df.index) < 2:
                continue
            quanta_df = quanta_df.reset_index()
            quanta_env = DummyVecEnv([
                lambda: StockEnvTrade(quanta_df,
                                      previous_state=last_state,
                                      initial=initial,
                                      log_interval=1)
            ])
            quanta_env.reset()
            model = train_PPO_update(model,
                                     quanta_env,
                                     timesteps,
                                     policy=policy)
            last_state = quanta_env.render()
        initial = False

    model.save(f"{config.TRAINED_MODEL_DIR}/{model_name}")
    end = time.time()
    print('Training time (Multitask): ', (end - start) / 60, ' minutes')
    return model
def test_model(df,
               model,
               model_name,
               turbulence_threshold=140,
               start=20151001,
               end=20200707):
    trade_data = data_split(df, start=start, end=end)
    env_trade = DummyVecEnv([
        lambda: StockEnvTrade(trade_data,
                              turbulence_threshold=turbulence_threshold,
                              initial=True,
                              previous_state=[],
                              model_name=model_name,
                              iteration=0)
    ])

    obs_trade = env_trade.reset()

    state = None
    dones = [False for _ in range(env_trade.num_envs)]
    for i in range(len(trade_data.index.unique())):
        action, state = model.predict(obs_trade, state=state, mask=dones)
        obs_trade, rewards, dones, info = env_trade.step(action)
    return info[0]
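A minimal end-to-end sketch combining the two functions above, assuming df has been prepared upstream and config.TRAINED_MODEL_DIR is writable; the date column name and date values are assumptions echoing the defaults in the signatures:

unique_trade_date = df[df["datadate"] <= 20151001]["datadate"].unique()   # assumed column name
model = train_multitask(df, unique_trade_date, timesteps=10, model_name="multitask")
final_info = test_model(df, model, model_name="multitask",
                        turbulence_threshold=140, start=20151001, end=20200707)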