Example #1
def DRL_prediction(df, model, name, last_state, iter_num, unique_trade_date,
                   rebalance_window, turbulence_threshold, initial):
    ### make a prediction based on a trained model ###

    ## trading env
    print("DRL PREDICTION")
    trade_data = data_split(df,
                            start=unique_trade_date[iter_num -
                                                    rebalance_window],
                            end=unique_trade_date[iter_num])
    env_trade = DummyVecEnv([
        lambda: StockEnvTrade(trade_data,
                              turbulence_threshold=turbulence_threshold,
                              initial=initial,
                              previous_state=last_state,
                              model_name=name,
                              iteration=iter_num)
    ])
    obs_trade = env_trade.reset()

    for i in range(len(trade_data.index.unique())):
        action, _states = model.predict(obs_trade)
        obs_trade, rewards, dones, info = env_trade.step(action)
        if i == (len(trade_data.index.unique()) - 2):
            # capture the environment state one step before the end of the window
            last_state = env_trade.render()

    df_last_state = pd.DataFrame({'last_state': last_state})
    df_last_state.to_csv('results/last_state_{}_{}.csv'.format(name, iter_num),
                         index=False)
    return last_state
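
A minimal driver sketch for the rolling-window prediction above, assuming pandas, stable-baselines 2.x (PPO2, DummyVecEnv), and this module's data_split / StockEnvTrade helpers are importable; the CSV path, saved-model name, and 63-day windows are illustrative assumptions, not values from the original module.

import pandas as pd
from stable_baselines import PPO2

df = pd.read_csv("done_data.csv")  # preprocessed data with a 'datadate' column (assumption)
unique_trade_date = df[(df.datadate > 20151001)
                       & (df.datadate <= 20200707)].datadate.unique()

rebalance_window, validation_window = 63, 63          # roughly one quarter of trading days (assumption)
model = PPO2.load("trained_models/PPO_100k_dow_126")  # hypothetical saved model path

last_state = []
for i in range(rebalance_window + validation_window, len(unique_trade_date),
               rebalance_window):
    last_state = DRL_prediction(df=df,
                                model=model,
                                name="ensemble",
                                last_state=last_state,
                                iter_num=i,
                                unique_trade_date=unique_trade_date,
                                rebalance_window=rebalance_window,
                                turbulence_threshold=140,
                                initial=(i == rebalance_window + validation_window))
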
Example #2
def DRL_prediction_no_rebalance(df, model, name, unique_trade_date,
                                log_interval):
    ### make a prediction based on a trained model ###

    ## trading env
    print("DRL PREDICTION NO REBALANCE")
    all_data = data_split(df,
                          start=unique_trade_date[0],
                          end=unique_trade_date[-1])
    for ticker in all_data["tic"].unique():
        trade_data = all_data[all_data["tic"] == ticker]
        env_trade = DummyVecEnv([
            lambda: StockEnvTrade(trade_data,
                                  initial=True,
                                  model_name=name + "_" + ticker,
                                  log_interval=log_interval)
        ])
        obs_trade = env_trade.reset()
        for i in range(len(trade_data.index.unique())):
            action, _states = model.predict(obs_trade)
            print("action: ", action)
            obs_trade, rewards, dones, info = env_trade.step(action)
            if i == (len(trade_data.index.unique()) - 2):
                # capture the environment state one step before the end of the window
                last_state = env_trade.render()

    # df_last_state = pd.DataFrame({'last_state': last_state})
    # df_last_state.to_csv('results/last_state_{}_{}.csv'.format(name, i), index=False)
    return last_state
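
A usage sketch (not from the original module) for the per-ticker prediction loop above: load a trained model and run it over the full trade period. The saved-model path and the date cutoff are assumptions.

import pandas as pd
from stable_baselines import PPO2

df = pd.read_csv("done_data.csv")              # preprocessed multi-ticker data (assumption)
unique_trade_date = df[df.datadate > 20151001].datadate.unique()
model = PPO2.load("trained_models/multitask")  # hypothetical saved model path

last_state = DRL_prediction_no_rebalance(df,
                                         model,
                                         name="multitask",
                                         unique_trade_date=unique_trade_date,
                                         log_interval=1)
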
def train_initial_model(train_func, df, timesteps, model_name, save_path):
    print("============Start Training Initial Model============")
    train = data_split(df, start=20090000, end=20151001)
    env_train = DummyVecEnv([lambda: StockEnvTrain(train)])
    return train_func(env_train,
                      model_name,
                      timesteps=timesteps,
                      save_path=save_path)
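
Usage sketch for train_initial_model: it only wraps whatever training helper is passed in, so train_func must accept (env, model_name, timesteps=..., save_path=...). train_PPO_with_save below is a hypothetical wrapper with that signature, the timestep count is illustrative, and the module's config is assumed to be imported.

import pandas as pd

df = pd.read_csv("done_data.csv")  # preprocessed data with a 'datadate' column (assumption)
initial_model = train_initial_model(train_func=train_PPO_with_save,  # hypothetical PPO wrapper
                                    df=df,
                                    timesteps=100000,
                                    model_name="PPO_initial",
                                    save_path=f"{config.TRAINED_MODEL_DIR}/PPO_initial")
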
Example #4
def train_multitask(df,
                    unique_trade_date,
                    timesteps=10,
                    policy="MlpPolicy",
                    model_name="multitask"):
    # df contains rows for all tickers intermixed; pull out each ticker and
    # step through the trading dates in two-day windows
    # timesteps = number of training steps per window
    start = time.time()
    df = data_split(df,
                    start=unique_trade_date[0],
                    end=unique_trade_date[-1])
    last_state, initial = [], True
    model = None
    for i in range(len(unique_trade_date) - 2):
        for ticker in df["tic"].unique():
            # Interval is every two days so we can optimize on the change in account value
            start_date = unique_trade_date[i]
            end_date = unique_trade_date[i + 2]
            quanta_df = data_split(df, start=start_date, end=end_date)
            quanta_df = quanta_df[quanta_df["tic"] == ticker]
            if len(quanta_df.index) < 2:
                continue
            quanta_df = quanta_df.reset_index()
            quanta_env = DummyVecEnv([
                lambda: StockEnvTrade(quanta_df,
                                      previous_state=last_state,
                                      initial=initial,
                                      log_interval=1)
            ])
            quanta_env.reset()
            model = train_PPO_update(model,
                                     quanta_env,
                                     timesteps,
                                     policy=policy)
            last_state = quanta_env.render()
        initial = False

    model.save(f"{config.TRAINED_MODEL_DIR}/{model_name}")
    end = time.time()
    print('Training time (Multitask): ', (end - start) / 60, ' minutes')
    return model
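
A minimal call sketch for train_multitask, assuming the same kind of preprocessed multi-ticker DataFrame used elsewhere in this module; the file name and trade-date cutoff are assumptions.

import pandas as pd

df = pd.read_csv("done_data.csv")  # preprocessed multi-ticker data (assumption)
unique_trade_date = df[df.datadate > 20151001].datadate.unique()

multitask_model = train_multitask(df,
                                  unique_trade_date,
                                  timesteps=10,
                                  policy="MlpPolicy",
                                  model_name="multitask")
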
Example #5
def train_PPO_models(stocks=['./data/TSLA.csv', './data/FB.csv'],
                     tickers=['TSLA', 'FB'],
                     start_date=20130102,
                     end_date=20180101,
                     model_save_path='/content/DeepRL4Stocks/trained_models/'):
    teachers = []
    envs = []
    for i in range(len(stocks)):
        print("Training PPO teacher for", tickers[i])
        df = process_yahoo_finance(stocks[i], tickers[i])
        train = data_split(df, start=start_date, end=end_date)

        env_train = DummyVecEnv([lambda: StockEnvTrade(train)])
        model = train_PPO(env_train, model_name='PPO_' + tickers[i])
        model.save(model_save_path + "PPO_" + tickers[i])
        teachers.append(model)
        envs.append(env_train)
    return teachers, envs
def test_model(df,
               model,
               model_name,
               turbulence_threshold=140,
               start=20151001,
               end=20200707):
    trade_data = data_split(df, start=start, end=end)
    env_trade = DummyVecEnv([
        lambda: StockEnvTrade(trade_data,
                              turbulence_threshold=turbulence_threshold,
                              initial=True,
                              previous_state=[],
                              model_name=model_name,
                              iteration=0)
    ])

    obs_trade = env_trade.reset()

    state = None
    dones = [False for _ in range(env_trade.num_envs)]
    for i in range(len(trade_data.index.unique())):
        action, state = model.predict(obs_trade, state=state, mask=dones)
        obs_trade, rewards, dones, info = env_trade.step(action)
    return info[0]
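
A sketch tying the two functions above together: train one PPO teacher per ticker with train_PPO_models, then score each teacher out-of-sample with test_model. The save path and evaluation dates are illustrative assumptions; process_yahoo_finance is the same helper used inside train_PPO_models.

teachers, envs = train_PPO_models(stocks=['./data/TSLA.csv', './data/FB.csv'],
                                  tickers=['TSLA', 'FB'],
                                  start_date=20130102,
                                  end_date=20180101,
                                  model_save_path='./trained_models/')

for ticker, teacher in zip(['TSLA', 'FB'], teachers):
    df = process_yahoo_finance('./data/{}.csv'.format(ticker), ticker)
    final_info = test_model(df,
                            teacher,
                            model_name='PPO_' + ticker,
                            turbulence_threshold=140,
                            start=20180102,  # evaluate after the training window (assumption)
                            end=20200707)
    print(ticker, final_info)
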
Example #7
def run_ensemble_strategy(
    df, unique_trade_date, rebalance_window, validation_window
) -> None:
    """Ensemble Strategy that combines PPO, A2C and DDPG"""
    # for ensemble model, it's necessary to feed the last state
    # of the previous model to the current model as the initial state
    last_state_ensemble = []

    ppo_sharpe_list = []
    ddpg_sharpe_list = []
    a2c_sharpe_list = []

    model_used = []

    # based on the analysis of the in-sample data
    # turbulence_threshold = 140
    insample_turbulence = df[
        (df.datadate < config.VALIDATION_START_DATE - 1)
        & (df.datadate >= config.TRAINING_START_DATE)
    ]
    insample_turbulence = insample_turbulence.drop_duplicates(subset=["datadate"])
    insample_turbulence_threshold = np.quantile(
        insample_turbulence.turbulence.values, 0.90
    )

    start = time.time()
    for i in range(
        rebalance_window + validation_window, len(unique_trade_date), rebalance_window
    ):
        ## initial state is empty
        if i - rebalance_window - validation_window == 0:
            # initial state
            initial = True
        else:
            # previous state
            initial = False

        # Tune the turbulence threshold based on recent historical data.
        # Turbulence lookback window is one quarter: validation_window * 30 rows,
        # assuming one row per Dow-30 ticker per trading day
        end_date_index = df.index[
            df["datadate"]
            == unique_trade_date[i - rebalance_window - validation_window]
        ].to_list()[-1]
        start_date_index = end_date_index - validation_window * 30 + 1

        historical_turbulence = df.iloc[start_date_index : (end_date_index + 1), :]
        # historical_turbulence = df[(df.datadate<unique_trade_date[i - rebalance_window - validation_window]) & (df.datadate>=(unique_trade_date[i - rebalance_window - validation_window - 63]))]

        historical_turbulence = historical_turbulence.drop_duplicates(
            subset=["datadate"]
        )

        historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)

        if historical_turbulence_mean > insample_turbulence_threshold:
            # if the mean of the historical data is greater than the 90% quantile of insample turbulence data
            # then we assume that the current market is volatile,
            # therefore we set the 90% quantile of insample turbulence data as the turbulence threshold
            # meaning the current turbulence can't exceed the 90% quantile of insample turbulence data
            turbulence_threshold = insample_turbulence_threshold
        else:
            # if the mean of the historical data is less than the 90% quantile of insample turbulence data
            # then the market is assumed to be relatively calm, so we raise the threshold to the maximum
            # in-sample turbulence value and the turbulence safeguard only triggers in extreme conditions
            turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)

        style = "[bold #31DDCF]"
        rprint(
            Align(
                f"{style}Turbulence Threshold:[/] {str(turbulence_threshold)}", "center"
            )
        )

        ############## Environment Setup starts ##############
        ## training env
        train = data_split(
            df,
            start=config.TRAINING_START_DATE,
            end=unique_trade_date[i - rebalance_window - validation_window],
        )
        env_train = DummyVecEnv([lambda: StockEnvTrain(train)])

        ## validation env
        validation = data_split(
            df,
            start=unique_trade_date[i - rebalance_window - validation_window],
            end=unique_trade_date[i - rebalance_window],
        )
        env_val = DummyVecEnv(
            [
                lambda: StockEnvValidation(
                    validation, turbulence_threshold=turbulence_threshold, iteration=i
                )
            ]
        )

        obs_val = env_val.reset()
        ############## Environment Setup ends ##############

        ############## Training and Validation starts ##############
        table = Table(
            title=f"Training from 20090000 to {unique_trade_date[i - rebalance_window - validation_window]}",
            expand=True,
        )
        table.add_column("Mode Name", justify="center")
        table.add_column("Sharpe Ratio")
        table.add_column("Training Time")

        with Live(table, auto_refresh=False) as live:

            model_a2c, a2c_training_time = train_A2C(
                env_train, model_name="A2C_30k_dow_{}".format(i), timesteps=30000
            )
            DRL_validation(
                model=model_a2c,
                test_data=validation,
                test_env=env_val,
                test_obs=obs_val,
            )
            sharpe_a2c = get_validation_sharpe(i)
            table.add_row("A2C", str(sharpe_a2c), f"{a2c_training_time} minutes")
            live.update(table, refresh=True)

            model_ppo, ppo_training_time = train_PPO(
                env_train, model_name="PPO_100k_dow_{}".format(i), timesteps=100000
            )

            DRL_validation(
                model=model_ppo,
                test_data=validation,
                test_env=env_val,
                test_obs=obs_val,
            )
            sharpe_ppo = get_validation_sharpe(i)
            table.add_row("PPO", str(sharpe_ppo), f"{ppo_training_time} minutes")
            live.update(table, refresh=True)

            model_ddpg, ddpg_training_time = train_DDPG(
                env_train, model_name="DDPG_10k_dow_{}".format(i), timesteps=10000
            )
            # model_ddpg = train_TD3(env_train, model_name="DDPG_10k_dow_{}".format(i), timesteps=20000)
            DRL_validation(
                model=model_ddpg,
                test_data=validation,
                test_env=env_val,
                test_obs=obs_val,
            )
            sharpe_ddpg = get_validation_sharpe(i)
            table.add_row("DDPG", str(sharpe_ddpg), f"{ddpg_training_time} minutes")
            live.update(table, refresh=True)

            ppo_sharpe_list.append(sharpe_ppo)
            a2c_sharpe_list.append(sharpe_a2c)
            ddpg_sharpe_list.append(sharpe_ddpg)

        # Model Selection based on sharpe ratio
        if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg):
            model_ensemble = model_ppo
            model_used.append("PPO")
        elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg):
            model_ensemble = model_a2c
            model_used.append("A2C")
        else:
            model_ensemble = model_ddpg
            model_used.append("DDPG")
        ############## Training and Validation ends ##############

        ############## Trading starts ##############

        # print("Used Model: ", model_ensemble)
        last_state_ensemble = DRL_prediction(
            df=df,
            model=model_ensemble,
            name="ensemble",
            last_state=last_state_ensemble,
            iter_num=i,
            unique_trade_date=unique_trade_date,
            rebalance_window=rebalance_window,
            turbulence_threshold=turbulence_threshold,
            initial=initial,
        )
        print("\n\n")
        # print("============Trading Done============")
        ############## Trading ends ##############

    end = time.time()
    print("Ensemble Strategy took: ", (end - start) / 60, " minutes")