Example #1: baseline loop with a hard-coded turbulence threshold (140, stepped down to 100 for 2018-2019 and 90 for 2020).
def run_ensemble_strategy(df, unique_trade_date, rebalance_window,
                          validation_window) -> None:
    """Ensemble Strategy that combines PPO, A2C and DDPG"""
    print("============Start Ensemble Strategy============")
    # for ensemble model, it's necessary to feed the last state
    # of the previous model to the current model as the initial state
    last_state_ensemble = []

    ppo_sharpe_list = []
    ddpg_sharpe_list = []
    a2c_sharpe_list = []

    model_use = []

    # based on the analysis of the in-sample data
    turbulence_threshold = 140

    start = time.time()
    for i in range(rebalance_window + validation_window,
                   len(unique_trade_date), rebalance_window):
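        # Each pass of this loop is one rebalance cycle: train on all data up to
        # unique_trade_date[i - rebalance_window - validation_window], validate the
        # candidate models on the next validation_window days, then trade the best
        # one over the following rebalance_window days.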
        print("============================================")
        ## initial state is empty
        if i - rebalance_window - validation_window == 0:
            # initial state
            initial = True
        else:
            # previous state
            initial = False

        # Tuning turbulence index based on historical data
        # TODO: need a more dynamic model
        # 2018-2019
        if (i >= 692) & (i < 1090):
            turbulence_threshold = 100
        # 2020
        if i >= 1090:
            turbulence_threshold = 90

        ############## Environment Setup starts ##############
        ## training env
        train = data_split(df,
                           start=20090000,
                           end=unique_trade_date[i - rebalance_window -
                                                 validation_window])
        env_train = DummyVecEnv([lambda: StockEnvTrain(train)])

        ## validation env
        validation = data_split(df,
                                start=unique_trade_date[i - rebalance_window -
                                                        validation_window],
                                end=unique_trade_date[i - rebalance_window])
        env_val = DummyVecEnv([
            lambda: StockEnvValidation(validation,
                                       turbulence_threshold=
                                       turbulence_threshold,
                                       iteration=i)
        ])
        obs_val = env_val.reset()
        ############## Environment Setup ends ##############

        ############## Training and Validation starts ##############
        print("======Model training from: ", 20090000, "to ",
              unique_trade_date[i - rebalance_window - validation_window])
        # print("training: ",len(data_split(df, start=20090000, end=test.datadate.unique()[i-rebalance_window]) ))
        # print("==============Model Training===========")
        print("======A2C Training========")
        model_a2c = train_A2C(env_train,
                              model_name="A2C_20k_dow_{}".format(i),
                              timesteps=20000)
        print("======A2C Validation from: ",
              unique_trade_date[i - rebalance_window - validation_window],
              "to ", unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_a2c,
                       test_data=validation,
                       test_env=env_val,
                       test_obs=obs_val)
        sharpe_a2c = get_validation_sharpe(i)
        print("A2C Sharpe Ratio: ", sharpe_a2c)

        print("======PPO Training========")
        model_ppo = train_PPO(env_train,
                              model_name="PPO_50k_dow_{}".format(i),
                              timesteps=50000)
        print("======PPO Validation from: ",
              unique_trade_date[i - rebalance_window - validation_window],
              "to ", unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_ppo,
                       test_data=validation,
                       test_env=env_val,
                       test_obs=obs_val)
        sharpe_ppo = get_validation_sharpe(i)
        print("PPO Sharpe Ratio: ", sharpe_ppo)

        print("======DDPG Training========")
        model_ddpg = train_DDPG(env_train,
                                model_name="DDPG_10k_dow_{}".format(i),
                                timesteps=10000)
        print("======DDPG Validation from: ",
              unique_trade_date[i - rebalance_window - validation_window],
              "to ", unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_ddpg,
                       test_data=validation,
                       test_env=env_val,
                       test_obs=obs_val)
        sharpe_ddpg = get_validation_sharpe(i)
        print("DDPG Sharpe Ratio: ", sharpe_ddpg)

        ppo_sharpe_list.append(sharpe_ppo)
        a2c_sharpe_list.append(sharpe_a2c)
        ddpg_sharpe_list.append(sharpe_ddpg)

        # Model Selection based on sharpe ratio
        if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg):
            model_ensemble = model_ppo
            model_use.append('PPO')
        elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg):
            model_ensemble = model_a2c
            model_use.append('A2C')
        else:
            model_ensemble = model_ddpg
            model_use.append('DDPG')
        ############## Training and Validation ends ##############

        ############## Trading starts ##############
        print("======Trading from: ", unique_trade_date[i - rebalance_window],
              "to ", unique_trade_date[i])
        print("Used Model: ", model_ensemble)

        last_state_ensemble = DRL_prediction(
            df=df,
            model=model_ensemble,
            name="ensemble",
            last_state=last_state_ensemble,
            iter_num=i,
            unique_trade_date=unique_trade_date,
            rebalance_window=rebalance_window,
            turbulence_threshold=turbulence_threshold,
            initial=initial)
        # print("============Trading Done============")
        ############## Trading ends ##############

    end = time.time()
    print("Ensemble Strategy took: ", (end - start) / 60, " minutes")
Example #2: the turbulence threshold is computed from the data; it is capped at the 90% quantile of in-sample turbulence when the trailing-quarter mean is high, otherwise relaxed to the in-sample maximum.
def run_ensemble_strategy(df, unique_trade_date, rebalance_window, validation_window) -> None:
    """Ensemble Strategy that combines PPO, A2C and DDPG"""
    print("============Start Ensemble Strategy============")
    # for ensemble model, it's necessary to feed the last state
    # of the previous model to the current model as the initial state
    last_state_ensemble = []

    ppo_sharpe_list = []
    ddpg_sharpe_list = []
    a2c_sharpe_list = []

    model_use = []

    # based on the analysis of the in-sample data
    #turbulence_threshold = 140
    insample_turbulence = df[(df.datadate<20151000) & (df.datadate>=20090000)]
    insample_turbulence = insample_turbulence.drop_duplicates(subset=['datadate'])
    insample_turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, .90)

    start = time.time()
    for i in range(rebalance_window + validation_window, len(unique_trade_date), rebalance_window):
        print("============================================")
        ## initial state is empty
        if i - rebalance_window - validation_window == 0:
            # initial state
            initial = True
        else:
            # previous state
            initial = False

        # Tuning turbulence index based on historical data
        # Turbulence lookback window is one quarter
        historical_turbulence = df[(df.datadate<unique_trade_date[i - rebalance_window - validation_window]) & (df.datadate>=(unique_trade_date[i - rebalance_window - validation_window-63]))]
        historical_turbulence = historical_turbulence.drop_duplicates(subset=['datadate'])
        historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)   

        if historical_turbulence_mean > insample_turbulence_threshold:
            # if the mean of the historical data is greater than the 90% quantile of insample turbulence data
            # then we assume that the current market is volatile, 
            # therefore we set the 90% quantile of insample turbulence data as the turbulence threshold 
            # meaning the current turbulence can't exceed the 90% quantile of insample turbulence data
            turbulence_threshold = insample_turbulence_threshold
        else:
            # if the mean of the historical turbulence is below the 90% quantile of the in-sample data,
            # the recent market is relatively calm, so we relax the threshold to the in-sample maximum
            turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)
        print("turbulence_threshold: ", turbulence_threshold)

        ############## Environment Setup starts ##############
        ## training env
        train = data_split(df, start=20090000, end=unique_trade_date[i - rebalance_window - validation_window])
        env_train = DummyVecEnv([lambda: StockEnvTrain(train)])

        ## validation env
        validation = data_split(df, start=unique_trade_date[i - rebalance_window - validation_window],
                                end=unique_trade_date[i - rebalance_window])
        env_val = DummyVecEnv([lambda: StockEnvValidation(validation,
                                                          turbulence_threshold=turbulence_threshold,
                                                          iteration=i)])
        obs_val = env_val.reset()
        ############## Environment Setup ends ##############

        ############## Training and Validation starts ##############
        print("======Model training from: ", 20090000, "to ",
              unique_trade_date[i - rebalance_window - validation_window])
        # print("training: ",len(data_split(df, start=20090000, end=test.datadate.unique()[i-rebalance_window]) ))
        # print("==============Model Training===========")
        print("======A2C Training========")
        model_a2c = train_A2C(env_train, model_name="A2C_30k_dow_{}".format(i), timesteps=30000)
        print("======A2C Validation from: ", unique_trade_date[i - rebalance_window - validation_window], "to ",
              unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_a2c, test_data=validation, test_env=env_val, test_obs=obs_val)
        sharpe_a2c = get_validation_sharpe(i)
        print("A2C Sharpe Ratio: ", sharpe_a2c)

        print("======PPO Training========")
        model_ppo = train_PPO(env_train, model_name="PPO_100k_dow_{}".format(i), timesteps=100000)
        print("======PPO Validation from: ", unique_trade_date[i - rebalance_window - validation_window], "to ",
              unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_ppo, test_data=validation, test_env=env_val, test_obs=obs_val)
        sharpe_ppo = get_validation_sharpe(i)
        print("PPO Sharpe Ratio: ", sharpe_ppo)

        print("======DDPG Training========")
        model_ddpg = train_DDPG(env_train, model_name="DDPG_10k_dow_{}".format(i), timesteps=10000)
        #model_ddpg = train_TD3(env_train, model_name="DDPG_10k_dow_{}".format(i), timesteps=20000)
        print("======DDPG Validation from: ", unique_trade_date[i - rebalance_window - validation_window], "to ",
              unique_trade_date[i - rebalance_window])
        DRL_validation(model=model_ddpg, test_data=validation, test_env=env_val, test_obs=obs_val)
        sharpe_ddpg = get_validation_sharpe(i)
        print("DDPG Sharpe Ratio: ", sharpe_ddpg)

        ppo_sharpe_list.append(sharpe_ppo)
        a2c_sharpe_list.append(sharpe_a2c)
        ddpg_sharpe_list.append(sharpe_ddpg)

        # Model Selection based on sharpe ratio
        if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg):
            model_ensemble = model_ppo
            model_use.append('PPO')
        elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg):
            model_ensemble = model_a2c
            model_use.append('A2C')
        else:
            model_ensemble = model_ddpg
            model_use.append('DDPG')
        ############## Training and Validation ends ##############    

        ############## Trading starts ##############    
        print("======Trading from: ", unique_trade_date[i - rebalance_window], "to ", unique_trade_date[i])
        #print("Used Model: ", model_ensemble)
        last_state_ensemble = DRL_prediction(df=df, model=model_ensemble, name="ensemble",
                                             last_state=last_state_ensemble, iter_num=i,
                                             unique_trade_date=unique_trade_date,
                                             rebalance_window=rebalance_window,
                                             turbulence_threshold=turbulence_threshold,
                                             initial=initial)
        # print("============Trading Done============")
        ############## Trading ends ##############    

    end = time.time()
    print("Ensemble Strategy took: ", (end - start) / 60, " minutes")
Example #3: same dynamic-threshold logic, parameterized through config constants and reporting progress with a rich Live table instead of plain prints.
def run_ensemble_strategy(
    df, unique_trade_date, rebalance_window, validation_window
) -> None:
    """Ensemble Strategy that combines PPO, A2C and DDPG"""
    # for ensemble model, it's necessary to feed the last state
    # of the previous model to the current model as the initial state
    last_state_ensemble = []

    ppo_sharpe_list = []
    ddpg_sharpe_list = []
    a2c_sharpe_list = []

    model_used = []

    # based on the analysis of the in-sample data
    # turbulence_threshold = 140
    insample_turbulence = df[
        (df.datadate < config.VALIDATION_START_DATE - 1)
        & (df.datadate >= config.TRAINING_START_DATE)
    ]
    insample_turbulence = insample_turbulence.drop_duplicates(subset=["datadate"])
    insample_turbulence_threshold = np.quantile(
        insample_turbulence.turbulence.values, 0.90
    )

    start = time.time()
    for i in range(
        rebalance_window + validation_window, len(unique_trade_date), rebalance_window
    ):
        ## initial state is empty
        if i - rebalance_window - validation_window == 0:
            # initial state
            initial = True
        else:
            # previous state
            initial = False

        # Tuning turbulence index based on historical data
        # Turbulence lookback window is one quarter
        end_date_index = df.index[
            df["datadate"]
            == unique_trade_date[i - rebalance_window - validation_window]
        ].to_list()[-1]
        start_date_index = end_date_index - validation_window * 30 + 1
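        # Assumption: the data has one row per ticker per day (~30 for the Dow),
        # so stepping back validation_window * 30 rows covers roughly one quarter
        # of trading days, matching the "one quarter" lookback noted above.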

        historical_turbulence = df.iloc[start_date_index : (end_date_index + 1), :]
        # historical_turbulence = df[(df.datadate<unique_trade_date[i - rebalance_window - validation_window]) & (df.datadate>=(unique_trade_date[i - rebalance_window - validation_window - 63]))]

        historical_turbulence = historical_turbulence.drop_duplicates(
            subset=["datadate"]
        )

        historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)

        if historical_turbulence_mean > insample_turbulence_threshold:
            # if the mean of the historical data is greater than the 90% quantile of insample turbulence data
            # then we assume that the current market is volatile,
            # therefore we set the 90% quantile of insample turbulence data as the turbulence threshold
            # meaning the current turbulence can't exceed the 90% quantile of insample turbulence data
            turbulence_threshold = insample_turbulence_threshold
        else:
            # if the mean of the historical turbulence is below the 90% quantile of the in-sample data,
            # the recent market is relatively calm, so we relax the threshold to the in-sample maximum
            turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)

        style = "[bold #31DDCF]"
        rprint(
            Align(
                f"{style}Turbulence Threshold:[/] {str(turbulence_threshold)}", "center"
            )
        )

        ############## Environment Setup starts ##############
        ## training env
        train = data_split(
            df,
            start=config.TRAINING_START_DATE,
            end=unique_trade_date[i - rebalance_window - validation_window],
        )
        env_train = DummyVecEnv([lambda: StockEnvTrain(train)])

        ## validation env
        validation = data_split(
            df,
            start=unique_trade_date[i - rebalance_window - validation_window],
            end=unique_trade_date[i - rebalance_window],
        )
        env_val = DummyVecEnv(
            [
                lambda: StockEnvValidation(
                    validation, turbulence_threshold=turbulence_threshold, iteration=i
                )
            ]
        )

        obs_val = env_val.reset()
        ############## Environment Setup ends ##############

        ############## Training and Validation starts ##############
        table = Table(
            title=f"Training from 20090000 to {unique_trade_date[i - rebalance_window - validation_window]}",
            expand=True,
        )
        table.add_column("Mode Name", justify="center")
        table.add_column("Sharpe Ratio")
        table.add_column("Training Time")

        with Live(table, auto_refresh=False) as live:

            model_a2c, a2c_training_time = train_A2C(
                env_train, model_name="A2C_30k_dow_{}".format(i), timesteps=30000
            )
            DRL_validation(
                model=model_a2c,
                test_data=validation,
                test_env=env_val,
                test_obs=obs_val,
            )
            sharpe_a2c = get_validation_sharpe(i)
            table.add_row("A2C", str(sharpe_a2c), f"{a2c_training_time} minutes")
            live.update(table, refresh=True)

            model_ppo, ppo_training_time = train_PPO(
                env_train, model_name="PPO_100k_dow_{}".format(i), timesteps=100000
            )

            DRL_validation(
                model=model_ppo,
                test_data=validation,
                test_env=env_val,
                test_obs=obs_val,
            )
            sharpe_ppo = get_validation_sharpe(i)
            table.add_row("PPO", str(sharpe_ppo), f"{ppo_training_time} minutes")
            live.update(table, refresh=True)

            model_ddpg, ddpg_training_time = train_DDPG(
                env_train, model_name="DDPG_10k_dow_{}".format(i), timesteps=10000
            )
            # model_ddpg = train_TD3(env_train, model_name="DDPG_10k_dow_{}".format(i), timesteps=20000)
            DRL_validation(
                model=model_ddpg,
                test_data=validation,
                test_env=env_val,
                test_obs=obs_val,
            )
            sharpe_ddpg = get_validation_sharpe(i)
            table.add_row("DDPG", str(sharpe_ddpg), f"{ddpg_training_time} minutes")
            live.update(table, refresh=True)

            ppo_sharpe_list.append(sharpe_ppo)
            a2c_sharpe_list.append(sharpe_a2c)
            ddpg_sharpe_list.append(sharpe_ddpg)

        # Model Selection based on sharpe ratio
        if (sharpe_ppo >= sharpe_a2c) & (sharpe_ppo >= sharpe_ddpg):
            model_ensemble = model_ppo
            model_used.append("PPO")
        elif (sharpe_a2c > sharpe_ppo) & (sharpe_a2c > sharpe_ddpg):
            model_ensemble = model_a2c
            model_used.append("A2C")
        else:
            model_ensemble = model_ddpg
            model_used.append("DDPG")
        ############## Training and Validation ends ##############

        ############## Trading starts ##############

        # print("Used Model: ", model_ensemble)
        last_state_ensemble = DRL_prediction(
            df=df,
            model=model_ensemble,
            name="ensemble",
            last_state=last_state_ensemble,
            iter_num=i,
            unique_trade_date=unique_trade_date,
            rebalance_window=rebalance_window,
            turbulence_threshold=turbulence_threshold,
            initial=initial,
        )
        print("\n\n")
        # print("============Trading Done============")
        ############## Trading ends ##############

    end = time.time()
    print("Ensemble Strategy took: ", (end - start) / 60, " minutes")