def train_one():
    """Train the model named by ``config.CURRENT_MODEL`` and trade with it.

    The processed data from ``prepare_data()`` is split into a training
    window (``START_DATE`` .. ``START_TRADE_DATE``) and a trading window
    (``START_TRADE_DATE`` .. ``END_DATE``).  The trained model is saved under
    ``config.TRAINED_MODEL_DIR``, reloaded from that file, run through
    ``DRLAgent.DRL_prediction`` on the trading environment, and the resulting
    account values and actions are handed to ``log_account``.
    """
    env_kwargs, processed = prepare_data()

    # Carve the processed frame into the training and the trading windows.
    train_df = data_split(processed, config.START_DATE,
                          config.START_TRADE_DATE)
    trade_df = data_split(processed, config.START_TRADE_DATE,
                          config.END_DATE)

    train_gym = StockTradingEnv(df=train_df, **env_kwargs)
    trade_gym = StockTradingEnv(df=trade_df,
                                turbulence_threshold=250,
                                **env_kwargs)

    sb_train_env, _unused_obs = train_gym.get_sb_env()
    drl_agent = DRLAgent(env=sb_train_env)

    print("==============Model Training===========")
    fitted = drl_agent.train_model(
        model=drl_agent.get_model(config.CURRENT_MODEL),
        total_timesteps=80000,
    )

    # Save the trained model and continue with the copy read back from disk.
    checkpoint = f"{config.TRAINED_MODEL_DIR}/model"
    fitted.save(checkpoint)
    reloaded = fitted.load(checkpoint)

    print("==============Start Trading===========")
    account_values, actions = DRLAgent.DRL_prediction(model=reloaded,
                                                      environment=trade_gym)
    log_account(account_values, actions)
def new_test():
    """Train an A2C agent from a pre-processed CSV and run a prediction pass.

    Reads ``./me/datasets/new_data_with_techs_turb.csv``, splits it with
    ``data_split`` at 2009-01-01 / 2018-01-01 / 2021-01-01, trains A2C for
    100000 timesteps on the first window (with the first sub-environment
    wrapped in ``Monitor`` logging to ``me/tmp/``), then calls
    ``DRLAgent.DRL_prediction`` on the second window and prints the resulting
    account values.
    """
    csv_path = os.path.abspath('./me/datasets/new_data_with_techs_turb.csv')
    processed = pd.read_csv(csv_path, index_col=0)

    train = data_split(processed, '2009-01-01', '2018-01-01')
    trade = data_split(processed, '2018-01-01', '2021-01-01')

    # State layout: 1 slot + (2 + number of indicators) slots per ticker.
    stock_dimension = len(train.tic.unique())
    state_space = 1 + stock_dimension * (
        2 + len(config.TECHNICAL_INDICATORS_LIST))
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    env_kwargs = dict(
        [
            ("hmax", 100),
            ("initial_amount", 1000000),
            ("transaction_cost_pct", 0.001),
            ("state_space", state_space),
            ("stock_dim", stock_dimension),
            ("tech_indicator_list", config.TECHNICAL_INDICATORS_LIST),
            ("action_space", stock_dimension),
            ("reward_scaling", 1e-4),
        ]
    )

    train_gym = StockTradingEnv(df=train, **env_kwargs)
    sb_train_env, _unused_obs = train_gym.get_sb_env()

    # Wrap the first sub-environment so episode statistics land in log_dir.
    log_dir = "me/tmp/"
    os.makedirs(log_dir, exist_ok=True)
    sb_train_env.envs[0] = Monitor(sb_train_env.envs[0], log_dir)

    drl_agent = DRLAgent(env=sb_train_env)
    a2c_model = drl_agent.get_model("a2c", verbose=0)
    a2c_trained = drl_agent.train_model(model=a2c_model,
                                        tb_log_name='a2c',
                                        total_timesteps=100000)

    # In-sample turbulence: one row per date inside the training window.
    # NOTE(review): the value computed here (the 1.0 quantile, i.e. the
    # maximum) is never read afterwards -- the trade environment below is
    # built with the literal 380.  Confirm which one is intended.
    on_or_after_start = processed.date >= '2009-01-01'
    before_end = processed.date < '2018-01-01'
    window_rows = processed[before_end & on_or_after_start]
    per_date_rows = window_rows.drop_duplicates(subset=['date'])
    turbulence_threshold = np.quantile(per_date_rows.turbulence.values, 1)

    trade_gym = StockTradingEnv(df=trade,
                                turbulence_threshold=380,
                                **env_kwargs)
    sb_trade_env, first_obs = trade_gym.get_sb_env()

    print("BEGIN PREDICTION")
    account_values, actions = DRLAgent.DRL_prediction(model=a2c_trained,
                                                      test_data=trade,
                                                      test_env=sb_trade_env,
                                                      test_obs=first_obs)
    print(account_values)
    print("END PREDICTION")
def train_model(e_train_gym,
                tb_log_name,
                model_type="a2c",
                load_model_path='',
                save_model_path='',
                train_timesteps=80000):
    """Build (or reload) a model, train it, and optionally save it.

    Args:
        e_train_gym: Training environment exposing ``get_sb_env()``.
        tb_log_name: Run name forwarded to ``DRLAgent.train_model``.
        model_type: Model key passed to ``DRLAgent.get_model``.
        load_model_path: When non-empty, parameters are loaded from this path
            before training continues.
        save_model_path: When non-empty, the trained model is saved here.
            An empty string (the default) skips saving.
        train_timesteps: Number of timesteps to train for.

    Returns:
        The trained model returned by ``DRLAgent.train_model``.
    """
    training_env, _ = e_train_gym.get_sb_env()
    training_agent = DRLAgent(training_env)
    model = training_agent.get_model(model_type)

    if load_model_path:
        print("LOADING MODEL PARAMETERS")
        # ``load`` builds a fresh model object; pass the environment along so
        # the reloaded model can keep learning (a stable-baselines style
        # ``load(path, env=...)`` signature is assumed here).
        model = model.load(load_model_path, env=training_env)

    print("=======TRAINING MODEL========")
    trained_model = training_agent.train_model(
        model, tb_log_name=tb_log_name, total_timesteps=train_timesteps)

    # The default save path is '', which is not a usable file name; only
    # write a checkpoint when the caller actually asked for one.
    if save_model_path:
        trained_model.save(save_model_path)
    return trained_model
# ### Model 1: A2C # In[27]: agent = DRLAgent(env = env_train) A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002} model_a2c = agent.get_model(model_name="a2c",model_kwargs = A2C_PARAMS) # In[28]: trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c', total_timesteps=50000) # In[ ]: agent = DRLAgent(env = env_train) A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002} model_a2c = agent.get_model(model_name="a2c",model_kwargs = A2C_PARAMS) for i in range(20): for j in range(20): agent = DRLAgent(env = env_train) A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005+(i-10)/1000, "learning_rate": 0.0002+(j-10)/200000}
"tech_indicator_list": config.TECHNICAL_INDICATORS_LIST, "action_space": stock_dimension, "reward_scaling": 1e-4 } e_train_gym = StockTradingEnv(df=train, **env_kwargs) env_train, _ = e_train_gym.get_sb_env() agent = DRLAgent(env=env_train) model_wm = agent.get_model("wm") print('START TRAIN') trained_wm = agent.train_model(model=model_wm, tb_log_name='wm', total_timesteps=30000) trade = data_split(data_df, start='2019-01-01', end='2021-01-01') e_trade_gym = StockTradingEnv(df=trade, **env_kwargs) env_trade, obs_trade = e_trade_gym.get_sb_env() df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_wm, test_data=trade, test_env=env_trade, test_obs=obs_trade) print("==============Get Backtest Results===========") now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
env_train, _ = e_train_gym.get_sb_env() ############# BUILD AGENT ############# agent = DRLAgent(env=env_train) '''Set Agent Parameters''' A2C_params = { "n_steps": 2048, "ent_coef": 0.01, "learning_rate": 0.00025, } model_a2c = agent.get_model("a2c", model_kwargs=A2C_params) '''Train Agent''' trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c', total_timesteps=1000) ############# BUILD TRADING ENV ############# '''Set Turbulence Threshold''' data_turbulence = processed_full[(processed_full.date < '2019-01-01') & (processed_full.date >= '2009-01-01')] insample_turbulence = data_turbulence.drop_duplicates(subset=['date']) turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1) turbulence_threshold '''Create Trade Env''' e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=380, **env_kwargs) ############# TRADE #############
def train_one():
    """Download DOW 30 data, train a SAC agent, trade, and write CSV results.

    Steps, in order: fetch prices with ``YahooDownloader``, add technical
    indicators and turbulence with ``FeatureEngineer``, split into training
    and trading windows at ``config.START_TRADE_DATE``, train SAC for 80000
    timesteps, run ``DRLAgent.DRL_prediction`` on the trading environment,
    and save account values, actions and ``BackTestStats`` output as CSV
    files under ``config.RESULTS_DIR``.  File names carry a timestamp taken
    just before training starts.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    )
    df = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = engineer.preprocess_data(df)

    # Training window first, trading window second.
    train_df = data_split(processed, config.START_DATE,
                          config.START_TRADE_DATE)
    trade_df = data_split(processed, config.START_TRADE_DATE,
                          config.END_DATE)

    # State layout: 1 slot + (2 + number of indicators) slots per ticker.
    n_stocks = len(train_df.tic.unique())
    n_state = 1 + n_stocks * (2 + len(config.TECHNICAL_INDICATORS_LIST))

    env_kwargs = dict(
        [
            ("hmax", 100),
            ("initial_amount", 1000000),
            ("buy_cost_pct", 0.001),
            ("sell_cost_pct", 0.001),
            ("state_space", n_state),
            ("stock_dim", n_stocks),
            ("tech_indicator_list", config.TECHNICAL_INDICATORS_LIST),
            ("action_space", n_stocks),
            ("reward_scaling", 1e-4),
        ]
    )

    train_gym = StockTradingEnv(df=train_df, **env_kwargs)
    trade_gym = StockTradingEnv(df=trade_df,
                                turbulence_threshold=250,
                                **env_kwargs)
    sb_train_env, _unused_obs = train_gym.get_sb_env()
    sb_trade_env, first_trade_obs = trade_gym.get_sb_env()

    drl_agent = DRLAgent(env=sb_train_env)

    print("==============Model Training===========")
    # Timestamp is taken before training; it tags every output file below.
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    sac_trained = drl_agent.train_model(model=drl_agent.get_model("sac"),
                                        tb_log_name="sac",
                                        total_timesteps=80000)

    print("==============Start Trading===========")
    account_values, actions = DRLAgent.DRL_prediction(
        model=sac_trained,
        test_data=trade_df,
        test_env=sb_trade_env,
        test_obs=first_trade_obs)

    out_dir = "./" + config.RESULTS_DIR
    stamp_ext = now + ".csv"
    account_values.to_csv(out_dir + "/df_account_value_" + stamp_ext)
    actions.to_csv(out_dir + "/df_actions_" + stamp_ext)

    print("==============Get Backtest Results===========")
    stats_frame = pd.DataFrame(BackTestStats(account_values))
    stats_frame.to_csv(out_dir + "/perf_stats_all_" + stamp_ext)
A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002} model_a2c = agent.get_model(model_name="a2c",model_kwargs = A2C_PARAMS) trained_a2c = agent.train_model(model=model_a2c, tb_log_name='a2c', total_timesteps=50000) ''' #PPO PPO_PARAMS = { "n_steps": 2048, "ent_coef": 0.005, "learning_rate": 0.0001, "batch_size": 128, } model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS) trained_ppo = agent.train_model(model=model_ppo, tb_log_name='ppo', total_timesteps=60000) #Trading ## make a prediction and get the account value change trade = pd.read_csv(args.testing, names=col_names) trade['tic'] = 'IBM' base = datetime.strptime(config.START_TRADE_DATE, "%Y-%m-%d") date = [base + timedelta(days=x) for x in range(len(trade))] trade['date'] = date trade = fe.preprocess_data(trade) e_trade_gym = StockTradingEnv(df=trade, **env_kwargs) actions = None df_account_value, df_actions = DRLAgent.DRL_prediction(
# Seed the raw training environment and its action space.
# NOTE(review): these two calls use the literal 42 while every later call
# uses the `seed` variable -- confirm the mix is intentional.
e_train_gym.seed(42)
e_train_gym.action_space.seed(42)
env_train, _ = e_train_gym.get_sb_env()
# Seed the wrapped environment returned by get_sb_env() as well.
env_train.seed(seed)
env_train.action_space.seed(seed)
print(type(env_train))
agent = DRLAgent(env = env_train)
# DDPG hyperparameters: batch_size and lr come from names defined outside
# this excerpt; the buffer size is fixed at 50000.
model_ddpg = agent.get_model("ddpg", model_kwargs={"batch_size": batch_size, "buffer_size": 50000, "learning_rate": lr} )
trained_ddpg = agent.train_model(model=model_ddpg, tb_log_name='ddpg', total_timesteps=50000)
# Evaluation environment built on the `validation` frame, seeded with `seed`.
e_trade_gym = StockTradingEnv(df = validation, **env_kwargs)
e_trade_gym.seed(seed)
e_trade_gym.action_space.seed(seed)
df_account_value, df_actions = DRLAgent.DRL_prediction( model=trained_ddpg, environment = e_trade_gym )
print("==============Get Backtest Results===========")
# Timestamp string; not used within this excerpt.
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
perf_stats_all = backtest_stats(account_value=df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)
def train_one():
    """Train (or load) a SAC agent for FXAIX, trade with it, and plot results.

    The user is prompted on stdin: answering ``1`` trains a new SAC model for
    8000 timesteps and saves it under ``../models``; any other answer loads
    the pre-trained model ``../models/sac_80k_msft_working``.  The model is
    then run through ``DRLAgent.DRL_prediction`` on the trading window, the
    account values, actions and backtest statistics are written as CSV files
    under ``../<config.RESULTS_DIR>``, and a two-panel figure (account value
    coloured by action, and closing price) is saved under its ``plots``
    sub-directory.
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=['FXAIX'],
    ).fetch_data()

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    # Results are unused below, but the call is kept in case get_sb_env()
    # has side effects on the trade environment.
    env_trade, obs_trade = e_trade_gym.get_sb_env()
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    # Timestamp and ticker label used in every output file name.
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    ticker = df.tic[0]

    user_input = input('train model? 1 train 0 don\'t train')
    # input() returns a string, so the answer must be compared with "1";
    # comparing with the integer 1 is never true and training never ran.
    if user_input.strip() == "1":
        model_sac = agent.get_model("sac")
        trained_sac = agent.train_model(model=model_sac,
                                        tb_log_name="sac",
                                        total_timesteps=8000)
        trained_sac.save("../models/sac_8k" + ticker + "_frl")
    else:
        trained_sac = SAC.load('../models/sac_80k_msft_working')

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        trained_sac, e_trade_gym)
    df_account_value.to_csv("../" + config.RESULTS_DIR +
                            "/SAC_df_account_value_" + ticker + "_" + now +
                            ".csv")
    df_actions.to_csv("../" + config.RESULTS_DIR + "/SAC_df_actions_" +
                      ticker + "_" + now + ".csv")

    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("../" + config.RESULTS_DIR +
                          "/SAC_perf_stats_all_" + ticker + "_" + now +
                          ".csv")

    # --- Top panel: account value, coloured segment by segment by action ---
    actions = df_actions['actions']
    x = np.arange(0, df_account_value['account_value'].shape[0])
    y = df_account_value['account_value']
    # Turn the (x, y) polyline into consecutive two-point segments so each
    # one can take its own colour.
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    fig, axs = plt.subplots(2, 1, sharex=True, sharey=False)

    # Three colour bins over the action value: [-100, -0.1) red,
    # [-0.1, 0.1) green, [0.1, 100] blue (labelled Sell / Hold / Buy below).
    cmap = ListedColormap(['r', 'g', 'b'])
    norm = BoundaryNorm([-100, -0.1, 0.1, 100], cmap.N)
    lc = LineCollection(segments, cmap=cmap, norm=norm)
    lc.set_array(actions)
    lc.set_linewidth(2)
    axs[0].add_collection(lc)

    axs[1].set_xlabel('Trading Day (' + 'From ' + config.START_TRADE_DATE +
                      " to " + config.END_DATE + ')')
    axs[0].set_ylabel('Account Value (10000 of USD)')
    axs[0].set_title("Trading Test on " + ticker)
    # add_collection does not autoscale the axes, so set the limits by hand.
    axs[0].set_xlim(x.min(), x.max())
    axs[0].set_ylim(y.min(), y.max())
    custom_lines = [
        Line2D([0], [0], color=cmap(0.), lw=4),
        Line2D([0], [0], color=cmap(.5), lw=4),
        Line2D([0], [0], color=cmap(1.), lw=4)
    ]
    axs[0].legend(custom_lines, ['Sell', 'Hold', 'Buy'])

    # --- Bottom panel: closing price; pyplot calls act on the current axes ---
    tx = np.arange(0, df_account_value['account_value'].shape[0])
    ty = trade['close']
    plt.ylabel('Price (USD)')
    plt.title(ticker + " Closing Price")
    plt.plot(tx, ty)
    plt.savefig("../" + config.RESULTS_DIR + "/plots/"
                "SAC_plot_" + ticker + "_" + now + ".png")
def train_one():
    """Download DOW 30 data, train a SAC agent, trade, and write CSV results.

    After feature engineering the data is expanded to a full date x ticker
    grid (restricted to dates present in the processed data), sorted by date
    and ticker, with missing values filled with 0.  SAC is trained for 80000
    timesteps on the training window; the trading window is then run through
    ``DRLAgent.DRL_prediction`` and account values, actions and
    ``backtest_stats`` output are saved as CSV files under
    ``config.RESULTS_DIR``, tagged with a timestamp taken before training.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    )
    df = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = engineer.preprocess_data(df)

    # Every calendar day between the first and last date, crossed with every
    # ticker, gives the full (date, tic) grid.
    tickers = processed["tic"].unique().tolist()
    first_day = processed['date'].min()
    last_day = processed['date'].max()
    day_strings = list(pd.date_range(first_day, last_day).astype(str))
    grid_rows = list(itertools.product(day_strings, tickers))
    grid = pd.DataFrame(grid_rows, columns=["date", "tic"])

    # Left-join the features onto the grid, keep only dates that occur in
    # the processed data, order the rows, and zero-fill the gaps.
    merged = grid.merge(processed, on=["date", "tic"], how="left")
    known_dates = merged['date'].isin(processed['date'])
    processed_full = (merged[known_dates]
                      .sort_values(['date', 'tic'])
                      .fillna(0))

    # Training window first, trading window second.
    train_df = data_split(processed_full, config.START_DATE,
                          config.START_TRADE_DATE)
    trade_df = data_split(processed_full, config.START_TRADE_DATE,
                          config.END_DATE)

    # State layout: 1 slot + (2 + number of indicators) slots per ticker.
    n_stocks = len(train_df.tic.unique())
    n_state = 1 + n_stocks * (2 + len(config.TECHNICAL_INDICATORS_LIST))

    env_kwargs = dict(
        [
            ("hmax", 100),
            ("initial_amount", 1000000),
            ("buy_cost_pct", 0.001),
            ("sell_cost_pct", 0.001),
            ("state_space", n_state),
            ("stock_dim", n_stocks),
            ("tech_indicator_list", config.TECHNICAL_INDICATORS_LIST),
            ("action_space", n_stocks),
            ("reward_scaling", 1e-4),
        ]
    )

    train_gym = StockTradingEnv(df=train_df, **env_kwargs)
    sb_train_env, _unused_obs = train_gym.get_sb_env()
    drl_agent = DRLAgent(env=sb_train_env)

    print("==============Model Training===========")
    # Timestamp is taken before training; it tags every output file below.
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    sac_trained = drl_agent.train_model(model=drl_agent.get_model("sac"),
                                        tb_log_name="sac",
                                        total_timesteps=80000)

    print("==============Start Trading===========")
    trade_gym = StockTradingEnv(df=trade_df,
                                turbulence_threshold=250,
                                **env_kwargs)
    account_values, actions = DRLAgent.DRL_prediction(model=sac_trained,
                                                      environment=trade_gym)

    out_dir = "./" + config.RESULTS_DIR
    stamp_ext = now + ".csv"
    account_values.to_csv(out_dir + "/df_account_value_" + stamp_ext)
    actions.to_csv(out_dir + "/df_actions_" + stamp_ext)

    print("==============Get Backtest Results===========")
    stats_frame = pd.DataFrame(backtest_stats(account_values))
    stats_frame.to_csv(out_dir + "/perf_stats_all_" + stamp_ext)
def train_one(fetch=False):
    """Train a SAC agent on the crypto tickers, trade, and backtest.

    Args:
        fetch: When true, data comes from ``fetch_and_store()``; otherwise
            from ``load()``.

    The data must contain the same number of tickers on every date.  After
    feature engineering, the split dates come from ``calculate_split``.  SAC
    is trained for 80000 timesteps, the trading window is run through
    ``DRLAgent.DRL_prediction``, and account values, actions, transactions,
    positions and backtest statistics are written as CSV files under
    ``config.RESULTS_DIR`` before ``backtest_plot`` is called.

    Raises:
        AssertionError: If the number of rows per date is not constant.
    """
    df = fetch_and_store() if fetch else load()

    # Every date has to carry the same number of tickers.
    counts = df[['date', 'tic']].groupby(['date']).count().tic
    assert counts.min() == counts.max(), \
        "number of tickers per date is not constant"

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # Training & Trading data split
    start_date, trade_date, end_date = calculate_split(
        df, start=config.START_DATE)
    print(start_date, trade_date, end_date)
    train = data_split(processed, start_date, trade_date)
    trade = data_split(processed, trade_date, end_date)
    print(
        f'\n******\nRunning from {start_date} to {end_date} for:\n{", ".join(config.CRYPTO_TICKER)}\n******\n'
    )

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + (2 * stock_dimension) +
                   (len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension))
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 100000,
        "buy_cost_pct": 0.0026,
        "sell_cost_pct": 0.0026,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  make_plots=True,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    # Results are unused below, but the call is kept in case get_sb_env()
    # has side effects on the trade environment.
    env_trade, obs_trade = e_trade_gym.get_sb_env()
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    # Timestamp is taken before training; it tags every output file below.
    now = datetime.datetime.now().strftime(config.DATETIME_FMT)
    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        trained_sac, e_trade_gym)
    df_account_value.to_csv(
        f"./{config.RESULTS_DIR}/df_account_value_{now}.csv")
    df_actions.to_csv(f"./{config.RESULTS_DIR}/df_actions_{now}.csv")

    # Transactions recorded by the trade environment, indexed by UTC date.
    df_txns = pd.DataFrame(e_trade_gym.transactions,
                           columns=['date', 'amount', 'price', 'symbol'])
    df_txns = df_txns.set_index(pd.DatetimeIndex(df_txns['date'],
                                                 tz=pytz.utc))
    df_txns.to_csv(f'./{config.RESULTS_DIR}/df_txns_{now}.csv')

    # Positions recorded by the trade environment: cash plus one column per
    # ticker, indexed by UTC date.
    df_positions = pd.DataFrame(e_trade_gym.positions,
                                columns=['date', 'cash'] +
                                config.CRYPTO_TICKER)
    df_positions = df_positions.set_index(
        pd.DatetimeIndex(df_positions['date'],
                         tz=pytz.utc)).drop(columns=['date'])
    # Cast every column to float64.  The previous code assigned the whole
    # converted frame to the single 'cash' column, which pandas rejects when
    # the frame has more than one column.
    df_positions = df_positions.astype(np.float64)
    df_positions.to_csv(f'./{config.RESULTS_DIR}/df_positions_{now}.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value,
                                    transactions=df_txns,
                                    positions=df_positions)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(f"./{config.RESULTS_DIR}/perf_stats_all_{now}.csv")
    backtest_plot(df_account_value,
                  baseline_start=trade_date,
                  baseline_end=end_date,
                  positions=df_positions,
                  transactions=df_txns)