def DRL_prediction(df, model, name, last_state, iter_num, unique_trade_date,
                   rebalance_window, turbulence_threshold, initial):
    ### make a prediction based on trained model ###
    ## trading env
    trade_data = data_split(df,
                            start=unique_trade_date[iter_num - rebalance_window],
                            end=unique_trade_date[iter_num])
    env_trade = DummyVecEnv([
        lambda: StockEnvTrade(trade_data,
                              turbulence_threshold=turbulence_threshold,
                              initial=initial,
                              previous_state=last_state,
                              model_name=name,
                              iteration=iter_num)
    ])
    obs_trade = env_trade.reset()

    for i in range(len(trade_data.index.unique())):
        action, _states = model.predict(obs_trade)
        obs_trade, rewards, dones, info = env_trade.step(action)
        if i == (len(trade_data.index.unique()) - 2):
            # print(env_test.render())
            last_state = env_trade.render()

    df_last_state = pd.DataFrame({'last_state': last_state})
    df_last_state.to_csv('results/last_state_{}_{}.csv'.format(name, i), index=False)
    return last_state

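# Example (sketch): how DRL_prediction above can be driven over rolling rebalance
# windows. This helper is illustrative only; `preprocessed_df` (a DataFrame with a
# `datadate` column, as produced by the preprocessing step) and `trained_model`
# (a stable-baselines model) are assumed to be supplied by the caller.
def _example_rolling_prediction(preprocessed_df, trained_model, rebalance_window=63,
                                turbulence_threshold=140):
    unique_trade_date = preprocessed_df[preprocessed_df.datadate > 20151001].datadate.unique()
    last_state, initial = [], True
    for iter_num in range(rebalance_window, len(unique_trade_date), rebalance_window):
        last_state = DRL_prediction(preprocessed_df, trained_model, "example_ppo",
                                    last_state, iter_num, unique_trade_date,
                                    rebalance_window, turbulence_threshold, initial)
        initial = False
    return last_state
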
def DRL_prediction_no_rebalance(df, model, name, unique_trade_date, log_interval):
    ### make a prediction based on trained model ###
    ## trading env
    print("DRL PREDICTION NO REBALANCE")
    all_data = data_split(df,
                          start=unique_trade_date[0],
                          end=unique_trade_date[len(unique_trade_date) - 1])
    last_state = []  # ensure a defined return value even if no ticker has enough rows
    for ticker in all_data["tic"].unique():
        trade_data = all_data[all_data["tic"] == ticker]
        env_trade = DummyVecEnv([
            lambda: StockEnvTrade(trade_data,
                                  initial=True,
                                  model_name=name + "_" + ticker,
                                  log_interval=log_interval)
        ])
        obs_trade = env_trade.reset()
        for i in range(len(trade_data.index.unique())):
            action, _states = model.predict(obs_trade)
            print("action: ", action)
            obs_trade, rewards, dones, info = env_trade.step(action)
            if i == (len(trade_data.index.unique()) - 2):
                # print(env_test.render())
                last_state = env_trade.render()
                # df_last_state = pd.DataFrame({'last_state': last_state})
                # df_last_state.to_csv('results/last_state_{}_{}.csv'.format(name, i), index=False)
    return last_state

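# Example (sketch): running a single trained model across the whole trade period,
# one ticker at a time, with no rebalancing. As above, `preprocessed_df` and
# `trained_model` are illustrative placeholders supplied by the caller.
def _example_prediction_no_rebalance(preprocessed_df, trained_model):
    unique_trade_date = preprocessed_df[preprocessed_df.datadate > 20151001].datadate.unique()
    return DRL_prediction_no_rebalance(preprocessed_df, trained_model, name="example_ppo",
                                       unique_trade_date=unique_trade_date, log_interval=10)
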
# NOTE: variant of DRL_prediction that also logs each (observation, action) pair to
# state_action.txt; if both variants live in the same module, this definition shadows
# the earlier one.
def DRL_prediction(df, model, name, last_state, iter_num, unique_trade_date,
                   rebalance_window, turbulence_threshold, initial):
    ### make a prediction based on trained model ###
    ## trading env
    trade_data = data_split(df,
                            start=unique_trade_date[iter_num - rebalance_window],
                            end=unique_trade_date[iter_num])
    '''
    env_trade = DummyVecEnv([
        lambda: StockEnvTrade(trade_data,
                              turbulence_threshold=turbulence_threshold,
                              initial=initial,
                              previous_state=last_state,
                              model_name=name,
                              iteration=iter_num)
    ])
    '''
    from stable_baselines.common import make_vec_env
    env_trade = make_vec_env(
        lambda: StockEnvTrade(trade_data,
                              turbulence_threshold=turbulence_threshold,
                              initial=initial,
                              previous_state=last_state,
                              model_name=name,
                              iteration=iter_num),
        n_envs=1)
    obs_trade = env_trade.env_method('reset')

    np.set_printoptions(formatter={'float_kind': '{:f}'.format})
    fh = open('state_action.txt', 'a+')
    for i in range(len(trade_data.index.unique())):
        action, _states = model.predict(obs_trade)
        fh.write(str(obs_trade[0]) + "\n")
        fh.write(str(action[0]) + "\n")
        obs_trade, rewards, dones, info = env_trade.step(action)
        if i == (len(trade_data.index.unique()) - 2):
            # print(env_test.render())
            last_state = env_trade.env_method('render')
            print('^' * 20)
            print(last_state)
            print('^' * 20)
    fh.close()

    df_last_state = pd.DataFrame(data={'last_state': last_state[0]})
    df_last_state.to_csv('results/last_state_{}_{}.csv'.format(name, i), index=False)
    return last_state[0]

def train_PPO_models(stocks=['./data/TSLA.csv', './data/FB.csv'],
                     tickers=['TSLA', 'FB'],
                     start_date=20130102,
                     end_date=20180101,
                     model_save_path='/content/DeepRL4Stocks/trained_models/'):
    teachers = []
    envs = []
    for i in range(len(stocks)):
        print(i)
        df = process_yahoo_finance(stocks[i], tickers[i])
        train = data_split(df, start=start_date, end=end_date)
        env_train = DummyVecEnv([lambda: StockEnvTrade(train)])
        model = train_PPO(env_train, model_name='PPO_' + tickers[i])
        model.save(model_save_path + "PPO_" + tickers[i])
        teachers.append(model)
        envs.append(env_train)
    return teachers, envs

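# Example (sketch): training one PPO "teacher" per ticker from local CSVs. The file
# paths and save directory below are illustrative; point them at wherever the raw
# Yahoo Finance data and trained models live in your setup.
def _example_train_teachers():
    return train_PPO_models(stocks=['./data/TSLA.csv', './data/FB.csv'],
                            tickers=['TSLA', 'FB'],
                            start_date=20130102,
                            end_date=20180101,
                            model_save_path='./trained_models/')
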
def train_multitask(df, unique_trade_date, timesteps=10, policy="MlpPolicy", model_name="multitask"): # df of all intermixed values # get out the individual tickers and switch out the dates # timesteps = num training steps per date start = time.time() df = data_split(df, start=unique_trade_date[0], end=unique_trade_date[len(unique_trade_date) - 1]) last_state, initial = [], True model = None for i in range(len(unique_trade_date) - 2): for ticker in df["tic"].unique(): # Interval is every two days so we can optimize on the change in account value start_date, end_date = unique_trade_date[i], unique_trade_date[i + 2] quanta_df = data_split(df, start=start_date, end=end_date) quanta_df = quanta_df[quanta_df["tic"] == ticker] if len(quanta_df.index) < 2: continue quanta_df = quanta_df.reset_index() quanta_env = DummyVecEnv([ lambda: StockEnvTrade(quanta_df, previous_state=last_state, initial=initial, log_interval=1) ]) quanta_env.reset() model = train_PPO_update(model, quanta_env, timesteps, policy=policy) last_state = quanta_env.render() initial = False model.save(f"{config.TRAINED_MODEL_DIR}/{model_name}") end = time.time() print('Training time (Multitask): ', (end - start) / 60, ' minutes') return model
def test_model(df, model, model_name, turbulence_threshold=140, start=20151001, end=20200707):
    trade_data = data_split(df, start=start, end=end)
    env_trade = DummyVecEnv([
        lambda: StockEnvTrade(trade_data,
                              turbulence_threshold=turbulence_threshold,
                              initial=True,
                              previous_state=[],
                              model_name=model_name,
                              iteration=0)
    ])
    obs_trade = env_trade.reset()

    state = None
    dones = [False for _ in range(env_trade.num_envs)]
    for i in range(len(trade_data.index.unique())):
        action, state = model.predict(obs_trade, state=state, mask=dones)
        obs_trade, rewards, dones, info = env_trade.step(action)
    return info[0]

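# Example (sketch): evaluating a previously saved model on a held-out window and
# returning the final info dict from the trading env. Loading with PPO2 follows the
# stable-baselines (v2) API used elsewhere in this project; the model name and
# `preprocessed_df` are illustrative placeholders.
def _example_test_saved_model(preprocessed_df, model_name="multitask_ppo"):
    from stable_baselines import PPO2
    trained_model = PPO2.load(f"{config.TRAINED_MODEL_DIR}/{model_name}")
    return test_model(preprocessed_df, trained_model, model_name=model_name,
                      turbulence_threshold=140, start=20151001, end=20200707)
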