def main():
    """Entry point: parse CLI options, ensure output folders exist, and run the selected mode."""
    parser = build_parser()
    options = parser.parse_args()

    # exist_ok=True replaces the original exists()-then-makedirs() pairs,
    # which were racy and repetitive.
    for folder in (config.DATA_SAVE_DIR, config.TRAINED_MODEL_DIR,
                   config.TENSORBOARD_LOG_DIR, config.RESULTS_DIR):
        os.makedirs("./" + folder, exist_ok=True)

    if options.mode == "train":
        # Deferred import: training pulls in heavy RL dependencies.
        import finrl.autotrain.training
        finrl.autotrain.training.train_one()
    elif options.mode == "download_data":
        from finrl.marketdata.yahoodownloader import YahooDownloader
        df = YahooDownloader(start_date=config.START_DATE,
                             end_date=config.END_DATE,
                             ticker_list=config.DOW_30_TICKER).fetch_data()
        now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
        df.to_csv("./" + config.DATA_SAVE_DIR + "/" + now + ".csv")
def fetch_and_store(start_date=config.START_DATE, end_date=None, interval=None, ticker_list=config.CRYPTO_TICKER):
    """Download OHLCV data via YahooDownloader and save it as a timestamped CSV.

    end_date defaults to today's UTC date when not supplied; interval is
    accepted for interface compatibility but not used here. Returns the
    fetched DataFrame.
    """
    print("==============Start Fetching Data===========")
    effective_end = end_date if end_date else datetime.utcnow().strftime("%Y-%m-%d")
    downloader = YahooDownloader(start_date=start_date,
                                 end_date=effective_end,
                                 ticker_list=ticker_list)
    df = downloader.fetch_data()
    stamp = datetime.now().strftime(config.DATETIME_FMT)
    filename = f'./{config.DATA_SAVE_DIR}/{stamp}.csv'
    df.to_csv(filename)
    return df
def get_baseline(ticker, start, end):
    """Fetch baseline price data for *ticker* between *start* and *end*.

    Returns a price DataFrame for regular tickers. For the 'crypto' sentinel
    it prints a marker and returns None — the original fell through to
    `return dji` with `dji` unbound, raising NameError.
    """
    if ticker == 'crypto':
        print('*********')
        return None  # fix: was an unbound-variable NameError
    dji = YahooDownloader(
        start_date=start,
        end_date=end,
        ticker_list=[ticker]
    ).fetch_data()
    print('here ' * 20)
    print(dji.head())
    return dji
def get_stock_data(start_date: str, end_date: str, stocks_tradable: List[str], tech_indicator_list: List[str]):
    """Download prices for *stocks_tradable* and build the padded feature frame.

    start_date and end_date include the whole period from train, validation
    to test time periods. Returns (feature DataFrame, list of date strings).
    """
    raw = YahooDownloader(start_date=start_date,
                          end_date=end_date,
                          ticker_list=stocks_tradable).fetch_data()
    engineer = FeatureEngineer(use_technical_indicator=True,
                               tech_indicator_list=tech_indicator_list,
                               use_turbulence=False,
                               user_defined_feature=False)
    processed = engineer.preprocess_data(raw)

    tickers = processed["tic"].unique().tolist()
    dates = list(pd.date_range(processed['date'].min(), processed['date'].max()).astype(str))
    # Build the full (date, ticker) grid so every ticker has a row per date.
    grid = pd.DataFrame(list(itertools.product(dates, tickers)), columns=["date", "tic"])
    full = grid.merge(processed, on=["date", "tic"], how="left")
    # Keep only dates that actually appear in the processed data.
    full = full[full['date'].isin(processed['date'])]
    full = full.sort_values(['date', 'tic']).fillna(0)
    return full, dates
def setUp(cls):
    """Download a small two-ticker dataset once and record the indicator columns."""
    cls.ticker_list = ["AAPL", "GOOG"]
    downloader = YahooDownloader(start_date="2009-01-01",
                                 end_date="2021-01-01",
                                 ticker_list=cls.ticker_list)
    cls.df = downloader.fetch_data()
    print(f"df columns: {cls.df.columns}")
    cls.indicators = ["open", "close", "high", "low", "volume"]
def baseline_strat(ticker='^DJI'):
    """Fetch the baseline index over the fixed window and backtest it.

    Returns (price DataFrame with a 'daily_return' column, strategy returns).
    """
    baseline_df = YahooDownloader(start_date="2019-01-02",
                                  end_date="2020-10-30",
                                  ticker_list=[ticker]).fetch_data()
    baseline_df['daily_return'] = baseline_df['close'].pct_change(1)
    strat = backtest_strat(baseline_df)
    return baseline_df, strat
def fetch_initial_numerical(trade_date, prev_days=30, time_fmt="%Y-%m-%d"):
    """Fetch numerical price data for the *prev_days* days before *trade_date*.

    Bug fix: the look-back window was hard-coded as `datetime.timedelta(30)`,
    silently ignoring the prev_days parameter; it is now honored.
    """
    start_date = datetime.datetime.strptime(trade_date, time_fmt) - datetime.timedelta(days=prev_days)
    numerical_df = YahooDownloader(start_date=start_date.strftime(time_fmt),
                                   end_date=trade_date,
                                   ticker_list=stock_tickers).fetch_data()
    return numerical_df
def raw_data_download(raw_data_path, beg_date, end_date, ticker_list):
    """Return raw market data, loading a cached pickle when available.

    When raw_data_path exists it is read back as a DataFrame; otherwise the
    data is fetched from Yahoo Finance and pickled for next time.
    """
    if os.path.exists(raw_data_path):
        import pandas as pd
        raw_df = pd.read_pickle(raw_data_path)  # DataFrame of Pandas
        # ['date' 'open' 'high' 'low' 'close' 'volume' 'tic' 'day']
        print('| raw_df.columns.values:', raw_df.columns.values)
    else:
        from finrl.marketdata.yahoodownloader import YahooDownloader
        downloader = YahooDownloader(
            start_date=beg_date,
            end_date=end_date,
            ticker_list=ticker_list,
        )
        raw_df = downloader.fetch_data()
        raw_df.to_pickle(raw_data_path)
    return raw_df
def baseline_strat(ticker, start, end):
    """Download *ticker* over [start, end), add daily returns, and backtest it."""
    benchmark = YahooDownloader(start_date=start,
                                end_date=end,
                                ticker_list=[ticker]).fetch_data()
    benchmark['daily_return'] = benchmark['close'].pct_change(1)
    strat = backtest_strat(benchmark)
    return benchmark, strat
def main():
    """Download historical data for the configured tickers and save it as CSV."""
    parser = build_parser()
    options = parser.parse_args()

    # add following folder (exist_ok avoids the check-then-create race)
    os.makedirs("./" + config.DATA_SAVE_DIR, exist_ok=True)

    # From config.py file get following:
    START_DATE = config.START_DATE
    END_DATE = config.END_DATE
    STOCK_LIST = config.DOW_30_TICKER
    print("All stocks used for training:", STOCK_LIST)
    print("Historical data are used from: ", START_DATE)
    print("Till end date: ", END_DATE)

    # Download and save the data in a pandas DataFrame:
    data_frame = YahooDownloader(start_date=START_DATE,
                                 end_date=END_DATE,
                                 ticker_list=STOCK_LIST).fetch_data()
    print("Data Frame shape is: ", data_frame.shape)
    print("Data Frame format is following: \n\n", data_frame.head())

    ##
    ## Save downloaded data to file
    ##
    csv_path = "./" + config.DATA_SAVE_DIR + "/" + options.name + ".csv"
    if os.path.exists(csv_path):
        os.remove(csv_path)
        print("The download data file deleted")
    else:
        print("The download data file does not exist")
    data_frame.to_csv(csv_path)
    # typo fix: "Successfuly" -> "Successfully"
    print(
        "Successfully completed the task of downloading and saving financial data."
    )
def load_stock_trading_data():
    """Load (or build and cache) raw + feature-engineered DOW-30 data, then split.

    Returns (train_df, eval_df) covering 2008-03-19..2016-01-01 and
    2016-01-01..2021-01-01 respectively. Both the raw download and the
    engineered features are cached as pickles under ./env/FinRL.
    """
    from finrl.config import config
    cwd = './env/FinRL'
    raw_data_path = f'{cwd}/StockTradingEnv_raw_data.df'
    processed_data_path = f'{cwd}/StockTradingEnv_processed_data.df'
    os.makedirs(cwd, exist_ok=True)

    print("==============Start Fetching Data===========")
    if os.path.exists(raw_data_path):
        raw_df = pd.read_pickle(raw_data_path)  # DataFrame of Pandas
        print('| raw_df.columns.values:', raw_df.columns.values)
    else:
        from finrl.marketdata.yahoodownloader import YahooDownloader
        raw_df = YahooDownloader(
            start_date=config.START_DATE,
            end_date=config.END_DATE,
            ticker_list=config.DOW_30_TICKER,
        ).fetch_data()
        raw_df.to_pickle(raw_data_path)

    print("==============Start Feature Engineering===========")
    if os.path.exists(processed_data_path):
        processed_df = pd.read_pickle(processed_data_path)  # DataFrame of Pandas
        print('| processed_df.columns.values:', processed_df.columns.values)
    else:
        from finrl.preprocessing.preprocessors import FeatureEngineer
        engineer = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
            use_turbulence=True,
            user_defined_feature=False,
        )
        processed_df = engineer.preprocess_data(raw_df)
        processed_df.to_pickle(processed_data_path)

    # Training & Trading data split
    from finrl.preprocessing.data import data_split
    train_df = data_split(processed_df, '2008-03-19', '2016-01-01')  # 1963/3223
    eval_df = data_split(processed_df, '2016-01-01', '2021-01-01')  # 1260/3223
    return train_df, eval_df
def main():
    """Run an online A2C prediction loop over sentiment-augmented stock data."""
    start_date = '2020-01-01'
    trade_start_date = '2020-12-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers

    numerical_df = YahooDownloader(start_date=start_date,
                                   end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)

    train_data = data_split(initial_data, start_date, trade_start_date)
    trade_data = data_split(initial_data, trade_start_date, end_date)

    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(trade_data.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(indicator_list) * stock_dimension
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
        "print_verbosity": 5
    }

    e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    # The online env starts from the first day of the trade period.
    e_trade_gym = OnlineStockTradingEnv(trade_data.loc[0], **env_kwargs)

    training_agent = DRLAgent(env=env_train)
    model_a2c = training_agent.get_model("a2c")
    feature_engineer = FeatureEngineer()
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model_a2c)

    # Feed one day of trade data at a time and predict on it.
    for day in range(1, trade_data.index.unique().max()):
        print(trade_data.loc[day])
        online_stock_pred.add_data(trade_data.loc[day])
        action, states, next_obs, rewards = online_stock_pred.predict()
        print("Action:", action)
        print("States: ", states)
        print("Next observation: ", next_obs)
        print("Rewards: ", rewards)
def add_vix(self, data):
    """
    add vix from yahoo finance
    :param data: (df) pandas dataframe
    :return: (df) pandas dataframe
    """
    df = data.copy()
    vix_raw = YahooDownloader(start_date=df.date.min(),
                              end_date=df.date.max(),
                              ticker_list=["^VIX"]).fetch_data()
    # Keep only the closing price, relabelled 'vix'.
    vix = vix_raw[['date', 'close']].rename(columns={'close': 'vix'})
    df = df.merge(vix, on="date")
    return df.sort_values(["date", "tic"]).reset_index(drop=True)
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.DOW_30_TICKER).fetch_data()

    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(df,
                         feature_number=5,
                         use_technical_indicator=True,
                         use_turbulence=True).preprocess_data()

    # Train / trade split around the configured trade start date.
    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    env_setup = EnvSetup(stock_dim=len(train.tic.unique()))
    env_train = env_setup.create_env_training(data=train, env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = {
        'n_steps': 5,
        'ent_coef': 0.005,
        'learning_rate': 0.0007,
        'verbose': 0,
        'timesteps': 100000
    }
    model_a2c = agent.train_A2C(model_name="A2C_{}".format(now),
                                model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)
    df_account_value = DRLAgent.DRL_prediction(model=model_a2c,
                                               test_data=trade,
                                               test_env=env_trade,
                                               test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/" + now + '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + '.csv')
def test_process_data():
    """Stream post-cutoff daily data through DataProcessor and print each result."""
    start_date = '2020-11-01'
    end_date = '2021-01-01'
    ticker_list = stock_tickers

    numerical_df = YahooDownloader(start_date=start_date,
                                   end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    trade_data = data_split(initial_data, start_date, '2020-12-01')

    # Everything after the trade cutoff is fed in one day at a time.
    numerical_feed_data = numerical_df[numerical_df.date > '2020-12-01']
    sentiment_feed_data = sentiment_df[sentiment_df.date > '2020-12-01']
    data_processor = DataProcessor(FeatureEngineer(), trade_data)
    for date in numerical_feed_data.date.unique():
        new_numerical = numerical_feed_data[numerical_feed_data.date == date]
        new_sentiment = sentiment_feed_data.loc[sentiment_feed_data.date == date]
        new_df = data_processor.process_data(new_numerical, new_sentiment)
        print(new_df)
def prepare_data() -> (dict, pd.DataFrame):
    """Load processed data (fetching + feature-engineering on a cache miss)
    and build the environment kwargs derived from it.

    Returns (env_kwargs dict, processed DataFrame).
    """
    processed = load_from_cache()
    if processed is None:
        print("==============Start Fetching Data===========")
        raw = YahooDownloader(
            start_date=config.START_DATE,
            end_date=config.END_DATE,
            ticker_list=config.CURRENT_TICKER,
        ).fetch_data()
        print("==============Start Feature Engineering===========")
        engineer = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
            use_turbulence=False,
            user_defined_feature=False,
        )
        processed = engineer.preprocess_data(raw)
        save(processed)

    # calculate state action space
    stock_dimension = len(processed.tic.unique())
    state_space = (2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension * 2)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.01,
        "sell_cost_pct": 0.01,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    return env_kwargs, processed
def get_yahoo_data(start, end):
    """Download DOW-30 data, keep only tickers present on every date, add features.

    Bug fix: the original called DataFrame.sort_values(...) without assigning
    the result (sort_values is not in-place by default), so both sorts were
    no-ops; the results are now assigned.
    """
    df = YahooDownloader(start_date=start, end_date=end,
                         ticker_list=config.DOW_30_TICKER).fetch_data()
    df = df.sort_values(['date', 'tic'], ignore_index=True)

    # get intersection data, smallest data: tickers that appear on every date
    tickers = df.tic.unique()
    common = []
    for _, group in df.groupby('date'):
        present = group.tic.unique()
        if len(common) == 0:
            common = [t for t in present if t in tickers]
        else:
            common = [t for t in present if t in common]

    data_merge = pd.DataFrame(columns=list(df.columns))
    temp_df = pd.DataFrame.from_records(np.array(common).reshape(-1, 1), columns=['tic'])
    for name, group in df.groupby('date'):
        temp_df['date'] = name
        result_outer = pd.merge(group, temp_df, on=['date', 'tic'])
        result_outer = result_outer.sort_values(['date', 'tic'], ignore_index=True)
        # every surviving date must carry exactly the common ticker set
        assert len(result_outer) == len(temp_df.tic.unique())
        data_merge = data_merge.append(result_outer)
    df = data_merge

    fe = FeatureEngineer(use_technical_indicator=True,
                         tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
                         use_turbulence=True,
                         user_defined_feature=False)
    processed = fe.preprocess_data(df)
    processed = processed.sort_values(['date', 'tic'], ignore_index=True)
    return processed
def test_download(self):
    """Downloading one month of data yields a pandas DataFrame."""
    downloaded = YahooDownloader(start_date="2019-01-01",
                                 end_date="2019-02-01",
                                 ticker_list=self.ticker_list).fetch_data()
    self.assertIsInstance(downloaded, pd.DataFrame)
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.SP_500_TICKER).fetch_data()

    print("==============Start Feature Engineering===========")
    df = FeatureEngineer(df,
                         use_technical_indicator=True,
                         use_turbulence=True).preprocess_data()

    # Training & Trade data split
    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        config.TECHNICAL_INDICATORS_LIST) * stock_dimension

    env_setup = EnvSetup(stock_dim=stock_dimension,
                         state_space=state_space,
                         hmax=100,
                         initial_amount=3000,
                         transaction_cost_pct=0.001)
    env_train = env_setup.create_env_training(data=train, env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = {
        'n_steps': 5,
        'ent_coef': 0.005,
        'learning_rate': 0.0007,
        'verbose': 0,
        'timesteps': 80000
    }
    model = agent.train_A2C(model_name="A2C_{}".format(now),
                            model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=model,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" + now + '.csv')
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now + '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + '.csv')
# Load the trained model, evaluate it (averaged over 3 runs), and plot the
# resulting account value against a buy-and-hold DJI baseline.
trained_model = agent.get_model(model, model_kwargs=model_params, verbose=0).load(model_paths)

print('Testing...')
df_account_value, df_actions = DRLAgent.average_predict(
    model=trained_model, environment=test_gym_env, n_evals=3)

print('Comparing to DJI')
dji = YahooDownloader(
    start_date=split_date,
    end_date=end_date,
    ticker_list=['^DJI']
).fetch_data()

dates_rl = matplotlib.dates.date2num(df_account_value['date'])
dates_base = matplotlib.dates.date2num(dji['date'])
# Scale the index to the same 1,000,000 starting capital.
init_dji_shares = 1000000 / dji['close'][0]

plt.plot_date(dates_rl, df_account_value['account_value'], '-')
plt.plot_date(dates_base, dji['close'] * init_dji_shares, '-')
plt.legend(['RL', 'DJI'])
plt.title(f'{model} model trained from {start_date}-{split_date}')
plt.ylabel('Account Value')
plt.savefig(f'imgs/{model}_vs_dji_{split_date}_{end_date}.png')
# NOTE(review): this chunk begins mid-way through a parser.add_argument(...) call and
# ends at the head of a `while consumer is None:` loop whose body is outside this view;
# the enclosing definition is not fully visible here, so the code is kept byte-identical.
# It appears to wire up Kafka consumer/producer handles, fetch initial price + sentiment
# data, and load a pre-trained A2C checkpoint — confirm against the surrounding file.
metavar='<end_date>', help='training data end date') parser.add_argument('-t', '--trade-date', metavar='<trade_date>', help='trading start date') args = parser.parse_args() consumer = KafkaConsumer(args.topic, auto_offset_reset='latest', \ bootstrap_servers=args.hosts, api_version=(0, 10), consumer_timeout_ms=1000) producer = KafkaProducer(bootstrap_servers=args.hosts, api_version=(0, 10)) # data initialization tday = datetime.date.today() yday = tday - datetime.timedelta(days=1) fmt = "%Y-%m-%d" numerical_df = YahooDownloader(args.start_date, args.end_date, config.stock_tickers).fetch_data() sentiment_df = generate_sentiment_scores(args.start_date, args.end_date) initial_data = get_initial_data(numerical_df, sentiment_df) data_processor = DataProcessor(FeatureEngineer(), initial_data) new_numerical = YahooDownloader(datetime.datetime.strftime(yday, fmt), datetime.datetime.strftime(tday, fmt), config.stock_tickers).fetch_data() new_sentiment = generate_sentiment_scores( datetime.datetime.strftime(yday, fmt), datetime.datetime.strftime(yday, fmt)) # set up model to train on initial data load_path = "./trained_models/a2c_2019-2020_80k.zip" model = setup_model(initial_data) while consumer is None:
def train_one():
    """
    train an agent
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    ).fetch_data()

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade, turbulence_threshold=250, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_sac,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    df_account_value.to_csv("./" + config.RESULTS_DIR + "/df_account_value_" + now + ".csv")
    df_actions.to_csv("./" + config.RESULTS_DIR + "/df_actions_" + now + ".csv")

    print("==============Get Backtest Results===========")
    perf_stats_all = BackTestStats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("./" + config.RESULTS_DIR + "/perf_stats_all_" + now + ".csv")
def _build_yahoo_dataset(stock_tickers, data_path):
    """Download 2000-2021 data for *stock_tickers*, add indicators, pad the
    (date, ticker) grid, and write the result to *data_path*."""
    indicators = config.TECHNICAL_INDICATORS_LIST
    print('Getting Data: ')
    df = YahooDownloader(start_date='2000-01-01',
                         end_date='2021-01-01',
                         ticker_list=stock_tickers).fetch_data()
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=indicators,
        use_turbulence=True,
        user_defined_feature=False)
    print('Adding Indicators')
    processed = fe.preprocess_data(df)
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(pd.date_range(processed['date'].min(), processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))
    processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(
        processed, on=["date", "tic"], how="left")
    processed_full = processed_full[processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date', 'tic'])
    processed_full = processed_full.fillna(0)
    # Bug fix: the original saved the unpadded `processed` frame and threw
    # the padded `processed_full` away; the padded frame is the one built
    # for downstream consumption.
    processed_full.to_csv(data_path, index=False)


def get_dataset(datadir, data_type, start_date, end_date):
    """Return the [start_date, end_date) slice of the requested dataset,
    downloading and caching it first when its CSV does not exist yet.

    Raises ValueError for unsupported or not-yet-added data types. The two
    downloadable branches (dow29 / nas29) shared identical bodies except for
    the ticker list, so they now delegate to _build_yahoo_dataset.
    """
    if not data_type in config.SUPPORTED_DATA:
        raise ValueError('Market type not supported')
    data_path = os.path.join(datadir, data_type + '.csv')
    if not os.path.exists(data_path):
        if data_type == 'dow29':
            # If we don't have the data, we can download dow data from yahoo finance
            _build_yahoo_dataset(config.DOW_30_TICKER_MINUS_VISA, data_path)
        elif data_type == 'nas29':
            _build_yahoo_dataset(config.NAS_29_TICKER, data_path)
        elif data_type == 'dow290':
            raise ValueError('Need to add dow29 with zeros crossings to data directory')
        elif data_type == 'dow29w0':
            raise ValueError('Need to add dow29 with OUT zeros crossings to data directory')
        else:
            raise ValueError('Need to add crypto data to data directory')
    # Load and subset data
    full_df = pd.read_csv(data_path)
    max_date = max(full_df['date'])
    min_date = min(full_df['date'])
    if not (min_date <= start_date):
        warnings.warn('Earliest possible start date is {}: You have chosen {}. The later date will be used'.format(min_date, start_date))
    if not (max_date >= end_date):
        warnings.warn('Latest possible end date is {}: You have chosen {}. The earlier date will be used'.format(max_date, end_date))
    to_return = data_split(full_df, start_date, end_date)
    return to_return
# In[7]:
# from config.py end_date is a string config.END_DATE
# ticker_list is a list of stock tickers; in a single stock trading case the
# list contains only one ticker.

# In[8]:
# Download and save the data in a pandas DataFrame:
data_df = YahooDownloader(start_date='2009-01-01',
                          end_date='2021-01-01',
                          ticker_list=['AAPL']).fetch_data()

# In[9]:
data_df.shape

# In[10]:
data_df.head()
def main():
    """Train the DRL ensemble on penny stocks and backtest the combined result."""
    # Make sure all output folders exist.
    for folder in (config.DATA_SAVE_DIR, config.TRAINED_MODEL_DIR,
                   config.TENSORBOARD_LOG_DIR, config.RESULTS_DIR):
        if not os.path.exists("./" + folder):
            os.makedirs("./" + folder)

    print(config.START_DATE)
    print(config.END_DATE)
    print(config.PENNY_STOCKS)

    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.PENNY_STOCKS).fetch_data()
    fe = FeatureEngineer(use_technical_indicator=True,
                         tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
                         use_turbulence=True,
                         user_defined_feature=False)
    processed = fe.preprocess_data(df)

    # Every processed column except date/tic feeds the observation space.
    information_cols = list(processed)
    information_cols.remove('date')
    information_cols.remove('tic')

    stock_dimension = len(processed.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(information_cols) * stock_dimension
    print("Stock Dimension: {}, State Space: {}".format(stock_dimension, state_space))

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 5000,  # Since in Indonesia the minimum number of shares per trx is 100, then we scaled the initial amount by dividing it with 100
        "buy_cost_pct": 0.00,  # IPOT has 0.19% buy cost
        "sell_cost_pct": 0.00,  # IPOT has 0.29% sell cost
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": information_cols,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
        "print_verbosity": 5
    }

    # rebalance_window: days between retrainings; validation_window: days used
    # for validation and trading (e.g. 63 means both periods are 63 days).
    rebalance_window = 63
    validation_window = 63
    train_start = config.START_DATE
    train_end = config.START_TRADE_DATE
    val_test_start = config.START_TRADE_DATE
    val_test_end = config.END_DATE

    ensemble_agent = DRLEnsembleAgent(df=processed,
                                      train_period=(train_start, train_end),
                                      val_test_period=(val_test_start, val_test_end),
                                      rebalance_window=rebalance_window,
                                      validation_window=validation_window,
                                      **env_kwargs)

    A2C_model_kwargs = {
        'n_steps': 5,
        'ent_coef': 0.01,
        'learning_rate': 0.0005
    }
    PPO_model_kwargs = {
        "ent_coef": 0.01,
        "n_steps": 2048,
        "learning_rate": 0.00025,
        "batch_size": 128
    }
    DDPG_model_kwargs = {
        "action_noise": "ornstein_uhlenbeck",
        "buffer_size": 50000,
        "learning_rate": 0.000005,
        "batch_size": 128
    }
    TD3_model_kwargs = {
        "batch_size": 100,
        "buffer_size": 1000000,
        "learning_rate": 0.001
    }
    timesteps_dict = {'a2c': 4000, 'ppo': 4000, 'ddpg': 4000, 'td3': 4000}

    df_summary = ensemble_agent.run_ensemble_strategy(A2C_model_kwargs,
                                                      PPO_model_kwargs,
                                                      DDPG_model_kwargs,
                                                      TD3_model_kwargs,
                                                      timesteps_dict)
    print(df_summary)

    unique_trade_date = processed[(processed.date > val_test_start) &
                                  (processed.date <= val_test_end)].date.unique()
    df_trade_date = pd.DataFrame({'datadate': unique_trade_date})

    # Stitch the per-window account values back together; stop at the first
    # window whose results file is missing.
    df_account_value = pd.DataFrame()
    for i in range(rebalance_window + validation_window,
                   len(unique_trade_date) + 1, rebalance_window):
        print(rebalance_window + validation_window)
        print(len(unique_trade_date) + 1)
        print(rebalance_window)
        try:
            temp = pd.read_csv('results/account_value_trade_{}_{}.csv'.format('ensemble', i))
            df_account_value = df_account_value.append(temp, ignore_index=True)
        except:
            break

    sharpe = (252**0.5) * df_account_value.account_value.pct_change(
        1).mean() / df_account_value.account_value.pct_change(1).std()
    print('Sharpe Ratio: ', sharpe)

    df_account_value = df_account_value.join(
        df_trade_date[validation_window:].reset_index(drop=True))
    df_account_value.account_value.plot()

    print("==============Get Backtest Results===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    perf_stats_all = backtest_stats(account_value=df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)

    print("==============Compare to IHSG===========")
    backtest_plot(df_account_value,
                  baseline_ticker='^DJI',
                  baseline_start=df_account_value.loc[0, 'date'],
                  baseline_end=df_account_value.loc[len(df_account_value) - 1, 'date'])
# Evaluate the trained model (averaged over 10 runs), save the results, and
# plot account value against a buy-and-hold NASDAQ baseline.
df_account_value, df_actions = DRLAgent.average_predict(
    model=trained_model, environment=test_gym_env, n_evals=10)
df_account_value.to_csv('results/' + model + 'account_values_10avg.csv')
df_actions.to_csv('results/' + model + 'actions_10avg.csv')

dates_rl = matplotlib.dates.date2num(df_account_value['date'])
plt.plot_date(dates_rl, df_account_value['account_value'], '-')

end_date = '2018-11-28'  # Model is tested from split_date to end_date
split_date = '2016-10-01'
print('Comparing to Index')
dji = YahooDownloader(
    start_date=split_date,
    end_date=end_date,
    ticker_list=['^IXIC']  # Dow = '^DJI', NASDAQ = '^IXIC'
).fetch_data()
model_names.append('Nas.')

dates_rl = matplotlib.dates.date2num(df_account_value['date'])
dates_base = matplotlib.dates.date2num(dji['date'])
# Scale the index to the same 1,000,000 starting capital.
init_dji_shares = 1000000 / dji['close'][0]
plt.plot_date(dates_base, dji['close'] * init_dji_shares, '-')
plt.legend(model_names)
plt.xticks(rotation=30)
plt.title(f'Models Trained from {start_date}-{split_date}')
plt.ylabel('Account Value')
plt.savefig(f'imgs/models_vs_NASDAQ_{split_date}_{end_date}.png')
import datetime

from finrl.config import config
from finrl.marketdata.yahoodownloader import YahooDownloader
from finrl.preprocessing.preprocessors import FeatureEngineer
from finrl.preprocessing.data import data_split
from finrl.env.env_stocktrading import StockTradingEnv
from finrl.model.models import DRLAgent
from finrl.trade.backtest import BackTestStats, BaselineStats, BackTestPlot

import sys
sys.path.append("../FinRL-Library")

# Download and save the data in a pandas DataFrame:
data_df = YahooDownloader(start_date='2009-01-01',
                          end_date='2021-01-01',
                          ticker_list=['AAPL']).fetch_data()

# Extend the default indicator set with a few extra stockstats indicators.
tech_indicator_list = config.TECHNICAL_INDICATORS_LIST + [
    'kdjk', 'open_2_sma', 'boll', 'close_10.0_le_5_c', 'wr_10', 'dma', 'trix'
]

fe = FeatureEngineer(use_technical_indicator=True,
                     tech_indicator_list=tech_indicator_list,
                     use_turbulence=False,
                     user_defined_feature=False)
data_df = fe.preprocess_data(data_df)
# from config.py end_date is a string config.END_DATE

# In[7]:
print(config.DOW_30_TICKER)

# In[8]:
df = YahooDownloader(start_date=config.START_DATE,
                     end_date=config.END_DATE,
                     ticker_list=config.DOW_30_TICKER).fetch_data()

# # Part 4: Preprocess Data

# In[9]:
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    use_turbulence=True,
    user_defined_feature=False)
processed = fe.preprocess_data(df)

# %% Show turbulence
# if error open VSCode Settings (JSON) and change
def get_baseline(ticker, start, end):
    """Download baseline price data for *ticker* over [start, end)."""
    downloader = YahooDownloader(start_date=start,
                                 end_date=end,
                                 ticker_list=[ticker])
    return downloader.fetch_data()
# NOTE(review): this chunk starts with the body of an `if not os.path.exists(...)`
# whose condition was cut off before this view, so the code is kept byte-identical
# rather than reformatted. It creates output folders, downloads 2008-2021 DOW-30
# data, inspects it, and runs feature engineering with turbulence enabled.
os.makedirs("./" + config.TRAINED_MODEL_DIR) if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR): os.makedirs("./" + config.TENSORBOARD_LOG_DIR) if not os.path.exists("./" + config.RESULTS_DIR): os.makedirs("./" + config.RESULTS_DIR) config.START_DATE config.END_DATE print(config.DOW_30_TICKER) ############# DOWNLOAD DATA ############# df = YahooDownloader(start_date='2008-01-01', end_date='2021-01-01', ticker_list=config.DOW_30_TICKER).fetch_data() df.shape df.sort_values(['date', 'tic'], ignore_index=True).head() df.info() ############# PERFORM FEATURE ENGINEERING ############# fe = FeatureEngineer(use_technical_indicator=True, tech_indicator_list=config.TECHNICAL_INDICATORS_LIST, use_turbulence=True, user_defined_feature=False) processed = fe.preprocess_data(df)