def run_ESRNN():
    import torch
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    path_daily = r'C:\Users\xxxli\Desktop\Daily'
    dic_daily = preprocess.read_file(path_daily)
    series_list = []
    for k, v in dic_daily.items():
        ticker_name = k
        df, cat = v
        df = preprocess.single_price(df, ticker_name)  # column = [ticker]
        series_list.append(DataSeries(cat, 'daily', df))
    collect = DataCollection('universe daily', series_list)

    train_dc, test_dc = collect.split(numTest=24)
    m = ModelESRNN(max_epochs=15, batch_size=32, dilations=[[1, 3], [7, 14]],
                   input_size=12, output_size=24, device=device)
    m.train(train_dc)
    y_test = m.predict(test_dc)
    y_test_df = y_test.to_df()
    y_test_df.to_csv('hyper_ESRNN_1.csv')
def predict(self, numPredict: int, test_dc: DataCollection):
    # If a recommendation model has been trained, pass it to Telescope;
    # otherwise pass R NULL so the forecast runs without one.
    if not self.rec_model:
        rec = robjects.r('NULL')
    else:
        rec = self.rec_model
    date = test_dc.to_df().index
    res = []
    for i, series in enumerate(self.data):
        rList = FloatVector(series)
        pred = pd.DataFrame(self.tel.telescope_forecast(rList, numPredict,
                                                        rec_model=rec,
                                                        natural=True,
                                                        boxcox=True,
                                                        doAnomDet=False,
                                                        replace_zeros=True,
                                                        use_indicators=True,
                                                        plot=False)[0],
                            columns=[self.tickers[i]], index=date)
        ds = DataSeries(self.categories[i], self.frequency, pred)
        res.append(ds)
    dc = DataCollection(self.label, res)
    return dc
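# A brief usage sketch for the Telescope predict() above. The class name
# ModelTelescope and the prepared `collect` DataCollection are assumptions
# based on the surrounding code (data_reformat() appears later in this
# section); only predict()'s signature is taken from the method itself.
m = ModelTelescope()
train_dc, test_dc = collect.split(numTest=12)
m.data_reformat(train_dc)             # stores tickers, categories, series, frequency
forecast_dc = m.predict(12, test_dc)  # 12-step-ahead forecasts as a DataCollection
forecast_dc.to_df().to_csv('telescope_result.csv')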
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # generate some random sample data
    np.random.seed(123)
    n_assets = 4
    time_series_group = []
    for i in range(n_assets):
        rows, cols = 1000, 1
        data = np.random.rand(rows, cols)  # other random functions can generate values with constraints
        # freq='MS' sets a monthly frequency starting on day 1; use 'T' for minutes, and so on
        tidx = pd.date_range('2019-01-01', periods=rows, freq='MS')
        ID = 'FakeStock_' + str(i + 1)
        df = pd.DataFrame(data, columns=[ID], index=tidx)
        ds = DataSeries(category='Stock', freq='monthly', time_series=df)
        time_series_group.append(ds)
    input_dc_test = DataCollection(label='Test Collection',
                                   time_series_group=time_series_group)
    self.input_dc = input_dc_test

    # for exception test: append daily series so the collection mixes frequencies
    for i in range(2):
        rows, cols = 1000, 1
        data = np.random.rand(rows, cols)
        tidx = pd.date_range('2019-01-01', periods=rows, freq='D')  # daily frequency
        ID = 'FakeStock_Daily_' + str(i + 1)
        df = pd.DataFrame(data, columns=[ID], index=tidx)
        ds = DataSeries(category='Stock', freq='daily', time_series=df)
        time_series_group.append(ds)
    input_dc_test_2 = DataCollection(label='Test Collection 2',
                                     time_series_group=time_series_group)
    self.input_dc_2 = input_dc_test_2
def test_MP_class(self):
    import torch
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = DP.read_file(path_monthly)
    n_assets = 1
    time_series_group = []
    for i in range(n_assets):
        df = dic_monthly[list(dic_monthly.keys())[i]]
        ds = DataSeries('ETF', 'monthly', df[0])
        time_series_group.append(ds)
    input_dc = DataCollection('test1', time_series_group)

    m = ModelESRNN(seasonality=[12], input_size=4, output_size=12, device=device)
    train_dc, test_dc = input_dc.split(numTest=12)
    m.train(train_dc)
    forecast_dc = m.predict(test_dc)
    # train_dc.to_df().to_csv('insample.csv')
    test_dc.to_df().to_csv('test.csv')
    # forecast_dc.to_df().to_csv('forecast.csv')

    mn = MN.ModelNaive2(2, train_dc)
    naive2_dc = mn.fit_and_generate_prediction(12, 'MS')
    naive2_dc.to_df().to_csv('naive.csv')

    mp = MP.ModelPerformance("test model performance", 2, test_dc,
                             forecast_dc, train_dc, naive2_dc)
    mase = MP.MASE(test_dc.to_df(), forecast_dc.to_df(), train_dc.to_df(), 2)
    smape = MP.sMAPE(test_dc.to_df(), forecast_dc.to_df())
    mape = MP.MAPE(mp.y_df, mp.y_hat_df)
    r2 = MP.R2(test_dc.to_df(), forecast_dc.to_df())
    rmse = MP.RMSE(test_dc.to_df(), forecast_dc.to_df())
    owa = MP.OWA(test_dc.to_df(), forecast_dc.to_df(), train_dc.to_df(),
                 naive2_dc.to_df(), 2)
    u1 = MP.Theil_U1(test_dc.to_df(), forecast_dc.to_df())
    u2 = MP.Theil_U2(test_dc.to_df(), forecast_dc.to_df())

    mp.MASE()
    mp.sMAPE()
    mp.MAPE()
    mp.R2()
    mp.RMSE()
    mp.OWA()
    mp.Theil_U1()
    mp.Theil_U2()

    self.assertAlmostEqual(mp.metrics['sMAPE'], smape)
    self.assertAlmostEqual(mp.metrics['MAPE'], mape)
    self.assertAlmostEqual(mp.metrics['R2'], r2)
    self.assertAlmostEqual(mp.metrics['RMSE'], rmse)
    self.assertAlmostEqual(mp.metrics['MASE'], mase)
    self.assertAlmostEqual(mp.metrics['OWA'], owa)
    self.assertAlmostEqual(mp.metrics['Theil_U1'], u1)
    self.assertAlmostEqual(mp.metrics['Theil_U2'], u2)
def calculate_initial_weight(self, input_dc: DataCollection):
    if not isinstance(input_dc.get_freq(), str):
        raise Exception("Optimization failed due to inconsistent series frequencies within input_dc.")
    else:
        self.input_freq = input_dc.get_freq()
    if self.initial_weight is None:
        self.tickers = input_dc.ticker_list()
        self.initial_weight = self.optimizer(input_dc.to_df().dropna())
    else:
        raise Exception("initial weight was already calculated")
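# For the equal-weight portfolio, self.optimizer reduces to a 1/n rule. A
# minimal sketch of such an optimizer, assuming the (1 x n) 'Initial weights'
# DataFrame layout used by the tests later in this section; this is an
# illustration, not the project's Opt.equal_portfolio.
import pandas as pd

def equal_weight_optimizer(price_df: pd.DataFrame) -> pd.DataFrame:
    # one equal weight per asset column, summing to 1
    n = len(price_df.columns)
    return pd.DataFrame([[1.0 / n] * n],
                        columns=price_df.columns,
                        index=['Initial weights'])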
def data_reformat(self, train_data: DataCollection):
    '''Store the information needed from the training data.'''
    super().check_dc(train_data)
    # record information of data
    self.tickers = train_data.ticker_list()
    self.categories = train_data.category_list()
    self.last_days = train_data.last_date_list()
    self.data = train_data.to_list()
    self.label = str(train_data)
    # TODO: double check frequency
    self.frequency = train_data.freq
    self.freq = self.freq_map[train_data.freq]
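# freq_map is defined elsewhere on the class. A plausible sketch of its role,
# assuming it translates the collection's frequency label into a pandas-style
# frequency code; the exact keys and values here are an assumption.
freq_map = {'daily': 'D', 'weekly': 'W', 'monthly': 'M', 'quarterly': 'Q'}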
def calculate_initial_weight(self, input_dc: DataCollection,
                             weight_bounds=(0, 1), risk_aversion=1,
                             market_neutral=False, risk_free_rate=0.0,
                             target_volatility=0.01, target_return=0.11,
                             returns_data=True, compounding=False):
    if not isinstance(input_dc.get_freq(), str):
        raise Exception("Optimization failed due to inconsistent series frequencies within input_dc.")
    else:
        self.input_freq = input_dc.get_freq()
    if self.initial_weight is None:
        self.tickers = input_dc.ticker_list()
        self.initial_weight = self.optimizer(input_dc.to_df().dropna(),
                                             self.input_freq, self.solution,
                                             weight_bounds, risk_aversion,
                                             market_neutral, risk_free_rate,
                                             target_volatility, target_return,
                                             returns_data, compounding)
    else:
        raise Exception("initial weight was already calculated")
def test_Naive2(self):
    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = preprocess.read_file(path_monthly)
    series_list = []
    for k, v in dic_monthly.items():
        df, cat = v
        df = preprocess.single_price(df, k)
        series_list.append(DataSeries(cat, 'monthly', df))
    collect = DataCollection('test1', series_list)
    train_dc, test_dc = collect.split(numTest=12)
    m = ModelNaive2(12, train_dc, test_dc)
    y_hat_Naive2_dc = m.fit_and_generate_prediction(12, freq='MS')
    y_hat_Naive2_dc.to_df().to_csv('test_Naive2_result.csv')
def __init__(self, portfolio: Portfolio, evaluate_dc: DataCollection):
    if portfolio.get_tickers() != evaluate_dc.ticker_list():
        raise ValueError("Tickers in portfolio and evaluate data do not match")
    self.portfolio = portfolio
    self.label = portfolio.get_solution()  # check this
    if portfolio.get_freq() != evaluate_dc.get_freq():
        raise ValueError("The frequency of the data and portfolio do not match")
    self.price_df = evaluate_dc.to_df().dropna()
    self.freq = evaluate_dc.get_freq()
    self.evaluate_dc = evaluate_dc
    self.metrics = {}
def to_dc(self, df, pred_label, pred_freq):
    '''
    Reformat the forecast DataFrame output from predict() into a DataCollection object.

    Args
    ----------
    pred_label: str
        used to label the DataCollection
    pred_freq: dict{ticker: str}
        used as the freq of each DataSeries
    '''
    ds_lst = []
    for k, v in df.groupby(['x', 'unique_id']):
        category, ticker = k
        ds_df = v[['ds', 'y_hat']]
        ds_df = ds_df.rename(columns={'ds': 'Date', 'y_hat': ticker}).set_index('Date')
        ds_lst.append(DataSeries(category, pred_freq[ticker], ds_df))
    dc = DataCollection(pred_label, ds_lst)
    return dc
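# The groupby above implies a long-format frame with one row per category
# ('x'), ticker ('unique_id'), and date ('ds'), with the forecast in 'y_hat'.
# A minimal sketch of such a frame; the tickers, dates, and values here are
# made up for illustration.
import pandas as pd

forecast_df = pd.DataFrame({
    'x': ['ETF', 'ETF', 'Bond', 'Bond'],                      # category
    'unique_id': ['SPY', 'SPY', 'AGG', 'AGG'],                # ticker
    'ds': pd.to_datetime(['2020-05-01', '2020-06-01'] * 2),   # forecast dates
    'y_hat': [301.2, 305.7, 115.3, 115.9],                    # predicted values
})
# dc = model.to_dc(forecast_df, 'ESRNN forecast', {'SPY': 'monthly', 'AGG': 'monthly'})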
def dc_generator(path: str, frequency: str):
    dic, recover_list, ticker_list = DataPreprocessing.read_file(path)
    series_list = []
    for k, v in dic.items():
        df, cat = v
        df = DataPreprocessing.single_price(df, k)
        series_list.append(DataSeries(cat, frequency, df))
    collect = DataCollection(frequency + ' Collection', series_list)
    return collect, recover_list, ticker_list
def recover_return(input_df: pd.DataFrame, recover_list, ticker_list):
    # input_df = input_df + recover - 1
    ds_list = []
    for column in input_df:
        idx = ticker_list.index(column)
        recover_num = recover_list[idx]
        temp_series = input_df[column] + recover_num - 1
        ds_list.append(DataSeries('ETF', 'daily', temp_series.to_frame()))
    output_dc = DataCollection('Daily Collection', ds_list)
    return output_dc
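# A small worked example of the recovery arithmetic above, assuming the
# preprocessing stored each return as r - (recover_num - 1); the ticker,
# shift of 1.02, and values below are made up for illustration.
import pandas as pd

stored = pd.DataFrame({'FakeETF': [-0.03, 0.01, 0.00]},
                      index=pd.date_range('2020-01-01', periods=3, freq='D'))
recovered = recover_return(stored, recover_list=[1.02], ticker_list=['FakeETF'])
print(recovered.to_df())  # values shifted back by +0.02: -0.01, 0.03, 0.02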
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # An example of how to use the Telescope model
    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = preprocess.read_file(path_monthly)
    series_list = []
    for k, v in dic_monthly.items():
        df, cat = v
        df = preprocess.single_price(df, k)
        series_list.append(DataSeries(cat, 'monthly', df))
    self.collect = DataCollection('test1', series_list)
def test_ESRNN(self):
    # An example of how to use ESRNN
    import torch
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    path_daily = os.path.join('test', 'Data', 'daily')
    dic_daily = preprocess.read_file(path_daily)
    series_list = []
    for k, v in dic_daily.items():
        df, cat = v
        df = preprocess.single_price(df, k)
        series_list.append(DataSeries(cat, 'daily', df))
    collect = DataCollection('test1', series_list)

    m = ModelESRNN(max_epochs=5, seasonality=[], batch_size=64,
                   input_size=12, output_size=12, device=device)
    train_dc, test_dc = collect.split(numTest=12)
    m.train(train_dc)
    y_test = m.predict(test_dc)
    assert isinstance(y_test, DataCollection)
    y_test_df = y_test.to_df()
    y_test_df.to_csv('predict_result.csv')
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    a = pd.DataFrame([10.2, 12, 32.1, 9.32], columns=['ABC'],
                     index=pd.to_datetime(['2020-01-01', '2020-02-01',
                                           '2020-03-01', '2020-04-01']))
    a.index.name = 'Date'
    self.a_series = DataSeries('ETF', 'monthly', a)
    b = pd.DataFrame([2.3, 3.6, 4.5], columns=['KKK'],
                     index=pd.to_datetime(['2020-01-01', '2020-02-01',
                                           '2020-03-01']))
    b.index.name = 'Date'
    self.b_series = DataSeries('Bond', 'monthly', b)
    self.collect = DataCollection('trial', [self.a_series, self.b_series])
def __init__(self, label: str, seasonality: int, y_dc: DataCollection,
             y_hat_dc: DataCollection, y_insample_dc: DataCollection,
             y_naive2_hat_dc=None):
    '''
    Args
    ----------
    label: str
        description of the target model
    y_dc: DataCollection
        actual test data (out-of-sample): test_df
    y_hat_dc: DataCollection
        predicted values (out-of-sample): forecast_df
    y_insample_dc: DataCollection
        actual training data (in-sample): train_df
    y_naive2_hat_dc: DataCollection, optional
        predicted values from the Naive2 forecast approach (out-of-sample): naive2_df
    '''
    self.label = label
    self.seasonality = seasonality
    self.metrics = {}
    self.y_df = y_dc.to_df()                  # testing data
    self.y_hat_df = y_hat_dc.to_df()          # forecasts
    self.y_insample_df = y_insample_dc.to_df()  # training data
    # Naive2 forecast: if we decide not to use OWA, ignore
    if y_naive2_hat_dc is not None:
        self.y_naive2_hat_df = y_naive2_hat_dc.to_df()
    else:
        self.y_naive2_hat_df = pd.DataFrame([False], index=['Naive2 None'])
    # check tickers
    if not check_input_df_tickers([self.y_df, self.y_hat_df,
                                   self.y_insample_df, self.y_naive2_hat_df]):
        raise ValueError('Tickers in input dfs do not match!')
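# For reference, the standard M4-competition definitions of two metrics this
# class computes. A minimal sketch assuming the project's MP module follows
# these formulas (arrays of matching shape; sMAPE is often reported x100).
import numpy as np

def smape_sketch(y, y_hat):
    # sMAPE = mean(2|y - y_hat| / (|y| + |y_hat|)), the symmetric percentage error
    return np.mean(2.0 * np.abs(y - y_hat) / (np.abs(y) + np.abs(y_hat)))

def mase_sketch(y, y_hat, y_insample, seasonality):
    # scale forecast errors by the in-sample seasonal-naive MAE, then average
    scale = np.mean(np.abs(y_insample[seasonality:] - y_insample[:-seasonality]))
    return np.mean(np.abs(y - y_hat)) / scale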
def train(self, train_data: DataCollection):
    '''
    Model train: fit & validation.

    Args
    ----------
    train_data: DataCollection
        training set, y_insample_dc

    Returns
    ----------
    the trained model
    '''
    if not self.fitted:
        # store the last date of training data for prediction purposes
        self.train_last_date = train_data.last_date_list()
        X_train, y_train = self.data_reformat(train_data)
        # fit model
        self.fit(X_train, y_train)
    else:
        warnings.warn("Model was already trained")
    return self.model
class Test_Optimization(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        path_monthly = os.path.join('test', 'Data', 'Monthly')
        dic_monthly = DataPreprocessing.read_file(path_monthly)
        n_assets = 4
        time_series_group = []
        for i in range(n_assets):
            df = dic_monthly[list(dic_monthly.keys())[i]]
            ds = DataSeries('ETF', 'monthly', df[0])
            time_series_group.append(ds)
        input_dc_test = DataCollection(label='Test Collection',
                                       time_series_group=time_series_group)
        self.input_dc = input_dc_test
        self.input_freq = input_dc_test.get_freq()
        self.input_df = self.input_dc.to_df().dropna()

        self.a = pd.DataFrame([10, 12, 32, 9, 11, 9], columns=['fakeSPY'],
                              index=pd.to_datetime(['2020-01-01', '2020-02-01',
                                                    '2020-03-01', '2020-04-01',
                                                    '2020-05-01', '2020-06-01']))
        self.a_series = DataSeries('ETF', self.input_freq, self.a)
        self.b = pd.DataFrame([1, 1.2, 3.2, 0.9], columns=['fakeTreasury'],
                              index=pd.to_datetime(['2019-12-01', '2020-02-01',
                                                    '2020-03-01', '2020-04-01']))
        self.b_series = DataSeries('Bond', self.input_freq, self.b)
        self.c_collection = DataCollection('trial', [self.a_series, self.b_series])
        self.c_df = self.c_collection.to_df().interpolate(method='linear', axis=0)

    def test_calculate_annualized_expected_returns(self):
        res = Opt.calculate_annualized_expected_returns(self.c_df, self.input_freq)
        expected = self.c_df.pct_change().mean() * 12
        assert_frame_equal(res.to_frame(), expected.to_frame())

    # NOTE: this method lacks the test_ prefix, so unittest will not discover
    # and run it automatically.
    def calculate_annualized_return_covariance(self):
        res = Opt.calculate_annualized_return_covariance(self.c_df, self.input_freq)
        expected = self.c_df.pct_change().cov() * 12
        assert_frame_equal(res.to_frame(), expected.to_frame())

    def test_equal_portfolio(self):
        wts = Opt.equal_portfolio(self.input_df)
        self.assertEqual(type(wts), pd.DataFrame)
        self.assertEqual(wts.shape, (1, 4))
        for w in wts.values[0]:
            self.assertTrue(w >= 0 and w <= 1)
        self.assertEqual(round(wts.sum(axis=1)[0]), 1)
        self.assertTrue(wts.eq(wts.iloc[:, 0], axis=0).all(1).item())

    def test_black_litterman_portfolio(self):
        pass

    def test_portfolio_general(self):
        methods_list = ['max_sharpe', 'min_volatility', 'max_quadratic_utility',
                        'efficient_risk', 'efficient_return']
        for method in methods_list:
            try:
                wts = Opt.portfolio_opt(self.input_df, self.input_freq,
                                        solution=method)
            except Exception:
                continue
            self.assertEqual(type(wts), pd.DataFrame)
            self.assertEqual(wts.shape, (1, 4))
            for w in wts.values[0]:
                self.assertTrue(w >= 0 and w <= 1)
            self.assertEqual(round(wts.sum(axis=1)[0]), 1)
def validation_rolling(input_dc: DataCollection, num_split: int, numTest: int,
                       max_epochs=15, batch_size=1, batch_size_test=128,
                       freq_of_test=-1, learning_rate=1e-3,
                       lr_scheduler_step_size=9, lr_decay=0.9,
                       per_series_lr_multip=1.0, gradient_eps=1e-8,
                       gradient_clipping_threshold=20, rnn_weight_decay=0,
                       noise_std=0.001, level_variability_penalty=80,
                       testing_percentile=50, training_percentile=50,
                       ensemble=False, cell_type='LSTM', state_hsize=40,
                       dilations=[[1, 2], [4, 8]], add_nl_layer=False,
                       seasonality=[4], input_size=4, output_size=8,
                       frequency=None, max_periods=20, random_seed=1):
    import time
    scores_list = []
    train_val_dic = {}
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # peel off num_split validation windows from the end of the data
    for i in range(num_split):
        train, validation = input_dc.split(numTest=numTest)
        train_val_dic[i] = [train, validation]
        input_dc = train

    # record score of error
    total_score = 0
    elapse = 0
    for i in range(num_split - 1, -1, -1):
        train_dc = train_val_dic[i][0]
        validation_dc = train_val_dic[i][1]
        validation_df = validation_dc.to_df()
        start_time = time.time()
        m = ModelESRNN(max_epochs=max_epochs, batch_size=batch_size,
                       batch_size_test=batch_size_test, freq_of_test=freq_of_test,
                       learning_rate=learning_rate,
                       lr_scheduler_step_size=lr_scheduler_step_size,
                       lr_decay=lr_decay, per_series_lr_multip=per_series_lr_multip,
                       gradient_eps=gradient_eps,
                       gradient_clipping_threshold=gradient_clipping_threshold,
                       rnn_weight_decay=rnn_weight_decay, noise_std=noise_std,
                       level_variability_penalty=level_variability_penalty,
                       testing_percentile=testing_percentile,
                       training_percentile=training_percentile,
                       ensemble=ensemble, cell_type=cell_type,
                       state_hsize=state_hsize, dilations=dilations,
                       add_nl_layer=add_nl_layer, seasonality=seasonality,
                       input_size=input_size, output_size=output_size,
                       frequency=frequency, max_periods=max_periods,
                       random_seed=random_seed, device=device)
        m.train(train_dc)
        y_predict = m.predict(validation_dc)
        y_predict_df = y_predict.to_df()
        score = MP.MAPE(validation_df, y_predict_df)
        elapse += time.time() - start_time
        scores_list.append(score)
        total_score += score
    score = total_score / num_split
    return score, scores_list, elapse / num_split, (max_epochs, batch_size,
                                                    input_size, output_size)
def validation_simple(input_dc: DataCollection, numTest: int,
                      max_epochs=15, batch_size=1, batch_size_test=128,
                      freq_of_test=-1, learning_rate=1e-3,
                      lr_scheduler_step_size=9, lr_decay=0.9,
                      per_series_lr_multip=1.0, gradient_eps=1e-8,
                      gradient_clipping_threshold=20, rnn_weight_decay=0,
                      noise_std=0.001, level_variability_penalty=80,
                      testing_percentile=50, training_percentile=50,
                      ensemble=False, cell_type='LSTM', state_hsize=40,
                      dilations=[[1, 2], [4, 8]], add_nl_layer=False,
                      seasonality=[4], input_size=4, output_size=8,
                      frequency=None, max_periods=20, random_seed=1):
    train_dc, validation_dc = input_dc.split(numTest=numTest)
    validation_df = validation_dc.to_df()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    m = ModelESRNN(max_epochs=max_epochs, batch_size=batch_size,
                   batch_size_test=batch_size_test, freq_of_test=freq_of_test,
                   learning_rate=learning_rate,
                   lr_scheduler_step_size=lr_scheduler_step_size,
                   lr_decay=lr_decay, per_series_lr_multip=per_series_lr_multip,
                   gradient_eps=gradient_eps,
                   gradient_clipping_threshold=gradient_clipping_threshold,
                   rnn_weight_decay=rnn_weight_decay, noise_std=noise_std,
                   level_variability_penalty=level_variability_penalty,
                   testing_percentile=testing_percentile,
                   training_percentile=training_percentile,
                   ensemble=ensemble, cell_type=cell_type,
                   state_hsize=state_hsize, dilations=dilations,
                   add_nl_layer=add_nl_layer, seasonality=seasonality,
                   input_size=input_size, output_size=output_size,
                   frequency=frequency, max_periods=max_periods,
                   random_seed=random_seed, device=device)
    m.train(train_dc)
    y_predict = m.predict(validation_dc)
    y_predict_df = y_predict.to_df()
    score = MP.MAPE(validation_df, y_predict_df)
    return score, (max_epochs, batch_size, input_size, output_size)
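# A minimal grid-search sketch built on validation_simple(), assuming a
# prepared `input_dc` DataCollection; the candidate dilations and sizes are
# illustrative values, not recommended settings. It keeps the configuration
# with the lowest validation MAPE.
best_score, best_params = float('inf'), None
for dilations in ([[1]], [[1, 5]], [[1, 3], [5, 10]]):
    for input_size in (4, 12):
        score, params = validation_simple(input_dc, numTest=12,
                                          max_epochs=15, batch_size=32,
                                          dilations=dilations,
                                          input_size=input_size, output_size=12)
        if score < best_score:
            best_score, best_params = score, (dilations, *params)
print(best_score, best_params)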
lr_decay = 0.9
noise_std = 0.001
level_variability_penalty = 80
state_hsize = 40
dilation = [[1]]
add_nl_layer = False
seasonality = [5]

# action
path = os.path.join('test', 'Data', 'Daily')
dic = preprocess.read_file(path)
series_list = []
for k, v in dic.items():
    df, cat = v
    df = preprocess.single_price(df, k)
    series_list.append(DataSeries(cat, 'daily', df))
collect = DataCollection('RollingValidation', series_list)
input_dc, _ = collect.split(numTest=2 * numTest)
score, _ = validation_simple(input_dc, numTest=numTest,
                             max_epochs=max_epochs, batch_size=batch_size,
                             learning_rate=learning_rate,
                             lr_scheduler_step_size=lr_scheduler_step_size,
                             lr_decay=lr_decay, noise_std=noise_std,
                             level_variability_penalty=level_variability_penalty,
                             state_hsize=state_hsize, dilations=dilation,
                             add_nl_layer=add_nl_layer,
                             seasonality=seasonality)
class Test_Data(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.a = pd.DataFrame([10.2, 12, 32.1, 9.32], columns=['fakeSPY'],
                              index=pd.to_datetime(['2020-01-01', '2020-02-01',
                                                    '2020-03-01', '2020-04-01']))
        self.a_series = DataSeries('ETF', 'monthly', self.a)
        self.b = pd.DataFrame([2.3, 3.6, 4.5], columns=['fakeTreasury'],
                              index=pd.to_datetime(['2019-12-12', '2020-02-05',
                                                    '2020-09-13']))
        self.b_series = DataSeries('Bond', 'monthly', self.b)
        self.c_collection = DataCollection('trial', [self.a_series, self.b_series])

        # For test_the_rest_of_entire_dataset():
        self.a_entire = pd.DataFrame([10.2, 12, 32.1, 9.32, 11.5, 9.7],
                                     columns=['fakeSPY'],
                                     index=pd.to_datetime(['2020-01-01', '2020-02-01',
                                                           '2020-03-01', '2020-04-01',
                                                           '2020-05-01', '2020-06-01']))
        self.a_series_entire = DataSeries('ETF', 'monthly', self.a_entire)
        self.b_entire = pd.DataFrame([2.3, 3.6, 4.5, 5.5],
                                     columns=['fakeTreasury'],
                                     index=pd.to_datetime(['2019-12-12', '2020-02-05',
                                                           '2020-09-13', '2020-10-13']))
        self.b_series_entire = DataSeries('Bond', 'monthly', self.b_entire)
        self.c_collection_entire = DataCollection(
            'trial', [self.a_series_entire, self.b_series_entire])
        self.a_exp = pd.DataFrame([11.5, 9.7], columns=['fakeSPY'],
                                  index=pd.to_datetime(['2020-05-01', '2020-06-01']))
        self.a_series_exp = DataSeries('ETF', 'monthly', self.a_exp)
        self.b_exp = pd.DataFrame([5.5], columns=['fakeTreasury'],
                                  index=pd.to_datetime(['2020-10-13']))
        self.b_series_exp = DataSeries('Bond', 'monthly', self.b_exp)
        self.c_collection_exp = DataCollection(
            'trial', [self.a_series_exp, self.b_series_exp])

    def test_DataSeries_basic(self):
        a = self.a
        a_series = self.a_series
        assert len(a_series) == 4
        assert str(a_series) == 'monthly fakeSPY'
        assert a_series.get_ticker() == 'fakeSPY'
        assert a_series.get_category() == 'ETF'
        assert a_series.get_freq() == 'monthly'
        assert a.equals(a_series.get_ts())
        # test deep copy
        a_copy = a_series.copy()
        assert a_copy != a_series and a_copy.get_ts().equals(a_series.get_ts())
        assert isinstance(a_series.to_Series(), pd.Series)

    def test_DataSeries_add_sub(self):
        diff = self.a_series_entire - self.a_series
        assert self.compareSeries(diff, self.a_series_exp)
        a_plus = diff + self.a_series
        assert self.compareSeries(a_plus, self.a_series_entire)

    def test_DataSeries_to_list(self):
        lst = self.a_series.to_list()
        assert lst == [10.2, 12, 32.1, 9.32]

    def test_last_index(self):
        assert self.a_series.get_last_date() == pd.to_datetime('2020-04-01')

    def test_DataSeries_split_and_trim(self):
        # test split
        a_train, a_test = self.a_series.split(pct=0.75)
        assert isinstance(a_train, DataSeries)
        assert isinstance(a_test, DataSeries)
        assert len(a_train) == 3
        assert len(a_test) == 1
        assert self.a.iloc[:3].equals(a_train.get_ts())
        assert self.a.iloc[3:].equals(a_test.get_ts())
        # test trim
        trimed = self.a_series.trim('2020-02-01', '2020-03-01')
        assert len(trimed) == 2
        assert self.a.loc['2020-02-01':'2020-03-01'].equals(trimed.get_ts())

    @staticmethod
    def compareSeries(a, b):
        flag = True
        if not isinstance(a, DataSeries):
            print("\n The first item is not a DataSeries object")
            return False
        if not isinstance(b, DataSeries):
            print("\n The second item is not a DataSeries object")
            return False
        if a == b:
            print("\n The two items are the same object")
            flag = False
        if len(a) != len(b):
            print("\n The two items do not have the same length")
            flag = False
        if str(a) != str(b):
            print("\n The two items do not have the same ticker")
            flag = False
        if a.get_category() != b.get_category():
            print("\n The two items do not have the same category")
            flag = False
        if not a.get_ts().equals(b.get_ts()):
            print("\n The two items do not have the same time series")
            flag = False
        if not a.get_freq() == b.get_freq():
            print("\n The two items do not have the same frequency")
            flag = False
        return flag

    def test_DataCollection_basic(self):
        assert len(self.c_collection) == 2
        assert self.c_collection.get_freq() == 'monthly'
        for item, compare in zip(self.c_collection, [self.a_series, self.b_series]):
            assert self.compareSeries(item, compare)

    def test_DataCollection_add_sub(self):
        res = self.c_collection_entire - self.c_collection
        expected = self.c_collection_exp
        for r, e in zip(res, expected):
            assert self.compareSeries(r, e)
        res_plus = res + self.c_collection
        for r, e in zip(res_plus, self.c_collection_entire):
            assert self.compareSeries(r, e)

    def test_DataCollection_get_series(self):
        item1 = self.c_collection[1]
        assert self.compareSeries(item1, self.b_series)
        item2 = self.c_collection.get_series('fakeSPY')
        assert self.compareSeries(item2, self.a_series)

    def test_DataCollection_copy(self):
        c = self.c_collection.copy()
        assert c != self.c_collection
        assert c.label == self.c_collection.label
        assert c.get_freq() == self.c_collection.get_freq()
        for one, two in zip(c, self.c_collection):
            assert self.compareSeries(one, two)

    def test_DataCollection_summary(self):
        pass

    def test_DataCollection_split(self):
        train, test = self.c_collection.split(pct=0.75)
        assert str(train) == 'trial'
        assert train.freq == 'monthly'
        assert str(test) == 'trial'
        assert test.freq == 'monthly'
        compare = [self.a_series.split(0.75), self.b_series.split(0.75)]
        compare_train, compare_test = zip(*compare)
        train_col, test_col = list(compare_train), list(compare_test)
        for i, item in enumerate(train):
            assert self.compareSeries(item, train_col[i])
        for i, item in enumerate(test):
            assert self.compareSeries(item, test_col[i])

    def test_DataCollection_list(self):
        assert self.c_collection.ticker_list() == ['fakeSPY', 'fakeTreasury']
        assert self.c_collection.category_list() == ['ETF', 'Bond']
        assert self.c_collection.last_date_list() == pd.to_datetime(
            ['2020-04-01', '2020-09-13']).to_list()
        assert self.c_collection.to_list() == [[10.2, 12, 32.1, 9.32],
                                               [2.3, 3.6, 4.5]]

    def test_DataCollection_add(self):
        d = pd.DataFrame([11, 22], columns=['fakeZZZ'],
                         index=pd.to_datetime(['2019-1-12', '2019-02-05']))
        d_series = DataSeries('Bond', 'monthly', d)
        c_plus = self.c_collection.copy()
        c_plus.add(d_series)
        compare = [self.a_series, self.b_series, d_series]
        for i, item in enumerate(c_plus):
            assert self.compareSeries(item, compare[i])

    def test_DataCollection_df(self):
        df = self.c_collection.to_df()
        compare = pd.concat([self.a, self.b], axis=1)
        assert df.equals(compare)

    def test_price_to_return(self):
        pass
class Test_Portfolio_Performance(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # fake daily data by ZZ
        self.a_series = DataSeries(
            'ETF', 'daily',
            pd.DataFrame([10.0, 15.0, 20.0, 30.0], columns=['ABC'],
                         index=pd.to_datetime(['2020-01-01', '2020-01-02',
                                               '2020-01-03', '2020-01-04'])))
        self.b_series = DataSeries(
            'Bond', 'daily',
            pd.DataFrame([1.0, 3.5, 4.5], columns=['KKK'],
                         index=pd.to_datetime(['2020-01-01', '2020-01-02',
                                               '2020-01-03'])))
        self.collect = DataCollection('trial', [self.a_series, self.b_series])
        d = {'Initial weights': [0.6, 0.4]}
        self.weights = pd.DataFrame(data=d).T
        self.weights = self.weights.rename(columns={0: 'ABC', 1: 'KKK'})
        self.p = port.EqualPort("test equal port")
        self.p.calculate_initial_weight(self.collect)

        # Monthly
        path_monthly = os.path.join('test', 'Data', 'Monthly')
        dic_monthly = DataPreprocessing.read_file(path_monthly)
        n_assets = 4
        time_series_group = []
        for i in range(n_assets):
            df = dic_monthly[list(dic_monthly.keys())[i]]
            ds = DataSeries(df[1], 'monthly', df[0])
            time_series_group.append(ds)
        input_dc_test = DataCollection(label='Test Collection',
                                       time_series_group=time_series_group)
        self.input_dc = input_dc_test
        self.input_freq = input_dc_test.get_freq()
        self.input_df = self.input_dc.to_df()
        self.n_asset = len(self.input_df.columns)
        input_weights = [[1 / self.n_asset] * self.n_asset]
        input_weights_df = pd.DataFrame(input_weights,
                                        columns=self.input_df.columns,
                                        index=['Initial weights'])
        self.input_weights_df = input_weights_df

    def test_annualized_return(self):
        ans = PP.annualized_return(self.weights, self.collect.to_df().dropna(), 'daily')
        output_return_df = self.collect.to_df().dropna().pct_change().dropna()
        annual_return = output_return_df.mean() * 252
        ans_new = annual_return @ self.weights.T
        self.assertAlmostEqual(ans_new[0], 203.4)
        self.assertAlmostEqual(ans, ans_new[0])
        res2 = PP.annualized_return(self.input_weights_df, self.input_df, self.input_freq)
        expected2 = np.dot(self.input_weights_df,
                           self.input_df.pct_change().mean() * 12).item()
        self.assertEqual(res2, expected2)

    def test_annualized_volatility(self):
        ans = PP.annualized_volatility(self.weights, self.collect.to_df().dropna(), 'daily')
        cov = self.collect.to_df().dropna().pct_change().dropna().cov() * 252
        ans_new = (self.weights @ cov @ self.weights.T).iloc[0][0] ** 0.5
        self.assertAlmostEqual(ans, ans_new)
        res2 = PP.annualized_volatility(self.input_weights_df, self.input_df, self.input_freq)
        expected2 = np.sqrt(np.dot(self.input_weights_df,
                                   np.dot(self.input_df.pct_change().cov() * 12,
                                          self.input_weights_df.T)).item())
        self.assertAlmostEqual(res2, expected2)

    def test_sharpe_ratio(self):
        ans = PP.sharpe_ratio(0.6, 0.2, 0.03)
        ans_new = (0.6 - 0.03) / 0.2
        self.assertAlmostEqual(ans, ans_new)

    def test_PnL(self):
        ans = PP.PnL(self.weights, self.collect.to_df().dropna())
        output_return_df = self.collect.to_df().dropna().pct_change().dropna()
        ans_1 = (output_return_df.iloc[0][0] * self.weights.iloc[0][0]
                 + output_return_df.iloc[0][1] * self.weights.iloc[0][1])
        ans_2 = (output_return_df.iloc[1][0] * self.weights.iloc[0][0]
                 + output_return_df.iloc[1][1] * self.weights.iloc[0][1])
        self.assertAlmostEqual(ans.iloc[0][0], ans_1)
        self.assertAlmostEqual(ans.iloc[1][0], ans_2)

    def test_max_drawdown(self):
        price = {'PnL': [75, 33, 35, 25, 80, 100, 95, 78, 72, 62, 65, 60, 42, 50]}
        pnl = pd.DataFrame(data=price).pct_change().dropna()
        ans = PP.max_drawdown(pnl)
        ans_new = (25 - 75) / 75
        self.assertAlmostEqual(ans, ans_new)

    def test_partial_moment(self):
        pnl = PP.PnL(self.weights, self.collect.to_df().dropna())
        pm = PP.partial_moment(pnl, threshold=0.6)
        length = pnl.shape[0]
        threshold = 0.6
        diff_df = threshold - pnl
        drop_minus = diff_df[diff_df >= 0].dropna()
        pm_new = ((drop_minus ** 2).sum() / length).item()
        self.assertAlmostEqual(pm, 0.0408163265)
        self.assertAlmostEqual(pm, pm_new)

    def test_PP_class(self):
        self.assertEqual(self.p.get_freq(), self.collect.get_freq())
        pp = PP.PortfolioPerformance(self.p, self.collect)
        pp.annualized_return()
        pp.annualized_volatility()
        pp.annualized_sharpe_ratio()
        # pp.print_metrics()
        # pp.get_metrics('annualized_return')
        pp.PnL()
        # print(pp.metrics['PnL'])
        pp.max_drawdown()  # 0 since the test data is always increasing, but the function is tested
        self.assertEqual(pp.get_metrics("annualized_return"), 228)
        self.assertEqual(pp.get_metrics("PnL").iloc[0][0], 1.5)
        pp.print_metrics()
        pp.get_metrics('PnL')
        self.assertEqual(pp.metrics['annualized_return'], 228)
        self.assertAlmostEqual(pp.metrics['annualized_volatility'], 13.3630621)
        self.assertAlmostEqual(pp.metrics['sharpe_ratio'], 228 / 13.3630621)
        self.assertAlmostEqual(pp.metrics['PnL'].iloc[0][0], 1.5)
        self.assertAlmostEqual(pp.metrics['PnL'].iloc[1][0], 0.30952380952380953)
        self.assertEqual(pp.metrics['max_drawdown'], 0)

        # sortino
        pp.sortino_ratio(threshold=0.6)
        d = {'Initial weights': [0.5, 0.5]}
        self.weights2 = pd.DataFrame(data=d).T
        self.weights2 = self.weights2.rename(columns={0: 'ABC', 1: 'KKK'})
        pnl = PP.PnL(self.weights2, self.collect.to_df().dropna())
        threshold = 0.6
        expected = pp.metrics['annualized_return']
        lpm_sortino = PP.partial_moment(pnl, threshold, order=2, lower=True) ** 0.5
        ans_sortino = (expected - threshold) / lpm_sortino
        self.assertAlmostEqual(pp.metrics['sortino_ratio'], ans_sortino)

        # omega
        pp.omega_ratio(threshold=0.6)
        lpm_omega = PP.partial_moment(pnl, threshold, order=1, lower=True)
        ans_omega = ((expected - threshold) / lpm_omega) + 1
        self.assertAlmostEqual(pp.metrics['omega_ratio'], ans_omega)
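# The partial-moment checks above follow the standard lower partial moment,
# LPM_n(tau) = (1/T) * sum_t max(tau - r_t, 0)^n. A minimal sketch consistent
# with that definition and with the test's manual computation; this is an
# illustration, not the project's PP.partial_moment.
import pandas as pd

def lower_partial_moment_sketch(pnl: pd.DataFrame, threshold: float,
                                order: int = 2) -> float:
    # shortfall below the threshold; returns above it contribute zero
    shortfall = (threshold - pnl).clip(lower=0)
    return float((shortfall ** order).sum().item() / len(pnl))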
import os

import pandas as pd
import numpy as np

from utils.Data import DataCollection, DataSeries
from Models.ModelESRNN import ModelESRNN
import utils.DataPreprocessing as preprocess
import utils.Validation as Vn

path = os.path.join('test', 'Data', 'Daily')
dic = preprocess.read_file(path)
series_list = []
for k, v in dic.items():
    df, cat = v
    df = preprocess.single_price(df, k)
    series_list.append(DataSeries(cat, 'daily', df))
collect = DataCollection('Simple', series_list)

dilation_list = [[[1]], [[5]], [[1, 5]], [[1, 3, 5]], [[1, 5, 10]], [[1, 5, 20]],
                 [[1, 3, 5, 10]], [[1, 3, 5, 20]], [[1, 5, 10, 20]],
                 [[1], [5]], [[1], [3, 5]], [[1], [5, 10]], [[1], [5, 20]],
                 [[1], [3, 5, 10]], [[1], [3, 5, 20]], [[1], [5, 10, 20]],
                 [[1, 3], [5]], [[1, 5], [10]], [[1, 5], [20]],
                 [[1, 3], [5, 10]], [[1, 3, 5], [10]]]