def __init__(self, **kwargs):
    term = kwargs.get('term', 1)
    weight_df = pd.read_csv(kwargs.get('weight_file', './input/coint_vec.csv'))
    price_df = pd.read_csv(kwargs.get('price_file', './input/index_price_52.csv'))
    price_df = cf.convert_date_format(price_df).set_index('ValueDate').iloc[:, -1]
    weight_df['ValueDate'] = cf.convert_date_format(weight_df)
    weight_df.set_index('ValueDate', inplace=True)
    src_df = np.log(cf.get_fx_rate(start_date=weight_df.index[0],
                                   end_date=weight_df.index[-1],
                                   ccy_list=weight_df.columns).loc[weight_df.index])
    assert src_df.shape[0] == weight_df.shape[0]
    self._price_dic = self._create_price_index(weight_df, src_df)
    port_label = self._create_port_label(weight_df, src_df, term).loc[price_df.index]
    self._notional = pd.DataFrame((np.abs(weight_df) * src_df).loc[price_df.index].sum(axis=1),
                                  columns=['Notional'])
    self._port_label = pd.DataFrame(port_label.Return / self._notional.Notional,
                                    columns=['Return'])
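# Illustrative note on the label above (hypothetical numbers, not from the source): the raw
# portfolio return is scaled by the gross notional of the cointegration weights so labels stay
# comparable across rebalances, e.g.
#   raw portfolio return   r_t      = 0.012
#   gross notional         N_t      = sum_i |w_i| * log(FX_i) = 2.4
#   stored label           Return_t = r_t / N_t = 0.005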
def execute(self):
    return_df, normalized_df = self.execute_normal_fc_sim()
    self._logger.info("Calculating Return with Estimated Financial Condition...")
    return_matrix = []
    for alg in self._alg_list:
        self._logger.info("Processing {0}...".format(alg))
        fc_label = self._all_fc_label.query("Algorithm == @alg")[['ValueDate', 'Predict']]
        fc_label = cf.convert_date_format(fc_label)
        fc_label.set_index('ValueDate', inplace=True)
        return_list = []
        for value_date in return_df.index:
            if value_date in fc_label.index:
                target_ticker = self._fc_ticker_list[int(fc_label.loc[value_date].Predict)]
                if self._threshold_dic[target_ticker]['Lower'] \
                        < normalized_df['GSUSFCI Index'].loc[value_date] \
                        < self._threshold_dic[target_ticker]['Upper']:
                    return_list.append(return_df[target_ticker].loc[value_date])
                else:
                    return_list.append(0)
            else:
                return_list.append(return_df[self._fc_ticker_list[0]].loc[value_date])
        return_matrix.append(return_list)
    self._all_return_df = pd.merge(pd.DataFrame(return_matrix,
                                                index=self._alg_list,
                                                columns=return_df.index).T,
                                   return_df,
                                   right_index=True, left_index=True)
    self._logger.info("Return Calculation Completed.")
def create_date_list(input_file_name, term_month):
    input_data_df = pd.read_csv(input_file_name)
    input_data_df = cf.convert_date_format(input_data_df)
    input_data_df.set_index('ValueDate', inplace=True)
    start_date = np.min(list(input_data_df.index)) + relativedelta(months=term_month)
    return input_data_df.query("ValueDate >= @start_date").index
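# Hypothetical usage sketch (file name and term are illustrative, not taken from the source):
# keep only the dates that have a full term_month lookback window available.
# date_list = create_date_list('./input/all_input_data.csv', term_month=24)
# print(date_list[0], date_list[-1])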
def __init__(self, *args, **kwargs):
    self._input_data = cf.convert_date_format(
        pd.read_csv(os.path.join(os.path.dirname(__file__), '../input', 'all_input_data.csv')))
    self._start_date = kwargs.get('StartDate', date(2000, 1, 1))
    self._end_date = kwargs.get('EndDate', date(2020, 7, 17))
    self._target_ccy = kwargs.get('TargetCcy', ['ZAR', 'MXN'])
    self._base_ccy = kwargs.get('BaseCcy', 'USD')
    self._label_tickers = kwargs.get('label_tickers', ['NFCIINDX Index', 'GSUSFCI Index'])
    self._ticker_threshold = kwargs.get('ticker_threshold', {'GSUSFCI Index': -0.05,
                                                             'NFCIINDX Index': 0})
    self._threshold_dic = {}
    for ticker in self._label_tickers:
        if ticker == 'GSUSFCI Index':
            self._threshold_dic[ticker] = {'Upper': 0.6, 'Lower': -0.6}
        else:
            self._threshold_dic[ticker] = {'Upper': 3, 'Lower': -3}
    self._price_ticker = [self._base_ccy + self._target_ccy[0] + ' Index',
                          self._base_ccy + self._target_ccy[1] + ' Index']
    self._price_df = self.create_factor(self._price_ticker)
    self._date_list = cf.create_weekly_datelist(self._start_date, self._end_date)
    self._surprise_ticker = 'CESI' + self._target_ccy[0] + ' Index'
    self._datachange_ticker = 'CECIC' + self._target_ccy[0] + ' Index'
    self._ctot_ticker = 'CTOT' + self._target_ccy[0] + ' Index'
    self._value_ticker = ['BISB' + ccy[:2] + 'N Index' for ccy in self._target_ccy]
    self._fc_tickers = ['NFCIINDX Index', 'GSUSFCI Index']
    self._carry_ticker_dic = {'USD': 'USGG2YR Index',
                              'ZAR': 'GSAB2YR Index',
                              'MXN': 'GMXN02YR Index',
                              'TRY': 'GTRU2YR Index'}
def create_label(weight_file_name, input_file_name, is_regression=True):
    port_label_mgr = PortLabelManager(weight_file=weight_file_name)
    input_df = pd.read_csv(input_file_name)
    input_df = cf.convert_date_format(input_df)
    start_date = input_df.ValueDate.iloc[0]
    end_date = input_df.ValueDate.iloc[-1]
    assert port_label_mgr.port_label.index[0] <= start_date
    #assert port_label_mgr.port_label.index[-1] >= end_date
    if is_regression:
        return pd.DataFrame(port_label_mgr.port_label
                            .query("index >= @start_date & index <= @end_date"))
    return pd.DataFrame(port_label_mgr.port_label
                        .query("index >= @start_date & index <= @end_date")
                        .Return.apply(lambda x: 1 if x > 0 else 0))
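# Hypothetical usage sketch (the file names are the defaults seen elsewhere in this module, not a
# confirmed call): with is_regression=False the label is 1 on weeks where the notional-adjusted
# portfolio return is positive and 0 otherwise.
# label_df = create_label('./input/coint_vec.csv', './input/all_input_data.csv', is_regression=False)
# print(label_df.Return.value_counts())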
def __init__(self, **kwargs):
    self._logger = logging.getLogger("jpbank.quants")
    self._logger.info("{0} initializing...".format(self.__class__.__name__))
    self._input_data = cf.convert_date_format(
        pd.read_csv(os.path.join(os.path.dirname(__file__), '../input', 'all_input_data.csv')))
    self._fc_threshold = kwargs.get('fc_threshold', 0)
    self._includes_swap = kwargs.get('includes_swap', True)
    self._rolls = kwargs.get('rolls', False)
    self._start_date = kwargs.get('start_date', date(2003, 3, 28))
    self._end_date = kwargs.get('end_date', date.today())
    self._has_indication_diff = kwargs.get('has_indication_diff', True)  # Chicago: True / GS: False
    self._roll_term = kwargs.get('roll_term', 52)
    self._date_list = cf.create_weekly_datelist(self._start_date, self._end_date)
    self._price_tickers = kwargs.get('price_tickers', ['USDZAR Index', 'USDMXN Index'])
    self._em_rate_tickers = kwargs.get('em_rate_tickers', ['GSAB2YR Index', 'GMXN02YR Index'])
    self._em_fwd_tickers = kwargs.get('em_fwd_tickers', ['USDZAR1W BGN Curncy', 'USDMXN1W BGN Curncy'])
    assert len(self._em_fwd_tickers) == len(self._em_rate_tickers) == len(self._price_tickers)
    self._em_price_rate_dic, self._em_rate_price_dic = \
        self._create_ticker_dic(self._price_tickers, self._em_rate_tickers)
    self._em_price_fwd_dic, self._em_fwd_price_dic = \
        self._create_ticker_dic(self._price_tickers, self._em_fwd_tickers)
    self._exp_return_file = kwargs.get('exp_return_file', None)
    self._base_rate_ticker = kwargs.get('base_rate_ticker', 'USGG2YR Index')
    self._fc_tickers = kwargs.get('fc_tickers', ['NFCIINDX Index', 'GSUSFCI Index'])
    self._price_df = self._get_price(self._price_tickers + self._em_rate_tickers
                                     + self._em_fwd_tickers + [self._base_rate_ticker]
                                     + self._fc_tickers)
    # calculate the spread of each EM rate over the base (USD) rate
    for em_ticker in self._em_rate_tickers:
        self._price_df[em_ticker] = self._price_df[em_ticker] - self._price_df[self._base_rate_ticker]
    self._logger.info("{0} initialized.".format(self.__class__.__name__))
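# Note: _create_ticker_dic is called above but not shown in this file. A minimal sketch, assuming
# it simply pairs the two ticker lists element-wise into forward and reverse lookup dicts (the
# name and body below are illustrative, not the repository's actual implementation):
def _create_ticker_dic_sketch(price_tickers, other_tickers):
    """Return (price -> other, other -> price) dictionaries built by pairwise zip."""
    price_to_other = dict(zip(price_tickers, other_tickers))
    other_to_price = dict(zip(other_tickers, price_tickers))
    return price_to_other, other_to_price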
def __init__(self, **kwargs):
    super().__init__(**kwargs)
    self._logger.info("{0} initializing...".format(self.__class__.__name__))
    self._start_date = kwargs.get('start_date', None)
    self._end_date = kwargs.get('end_date', None)
    interval = kwargs.get('interval', 1)
    frequency = kwargs.get('frequency', 'weekly')
    if frequency == 'daily':
        date_list = cf.create_daily_datelist(self._start_date, self._end_date)
    elif frequency == 'weekly':
        date_list = cf.create_weekly_datelist(self._start_date, self._end_date)
    else:
        date_list = cf.create_monthly_datelist(self._start_date, self._end_date)
    coint_vec_file = kwargs.get('coint_Vec_file', './input/coint_vec.csv')
    self._weight_df = cf.convert_date_format(pd.read_csv(coint_vec_file))\
                        .set_index('ValueDate').loc[date_list]
    self._fx_rate_df = np.log(cf.get_fx_rate(self._start_date, self._end_date,
                                             self._weight_df.columns.tolist())).loc[date_list]
    self._coint_index_df = pd.DataFrame(
        (self._fx_rate_df * self._weight_df[self._fx_rate_df.columns]).sum(axis=1),
        columns=['Price']).loc[date_list]
    #self._index_return_df = pd.DataFrame(self._coint_index_df[interval:] - self._coint_index_df[:-interval],
    #                                     index=self._fx_rate_df.index[:-interval],
    #                                     columns=['Return'])#.loc[date_list]
    self._logger.info("{0} initialized.".format(self.__class__.__name__))
import logging
import logging.config

import numpy as np
import pandas as pd
import statsmodels.api as sm
from datetime import datetime, date
from dateutil.relativedelta import relativedelta
#from analysis.coint_vec_analyst import CointVecAnalyst
import util.common_func as cf


if __name__ == '__main__':
    logging.config.fileConfig('./logger_config.ini')
    logger = logging.getLogger("jpbank.quants")

    import_file_name = './input/coint_vec_2y.csv'
    coint_vec_df = pd.read_csv(import_file_name)
    coint_vec_df['ValueDate'] = cf.convert_date_format(coint_vec_df)
    coint_vec_df.set_index('ValueDate', inplace=True)
    ccy_list = coint_vec_df.columns.tolist()

    term_week = 104
    fx_rate_df = np.log(cf.get_fx_rate(start_date=coint_vec_df.index[0] - relativedelta(weeks=term_week),
                                       end_date=coint_vec_df.index[-1],
                                       ccy_list=ccy_list))

    output_df = pd.DataFrame()
    price_list = []
    for i in range(coint_vec_df.shape[0] - 1):
        value_date = coint_vec_df.index[i]
        logger.info("Processing in {0}".format(value_date))
        start_date = value_date - relativedelta(weeks=term_week)
        next_date = coint_vec_df.index[i+1]
factor_df = pd.DataFrame()
ccy_list = ['USD', 'EUR', 'AUD', 'NZD', 'GBP', 'CHF', 'CAD']
for ccy in ccy_list:
    print("Processing", ccy, "...")
    factor_maker = PortFactorMaker(TargetCcy=['JPY', ccy], is_weekly=is_weely)
    df = factor_maker.create_feature_vector()#.drop(['Return'], axis=1)
    df.columns = np.array(df.columns).astype(object) + '_' + ccy
    if factor_df.shape[0] == 0:
        factor_df = df
    else:
        factor_df = pd.merge(factor_df, df, right_index=True, left_index=True)

# P-Value
pvalue_df = pd.read_csv('./input/min_pvalue.csv')
pvalue_df = cf.convert_date_format(pvalue_df)
pvalue_df.set_index('ValueDate', inplace=True)
factor_df = pd.merge(factor_df, pvalue_df, right_index=True, left_index=True)

# Financial Condition
fin_cond_df = get_fin_cond(date(2001, 1, 1), date.today(), 'NFCIINDX Index')
factor_df = pd.merge(factor_df, fin_cond_df, right_index=True, left_index=True)

# Sentiment
sentiment_df = get_sentiment_index(date(2001, 1, 1), date.today(), 'DBQSGSI Index')
factor_df = pd.merge(factor_df, sentiment_df, right_index=True, left_index=True)

#return_df = pd.read_csv('./input/coint_vec.csv')
#import util.common_func as cf
def simulate(self):
    self._logger.info("Simulation Starting...")
    rate_return_df = self._calc_return(self._price_df[self._em_rate_tickers].loc[self._date_list])
    fc_diff_df = self._price_df[self._fc_tickers].loc[self._date_list].diff().dropna(axis=0)
    src_return_df = pd.merge(rate_return_df, fc_diff_df, right_index=True, left_index=True)
    normalized_df = pd.DataFrame([[self._normalize(src_return_df[ticker], value_date)
                                   for value_date in self._date_list[1:]]
                                  for ticker in self._em_rate_tickers + self._fc_tickers],
                                 index=self._em_rate_tickers + self._fc_tickers,
                                 columns=self._date_list[1:]).T.dropna(axis=0)

    if self._exp_return_file is None:
        self._logger.info("Selecting EM Currency Tickers using Rate")
        em_prior_tickers = pd.DataFrame([(self._em_rate_price_dic[normalized_df[self._em_rate_tickers].iloc[i].idxmax()],
                                          self._em_rate_price_dic[normalized_df[self._em_rate_tickers].iloc[i].idxmin()])
                                         for i in range(normalized_df.shape[0])],
                                        index=normalized_df.index,
                                        columns=['best', 'worst'])
    else:
        self._logger.info("Selecting EM Currency Tickers using Expected Return")
        exp_return_df = pd.read_csv(self._exp_return_file)
        exp_return_df = cf.convert_date_format(exp_return_df, target_col='ValueDate').set_index('ValueDate')
        em_prior_tickers = pd.DataFrame([(exp_return_df[self._price_tickers].iloc[i].idxmax(),
                                          exp_return_df[self._price_tickers].iloc[i].idxmin())
                                         for i in range(exp_return_df.shape[0])],
                                        index=exp_return_df.index,
                                        columns=['best', 'worst'])

    if self._has_indication_diff:
        # one week delay, like Chicago FC
        fc_prior_tickers = pd.DataFrame([False] + normalized_df[self._fc_tickers[0]].iloc[:-1]
                                        .apply(lambda x: x < self._fc_threshold).tolist(),
                                        index=normalized_df.index,
                                        columns=['fc_priority'])
    else:
        fc_prior_tickers = pd.DataFrame(normalized_df[self._fc_tickers[0]]
                                        .apply(lambda x: x < self._fc_threshold).tolist(),
                                        index=normalized_df.index,
                                        columns=['fc_priority'])
    sign_df = pd.merge(em_prior_tickers, fc_prior_tickers, right_index=True, left_index=True)

    self._logger.info("Building Position...")
    # Risk On:  long the EM ccy with the worst score  -> position -1 (USD short, EM long)
    # Risk Off: short the EM ccy with the best score  -> position  1 (USD long, EM short)
    position_df = pd.DataFrame([(sign_df.iloc[i]['worst'], -1.0) if sign_df.iloc[i]['fc_priority']
                                else (sign_df.iloc[i]['best'], 1.0)
                                for i in range(sign_df.shape[0])],
                               index=sign_df.index,
                               columns=['ccy', 'ls'])
    position_df.index.name = 'ValueDate'

    if self._includes_swap:
        price_return_df = self._calc_return_inc_swap(
            self._price_df[self._price_tickers
                           + [self._em_price_fwd_dic[k] for k in self._em_price_fwd_dic.keys()]].loc[self._date_list],
            self._price_tickers,
            self._em_price_fwd_dic).loc[position_df.index]
    else:
        price_return_df = self._calc_return(self._price_df[self._price_tickers].loc[self._date_list],
                                            with_log=True).loc[position_df.index]

    self._logger.info("Calculating Performance...")
    return_series_df = pd.DataFrame([price_return_df[position_df.iloc[i][0]].iloc[i + 1] * position_df.iloc[i][1]
                                     for i in range(position_df.shape[0] - 1)],
                                    index=position_df.index[:-1],
                                    columns=['return'])
    return_series_df.index.name = 'ValueDate'
    return_series_df['cum_return'] = return_series_df['return'].cumsum()

    # output result
    output_suffix = datetime.now().strftime('%Y%m%d%H%M%S')
    self.output_detaild_result(position_df, return_series_df, output_suffix)
    pd.merge(return_series_df, sign_df, right_index=True, left_index=True)\
        .to_csv(os.path.join('output', 'em_reutrn_series_{0}.csv'.format(output_suffix)))

    perform_measurer = PerformanceMeasurer()
    perform_measurer.create_result_summary(return_series_df)[['return']]\
        .to_csv(os.path.join('output', 'em_performance_{0}.csv'.format(output_suffix)))
    self._logger.info("Simulation Completed.")
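# Note: _calc_return is called above (with an optional with_log flag) but not shown here. A
# minimal sketch of the assumed behavior, for illustration only: plain first differences by
# default, log differences when with_log=True. The repository's actual method may differ.
def _calc_return_sketch(price_df, with_log=False):
    """Period-over-period change of each column; log-difference when with_log is True."""
    if with_log:
        return np.log(price_df).diff().dropna(axis=0)
    return price_df.diff().dropna(axis=0)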
def simulate(self):
    self._logger.info("Simulation Starting...")
    rate_return_df = self._calc_return(self._price_df[self._em_rate_tickers]
                                       .reindex(self._date_list)
                                       .fillna(method='ffill').fillna(method='bfill'))
    fc_diff_df = self._price_df[self._fc_tickers].loc[self._date_list].diff().dropna(axis=0)
    src_return_df = pd.merge(rate_return_df, fc_diff_df, right_index=True, left_index=True)
    normalized_df = pd.DataFrame([[self._normalize(src_return_df[ticker], value_date)
                                   for value_date in self._date_list[1:]]
                                  for ticker in self._em_rate_tickers + self._fc_tickers],
                                 index=self._em_rate_tickers + self._fc_tickers,
                                 columns=self._date_list[1:]).T.dropna(axis=0)

    if self._exp_return_file is None:
        self._logger.info("Selecting EM Currency Tickers using Rate")
        em_prior_tickers = pd.DataFrame([(self._em_rate_price_dic[normalized_df[self._em_rate_tickers].iloc[i].idxmax()],
                                          self._em_rate_price_dic[normalized_df[self._em_rate_tickers].iloc[i].idxmin()])
                                         for i in range(normalized_df.shape[0])],
                                        index=normalized_df.index,
                                        columns=['best', 'worst'])
    else:
        self._logger.info("Selecting EM Currency Tickers using Expected Return")
        exp_return_df = pd.read_csv(self._exp_return_file)
        exp_return_df = cf.convert_date_format(exp_return_df, target_col='ValueDate').set_index('ValueDate')
        em_prior_tickers = pd.DataFrame([(exp_return_df[self._price_tickers].iloc[i].idxmax(),
                                          exp_return_df[self._price_tickers].iloc[i].idxmin())
                                         for i in range(exp_return_df.shape[0])],
                                        index=exp_return_df.index,
                                        columns=['best', 'worst'])

    if self._has_indication_diff:
        sign_df = self._get_indicated_sign(normalized_df[self._fc_tickers[0]], em_prior_tickers)
    else:
        sign_df = self._get_indicated_sign(fc_diff_df[self._fc_tickers[0]], em_prior_tickers)

    self._logger.info("Building Position...")
    # Risk On:  long the EM ccy with the worst score  -> position -1 (USD short, EM long)
    # Risk Off: short the EM ccy with the best score  -> position  1 (USD long, EM short)
    position_df = pd.DataFrame([(sign_df.iloc[i]['worst'], 0) if np.isnan(sign_df.iloc[i]['fc_priority'])
                                else (sign_df.iloc[i]['worst'], -1.0) if sign_df.iloc[i]['fc_priority']
                                else (sign_df.iloc[i]['best'], 1.0)
                                for i in range(sign_df.shape[0])],
                               index=sign_df.index,
                               columns=['ccy', 'ls'])
    position_df.index.name = 'ValueDate'

    if self._includes_swap:
        price_return_df = self._calc_return_inc_swap(
            self._price_df[self._price_tickers
                           + [self._em_price_fwd_dic[k] for k in self._em_price_fwd_dic.keys()]].loc[self._date_list],
            self._price_tickers,
            self._em_price_fwd_dic).loc[position_df.index]
    else:
        price_return_df = self._calc_return(self._price_df[self._price_tickers].loc[self._date_list],
                                            with_log=True).loc[position_df.index]

    self._logger.info("Calculating Performance...")
    return_series_df = pd.DataFrame([price_return_df[position_df.iloc[i][0]].iloc[i + 1] * position_df.iloc[i][1]
                                     for i in range(position_df.shape[0] - 1)],
                                    index=position_df.index[:-1],
                                    columns=['return'])
    return_series_df.index.name = 'ValueDate'
    return_series_df['cum_return'] = return_series_df['return'].cumsum()

    self._return_series_df = return_series_df
    self._sign_df = sign_df
    self._position_df = position_df
    self._fc_normalized_df = normalized_df[[self._fc_tickers[0]]]
    self._price_return_df = price_return_df
    self._logger.info("Simulation Completed.")
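# Note: _normalize is called above but not shown. A minimal illustrative sketch, assuming it
# z-scores the observation at value_date against a trailing window of roll_term points (an
# assumption suggested by the roll_term kwarg; the actual implementation may differ). Requires
# numpy as np and a date-sorted pandas Series.
def _rolling_zscore_sketch(series, value_date, roll_term=52):
    """Standardize series[value_date] against the trailing roll_term observations."""
    window = series.loc[:value_date].iloc[-roll_term:]
    if len(window) < roll_term:
        return np.nan
    return (series.loc[value_date] - window.mean()) / window.std()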
                 columns=['RiskOn', 'RiskOff']).to_csv(os.path.join('output', '{0}_detailed_result_{1}.csv'.format(output_prefix, output_suffix)))


if __name__ == '__main__':
    import logging.config
    logging.config.fileConfig('./logger_config.ini')
    logger = logging.getLogger("jpbank.quants")

    roll_term = 104
    #start_date = date(2006, 4, 7) - relativedelta(weeks=roll_term + 2)
    start_date = date(2005, 4, 1) - relativedelta(weeks=roll_term + 2)
    end_date = date(2020, 3, 27)
    price_tickers = ['USDZAR Index', 'USDMXN Index']
    rate_tickers = ['GSAB2YR Index', 'GMXN02YR Index']
    fwd_tickers = ['USDZAR1W BGN Curncy', 'USDMXN1W BGN Curncy']

    fc_label = pd.read_csv('./input/fc_label_test.csv')\
        .query("Algorithm == 'ML_DNN_TF'")[['ValueDate', 'Predict']]
    fc_label = cf.convert_date_format(fc_label)
    fc_label.set_index('ValueDate', inplace=True)

    em_ccy_sim = EMCcySim(start_date=start_date,
                          end_date=end_date,
                          rolls=True,
                          roll_term=roll_term,
                          price_tickers=price_tickers,
                          em_rate_tickers=rate_tickers,
                          em_fwd_tickers=fwd_tickers,
                          #exp_return_file=exp_return_file,
                          #use_estimated_sign=True,
                          #adjust_return=True,
                          #fc_label=fc_label,
                          fc_tickers=['GSUSFCI Index'],
                          #fc_tickers=['MXEF Index'],
                          fc_threshold=-0.05,