示例#1
0
    def __init__(self, **kwargs):
        """Load the weight and price CSVs and derive notional and return labels.

        Keyword Args:
            term: forwarded to ``_create_port_label`` (default ``1``).
            weight_file: path of the weight CSV
                (default ``'./input/coint_vec.csv'``).
            price_file: path of the price CSV
                (default ``'./input/index_price_52.csv'``). Only the dates of
                this file are used, to select the rows that are kept.

        Raises:
            AssertionError: if the log FX frame and the weight frame do not
                have the same number of rows.
        """
        holding_term = kwargs.get('term', 1)
        weights = pd.read_csv(
            kwargs.get('weight_file', './input/coint_vec.csv'))
        raw_prices = pd.read_csv(
            kwargs.get('price_file', './input/index_price_52.csv'))

        # Only the date index of the last price column is needed below.
        dated_prices = cf.convert_date_format(raw_prices).set_index('ValueDate')
        price_dates = dated_prices.iloc[:, -1].index

        weights['ValueDate'] = cf.convert_date_format(weights)
        weights = weights.set_index('ValueDate')

        # Log FX rates over the weight date range, aligned to the weight dates.
        fx_rates = cf.get_fx_rate(start_date=weights.index[0],
                                  end_date=weights.index[-1],
                                  ccy_list=weights.columns)
        log_fx = np.log(fx_rates.loc[weights.index])

        assert log_fx.shape[0] == weights.shape[0]

        self._price_dic = self._create_price_index(weights, log_fx)
        raw_label = self._create_port_label(weights, log_fx,
                                            holding_term).loc[price_dates]

        # Notional per date: row-wise sum of |weight| * log FX rate.
        gross_exposure = (np.abs(weights) * log_fx).loc[price_dates].sum(axis=1)
        self._notional = pd.DataFrame(gross_exposure, columns=['Notional'])

        # Scale the raw label return by the notional of the same date.
        scaled_return = raw_label.Return / self._notional.Notional
        self._port_label = pd.DataFrame(scaled_return, columns=['Return'])
示例#2
0
    def execute(self):
        """Build one return series per algorithm from its predicted labels.

        For every date of the baseline return frame the predicted label (if
        any) selects a ticker from ``self._fc_ticker_list``. That ticker's
        return is used only while the normalized 'GSUSFCI Index' lies strictly
        between the ticker's 'Lower' and 'Upper' thresholds; otherwise 0 is
        recorded. Dates without a prediction fall back to the return of the
        first ticker in ``self._fc_ticker_list``.

        The result, merged on index with the baseline returns, is stored in
        ``self._all_return_df``.
        """
        return_df, normalized_df = self.execute_normal_fc_sim()

        def _return_on(value_date, fc_label):
            # No prediction for this date: use the first ticker's return.
            if value_date not in fc_label.index:
                return return_df[self._fc_ticker_list[0]].loc[value_date]

            target_ticker = self._fc_ticker_list[
                int(fc_label.loc[value_date].Predict)]
            bounds = self._threshold_dic[target_ticker]
            fci_level = normalized_df['GSUSFCI Index'].loc[value_date]
            if bounds['Lower'] < fci_level < bounds['Upper']:
                return return_df[target_ticker].loc[value_date]
            # Outside the threshold band no return is taken.
            return 0

        self._logger.info("Calculating Return with Estimated Financial Condition...")
        return_matrix = []
        for alg in self._alg_list:
            self._logger.info("Processing {0}...".format(alg))
            alg_rows = self._all_fc_label.query("Algorithm == @alg")
            fc_label = cf.convert_date_format(
                alg_rows[['ValueDate', 'Predict']]).set_index('ValueDate')
            return_matrix.append(
                [_return_on(value_date, fc_label)
                 for value_date in return_df.index])

        # One column per algorithm, one row per date, then join the baseline.
        estimated_df = pd.DataFrame(return_matrix,
                                    index=self._alg_list,
                                    columns=return_df.index).T
        self._all_return_df = pd.merge(estimated_df, return_df,
                                       right_index=True, left_index=True)
        self._logger.info("Return Calculation Completed.")
示例#3
0
def create_date_list(input_file_name, term_month):
    """Return the dates of an input CSV starting ``term_month`` months in.

    Args:
        input_file_name: path of a CSV containing a 'ValueDate' column.
        term_month: number of months added to the earliest date to obtain
            the first date that is kept.

    Returns:
        The 'ValueDate' index restricted to dates on or after the earliest
        date plus ``term_month`` months.
    """
    dated_df = cf.convert_date_format(
        pd.read_csv(input_file_name)).set_index('ValueDate')
    earliest = np.min(list(dated_df.index))
    start_date = earliest + relativedelta(months=term_month)
    return dated_df.query("ValueDate >= @start_date").index
示例#4
0
    def __init__(self, *args, **kwargs):
        """Read the shared input CSV and set up tickers, thresholds and dates.

        Positional arguments are accepted but not used by this constructor.

        Keyword Args:
            StartDate: start passed to ``cf.create_weekly_datelist``
                (default ``date(2000, 1, 1)``).
            EndDate: end passed to ``cf.create_weekly_datelist``
                (default ``date(2020, 7, 17)``).
            TargetCcy: currency codes; the first two are paired with the base
                currency to form the price tickers (default ZAR and MXN).
            BaseCcy: base currency code (default ``'USD'``).
            label_tickers: tickers that each receive an Upper/Lower band.
            ticker_threshold: per-ticker threshold mapping, stored as given.
        """
        csv_path = os.path.join(os.path.dirname(__file__), '../input',
                                'all_input_data.csv')
        self._input_data = cf.convert_date_format(pd.read_csv(csv_path))

        self._start_date = kwargs.get('StartDate', date(2000, 1, 1))
        self._end_date = kwargs.get('EndDate', date(2020, 7, 17))
        self._target_ccy = kwargs.get('TargetCcy', ['ZAR', 'MXN'])
        self._base_ccy = kwargs.get('BaseCcy', 'USD')
        self._label_tickers = kwargs.get(
            'label_tickers', ['NFCIINDX Index', 'GSUSFCI Index'])
        self._ticker_threshold = kwargs.get(
            'ticker_threshold', {'GSUSFCI Index': -0.05, 'NFCIINDX Index': 0})

        # 'GSUSFCI Index' gets a +/-0.6 band, every other label ticker +/-3.
        self._threshold_dic = {
            ticker: ({'Upper': 0.6, 'Lower': -0.6}
                     if ticker == 'GSUSFCI Index'
                     else {'Upper': 3, 'Lower': -3})
            for ticker in self._label_tickers
        }

        first_ccy = self._target_ccy[0]
        second_ccy = self._target_ccy[1]
        self._price_ticker = [
            self._base_ccy + first_ccy + ' Index',
            self._base_ccy + second_ccy + ' Index',
        ]
        self._price_df = self.create_factor(self._price_ticker)

        self._date_list = cf.create_weekly_datelist(self._start_date,
                                                    self._end_date)

        # The single-currency tickers below are keyed on the first target
        # currency only.
        self._surprise_ticker = 'CESI' + first_ccy + ' Index'
        self._datachange_ticker = 'CECIC' + first_ccy + ' Index'
        self._ctot_ticker = 'CTOT' + first_ccy + ' Index'
        # Built from the first two letters of every target currency code.
        self._value_ticker = [
            'BISB' + ccy[:2] + 'N Index' for ccy in self._target_ccy
        ]
        self._fc_tickers = ['NFCIINDX Index', 'GSUSFCI Index']

        self._carry_ticker_dic = dict([
            ('USD', 'USGG2YR Index'),
            ('ZAR', 'GSAB2YR Index'),
            ('MXN', 'GMXN02YR Index'),
            ('TRY', 'GTRU2YR Index'),
        ])
示例#5
0
def create_label(weight_file_name, input_file_name, is_regression=True):
    """Return the portfolio label restricted to the input file's date range.

    Args:
        weight_file_name: weight file handed to ``PortLabelManager``.
        input_file_name: CSV whose first and last 'ValueDate' rows bound the
            returned label.
        is_regression: when true, the label frame is returned as is;
            otherwise its 'Return' column is mapped to 1 for values greater
            than 0 and 0 for everything else.

    Returns:
        A DataFrame holding the label rows between the first and last input
        dates (inclusive).

    Raises:
        AssertionError: if the label starts after the first input date.
    """
    port_label_mgr = PortLabelManager(weight_file=weight_file_name)
    value_dates = cf.convert_date_format(
        pd.read_csv(input_file_name)).ValueDate
    start_date = value_dates.iloc[0]
    end_date = value_dates.iloc[-1]

    assert port_label_mgr.port_label.index[0] <= start_date

    in_range = port_label_mgr.port_label.query(
        "index >= @start_date & index <= @end_date")
    if is_regression:
        return pd.DataFrame(in_range)

    def _to_class(ret):
        # Binary class: 1 for a strictly positive return, otherwise 0.
        return 1 if ret > 0 else 0

    return pd.DataFrame(in_range.Return.apply(_to_class))
示例#6
0
    def __init__(self, **kwargs):
        """Configure the simulation and load the price frame it works on.

        Keyword Args:
            fc_threshold: threshold stored in ``_fc_threshold`` (default 0).
            includes_swap: stored in ``_includes_swap`` (default True).
            rolls: stored in ``_rolls`` (default False).
            start_date / end_date: bounds passed to
                ``cf.create_weekly_datelist`` (defaults 2003-03-28 / today).
            has_indication_diff: stored flag (default True).
            roll_term: stored in ``_roll_term`` (default 52).
            price_tickers, em_rate_tickers, em_fwd_tickers: parallel ticker
                lists that must have the same length.
            exp_return_file: optional file path (default None).
            base_rate_ticker: ticker subtracted from every EM rate ticker.
            fc_tickers: tickers loaded alongside the prices.

        Raises:
            AssertionError: if the three EM ticker lists differ in length.
        """
        cls_name = self.__class__.__name__
        self._logger = logging.getLogger("jpbank.quants")
        self._logger.info("{0} initializing...".format(cls_name))

        input_csv = os.path.join(os.path.dirname(__file__), '../input',
                                 'all_input_data.csv')
        self._input_data = cf.convert_date_format(pd.read_csv(input_csv))

        self._fc_threshold = kwargs.get('fc_threshold', 0)
        self._includes_swap = kwargs.get('includes_swap', True)
        self._rolls = kwargs.get('rolls', False)
        self._start_date = kwargs.get('start_date', date(2003, 3, 28))
        self._end_date = kwargs.get('end_date', date.today())
        # Chicago: True / GS: False
        self._has_indication_diff = kwargs.get('has_indication_diff', True)
        self._roll_term = kwargs.get('roll_term', 52)
        self._date_list = cf.create_weekly_datelist(self._start_date,
                                                    self._end_date)

        self._price_tickers = kwargs.get(
            'price_tickers', ['USDZAR Index', 'USDMXN Index'])
        self._em_rate_tickers = kwargs.get(
            'em_rate_tickers', ['GSAB2YR Index', 'GMXN02YR Index'])
        self._em_fwd_tickers = kwargs.get(
            'em_fwd_tickers', ['USDZAR1W BGN Curncy', 'USDMXN1W BGN Curncy'])

        assert (len(self._em_fwd_tickers)
                == len(self._em_rate_tickers)
                == len(self._price_tickers))

        # Two-way lookups between a price ticker and its rate / forward ticker.
        rate_maps = self._create_ticker_dic(self._price_tickers,
                                            self._em_rate_tickers)
        self._em_price_rate_dic, self._em_rate_price_dic = rate_maps
        fwd_maps = self._create_ticker_dic(self._price_tickers,
                                           self._em_fwd_tickers)
        self._em_price_fwd_dic, self._em_fwd_price_dic = fwd_maps

        self._exp_return_file = kwargs.get('exp_return_file', None)
        self._base_rate_ticker = kwargs.get('base_rate_ticker', 'USGG2YR Index')
        self._fc_tickers = kwargs.get('fc_tickers',
                                      ['NFCIINDX Index', 'GSUSFCI Index'])

        all_tickers = (self._price_tickers
                       + self._em_rate_tickers
                       + self._em_fwd_tickers
                       + [self._base_rate_ticker]
                       + self._fc_tickers)
        self._price_df = self._get_price(all_tickers)

        # Turn each EM rate into its difference from the base rate.
        for rate_ticker in self._em_rate_tickers:
            base_rate = self._price_df[self._base_rate_ticker]
            self._price_df[rate_ticker] = self._price_df[rate_ticker] - base_rate

        self._logger.info("{0} initialized.".format(cls_name))
示例#7
0
    def __init__(self, **kwargs):
        """Build the weighted log-FX index on the requested date grid.

        Keyword Args:
            start_date / end_date: bounds of the date list and of the FX
                rate request (default None).
            frequency: ``'daily'`` or ``'weekly'`` (default); any other value
                selects the monthly date list.
            interval: read but not used by this constructor (default 1).
            coint_Vec_file: path of the weight CSV
                (default ``'./input/coint_vec.csv'``).
        """
        super().__init__(**kwargs)
        cls_name = self.__class__.__name__
        self._logger.info("{0} initializing...".format(cls_name))

        self._start_date = kwargs.get('start_date')
        self._end_date = kwargs.get('end_date')
        interval = kwargs.get('interval', 1)
        frequency = kwargs.get('frequency', 'weekly')

        # Pick the date-list factory first, then call it once.
        if frequency == 'daily':
            make_date_list = cf.create_daily_datelist
        elif frequency == 'weekly':
            make_date_list = cf.create_weekly_datelist
        else:
            make_date_list = cf.create_monthly_datelist
        date_list = make_date_list(self._start_date, self._end_date)

        # NOTE(review): the key is spelled with a capital 'V'; callers must
        # use exactly this spelling for the override to take effect.
        coint_vec_file = kwargs.get('coint_Vec_file', './input/coint_vec.csv')
        weight_table = cf.convert_date_format(pd.read_csv(coint_vec_file))
        self._weight_df = weight_table.set_index('ValueDate').loc[date_list]

        raw_fx = cf.get_fx_rate(self._start_date, self._end_date,
                                self._weight_df.columns.tolist())
        self._fx_rate_df = np.log(raw_fx).loc[date_list]

        # Index level per date: sum over currencies of weight * log FX rate.
        aligned_weights = self._weight_df[self._fx_rate_df.columns]
        index_level = (self._fx_rate_df * aligned_weights).sum(axis=1)
        self._coint_index_df = pd.DataFrame(
            index_level, columns=['Price']).loc[date_list]

        self._logger.info("{0} initialized.".format(cls_name))
示例#8
0
import numpy as np
import pandas  as pd
import statsmodels.api as sm
from datetime import datetime,date
from dateutil.relativedelta import relativedelta
#from analysis.coint_vec_analyst import CointVecAnalyst
import util.common_func as cf

if __name__ == '__main__':
    # Imported here so that ``logging`` and ``logging.config`` are in scope
    # for the two calls below.
    import logging.config

    logging.config.fileConfig('./logger_config.ini')
    logger = logging.getLogger("jpbank.quants")

    # Load the weight vectors and index them by value date.
    import_file_name = './input/coint_vec_2y.csv'
    coint_vec_df = pd.read_csv(import_file_name)
    coint_vec_df['ValueDate'] = cf.convert_date_format(coint_vec_df)
    coint_vec_df.set_index('ValueDate', inplace=True)
    ccy_list = coint_vec_df.columns.tolist()
    term_week = 104

    # Log FX rates, requested from ``term_week`` weeks before the first
    # weight date up to the last weight date.
    fx_rate_df = np.log(cf.get_fx_rate(start_date = coint_vec_df.index[0] - relativedelta(weeks=term_week),
                                       end_date = coint_vec_df.index[-1],
                                       ccy_list = ccy_list))
    output_df = pd.DataFrame()
    price_list = []
    # Walk consecutive pairs of weight dates (the last date has no successor).
    for i in range(coint_vec_df.shape[0]-1):
        value_date = coint_vec_df.index[i]
        logger.info("Processing in {0}".format(value_date))
        start_date = value_date - relativedelta(weeks=term_week)
        next_date = coint_vec_df.index[i+1]
示例#9
0
    factor_df = pd.DataFrame()
    ccy_list = ['USD','EUR','AUD','NZD', 'GBP', 'CHF', 'CAD']
    for ccy in ccy_list:
        print("Processing", ccy, "...")
        factor_maker = PortFactorMaker(TargetCcy=['JPY', ccy], is_weekly=is_weely)
        df = factor_maker.create_feature_vector()#.drop(['Return'], axis=1)
        df.columns = np.array(df.columns).astype(object) + '_' + ccy
        if factor_df.shape[0] == 0:
            factor_df = df
        else:
            factor_df = pd.merge(factor_df, df,
                                 right_index=True, left_index=True)

    #P-Value
    pvalue_df = pd.read_csv('./input/min_pvalue.csv')
    pvalue_df = cf.convert_date_format(pvalue_df)
    pvalue_df.set_index('ValueDate', inplace=True)
    factor_df = pd.merge(factor_df, pvalue_df,
                         right_index=True, left_index=True)

    #Financial Condition
    fin_cond_df = get_fin_cond(date(2001,1,1), date.today(), 'NFCIINDX Index')
    factor_df = pd.merge(factor_df, fin_cond_df,
                         right_index=True, left_index=True)
    
    #Sentiment
    fin_cond_df = get_sentiment_index(date(2001,1,1), date.today(), 'DBQSGSI Index')
    factor_df = pd.merge(factor_df, fin_cond_df,
                         right_index=True, left_index=True)
    #return_df = pd.read_csv('./input/coint_vec.csv')
    #import util.common_func as cf
示例#10
0
    def simulate(self):
        """Run the EM currency simulation and write its results to CSV files.

        Steps:
          1. Normalize the EM rate returns and the financial-condition (FC)
             differences for every date after the first one.
          2. Choose a 'best' and a 'worst' price ticker per date, either from
             the normalized rates or, when ``self._exp_return_file`` is set,
             from that file.
          3. Flag a date as FC priority when the first FC ticker's normalized
             value is below ``self._fc_threshold`` (shifted by one row when
             ``self._has_indication_diff`` is true).
          4. Take the 'worst' ticker with side -1.0 on flagged dates and the
             'best' ticker with side +1.0 otherwise.
          5. Realize each position with the return of the following row.

        Output is written below the relative ``output`` directory.
        """
        self._logger.info("Simulation Starting...")
        rate_return_df = self._calc_return(
            self._price_df[self._em_rate_tickers].loc[self._date_list])
        fc_diff_df = self._price_df[self._fc_tickers].loc[
            self._date_list].diff().dropna(axis=0)
        src_return_df = pd.merge(rate_return_df,
                                 fc_diff_df,
                                 right_index=True,
                                 left_index=True)

        # One normalized value per (ticker, date); dates with any gap are
        # dropped after transposing to a date-indexed frame.
        signal_tickers = self._em_rate_tickers + self._fc_tickers
        signal_dates = self._date_list[1:]
        normalized_df = pd.DataFrame(
            [[self._normalize(src_return_df[ticker], value_date)
              for value_date in signal_dates]
             for ticker in signal_tickers],
            index=signal_tickers,
            columns=signal_dates).T.dropna(axis=0)

        if self._exp_return_file is None:
            self._logger.info("Selecting EM Currency Tickers usgin Rate")
            # Column selection hoisted out of the per-row loop.
            rate_score_df = normalized_df[self._em_rate_tickers]
            em_prior_tickers = pd.DataFrame(
                [(self._em_rate_price_dic[row.idxmax()],
                  self._em_rate_price_dic[row.idxmin()])
                 for _, row in rate_score_df.iterrows()],
                index=normalized_df.index,
                columns=['best', 'worst'])
        else:
            self._logger.info(
                "Selecting EM Currency Tickers usgin Expected Return")
            exp_return_df = cf.convert_date_format(
                pd.read_csv(self._exp_return_file),
                target_col='ValueDate').set_index('ValueDate')
            exp_price_df = exp_return_df[self._price_tickers]
            em_prior_tickers = pd.DataFrame(
                [(row.idxmax(), row.idxmin())
                 for _, row in exp_price_df.iterrows()],
                index=exp_return_df.index,
                columns=['best', 'worst'])

        fc_series = normalized_df[self._fc_tickers[0]]
        if self._has_indication_diff:  # one week delay, like Chicago FC
            # The first date has no earlier indication, so it is never flagged.
            fc_flags = [False] + (fc_series.iloc[:-1]
                                  < self._fc_threshold).tolist()
        else:
            fc_flags = (fc_series < self._fc_threshold).tolist()
        fc_prior_tickers = pd.DataFrame(fc_flags,
                                        index=normalized_df.index,
                                        columns=['fc_priority'])

        sign_df = pd.merge(em_prior_tickers,
                           fc_prior_tickers,
                           right_index=True,
                           left_index=True)

        self._logger.info("Building Position...")
        # Risk On: Long EM Ccy of Worst Score
        #   -> Position: -1 (USD Short, EM Long) of Worst
        # Risk Off: Short EM Ccy of Best Score
        #   -> Position: 1 (USD Long, EM Short) of Best
        position_df = pd.DataFrame(
            [(worst, -1.0) if fc_priority else (best, 1.0)
             for best, worst, fc_priority in zip(sign_df['best'],
                                                 sign_df['worst'],
                                                 sign_df['fc_priority'])],
            index=sign_df.index,
            columns=['ccy', 'ls'])
        position_df.index.name = 'ValueDate'

        if self._includes_swap:
            swap_columns = (self._price_tickers
                            + list(self._em_price_fwd_dic.values()))
            price_return_df = self._calc_return_inc_swap(
                self._price_df[swap_columns].loc[self._date_list],
                self._price_tickers,
                self._em_price_fwd_dic).loc[position_df.index]
        else:
            price_return_df = self._calc_return(
                self._price_df[self._price_tickers].loc[self._date_list],
                with_log=True).loc[position_df.index]

        self._logger.info("Calculating Perofrmance...")
        # Columns are read by label: integer keys on a label-indexed Series
        # (the former ``row[0]`` / ``row[1]``) are deprecated in pandas.
        ccy_by_date = position_df['ccy'].tolist()
        side_by_date = position_df['ls'].tolist()
        # The position of row i earns the return of row i + 1, so the last
        # position has no realized return.
        return_series_df = pd.DataFrame(
            [price_return_df[ccy].iloc[i + 1] * side
             for i, (ccy, side) in enumerate(zip(ccy_by_date[:-1],
                                                 side_by_date[:-1]))],
            index=position_df.index[:-1],
            columns=['return'])
        return_series_df.index.name = 'ValueDate'
        return_series_df['cum_return'] = return_series_df['return'].cumsum()

        # Output result; all files share one timestamp suffix.
        output_suffix = datetime.now().strftime('%Y%m%d%H%M%S')
        self.output_detaild_result(position_df, return_series_df,
                                   output_suffix)
        pd.merge(return_series_df, sign_df, right_index=True, left_index=True)\
          .to_csv(os.path.join('output', 'em_reutrn_series_{0}.csv'.format(output_suffix)))
        perform_measurer = PerformanceMeasurer()
        perform_measurer.create_result_summary(return_series_df)[['return']]\
            .to_csv(os.path.join('output','em_performance_{0}.csv'.format(output_suffix)))

        self._logger.info("Simulation Complated.")
示例#11
0
    def simulate(self):
        """Run the EM currency simulation and keep the results on the instance.

        Steps:
          1. Normalize the EM rate returns and the financial-condition (FC)
             differences for every date after the first one.
          2. Choose a 'best' and a 'worst' price ticker per date, either from
             the normalized rates or, when ``self._exp_return_file`` is set,
             from that file.
          3. Derive the sign frame with ``self._get_indicated_sign`` from the
             normalized FC series (``self._has_indication_diff`` true) or from
             the raw FC differences (false).
          4. Build positions: side 0 where 'fc_priority' is NaN, the 'worst'
             ticker with -1.0 where it is truthy, else the 'best' ticker
             with +1.0.
          5. Realize each position with the return of the following row.

        Results are stored in ``_return_series_df``, ``_sign_df``,
        ``_position_df``, ``_fc_normalized_df`` and ``_price_return_df``.
        """
        self._logger.info("Simulation Starting...")
        # Align the rates to the date list and fill gaps forward, then
        # backward. ``ffill()`` / ``bfill()`` replace the deprecated
        # ``fillna(method=...)`` spelling with identical results.
        rate_level_df = self._price_df[self._em_rate_tickers]\
            .reindex(self._date_list).ffill().bfill()
        rate_return_df = self._calc_return(rate_level_df)
        fc_diff_df = self._price_df[self._fc_tickers].loc[self._date_list].diff().dropna(axis=0)
        src_return_df = pd.merge(rate_return_df, fc_diff_df, right_index=True, left_index=True)

        # One normalized value per (ticker, date); dates with any gap are
        # dropped after transposing to a date-indexed frame.
        signal_tickers = self._em_rate_tickers + self._fc_tickers
        signal_dates = self._date_list[1:]
        normalized_df = pd.DataFrame([[self._normalize(src_return_df[ticker], value_date)
                                       for value_date in signal_dates]
                                      for ticker in signal_tickers],
                                     index=signal_tickers,
                                     columns=signal_dates).T.dropna(axis=0)

        if self._exp_return_file is None:
            self._logger.info("Selecting EM Currency Tickers using Rate")
            # Column selection hoisted out of the per-row loop.
            rate_score_df = normalized_df[self._em_rate_tickers]
            em_prior_tickers = pd.DataFrame([(self._em_rate_price_dic[row.idxmax()],
                                              self._em_rate_price_dic[row.idxmin()])
                                             for _, row in rate_score_df.iterrows()],
                                            index=normalized_df.index,
                                            columns=['best', 'worst'])
        else:
            self._logger.info("Selecting EM Currency Tickers using Expected Return")
            exp_return_df = cf.convert_date_format(pd.read_csv(self._exp_return_file),
                                                   target_col='ValueDate').set_index('ValueDate')
            exp_price_df = exp_return_df[self._price_tickers]
            em_prior_tickers = pd.DataFrame([(row.idxmax(), row.idxmin())
                                             for _, row in exp_price_df.iterrows()],
                                            index=exp_return_df.index,
                                            columns=['best', 'worst'])

        if self._has_indication_diff:
            sign_df = self._get_indicated_sign(normalized_df[self._fc_tickers[0]], em_prior_tickers)
        else:
            sign_df = self._get_indicated_sign(fc_diff_df[self._fc_tickers[0]], em_prior_tickers)

        self._logger.info("Building Position...")
        # Risk On: Long EM Ccy of Worst Score
        #   -> Position: -1 (USD Short, EM Long) of Worst
        # Risk Off: Short EM Ccy of Best Score
        #   -> Position: 1 (USD Long, EM Short) of Best
        positions = []
        for best, worst, fc_priority in zip(sign_df['best'],
                                            sign_df['worst'],
                                            sign_df['fc_priority']):
            if np.isnan(fc_priority):
                # No indication for this date: side 0, i.e. no exposure.
                positions.append((worst, 0))
            elif fc_priority:
                positions.append((worst, -1.0))
            else:
                positions.append((best, 1.0))
        position_df = pd.DataFrame(positions,
                                   index=sign_df.index,
                                   columns=['ccy', 'ls'])
        position_df.index.name = 'ValueDate'

        if self._includes_swap:
            swap_columns = self._price_tickers + list(self._em_price_fwd_dic.values())
            price_return_df = self._calc_return_inc_swap(self._price_df[swap_columns].loc[self._date_list],
                                                         self._price_tickers,
                                                         self._em_price_fwd_dic).loc[position_df.index]
        else:
            price_return_df = self._calc_return(self._price_df[self._price_tickers].loc[self._date_list],
                                                with_log=True).loc[position_df.index]

        self._logger.info("Calculating Perofrmance...")
        # Columns are read by label: integer keys on a label-indexed Series
        # (the former ``row[0]`` / ``row[1]``) are deprecated in pandas.
        ccy_by_date = position_df['ccy'].tolist()
        side_by_date = position_df['ls'].tolist()
        # The position of row i earns the return of row i + 1, so the last
        # position has no realized return.
        return_series_df = pd.DataFrame([price_return_df[ccy].iloc[i + 1] * side
                                         for i, (ccy, side) in enumerate(zip(ccy_by_date[:-1],
                                                                             side_by_date[:-1]))],
                                        index=position_df.index[:-1],
                                        columns=['return'])

        return_series_df.index.name = 'ValueDate'
        return_series_df['cum_return'] = return_series_df['return'].cumsum()

        self._return_series_df = return_series_df
        self._sign_df = sign_df
        self._position_df = position_df
        self._fc_normalized_df = normalized_df[[self._fc_tickers[0]]]
        self._price_return_df = price_return_df

        self._logger.info("Simulation Complated.")
示例#12
0
                     columns=['RiskOn', 'RiskOff']).to_csv(os.path.join('output', '{0}_detailed_result_{1}.csv'.format(output_prefix, output_suffix)))

if __name__ == '__main__':
    import logging.config
    logging.config.fileConfig('./logger_config.ini')
    logger = logging.getLogger("jpbank.quants")
    roll_term = 104
    #start_date = date(2006, 4, 7) - relativedelta(weeks=roll_term + 2)
    start_date = date(2005, 4, 1) - relativedelta(weeks=roll_term + 2)
    end_date = date(2020, 3, 27)

    price_tickers = ['USDZAR Index', 'USDMXN Index']
    rate_tickers = ['GSAB2YR Index', 'GMXN02YR Index']
    fwd_tickers = ['USDZAR1W BGN Curncy', 'USDMXN1W BGN Curncy']
    fc_label = pd.read_csv('./input/fc_label_test.csv').query("Algorithm == 'ML_DNN_TF'")[['ValueDate', 'Predict']]
    fc_label = cf.convert_date_format(fc_label)
    fc_label.set_index('ValueDate', inplace=True)

    em_ccy_sim = EMCcySim(start_date=start_date, end_date=end_date, 
                          rolls=True,
                          roll_term=roll_term,
                          price_tickers=price_tickers,
                          em_rate_tickers=rate_tickers,
                          em_fwd_tickers=fwd_tickers,
                          #exp_return_file=exp_return_file,
                          #use_estimated_sign=True,
                          #adjust_return=True,
                          #fc_label=fc_label,
                          fc_tickers=['GSUSFCI Index'],
                          #fc_tickers=['MXEF Index'],
                          fc_threshold = -0.05,