Пример #1
0
    def __init__(self,
                 broker_id=None,
                 sec_type=None,
                 exchange_id=None,
                 country=None,
                 sec_price=None,
                 no_sec=None,
                 plot=False,
                 location=None):
        """ Stores the transaction cost inputs and optionally writes an input summary presentation

        :param broker_id: broker identifier, stored as given
        :param sec_type: security type, stored as given
        :param exchange_id: exchange identifier, stored as given
        :param country: country, stored as given
        :param sec_price: security price(s); a scalar or any sequence/array. Stored as a numpy array with at least
            one dimension. Default is np.arange(10, 500, 10)
        :param no_sec: number(s) of securities; handled exactly like sec_price
        :param plot: boolean for whether to create a presentation with the inputs. Default is False
        :param location: directory in which the presentation is saved; a '/' is appended. Default is no prefix
        :return: N.A.
        """
        # #### MAIN INPUT ####
        self.name = 'Transaction Cost Model'
        self.dbTCM = DatabaseManipulationTCM()
        self.broker_id = broker_id
        self.sec_type = sec_type
        self.exchange_id = exchange_id
        self.country = country

        # Ensure that sec_price and no_sec are numpy arrays, and create them if not provided.
        # np.array copies the input and np.atleast_1d promotes a scalar to a one-element array, so scalars,
        # lists, tuples and existing arrays all end up with their natural shape (an array passed in is no
        # longer wrapped in an extra dimension).
        if sec_price is None:
            self.sec_price = np.arange(10, 500, 10)
        else:
            self.sec_price = np.atleast_1d(np.array(sec_price))
        if no_sec is None:
            self.no_sec = np.arange(10, 500, 10)
        else:
            self.no_sec = np.atleast_1d(np.array(no_sec))

        # Assign location, if not provided, the file name is used without a directory prefix
        if location is None:
            self.location = ''
        else:
            self.location = ''.join([location, '/'])

        # #### PRESENTATION ####
        self.plot = plot
        self.presname = ''

        if self.plot:
            pres = Presenter()
            pres.start_presentation(title='Transaction Costs')
            # Every input is passed through str() because all of them default to None and may be
            # non-string identifiers; str.join only accepts strings.
            text = ''.join([
                'broker id: ',
                str(self.broker_id), '\n security type: ',
                str(self.sec_type), '\n exchange id: ',
                str(self.exchange_id), '\n country: ',
                str(self.country)
            ])
            pres.add_text_slide(text, title='Input')
            self.presname = ''.join([
                self.location, 'Transaction_Costs_',
                str(self.broker_id), '_',
                str(self.sec_type), '_',
                str(self.exchange_id), '_',
                str(self.country), '.pptx'
            ])
            pres.save_presentation(self.presname)
Пример #2
0
class Backtesting(object):
    def __init__(self,
                 context,
                 data,
                 plot=False,
                 plot_title='Backtesting.pptx',
                 plot_data=None):
        """ Initializes Backtesting class

        :param context: dictionary filled with vital information for backtesting and running the strategy algorithm.
            The keys 'start_date' and 'end_date' are set to 'default' when missing or None
        :param data: list of the dataframes of the various securities needed for performance measurements
        :param plot: boolean for whether to plot or not. Default is False
        :param plot_title: string title to save plot presentation as. Default is 'Backtesting.pptx'
        :param plot_data: dictionary with additional information to use for plotting, or None. When it holds a
            'benchmark' entry that is not None, that entry is replaced by its date-cut version
        :return: N.A.
        """

        self.context = context
        self.plot = plot
        # dfm.cut_dates is always called with a value for both bounds; 'default' stands in for a missing one
        for key in ('start_date', 'end_date'):
            if self.context.get(key) is None:
                self.context[key] = 'default'
        start_date = self.context['start_date']
        end_date = self.context['end_date']

        # self.perf_data holds every dataframe in data, cut to provided start and end date
        # Will be used to determine performance (returns) of strategy
        # self.test_data holds the same dataframes, but cut only to provided end date
        # Will be used to determine orders from strategy
        self.perf_data = []
        self.test_data = []
        for df in data:
            self.perf_data.append(dfm.cut_dates(df, start_date, end_date))
            self.test_data.append(dfm.cut_dates(df, 'default', end_date))

        # One merged dataframe per purpose
        self.perf_df = dfm.merge_dfs(self.perf_data)
        self.test_df = dfm.merge_dfs(self.test_data)

        # Stored unconditionally so the attributes exist even when plotting is switched off
        self.plot_data = plot_data
        self.presname = plot_title

        if self.plot:
            self.pres = Presenter(new_pres=self.presname, title='Backtesting')
            # plot_data is optional (default None), so only touch the benchmark when there is one
            if (self.plot_data is not None
                    and self.plot_data.get('benchmark') is not None):
                self.plot_data['benchmark'] = dfm.cut_dates(
                    self.plot_data['benchmark'], start_date, end_date)

    def basic_checks(self):
        """ Runs the strategy, measures its performance and optionally plots the results

        The close prices of self.test_df are handed to self._employ_strategy to obtain the orders; the adjusted
        close prices of self.perf_df are handed to self._calc_return to turn those orders into returns. A
        PerformanceMeasures object is built from the relative returns. When self.plot is set, slides are added to
        self.pres and the presentation is saved as self.presname.

        :return: the absolute returns as computed by self._calc_return (second element of its result)
        """

        def select_prices(df, price_type):
            # Keep 'price_date' plus every column whose name starts with price_type, and rename those
            # price columns to price_1, price_2, ... in their original order
            price_columns = [
                x for x in df.columns.values if x.startswith(price_type)
            ]
            new_names = {
                name: ''.join(['price_', str(i)])
                for i, name in enumerate(price_columns, 1)
            }
            selected = df.loc[:, ['price_date'] + price_columns]
            # rename returns a new frame, so the slice taken above is never modified in place
            return selected.rename(columns=new_names)

        # !! Note to self: Use close prices --> this isn't very general yet....
        # test data is only the price data needed for computing the order
        test_data = select_prices(self.test_df, 'close_price')

        # !! Note to self: Use adjusted close price --> also not general yet...
        # perf data is the price data needed for calculating the performance (returns)
        perf_data = select_prices(self.perf_df, 'adj_close_price')

        # Run strategy
        order, log = self._employ_strategy(test_data)

        # Use orders to get returns (ret_abs is return per security, returns_abs is total return)
        returns_rel, returns_abs, ret_abs = self._calc_return(order, perf_data)

        # Initiate PerformanceMeasures class
        date = self.perf_df['price_date'].values
        pm = PerformanceMeasures(date, returns_rel)

        # Get return per year
        return_py = pm.returns_py
        years = pm.years

        # The measures below are computed but not reported anywhere yet
        # Get compounded annualized growth rate
        cagr = pm.calc_cagr()

        # Get volatility
        vol_scl, vol_vec = pm.calc_annual_volatility()

        # Get worst & best month
        return_pm = pm.returns_pm
        months = pm.months
        best_month, worst_month = pm.calc_best_worst_month()

        # Get Sharpe ratio
        sharpe_monthly = pm.calc_sharpe_ratio(self.context['risk_free_rate'])

        # Get maximum drawdown & maximum drawdown duration
        max_drawdown, peak_valley = pm.calc_drawdowns()

        if self.plot:
            no_sec = len(self.perf_data)
            max_notional = self.context['max_notional']

            # plot_data is optional: without it (or without names) the securities are simply numbered,
            # and without a benchmark the benchmark slides are skipped
            plot_data = self.plot_data if self.plot_data is not None else {}
            names = plot_data.get('names')
            if names is None:
                names = [str(i) for i in range(1, no_sec + 1)]
            benchmark = plot_data.get('benchmark')

            text = ''.join([
                ''.join([str(y), ':   ', str(a), '\n'])
                for y, a in zip(years, return_py)
            ])

            self.pres.add_text_slide(text, title='Return per year')

            pd_date = pd.to_datetime(date)
            plt_title = ''.join(['Initial amount: ', str(max_notional)])

            # Plot of the returns
            self.pres.add_graph_slide(x=pd_date,
                                      y=returns_rel,
                                      graph_type='plot',
                                      ylabel='Return [-]',
                                      slide_title='Relative Returns')

            # Series that are plotted against pd_date on several slides
            strategy_value = returns_abs + max_notional
            benchmark_value = None
            if benchmark is not None:
                benchmark_price = benchmark['adj_close_price']
                # Benchmark rescaled so that it starts at the initial amount
                benchmark_value = benchmark_price.values * max_notional \
                    / benchmark_price.iloc[0]

            # Adjusted close price of every security, rescaled so that it starts at the initial amount.
            # The first value is taken by position, in line with the benchmark above.
            # TODO: this could be done without a loop, e.g. on the dataframe or a matrix
            security_value = []
            for ind_sec in range(len(ret_abs.T)):
                adj_price_val = self.perf_df[''.join(
                    ['adj_close_price_', str(ind_sec + 1)])]
                security_value.append(adj_price_val.values * max_notional /
                                      adj_price_val.iloc[0])

            # Plot strategy returns versus benchmark returns
            if benchmark_value is not None:
                self.pres.add_graph_slide(
                    graph_type='plot',
                    x=pd_date,
                    y=[strategy_value, benchmark_value],
                    labels=['Strategy Returns', 'Benchmark Returns'],
                    ylabel='Absolute Returns & Adjusted Close Prices',
                    slide_title='Strategy vs. Benchmark',
                    plot_title=plt_title)

            for ind_sec, ret_sec in enumerate(ret_abs.T):
                # Plot of breakdown of returns
                self.pres.add_graph_slide(
                    graph_type='plot',
                    x=pd_date,
                    y=[ret_sec + max_notional, strategy_value],
                    labels=[
                        ''.join(['Returns ', str(names[ind_sec])]),
                        'Total Returns'
                    ],
                    ylabel='Absolute Returns [monetary unit]',
                    slide_title='Absolute Returns',
                    plot_title=plt_title)

            for ind_sec, sec_value in enumerate(security_value):
                # Plot of the returns and securities adjusted close price
                self.pres.add_graph_slide(
                    graph_type='plot',
                    x=pd_date,
                    y=[sec_value, strategy_value],
                    labels=[
                        ''.join(['Adj Close ', str(names[ind_sec])]),
                        'Strategy Returns'
                    ],
                    ylabel='Absolute Returns and Adjusted Close Prices',
                    slide_title='Abs. Returns & Adj. Close Prices',
                    plot_title=plt_title)

            if benchmark_value is not None:
                for ind_sec, sec_value in enumerate(security_value):
                    # Plot benchmark adjusted close price to securities adjusted close price
                    self.pres.add_graph_slide(
                        graph_type='plot',
                        x=pd_date,
                        y=[sec_value, benchmark_value],
                        labels=[
                            ''.join(['Adj Close ', str(names[ind_sec])]),
                            'Benchmark Returns'
                        ],
                        ylabel='Adjusted Close Prices',
                        slide_title='Securities vs. Benchmark',
                        plot_title=plt_title)
            self.pres.save_presentation(self.presname)

        return returns_abs

    def _employ_strategy(self, test_df):
        """ Executes the strategy

        For every evaluation row a RotationalETF is built from the test data up to and including that row, and
        its calc_results() output is stored as that row's order and log entry. Along the way self.context is
        updated: 'check' ('monthly' or 'weekly', only when the period is not daily), 'positions' (the order is
        added to it) and 'last_order' (price date of the row, whenever the row's orders do not sum to zero).

        When context['period'] is not 'daily', the evaluation rows are the rows of the test dates nearest to the
        monthly and weekly check dates. When it is 'daily', every row from the second one up to the last one is
        evaluated; the first row is skipped.

        :param test_df: Dataframe with 'price_date' and the prices the strategy needs
        :return: order array (one row per row of self.perf_df, one column per security) and the list of log
            entries (None for rows that were not evaluated)
        """

        # Initialize order and log arrays
        no_rows = np.shape(self.perf_df)[0]
        order = np.zeros(shape=(no_rows, len(self.perf_data)))
        log = [None] * no_rows

        perf_dates = self.perf_df['price_date'].values

        # Row of test_df that corresponds to the first performance date; test_df may start earlier
        date_pd = pd.to_datetime(test_df['price_date'].values).date
        start_point = dfm.find_nearest_date(
            date_pd,
            dt.datetime.strptime(perf_dates[0], '%Y-%m-%d').date(),
            'daily')[0]
        date_pd = date_pd[start_point:]

        period = self.context['period'].lower()
        if period != 'daily':
            # Create date array to execute strategy on - one for weekly, one for monthly
            dm = dfm.get_date_range(perf_dates[0],
                                    perf_dates[-1],
                                    'monthly',
                                    self.context['start_day'],
                                    last_point_now=False)
            dw = dfm.get_date_range(perf_dates[0],
                                    perf_dates[-1],
                                    'weekly',
                                    1,
                                    last_point_now=False)

            # Use cw & cm to combine the dates to check on. If there is a date both as 'monthly' and 'weekly',
            # pick 'monthly': np.unique keeps the first occurrence, and the monthly rows are concatenated first
            # !! Note to self: this is very specific to Strategy 3!!
            dm_n = dfm.find_nearest_date(date_pd, dm, 'monthly')[0]
            cm = np.ones(np.shape(dm_n))
            dw_n = dfm.find_nearest_date(date_pd, dw, 'weekly')[0]
            cw = np.zeros(np.shape(dw_n))

            dr_c = np.concatenate([dm_n, dw_n])
            cr_c = np.concatenate([cm, cw])

            dr, ind_u = np.unique(dr_c, return_index=True)
            cr = cr_c[ind_u]

            schedule = [(row, 'monthly' if flag == 1 else 'weekly')
                        for row, flag in zip(dr, cr)]
        else:
            # Rows 1 .. last; stopping at the last row keeps the index inside order, log and date_pd
            schedule = [(row, None) for row in range(1, len(date_pd))]

        for ind, check in schedule:
            if check is not None:
                self.context['check'] = check

            # Label-based slice: the strategy sees all test rows up to and including the current one
            a = RotationalETF(test_df.loc[:(ind + start_point), :],
                              self.context,
                              backtest=True)
            order[ind, :], log[ind] = a.calc_results()

            self.context['positions'] += order[ind, :]
            # NOTE(review): orders that net to zero across securities do not update 'last_order' - confirm
            # that this is intended
            if sum(order[ind, :]) != 0:
                self.context['last_order'] = test_df.loc[ind + start_point,
                                                         'price_date']

            # Progress output; a single formatted string prints identically on Python 2 and 3
            print('{} {} {} {}'.format(ind, len(date_pd),
                                       self.context.get('check'),
                                       date_pd[ind]))

        return order, log

    '''
    # Employ strategy used for strategy2
    def _employ_strategy(self, test_df, perf_df):
        order = np.zeros(shape=(np.shape(self.perf_df)[0], len(self.perf_data)))
        log = [None] * np.shape(self.perf_df)[0]

        dr = dfm.get_date_range(self.perf_df['price_date'].values[0], self.perf_df['price_date'].values[-1],
                                self.context['period'], self.context['start_day'])

        date_pd = pd.to_datetime(test_df['price_date'].values).date
        start_point = dfm.find_nearest_date(date_pd,
                                            dt.datetime.strptime(self.perf_df['price_date'][0], '%Y-%m-%d').date(),
                                            'daily')[0]

        self.context['returns'] = None
        self.context['return_date'] = None
        self.context['return_prices'] = None

        date_pd = date_pd[start_point:]
        dr_ind = 0
        ind = 0

        while ind < (len(date_pd) - 1):
            if (self.context['period'].lower() != 'daily'):# & (date_pd[ind] != dr[dr_ind]):
                ind = dfm.find_nearest_date(date_pd, dr[dr_ind], self.context['period'])[0]
                dr_ind += 1
            elif self.context['period'].lower() == 'daily':
                ind += 1

            self.context['return_prices'] = perf_df.loc[:ind]
            self.context['check'] = 'monthly'
            a = RotationalETF(test_df.loc[:(ind + start_point), :], self.context, backtest=True)
            order[ind, :], log[ind] = a.calc_results()

            self.context['positions'] += order[ind, :]
            self.context['returns'] = self._calc_return(order[:(ind+1), :], perf_df.loc[:ind, :])[1]
            self.context['return_date'] = perf_df.loc[:ind, 'price_date']
            self.context['last_order'] = test_df.loc[ind + start_point, 'price_date']
            print ind, len(date_pd)
        return order, log
    '''

    def _calc_return(self, order_original, perf_df):
        """ Calculates the return based on an order array, and a price Dataframe

        :param order_original: array of orders
        :param perf_df: Dataframe with the prices necessary to compute the returns
        :return: the relative returns, absolute returns, and absolute returns per security
        """

        order = order_original.copy()
        no_sec = len(self.perf_data)
        price_names = np.array(
            ['price_' + str(i) for i in xrange(1, no_sec + 1)])
        ret = np.zeros((np.shape(order)[0], no_sec))

        transaction_cost = 0

        # buy_list vs sell_list contains order bought vs sold that cannot be matched yet to determine the return
        # For example when something has been bought, but nothing or not enough has been sold yet, the residue will be
        # listed in these lists.
        buy_shares = np.zeros((np.shape(order)[0], no_sec))
        buy_price = np.zeros((np.shape(order)[0], no_sec))
        sell_shares = np.zeros((np.shape(order)[0], no_sec))
        sell_price = np.zeros((np.shape(order)[0], no_sec))

        # bl_first vs sl_first indicates which row in buy_list vs sell_list can be used to "match" bought/sold shares.
        # It automatically points to the oldest row with still outstanding shares. Initial value is -1
        # bl_last vs sl_last indicates which row in buy_list vs sell_list can be used to write outstanding shares to.
        bl_first = np.ones(no_sec).astype(int) * -1
        bl_last = np.zeros(no_sec).astype(int)
        sl_first = np.ones(no_sec).astype(int) * -1
        sl_last = np.zeros(no_sec).astype(int)

        for ind in range(0, np.shape(order)[0]):
            bl_first[(bl_first == -1) & (bl_last > 0)] = 0
            sl_first[(sl_first == -1) & (sl_last > 0)] = 0

            # Three situations, per type: buy, sell, nothing
            # If nothing, skip to next day
            # Only returns made on one day are determined, later they will be accumulated.

            # Situation A.A: Sell order & outstanding buys larger than sell order
            col_to_change = (order[ind, :] < 0) & (np.sum(buy_shares, 0) >
                                                   -order[ind, :])
            if sum(col_to_change) != 0:
                share_cumsum = np.cumsum(buy_shares, 0)
                share_compl = (share_cumsum < -order[ind, :]) & col_to_change
                numb_shares = sum(buy_shares * share_compl, 0)[col_to_change]
                ret[ind, col_to_change] += numb_shares * perf_df.loc[ind, price_names[col_to_change]] \
                                           - sum(buy_shares * buy_price * share_compl, 0)[col_to_change]
                buy_shares[share_compl] = 0
                bl_first += sum(share_compl)
                order[col_to_change] += numb_shares

                ret[ind, col_to_change] += perf_df.loc[ind, price_names[col_to_change]] * -order[ind, col_to_change] * (1 - transaction_cost) \
                                           - buy_price[bl_first[col_to_change], col_to_change] \
                                             * -order[ind, col_to_change] * (1 + transaction_cost)
                buy_shares[bl_first[col_to_change],
                           col_to_change] += order[ind, col_to_change]
                order[ind, col_to_change] = 0

            # Situation A.B: Sell order & outstanding buys smaller than or equal to sell order
            # --> just fill out all outstanding buys, and change order. This order will be added to sell list in A.C
            col_to_change = (order[ind, :] < 0) & (np.sum(buy_shares, 0) > 0) \
                            & (np.sum(buy_shares, 0) <= -order[ind, :])
            if sum(col_to_change) != 0:
                numb_shares = buy_shares[:, col_to_change]
                price_shares = buy_price[:, col_to_change]
                ret[ind, col_to_change] += np.sum(numb_shares, 0) * \
                                           perf_df.loc[ind, price_names[col_to_change]].values * (1 - transaction_cost) \
                                           - np.sum(numb_shares * price_shares, 0) * (1 + transaction_cost)
                order[ind, col_to_change] += np.sum(numb_shares, 0)
                buy_shares[:, col_to_change] = 0
                bl_first[col_to_change] = bl_last[col_to_change] - 1

            # Situation A.C: Sell order & no outstanding buys
            col_to_change = (order[ind, :] < 0) & (np.sum(buy_shares, 0) == 0)
            if sum(col_to_change) != 0:
                row_to_change = bl_last[col_to_change]
                sell_shares[row_to_change,
                            col_to_change] = -order[ind, col_to_change]
                sell_price[row_to_change, col_to_change] = perf_df.loc[
                    ind, price_names[col_to_change]]
                sl_last[col_to_change] += 1

            # Situation B.A: Buy order & outstanding sells larger than buy order
            col_to_change = (order[ind, :] > 0) & (np.sum(sell_shares, 0) >
                                                   order[ind, :])
            if sum(col_to_change) != 0:
                share_cumsum = np.cumsum(sell_shares, 0)
                share_compl = (share_cumsum < order[ind, :]) & col_to_change
                numb_shares = sum(sell_shares * share_compl, 0)[col_to_change]
                ret[ind, col_to_change] += sum(sell_shares * sell_price * share_compl, 0)[col_to_change] * (1 - transaction_cost)\
                                           - numb_shares * perf_df.loc[ind, price_names[col_to_change]] * (1 + transaction_cost)
                sell_shares[share_compl] = 0
                sl_first += sum(share_compl)
                order[col_to_change] += -numb_shares

                ret[ind, col_to_change] += sell_price[sl_first[col_to_change], col_to_change] * order[ind, col_to_change] * (1 - transaction_cost)\
                                           - perf_df.loc[ind, price_names[col_to_change]] * order[ind, col_to_change] * (1 + transaction_cost)
                sell_shares[sl_first[col_to_change],
                            col_to_change] += -order[ind, col_to_change]
                order[ind, col_to_change] = 0

            # Situation B.B: Buy order & outstanding sells smaller than buy order
            # --> just fill out all outstanding sells, and change order. This order will be added to buy list in B.C
            col_to_change = (order[ind, :] > 0) & \
                            (np.sum(sell_shares, 0) > 0) & (np.sum(sell_shares, 0) <= order[ind, :])
            if sum(col_to_change) != 0:
                numb_shares = sell_shares[:, col_to_change]
                price_shares = sell_price[:, col_to_change]
                ret[ind, col_to_change] += np.sum(numb_shares * price_shares, 0) * (1 - transaction_cost) \
                                           - np.sum(numb_shares, 0) * perf_df.loc[ind, price_names[col_to_change]] * (1 + transaction_cost)
                order[ind, col_to_change] += -np.sum(numb_shares, 0)
                sell_shares[:, col_to_change] = 0
                sl_first[col_to_change] = sl_last[col_to_change] - 1

            # Situation B.C: Buy order & no outstanding sells
            col_to_change = (order[ind, :] > 0) & (np.sum(sell_shares, 0) == 0)
            if sum(col_to_change) != 0:
                row_to_change = bl_last[col_to_change]
                buy_shares[row_to_change, col_to_change] = order[ind,
                                                                 col_to_change]
                buy_price[row_to_change, col_to_change] = perf_df.loc[
                    ind, price_names[col_to_change]]
                bl_last[col_to_change] += 1

        ret_abs = np.array([sum(ret[:r]) for r in range(1, len(ret) + 1)])
        returns_abs = np.sum(ret_abs, 1)
        returns_rel = [
            i / self.context['max_notional'] + 1 for i in returns_abs
        ]

        return returns_rel, returns_abs, ret_abs

    @staticmethod
    def _histogram(y, no_buckets=21, normed=True):
        """ Draws a bar chart of the histogram of y on the current matplotlib axes

        :param y: values to bin; passed to np.histogram
        :param no_buckets: number of bins passed to np.histogram. Default is 21
        :param normed: when True the counts are divided by len(y), giving the fraction per bucket. Default is True
        :return: N.A. within the visible part of this method
        """
        # Create histogram based on input y
        # Divide into no_buckets equal sized buckets
        # Add percentage at the bottom
        # NOTE(review): no code for the percentage labels is visible here - confirm whether it follows or is
        # still to be written
        hist, bins = np.histogram(y, bins=no_buckets)
        if normed:
            # Counts -> fraction of all values per bucket
            hist = hist.astype(float) / float(len(y))
        # Bars take 70% of the bucket width and are centred on the bucket midpoints
        width = 0.7 * np.diff(bins)
        center = (bins[:-1] + bins[1:]) / 2
        plt.bar(center, hist, align='center', width=width)