Пример #1
0
    def run_strategy_returns_stats(self, strategy):
        """
        run_strategy_returns_stats - Plots useful statistics for the trading strategy (using PyFolio)

        The strategy P&L is converted to returns, rows containing NaN are dropped, the first column
        is taken as a Series and handed to the returns tear sheet. The current figure is then saved
        (best effort) to ``strategy.DUMP_PATH + "stats.png"`` and shown.

        Parameters
        ----------
        strategy : StrategyTemplate
            defining trading strategy (must provide get_strategy_pnl() and DUMP_PATH)

        """

        pnl = strategy.get_strategy_pnl()
        tz = TimeSeriesTimezone()
        tsc = TimeSeriesCalcs()

        # PyFolio assumes UTC time based DataFrames (so force this localisation)
        # best effort: if localisation fails we carry on with the index unchanged, but we no longer
        # swallow KeyboardInterrupt/SystemExit as a bare "except:" would
        try:
            pnl = tz.localise_index_as_UTC(pnl)
        except Exception:
            pass

        # TODO for intraday strategy make daily

        # convert DataFrame (assumed to have only one column) to Series
        pnl = tsc.calculate_returns(pnl)

        # drop rows without a return before handing the series over
        # (presumably at least the first observation has no previous point - TODO confirm)
        pnl = pnl.dropna()
        pnl = pnl[pnl.columns[0]]

        pf.create_returns_tear_sheet(pnl, return_fig=True)

        # saving the chart is optional, failure to write the file must not stop it being displayed
        try:
            plt.savefig(strategy.DUMP_PATH + "stats.png")
        except Exception:
            pass

        plt.show()
Пример #2
0
    def calculate_ret_stats(self, returns_df, ann_factor):
        """
        calculate_ret_stats - Calculates return statistics for an asset's returns including IR, vol, ret and drawdowns

        The results are stored on the instance (nothing is returned):
        ``_rets`` (annualised mean return), ``_vol`` (annualised standard deviation),
        ``_inforatio`` (``_rets / _vol``), ``_kurtosis`` and ``_dd`` (minimum of the drawdown series).

        Parameters
        ----------
        returns_df : DataFrame
            asset returns
        ann_factor : int
            annualisation factor to use on return statistics

        Returns
        -------
        None
        """
        tsc = TimeSeriesCalcs()

        self._rets = returns_df.mean(axis=0) * ann_factor
        self._vol = returns_df.std(axis=0) * math.sqrt(ann_factor)
        self._inforatio = self._rets / self._vol
        self._kurtosis = returns_df.kurtosis(axis=0)

        index_df = tsc.create_mult_index(returns_df)

        # running maximum of the index so far; cummax() replaces pandas.expanding_max, which has been
        # removed from pandas (NaN observations stay NaN in the drawdown either way)
        max2here = index_df.cummax()
        dd2here = index_df / max2here - 1

        self._dd = dd2here.min()
Пример #3
0
    def bus_day_of_month_seasonality(self, data_frame,
                                 month_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], cum = True,
                                 cal = "FX", partition_by_month = True, add_average = False, price_index = False):
        """
        bus_day_of_month_seasonality - Averages a time series by business day of the month

        Parameters
        ----------
        data_frame : DataFrame
            returns (or prices when price_index is True); its index is converted to datetimes
        month_list : list of int
            months to keep in the output (only read here, never modified)
        cum : boolean
            if True the averages are turned into an index via create_mult_index
        cal : str
            calendar name passed to the holiday filter and to the averaging helper
        partition_by_month : boolean
            if True the month level is unstacked into columns
        add_average : boolean
            if True (and partitioning by month) an 'Avg' column with the row mean is appended
        price_index : boolean
            if True data_frame is first resampled to business days and converted to returns

        Returns
        -------
        DataFrame
        """

        tsc = TimeSeriesCalcs()
        tsf = TimeSeriesFilter()

        if price_index:
            # resample into business days; the aggregation has to be explicit, a bare resample()
            # only returns a Resampler object on current pandas
            data_frame = data_frame.resample('B').mean()
            data_frame = tsc.calculate_returns(data_frame)

        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame = tsf.filter_time_series_by_holidays(data_frame, cal)

        monthly_seasonality = tsc.average_by_month_day_by_bus_day(data_frame, cal)
        monthly_seasonality = monthly_seasonality.loc[month_list]

        if partition_by_month:
            monthly_seasonality = monthly_seasonality.unstack(level=0)

            if add_average:
                monthly_seasonality['Avg'] = monthly_seasonality.mean(axis=1)

        if cum is True:
            if partition_by_month:
                # add a zero row at label 0 so that the cumulative index has a common starting point
                monthly_seasonality.loc[0] = numpy.zeros(len(monthly_seasonality.columns))

                # DataFrame.sort() no longer exists; sort_index() orders the rows by index label
                monthly_seasonality = monthly_seasonality.sort_index()

            monthly_seasonality = tsc.create_mult_index(monthly_seasonality)

        return monthly_seasonality
Пример #4
0
    def time_of_day_seasonality(self, data_frame, years=False):
        """
        time_of_day_seasonality - Averages a time series by hour/minute of the day, optionally per calendar year

        Parameters
        ----------
        data_frame : DataFrame
            time series whose index exposes a ``year`` attribute
        years : boolean
            if exactly False, a single profile over the whole sample is returned; otherwise one
            profile per calendar year is calculated and the profiles are joined side by side

        Returns
        -------
        DataFrame (or None if the index contains no years)
        """

        calcs = TimeSeriesCalcs()

        # single profile across all of the data
        if years is False:
            return calcs.average_by_hour_min_of_day_pretty_output(data_frame)

        distinct_years = sorted(set(data_frame.index.year))
        name_helper = CommonMan()

        # one profile per year, each column name suffixed with " <year>"
        yearly_profiles = []

        for yr in distinct_years:
            in_year = data_frame[data_frame.index.year == yr]
            profile = calcs.average_by_hour_min_of_day_pretty_output(in_year)
            profile.columns = name_helper.postfix_list(profile.columns.values, " " + str(yr))
            yearly_profiles.append(profile)

        if not yearly_profiles:
            return None

        # join the yearly profiles in chronological order
        combined = yearly_profiles[0]

        for profile in yearly_profiles[1:]:
            combined = combined.join(profile)

        return combined
Пример #5
0
    def bus_day_of_month_seasonality(
        self,
        data_frame,
        month_list=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        cum=True,
        cal="FX",
        partition_by_month=True,
    ):
        """
        bus_day_of_month_seasonality - Averages a time series of returns by business day of the month

        Parameters
        ----------
        data_frame : DataFrame
            returns; its index is converted to datetimes
        month_list : list of int
            months to keep in the output (only read here, never modified)
        cum : boolean
            if True a zero row is added and the averages are turned into an index via create_mult_index
        cal : str
            calendar name passed to the holiday filter and to the averaging helper
        partition_by_month : boolean
            if True the month level is unstacked into columns

        Returns
        -------
        DataFrame
        """

        tsc = TimeSeriesCalcs()
        tsf = TimeSeriesFilter()

        data_frame.index = pandas.to_datetime(data_frame.index)
        data_frame = tsf.filter_time_series_by_holidays(data_frame, cal)

        monthly_seasonality = tsc.average_by_month_day_by_bus_day(data_frame, cal)
        monthly_seasonality = monthly_seasonality.loc[month_list]

        if partition_by_month:
            monthly_seasonality = monthly_seasonality.unstack(level=0)

        if cum is True:
            # zero row at label 0 (.ix has been removed from pandas, .loc is the label based accessor)
            monthly_seasonality.loc[0] = numpy.zeros(len(monthly_seasonality.columns))

            if partition_by_month:
                monthly_seasonality.index = monthly_seasonality.index + 1  # shifting index

                # DataFrame.sort() no longer exists; sort_index() orders the rows by index label
                monthly_seasonality = monthly_seasonality.sort_index()

            monthly_seasonality = tsc.create_mult_index(monthly_seasonality)

        return monthly_seasonality
Пример #6
0
    def calculate_vol_adjusted_returns(self, returns_df, br, returns = True):
        """
        calculate_vol_adjusted_returns - Adjusts returns for a vol target

        Parameters
        ----------
        returns_df : pandas.DataFrame
            Asset returns to be traded (or prices, when ``returns`` is False)

        br : BacktestRequest
            Parameters for the backtest; the ``portfolio_vol_*`` settings and ``spot_tc_bp`` are read here.
            If ``portfolio_vol_resample_type`` is missing, it is set to 'mean' on the request.

        returns : boolean
            is ``returns_df`` already a returns series? If not, it is converted to returns first

        Returns
        -------
        (pandas.DataFrame, pandas.DataFrame)
            vol adjusted returns (with the columns of ``returns_df``) and the leverage applied
        """

        calcs = TimeSeriesCalcs()

        if not returns:
            returns_df = calcs.calculate_returns(returns_df)

        # older requests may not carry the resample type, so default it on the request itself
        if not hasattr(br, 'portfolio_vol_resample_type'):
            br.portfolio_vol_resample_type = 'mean'

        # vol targeting settings, in the positional order expected by calculate_leverage_factor
        leverage_settings = (
            br.portfolio_vol_target,
            br.portfolio_vol_max_leverage,
            br.portfolio_vol_periods,
            br.portfolio_vol_obs_in_year,
            br.portfolio_vol_rebalance_freq,
            br.portfolio_vol_resample_freq,
            br.portfolio_vol_resample_type,
        )

        leverage_df = self.calculate_leverage_factor(returns_df, *leverage_settings)

        # treat the leverage as the signal, and charge the spot transaction costs on changes in it
        vol_returns_df = calcs.calculate_signal_returns_with_tc_matrix(leverage_df, returns_df, tc = br.spot_tc_bp)
        vol_returns_df.columns = returns_df.columns

        return vol_returns_df, leverage_df
Пример #7
0
    def compare_strategy_vs_benchmark(self, br, strategy_df, benchmark_df):
        """
        compare_strategy_vs_benchmark - Compares the trading strategy we are backtesting against a benchmark

        When the request does not ask for a benchmark, the strategy is returned untouched. Otherwise the
        benchmark is aligned to the strategy, optionally vol adjusted and relabelled with its return
        statistics, and both are returned together as a rebased index. The benchmark and its statistics
        object are also stored on the instance (``_benchmark_pnl``, ``_benchmark_tsd``).

        Parameters
        ----------
        br : BacktestRequest
            Parameters for backtest such as start and finish dates; the optional attributes
            ``include_benchmark``, ``calc_stats`` and ``portfolio_vol_adjust`` are treated as off when absent

        strategy_df : pandas.DataFrame
            Strategy time series

        benchmark_df : pandas.DataFrame
            Benchmark time series

        Returns
        -------
        pandas.DataFrame
        """

        # optional switches on the request (default to off)
        include_benchmark = getattr(br, 'include_benchmark', False)
        calc_stats = getattr(br, 'calc_stats', False)

        # nothing to compare against
        if not include_benchmark:
            return strategy_df

        stats = TimeSeriesDesc()
        backtester = CashBacktest()
        date_filter = TimeSeriesFilter()
        calcs = TimeSeriesCalcs()

        # align strategy time series with that of benchmark
        strategy_df, benchmark_df = strategy_df.align(benchmark_df, join='left', axis = 0)

        # if necessary apply vol target to benchmark (to make it comparable with strategy)
        if getattr(br, 'portfolio_vol_adjust', None) is True:
            benchmark_df = backtester.calculate_vol_adjusted_index_from_prices(benchmark_df, br = br)

        # only calculate return statistics if this has been specified (note when different frequencies
        # of data are used, this might underrepresent vol)
        if calc_stats:
            benchmark_df = benchmark_df.fillna(method='ffill')
            stats.calculate_ret_stats_from_prices(benchmark_df, br.ann_factor)
            benchmark_df.columns = stats.summary()

        # realign strategy & benchmark, cut to the plotting window and rebase
        combined_df = strategy_df.join(benchmark_df, how='inner').fillna(method='ffill')
        combined_df = date_filter.filter_time_series_by_date(br.plot_start, br.finish_date, combined_df)
        combined_df = calcs.create_mult_index_from_prices(combined_df)

        self._benchmark_pnl = benchmark_df
        self._benchmark_tsd = stats

        return combined_df
Пример #8
0
    def calculate_leverage_factor(self, returns_df, vol_target, vol_max_leverage, vol_periods = 60, vol_obs_in_year = 252,
                                  vol_rebalance_freq = 'BM', returns = True, period_shift = 0):
        """
        calculate_leverage_factor - Calculates the time series of leverage for a specified vol target

        Parameters
        ----------
        returns_df : DataFrame
            Asset returns

        vol_target : float
            vol target for assets

        vol_max_leverage : float
            maximum leverage allowed

        vol_periods : int
            number of periods to calculate volatility

        vol_obs_in_year : int
            number of observations in the year

        vol_rebalance_freq : str
            how often to rebalance

        returns : boolean
            is this returns time series or prices?

        period_shift : int
            should we delay the signal by a number of periods?

        Returns
        -------
        pandas.DataFrame
            leverage on the index of ``returns_df``, forward filled between rebalancing dates
        """

        tsc = TimeSeriesCalcs()

        if not returns: returns_df = tsc.calculate_returns(returns_df)

        roll_vol_df = tsc.rolling_volatility(returns_df,
                                        periods = vol_periods, obs_in_year = vol_obs_in_year).shift(period_shift)

        # calculate the leverage as function of vol target (with max lev constraint)
        lev_df = vol_target / roll_vol_df
        lev_df[lev_df > vol_max_leverage] = vol_max_leverage

        # only allow the leverage change at resampling frequency (eg. monthly 'BM')
        # the aggregation has to be explicit: a bare resample() only returns a Resampler object on
        # current pandas, whereas older versions implicitly took the mean of each bucket
        lev_df = lev_df.resample(vol_rebalance_freq).mean()

        # bring the leverage back onto the dates of the returns (only the aligned leverage is needed)
        _, lev_df = returns_df.align(lev_df, join='left', axis = 0)

        # hold the last known leverage until the next rebalancing date
        lev_df = lev_df.ffill()

        return lev_df
Пример #9
0
    def calculate_ret_stats(self, returns_df, ann_factor):
        """
        calculate_ret_stats - Calculates return statistics for an asset's returns including IR, vol, ret and drawdowns

        The results are stored on the instance (nothing is returned):
        ``_rets`` (annualised mean return), ``_vol`` (annualised standard deviation),
        ``_inforatio`` (``_rets / _vol``) and ``_dd`` (minimum of the drawdown series).

        Parameters
        ----------
        returns_df : DataFrame
            asset returns
        ann_factor : int
            annualisation factor to use on return statistics
        """
        tsc = TimeSeriesCalcs()

        self._rets = returns_df.mean(axis=0) * ann_factor
        self._vol = returns_df.std(axis=0) * math.sqrt(ann_factor)
        self._inforatio = self._rets / self._vol

        index_df = tsc.create_mult_index(returns_df)

        # running maximum of the index so far; cummax() replaces pandas.expanding_max, which has been
        # removed from pandas (NaN observations stay NaN in the drawdown either way)
        max2here = index_df.cummax()
        dd2here = index_df / max2here - 1

        self._dd = dd2here.min()
Пример #10
0
    def get_pnl_trades(self):
        """
        get_pnl_trades - Gets P&L of each individual trade per signal

        The result is calculated from the stored signal and P&L on first use, and kept on the
        instance for subsequent calls.

        Returns
        -------
        pandas.DataFrame
        """

        # already calculated on an earlier call
        if self._pnl_trades is not None:
            return self._pnl_trades

        calcs = TimeSeriesCalcs()
        self._pnl_trades = calcs.calculate_individual_trade_gains(self._signal, self._pnl)

        return self._pnl_trades
Пример #11
0
    def run_day_of_month_analysis(self, strat):
        """
        run_day_of_month_analysis - Plots the seasonality of a strategy by business day of month and by month of year

        The strategy is constructed, its P&L resampled to business days (mean) and then to business
        month ends (mean), and the returns at each frequency are passed to the seasonality
        calculations. Both results are plotted as line graphs to PNG/HTML files under ``self.DUMP_PATH``.

        Parameters
        ----------
        strat : StrategyTemplate
            trading strategy to analyse (its FINAL_STRATEGY name is used for titles and file names)

        Returns
        -------
        DataFrame
            the month of year seasonality
        """
        from pythalesians.economics.seasonality.seasonality import Seasonality
        from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs

        calcs = TimeSeriesCalcs()
        seasonality = Seasonality()

        strat.construct_strategy()

        # seasonality by business day of the month (from business daily returns)
        daily_pnl = strat.get_strategy_pnl().resample('B').mean()
        bus_day = seasonality.bus_day_of_month_seasonality(calcs.calculate_returns(daily_pnl), add_average = True)

        # seasonality by month (from month end returns of the business daily series)
        monthly_pnl = daily_pnl.resample('BM').mean()
        month = seasonality.monthly_seasonality(calcs.calculate_returns(monthly_pnl))

        self.logger.info("About to plot seasonality...")

        day_gp = GraphProperties()
        plotter = PlotFactory()

        # common stem for the output files, and the last column, which gets its own styling
        output_stem = self.DUMP_PATH + strat.FINAL_STRATEGY
        last_column = bus_day.columns[-1]

        # ---- day of month chart: last column highlighted in red, on a second y-axis
        day_gp.color = 'Blues'
        day_gp.scale_factor = self.SCALE_FACTOR
        day_gp.file_output = output_stem + ' seasonality day of month.png'
        day_gp.html_file_output = output_stem + ' seasonality day of month.html'
        day_gp.title = strat.FINAL_STRATEGY + ' day of month seasonality'
        day_gp.display_legend = False
        day_gp.color_2_series = [last_column]
        day_gp.color_2 = ['red'] # red, pink
        day_gp.linewidth_2 = 4
        day_gp.linewidth_2_series = [last_column]
        day_gp.y_axis_2_series = [last_column]

        plotter.plot_line_graph(bus_day, adapter = self.DEFAULT_PLOT_ENGINE, gp = day_gp)

        # ---- month of year chart: default styling
        month_gp = GraphProperties()

        month_gp.scale_factor = self.SCALE_FACTOR
        month_gp.file_output = output_stem + ' seasonality month of year.png'
        month_gp.html_file_output = output_stem + ' seasonality month of year.html'
        month_gp.title = strat.FINAL_STRATEGY + ' month of year seasonality'

        plotter.plot_line_graph(month, adapter = self.DEFAULT_PLOT_ENGINE, gp = month_gp)

        return month
Пример #12
0
    def calculate_ret_stats_from_prices(self, prices_df, ann_factor):
        """
        calculate_ret_stats_from_prices - Calculates return statistics for an asset's price

        Converts the prices to returns and delegates to calculate_ret_stats; this method itself
        returns nothing.

        Parameters
        ----------
        prices_df : DataFrame
            asset prices
        ann_factor : int
            annualisation factor to use on return statistics
        """
        returns_df = TimeSeriesCalcs().calculate_returns(prices_df)

        self.calculate_ret_stats(returns_df, ann_factor)
Пример #13
0
    def run_strategy_returns_stats(self, strategy):
        """
        run_strategy_returns_stats - Plots useful statistics for the trading strategy (using PyFolio)

        The strategy P&L is converted to returns, rows containing NaN are dropped, the first column
        is taken as a Series and handed to the returns tear sheet. The current figure is then saved
        (best effort) to ``strategy.DUMP_PATH + "stats.png"`` and shown.

        Parameters
        ----------
        strategy : StrategyTemplate
            defining trading strategy (must provide get_strategy_pnl() and DUMP_PATH)

        """

        pnl = strategy.get_strategy_pnl()
        tz = TimeSeriesTimezone()
        tsc = TimeSeriesCalcs()

        # PyFolio assumes UTC time based DataFrames (so force this localisation)
        # best effort: if localisation fails we carry on with the index unchanged, but we no longer
        # swallow KeyboardInterrupt/SystemExit as a bare "except:" would
        try:
            pnl = tz.localise_index_as_UTC(pnl)
        except Exception:
            pass

        # set the matplotlib style sheet & defaults
        # at present this only works in Matplotlib engine (so failure here is not fatal)
        try:
            matplotlib.rcdefaults()
            plt.style.use(GraphicsConstants().plotfactory_pythalesians_style_sheet['pythalesians-pyfolio'])
        except Exception:
            pass

        # TODO for intraday strategies, make daily

        # convert DataFrame (assumed to have only one column) to Series
        pnl = tsc.calculate_returns(pnl)
        pnl = pnl.dropna()
        pnl = pnl[pnl.columns[0]]

        pf.create_returns_tear_sheet(pnl, return_fig=True)

        # saving the chart is optional, failure to write the file must not stop it being displayed
        try:
            plt.savefig(strategy.DUMP_PATH + "stats.png")
        except Exception:
            pass

        plt.show()
Пример #14
0
    def calculate_vol_adjusted_index_from_prices(self, prices_df, br):
        """
        calculate_vol_adjusted_index_from_prices - Adjusts an index of prices for a vol target

        Parameters
        ----------
        prices_df : pandas.DataFrame
            Asset prices to be traded

        br : BacktestRequest
            Parameters for the backtest specifying start date, finish data, transaction costs etc.

        Returns
        -------
        pandas.DataFrame containing vol adjusted index
        """

        # the leverage series is also returned by the call, but is not needed here
        vol_adjusted_returns_df, _ = self.calculate_vol_adjusted_returns(prices_df, br, returns = False)

        # turn the vol adjusted returns back into an index
        return TimeSeriesCalcs().create_mult_index(vol_adjusted_returns_df)
Пример #15
0
    def monthly_seasonality(self, data_frame,
                                  cum = True,
                                  add_average = False, price_index = False):
        """
        monthly_seasonality - Averages a time series by month

        Parameters
        ----------
        data_frame : DataFrame
            returns (or prices when price_index is True); its index is converted to datetimes
        cum : boolean
            if True a zero row is added at label 0 and the averages are turned into an index
            via create_mult_index
        add_average : boolean
            if True an 'Avg' column with the row mean is appended
        price_index : boolean
            if True data_frame is first resampled to business month ends and converted to returns

        Returns
        -------
        DataFrame
        """

        tsc = TimeSeriesCalcs()

        if price_index:
            # resample into month end; the aggregation has to be explicit, a bare resample()
            # only returns a Resampler object on current pandas
            data_frame = data_frame.resample('BM').mean()
            data_frame = tsc.calculate_returns(data_frame)

        data_frame.index = pandas.to_datetime(data_frame.index)

        monthly_seasonality = tsc.average_by_month(data_frame)

        if add_average:
            monthly_seasonality['Avg'] = monthly_seasonality.mean(axis=1)

        if cum is True:
            # add a zero row at label 0 so that the cumulative index has a common starting point
            monthly_seasonality.loc[0] = numpy.zeros(len(monthly_seasonality.columns))

            # DataFrame.sort() no longer exists; sort_index() orders the rows by index label
            monthly_seasonality = monthly_seasonality.sort_index()

            monthly_seasonality = tsc.create_mult_index(monthly_seasonality)

        return monthly_seasonality
    def g10_line_plot_gdp(self, start_date, finish_date):
        """
        g10_line_plot_gdp - Plots QoQ GDP for the 'g10-ez' country group as a rebased line graph

        The data from get_GDP_QoQ is divided by 100, turned into an index via
        create_mult_index_from_prices and plotted, with 'United Kingdom' drawn with a thicker line.
        The chart is written to a PNG whose name starts with today's date (YYYYMMDD).

        Parameters
        ----------
        start_date : str or datetime
            start of the sample (passed through to get_GDP_QoQ)
        finish_date : str or datetime
            end of the sample (passed through to get_GDP_QoQ)
        """
        today_root = datetime.date.today().strftime("%Y%m%d") + " "
        country_group = 'g10-ez'
        gdp = self.get_GDP_QoQ(start_date, finish_date, country_group)

        from pythalesians_graphics.graphs import PlotFactory
        from pythalesians_graphics.graphs.graphproperties import GraphProperties

        gp = GraphProperties()
        pf = PlotFactory()

        gp.title = "G10 GDP"
        gp.units = 'Rebased'
        gp.scale_factor = Constants.plotfactory_scale_factor

        # the file was previously labelled 'G10 UNE' although this is the GDP chart (which would
        # collide with a same day chart using that label), so name it after what is plotted
        gp.file_output = today_root + 'G10 GDP ' + str(gp.scale_factor) + '.png'

        # keep only the part of each column name before the first '-'
        gdp.columns = [x.split('-')[0] for x in gdp.columns]
        gp.linewidth_2 = 3
        gp.linewidth_2_series = ['United Kingdom']

        from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs
        tsc = TimeSeriesCalcs()

        # presumably the data is quoted in percent, hence the division by 100 - TODO confirm
        gdp = gdp / 100
        gdp = tsc.create_mult_index_from_prices(gdp)
        pf.plot_generic_graph(gdp, type = 'line', adapter = 'pythalesians', gp = gp)
from pythalesians.market.loaders.lighttimeseriesfactory import LightTimeSeriesFactory
from pythalesians.market.requests.timeseriesrequest import TimeSeriesRequest
from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs
from pythalesians.util.loggermanager import LoggerManager
from chartesians.graphs.graphproperties import GraphProperties

if True:
    import datetime

    logger = LoggerManager().getLogger(__name__)

    # every example below sits behind its own "if True:" guard - flip the guard to switch it on or off

    ###### example: daily USD/BRL history from Bloomberg (the data used for finding the biggest downmoves)
    if True:
        tsc = TimeSeriesCalcs()
        ltsf = LightTimeSeriesFactory()

        # describe what to download: the ticker and field are given both in Thalesians notation
        # and in the vendor's (Bloomberg) notation
        time_series_request = TimeSeriesRequest(
            start_date = "01 Jan 2005",                         # history starts here...
            finish_date = datetime.datetime.utcnow(),           # ...and runs up to now (UTC)
            freq = 'daily',
            data_source = 'bloomberg',
            tickers = ['USDBRL'] ,
            fields = ['close'],
            vendor_tickers = ['USDBRL BGN Curncy'],
            vendor_fields = ['PX_LAST'],
            cache_algo = 'internet_load_return')                # how to return data

        df = ltsf.harvest_time_series(time_series_request)
Пример #18
0
    def get_fx_cross(self, start, end, cross,
                     cut = "NYC", source = "bloomberg", freq = "intraday", cache_algo='cache_algo_return', type = 'spot'):
        """
        get_fx_cross - Gets close time series for one or more FX crosses, building them via USD legs where needed

        Parameters
        ----------
        start : str or datetime
            start date of the request
        end : str or datetime
            finish date of the request
        cross : str or list of str
            six letter crosses (base currency in the first three letters, terms currency in the next three)
        cut : str
            cut of the data (eg. NYC/BGN)
        source : str
            data source; 'gain' and 'dukascopy' are delegated to get_fx_cross_tick
        freq : str
            'intraday' (minute data), 'daily' or 'tick' (delegated to get_fx_cross_tick)
        cache_algo : str
            cache_algo_only, cache_algo_return, internet_load etc.
        type : str
            'spot' for spot crosses, or 'tot' for total return indices (daily only)

        Returns
        -------
        pandas.DataFrame
            one column per cross, joined on the union of the dates with rows containing NaN removed.
            None if the requested type/freq combination is not implemented, or no crosses were given.
        """

        if source == "gain" or source == 'dukascopy' or freq == 'tick':
            # pass on the cache_algo requested by the caller (it used to be silently replaced by a
            # hard coded 'cache_algo_return'); the tick path is still always requested as spot
            return self.get_fx_cross_tick(start, end, cross,
                     cut = cut, source = source, cache_algo = cache_algo, type = 'spot')

        if isinstance(cross, str):
            cross = [cross]

        # one request object is reused for every download, only tickers/category change per leg
        time_series_request = TimeSeriesRequest()
        time_series_factory = self.time_series_factory
        time_series_calcs = TimeSeriesCalcs()
        data_frame_agg = None

        if freq == 'intraday':
            time_series_request.gran_freq = "minute"                # intraday

        elif freq == 'daily':
            time_series_request.gran_freq = "daily"                 # daily

        time_series_request.freq_mult = 1                       # 1 min
        time_series_request.cut = cut                           # NYC/BGN ticker
        time_series_request.fields = 'close'                    # close field only
        time_series_request.cache_algo = cache_algo             # cache_algo_only, cache_algo_return, internet_load

        time_series_request.environment = 'backtest'
        time_series_request.start_date = start
        time_series_request.finish_date = end
        time_series_request.data_source = source

        for cr in cross:
            base = cr[0:3]
            terms = cr[3:6]

            # stays None if none of the branches below can build this cross
            cross_vals = None

            if (type == 'spot'):
                # non-USD crosses
                if base != 'USD' and terms != 'USD':
                    base_USD = self.fxconv.correct_notation('USD' + base)
                    terms_USD = self.fxconv.correct_notation('USD' + terms)

                    # TODO check if the cross exists in the database

                    # download base USD cross
                    time_series_request.tickers = base_USD
                    time_series_request.category = self.fxconv.em_or_g10(base, freq)
                    base_vals = time_series_factory.harvest_time_series(time_series_request)

                    # download terms USD cross
                    time_series_request.tickers = terms_USD
                    time_series_request.category = self.fxconv.em_or_g10(terms, freq)
                    terms_vals = time_series_factory.harvest_time_series(time_series_request)

                    # invert any leg that is quoted with USD first
                    if (base_USD[0:3] == 'USD'):
                        base_vals = 1 / base_vals
                    if (terms_USD[0:3] == 'USD'):
                        terms_vals = 1 / terms_vals

                    # give both legs the same column name, so the division lines them up
                    base_vals.columns = ['temp']
                    terms_vals.columns = ['temp']
                    cross_vals = base_vals.div(terms_vals, axis = 'index')
                    cross_vals.columns = [cr + '.close']

                else:
                    # one side is USD, the other one decides the category
                    non_USD = terms if base == 'USD' else base

                    correct_cr = self.fxconv.correct_notation(cr)

                    time_series_request.tickers = correct_cr
                    time_series_request.category = self.fxconv.em_or_g10(non_USD, freq)
                    cross_vals = time_series_factory.harvest_time_series(time_series_request)

                    # flip if not convention
                    if(correct_cr != cr):
                        cross_vals = 1 / cross_vals

                    # label the column with the requested cross, as the other branches do (this used
                    # to set columns.names, which left a flipped cross under its market convention label)
                    cross_vals.columns = [cr + '.close']

            elif type[0:3] == "tot":
                if freq == 'daily' and type == "tot":
                    # download base USD cross
                    time_series_request.tickers = base + 'USD'
                    time_series_request.category = self.fxconv.em_or_g10(base, freq) + '-tot'
                    base_vals = time_series_factory.harvest_time_series(time_series_request)

                    # download terms USD cross
                    time_series_request.tickers = terms + 'USD'
                    time_series_request.category = self.fxconv.em_or_g10(terms, freq) + '-tot'
                    terms_vals = time_series_factory.harvest_time_series(time_series_request)

                    base_rets = time_series_calcs.calculate_returns(base_vals)
                    terms_rets = time_series_calcs.calculate_returns(terms_vals)

                    # returns of the cross as the difference between the returns of the two USD legs
                    cross_rets = base_rets.sub(terms_rets.iloc[:,0],axis=0)

                    # first returns of a time series will by NaN, given we don't know previous point
                    cross_rets.iloc[0] = 0

                    cross_vals = time_series_calcs.create_mult_index(cross_rets)
                    cross_vals.columns = [cr + '-tot.close']

                elif freq == 'intraday':
                    self.logger.info('Total calculated returns for intraday not implemented yet')
                    return None

            # unsupported type/freq combination (eg. other "tot" variants, which used to fail later on
            # with an unbound variable): report it in the same way as the intraday case above
            if cross_vals is None:
                self.logger.info('FX cross calculation not implemented for type ' + str(type)
                                 + ' with freq ' + str(freq))
                return None

            if data_frame_agg is None:
                data_frame_agg = cross_vals
            else:
                data_frame_agg = data_frame_agg.join(cross_vals, how='outer')

        # no crosses were requested
        if data_frame_agg is None:
            return None

        # strip the nan elements
        data_frame_agg = data_frame_agg.dropna()
        return data_frame_agg
    # NOTE(review): the enclosing scope and the definitions of spot_df, asset_df, indicator, tech_params,
    # br and cash_backtest are not visible in this fragment
    logger.info("Running backtest...")

    # use technical indicator to create signals
    # (we could obviously create whatever function we wanted for generating the signal dataframe)
    tech_ind = TechIndicator()
    tech_ind.create_tech_ind(spot_df, indicator, tech_params); signal_df = tech_ind.get_signal()

    # run the backtest of the signals against the asset prices
    cash_backtest.calculate_trading_PnL(br, asset_df, signal_df)

    # cumulative portfolio, labelled with the indicator, its SMA period and the first entry of the P&L description
    port = cash_backtest.get_cumportfolio()
    port.columns = [indicator + ' = ' + str(tech_params.sma_period) + ' ' + str(cash_backtest.get_portfolio_pnl_desc()[0])]
    signals = cash_backtest.get_porfolio_signal()   # get final signals for each series
    returns = cash_backtest.get_pnl()               # get P&L for each series

    # gains of the individual trades, calculated from the signals and the P&L
    time_series_calcs = TimeSeriesCalcs()
    trade_returns = time_series_calcs.calculate_individual_trade_gains(signals, returns)

    print(trade_returns)

    # print the last positions (we could also save as CSV etc.)
    print(signals.tail(1))

    # plot the cumulative portfolio as a line graph, and write it to a PNG file
    pf = PlotFactory()
    gp = GraphProperties()
    gp.title = "EUR/USD trend model"
    gp.source = 'Thalesians/BBG (calc with PyThalesians Python library)'
    gp.scale_factor = 1
    gp.file_output = 'output_data/eurusd-trend-example.png'

    pf.plot_line_graph(port, adapter = 'pythalesians', gp = gp)
Пример #20
0
from pythalesians.graphics.graphs.graphproperties import GraphProperties

# for making elementary calculations on the time series
from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs
from datetime import timedelta

if True:
    # logger for this example script
    logger = LoggerManager().getLogger(__name__)

    import datetime

    # just change "False" to "True" to run any of the below examples

    ###### download daily data from Bloomberg for USD/BRL and get biggest downmoves
    if True:
        # calculation helper (not used within the visible part of this example)
        tsc = TimeSeriesCalcs()

        # describe the download: daily USD/BRL closes from 1 Jan 2005 up to the current UTC time
        time_series_request = TimeSeriesRequest(
            start_date="01 Jan 2005",  # start date
            finish_date=datetime.datetime.utcnow(),  # finish date
            freq='daily',  # daily data
            data_source='bloomberg',  # use Bloomberg as data source
            tickers=['USDBRL'],  # ticker (Thalesians)
            fields=['close'],  # which fields to download
            vendor_tickers=['USDBRL BGN Curncy'],  # ticker (Bloomberg)
            vendor_fields=['PX_LAST'],  # which Bloomberg fields to download
            cache_algo='internet_load_return')  # how to return data

        ltsf = LightTimeSeriesFactory()

        # fetch the requested series (presumably a pandas DataFrame - confirm against LightTimeSeriesFactory)
        df = ltsf.harvest_time_series(time_series_request)
Пример #21
0
    def calculate_leverage_factor(self, returns_df, vol_target, vol_max_leverage, vol_periods = 60, vol_obs_in_year = 252,
                                  vol_rebalance_freq = 'BM', data_resample_freq = None, data_resample_type = 'mean',
                                  returns = True, period_shift = 0):
        """
        calculate_leverage_factor - Calculates the time series of leverage for a specified vol target

        Parameters
        ----------
        returns_df : DataFrame
            Asset returns (or prices, when returns is False)

        vol_target : float
            vol target for assets

        vol_max_leverage : float
            maximum leverage allowed

        vol_periods : int
            number of periods to calculate volatility

        vol_obs_in_year : int
            number of observations in the year

        vol_rebalance_freq : str
            how often to rebalance

        data_resample_freq : str
            do we need to resample the underlying data first? (eg. have we got intraday data?)
            Not implemented yet: any value other than None makes the method return None

        data_resample_type : str
            how leverage is aggregated within each rebalance period: 'mean', 'first' or 'last'
            (any other value makes the method return None)

        returns : boolean
            is this returns time series or prices?

        period_shift : int
            should we delay the signal by a number of periods?

        Returns
        -------
        pandas.Dataframe
            leverage aligned to the index of returns_df, or None for the unsupported options above
        """

        # bail out on unsupported options before constructing helpers or doing any work
        if data_resample_freq is not None:
            # TODO not implemented yet
            return None

        if data_resample_type not in ('mean', 'first', 'last'):
            # TODO implement other types
            return None

        tsc = TimeSeriesCalcs()

        if not returns:
            returns_df = tsc.calculate_returns(returns_df)

        roll_vol_df = tsc.rolling_volatility(
            returns_df, periods = vol_periods, obs_in_year = vol_obs_in_year).shift(period_shift)

        # calculate the leverage as function of vol target (with max lev constraint)
        lev_df = vol_target / roll_vol_df
        lev_df[lev_df > vol_max_leverage] = vol_max_leverage

        # should we take the mean, first, last in our resample
        resampler = lev_df.resample(vol_rebalance_freq)

        if data_resample_type == 'mean':
            lev_df = resampler.mean()
        elif data_resample_type == 'first':
            lev_df = resampler.first()
        else:
            lev_df = resampler.last()

        # bring the rebalance-frequency leverage back onto the dates of the returns
        returns_df, lev_df = returns_df.align(lev_df, join='left', axis = 0)

        lev_df = lev_df.ffill()

        # ignore the first elements before the vol window kicks in
        # (positional slice: .ix was removed from pandas, and resample above implies a datetime-like
        # index, for which the old .ix integer slice was positional too)
        lev_df.iloc[0:vol_periods] = numpy.nan

        return lev_df
Пример #22
0
    def calculate_leverage_factor(self, returns_df, vol_target, vol_max_leverage, vol_periods = 60, vol_obs_in_year = 252,
                                  vol_rebalance_freq = 'BM', data_resample_freq = None, data_resample_type = 'mean',
                                  returns = True, period_shift = 0):
        """
        calculate_leverage_factor - Calculates the time series of leverage for a specified vol target

        Parameters
        ----------
        returns_df : DataFrame
            Asset returns (or prices, when returns is False)

        vol_target : float
            vol target for assets

        vol_max_leverage : float
            maximum leverage allowed

        vol_periods : int
            number of periods to calculate volatility

        vol_obs_in_year : int
            number of observations in the year

        vol_rebalance_freq : str
            how often to rebalance

        data_resample_freq : str
            do we need to resample the underlying data first? (eg. have we got intraday data?)
            Not implemented yet: any value other than None makes the method return None

        data_resample_type : str
            resample type handed to TimeSeriesFilter.resample_time_series_frequency together with
            vol_rebalance_freq

        returns : boolean
            is this returns time series or prices?

        period_shift : int
            should we delay the signal by a number of periods?

        Returns
        -------
        pandas.Dataframe
            leverage aligned to the index of returns_df, or None when data_resample_freq is given
        """

        # bail out on the unsupported option before constructing helpers or doing any work
        if data_resample_freq is not None:
            # TODO not implemented yet
            return None

        tsc = TimeSeriesCalcs()
        tsf = TimeSeriesFilter()

        if not returns:
            returns_df = tsc.calculate_returns(returns_df)

        roll_vol_df = tsc.rolling_volatility(
            returns_df, periods = vol_periods, obs_in_year = vol_obs_in_year).shift(period_shift)

        # calculate the leverage as function of vol target (with max lev constraint)
        lev_df = vol_target / roll_vol_df
        lev_df[lev_df > vol_max_leverage] = vol_max_leverage

        # only let the leverage change at the rebalance frequency
        lev_df = tsf.resample_time_series_frequency(lev_df, vol_rebalance_freq, data_resample_type)

        # bring the rebalance-frequency leverage back onto the dates of the returns
        returns_df, lev_df = returns_df.align(lev_df, join='left', axis = 0)

        lev_df = lev_df.ffill()

        # ignore the first elements before the vol window kicks in
        # (positional slice: .ix was removed from pandas; NOTE(review): assumes a non-integer, e.g.
        # datetime, index, for which the old .ix integer slice was positional as well - confirm)
        lev_df.iloc[0:vol_periods] = numpy.nan

        return lev_df
Пример #23
0
    def construct_strategy(self, br = None):
        """
        construct_strategy - Builds the returns of every strategy basket and stores the results on the instance.

        - backtest parameters: self.br when that attribute exists, otherwise the br argument,
          otherwise whatever fill_backtest_request returns
        - market data and the basket definitions come from fill_assets

        Nothing is returned; results are stored in the _strategy_* and _strategy_group_* attributes.
        """
        from collections import OrderedDict

        calcs = TimeSeriesCalcs()

        # resolve the parameters for backtesting (an existing self.br takes precedence over the argument)
        if hasattr(self, 'br'):
            br = self.br
        elif br is None:
            br = self.fill_backtest_request()

        # market data for the backtest
        asset_df, spot_df, spot_df2, basket_dict = self.fill_assets()

        tech_params = br.tech_params if hasattr(br, 'tech_params') else TechParams()

        cumresults = pandas.DataFrame(index = asset_df.index)
        portleverage = pandas.DataFrame(index = asset_df.index)
        tsdresults = OrderedDict()

        # one backtest per basket; each key names a (sub)portfolio
        for key, basket in basket_dict.items():
            close_cols = [ticker + '.close' for ticker in basket]
            asset_cut_df = asset_df[close_cols]
            spot_cut_df = spot_df[close_cols]

            self.logger.info("Calculating " + key)

            results, cash_backtest = self.construct_individual_strategy(
                br, spot_cut_df, spot_df2, asset_cut_df, tech_params, key)

            result_name = results.columns[0]
            cumresults[result_name] = results
            portleverage[result_name] = cash_backtest.get_porfolio_leverage()
            tsdresults[key] = cash_backtest.get_portfolio_pnl_tsd()

            if key != self.FINAL_STRATEGY:
                continue

            # the basket designated as the final strategy is additionally kept as "the" strategy
            self._strategy_pnl = results
            self._strategy_pnl_tsd = cash_backtest.get_portfolio_pnl_tsd()
            self._strategy_leverage = cash_backtest.get_porfolio_leverage()
            self._strategy_signal = cash_backtest.get_porfolio_signal()
            self._strategy_pnl_trades = cash_backtest.get_pnl_trades()

        # benchmark for comparison
        benchmark = self.construct_strategy_benchmark()
        cumresults_benchmark = self.compare_strategy_vs_benchmark(br, cumresults, benchmark)

        self._strategy_group_benchmark_tsd = tsdresults

        if hasattr(self, '_benchmark_tsd'):
            # same dict object as stored just above (and as _strategy_group_pnl_tsd below),
            # so the benchmark entry shows up in both
            tsdresults['Benchmark'] = self._benchmark_tsd

        # annualised returns of strategies + benchmark
        benchmark_returns = calcs.calculate_returns(cumresults_benchmark)
        years = calcs.average_by_annualised_year(benchmark_returns)

        self._strategy_group_pnl = cumresults
        self._strategy_group_pnl_tsd = tsdresults
        self._strategy_group_benchmark_pnl = cumresults_benchmark
        self._strategy_group_leverage = portleverage
        self._strategy_group_benchmark_annualised_pnl = years
Пример #24
0
        # tickers for getting total return indices from Bloomberg directly
        # (shallow copy of the spot request with the tickers/vendor tickers replaced)
        time_series_request_total_ret = copy.copy(time_series_request_spot)
        time_series_request_total_ret.tickers = ['EURUSD', 'GBPUSD', 'AUDUSD']
        time_series_request_total_ret.vendor_tickers = ['EURUSDCR BGN Curncy', 'GBPUSDCR BGN Curncy', 'AUDUSDCR BGN Curncy']

        ltsf = LightTimeSeriesFactory()

        df = None
        # download spot and deposit data using the requests defined earlier in the script
        spot_df = ltsf.harvest_time_series(time_series_request_spot)
        deposit_df = ltsf.harvest_time_series(time_series_request_deposit)

        # fill gaps forwards first, then backwards for anything missing at the start
        deposit_df = deposit_df.fillna(method = 'ffill')
        deposit_df = deposit_df.fillna(method = 'bfill') # bit of a hack - because some deposit data sparse
        tot_df = ltsf.harvest_time_series(time_series_request_total_ret)
        tsc = TimeSeriesCalcs()

        tot_df = tsc.create_mult_index_from_prices(tot_df) # rebase index at 100

        # tenor passed to create_total_return_index below (we can change this)
        tenor = 'ON'

        # plot total return series comparison for all our crosses
        # in practice, we would typically make a set of xxxUSD total return indices
        # and use them to compute all other crosses (assuming we are USD denominated investor)
        for cross in ['AUDUSD', 'EURUSD', 'GBPUSD']:

            # create total return index using spot + deposits
            ind = IndicesFX()
            ind_df = ind.create_total_return_index(cross, tenor, spot_df, deposit_df)
            # tag the columns so they can be told apart from the downloaded total return series
            ind_df.columns = [x + '.PYT (with carry)' for x in ind_df.columns]
                "AUDUSD BGN Curncy",
            ],
            vendor_fields=["close"],  # which Bloomberg fields to download
            cache_algo="internet_load_return",
        )  # how to return data

        ltsf = LightTimeSeriesFactory()

        df = ltsf.harvest_time_series(time_series_request)
        df.columns = [x.replace(".close", "") for x in df.columns.values]

        gp = GraphProperties()
        pf = PlotFactory()
        gp.source = "Thalesians/BBG (created with PyThalesians Python library)"

        tsc = TimeSeriesCalcs()
        df = tsc.create_mult_index_from_prices(df)

        pf.plot_line_graph(df, adapter="pythalesians", gp=gp)

    ###### download daily data from Quandl (via FRED) for EUR/USD and GBP/USD spot and then plot
    if False:

        time_series_request = TimeSeriesRequest(
            start_date="01 Jan 1970",  # start date
            finish_date=datetime.date.today(),  # finish date
            freq="daily",  # daily data
            data_source="quandl",  # use Quandl as data source
            tickers=["EURUSD", "GBPUSD"],  # ticker (Thalesians)
            fields=["close"],  # which fields to download
            vendor_tickers=["FRED/DEXUSEU", "FRED/DEXUSUK"],  # ticker (Quandl)
    def create_tech_ind(self, data_frame_non_nan, name, tech_params):
        """
        create_tech_ind - Calculates a technical indicator and the trading signal derived from it

        The signal is stored on self._signal and the indicator on self._techind (which is also returned).

        Parameters
        ----------
        data_frame_non_nan : DataFrame
            input time series; it is forward filled before any calculation

        name : str
            indicator to calculate: "SMA", "ROC", "SMA2", "RSI", "BB" or "long-only"

        tech_params : TechParams
            supplies the settings read by the chosen indicator (sma_period, roc_period, sma2_period,
            rsi_period, rsi_lower, rsi_upper, bb_period, bb_mult) and, optionally, signal_mult which
            multiplies the final signal

        Returns
        -------
        DataFrame
            the indicator values, or None when name is not one of the indicators above
        """
        # reset both outputs so an unrecognised name cannot return a stale indicator
        self._signal = None
        self._techind = None

        data_frame = data_frame_non_nan.ffill()

        if name == "SMA":
            self._techind = data_frame.rolling(tech_params.sma_period).mean()

            # long above the moving average, short otherwise (NaN comparisons also fall into -1)
            narray = numpy.where(data_frame > self._techind, 1, -1)

            self._signal = pandas.DataFrame(index = data_frame.index, data = narray)
            self._signal.columns = [x + " SMA Signal" for x in data_frame.columns.values]

            self._techind.columns = [x + " SMA" for x in data_frame.columns.values]

        elif name == "ROC":
            tsc = TimeSeriesCalcs()

            data_frame = tsc.calculate_returns(data_frame)

            self._techind = data_frame.rolling(tech_params.roc_period).mean()

            # long when the average return is positive, short otherwise
            narray = numpy.where(self._techind > 0, 1, -1)

            self._signal = pandas.DataFrame(index = data_frame.index, data = narray)
            self._signal.columns = [x + " ROC Signal" for x in data_frame.columns.values]

            self._techind.columns = [x + " ROC" for x in data_frame.columns.values]

        elif name == "SMA2":
            sma = data_frame.rolling(tech_params.sma_period).mean()
            sma2 = data_frame.rolling(tech_params.sma2_period).mean()

            # long when the first average is above the second, short otherwise
            narray = numpy.where(sma > sma2, 1, -1)

            self._signal = pandas.DataFrame(index = data_frame.index, data = narray)
            self._signal.columns = [x + " SMA2 Signal" for x in data_frame.columns.values]

            sma.columns = [x + " SMA" for x in data_frame.columns.values]
            sma2.columns = [x + " SMA2" for x in data_frame.columns.values]
            self._techind = pandas.concat([sma, sma2], axis = 1)

        elif name in ['RSI']:
            # difference from the previous step; the first row is dropped because it has
            # no previous row to difference against
            delta = data_frame.diff().iloc[1:]

            # split into the positive moves (up) and negative moves (down)
            up, down = delta.copy(), delta.copy()
            up[up < 0] = 0
            down[down > 0] = 0

            # RSI based on simple moving averages of the up and down moves
            roll_up = up.rolling(tech_params.rsi_period).mean()
            roll_down = down.abs().rolling(tech_params.rsi_period).mean()

            rs = roll_up / roll_down

            self._techind = 100.0 - (100.0 / (1.0 + rs))
            self._techind.columns = [x + " RSI" for x in data_frame.columns.values]

            # NOTE(review): the thresholds below are compared against the forward-filled input
            # (data_frame), not against the RSI values computed above, and shift(-1) reads the
            # following row - behaviour kept as is, confirm this is intended
            signal = data_frame.copy()

            sells = (signal.shift(-1) < tech_params.rsi_lower) & (signal > tech_params.rsi_lower)
            buys = (signal.shift(-1) > tech_params.rsi_upper) & (signal < tech_params.rsi_upper)

            # +1 on buys, -1 on sells, otherwise carry the previous signal forward
            signal[buys] =  1
            signal[sells] = -1
            signal[~(buys | sells)] = numpy.nan
            signal = signal.ffill()

            self._signal = signal
            self._signal.columns = [x + " RSI Signal" for x in data_frame.columns.values]

        elif name in ["BB"]:
            ## calculate Bollinger bands
            mid = data_frame.rolling(tech_params.bb_period).mean()
            std_dev = data_frame.rolling(tech_params.bb_period).std()
            BB_std = tech_params.bb_mult * std_dev

            # the bands keep the input column names for now so they line up with the prices below
            lower = pandas.DataFrame(data = mid.values - BB_std.values, index = mid.index,
                            columns = data_frame.columns)

            upper = pandas.DataFrame(data = mid.values + BB_std.values, index = mid.index,
                            columns = data_frame.columns)

            ## calculate signals
            signal = data_frame.copy()

            buys = signal > upper
            sells = signal < lower

            # +1 above the upper band, -1 below the lower band, otherwise carry the previous signal forward
            signal[buys] = 1
            signal[sells] = -1
            signal[~(buys | sells)] = numpy.nan
            signal = signal.ffill()

            self._signal = signal
            self._signal.columns = [x + " " + name + " Signal" for x in data_frame.columns.values]

            # label each band distinctly (the upper band used to end up labelled " BB Lower",
            # producing duplicate column names)
            lower.columns = [x + " BB Lower" for x in data_frame.columns.values]
            mid.columns = [x + " BB Mid" for x in data_frame.columns.values]
            upper.columns = [x + " BB Upper" for x in data_frame.columns.values]

            self._techind = pandas.concat([lower, mid, upper], axis = 1)

        elif name == "long-only":
            ## have +1 signals only
            self._techind = data_frame  # the technical indicator is just "prices"

            narray = numpy.ones((len(data_frame.index), len(data_frame.columns)))

            self._signal = pandas.DataFrame(index = data_frame.index, data = narray)
            self._signal.columns = [x + " Long Only Signal" for x in data_frame.columns.values]

            self._techind.columns = [x + " Long Only" for x in data_frame.columns.values]

        # TODO create other indicators

        # apply signal multiplier (typically to flip signals); there is nothing to scale
        # when the indicator name was not recognised
        if self._signal is not None and hasattr(tech_params, 'signal_mult'):
            self._signal = self._signal * tech_params.signal_mult

        return self._techind
Пример #27
0
    # build the trading signal from a technical indicator computed on the spot data
    tech_ind = TechIndicator()
    tech_ind.create_tech_ind(spot_df, indicator, tech_params)
    signal_df = tech_ind.get_signal()

    # use the same data for generating signals
    cash_backtest.calculate_trading_PnL(br, asset_df, signal_df)
    port = cash_backtest.get_cumportfolio()
    # label the cumulative portfolio column with the indicator, its SMA period and the first P&L description
    port.columns = [
        indicator + ' = ' + str(tech_params.sma_period) + ' ' +
        str(cash_backtest.get_portfolio_pnl_desc()[0])
    ]
    signals = cash_backtest.get_porfolio_signal(
    )  # get final signals for each series
    returns = cash_backtest.get_pnl()  # get P&L for each series

    # gains of the individual trades, computed from the signals and the per-series P&L
    time_series_calcs = TimeSeriesCalcs()
    trade_returns = time_series_calcs.calculate_individual_trade_gains(
        signals, returns)

    print(trade_returns)

    # print the last positions (we could also save as CSV etc.)
    print(signals.tail(1))

    # chart settings: title, source caption, scale factor and the PNG file to write
    pf = PlotFactory()
    gp = GraphProperties()
    gp.title = "EUR/USD trend model"
    gp.source = 'Thalesians/BBG (calc with PyThalesians Python library)'
    gp.scale_factor = 1
    gp.file_output = 'output_data/eurusd-trend-example.png'
Пример #28
0
    def calculate_trading_PnL(self, br, asset_a_df, signal_df):
        """
        calculate_trading_PnL - Calculates P&L of a trading strategy and statistics to be retrieved later

        Results are stored on the instance (signals, leverage, P&L, cumulative P&L and return statistics
        for both the individual signals and the combined portfolio); nothing is returned.

        Parameters
        ----------
        br : BacktestRequest
            Parameters for the backtest specifying start date, finish data, transaction costs etc.
            Read here: spot_tc_bp, ann_factor and, when present, signal_vol_adjust (together with the
            signal_vol_* settings), portfolio_combination ('sum' or 'mean', defaulting to 'mean') and
            portfolio_vol_adjust. Missing signal_vol_resample_type / signal_vol_resample_freq are set
            on br to 'mean' / None when signal vol adjustment is switched on.

        asset_a_df : pandas.DataFrame
            Asset prices to be traded

        signal_df : pandas.DataFrame
            Signals for the trading strategy

        Raises
        ------
        ValueError
            if br.portfolio_combination is present but is neither 'sum' nor 'mean'
        """

        tsc = TimeSeriesCalcs()

        # make sure the dates of both traded asset and signal are aligned properly
        asset_df, signal_df = asset_a_df.align(signal_df, join='left', axis = 'index')

        # only allow signals to change on the days when we can trade assets
        signal_df = signal_df.mask(numpy.isnan(asset_df.values))    # fill asset holidays with NaN signals
        signal_df = signal_df.ffill()                               # fill these down
        asset_df = asset_df.ffill()                                 # fill down asset holidays

        returns_df = tsc.calculate_returns(asset_df)
        tc = br.spot_tc_bp

        # label each P&L series as "<returns column> / <signal column>"
        pnl_cols = [returns_col + " / " + signal_col
                    for returns_col, signal_col in zip(returns_df.columns.values, signal_df.columns.values)]

        # do we have a vol target for individual signals?
        if getattr(br, 'signal_vol_adjust', False) is True:
            # fill in optional settings so the call below cannot fail on a missing attribute
            if not(hasattr(br, 'signal_vol_resample_type')):
                br.signal_vol_resample_type = 'mean'

            if not(hasattr(br, 'signal_vol_resample_freq')):
                br.signal_vol_resample_freq = None

            leverage_df = self.calculate_leverage_factor(returns_df, br.signal_vol_target, br.signal_vol_max_leverage,
                                           br.signal_vol_periods, br.signal_vol_obs_in_year,
                                           br.signal_vol_rebalance_freq, br.signal_vol_resample_freq,
                                           br.signal_vol_resample_type)

            signal_df = pandas.DataFrame(
                signal_df.values * leverage_df.values, index = signal_df.index, columns = signal_df.columns)

            self._individual_leverage = leverage_df     # contains leverage of individual signal (before portfolio vol target)

        _pnl = tsc.calculate_signal_returns_with_tc_matrix(signal_df, returns_df, tc = tc)
        _pnl.columns = pnl_cols

        # portfolio is built from the underlying signals: should we sum them or average them?
        combination = getattr(br, 'portfolio_combination', 'mean')

        if combination == 'sum':
            combined_pnl = _pnl.sum(axis = 1)
        elif combination == 'mean':
            combined_pnl = _pnl.mean(axis = 1)
        else:
            # previously this left the portfolio undefined and failed later with an unrelated error
            raise ValueError("portfolio_combination must be 'sum' or 'mean', got " + repr(combination))

        portfolio = pandas.DataFrame(data = combined_pnl, index = _pnl.index, columns = ['Portfolio'])

        # default portfolio leverage is 1 everywhere
        portfolio_leverage_df = pandas.DataFrame(data = numpy.ones(len(_pnl.index)), index = _pnl.index, columns = ['Portfolio'])

        # should we apply vol target on a portfolio level basis?
        if getattr(br, 'portfolio_vol_adjust', False) is True:
            portfolio, portfolio_leverage_df = self.calculate_vol_adjusted_returns(portfolio, br = br)

        self._portfolio = portfolio
        self._signal = signal_df                            # individual signals (before portfolio leverage)
        self._portfolio_leverage = portfolio_leverage_df    # leverage on portfolio

        # multiply portfolio leverage * individual signals to get final position signals
        length_cols = len(signal_df.columns)
        leverage_matrix = numpy.repeat(portfolio_leverage_df.values.flatten()[numpy.newaxis,:], length_cols, 0)

        # final portfolio signals (including signal & portfolio leverage)
        self._portfolio_signal = pandas.DataFrame(
            data = numpy.multiply(numpy.transpose(leverage_matrix), signal_df.values),
            index = signal_df.index, columns = signal_df.columns)

        # an averaged portfolio holds 1/N of each signal; a summed one holds each signal in full
        if combination == 'mean':
            self._portfolio_signal = self._portfolio_signal / float(length_cols)

        self._pnl = _pnl                                                            # individual signals P&L

        # TODO FIX very slow - hence only calculate on demand
        self._pnl_trades = None

        self._tsd_pnl = TimeSeriesDesc()
        self._tsd_pnl.calculate_ret_stats(self._pnl, br.ann_factor)

        self._portfolio.columns = ['Port']
        self._tsd_portfolio = TimeSeriesDesc()
        self._tsd_portfolio.calculate_ret_stats(self._portfolio, br.ann_factor)

        self._cumpnl = tsc.create_mult_index(self._pnl)                             # individual signals cumulative P&L
        self._cumpnl.columns = pnl_cols

        self._cumportfolio = tsc.create_mult_index(self._portfolio)                 # portfolio cumulative P&L
        self._cumportfolio.columns = ['Port']
Пример #29
0
# Example: download Apple and S&P500 ETF closes from Google and set up a chart for Plotly
if True:

    time_series_request = TimeSeriesRequest(
        start_date="01 Jan 2013",  # start date
        finish_date=datetime.date.today(),  # finish date
        freq='daily',  # daily data
        data_source='google',  # use Google as data source
        tickers=['Apple', 'S&P500 ETF'],  # ticker (Thalesians)
        fields=['close'],  # which fields to download
        vendor_tickers=['aapl', 'spy'],  # ticker (Google)
        vendor_fields=['Close'],  # which Google fields to download
        cache_algo='internet_load_return')  # how to return data

    ltsf = LightTimeSeriesFactory()
    tsc = TimeSeriesCalcs()

    # download the prices and convert them with create_mult_index_from_prices
    df = tsc.create_mult_index_from_prices(
        ltsf.harvest_time_series(time_series_request))

    gp = GraphProperties()
    gp.title = "S&P500 vs Apple"

    # plot first with PyThalesians and then Plotly (via Cufflinks)
    # just needs 1 word to change
    # (although, note that AdapterCufflinks does have some extra parameters that can be set in GraphProperties)
    gp.plotly_username = '******'  # note: need to fill in Plotly API key on Constants and change this!
    gp.plotly_world_readable = True
    gp.plotly_plot_mode = "online"  # will render on Plotly website

    pf = PlotFactory()
Пример #30
0
    def calculate_trading_PnL(self, br, asset_a_df, signal_df):
        """
        calculate_trading_PnL - Calculates P&L of a trading strategy and statistics to be retrieved later

        Results are stored on the instance (signals, leverage, P&L, cumulative P&L and return statistics
        for both the individual signals and the combined portfolio); nothing is returned.

        Parameters
        ----------
        br : BacktestRequest
            Parameters for the backtest specifying start date, finish data, transaction costs etc.
            Read here: spot_tc_bp, ann_factor and, when present, signal_vol_adjust (together with the
            signal_vol_* settings), portfolio_combination ('sum' or 'mean', defaulting to 'mean') and
            portfolio_vol_adjust. Missing signal_vol_resample_type / signal_vol_resample_freq are set
            on br to 'mean' / None when signal vol adjustment is switched on.

        asset_a_df : pandas.DataFrame
            Asset prices to be traded

        signal_df : pandas.DataFrame
            Signals for the trading strategy

        Raises
        ------
        ValueError
            if br.portfolio_combination is present but is neither 'sum' nor 'mean'
        """

        tsc = TimeSeriesCalcs()

        # make sure the dates of both traded asset and signal are aligned properly
        asset_df, signal_df = asset_a_df.align(signal_df, join='left', axis = 'index')

        # only allow signals to change on the days when we can trade assets
        signal_df = signal_df.mask(numpy.isnan(asset_df.values))    # fill asset holidays with NaN signals
        signal_df = signal_df.ffill()                               # fill these down
        asset_df = asset_df.ffill()                                 # fill down asset holidays

        returns_df = tsc.calculate_returns(asset_df)
        tc = br.spot_tc_bp

        # label each P&L series as "<returns column> / <signal column>"
        pnl_cols = [returns_col + " / " + signal_col
                    for returns_col, signal_col in zip(returns_df.columns.values, signal_df.columns.values)]

        # do we have a vol target for individual signals?
        if getattr(br, 'signal_vol_adjust', False) is True:
            # fill in optional settings so the call below cannot fail on a missing attribute
            if not(hasattr(br, 'signal_vol_resample_type')):
                br.signal_vol_resample_type = 'mean'

            if not(hasattr(br, 'signal_vol_resample_freq')):
                br.signal_vol_resample_freq = None

            leverage_df = self.calculate_leverage_factor(returns_df, br.signal_vol_target, br.signal_vol_max_leverage,
                                           br.signal_vol_periods, br.signal_vol_obs_in_year,
                                           br.signal_vol_rebalance_freq, br.signal_vol_resample_freq,
                                           br.signal_vol_resample_type)

            signal_df = pandas.DataFrame(
                signal_df.values * leverage_df.values, index = signal_df.index, columns = signal_df.columns)

            self._individual_leverage = leverage_df     # contains leverage of individual signal (before portfolio vol target)

        _pnl = tsc.calculate_signal_returns_with_tc_matrix(signal_df, returns_df, tc = tc)
        _pnl.columns = pnl_cols

        # portfolio is built from the underlying signals: should we sum them or average them?
        combination = getattr(br, 'portfolio_combination', 'mean')

        if combination == 'sum':
            combined_pnl = _pnl.sum(axis = 1)
        elif combination == 'mean':
            combined_pnl = _pnl.mean(axis = 1)
        else:
            # previously this left the portfolio undefined and failed later with an unrelated error
            raise ValueError("portfolio_combination must be 'sum' or 'mean', got " + repr(combination))

        portfolio = pandas.DataFrame(data = combined_pnl, index = _pnl.index, columns = ['Portfolio'])

        # default portfolio leverage is 1 everywhere
        portfolio_leverage_df = pandas.DataFrame(data = numpy.ones(len(_pnl.index)), index = _pnl.index, columns = ['Portfolio'])

        # should we apply vol target on a portfolio level basis?
        if getattr(br, 'portfolio_vol_adjust', False) is True:
            portfolio, portfolio_leverage_df = self.calculate_vol_adjusted_returns(portfolio, br = br)

        self._portfolio = portfolio
        self._signal = signal_df                            # individual signals (before portfolio leverage)
        self._portfolio_leverage = portfolio_leverage_df    # leverage on portfolio

        # multiply portfolio leverage * individual signals to get final position signals
        length_cols = len(signal_df.columns)
        leverage_matrix = numpy.repeat(portfolio_leverage_df.values.flatten()[numpy.newaxis,:], length_cols, 0)

        # final portfolio signals (including signal & portfolio leverage)
        self._portfolio_signal = pandas.DataFrame(
            data = numpy.multiply(numpy.transpose(leverage_matrix), signal_df.values),
            index = signal_df.index, columns = signal_df.columns)

        # an averaged portfolio holds 1/N of each signal; a summed one holds each signal in full
        if combination == 'mean':
            self._portfolio_signal = self._portfolio_signal / float(length_cols)

        self._pnl = _pnl                                                            # individual signals P&L

        # TODO FIX very slow - hence only calculate on demand
        self._pnl_trades = None

        self._tsd_pnl = TimeSeriesDesc()
        self._tsd_pnl.calculate_ret_stats(self._pnl, br.ann_factor)

        self._portfolio.columns = ['Port']
        self._tsd_portfolio = TimeSeriesDesc()
        self._tsd_portfolio.calculate_ret_stats(self._portfolio, br.ann_factor)

        self._cumpnl = tsc.create_mult_index(self._pnl)                             # individual signals cumulative P&L
        self._cumpnl.columns = pnl_cols

        self._cumportfolio = tsc.create_mult_index(self._portfolio)                 # portfolio cumulative P&L
        self._cumportfolio.columns = ['Port']
Пример #31
0
# just change "False" to "True" to run any of the below examples

if True:
    # Request daily closes for Apple and the S&P500 ETF from Google, starting 1 Jan 2013 up to today
    time_series_request = TimeSeriesRequest(
        start_date="01 Jan 2013",               # start date
        finish_date=datetime.date.today(),      # finish date
        freq='daily',                           # daily data
        data_source='google',                   # use Google as data source
        tickers=['Apple', 'S&P500 ETF'],        # ticker (Thalesians)
        fields=['close'],                       # which fields to download
        vendor_tickers=['aapl', 'spy'],         # ticker (Google)
        vendor_fields=['Close'],                # which Google fields to download
        cache_algo='internet_load_return',      # how to return data
    )

    ltsf = LightTimeSeriesFactory()
    tsc = TimeSeriesCalcs()

    # download the prices, then convert them with create_mult_index_from_prices
    df = ltsf.harvest_time_series(time_series_request)
    df = tsc.create_mult_index_from_prices(df)

    # chart settings shared by both plots
    gp = GraphProperties()
    gp.html_file_output = "output_data/apple.htm"
    gp.title = "S&P500 vs Apple"
    gp.display_legend = False

    # plot first with PyThalesians and then Bokeh - only the adapter name changes
    pf = PlotFactory()
    for adapter in ('pythalesians', 'bokeh'):
        pf.plot_generic_graph(df, type='line', adapter=adapter, gp=gp)
                tickers = ['EURUSD',                            # ticker (Thalesians)
                           'GBPUSD',
                           'AUDUSD'],
                fields = ['close'],                             # which fields to download
                vendor_tickers = ['EURUSD BGN Curncy',          # ticker (Bloomberg)
                                  'GBPUSD BGN Curncy',
                                  'AUDUSD BGN Curncy'],
                vendor_fields = ['PX_LAST'],                    # which Bloomberg fields to download
                cache_algo = 'internet_load_return')                # how to return data

        ltsf = LightTimeSeriesFactory()

        df = None
        df = ltsf.harvest_time_series(time_series_request)

        tsc = TimeSeriesCalcs()
        df = tsc.calculate_returns(df)
        df = tsc.rolling_corr(df['EURUSD.close'], 20, data_frame2 = df[['GBPUSD.close', 'AUDUSD.close']])

        gp = GraphProperties()
        gp.title = "1M FX rolling correlations"
        gp.scale_factor = 3

        pf = PlotFactory()
        pf.plot_line_graph(df, adapter = 'pythalesians', gp = gp)

    ###### download daily data from Bloomberg for AUD/JPY, NZD/JPY spot with S&P500, then calculate correlation
    if True:
        time_series_request = TimeSeriesRequest(
                start_date="01 Jan 2015",  # start date
                finish_date=datetime.date.today(),  # finish date
Пример #33
0
    def get_intraday_moves_over_custom_event(self,
                                             data_frame_rets,
                                             ef_time_frame,
                                             vol=False,
                                             minute_start=5,
                                             mins=3 * 60,
                                             min_offset=0,
                                             create_index=False,
                                             resample=False,
                                             freq='minutes'):
        """
        get_intraday_moves_over_custom_event - Lines up the returns around every event time, giving one
        column per event and one row per step of the observation window

        Parameters
        ----------
        data_frame_rets : DataFrame
            returns indexed by time (only the first column is used); it is copied, never modified
        ef_time_frame : DataFrame
            events, whose index holds the event times
        vol : bool
            if True (and create_index is False) return annualised 5 period rolling volatility
            instead of cumulative moves
        minute_start : int
            number of steps before each event at which the window opens
        mins : int
            number of steps after each event at which the window closes
        min_offset : int
            shift applied to the row labels of the window
        create_index : bool
            if True return the output of TimeSeriesCalcs.create_mult_index for the window returns
        resample : bool
            if True resample the returns to 1 minute bars first (gaps filled with 0)
        freq : str
            'minutes' or 'days' - the unit of minute_start and mins

        Returns
        -------
        DataFrame

        Raises
        ------
        ValueError
            if freq is neither 'minutes' nor 'days'
        """
        # fail early with a clear message (otherwise the window bounds below would never be defined)
        if freq not in ('minutes', 'days'):
            raise ValueError("freq must be 'minutes' or 'days', got " +
                             repr(freq))

        tsf = TimeSeriesFilter()

        # only keep events inside the span covered by the returns
        ef_time_frame = tsf.filter_time_series_by_date(
            data_frame_rets.index[0], data_frame_rets.index[-1], ef_time_frame)
        ef_time = ef_time_frame.index

        if freq == 'minutes':
            ef_time_start = ef_time - timedelta(minutes=minute_start)
            ef_time_end = ef_time + timedelta(minutes=mins)
            ann_factor = 252 * 1440
        else:
            # daily data: drop the time of day from the event stamps
            ef_time = ef_time.normalize()
            ef_time_start = ef_time - timedelta(days=minute_start)
            ef_time_end = ef_time + timedelta(days=mins)
            ann_factor = 252

        # row labels of a complete window
        ords = list(range(-minute_start + min_offset, mins + min_offset))

        # all data needs to be equally spaced
        if resample:
            # make sure time series is properly sampled at 1 min intervals
            data_frame_rets = data_frame_rets.resample('1min')
            data_frame_rets = data_frame_rets.fillna(value=0)
            data_frame_rets = tsf.remove_out_FX_out_of_hours(data_frame_rets)

        # work on a private copy, so the helper columns 'Ind' and 'Rel' never leak into
        # (or linger from an earlier call on) the caller's DataFrame
        data_frame_rets = data_frame_rets.copy()
        data_frame_rets['Ind'] = numpy.nan

        start_index = data_frame_rets.index.searchsorted(ef_time_start)
        finish_index = data_frame_rets.index.searchsorted(ef_time_end)

        # label every row of each window with its offset
        # a window cut short (eg. by the end of the data) only receives the leading labels;
        # a window longer than ords still raises, as mismatched lengths cannot be assigned
        # TODO vectorise this!
        for start, finish in zip(start_index, finish_index):
            data_frame_rets.ix[start:finish, 'Ind'] = ords[:finish - start]

        # set the release dates
        # NOTE(review): finish_index + 1 may point past the last row for a window ending
        # at the end of the data - confirm how .ix behaves in that case
        data_frame_rets.ix[start_index, 'Rel'] = ef_time  # set entry points
        data_frame_rets.ix[finish_index + 1, 'Rel'] = numpy.zeros(
            len(start_index))  # set exit points
        data_frame_rets['Rel'] = data_frame_rets['Rel'].fillna(
            method='pad')  # fill down signals

        # get rid of rows outside every window
        data_frame_rets = data_frame_rets[pandas.notnull(
            data_frame_rets['Ind'])]

        data_frame = data_frame_rets.pivot(index='Ind',
                                           columns='Rel',
                                           values=data_frame_rets.columns[0])

        data_frame.index.names = [None]

        if create_index:
            tsc = TimeSeriesCalcs()

            # blank out the first row of the window before building the index
            data_frame.ix[-minute_start + min_offset, :] = numpy.nan
            data_frame = tsc.create_mult_index(data_frame)
        else:
            if vol is True:
                # annualise (if vol)
                data_frame = data_frame.rolling(
                    center=False, window=5).std() * math.sqrt(ann_factor)
            else:
                data_frame = data_frame.cumsum()

        return data_frame
Пример #34
0
    def get_intraday_moves_over_custom_event(self, data_frame_rets, ef_time_frame, vol=False,
                                             minute_start = 5, mins = 3 * 60, min_offset = 0 , create_index = False,
                                             resample = False, freq = 'minutes'):
        """
        get_intraday_moves_over_custom_event - Arranges returns into observation windows around event times
        (one column per event, one row per step of the window)

        Parameters
        ----------
        data_frame_rets : DataFrame
            returns indexed by time (only the first column is used); a copy is annotated, the input is untouched
        ef_time_frame : DataFrame
            events, whose index holds the event times
        vol : bool
            if True (and create_index is False) return annualised 5 period rolling volatility, not cumulative moves
        minute_start : int
            number of steps before each event at which the window opens
        mins : int
            number of steps after each event at which the window closes
        min_offset : int
            shift applied to the row labels of the window
        create_index : bool
            if True return the output of TimeSeriesCalcs.create_mult_index for the window returns
        resample : bool
            if True resample the returns to 1 minute bars first (gaps filled with 0)
        freq : str
            'minutes' or 'days' - the unit of minute_start and mins

        Returns
        -------
        DataFrame

        Raises
        ------
        ValueError
            if freq is neither 'minutes' nor 'days'
        """
        # reject unknown frequencies up front (previously this only failed later on an undefined variable)
        if freq not in ('minutes', 'days'):
            raise ValueError("freq must be 'minutes' or 'days', got " + repr(freq))

        tsf = TimeSeriesFilter()

        # only keep events inside the span covered by the returns
        ef_time_frame = tsf.filter_time_series_by_date(data_frame_rets.index[0], data_frame_rets.index[-1], ef_time_frame)
        ef_time = ef_time_frame.index

        if freq == 'minutes':
            ef_time_start = ef_time - timedelta(minutes = minute_start)
            ef_time_end = ef_time + timedelta(minutes = mins)
            ann_factor = 252 * 1440
        else:
            # daily data: drop the time of day from the event stamps
            ef_time = ef_time.normalize()
            ef_time_start = ef_time - timedelta(days = minute_start)
            ef_time_end = ef_time + timedelta(days = mins)
            ann_factor = 252

        # row labels of a complete window
        ords = list(range(-minute_start + min_offset, mins + min_offset))

        # all data needs to be equally spaced
        if resample:
            # make sure time series is properly sampled at 1 min intervals
            data_frame_rets = data_frame_rets.resample('1min')
            data_frame_rets = data_frame_rets.fillna(value = 0)
            data_frame_rets = tsf.remove_out_FX_out_of_hours(data_frame_rets)

        # annotate a private copy: the helper columns 'Ind' and 'Rel' must not be written into the
        # caller's DataFrame (a leftover 'Rel' column would also contaminate a later call)
        data_frame_rets = data_frame_rets.copy()
        data_frame_rets['Ind'] = numpy.nan

        start_index = data_frame_rets.index.searchsorted(ef_time_start)
        finish_index = data_frame_rets.index.searchsorted(ef_time_end)

        # fill the indices which represent the steps of each window
        # a shortened window (eg. the last one) only gets the leading labels; a window longer than
        # ords still raises, because the lengths cannot be matched
        # TODO vectorise this!
        for start, finish in zip(start_index, finish_index):
            data_frame_rets.ix[start:finish, 'Ind'] = ords[:finish - start]

        # set the release dates
        # NOTE(review): finish_index + 1 may point past the last row when a window reaches the end
        # of the data - confirm how .ix behaves in that case
        data_frame_rets.ix[start_index,'Rel'] = ef_time                                         # set entry points
        data_frame_rets.ix[finish_index + 1,'Rel'] = numpy.zeros(len(start_index))              # set exit points
        data_frame_rets['Rel'] = data_frame_rets['Rel'].fillna(method = 'pad')                  # fill down signals

        data_frame_rets = data_frame_rets[pandas.notnull(data_frame_rets['Ind'])]               # get rid of other

        data_frame = data_frame_rets.pivot(index='Ind',
                                           columns='Rel', values=data_frame_rets.columns[0])

        data_frame.index.names = [None]

        if create_index:
            tsc = TimeSeriesCalcs()

            # blank out the first row of the window before building the index
            data_frame.ix[-minute_start + min_offset,:] = numpy.nan
            data_frame = tsc.create_mult_index(data_frame)
        else:
            if vol is True:
                # annualise (if vol)
                data_frame = pandas.rolling_std(data_frame, window=5) * math.sqrt(ann_factor)
            else:
                data_frame = data_frame.cumsum()

        return data_frame
Пример #35
0
        time_series_request_total_ret.tickers = ['EURUSD', 'GBPUSD', 'AUDUSD']
        time_series_request_total_ret.vendor_tickers = [
            'EURUSDCR BGN Curncy', 'GBPUSDCR BGN Curncy', 'AUDUSDCR BGN Curncy'
        ]

        ltsf = LightTimeSeriesFactory()

        df = None
        spot_df = ltsf.harvest_time_series(time_series_request_spot)
        deposit_df = ltsf.harvest_time_series(time_series_request_deposit)

        deposit_df = deposit_df.fillna(method='ffill')
        deposit_df = deposit_df.fillna(
            method='bfill')  # bit of a hack - because some deposit data sparse
        tot_df = ltsf.harvest_time_series(time_series_request_total_ret)
        tsc = TimeSeriesCalcs()

        tot_df = tsc.create_mult_index_from_prices(
            tot_df)  # rebase index at 100

        # we can change the
        tenor = 'ON'

        # plot total return series comparison for all our crosses
        # in practice, we would typically make a set of xxxUSD total return indices
        # and use them to compute all other crosses (assuming we are USD denominated investor)
        for cross in ['AUDUSD', 'EURUSD', 'GBPUSD']:

            # create total return index using spot + deposits
            ind = IndicesFX()
            ind_df = ind.create_total_return_index(cross, tenor, spot_df,
Пример #36
0
            cache_algo='internet_load_return')  # how to return data

        df = ltsf.harvest_time_series(time_series_request)
        df.columns = [x.replace('.close', '') for x in df.columns.values]

        # Bloomberg does not give the millisecond field when you make a tick request, so might as well downsample to S

        df['JPYUSD'] = 1 / df['JPYUSD']

        gp = GraphProperties()
        pf = PlotFactory()
        gp.scale_factor = 3
        gp.title = 'FX around last NFP date'
        gp.source = 'Thalesians/BBG (created with PyThalesians Python library)'

        tsc = TimeSeriesCalcs()
        df = tsc.create_mult_index_from_prices(df)

        pf.plot_line_graph(df, adapter='pythalesians', gp=gp)

    ###### download tick data from Bloomberg for EUR/USD around last FOMC and then downsample to plot
    if True:
        finish_date = datetime.datetime.utcnow()
        start_date = finish_date - timedelta(days=60)

        # fetch Fed times from Bloomberg
        time_series_request = TimeSeriesRequest(
            start_date=start_date,  # start date
            finish_date=finish_date,  # finish date
            category="events",
            freq='daily',  # daily data
Пример #37
0
                data_source = 'bloomberg',                  # use Bloomberg as data source
                tickers = ['USDJPY'],                       # ticker (Thalesians)
                fields = ['close'],                         # which fields to download
                vendor_tickers = ['USDJPY BGN Curncy'],     # ticker (Bloomberg)
                vendor_fields = ['PX_LAST'],                # which Bloomberg fields to download
                cache_algo = 'internet_load_return')           # how to return data

        ltsf = LightTimeSeriesFactory()

        df = None
        df = ltsf.harvest_time_series(time_series_request)

        utc_time = pytz.utc
        df.index = df.index.tz_localize(utc_time)       # work in UTC time

        tsc = TimeSeriesCalcs()
        df = tsc.calculate_returns(df)

        # fetch NFP times from Bloomberg
        time_series_request = TimeSeriesRequest(
                start_date = start_date,                # start date
                finish_date = finish_date,              # finish date
                category = "events",
                freq = 'daily',                         # daily data
                data_source = 'bloomberg',              # use Bloomberg as data source
                tickers = ['NFP'],
                fields = ['release-date-time-full'],                    # which fields to download
                vendor_tickers = ['NFP TCH Index'], # ticker (Bloomberg)
                vendor_fields = ['ECO_FUTURE_RELEASE_DATE_LIST'],   # which Bloomberg fields to download
                cache_algo = 'internet_load_return')                # how to return data
Пример #38
0
    def construct_strategy(self):
        """
        construct_strategy - Runs the backtest for every basket of assets and stores the P&L, leverage,
        return statistics and benchmark comparison of the whole group on this object.

        - backtest parameters are taken from self.br if set, otherwise from fill_backtest_request
        - market data is taken from fill_assets

        """

        calcs = TimeSeriesCalcs()

        # parameters for backtesting
        br = self.br if hasattr(self, 'br') else self.fill_backtest_request()

        # market data for backtest
        asset_df, spot_df, spot_df2, basket_dict = self.fill_assets()

        tech_params = br.tech_params if hasattr(br, 'tech_params') else TechParams()

        group_pnl = pandas.DataFrame(index=asset_df.index)
        group_leverage = pandas.DataFrame(index=asset_df.index)
        group_tsd = {}

        # one backtest per portfolio key - a key can hold just part of the portfolio
        for key, basket in basket_dict.items():
            close_cols = [ticker + '.close' for ticker in basket]
            asset_cut_df = asset_df[close_cols]
            spot_cut_df = spot_df[close_cols]

            self.logger.info("Calculating " + key)

            results, cash_backtest = self.construct_individual_strategy(
                br, spot_cut_df, spot_df2, asset_cut_df, tech_params, key)

            pnl_col = results.columns[0]
            group_pnl[pnl_col] = results
            group_leverage[pnl_col] = cash_backtest.get_porfolio_leverage()
            group_tsd[key] = cash_backtest.get_portfolio_pnl_tsd()

            if key != self.FINAL_STRATEGY:
                continue

            # the key designated as the final strategy is also kept as "the strategy"
            self._strategy_pnl = results
            self._strategy_pnl_tsd = cash_backtest.get_portfolio_pnl_tsd()
            self._strategy_leverage = cash_backtest.get_porfolio_leverage()
            self._strategy_signal = cash_backtest.get_porfolio_signal()
            self._strategy_pnl_trades = cash_backtest.get_pnl_trades()

        # benchmark for comparison
        benchmark = self.construct_strategy_benchmark()
        group_benchmark_pnl = self.compare_strategy_vs_benchmark(
            br, group_pnl, benchmark)

        # NOTE: one dict object backs both _strategy_group_benchmark_tsd and _strategy_group_pnl_tsd,
        # so the 'Benchmark' entry added here is visible through both attributes
        if hasattr(self, '_benchmark_tsd'):
            group_tsd['Benchmark'] = (self._benchmark_tsd)
        self._strategy_group_benchmark_tsd = group_tsd

        # annualised returns
        group_benchmark_rets = calcs.calculate_returns(group_benchmark_pnl)
        years = calcs.average_by_annualised_year(group_benchmark_rets)

        self._strategy_group_pnl = group_pnl
        self._strategy_group_pnl_tsd = group_tsd
        self._strategy_group_benchmark_pnl = group_benchmark_pnl
        self._strategy_group_leverage = group_leverage
        self._strategy_group_benchmark_annualised_pnl = years
Пример #39
0
    def compare_strategy_vs_benchmark(self, br, strategy_df, benchmark_df):
        """
        compare_strategy_vs_benchmark - Puts the strategy we are backtesting alongside a benchmark, provided
        the backtest request asks for the benchmark to be included

        Parameters
        ----------
        br : BacktestRequest
            Parameters for backtest (include_benchmark, calc_stats, portfolio_vol_adjust, ann_factor,
            plot_start and finish_date are read here)

        strategy_df : pandas.DataFrame
            Strategy time series

        benchmark_df : pandas.DataFrame
            Benchmark time series

        Returns
        -------
        pandas.DataFrame
            strategy_df as passed in when no benchmark is requested, otherwise the strategy joined with
            the benchmark and passed through create_mult_index_from_prices
        """

        benchmark_wanted = getattr(br, 'include_benchmark', False)
        stats_wanted = getattr(br, 'calc_stats', False)

        # nothing to compare against
        if not benchmark_wanted:
            return strategy_df

        benchmark_tsd = TimeSeriesDesc()
        backtester = CashBacktest()
        date_filter = TimeSeriesFilter()
        calcs = TimeSeriesCalcs()

        # line the benchmark up on the strategy's dates
        strategy_df, benchmark_df = strategy_df.align(benchmark_df, join='left', axis=0)

        # apply the vol target to the benchmark too, when requested (keeps it comparable with strategy)
        if getattr(br, 'portfolio_vol_adjust', None) is True:
            benchmark_df = backtester.calculate_vol_adjusted_index_from_prices(benchmark_df, br=br)

        # return statistics are optional; the benchmark columns are renamed to their summary
        if stats_wanted:
            benchmark_tsd.calculate_ret_stats_from_prices(benchmark_df, br.ann_factor)
            benchmark_df.columns = benchmark_tsd.summary()

        # realign strategy & benchmark, then cut down to the plotting window
        combined_df = strategy_df.join(benchmark_df, how='inner')
        combined_df = combined_df.fillna(method='ffill')
        combined_df = date_filter.filter_time_series_by_date(br.plot_start, br.finish_date, combined_df)
        combined_df = calcs.create_mult_index_from_prices(combined_df)

        self._benchmark_pnl = benchmark_df
        self._benchmark_tsd = benchmark_tsd

        return combined_df
Пример #40
0
# process data
from pythalesians.economics.seasonality.seasonality import Seasonality
from pythalesians.timeseries.calcs.timeseriescalcs import TimeSeriesCalcs

# displaying data
from pythalesians.graphics.graphs.plotfactory import PlotFactory
from pythalesians.graphics.graphs.graphproperties import GraphProperties

# logging
from pythalesians.util.loggermanager import LoggerManager

import datetime

seasonality = Seasonality()
tsc = TimeSeriesCalcs()
logger = LoggerManager().getLogger(__name__)

pf = PlotFactory()

###### calculate seasonal moves in EUR/USD and GBP/USD (using Quandl data)
if True:
    time_series_request = TimeSeriesRequest(
                start_date = "01 Jan 1970",                     # start date
                finish_date = datetime.date.today(),            # finish date
                freq = 'daily',                                 # daily data
                data_source = 'quandl',                         # use Quandl as data source
                tickers = ['EURUSD',                            # ticker (Thalesians)
                           'GBPUSD'],
                fields = ['close'],                                 # which fields to download
                vendor_tickers = ['FRED/DEXUSEU', 'FRED/DEXUSUK'],  # ticker (Quandl)
    def fetch_group_time_series(self, time_series_request_list):
        """
        fetch_group_time_series - Fetches every request in the list through a pool of workers and outer joins
        the results into a single DataFrame

        Parameters
        ----------
        time_series_request_list : list of TimeSeriesRequest
            requests to fetch; the data source of the first request selects the size of the pool

        Returns
        -------
        DataFrame
            joined results, or None if nothing was requested or nothing came back
        """

        # nothing to fetch (also avoids indexing into an empty list below)
        if not time_series_request_list:
            return None

        constants = Constants()
        time_series_calcs = TimeSeriesCalcs()

        # depends on the nature of operation as to whether we should use threading or multiprocessing library
        # (compare with ==, as identity of equal strings is an implementation detail)
        if constants.time_series_factory_thread_technique == "thread":
            # most of the time is spent waiting for the data vendor to return, so threads are sufficient
            from multiprocessing.dummy import Pool
        else:
            # must use the multiprocessing_on_dill library otherwise can't pickle objects correctly
            # note: currently not very stable
            from multiprocessing_on_dill import Pool

        # pool size: the entry for this data source if there is one, else the 'other' default
        thread_no_by_source = constants.time_series_factory_thread_no
        thread_no = thread_no_by_source['other']

        data_source = time_series_request_list[0].data_source

        if data_source in thread_no_by_source:
            thread_no = thread_no_by_source[data_source]

        pool = Pool(thread_no)

        try:
            # open the market data downloads in their own workers and wait for the results
            result = pool.map_async(self.fetch_single_time_series, time_series_request_list)
            data_frame_group = result.get()
        finally:
            # always release the workers, even if one of the downloads raised
            pool.close()
            pool.join()

        if data_frame_group is None:
            return None

        # collect together all the time series which actually came back
        data_frame_group = [data_frame for data_frame in data_frame_group if data_frame is not None]

        if not data_frame_group:
            return None

        # if you call for returning multiple tickers, be careful with memory considerations!
        return time_series_calcs.pandas_outer_join(data_frame_group)