Пример #1
0
    def test_historical_price__margin_adjustment__daily(self):
        """
        Checks that historical_price() widens its look-back margin when the last complete daily
        bar lies far in the past, and raises a ValueError once no bar can be found at all.
        """
        # In case if we want only 1 historical bar and the last full bar was more than ~12 days ago, the adjustment of
        # the margin for the "number of days to go back" need to be performed
        self.current_time = str_to_date("2021-05-18 00:00:00.000000",
                                        DateFormat.FULL_ISO)
        actual_bars = self.data_provider.historical_price(
            self.ticker_1, PriceField.ohlcv(), 1, frequency=Frequency.DAILY)
        # The most recent complete bar is from 2021-05-05 (13 days before current_time)
        expected_bars = PricesDataFrame(data=[[25.0, 25.1, 25.2, None, 25.3]],
                                        index=[str_to_date('2021-05-05')],
                                        columns=PriceField.ohlcv())
        assert_dataframes_equal(actual_bars, expected_bars, check_names=False)

        # Moving further away (22 days) from the last bar should still return the same bar
        self.current_time = str_to_date("2021-05-27 00:00:00.000000",
                                        DateFormat.FULL_ISO)
        actual_bars = self.data_provider.historical_price(
            self.ticker_1, PriceField.ohlcv(), 1, frequency=Frequency.DAILY)
        assert_dataframes_equal(actual_bars, expected_bars, check_names=False)

        # Beyond the supported margin no bar can be found - a ValueError is expected
        with self.assertRaises(ValueError):
            self.current_time = str_to_date("2021-06-06 00:00:00.000000",
                                            DateFormat.FULL_ISO)
            self.data_provider.historical_price(self.ticker_1,
                                                PriceField.ohlcv(),
                                                1,
                                                frequency=Frequency.DAILY)
Пример #2
0
    def setUp(self):
        """Prepare a 5x5 price fixture together with its log- and simple-returns counterparts."""
        self.dates = pd.date_range(start='2015-05-13', periods=5)
        self.column_names = ['a', 'b', 'c', 'd', 'e']

        self.prices_values = [
            [1., 1., 1., 1, 1.],
            [2., 2., 2., 2., 2.],
            [3., 3., 3., 3., 3.],
            [4., 4., 4., 4., 4.],
            [5., 5., 5., 5., 5.],
        ]
        self.test_prices_df = PricesDataFrame(data=self.prices_values,
                                              index=self.dates,
                                              columns=self.column_names)

        # Returns start one observation later than prices (no return for the first date)
        returns_index = self.dates[1:]

        self.log_returns_values = [
            [0.693147, 0.693147, 0.693147, 0.693147, 0.693147],
            [0.405465, 0.405465, 0.405465, 0.405465, 0.405465],
            [0.287682, 0.287682, 0.287682, 0.287682, 0.287682],
            [0.223144, 0.223144, 0.223144, 0.223144, 0.223144],
        ]
        self.test_log_returns_df = LogReturnsDataFrame(data=self.log_returns_values,
                                                       index=returns_index,
                                                       columns=self.column_names)

        self.simple_returns_values = [
            [1.000000, 1.000000, 1.000000, 1.000000, 1.000000],
            [0.500000, 0.500000, 0.500000, 0.500000, 0.500000],
            [0.333333, 0.333333, 0.333333, 0.333333, 0.333333],
            [0.250000, 0.250000, 0.250000, 0.250000, 0.250000],
        ]
        self.test_simple_returns_df = SimpleReturnsDataFrame(data=self.simple_returns_values,
                                                             index=returns_index,
                                                             columns=self.column_names)
Пример #3
0
    def _get_prices_df(self, ticker: Ticker, start_date: datetime, end_date: datetime) -> PricesDataFrame:
        """
        Returns non-adjusted open and close prices, indexed with the Market Open and Market Close time.

        For a FutureTicker the prices are gathered from all specific contracts that were valid
        between start_date and the expiration of the currently valid contract; for a plain ticker
        the prices are fetched directly.
        """
        if isinstance(ticker, FutureTicker):
            # The future ticker needs a timer to resolve its "currently valid" specific contract
            ticker.initialize_data_provider(SettableTimer(end_date), self._data_provider)
            tickers_chain = ticker.get_expiration_dates()

            if start_date >= tickers_chain.index[-1] or end_date <= tickers_chain.index[0]:
                # If the futures chain starts after the _end_date or ends before the _start_date - no data available
                return PricesDataFrame()

            # Get all tickers from the chain that were valid between the start_date and expiration date of the
            # currently valid ticker
            end_date = tickers_chain[tickers_chain == ticker.get_current_specific_ticker()].index[0]
            tickers_chain = tickers_chain.loc[start_date:end_date]
            tickers = tickers_chain.values.tolist()

            open_prices = self._data_provider.get_price(tickers, PriceField.Open, start_date, end_date)
            close_prices = self._data_provider.get_price(tickers, PriceField.Close, start_date, end_date)
        else:
            open_prices = self._data_provider.get_price([ticker], PriceField.Open, start_date, end_date)
            close_prices = self._data_provider.get_price([ticker], PriceField.Close, start_date, end_date)

        # Move the daily timestamps onto the market open / market close times, so that the two
        # series can be merged into one chronologically sorted frame without index collisions
        open_prices.index = [dt + MarketOpenEvent.trigger_time() for dt in open_prices.index]
        close_prices.index = [dt + MarketCloseEvent.trigger_time() for dt in close_prices.index]
        prices = concat([open_prices, close_prices]).sort_index()
        return prices
    def setUp(self):
        """Build a 12-day OHLC PricesDataFrame fixture."""
        tms = date_range('1991-05-14', periods=12, freq='D')

        # Locals renamed so they do not shadow the builtins `open` etc.
        open_prices = [100.55, 101.20, 103.29, 99.64, 126.93, 127.38,
                       125.39, 128.04, 124.17, 122.72, 123.83, 126.74]
        high_prices = [105.16, 105.36, 104.13, 106.35, 130.47, 132.54,
                       128.83, 131.73, 128.29, 127.16, 126.27, 132.98]
        low_prices = [98.07, 99.43, 100.03, 98.89, 115.37, 122.72,
                      120.48, 126.39, 121.27, 120.74, 122.01, 126.26]
        close_prices = [101.20, 105.15, 101.12, 104.37, 121.72, 124.73,
                        125.29, 126.67, 122.92, 124.05, 122.20, 128.19]

        # Rows correspond to dates, columns to the four OHLC price fields
        ohlc_matrix = array([open_prices, high_prices, low_prices, close_prices]).T
        self.ohlc = PricesDataFrame(data=ohlc_matrix,
                                    index=tms,
                                    columns=[PriceField.Open, PriceField.High,
                                             PriceField.Low, PriceField.Close])
Пример #5
0
    def _data_array_to_dataframe(self, prices_data_array: QFDataArray):
        """
        Converts a QFDataArray into a DataFrame by removing the "Price Field" axis.

        In order to remove it open and close prices get different time component in their corresponding datetimes
        (open prices will get the time of `MarketOpenEvent` and close prices will get the time of `MarketCloseEvent`).

        Parameters
        ----------
        prices_data_array
            3-dimensional array with dates, tickers and price fields axes

        Returns
        -------
        PricesDataFrame
            frame indexed with the open/close datetimes, one column per ticker
        """
        original_dates = prices_data_array.dates.to_index()

        # Every original date appears twice in the result: once shifted to the market open time...
        market_open_datetimes = [
            price_datetime + MarketOpenEvent.trigger_time()
            for price_datetime in original_dates
        ]
        # ...and once shifted to the market close time
        market_close_datetimes = [
            price_datetime + MarketCloseEvent.trigger_time()
            for price_datetime in original_dates
        ]

        new_dates = set(market_open_datetimes + market_close_datetimes)

        # Open prices land on the open timestamps, close prices on the close timestamps
        prices_df = PricesDataFrame(index=new_dates,
                                    columns=prices_data_array.tickers)
        prices_df.loc[
            market_open_datetimes, :] = prices_data_array.loc[:, :, PriceField.
                                                              Open].values
        prices_df.loc[
            market_close_datetimes, :] = prices_data_array.loc[:, :,
                                                               PriceField.
                                                               Close].values

        # The index was built from a set - restore chronological order
        prices_df.sort_index(inplace=True)
        return prices_df
Пример #6
0
    def _data_array_to_dataframe(self, prices_data_array: QFDataArray,
                                 frequency: Frequency):
        """
        Converts a QFDataArray into a DataFrame by removing the "Price Field" axis.

        Every index (e.g. 15:00) denotes the close price of the time range beginning at this time (15:00 - 15:01)
        The only exception is the time range 1 minute before market open (e.g. 9:29 - 9:30 if market opens 9:30). The
        price for this time range, denotes the OPEN price of 9:30 - 9:31.

        Parameters
        ----------
        prices_data_array
            3-dimensional array with dates, tickers and price fields axes
        frequency
            bar frequency; its time_delta defines how far the open bars are shifted back
        """
        original_dates = list(prices_data_array.dates.to_index())
        # Resample to one entry per day in order to locate each day's market open bar
        dates = prices_data_array.resample(dates='1D').first().dates.to_index()
        # Keep only those market open datetimes which actually exist in the data
        market_open_datetimes = [
            price_datetime + MarketOpenEvent.trigger_time()
            for price_datetime in dates if price_datetime +
            MarketOpenEvent.trigger_time() in original_dates
        ]
        # Shift the open bars one bar back so they do not collide with the close prices
        shifted_open_datetimes = [
            price_datetime - frequency.time_delta()
            for price_datetime in market_open_datetimes
        ]

        new_dates = list(set(original_dates + shifted_open_datetimes))
        new_dates = sorted(new_dates)
        prices_df = PricesDataFrame(index=new_dates,
                                    columns=prices_data_array.tickers)

        # Open prices are stored under the shifted timestamps, close prices under the originals
        prices_df.loc[shifted_open_datetimes, :] = \
            prices_data_array.loc[market_open_datetimes, :, PriceField.Open].values
        prices_df.loc[original_dates, :] = prices_data_array.loc[
            original_dates, :, PriceField.Close].values

        return prices_df
Пример #7
0
    def setUp(self):
        """Create a 6-day open/close price fixture with mostly flat prices."""
        self.tms = date_range('1991-05-14', periods=6, freq='D')

        # Open equals close on every day; renamed so the builtins `open` is not shadowed
        open_prices = [100, 100, 100, 101, 101, 102]
        close_prices = list(open_prices)

        self.prices_df = PricesDataFrame(data=array([open_prices, close_prices]).T,
                                         index=self.tms,
                                         columns=[PriceField.Open, PriceField.Close])
Пример #8
0
    def test_exponential_average(self):
        """Verify exponential_average() against precomputed smoothed values."""
        expected_rows = [[value] * 5 for value in
                         (1.000000, 1.940000, 2.936400, 3.936184, 4.936171)]
        expected_dataframe = PricesDataFrame(data=expected_rows,
                                             index=self.dates,
                                             columns=self.column_names)

        actual_dataframe = self.test_prices_df.exponential_average()

        assert_dataframes_equal(expected_dataframe, actual_dataframe)
Пример #9
0
    def test_min_max_normalized(self):
        """Verify that min_max_normalized() rescales the prices onto the [0, 1] interval."""
        expected_rows = [[level] * 5 for level in (0.00, 0.25, 0.50, 0.75, 1.00)]
        expected_dataframe = PricesDataFrame(data=expected_rows,
                                             index=self.dates,
                                             columns=self.column_names)

        actual_dataframe = self.test_prices_df.min_max_normalized()

        assert_dataframes_equal(expected_dataframe, actual_dataframe)
    def test_get_price_with_single_field(self):
        """A single-field get_price() call should yield a frame of zero volumes."""
        actual_frame = self.prefetching_data_provider.get_price(
            self.cached_tickers, PriceField.Volume, self.start_date,
            self.end_date, self.frequency)

        # Expected: one zero per cached (date, ticker) pair
        shape = (len(self.cached_dates_idx), len(self.cached_tickers_idx))
        expected_frame = PricesDataFrame(data=np.full(shape, 0),
                                         index=self.cached_dates_idx,
                                         columns=self.cached_tickers_idx)
        tt.assert_dataframes_equal(expected_frame, actual_frame,
                                   check_index_type=True,
                                   check_column_type=True)
Пример #11
0
    def test_historical_price__single_ticker__multiple_fields__daily(self):
        """
        historical_price() should return the N most recent complete daily OHLCV bars for a
        single ticker and raise ValueError when fewer bars exist than requested.
        """
        self.current_time = str_to_date("2021-05-06 00:00:00.000000",
                                        DateFormat.FULL_ISO)

        # Test when the current day does not have the open price
        actual_bars = self.data_provider.historical_price(
            self.ticker_2, PriceField.ohlcv(), 2, frequency=Frequency.DAILY)
        expected_bars = PricesDataFrame(
            data=[[29.0, 29.1, 29.2, 30.0, 29.3],
                  [27.0, 27.1, 27.2, None, 27.3]],
            index=[str_to_date('2021-05-02'),
                   str_to_date('2021-05-05')],
            columns=PriceField.ohlcv())
        assert_dataframes_equal(expected_bars, actual_bars, check_names=False)

        # NOTE(review): current_time is re-set here to the same value as above
        self.current_time = str_to_date("2021-05-06 00:00:00.000000",
                                        DateFormat.FULL_ISO)

        # Requesting 3 bars should additionally include the 2021-05-01 bar
        actual_bars = self.data_provider.historical_price(
            self.ticker_2, PriceField.ohlcv(), 3, frequency=Frequency.DAILY)
        expected_bars = PricesDataFrame(data=[[27.0, 27.1, 27.2, 28.0, 27.3],
                                              [29.0, 29.1, 29.2, 30.0, 29.3],
                                              [27.0, 27.1, 27.2, None, 27.3]],
                                        index=[
                                            str_to_date('2021-05-01'),
                                            str_to_date('2021-05-02'),
                                            str_to_date('2021-05-05')
                                        ],
                                        columns=PriceField.ohlcv())
        assert_dataframes_equal(expected_bars, actual_bars, check_names=False)

        # More than 3 bars are not available
        with self.assertRaises(ValueError):
            self.data_provider.historical_price(self.ticker_2,
                                                PriceField.ohlcv(),
                                                4,
                                                frequency=Frequency.DAILY)
Пример #12
0
    def setUpClass(cls):
        """
        Creates a mocked BloombergDataProvider whose get_price() always returns a fixed
        PricesDataFrame covering 2017-10-02 .. 2017-11-01 for seven tickers.
        """
        cls.start_date = str_to_date("2017-10-01")
        cls.end_date = str_to_date("2017-11-01")
        cls.frequency = Frequency.DAILY

        # Business days within the requested range (weekends excluded)
        datetime_index = pd.DatetimeIndex([
            '2017-10-02', '2017-10-03', '2017-10-04', '2017-10-05', '2017-10-06',
            '2017-10-09', '2017-10-10', '2017-10-11', '2017-10-12', '2017-10-13',
            '2017-10-16', '2017-10-17', '2017-10-18', '2017-10-19', '2017-10-20',
            '2017-10-23', '2017-10-24', '2017-10-25', '2017-10-26', '2017-10-27',
            '2017-10-30', '2017-10-31', '2017-11-01'
        ])

        bbg_data_provider = Mock(spec=BloombergDataProvider)

        all_tickers_str = ['BCIT3T Index', 'IEF US Equity', 'LQD US Equity', 'MSBIERTR Index', 'MXUS Index',
                           'SPGSCITR Index', 'XAU Curncy']
        all_tickers = BloombergTicker.from_string(all_tickers_str)
        # One row per business day, one column per ticker; the None on 2017-10-09 simulates a
        # missing quote for the MSBIERTR Index
        assets_prices_df = PricesDataFrame(index=datetime_index, columns=all_tickers, data=[
            [263.7628, 106.24, 121.02, 321.8249, 2409.48, 2295.60, 1271.13],
            [263.9803, 106.39, 121.29, 322.0949, 2414.41, 2294.91, 1271.66],
            [264.1640, 106.36, 121.22, 322.3203, 2417.31, 2294.28, 1274.85],
            [264.0932, 106.25, 121.05, 322.4172, 2430.80, 2323.34, 1268.22],
            [263.9816, 106.12, 120.95, 322.1411, 2428.16, 2282.24, 1276.68],
            [263.9816, 106.24, 121.05, None, 2423.41, 2284.78, 1284.05],
            [264.4529, 106.28, 121.13, 322.3113, 2428.73, 2318.99, 1288.03],
            [264.5108, 106.40, 121.07, 322.3553, 2433.09, 2324.63, 1291.72],
            [264.8223, 106.50, 121.10, 322.7489, 2428.89, 2314.78, 1293.72],
            [264.9401, 106.86, 121.58, 322.8720, 2430.63, 2342.19, 1303.82],
            [264.2089, 106.68, 121.41, 322.8467, 2434.66, 2353.20, 1295.79],
            [264.0592, 106.64, 121.39, 323.1079, 2436.35, 2345.04, 1285.12],
            [263.9370, 106.37, 121.21, 323.2238, 2438.08, 2345.57, 1281.08],
            [264.0463, 106.48, 121.39, 323.5498, 2439.31, 2332.31, 1290.13],
            [263.8424, 106.04, 121.06, 322.9874, 2451.70, 2340.26, 1280.47],
            [263.8961, 106.14, 121.18, 322.7436, 2441.71, 2343.72, 1282.27],
            [263.7129, 105.82, 120.88, 322.3214, 2445.61, 2366.00, 1276.58],
            [263.3216, 105.65, 120.56, 322.4332, 2434.13, 2364.23, 1277.53],
            [263.3638, 105.51, 120.55, 322.1635, 2438.07, 2376.52, 1266.99],
            [263.8662, 105.85, 120.91, 322.3655, 2457.45, 2396.93, 1273.35],
            [264.4531, 106.23, 121.31, 322.9710, 2449.20, 2407.43, 1276.29],
            [264.4690, 106.16, 121.14, 323.0688, 2452.15, 2415.28, 1271.45],
            [264.4727, 106.06, 121.01, 323.1553, 2455.70, 2415.48, 1274.66]
        ])
        # Every get_price() call on the mock returns the same fixed frame
        bbg_data_provider.get_price.return_value = assets_prices_df

        cls.bbg_data_provider = bbg_data_provider
Пример #13
0
    def calculate_analysis(cls, strategy_tms: QFSeries,
                           benchmark_tms: QFSeries):
        """
        Calculates the rolling table for provided timeseries.

        Parameters
        ----------
        strategy_tms
            strategy timeseries; its `name` attribute is used as the column label
        benchmark_tms
            benchmark timeseries, compared against the strategy in every window

        Returns
        -------
        list of RollingAnalysisDTO, one per rolling window that fits into the data
        """
        rows = list()
        # (window length in trading days, human-readable label)
        windows = [(6 * 21, "6 Months"), (252, "1 Year"), (252 * 2, "2 Years"),
                   (252 * 5, "5 Years")]

        # Align both series as prices in a single frame.
        # NOTE(review): assumes the data is daily - confirm with callers.
        df = PricesDataFrame()
        strategy_name = strategy_tms.name
        benchmark_name = benchmark_tms.name
        df[strategy_name] = strategy_tms.to_prices()
        df[benchmark_name] = benchmark_tms.to_prices()
        # fillna(method='ffill') is deprecated and removed in modern pandas; ffill() is equivalent
        df.ffill(inplace=True)

        for window, period_label in windows:
            # if window is too big for the strategy then skip it
            if window >= int(df.shape[0] / 2):
                continue

            # Advance the rolling window by 20% of its length at every step
            step = int(window * 0.2)

            strategy_rolling = df[strategy_name].rolling_window(
                window, lambda x: x.total_cumulative_return(), step)
            benchmark_rolling = df[benchmark_name].rolling_window(
                window, lambda x: x.total_cumulative_return(), step)

            # Fraction of windows in which the strategy beat the benchmark
            outperforming = strategy_rolling > benchmark_rolling
            percentage_outperforming = len(
                strategy_rolling[outperforming]) / len(strategy_rolling)

            dto = RollingAnalysisDTO(
                period=period_label,
                strategy_average=strategy_rolling.mean(),
                strategy_worst=strategy_rolling.min(),
                strategy_best=strategy_rolling.max(),
                benchmark_average=benchmark_rolling.mean(),
                benchmark_worst=benchmark_rolling.min(),
                benchmark_best=benchmark_rolling.max(),
                percentage_difference=percentage_outperforming)
            rows.append(dto)
        return rows
Пример #14
0
    def _create_performance_contribution_tables(
            self, performance_df: QFDataFrame) -> List[DFTable]:
        """
        Create a list of DFTables with assets names in the index and different years / months in columns, which contains
        details on the performance contribution for each asset.

        Parameters
        ----------
        performance_df
            frame with an "Asset" column and one numeric column per period; numeric values are
            converted in place into fractions of the portfolio value

        Returns
        -------
        List[DFTable]
            styled tables, each holding at most self._max_columns_per_page period columns
        """
        # Create a QFSeries which contains the initial amount of cash in the portfolio for each year / month
        numeric_columns = [
            col for col in performance_df.columns
            if is_numeric_dtype(performance_df[col])
        ]
        portfolio_values = performance_df[numeric_columns].sum().shift(
            fill_value=self._initial_cash).cumsum()
        # Express each asset's contribution as a fraction of the period's portfolio value
        performance_df[numeric_columns] = performance_df[
            numeric_columns] / portfolio_values[numeric_columns]

        # Add category column and aggregate data accordingly
        ticker_name_to_category = {
            t.name: category
            for t, category in self._ticker_to_category.items()
        }
        performance_df["Category"] = performance_df["Asset"].apply(
            lambda t: ticker_name_to_category[t])
        all_categories = list(set(ticker_name_to_category.values()))
        performance_df = performance_df.sort_values(by=["Category", "Asset"])
        # Prepend a summary row (named after the category) to each category group
        performance_df = performance_df.groupby("Category").apply(
            lambda d: pd.concat([
                PricesDataFrame({
                    **{
                        "Asset": [d.name],
                        "Category": [d.name]
                    },
                    **{c: [d[c].sum()]
                       for c in numeric_columns}
                }), d
            ],
                                ignore_index=True)).drop(columns=["Category"])

        # Add the Total Performance row (divide by 2 as the df contains already aggregated data for each group)
        total_sum_row = performance_df[numeric_columns].sum() / 2
        total_sum_row["Asset"] = "Total Performance"
        # NOTE(review): DataFrame.append was removed in pandas 2.0 - pd.concat is the replacement
        performance_df = performance_df.append(total_sum_row,
                                               ignore_index=True)

        # Format the rows using the percentage formatter
        # NOTE(review): applymap is deprecated in pandas >= 2.1 in favour of DataFrame.map
        performance_df[numeric_columns] = performance_df[
            numeric_columns].applymap(lambda x: '{:.2%}'.format(x))

        # Divide the performance dataframe into a number of dataframes, so that each of them contains up to
        # self._max_columns_per_page columns
        split_dfs = np.array_split(performance_df.set_index("Asset"),
                                   np.ceil(
                                       (performance_df.num_of_columns - 1) /
                                       self._max_columns_per_page),
                                   axis=1)
        df_tables = [
            DFTable(df.reset_index(),
                    css_classes=[
                        'table', 'shrink-font', 'right-align',
                        'wide-first-column'
                    ]) for df in split_dfs
        ]

        # Get the indices of rows, which contain category info
        category_indices = performance_df[performance_df["Asset"].isin(
            all_categories)].index

        for df_table in df_tables:
            # Add table formatting, highlight rows showing the total contribution of the given category
            df_table.add_rows_styles(
                category_indices, {
                    "font-weight": "bold",
                    "font-size": "0.95em",
                    "background-color": "#cbd0d2"
                })
            # The last row (Total Performance) gets a slightly darker highlight
            df_table.add_rows_styles(
                [performance_df.index[-1]], {
                    "font-weight": "bold",
                    "font-size": "0.95em",
                    "background-color": "#b9bcbd"
                })
        return df_tables
Пример #15
0
    def _generate_chain(self, fields, start_time: datetime, end_time: datetime) -> PricesDataFrame:
        """
        Returns a chain of futures, combined together using a certain method.

        Parameters
        ----------
        fields
            price field (or list of fields) to include in the chain
        start_time
        end_time
            the time ranges for the generated future chain

        Notes
        -----
        The combination method is taken from ``self._futures_adjustment_method``; possible methods:
            - NTH_NEAREST - the price data for a certain period of time is taken from the N-th contract, there is no
            discontinuities correction at the contract expiry dates
            - BACK_ADJUST - the historical price discontinuities are corrected, so that they would align smoothly on the
            expiry date. The gaps between consecutive contracts are being adjusted, by shifting the historical data by
            the difference between the Open price on the first day of new contract and Close price on the last day of
            the old contract. The back adjustment considers only the Open, High, Low, Close price values.
            The Volumes are not being adjusted.
        """
        # Verify the parameters values
        N = self._future_ticker.get_N()
        days_before_exp_date = self._future_ticker.get_days_before_exp_date()
        fields, got_single_field = convert_to_list(fields, PriceField)

        if N < 1 or days_before_exp_date < 1:
            raise ValueError("The number of the contract and the number of days before expiration date should be "
                             "greater than 0.")

        # Shift the index and data according to the start time and end time values. We shift the number of days by 1,
        # so that the days_before_exp_date=1 will use the prices on the expiration date from the newer contract.
        shifted_index = pd.DatetimeIndex(self.index) - pd.Timedelta(days=(days_before_exp_date - 1))
        if shifted_index.empty:
            return PricesDataFrame(columns=fields)

        # We use the backfill search for locating the start time, because we will additionally consider the time range
        # between start_time and the found starting expiry date time
        start_time_index_position = shifted_index.get_loc(start_time, method='backfill')

        shifted_index = shifted_index[start_time_index_position:]
        shifted_data = self.iloc[start_time_index_position:]
        # Drop the first N-1 contracts so the N-th nearest contract becomes the first element
        shifted_data = shifted_data.iloc[(N - 1):]

        # Compute the time ranges for each of the contract. The time ranges should be equal to:
        # [[start_date, exp_date_1 - days_before_exp_date),
        #  [exp_date_1 - days_before_exp_date, exp_date_2 - days_before_exp_date),
        #  [exp_date_2 - days_before_exp_date, exp_date_3 - days_before_exp_date)
        #   ...
        #  [exp_date_K - days_before_exp_date, end_date]]
        # Each of these time ranges is mapped into one contract, from which date within this time would be taken.
        index_left_ranges = [pd.to_datetime(start_time)] + list(shifted_index)
        index_right_ranges = list(shifted_index)

        # Combine the calculated time ranges with the corresponding future contracts. We want the N-th contract
        # to be mapped onto the first time range (start_date, exp_date_1 - days_before_exp_date), N+1-th contract
        # to be mapped onto the second time range etc, therefore we zip the list of both left and ride boundaries
        # of time ranges with a shifted list of contracts.
        time_ranges_and_futures = zip(index_left_ranges, index_right_ranges, shifted_data)

        # Get the data within the desired time ranges from corresponding contracts
        combined_data_frame = pd.concat(
            [future.data.loc[left:right] for left, right, future in time_ranges_and_futures], sort=False)
        # To avoid shifting data on the time ranges, we use overlapping ends and beginnings of the time ranges.
        # Therefore, we need to check if any duplicates exist and on the expiry dates, we keep the data from
        # newer contract
        combined_data_frame = combined_data_frame[~combined_data_frame.index.duplicated(keep='last')]
        combined_data_frame = combined_data_frame.loc[:end_time]

        if self._futures_adjustment_method == FuturesAdjustmentMethod.BACK_ADJUSTED:
            # Create the back adjusted series
            # Compute the differences between prices on the expiration days (shifted by the days_before_exp_date
            # number of days). In case if the shifted days in the index contain e.g. saturdays, sundays or other dates
            # that are not in the Future's prices data frame, the first older valid date is taken.
            end_time_index_position = shifted_index.get_loc(end_time, method='pad')

            # In the following slice, in case if end_time == expiry date, we also want to include it in the index
            first_days_of_next_contracts = shifted_index[:end_time_index_position + 1]

            # Apply the back adjustment. Pass the futures chain shifting the data in the way, which will allow to
            # treat the Nth contract as the first element of the data frame
            combined_data_frame = self._back_adjust(fields, first_days_of_next_contracts,
                                                    shifted_data,
                                                    combined_data_frame)

        return combined_data_frame