def setUp(self):
    self.dates = pd.date_range(start='2015-05-13', periods=5)
    self.column_names = ['a', 'b', 'c', 'd', 'e']

    self.prices_values = [[1., 1., 1., 1., 1.],
                          [2., 2., 2., 2., 2.],
                          [3., 3., 3., 3., 3.],
                          [4., 4., 4., 4., 4.],
                          [5., 5., 5., 5., 5.]]
    self.test_prices_df = PricesDataFrame(data=self.prices_values, index=self.dates,
                                          columns=self.column_names)

    self.log_returns_values = [[0.693147, 0.693147, 0.693147, 0.693147, 0.693147],
                               [0.405465, 0.405465, 0.405465, 0.405465, 0.405465],
                               [0.287682, 0.287682, 0.287682, 0.287682, 0.287682],
                               [0.223144, 0.223144, 0.223144, 0.223144, 0.223144]]
    self.test_log_returns_df = LogReturnsDataFrame(data=self.log_returns_values, index=self.dates[1:],
                                                   columns=self.column_names)

    self.simple_returns_values = [[1.000000, 1.000000, 1.000000, 1.000000, 1.000000],
                                  [0.500000, 0.500000, 0.500000, 0.500000, 0.500000],
                                  [0.333333, 0.333333, 0.333333, 0.333333, 0.333333],
                                  [0.250000, 0.250000, 0.250000, 0.250000, 0.250000]]
    self.test_simple_returns_df = SimpleReturnsDataFrame(data=self.simple_returns_values,
                                                         index=self.dates[1:],
                                                         columns=self.column_names)
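# A quick, self-contained sanity check (plain numpy, no qf-lib needed) of how the fixtures
# above relate: simple returns are P_t / P_{t-1} - 1 and log returns are ln(P_t / P_{t-1}),
# so prices 1..5 give simple returns 1.0, 0.5, 0.333333, 0.25 and log returns
# 0.693147 (= ln 2), 0.405465, 0.287682, 0.223144.
import numpy as np

prices = np.array([1., 2., 3., 4., 5.])
simple_returns = prices[1:] / prices[:-1] - 1     # [1.0, 0.5, 0.3333, 0.25]
log_returns = np.log(prices[1:] / prices[:-1])    # [0.693147, 0.405465, 0.287682, 0.223144]
assert np.allclose(log_returns, np.log(1 + simple_returns))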
def _data_array_to_dataframe(self, prices_data_array: QFDataArray, frequency: Frequency):
    """
    Converts a QFDataArray into a DataFrame by removing the "Price Field" axis.

    Every index value (e.g. 15:00) denotes the close price of the bar starting at that time
    (15:00 - 15:01). The only exception is the bar one minute before market open (e.g.
    9:29 - 9:30 if the market opens at 9:30); its price denotes the OPEN price of the
    9:30 - 9:31 bar.
    """
    original_dates = list(prices_data_array.dates.to_index())
    dates = prices_data_array.resample(dates='1D').first().dates.to_index()
    market_open_datetimes = [price_datetime + MarketOpenEvent.trigger_time() for price_datetime in dates
                             if price_datetime + MarketOpenEvent.trigger_time() in original_dates]
    shifted_open_datetimes = [price_datetime - frequency.time_delta()
                              for price_datetime in market_open_datetimes]

    new_dates = sorted(set(original_dates + shifted_open_datetimes))

    prices_df = PricesDataFrame(index=new_dates, columns=prices_data_array.tickers)
    prices_df.loc[shifted_open_datetimes, :] = \
        prices_data_array.loc[market_open_datetimes, :, PriceField.Open].values
    prices_df.loc[original_dates, :] = prices_data_array.loc[original_dates, :, PriceField.Close].values

    return prices_df
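# Minimal sketch (plain pandas, hypothetical data) of the timestamp trick used above: the
# market-open bar's OPEN price is stored one bar *before* the open, so a lookup of
# prices.loc[:some_time] right at the open returns the open price as the latest value.
import pandas as pd

minute = pd.Timedelta(minutes=1)
open_time = pd.Timestamp("2021-05-18 09:30")
prices = pd.Series([10.1, 10.2], index=[open_time, open_time + minute])  # close prices per bar

open_price_of_first_bar = 10.0
prices.loc[open_time - minute] = open_price_of_first_bar  # the 9:29 row holds the 9:30 open
prices = prices.sort_index()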
def _compute_pnl_for_ticker(self, prices_df: PricesDataFrame, transactions_series: QFSeries,
                            start_date: datetime, end_date: datetime) -> PricesSeries:
    pnl_values = []
    current_realised_pnl = 0
    ticker_to_position = {}  # type: Dict[Ticker, BacktestPosition]
    prices_df = prices_df.ffill()

    for timestamp in date_range(start_date, end_date, freq="B"):
        timestamp = timestamp + AfterMarketCloseEvent.trigger_time()
        previous_after_market_close = timestamp - RelativeDelta(days=1)

        transactions_for_past_day = transactions_series.loc[previous_after_market_close:timestamp]
        transactions_for_past_day = transactions_for_past_day \
            .where(transactions_for_past_day.index > previous_after_market_close).dropna(how="all")

        for t in transactions_for_past_day:
            position = ticker_to_position.get(t.ticker, BacktestPositionFactory.create_position(t.ticker))
            ticker_to_position[t.ticker] = position

            position.transact_transaction(t)
            if position.is_closed():
                ticker_to_position.pop(t.ticker)
                current_realised_pnl += position.total_pnl

        # update prices of all existing positions and get their unrealised pnl
        current_unrealised_pnl = 0.0
        for ticker, position in ticker_to_position.items():
            price = prices_df.loc[:timestamp, ticker].iloc[-1]
            position.update_price(price, price)
            current_unrealised_pnl += position.total_pnl

        pnl_values.append(current_unrealised_pnl + current_realised_pnl)

    return PricesSeries(data=pnl_values, index=date_range(start_date, end_date, freq="B"))
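# Hedged sketch (hypothetical numbers) of the PnL decomposition used above: the reported
# PnL at each close is the realised PnL of closed positions plus the unrealised PnL of the
# positions still open, marked at the latest available price.
quantity, entry_price, current_price = 10, 100.0, 103.0
unrealised_pnl = quantity * (current_price - entry_price)  # 30.0 while the position is open
# ... after selling everything at 105.0 the position is closed:
realised_pnl = quantity * (105.0 - entry_price)            # 50.0, added to current_realised_pnl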
def test_historical_price__margin_adjustment__daily(self):
    # If only 1 historical bar is requested and the last full bar was more than ~12 days ago, the margin
    # for the "number of days to go back" needs to be adjusted
    self.current_time = str_to_date("2021-05-18 00:00:00.000000", DateFormat.FULL_ISO)
    actual_bars = self.data_provider.historical_price(self.ticker_1, PriceField.ohlcv(), 1,
                                                      frequency=Frequency.DAILY)
    expected_bars = PricesDataFrame(data=[[25.0, 25.1, 25.2, None, 25.3]],
                                    index=[str_to_date('2021-05-05')], columns=PriceField.ohlcv())
    assert_dataframes_equal(actual_bars, expected_bars, check_names=False)

    self.current_time = str_to_date("2021-05-27 00:00:00.000000", DateFormat.FULL_ISO)
    actual_bars = self.data_provider.historical_price(self.ticker_1, PriceField.ohlcv(), 1,
                                                      frequency=Frequency.DAILY)
    assert_dataframes_equal(actual_bars, expected_bars, check_names=False)

    with self.assertRaises(ValueError):
        self.current_time = str_to_date("2021-06-06 00:00:00.000000", DateFormat.FULL_ISO)
        self.data_provider.historical_price(self.ticker_1, PriceField.ohlcv(), 1, frequency=Frequency.DAILY)
def setUp(self):
    tms = date_range('1991-05-14', periods=12, freq='D')
    open = [100.55, 101.20, 103.29, 99.64, 126.93, 127.38, 125.39, 128.04, 124.17, 122.72, 123.83, 126.74]
    high = [105.16, 105.36, 104.13, 106.35, 130.47, 132.54, 128.83, 131.73, 128.29, 127.16, 126.27, 132.98]
    low = [98.07, 99.43, 100.03, 98.89, 115.37, 122.72, 120.48, 126.39, 121.27, 120.74, 122.01, 126.26]
    close = [101.20, 105.15, 101.12, 104.37, 121.72, 124.73, 125.29, 126.67, 122.92, 124.05, 122.20, 128.19]
    data_2d = array([open, high, low, close]).transpose()
    self.ohlc = PricesDataFrame(data=data_2d, index=tms,
                                columns=[PriceField.Open, PriceField.High,
                                         PriceField.Low, PriceField.Close])
def _get_prices_df(self, ticker: Ticker, start_date: datetime, end_date: datetime) -> PricesDataFrame:
    """ Returns non-adjusted open and close prices, indexed with the Market Open and Market Close time. """
    if isinstance(ticker, FutureTicker):
        ticker.initialize_data_provider(SettableTimer(end_date), self._data_provider)
        tickers_chain = ticker.get_expiration_dates()

        if start_date >= tickers_chain.index[-1] or end_date <= tickers_chain.index[0]:
            # If the futures chain starts after the end_date or ends before the start_date - no data available
            return PricesDataFrame()

        # Get all tickers from the chain that were valid between the start_date and the expiration date of the
        # currently valid ticker
        end_date = tickers_chain[tickers_chain == ticker.get_current_specific_ticker()].index[0]
        tickers_chain = tickers_chain.loc[start_date:end_date]
        tickers = tickers_chain.values.tolist()

        open_prices = self._data_provider.get_price(tickers, PriceField.Open, start_date, end_date)
        close_prices = self._data_provider.get_price(tickers, PriceField.Close, start_date, end_date)
    else:
        open_prices = self._data_provider.get_price([ticker], PriceField.Open, start_date, end_date)
        close_prices = self._data_provider.get_price([ticker], PriceField.Close, start_date, end_date)

    open_prices.index = [dt + MarketOpenEvent.trigger_time() for dt in open_prices.index]
    close_prices.index = [dt + MarketCloseEvent.trigger_time() for dt in close_prices.index]
    prices = concat([open_prices, close_prices]).sort_index()
    return prices
def calculate_analysis(cls, strategy_tms: QFSeries, benchmark_tms: QFSeries):
    """ Calculates the rolling table for the provided timeseries. """
    rows = list()
    windows = [(6 * 21, "6 Months"), (252, "1 Year"), (252 * 2, "2 Years"), (252 * 5, "5 Years")]

    # Ensure that this data is daily.
    df = PricesDataFrame()
    strategy_name = strategy_tms.name
    benchmark_name = benchmark_tms.name
    df[strategy_name] = strategy_tms.to_prices()
    df[benchmark_name] = benchmark_tms.to_prices()
    df.fillna(method='ffill', inplace=True)

    for window_info in windows:
        window = window_info[0]
        # if the window is too big for the strategy then skip it
        if window >= int(df.shape[0] / 2):
            continue
        step = int(window * 0.2)

        strategy_rolling = df[strategy_name].rolling_window(window, lambda x: x.total_cumulative_return(), step)
        benchmark_rolling = df[benchmark_name].rolling_window(window, lambda x: x.total_cumulative_return(), step)

        outperforming = strategy_rolling > benchmark_rolling
        percentage_outperforming = len(strategy_rolling[outperforming]) / len(strategy_rolling)

        dto = RollingAnalysisDTO(
            period=window_info[1],
            strategy_average=strategy_rolling.mean(),
            strategy_worst=strategy_rolling.min(),
            strategy_best=strategy_rolling.max(),
            benchmark_average=benchmark_rolling.mean(),
            benchmark_worst=benchmark_rolling.min(),
            benchmark_best=benchmark_rolling.max(),
            percentage_difference=percentage_outperforming)
        rows.append(dto)
    return rows
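# Hedged sketch (hypothetical rolling returns) of the comparison above: the "percentage
# outperforming" is simply the share of rolling windows in which the strategy's cumulative
# return beat the benchmark's.
strategy_rolling = [0.05, 0.02, -0.01, 0.04]
benchmark_rolling = [0.03, 0.03, -0.02, 0.01]
outperforming = [s > b for s, b in zip(strategy_rolling, benchmark_rolling)]
percentage_outperforming = sum(outperforming) / len(strategy_rolling)  # 3 / 4 = 0.75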
def _replace_close_by_next_open(prices_df: PricesDataFrame):
    result = prices_df.drop(columns=[PriceField.Close])
    open = prices_df[PriceField.Open]
    result[PriceField.Close] = open.shift(-1)  # shift to put the open of the next day in place of the close
    result = result.drop(result.index[-1])  # remove the last row, which would contain a NaN
    return result
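# Tiny pandas illustration (hypothetical prices) of the shift(-1) trick above: each row's
# close becomes the next row's open, and the last row is dropped because it has no successor.
import pandas as pd

df = pd.DataFrame({"open": [10.0, 11.0, 12.0], "close": [10.5, 11.5, 12.5]})
df["close"] = df["open"].shift(-1)  # close -> next day's open: [11.0, 12.0, NaN]
df = df.drop(df.index[-1])          # drop the trailing NaN row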
def setUp(self):
    self.tms = date_range('1991-05-14', periods=6, freq='D')
    open = [100, 100, 100, 101, 101, 102]
    close = [100, 100, 100, 101, 101, 102]
    data_2d = array([open, close]).transpose()
    self.prices_df = PricesDataFrame(data=data_2d, index=self.tms,
                                     columns=[PriceField.Open, PriceField.Close])
def test_alpha_model__calculate_fraction_at_risk(self):
    data_handler = MagicMock()
    prices_df = PricesDataFrame.from_records(data=[(6.0, 4.0, 5.0) for _ in range(10)],
                                             columns=[PriceField.High, PriceField.Low, PriceField.Close])
    data_handler.historical_price.return_value = prices_df
    atr = average_true_range(prices_df, normalized=True)

    risk_estimation_factor = 3
    alpha_model = AlphaModel(risk_estimation_factor=risk_estimation_factor, data_provider=data_handler)
    fraction_at_risk = alpha_model.calculate_fraction_at_risk(self.ticker)
    self.assertEqual(risk_estimation_factor * atr, fraction_at_risk)
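# Worked check of the fixture above (assuming the standard true-range definition):
# TR = max(high - low, |high - prev_close|, |low - prev_close|) = max(6 - 4, |6 - 5|, |4 - 5|) = 2,
# normalized TR = 2 / close = 2 / 5 = 0.4, so ATR = 0.4 and fraction_at_risk = 3 * 0.4 = 1.2.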
def test_exponential_average(self):
    smoothed_values = [[1.000000, 1.000000, 1.000000, 1.000000, 1.000000],
                       [1.940000, 1.940000, 1.940000, 1.940000, 1.940000],
                       [2.936400, 2.936400, 2.936400, 2.936400, 2.936400],
                       [3.936184, 3.936184, 3.936184, 3.936184, 3.936184],
                       [4.936171, 4.936171, 4.936171, 4.936171, 4.936171]]
    expected_dataframe = PricesDataFrame(data=smoothed_values, index=self.dates,
                                         columns=self.column_names)
    actual_dataframe = self.test_prices_df.exponential_average()
    assert_dataframes_equal(expected_dataframe, actual_dataframe)
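# The expected values above are consistent with an exponential moving average that puts
# weight 0.94 on the newest observation: s_t = 0.94 * x_t + 0.06 * s_{t-1}. A plain-pandas
# cross-check (not the qf-lib API, just a sanity sketch):
import pandas as pd

prices = pd.Series([1., 2., 3., 4., 5.])
ema = prices.ewm(alpha=0.94, adjust=False).mean()  # 1.0, 1.94, 2.9364, 3.936184, 4.93617104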
def test_min_max_normalized(self):
    normalized_prices = [[0.00, 0.00, 0.00, 0.00, 0.00],
                         [0.25, 0.25, 0.25, 0.25, 0.25],
                         [0.50, 0.50, 0.50, 0.50, 0.50],
                         [0.75, 0.75, 0.75, 0.75, 0.75],
                         [1.00, 1.00, 1.00, 1.00, 1.00]]
    expected_dataframe = PricesDataFrame(data=normalized_prices, index=self.dates,
                                         columns=self.column_names)
    actual_dataframe = self.test_prices_df.min_max_normalized()
    assert_dataframes_equal(expected_dataframe, actual_dataframe)
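# For reference, min-max normalization maps each column onto [0, 1] via (x - min) / (max - min);
# for the price column 1..5 this gives exactly 0.00, 0.25, 0.50, 0.75, 1.00, which is what the
# fixture above encodes.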
def _data_array_to_dataframe(self, prices_data_array: QFDataArray):
    """
    Converts a QFDataArray into a DataFrame by removing the "Price Field" axis.

    In order to remove it, open and close prices get a different time component in their
    corresponding datetimes (open prices get the time of `MarketOpenEvent` and close prices
    get the time of `MarketCloseEvent`).
    """
    original_dates = prices_data_array.dates.to_index()

    market_open_datetimes = [price_datetime + MarketOpenEvent.trigger_time()
                             for price_datetime in original_dates]
    market_close_datetimes = [price_datetime + MarketCloseEvent.trigger_time()
                              for price_datetime in original_dates]
    new_dates = set(market_open_datetimes + market_close_datetimes)

    prices_df = PricesDataFrame(index=new_dates, columns=prices_data_array.tickers)
    prices_df.loc[market_open_datetimes, :] = prices_data_array.loc[:, :, PriceField.Open].values
    prices_df.loc[market_close_datetimes, :] = prices_data_array.loc[:, :, PriceField.Close].values
    prices_df.sort_index(inplace=True)
    return prices_df
def test_get_price_with_single_field(self):
    actual_frame = self.prefetching_data_provider.get_price(self.cached_tickers, PriceField.Volume,
                                                            self.start_date, self.end_date, self.frequency)
    expected_frame = PricesDataFrame(
        data=np.full((len(self.cached_dates_idx), len(self.cached_tickers_idx)), 0),
        index=self.cached_dates_idx,
        columns=self.cached_tickers_idx)
    tt.assert_dataframes_equal(expected_frame, actual_frame, check_index_type=True, check_column_type=True)
def _get_simulation_plot(self, scenarios_df: PricesDataFrame) -> Chart:
    chart = LineChart(log_scale=True)

    for _, scenario in scenarios_df.items():
        data_element = DataElementDecorator(scenario, linewidth=0.5)
        chart.add_decorator(data_element)

    # Add a legend
    legend = LegendDecorator(key="legend_decorator")

    # Add the ensemble average
    ensemble_avg = scenarios_df.mean(axis=1)
    ensemble_avg_data_element = DataElementDecorator(ensemble_avg, color="#e1e5f4", linewidth=3)
    chart.add_decorator(ensemble_avg_data_element)
    legend.add_entry(ensemble_avg_data_element, "Ensemble average")

    # Add the expectation (vol adjusted)
    trade_returns = QFSeries(data=[trade.percentage_pnl for trade in self.trades])
    std = trade_returns.std()
    expectation_adj_series = np.ones(len(ensemble_avg)) * (trade_returns.mean() - 0.5 * std * std)
    expectation_adj_series = SimpleReturnsSeries(data=expectation_adj_series, index=ensemble_avg.index)
    expectation_adj_series = expectation_adj_series.to_prices()
    data_element = DataElementDecorator(expectation_adj_series, color="#46474b", linewidth=2)
    chart.add_decorator(data_element)
    legend.add_entry(data_element, "Expectation (vol adjusted)")

    # Add the title
    title_decorator = TitleDecorator("Monte Carlo Simulations (log scale)", key="title")
    chart.add_decorator(title_decorator)

    position_decorator = AxesPositionDecorator(*self.full_image_axis_position)
    chart.add_decorator(position_decorator)

    chart.add_decorator(legend)
    return chart
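# Note on the "Expectation (vol adjusted)" line above: compounding a per-trade return with
# mean mu and standard deviation sigma grows at roughly mu - sigma^2 / 2 per step (the
# standard arithmetic-to-geometric adjustment), which is exactly the constant used for the
# series. E.g. mu = 1% and sigma = 10% give 0.01 - 0.005 = 0.5% of expected growth per trade.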
def test_historical_price__single_ticker__multiple_fields__daily(self):
    self.current_time = str_to_date("2021-05-06 00:00:00.000000", DateFormat.FULL_ISO)

    # Test when the current day does not have the open price
    actual_bars = self.data_provider.historical_price(self.ticker_2, PriceField.ohlcv(), 2,
                                                      frequency=Frequency.DAILY)
    expected_bars = PricesDataFrame(data=[[29.0, 29.1, 29.2, 30.0, 29.3],
                                          [27.0, 27.1, 27.2, None, 27.3]],
                                    index=[str_to_date('2021-05-02'), str_to_date('2021-05-05')],
                                    columns=PriceField.ohlcv())
    assert_dataframes_equal(expected_bars, actual_bars, check_names=False)

    self.current_time = str_to_date("2021-05-06 00:00:00.000000", DateFormat.FULL_ISO)
    actual_bars = self.data_provider.historical_price(self.ticker_2, PriceField.ohlcv(), 3,
                                                      frequency=Frequency.DAILY)
    expected_bars = PricesDataFrame(data=[[27.0, 27.1, 27.2, 28.0, 27.3],
                                          [29.0, 29.1, 29.2, 30.0, 29.3],
                                          [27.0, 27.1, 27.2, None, 27.3]],
                                    index=[str_to_date('2021-05-01'), str_to_date('2021-05-02'),
                                           str_to_date('2021-05-05')],
                                    columns=PriceField.ohlcv())
    assert_dataframes_equal(expected_bars, actual_bars, check_names=False)

    # More than 3 bars are not available
    with self.assertRaises(ValueError):
        self.data_provider.historical_price(self.ticker_2, PriceField.ohlcv(), 4, frequency=Frequency.DAILY)
def compute_atr(self, prices_df: PricesDataFrame):
    try:
        prices_df = prices_df[[PriceField.Close, PriceField.Open, PriceField.High, PriceField.Low]]
        prices_df = prices_df.dropna(how='all').fillna(method='ffill').dropna()
        # Compute the ATR
        atr_values = average_true_range(prices_df.iloc[-self.num_of_bars_atr:], normalized=True)
    except Exception:
        raise NotEnoughDataException("Not enough data to compute the average true range")
    return atr_values
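# A minimal plain-pandas sketch of the (normalized) true range that average_true_range is
# assumed to aggregate: TR = max(high - low, |high - prev_close|, |low - prev_close|),
# optionally divided by the close. Hypothetical data, not the qf-lib implementation.
import pandas as pd

df = pd.DataFrame({"high": [6.0, 6.5], "low": [4.0, 5.0], "close": [5.0, 6.0]})
prev_close = df["close"].shift(1)
true_range = pd.concat([df["high"] - df["low"],
                        (df["high"] - prev_close).abs(),
                        (df["low"] - prev_close).abs()], axis=1).max(axis=1)
normalized_tr = true_range / df["close"]  # ATR is then an average of these values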
def setUpClass(cls):
    cls.start_date = str_to_date("2017-10-01")
    cls.end_date = str_to_date("2017-11-01")
    cls.frequency = Frequency.DAILY

    datetime_index = pd.DatetimeIndex([
        '2017-10-02', '2017-10-03', '2017-10-04', '2017-10-05', '2017-10-06',
        '2017-10-09', '2017-10-10', '2017-10-11', '2017-10-12', '2017-10-13',
        '2017-10-16', '2017-10-17', '2017-10-18', '2017-10-19', '2017-10-20',
        '2017-10-23', '2017-10-24', '2017-10-25', '2017-10-26', '2017-10-27',
        '2017-10-30', '2017-10-31', '2017-11-01'
    ])

    bbg_data_provider = Mock(spec=BloombergDataProvider)

    all_tickers_str = ['BCIT3T Index', 'IEF US Equity', 'LQD US Equity', 'MSBIERTR Index',
                       'MXUS Index', 'SPGSCITR Index', 'XAU Curncy']
    all_tickers = BloombergTicker.from_string(all_tickers_str)
    assets_prices_df = PricesDataFrame(index=datetime_index, columns=all_tickers, data=[
        [263.7628, 106.24, 121.02, 321.8249, 2409.48, 2295.60, 1271.13],
        [263.9803, 106.39, 121.29, 322.0949, 2414.41, 2294.91, 1271.66],
        [264.1640, 106.36, 121.22, 322.3203, 2417.31, 2294.28, 1274.85],
        [264.0932, 106.25, 121.05, 322.4172, 2430.80, 2323.34, 1268.22],
        [263.9816, 106.12, 120.95, 322.1411, 2428.16, 2282.24, 1276.68],
        [263.9816, 106.24, 121.05, None, 2423.41, 2284.78, 1284.05],
        [264.4529, 106.28, 121.13, 322.3113, 2428.73, 2318.99, 1288.03],
        [264.5108, 106.40, 121.07, 322.3553, 2433.09, 2324.63, 1291.72],
        [264.8223, 106.50, 121.10, 322.7489, 2428.89, 2314.78, 1293.72],
        [264.9401, 106.86, 121.58, 322.8720, 2430.63, 2342.19, 1303.82],
        [264.2089, 106.68, 121.41, 322.8467, 2434.66, 2353.20, 1295.79],
        [264.0592, 106.64, 121.39, 323.1079, 2436.35, 2345.04, 1285.12],
        [263.9370, 106.37, 121.21, 323.2238, 2438.08, 2345.57, 1281.08],
        [264.0463, 106.48, 121.39, 323.5498, 2439.31, 2332.31, 1290.13],
        [263.8424, 106.04, 121.06, 322.9874, 2451.70, 2340.26, 1280.47],
        [263.8961, 106.14, 121.18, 322.7436, 2441.71, 2343.72, 1282.27],
        [263.7129, 105.82, 120.88, 322.3214, 2445.61, 2366.00, 1276.58],
        [263.3216, 105.65, 120.56, 322.4332, 2434.13, 2364.23, 1277.53],
        [263.3638, 105.51, 120.55, 322.1635, 2438.07, 2376.52, 1266.99],
        [263.8662, 105.85, 120.91, 322.3655, 2457.45, 2396.93, 1273.35],
        [264.4531, 106.23, 121.31, 322.9710, 2449.20, 2407.43, 1276.29],
        [264.4690, 106.16, 121.14, 323.0688, 2452.15, 2415.28, 1271.45],
        [264.4727, 106.06, 121.01, 323.1553, 2455.70, 2415.48, 1274.66]
    ])
    bbg_data_provider.get_price.return_value = assets_prices_df
    cls.bbg_data_provider = bbg_data_provider
def _insert_table_with_overall_measures(self, prices_df: PricesDataFrame, ticker: Ticker):
    table = Table(column_names=["Measure", "Value"], css_class="table stats-table")

    table.add_row(["Instrument", ticker.as_string()])
    series = prices_df[PriceField.Close]
    table.add_row(["Start date", date_to_str(series.index[0])])
    table.add_row(["End date", date_to_str(series.index[-1])])

    trend_strength_overall = trend_strength(prices_df, self.use_next_open_instead_of_close)
    table.add_row(["Overall strength of the day trends", trend_strength_overall])

    trend_strength_1y = trend_strength(prices_df.tail(252), self.use_next_open_instead_of_close)
    table.add_row(["Strength of the day trends in last 1Y", trend_strength_1y])
    self.ticker_to_trend_dict[ticker] = (trend_strength_1y, trend_strength_overall)

    table.add_row(["Up trends strength", up_trend_strength(prices_df, self.use_next_open_instead_of_close)])
    table.add_row(["Down trends strength", down_trend_strength(prices_df, self.use_next_open_instead_of_close)])
    self.document.add_element(table)
def _get_chances_of_dropping_below_table(self, scenarios_df: PricesDataFrame) -> DFTable:
    _, all_scenarios_number = scenarios_df.shape
    rows = []
    crop_table = False

    for percentage in np.linspace(0.1, 0.9, 9):
        # Count the number of scenarios whose value at some point in time dropped below
        # percentage * initial value
        _, scenarios_above_percentage = scenarios_df.where(scenarios_df > (1.0 - percentage)) \
            .dropna(axis=1).shape
        probability = (all_scenarios_number - scenarios_above_percentage) / all_scenarios_number
        rows.append(("{:.0%}".format(percentage), "{:.2%}".format(probability)))

        if crop_table is True:
            break
        elif probability < 0.1:
            crop_table = True

    table = DFTable(QFDataFrame.from_records(rows, columns=["Chances of dropping below", "Probability"]),
                    css_classes=['table', 'left-align'])
    table.add_columns_classes(["Chances of dropping below"], 'wide-column')
    return table
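# Hedged numeric sketch of the probability computed above (hypothetical scenario paths that
# start at 1.0): a scenario "drops below 10%" if any of its values goes under 0.9.
import pandas as pd

scenarios = pd.DataFrame({"s1": [1.0, 0.95, 1.02], "s2": [1.0, 0.85, 1.10]})
survived = scenarios.where(scenarios > 0.9).dropna(axis=1).shape[1]  # only s1 survives -> 1
probability = (scenarios.shape[1] - survived) / scenarios.shape[1]   # 1 of 2 dropped -> 0.5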
def _create_performance_contribution_tables(self, performance_df: QFDataFrame) -> List[DFTable]:
    """
    Creates a list of DFTables with asset names in the index and different years / months in the columns,
    containing details on the performance contribution of each asset.
    """
    # Create a QFSeries which contains the initial amount of cash in the portfolio for each year / month
    numeric_columns = [col for col in performance_df.columns if is_numeric_dtype(performance_df[col])]
    portfolio_values = performance_df[numeric_columns].sum().shift(fill_value=self._initial_cash).cumsum()
    performance_df[numeric_columns] = performance_df[numeric_columns] / portfolio_values[numeric_columns]

    # Add the category column and aggregate the data accordingly
    ticker_name_to_category = {t.name: category for t, category in self._ticker_to_category.items()}
    performance_df["Category"] = performance_df["Asset"].apply(lambda t: ticker_name_to_category[t])
    all_categories = list(set(ticker_name_to_category.values()))
    performance_df = performance_df.sort_values(by=["Category", "Asset"])
    performance_df = performance_df.groupby("Category").apply(
        lambda d: pd.concat([PricesDataFrame({**{"Asset": [d.name], "Category": [d.name]},
                                              **{c: [d[c].sum()] for c in numeric_columns}}), d],
                            ignore_index=True)).drop(columns=["Category"])

    # Add the Total Performance row (divide by 2, as the df already contains aggregated data for each group)
    total_sum_row = performance_df[numeric_columns].sum() / 2
    total_sum_row["Asset"] = "Total Performance"
    performance_df = performance_df.append(total_sum_row, ignore_index=True)

    # Format the rows using the percentage formatter
    performance_df[numeric_columns] = performance_df[numeric_columns].applymap(lambda x: '{:.2%}'.format(x))

    # Divide the performance dataframe into a number of dataframes, so that each of them contains up to
    # self._max_columns_per_page columns
    split_dfs = np.array_split(performance_df.set_index("Asset"),
                               np.ceil((performance_df.num_of_columns - 1) / self._max_columns_per_page),
                               axis=1)
    df_tables = [DFTable(df.reset_index(),
                         css_classes=['table', 'shrink-font', 'right-align', 'wide-first-column'])
                 for df in split_dfs]

    # Get the indices of rows which contain category info
    category_indices = performance_df[performance_df["Asset"].isin(all_categories)].index
    for df_table in df_tables:
        # Add table formatting, highlight rows showing the total contribution of the given category
        df_table.add_rows_styles(category_indices, {"font-weight": "bold", "font-size": "0.95em",
                                                    "background-color": "#cbd0d2"})
        df_table.add_rows_styles([performance_df.index[-1]], {"font-weight": "bold", "font-size": "0.95em",
                                                              "background-color": "#b9bcbd"})
    return df_tables
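# Hedged numeric sketch (hypothetical values) of the normalization performed above: each
# period's PnL is divided by the portfolio value at the *start* of that period, i.e. the
# initial cash plus the total PnL of all previous periods (the shift + cumsum pattern).
initial_cash = 1000.0
total_pnl_per_period = [50.0, 30.0]                           # summed over all assets
portfolio_start_values = [initial_cash, initial_cash + 50.0]  # shift(fill_value=initial_cash).cumsum()
contributions = [pnl / start for pnl, start in zip(total_pnl_per_period, portfolio_start_values)]
# -> [0.05, 0.02857...]: a 5% contribution in the first period, ~2.9% in the second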
def _generate_chain(self, fields, start_time: datetime, end_time: datetime) -> PricesDataFrame:
    """
    Returns a chain of futures, combined together using the chain's configured adjustment method.

    Parameters
    ----------
    fields
        the price fields which should be included in the returned chain
    start_time
    end_time
        the time range for the generated futures chain

    The contracts are combined into one data series using one of the following methods:
    - NTH_NEAREST - the price data for a certain period of time is taken from the N-th contract;
      there is no discontinuities correction at the contract expiry dates
    - BACK_ADJUSTED - the historical price discontinuities are corrected, so that they align smoothly
      on the expiry date. The gaps between consecutive contracts are adjusted by shifting the
      historical data by the difference between the Open price on the first day of the new contract
      and the Close price on the last day of the old contract. The back adjustment considers only
      the Open, High, Low and Close price values. The Volumes are not adjusted.
    """
    # Verify the parameters values
    N = self._future_ticker.get_N()
    days_before_exp_date = self._future_ticker.get_days_before_exp_date()
    fields, got_single_field = convert_to_list(fields, PriceField)

    if N < 1 or days_before_exp_date < 1:
        raise ValueError("The number of the contract and the number of days before expiration date should be "
                         "greater than 0.")

    # Shift the index and data according to the start time and end time values. We shift the number of days
    # by 1, so that days_before_exp_date=1 will use the prices on the expiration date from the newer contract.
    shifted_index = pd.DatetimeIndex(self.index) - pd.Timedelta(days=(days_before_exp_date - 1))
    if shifted_index.empty:
        return PricesDataFrame(columns=fields)

    # We use the backfill search for locating the start time, because we will additionally consider the time
    # range between start_time and the found starting expiry date time
    start_time_index_position = shifted_index.get_loc(start_time, method='backfill')

    shifted_index = shifted_index[start_time_index_position:]
    shifted_data = self.iloc[start_time_index_position:]
    shifted_data = shifted_data.iloc[(N - 1):]

    # Compute the time ranges for each of the contracts. The time ranges should be equal to:
    # [[start_date, exp_date_1 - days_before_exp_date),
    #  [exp_date_1 - days_before_exp_date, exp_date_2 - days_before_exp_date),
    #  [exp_date_2 - days_before_exp_date, exp_date_3 - days_before_exp_date),
    #  ...
    #  [exp_date_K - days_before_exp_date, end_date]]
    # Each of these time ranges is mapped onto one contract, from which the data within this range is taken.
    index_left_ranges = [pd.to_datetime(start_time)] + list(shifted_index)
    index_right_ranges = list(shifted_index)

    # Combine the calculated time ranges with the corresponding future contracts. We want the N-th contract
    # to be mapped onto the first time range (start_date, exp_date_1 - days_before_exp_date), the (N+1)-th
    # contract onto the second time range etc., therefore we zip the lists of both the left and right
    # boundaries of the time ranges with a shifted list of contracts.
    time_ranges_and_futures = zip(index_left_ranges, index_right_ranges, shifted_data)

    # Get the data within the desired time ranges from the corresponding contracts
    combined_data_frame = pd.concat(
        [future.data.loc[left:right] for left, right, future in time_ranges_and_futures], sort=False)

    # To avoid shifting data on the time ranges, we use overlapping ends and beginnings of the time ranges.
    # Therefore, we need to check if any duplicates exist; on the expiry dates, we keep the data from the
    # newer contract
    combined_data_frame = combined_data_frame[~combined_data_frame.index.duplicated(keep='last')]
    combined_data_frame = combined_data_frame.loc[:end_time]

    if self._futures_adjustment_method == FuturesAdjustmentMethod.BACK_ADJUSTED:
        # Create the back adjusted series.
        # Compute the differences between prices on the expiration days (shifted by the days_before_exp_date
        # number of days). In case the shifted days in the index contain e.g. Saturdays, Sundays or other
        # dates that are not in the Future's prices data frame, the first older valid date is taken.
        end_time_index_position = shifted_index.get_loc(end_time, method='pad')

        # In the following slice, in case end_time == expiry date, we also want to include it in the index
        first_days_of_next_contracts = shifted_index[:end_time_index_position + 1]

        # Apply the back adjustment. Pass the futures chain, shifting the data in a way which allows to
        # treat the N-th contract as the first element of the data frame
        combined_data_frame = self._back_adjust(fields, first_days_of_next_contracts,
                                                shifted_data, combined_data_frame)

    return combined_data_frame
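# Hedged sketch of back adjustment (hypothetical two-contract chain): the older contract's
# history is shifted by the gap between the new contract's first Open and the old contract's
# last Close, so the stitched series has no artificial jump at the roll date.
old_contract_closes = [100.0, 101.0, 102.0]  # last close before the roll: 102.0
new_contract_open_on_roll = 105.0
gap = new_contract_open_on_roll - old_contract_closes[-1]           # 3.0
back_adjusted_old = [price + gap for price in old_contract_closes]  # [103.0, 104.0, 105.0]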
class TestDataFrames(TestCase):
    def setUp(self):
        self.dates = pd.date_range(start='2015-05-13', periods=5)
        self.column_names = ['a', 'b', 'c', 'd', 'e']

        self.prices_values = [[1., 1., 1., 1., 1.],
                              [2., 2., 2., 2., 2.],
                              [3., 3., 3., 3., 3.],
                              [4., 4., 4., 4., 4.],
                              [5., 5., 5., 5., 5.]]
        self.test_prices_df = PricesDataFrame(data=self.prices_values, index=self.dates,
                                              columns=self.column_names)

        self.log_returns_values = [[0.693147, 0.693147, 0.693147, 0.693147, 0.693147],
                                   [0.405465, 0.405465, 0.405465, 0.405465, 0.405465],
                                   [0.287682, 0.287682, 0.287682, 0.287682, 0.287682],
                                   [0.223144, 0.223144, 0.223144, 0.223144, 0.223144]]
        self.test_log_returns_df = LogReturnsDataFrame(data=self.log_returns_values, index=self.dates[1:],
                                                       columns=self.column_names)

        self.simple_returns_values = [[1.000000, 1.000000, 1.000000, 1.000000, 1.000000],
                                      [0.500000, 0.500000, 0.500000, 0.500000, 0.500000],
                                      [0.333333, 0.333333, 0.333333, 0.333333, 0.333333],
                                      [0.250000, 0.250000, 0.250000, 0.250000, 0.250000]]
        self.test_simple_returns_df = SimpleReturnsDataFrame(data=self.simple_returns_values,
                                                             index=self.dates[1:],
                                                             columns=self.column_names)

    def test_num_of_columns(self):
        self.assertEqual(self.test_prices_df.num_of_columns, 5)

    def test_prices_dataframe_dtypes(self):
        dtypes = self.test_prices_df.dtypes
        self.assertEqual({dtype("float64")}, set(dtypes))

    def test_log_returns_dataframe_dtypes(self):
        dtypes = self.test_log_returns_df.dtypes
        self.assertEqual({dtype("float64")}, set(dtypes))

    def test_simple_returns_dataframe_dtypes(self):
        dtypes = self.test_simple_returns_df.dtypes
        self.assertEqual({dtype("float64")}, set(dtypes))

    def test_prices_to_log_returns(self):
        expected_dataframe = self.test_log_returns_df
        actual_dataframe = self.test_prices_df.to_log_returns()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_simple_to_log_returns(self):
        expected_dataframe = self.test_log_returns_df
        actual_dataframe = self.test_simple_returns_df.to_log_returns()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)
        self.assertEqual({dtype("float64")}, set(actual_dataframe.dtypes))

    def test_log_to_log_returns(self):
        expected_dataframe = self.test_log_returns_df
        actual_dataframe = self.test_log_returns_df.to_log_returns()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)
        self.assertEqual({dtype("float64")}, set(actual_dataframe.dtypes))

    def test_log_to_simple_returns(self):
        expected_dataframe = self.test_simple_returns_df
        actual_dataframe = self.test_log_returns_df.to_simple_returns()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)
        self.assertEqual({dtype("float64")}, set(actual_dataframe.dtypes))

    def test_simple_to_simple_returns(self):
        expected_dataframe = self.test_simple_returns_df
        actual_dataframe = self.test_simple_returns_df.to_simple_returns()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)
        self.assertEqual({dtype("float64")}, set(actual_dataframe.dtypes))

    def test_prices_to_simple_returns(self):
        expected_dataframe = self.test_simple_returns_df
        actual_dataframe = self.test_prices_df.to_simple_returns()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)
        self.assertEqual({dtype("float64")}, set(actual_dataframe.dtypes))

    def test_log_returns_to_prices(self):
        expected_dataframe = self.test_prices_df
        actual_dataframe = self.test_log_returns_df.to_prices()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)
        self.assertEqual({dtype("float64")}, set(actual_dataframe.dtypes))

    def test_simple_returns_to_prices(self):
        expected_dataframe = self.test_prices_df
        actual_dataframe = self.test_simple_returns_df.to_prices()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)
        self.assertEqual({dtype("float64")}, set(actual_dataframe.dtypes))

    def test_prices_to_prices(self):
        expected_dataframe = self.test_prices_df
        actual_dataframe = self.test_prices_df.to_prices()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)
        self.assertEqual({dtype("float64")}, set(actual_dataframe.dtypes))

    def test_min_max_normalized(self):
        normalized_prices = [[0.00, 0.00, 0.00, 0.00, 0.00],
                             [0.25, 0.25, 0.25, 0.25, 0.25],
                             [0.50, 0.50, 0.50, 0.50, 0.50],
                             [0.75, 0.75, 0.75, 0.75, 0.75],
                             [1.00, 1.00, 1.00, 1.00, 1.00]]
        expected_dataframe = PricesDataFrame(data=normalized_prices, index=self.dates,
                                             columns=self.column_names)
        actual_dataframe = self.test_prices_df.min_max_normalized()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)
        self.assertEqual({dtype("float64")}, set(actual_dataframe.dtypes))

    def test_exponential_average(self):
        smoothed_values = [[1.000000, 1.000000, 1.000000, 1.000000, 1.000000],
                           [1.940000, 1.940000, 1.940000, 1.940000, 1.940000],
                           [2.936400, 2.936400, 2.936400, 2.936400, 2.936400],
                           [3.936184, 3.936184, 3.936184, 3.936184, 3.936184],
                           [4.936171, 4.936171, 4.936171, 4.936171, 4.936171]]
        expected_dataframe = PricesDataFrame(data=smoothed_values, index=self.dates,
                                             columns=self.column_names)
        actual_dataframe = self.test_prices_df.exponential_average()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)
        self.assertEqual({dtype("float64")}, set(actual_dataframe.dtypes))

    def test_aggregate_by_year(self):
        dates = pd.DatetimeIndex(['2015-06-01', '2015-12-30', '2016-01-01', '2016-05-01'])
        test_dataframe = SimpleReturnsDataFrame(data=self.simple_returns_values, index=dates)

        expected_aggregated_rets = [[2.000000, 2.000000, 2.000000, 2.000000, 2.000000],
                                    [0.666666, 0.666666, 0.666666, 0.666666, 0.666666]]
        expected_dataframe = SimpleReturnsDataFrame(data=expected_aggregated_rets,
                                                    index=pd.DatetimeIndex(['2015-12-31', '2016-12-31']))

        actual_dataframe = test_dataframe.aggregate_by_year()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)
        self.assertEqual({dtype("float64")}, set(actual_dataframe.dtypes))

    def test_rolling_time_window(self):
        actual_result = self.test_prices_df.rolling_time_window(window_length=2, step=1,
                                                                func=lambda x: x.mean())
        expected_values = [[1.5, 1.5, 1.5, 1.5, 1.5],
                           [2.5, 2.5, 2.5, 2.5, 2.5],
                           [3.5, 3.5, 3.5, 3.5, 3.5],
                           [4.5, 4.5, 4.5, 4.5, 4.5]]
        expected_index = self.test_prices_df.index[-4:].copy(deep=True)
        expected_columns = ['a', 'b', 'c', 'd', 'e']
        expected_result = QFDataFrame(expected_values, expected_index, expected_columns)
        assert_dataframes_equal(expected_result, actual_result, absolute_tolerance=1e-20)

        actual_result = self.test_prices_df.rolling_time_window(window_length=2, step=1,
                                                                func=lambda x: x.mean().mean())
        expected_values = [1.5, 2.5, 3.5, 4.5]
        expected_index = self.test_prices_df.index[-4:].copy(deep=True)
        expected_result = QFSeries(expected_values, expected_index)
        assert_series_equal(expected_result, actual_result, absolute_tolerance=1e-20)
        self.assertEqual(dtype("float64"), actual_result.dtypes)

    def test_total_cumulative_return(self):
        actual_result = self.test_prices_df.total_cumulative_return()
        expected_result = PricesSeries(index=self.test_prices_df.columns, data=[4.0, 4.0, 4.0, 4.0, 4.0])
        assert_series_equal(expected_result, actual_result)
        self.assertEqual(dtype("float64"), actual_result.dtypes)

    def test_stats_functions(self):
        qf_df = QFDataFrame(data=self.prices_values, index=self.dates, columns=self.column_names)
        max_qf_df = qf_df.max()
        expected_max = QFSeries([5, 5, 5, 5, 5], index=['a', 'b', 'c', 'd', 'e'])
        self.assertEqual(type(max_qf_df), QFSeries)
        assert_series_equal(max_qf_df, expected_max)
        self.assertEqual(dtype("float64"), max_qf_df.dtypes)

    def test_squeeze(self):
        qf_df = QFDataFrame(data=self.prices_values, index=self.dates, columns=self.column_names)
        self.assertEqual(type(qf_df[['a']]), QFDataFrame)
        self.assertEqual({dtype("float64")}, set(qf_df[['a']].dtypes))
        self.assertEqual(type(qf_df[['a']].squeeze()), QFSeries)
        self.assertEqual(dtype("float64"), qf_df[['a']].squeeze().dtypes)

        self.assertEqual(type(qf_df[['a', 'b']].squeeze()), QFDataFrame)
        self.assertEqual({dtype("float64")}, set(qf_df[['a', 'b']].squeeze().dtypes))

        self.assertEqual(type(qf_df.iloc[[0]]), QFDataFrame)
        self.assertEqual({dtype("float64")}, set(qf_df.iloc[[0]].dtypes))
        self.assertEqual(type(qf_df.iloc[[0]].squeeze()), QFSeries)
        self.assertEqual("float64", qf_df.iloc[[0]].squeeze().dtypes)

    def test_concat(self):
        full_df = QFDataFrame(data=self.prices_values, index=self.dates, columns=self.column_names)

        # Concatenate along the index (axis=0)
        number_of_rows = len(full_df)
        half_df = full_df.iloc[:number_of_rows // 2]
        second_half_df = full_df.iloc[number_of_rows // 2:]
        concatenated_df = pd.concat([half_df, second_half_df])
        self.assertEqual(type(concatenated_df), QFDataFrame)
        self.assertEqual({dtype("float64")}, set(concatenated_df.dtypes))
        assert_dataframes_equal(concatenated_df, full_df)

        # Concatenate along the columns (axis=1)
        number_of_columns = full_df.num_of_columns
        half_df = full_df.loc[:, full_df.columns[:number_of_columns // 2]]
        second_half_df = full_df.loc[:, full_df.columns[number_of_columns // 2:]]
        concatenated_df = pd.concat([half_df, second_half_df], axis=1)
        self.assertEqual(type(concatenated_df), QFDataFrame)
        self.assertEqual({dtype("float64")}, set(concatenated_df.dtypes))
        assert_dataframes_equal(concatenated_df, full_df)

    def test_concat_series(self):
        index = [1, 2, 3]
        series_1 = QFSeries(data=[17., 15., 16.], index=index)
        series_2 = QFSeries(data=[18., 19., 20.], index=index)

        df = pd.concat([series_1, series_2], axis=1)
        self.assertEqual(type(df), QFDataFrame)
        self.assertEqual({s.dtypes for s in [series_1, series_2]}, set(df.dtypes))

        series = pd.concat([series_1, series_2], axis=0)
        self.assertEqual(type(series), QFSeries)
        self.assertEqual("float64", series.dtypes)
        self.assertEqual({s.dtypes for s in [series_1, series_2]}, {series.dtypes})
class TestDataFrames(TestCase):
    def setUp(self):
        self.dates = pd.date_range(start='2015-05-13', periods=5)
        self.column_names = ['a', 'b', 'c', 'd', 'e']

        self.prices_values = [[1, 1, 1, 1, 1],
                              [2, 2, 2, 2, 2],
                              [3, 3, 3, 3, 3],
                              [4, 4, 4, 4, 4],
                              [5, 5, 5, 5, 5]]
        self.test_prices_df = PricesDataFrame(data=self.prices_values, index=self.dates,
                                              columns=self.column_names)

        self.log_returns_values = [[0.693147, 0.693147, 0.693147, 0.693147, 0.693147],
                                   [0.405465, 0.405465, 0.405465, 0.405465, 0.405465],
                                   [0.287682, 0.287682, 0.287682, 0.287682, 0.287682],
                                   [0.223144, 0.223144, 0.223144, 0.223144, 0.223144]]
        self.test_log_returns_df = LogReturnsDataFrame(data=self.log_returns_values, index=self.dates[1:],
                                                       columns=self.column_names)

        self.simple_returns_values = [[1.000000, 1.000000, 1.000000, 1.000000, 1.000000],
                                      [0.500000, 0.500000, 0.500000, 0.500000, 0.500000],
                                      [0.333333, 0.333333, 0.333333, 0.333333, 0.333333],
                                      [0.250000, 0.250000, 0.250000, 0.250000, 0.250000]]
        self.test_simple_returns_df = SimpleReturnsDataFrame(data=self.simple_returns_values,
                                                             index=self.dates[1:],
                                                             columns=self.column_names)

    def test_num_of_columns(self):
        self.assertEqual(self.test_prices_df.num_of_columns, 5)

    def test_prices_to_log_returns(self):
        expected_dataframe = self.test_log_returns_df
        actual_dataframe = self.test_prices_df.to_log_returns()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_simple_to_log_returns(self):
        expected_dataframe = self.test_log_returns_df
        actual_dataframe = self.test_simple_returns_df.to_log_returns()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_log_to_log_returns(self):
        expected_dataframe = self.test_log_returns_df
        actual_dataframe = self.test_log_returns_df.to_log_returns()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_log_to_simple_returns(self):
        expected_dataframe = self.test_simple_returns_df
        actual_dataframe = self.test_log_returns_df.to_simple_returns()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_simple_to_simple_returns(self):
        expected_dataframe = self.test_simple_returns_df
        actual_dataframe = self.test_simple_returns_df.to_simple_returns()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_prices_to_simple_returns(self):
        expected_dataframe = self.test_simple_returns_df
        actual_dataframe = self.test_prices_df.to_simple_returns()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_log_returns_to_prices(self):
        expected_dataframe = self.test_prices_df
        actual_dataframe = self.test_log_returns_df.to_prices()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_simple_returns_to_prices(self):
        expected_dataframe = self.test_prices_df
        actual_dataframe = self.test_simple_returns_df.to_prices()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_prices_to_prices(self):
        expected_dataframe = self.test_prices_df
        actual_dataframe = self.test_prices_df.to_prices()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_min_max_normalized(self):
        normalized_prices = [[0.00, 0.00, 0.00, 0.00, 0.00],
                             [0.25, 0.25, 0.25, 0.25, 0.25],
                             [0.50, 0.50, 0.50, 0.50, 0.50],
                             [0.75, 0.75, 0.75, 0.75, 0.75],
                             [1.00, 1.00, 1.00, 1.00, 1.00]]
        expected_dataframe = PricesDataFrame(data=normalized_prices, index=self.dates,
                                             columns=self.column_names)
        actual_dataframe = self.test_prices_df.min_max_normalized()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_exponential_average(self):
        smoothed_values = [[1.000000, 1.000000, 1.000000, 1.000000, 1.000000],
                           [1.940000, 1.940000, 1.940000, 1.940000, 1.940000],
                           [2.936400, 2.936400, 2.936400, 2.936400, 2.936400],
                           [3.936184, 3.936184, 3.936184, 3.936184, 3.936184],
                           [4.936171, 4.936171, 4.936171, 4.936171, 4.936171]]
        expected_dataframe = PricesDataFrame(data=smoothed_values, index=self.dates,
                                             columns=self.column_names)
        actual_dataframe = self.test_prices_df.exponential_average()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_aggregate_by_year(self):
        dates = pd.DatetimeIndex(['2015-06-01', '2015-12-30', '2016-01-01', '2016-05-01'])
        test_dataframe = SimpleReturnsDataFrame(data=self.simple_returns_values, index=dates)

        expected_aggregated_rets = [[2.000000, 2.000000, 2.000000, 2.000000, 2.000000],
                                    [0.666666, 0.666666, 0.666666, 0.666666, 0.666666]]
        expected_dataframe = SimpleReturnsDataFrame(data=expected_aggregated_rets,
                                                    index=pd.DatetimeIndex(['2015-12-31', '2016-12-31']))

        actual_dataframe = test_dataframe.aggregate_by_year()
        assert_dataframes_equal(expected_dataframe, actual_dataframe)

    def test_rolling_time_window(self):
        actual_result = self.test_prices_df.rolling_time_window(window_length=2, step=1,
                                                                func=lambda x: x.mean())
        expected_values = [[1.5, 1.5, 1.5, 1.5, 1.5],
                           [2.5, 2.5, 2.5, 2.5, 2.5],
                           [3.5, 3.5, 3.5, 3.5, 3.5],
                           [4.5, 4.5, 4.5, 4.5, 4.5]]
        expected_index = self.test_prices_df.index[-4:].copy(deep=True)
        expected_columns = ['a', 'b', 'c', 'd', 'e']
        expected_result = QFDataFrame(expected_values, expected_index, expected_columns)
        assert_dataframes_equal(expected_result, actual_result, absolute_tolerance=1e-20)

        actual_result = self.test_prices_df.rolling_time_window(window_length=2, step=1,
                                                                func=lambda x: x.mean().mean())
        expected_values = [1.5, 2.5, 3.5, 4.5]
        expected_index = self.test_prices_df.index[-4:].copy(deep=True)
        expected_result = QFSeries(expected_values, expected_index)
        assert_series_equal(expected_result, actual_result, absolute_tolerance=1e-20)

    def test_total_cumulative_return(self):
        actual_result = self.test_prices_df.total_cumulative_return()
        expected_result = pd.Series(index=self.test_prices_df.columns, data=[4.0, 4.0, 4.0, 4.0, 4.0])
        assert_series_equal(expected_result, actual_result)