def get_price(self, tickers: Union[Ticker, Sequence[Ticker]], fields: Union[PriceField, Sequence[PriceField]],
              start_date: datetime, end_date: datetime = None, frequency: Frequency = Frequency.DAILY) -> \
        Union[None, PricesSeries, PricesDataFrame, QFDataArray]:
    """
    Fetches historical prices and labels them with PriceField values instead of field strings.

    The requested price fields are translated to their string names, the history is downloaded
    with get_history() and the resulting container is cast to the prices-specific type.

    Parameters
    ----------
    tickers
        single ticker or a sequence of tickers
    fields
        single price field or a sequence of price fields
    start_date
        first date of the requested period
    end_date
        last date of the requested period
    frequency
        frequency of the requested data

    Returns
    -------
    PricesSeries (single field, single ticker), PricesDataFrame (single field with many tickers or
    many fields with a single ticker) or the container returned by get_history() with re-labelled
    fields coordinates (many fields, many tickers)

    Raises
    ------
    NotImplementedError
        if start_date equals end_date and the frequency does not compare greater than Frequency.DAILY
    """
    # For frequencies comparing greater than DAILY the query is never treated as a single-date one
    if frequency > Frequency.DAILY:
        is_single_date_query = False
    else:
        is_single_date_query = bool(start_date and (start_date == end_date))

    if is_single_date_query:
        raise NotImplementedError("Single date queries are not supported yet")

    string_fields = self._map_field_to_str(tickers, fields)
    history = self.get_history(tickers, string_fields, start_date, end_date, frequency)

    # Single field: only the container type needs to be changed
    if self._is_single_price_field(fields):
        if self._is_single_ticker(tickers):
            return cast_series(history, PricesSeries)
        return cast_dataframe(history, PricesDataFrame)

    # Many fields: the string labels need to be replaced with PriceField values
    field_by_string = self.str_to_price_field_map(self._get_first_ticker(tickers))

    if self._is_single_ticker(tickers):
        # Many fields and single ticker - replace columns in PricesDataFrame
        prices_df = cast_dataframe(history, PricesDataFrame)
        new_columns = {name: field_by_string[name] for name in prices_df.columns}
        prices_df.rename(columns=new_columns, inplace=True)
        return prices_df

    # Many fields and many tickers - replace the "fields" coordinates
    return history.assign_coords(fields=[field_by_string[name] for name in history.fields.values])
def _get_single_rolling_contract_info(
        self, real_contracts_prices_da: PricesDataFrame, rolling_dates: pd.DatetimeIndex, contract_number: int) \
        -> RollingContractData:
    """
    Stitches the data of consecutive real contracts into one rolling contract.

    For the i-th rolling date the contract at position (contract_number - 1 + i) is taken and its
    data is filtered with _filter_dates() using that rolling date and the next one (or the current
    time of the timer for the last rolling date). The partial prices, returns and
    time-to-expiration series are concatenated in the end.

    Parameters
    ----------
    real_contracts_prices_da
        container indexed by [dates, tickers, fields], holding prices of the real contracts
    rolling_dates
        dates on which the contract is rolled
    contract_number
        1-based number of the contract which should be followed

    Returns
    -------
    RollingContractData built from the concatenated prices, time to expiration and returns
    """
    now = self.timer.now()

    # partial results, one element per rolling period; concatenated after the loop
    price_chunks = []
    return_chunks = []
    tte_chunks = []

    final_idx = len(rolling_dates) - 1
    tickers = real_contracts_prices_da.tickers.values

    for i, start_date in enumerate(rolling_dates):
        # the last period is open-ended, so it is closed with the current time
        end_date = rolling_dates[i + 1] if i < final_idx else now

        contract_idx = contract_number - 1 + i
        contract_df = cast_dataframe(real_contracts_prices_da[:, contract_idx, :].to_pandas(), PricesDataFrame)
        contract_df = contract_df.dropna()
        contract_df.name = tickers[contract_idx]

        chunk_prices_df, chunk_tte_tms = self._filter_dates(contract_df, start_date, end_date)
        # returns are computed before the first row is dropped below
        chunk_returns_tms = chunk_prices_df.loc[:, PriceField.Close].to_simple_returns()

        # Drop the first price and the first time to expiration of every contract except for the first one,
        # otherwise there would be two data points on rolling dates.
        # NOTE(review): the original author considered removing this step, so that there would be
        # 2 data points on rolling dates.
        if i > 0:
            chunk_prices_df = chunk_prices_df.iloc[1:]
            chunk_tte_tms = chunk_tte_tms.iloc[1:]

        price_chunks.append(chunk_prices_df)
        tte_chunks.append(chunk_tte_tms)
        return_chunks.append(chunk_returns_tms)

    prices_df = cast_dataframe(pd.concat(price_chunks, axis=0), PricesDataFrame)
    time_to_expiration_tms = pd.concat(tte_chunks, axis=0)  # type: pd.Series
    returns_tms = cast_series(pd.concat(return_chunks, axis=0), ReturnsSeries)

    # set names for series
    self._set_series_names(contract_number, prices_df, returns_tms, time_to_expiration_tms)

    return RollingContractData(prices_df, time_to_expiration_tms, returns_tms)
def rolling_time_window(
        self, window_length: int, step: int, func: Callable[[Union["QFDataFrame", np.ndarray]], "QFSeries"]) \
        -> Union[None, "QFSeries", "QFDataFrame"]:
    """
    Runs a given function on each rolling window in the dataframe. The content of a rolling window is also
    a QFDataFrame, thus the function which should be applied should accept a QFDataFrame as an argument.

    The function may return either a QFSeries (then the output of rolling_time_window will be a QFDataFrame)
    or a scalar value (then the output of rolling_time_window will be a QFSeries).

    The rolling window is moved along the time index (rows). Windows are taken starting from the last
    rows and moving backwards; each result is labelled with the index value of the last row of its window.
    The returned container is sorted by its index.

    Parameters
    ----------
    window_length
        number of rows which should be taken into rolling window (must be positive)
    step
        number of rows by which rolling window should be moved (must be positive)
    func
        function to apply on each rolling window. If it returns a QFSeries then the output of
        rolling_time_window() will be a QFDataFrame; if it returns a scalar value, the return value of
        rolling_time_window() will be a QFSeries

    Returns
    -------
    None (if the result of running the rolling window was empty) or QFSeries (if the function applied returned
    scalar value for each window) or QFDataFrame (if the function applied returned QFSeries for each window)

    Raises
    ------
    ValueError
        if window_length or step is not a positive number
    """
    # A non-positive step would never move the window backwards (endless loop for step == 0),
    # a non-positive window would produce empty patches.
    if window_length < 1:
        raise ValueError("window_length must be a positive integer")
    if step < 1:
        raise ValueError("step must be a positive integer")

    results_dict = dict()  # type: Dict[datetime, pd.Series]

    # walk backwards from the last row for as long as a full window still fits
    end_idx = self.num_of_rows
    while end_idx >= window_length:
        start_idx = end_idx - window_length
        patch = self.iloc[start_idx:end_idx, :]
        end_date = self.index[end_idx - 1]
        results_dict[end_date] = func(patch)
        end_idx -= step

    if not results_dict:
        return None

    # the type of the first result decides about the type of the output container
    first_element = next(iter(results_dict.values()))

    if isinstance(first_element, pd.Series):
        result = QFDataFrame.from_dict(results_dict, orient='index')
        result = cast_dataframe(result, QFDataFrame)
    else:
        # imported locally, as in the original code
        from qf_lib.containers.series.qf_series import QFSeries
        result = QFSeries(index=tuple(results_dict.keys()), data=tuple(results_dict.values()))

    # results were collected from the newest window to the oldest one
    result = result.sort_index()

    return result
def exponential_average(self, lambda_coeff: float = 0.94) -> "QFDataFrame":
    """
    Smooths every column of the dataframe with the exponential average.

    Parameters
    ----------
    lambda_coeff
        lambda coefficient; it is expanded by _prepare_value_per_column_list() so that
        one coefficient is consumed for each processed column

    Returns
    -------
    smoothed version (exponential average) of the data frame, of the same type as this dataframe
    """
    per_column_lambdas = self._get_iterator_for_pandas(self._prepare_value_per_column_list(lambda_coeff))

    def smooth_column(column):
        # each call consumes the next coefficient from the shared iterator
        return column.exponential_average(next(per_column_lambdas))

    return cast_dataframe(self.apply(smooth_column, axis=0), self._constructor)
def _setup_r_square_of_each_predictor(self):
    """
    Computes the R-squared of each regressor from the correlation matrix of the regressors
    and stores the values in self.r_squared_of_each_predictor (a QFSeries indexed by regressors' names).
    """
    regressors_df = self.input_data.regressors_df
    corr_matrix = cast_dataframe(regressors_df.corr(), output_type=QFDataFrame)

    # the diagonal of the inverted correlation matrix (the original code names it "vif")
    vif = np.diagonal(inv(corr_matrix))

    self.r_squared_of_each_predictor = QFSeries(data=1 - (1 / vif), index=regressors_df.columns.copy())
def drawdown_tms(input_data: InputData, frequency: Frequency = None) -> InputData:
    """
    Calculates the drawdown for each date of the given timeseries.

    Parameters
    ----------
    input_data: QFSeries, QFDataFrame
        QF timeseries or multiple timeseries grouped into a DataFrame
    frequency: Frequency
        optional parameter passed to to_prices(), so that the frequency does not need to be inferred

    Returns
    -------
    QFSeries, QFDataFrame
        series of drawdowns (one value for each date of the prices series). Drawdown for a given date
        is defined as the percentage difference between the maximal price value up to the given date
        and the price value for that date.
    """
    prices = input_data.to_prices(frequency=frequency)
    running_peak = prices.cummax()
    result = 1 - prices / running_peak

    # the output container mirrors the kind of the input
    if not isinstance(input_data, QFSeries):
        return cast_dataframe(result, QFDataFrame)
    return cast_series(result, QFSeries)
def _setup_correlations(self, fitted_tms):
    """
    Stores in self.correlation_matrix the correlations between the fitted series, the regressors
    and the analysed series (columns are put side by side in that order).
    """
    analysed_tms = self.input_data.analysed_tms
    regressors_df = self.input_data.regressors_df

    combined_df = pd.concat([fitted_tms, regressors_df, analysed_tms], axis=1)
    correlations = combined_df.corr()

    self.correlation_matrix = cast_dataframe(correlations, output_type=QFDataFrame)
def cast_dataframe_to_proper_type(result):
    """
    Casts the result to a QF container matching its number of axes.

    An object with one axis is cast to QFSeries, an object with two axes is cast to QFDataFrame;
    anything else is returned unchanged.
    """
    dimensions = len(result.axes)

    if dimensions == 1:
        return cast_series(result, QFSeries)
    if dimensions == 2:
        return cast_dataframe(result, QFDataFrame)

    return result
def _get_historical_data(self, exposures_tms, prices_array, ticker):
    """
    Puts the exposures next to the Open, Low and High prices of the given ticker.

    Returns a pandas container with the columns in the order: exposures, open, low, high.
    """
    ticker_prices = prices_array.loc[:, ticker, :].to_pandas()
    prices_df = cast_dataframe(ticker_prices, PricesDataFrame)

    price_columns = [prices_df.loc[:, field] for field in (PriceField.Open, PriceField.Low, PriceField.High)]

    return pd.concat([exposures_tms, *price_columns], axis=1)
def test_is_equal_risk_contribution(self):
    """
    Two identical assets contribute equally to the risk only if they have equal weights.
    """
    # build a two-asset universe out of two copies of the same series
    asset_a_tms = self.factors_df.loc[:, 'a']
    duplicated_df = cast_dataframe(pd.concat([asset_a_tms, asset_a_tms], axis=1), SimpleReturnsDataFrame)
    duplicated_df.columns = ['a', 'b']
    covariance = duplicated_df.cov()

    unequal_weights = pd.Series([0.25, 0.75], index=self.factors_df.columns)
    self.assertFalse(RiskContributionAnalysis.is_equal_risk_contribution(covariance, unequal_weights))

    equal_weights = pd.Series([0.5, 0.5], index=self.factors_df.columns)
    self.assertTrue(RiskContributionAnalysis.is_equal_risk_contribution(covariance, equal_weights))
def _calculate_portfolio_returns_tms(self, tickers, open_to_open_returns_df: QFDataFrame,
                                     exposure_values_df: QFDataFrame) -> SimpleReturnsSeries:
    """
    Calculates the SimpleReturnsSeries of the portfolio. The value for each date equals the portfolio
    performance over the last open-to-open period, e.g. the value indexed as 2010-02-15 refers to the
    portfolio value change between the open at 14th and the open at 15th, and is based on the signal
    from 2010-02-13.

    The first index of the series is the Day 3 of the backtest, as the first signal calculation occurs
    after Day 1 (see ORDER OF ACTIONS below). The last index of the series is test_end_date and the
    portfolio exposure is being set to zero on the opening of the test_end_date.

    ORDER OF ACTIONS:

    -- Day 1 --
    signal is generated, based on the historic data INCLUDING prices from Day 1;
    suggested exposure for Day 2 is calculated

    -- Day 2 --
    a trade is entered, held or exited (or nothing happens) regarding the suggested exposure;
    this action is performed on the opening of the day

    -- Day 3 --
    at the opening the open-to-open return is calculated;
    now it is possible to estimate current portfolio value;
    the simple return of the portfolio (Day 3 to Day 2) is saved and indexed with Day 3 date
    """
    returns_df = open_to_open_returns_df.dropna(how="all")

    # exposures are delayed by two rows (see ORDER OF ACTIONS); the first two rows carry no signal
    delayed_exposures_df = exposure_values_df.shift(2, axis=0).iloc[2:]

    strategies_returns_df = (delayed_exposures_df * returns_df).dropna(axis=0, how='all')
    strategies_returns_df = cast_dataframe(
        strategies_returns_df, SimpleReturnsDataFrame)  # type: SimpleReturnsDataFrame

    weights = Portfolio.one_over_n_weights(tickers)
    # for strategies based on more than one ticker (ex. VolLongShort) the original author suggests:
    # weights = QFSeries(np.ones(daily_returns_of_strategies_df.num_of_columns))

    portfolio_rets_tms, _ = Portfolio.constant_weights(strategies_returns_df, weights)

    return portfolio_rets_tms
def to_prices(self, initial_prices: Sequence[float] = None, suggested_initial_date: Union[datetime, int, float] = None,
              frequency: Frequency = None) -> "PricesDataFrame":
    """
    Converts a dataframe to the dataframe of prices, column by column.

    The dataframe of prices returned will have an extra date at the beginning (in comparison to the
    returns' dataframe). The return for the first date of a prices timeseries cannot be calculated, so it
    is missing in the returns; during the opposite conversion that extra "initial date" is added back.
    The distance between the extra date and the rest of the dates can be inferred from the returns'
    dataframe or can be calculated using the frequency passed as the optional argument.

    Parameters
    ----------
    initial_prices
        initial price for all timeseries. If no prices are specified, then they will be assumed to be 1.
        If only one value is passed (instead of a list with values for each column), then the initial price
        will be the same for each series contained within the dataframe.
    suggested_initial_date
        the first date or initial value for the prices series. It won't be necessarily the first date
        of the price series (e.g. if the method is run on the PricesDataFrame then it won't be used).
    frequency
        the frequency of the returns' timeseries. It is used to infer the initial date for the prices series.

    Returns
    -------
    prices
        dataframe of prices
    """
    per_column_prices = self._get_iterator_for_pandas(self._prepare_value_per_column_list(initial_prices))

    def convert_column(series):
        # each call consumes the next initial price from the shared iterator
        return series.to_prices(
            initial_price=next(per_column_prices),
            suggested_initial_date=suggested_initial_date,
            frequency=frequency)

    converted = self.apply(convert_column, axis=0)

    # imported locally, as in the original code
    from qf_lib.containers.dataframe.prices_dataframe import PricesDataFrame
    return cast_dataframe(converted, PricesDataFrame)
def _get_assets_data(self, end_date, start_date, frequency):
    """
    Downloads close prices of self.all_tickers and converts them to simple returns.

    The prices are trimmed to the range returned by get_common_start_and_end() and the
    remaining missing values are forward filled before the conversion.

    Parameters
    ----------
    end_date
        last date of the requested period (note: it precedes start_date in the signature)
    start_date
        first date of the requested period
    frequency
        frequency of the requested prices

    Returns
    -------
    simple returns calculated from the trimmed, forward-filled close prices
    """
    # download data
    asset_prices_df = self.bbg_data_provider.get_price(
        self.all_tickers, PriceField.Close, start_date, end_date, frequency)
    asset_prices_df = cast_dataframe(asset_prices_df, output_type=PricesDataFrame)

    # trim
    common_start, common_end = get_common_start_and_end(asset_prices_df)
    trimmed_asset_prices_df = asset_prices_df.loc[common_start:common_end, :]  # type: PricesDataFrame

    # remove intermediate NaNs; ffill() replaces fillna(method='pad'), which is deprecated in recent pandas
    trimmed_asset_prices_df = trimmed_asset_prices_df.ffill()

    # convert to simple returns
    return trimmed_asset_prices_df.to_simple_returns()
def make_stats(self, initial_risks: Sequence[float], scenarios_list: Sequence[QFDataFrame]) -> QFDataFrame:
    """
    Creates a DataFrame showing what fraction of scenarios failed (reached the accepted draw down level)
    and what fraction succeeded (that is: reached the target return and did not fail on the way).

    Parameters
    ----------
    initial_risks: Sequence[float]
        list of initial_risk parameters where initial_risk is a float number
    scenarios_list: Sequence[pandas.DataFrame]
        list with scenarios (QFDataFrame) where each DataFrame corresponds to one initial_risk value.
        Each DataFrame has columns corresponding to different scenarios and is indexed by Trades'
        ordinal number. Its values are returns of Trades.

    Returns
    -------
    pandas.DataFrame
        DataFrame indexed with initial_risk values and with columns FAILED (fraction of scenarios that failed)
        and SUCCEEDED (fraction of scenarios that met the objective and didn't fail on the way)
    """
    stat_columns = [self.FAILED, self.SUCCEEDED]
    stats_df = QFDataFrame(index=pd.Index(initial_risks), columns=pd.Index(stat_columns), dtype=np.float64)

    for risk, scenarios in zip(initial_risks, scenarios_list):
        returns_df = cast_dataframe(scenarios, SimpleReturnsDataFrame)  # type: SimpleReturnsDataFrame

        # one boolean flag per scenario (column)
        is_failed = max_drawdown(returns_df) >= self._max_accepted_dd
        hit_target = returns_df.total_cumulative_return() >= self._target_return
        is_successful = ~is_failed & hit_target

        # turn the counts into fractions of all scenarios
        scenarios_count = returns_df.num_of_columns
        stats_df.loc[risk, stat_columns] = [
            is_failed.sum() / scenarios_count, is_successful.sum() / scenarios_count
        ]

    return stats_df
def to_log_returns(self) -> "LogReturnsDataFrame":
    """
    Converts the dataframe to the dataframe of logarithmic returns by applying the series-level
    conversion to every column. First date of prices in the returns dataframe won't be present.

    Returns
    -------
    returns_df
        dataframe of log returns
    """
    # imported locally, as in the original code
    from qf_lib.containers.dataframe.log_returns_dataframe import LogReturnsDataFrame

    column_converter = self._constructor_sliced.to_log_returns
    return cast_dataframe(self.apply(column_converter, axis=0), LogReturnsDataFrame)
def to_simple_returns(self) -> "SimpleReturnsDataFrame":
    """
    Converts the dataframe to the dataframe of simple returns by applying the series-level
    conversion to every column. First date of prices in the returns timeseries won't be present.

    Returns
    -------
    returns_df
        dataframe of simple returns
    """
    # imported locally, as in the original code
    from qf_lib.containers.dataframe.simple_returns_dataframe import SimpleReturnsDataFrame

    column_converter = self._constructor_sliced.to_simple_returns
    return cast_dataframe(self.apply(column_converter, axis=0), SimpleReturnsDataFrame)
def cast_data_array_to_proper_type(result: QFDataArray, use_prices_types=False):
    """
    Converts a data array to the container matching its number of dimensions.

    Parameters
    ----------
    result
        data array to convert
    use_prices_types
        if True, PricesSeries / PricesDataFrame are used as the target types
        instead of QFSeries / QFDataFrame

    Returns
    -------
    result.item() for 0 dimensions, a series (which keeps the name of the array) for 1 dimension,
    a dataframe for 2 dimensions; the unchanged input otherwise
    """
    if use_prices_types:
        series_type, data_frame_type = PricesSeries, PricesDataFrame
    else:
        series_type, data_frame_type = QFSeries, QFDataFrame

    dimensions = len(result.shape)

    if dimensions == 0:
        return result.item()

    if dimensions == 1:
        series = cast_series(result.to_pandas(), series_type)
        series.name = result.name
        return series

    if dimensions == 2:
        return cast_dataframe(result.to_pandas(), data_frame_type)

    return result
def min_max_normalized(
        self, original_min_values: Sequence[float] = None, original_max_values: Sequence[float] = None) \
        -> "QFDataFrame":
    """
    Normalizes the data using min-max scaling: it maps all the data to the [0;1] range, so that 0 corresponds
    to the minimal value in the original series and 1 corresponds to the maximal value. It is also possible
    to specify values which should correspond to 0 and 1 after applying the normalization. It is useful
    if the same normalization parameters are used to normalize different data.

    Parameters
    ----------
    original_min_values
        values which should correspond to 0 after applying the normalization (one value for each column)
    original_max_values
        values which should correspond to 1 after applying the normalization (one value for each column)

    Returns
    -------
    normalized_dataframe
        dataframe of normalized values, of the same type as this dataframe
    """
    # NOTE(review): the original comment says that either both min and max values or none of them
    # should be specified, but no such check is performed in this method.
    min_values = self._prepare_value_per_column_list(original_min_values)
    max_values = self._prepare_value_per_column_list(original_max_values)

    lower_bounds = self._get_iterator_for_pandas(min_values)
    upper_bounds = self._get_iterator_for_pandas(max_values)

    def normalize_column(column):
        # each call consumes the next pair of bounds from the shared iterators
        lower = next(lower_bounds)
        upper = next(upper_bounds)
        return column.min_max_normalized(lower, upper)

    return cast_dataframe(self.apply(normalize_column, axis=0), self._constructor)
def _get_open_prices(self, prices_data_array):
    """
    Selects the PriceField.Open slice along the last axis of the prices data array
    and returns it as a PricesDataFrame.
    """
    open_prices_slice = prices_data_array.loc[:, :, PriceField.Open]
    return cast_dataframe(open_prices_slice.to_pandas(), PricesDataFrame)