def _get_data_for_multiple_tickers(self, tickers, fields, start_date, end_date, use_prices_types):
    # Fetch data for tickers that may belong to different data providers: tickers are grouped by their
    # concrete Ticker class, each group is routed to the provider handling that class, and the partial
    # results are concatenated along the TICKERS dimension before normalization.
    if use_prices_types:
        type_of_field = PriceField

        def get_data_func(data_prov: DataProvider, tickers_for_single_data_provider):
            prices = data_prov.get_price(tickers_for_single_data_provider, fields, start_date, end_date)
            return prices
    else:
        type_of_field = str

        def get_data_func(data_prov: DataProvider, tickers_for_single_data_provider):
            prices = data_prov.get_history(tickers_for_single_data_provider, fields, start_date, end_date)
            return prices

    tickers, got_single_ticker = convert_to_list(tickers, Ticker)
    fields, got_single_field = convert_to_list(fields, type_of_field)
    got_single_date = start_date is not None and (start_date == end_date)

    partial_results = []

    # NOTE(review): itertools.groupby only merges *adjacent* elements, so if the tickers list interleaves
    # ticker classes, the same provider may be queried more than once. Each ticker still lands in exactly
    # one group, so the concatenated result is correct — just potentially fetched in extra round-trips.
    for ticker_class, ticker_group in groupby(tickers, lambda t: type(t)):
        data_provider = self._identify_data_provider(ticker_class)

        partial_result = get_data_func(data_provider, list(ticker_group))
        if partial_result is not None:
            partial_results.append(partial_result)

    result = QFDataArray.concat(partial_results, dim=TICKERS)
    normalized_result = normalize_data_array(result, tickers, fields, got_single_date, got_single_ticker,
                                             got_single_field, use_prices_types)
    return normalized_result
def get_tabular_data(self, ticker: BloombergTicker, field: str) -> List:
    """
    Provides current tabular data from Bloomberg.

    Was tested on 'INDX_MEMBERS' and 'MERGERS_AND_ACQUISITIONS' requests. There is no guarantee that all
    other requests will be handled, as the returned data structures might vary.

    Parameters
    -----------
    ticker: BloombergTicker
        ticker for security that should be retrieved
    field: str
        field of security that should be retrieved

    Returns
    -------
    List
        tabular data for the given ticker and field

    Raises
    -------
    ValueError
        when field is None
    """
    if field is None:
        raise ValueError("Field being None is not supported by {}".format(self.__class__.__name__))

    self._connect_if_needed()
    self._assert_is_connected()

    ticker_list, _ = convert_to_list(ticker, BloombergTicker)
    field_list, _ = convert_to_list(field, (PriceField, str))

    return self._tabular_data_provider.get(tickers_as_strings(ticker_list), field_list)
def get_history(self, tickers: Union[BloombergTicker, Sequence[BloombergTicker]],
                fields: Union[str, Sequence[str]], start_date: datetime, end_date: datetime = None,
                frequency: Frequency = Frequency.DAILY, currency: str = None,
                override_name: str = None, override_value: str = None) \
        -> Union[QFSeries, QFDataFrame, QFDataArray]:
    """
    Download historical values of the given fields for the given tickers from Bloomberg and return them
    as a normalized container (Series / DataFrame / DataArray, depending on the request's dimensionality).

    Raises ValueError when fields is None.
    """
    if fields is None:
        raise ValueError("Fields being None is not supported by {}".format(self.__class__.__name__))

    self._connect_if_needed()
    self._assert_is_connected()

    single_date_requested = start_date is not None and start_date == end_date
    ticker_list, single_ticker_requested = convert_to_list(tickers, BloombergTicker)
    field_list, single_field_requested = convert_to_list(fields, (PriceField, str))

    # Default the end of the range to "now" when not provided
    if end_date is None:
        end_date = datetime.now()

    raw_array = self._historical_data_provider.get(
        tickers_as_strings(ticker_list), field_list, start_date, end_date, frequency, currency,
        override_name, override_value)

    return normalize_data_array(raw_array, ticker_list, field_list, single_date_requested,
                                single_ticker_requested, single_field_requested)
def get_history(self, tickers: Union[CcyTicker, Sequence[CcyTicker]],
                fields: Union[None, str, Sequence[str]] = None, start_date: datetime = None,
                end_date: datetime = None, **kwargs) -> Union[QFSeries, QFDataFrame, QFDataArray]:
    """
    Retrieve historical data for currency tickers. Each ticker is downloaded individually within a single
    shared session. When fields is None, all fields found in the downloaded data are returned.
    """
    ticker_list, single_ticker = convert_to_list(tickers, CcyTicker)
    single_date = start_date is not None and start_date == end_date

    if fields is None:
        single_field = False  # all existing fields will be present in the result
    else:
        fields, single_field = convert_to_list(fields, (PriceField, str))

    data_by_ticker = {}
    with Session() as session:
        for ticker in ticker_list:
            data_by_ticker[ticker] = self._get_single_ticker(ticker, fields, start_date, end_date, session)

    if fields is None:
        # Derive the field list from whatever the provider actually returned
        fields = get_fields_from_tickers_data_dict(data_by_ticker)

    data_array = tickers_dict_to_data_array(data_by_ticker, ticker_list, fields)
    return normalize_data_array(data_array, ticker_list, fields, single_date, single_ticker, single_field)
def get_history(self, tickers: Union[Ticker, Sequence[Ticker]], fields: Union[str, Sequence[str]],
                start_date: datetime, end_date: datetime = None, frequency: Frequency = Frequency.DAILY,
                **kwargs) -> Union[QFSeries, QFDataFrame, QFDataArray]:
    """
    Return historical fields for the given tickers, sliced out of the preloaded data bundle.

    FutureTickers are resolved to their current specific contract for the bundle lookup and mapped back
    onto the original tickers in the returned container.

    Raises an AssertionError when the requested frequency differs from the bundle's preset frequency
    (resampling is not supported). Fix: the assertion message was previously garbled English.
    """
    # Verify whether the passed frequency parameter is correct and can be used with the preset data
    assert frequency == self._frequency, "Currently, get_history does not support data sampling - the " \
                                         "frequency has to match the preset data frequency"

    if end_date is None:
        end_date = datetime.now()

    if frequency > Frequency.DAILY:
        # In case of high frequency - the data array should not include the end_date. The data range is
        # labeled with the beginning index time and excludes the end of the time range, therefore a new
        # end date is computed.
        end_date = end_date - Frequency.MIN_1.time_delta()

    # In order to be able to return data for FutureTickers create a mapping between tickers and corresponding
    # specific tickers (in case of non FutureTickers it will be an identity mapping)
    tickers, got_single_ticker = convert_to_list(tickers, Ticker)
    tickers_mapping = {
        (t.get_current_specific_ticker() if isinstance(t, FutureTicker) else t): t for t in tickers
    }
    specific_tickers = list(tickers_mapping.keys())

    fields, got_single_field = convert_to_list(fields, str)
    got_single_date = start_date is not None and (
        (start_date == end_date) if frequency <= Frequency.DAILY
        else (start_date + frequency.time_delta() >= end_date)
    )

    self._check_if_cached_data_available(specific_tickers, fields, start_date, end_date)
    data_array = self._data_bundle.loc[start_date:end_date, specific_tickers, fields]

    normalized_result = normalize_data_array(data_array, specific_tickers, fields, got_single_date,
                                             got_single_ticker, got_single_field, use_prices_types=False)

    # Map the specific tickers onto the tickers given by the tickers_mapping array
    if isinstance(normalized_result, QFDataArray):
        normalized_result = normalized_result.assign_coords(
            tickers=[tickers_mapping[t] for t in normalized_result.tickers.values])
    elif isinstance(normalized_result, PricesDataFrame):
        normalized_result = normalized_result.rename(columns=tickers_mapping)
    elif isinstance(normalized_result, PricesSeries):
        # Name of the PricesSeries can only contain strings
        ticker = tickers[0]
        normalized_result = normalized_result.rename(ticker.name)

    return normalized_result
def use_data_bundle(self, tickers: Union[Ticker, Sequence[Ticker]],
                    fields: Union[PriceField, Sequence[PriceField]], start_date: datetime,
                    end_date: datetime):
    """
    Optimises running of the backtest. All the data will be downloaded before the backtest.
    Note that requesting during the backtest any other ticker or price field than the ones in the params
    of this function will result in an Exception.
    """
    assert not self.is_optimised, "Multiple calls on use_data_bundle() are forbidden"

    ticker_list, _ = convert_to_list(tickers, Ticker)
    field_list, _ = convert_to_list(fields, PriceField)

    self.price_data_provider = PrefetchingDataProvider(self._initial_data_provider, ticker_list,
                                                       field_list, start_date, end_date)
    self.is_optimised = True
def get_fields_history_url(self, fieldlist_id: str, fields: Union[str, Sequence[str]]) -> str:
    """
    Method to create history hapi fields and get history fields address URL.

    Parameters
    ----------
    fieldlist_id: str
        ID of hapi fields
    fields: str
        History fields used in query

    Returns
    -------
    fieldlist_url: str
        URL address of created hapi fields
    """
    field_list, _ = convert_to_list(fields, str)

    fieldlist_payload = {
        '@type': 'HistoryFieldList',
        'identifier': fieldlist_id,
        'title': 'FieldList History Payload',
        'description': 'FieldList History Payload used in creating fields component',
        'contains': [{'mnemonic': mnemonic} for mnemonic in field_list],
    }
    self.logger.info('Field list component payload:\n %s', pprint.pformat(fieldlist_payload))

    return self._get_fields_list_common(fieldlist_id, fieldlist_payload)
def _preload_data_and_generate_chain(self, fields: Union[PriceField, Sequence[PriceField]],
                                     start_date: datetime, end_date: datetime, frequency: Frequency) -> \
        Union[PricesDataFrame, PricesSeries]:
    """
    Function, which at first preloads all of the necessary data, by initializing the Futures Chain object
    with the self._initialize_futures_chain function. Afterwards, it generates the PricesDataFrame
    (PricesSeries) using the self._generate_chain function and updates the self._specific_ticker. It
    returns the resulting PricesDataFrame (PricesSeries).

    At first, it initializes the FuturesChain with all the necessary data. If the selected futures
    adjustment method is the BACK_ADJUST, verify whether the fields contain the PriceField.Open and
    PriceField.Close and add them if needed.
    """
    fields_list, _ = convert_to_list(fields, PriceField)

    # Open and Close are always preloaded (needed e.g. for back-adjustment), together with any fields
    # that were already cached previously
    necessary_fields = set(fields_list).union({PriceField.Open, PriceField.Close})
    necessary_fields = necessary_fields.union(self._cached_fields)
    necessary_fields = list(necessary_fields)

    self._initialize_futures_chain(necessary_fields, start_date, end_date, frequency)

    # Generate the PricesDataFrame (PricesSeries)
    self._chain = self._generate_chain(fields, start_date, end_date)

    # Update the specific ticker and remember which fields are now cached
    self._specific_ticker = self._future_ticker.ticker
    self._cached_fields = set(fields_list)

    # squeeze() collapses the result to a PricesSeries when a single field was requested
    return self._chain[fields_list].loc[start_date:end_date].squeeze()
def get_futures_chain_tickers(self, tickers: Union[FutureTicker, Sequence[FutureTicker]],
                              expiration_date_fields: Union[ExpirationDateField,
                                                            Sequence[ExpirationDateField]]) \
        -> Dict[FutureTicker, QFDataFrame]:
    # Map each FutureTicker onto a QFDataFrame (or QFSeries, when a single field was requested) of the
    # expiration dates of its specific contracts.
    expiration_date_fields, got_single_expiration_date_field = convert_to_list(expiration_date_fields,
                                                                               ExpirationDateField)

    # Translate ExpirationDateField enum values into the provider-specific string names for the request
    mapping_dict = self.expiration_date_field_str_map()
    expiration_date_fields_str = [mapping_dict[field] for field in expiration_date_fields]
    exp_dates_dict = self._get_futures_chain_dict(tickers, expiration_date_fields_str)

    for future_ticker, exp_dates in exp_dates_dict.items():
        # Rename columns back from the provider-specific strings to ExpirationDateField values
        exp_dates = exp_dates.rename(columns=self.str_to_expiration_date_field_map())

        # Propagate the future ticker's static attributes onto every specific contract ticker
        # (note: this mutates the ticker objects held in the index)
        for ticker in exp_dates.index:
            ticker.security_type = future_ticker.security_type
            ticker.point_value = future_ticker.point_value
            ticker.set_name(future_ticker.name)

        if got_single_expiration_date_field:
            # Collapse the frame to a series when only one expiration date field was requested
            exp_dates = exp_dates.squeeze()
        exp_dates_dict[future_ticker] = exp_dates

    return exp_dates_dict
def build_document(self, backtest_summary: BacktestSummary, out_of_sample_start_date: Optional[datetime] = None):
    """
    Build the PDF document with statistics for the given backtest summary.

    Parameters
    ----------
    backtest_summary: BacktestSummary
        summary of the backtest to be documented
    out_of_sample_start_date: Optional[datetime]
        start of the out-of-sample period; defaults to the midpoint of the backtest period

    Raises
    -------
    ValueError
        when the number of model parameters is not 1 or 2. Fix: the check is now performed up front,
        before any document content is generated (previously the header and description were already
        added when the error was raised).
    """
    if backtest_summary.num_of_model_params not in [1, 2]:
        raise ValueError("Incorrect number of parameters. Supported: 1 and 2")

    self.backtest_summary = backtest_summary
    self.backtest_evaluator = BacktestSummaryEvaluator(backtest_summary)
    self.document = Document(backtest_summary.backtest_name)

    # Default out-of-sample start: the midpoint of the backtest period
    self.out_of_sample_start_date = out_of_sample_start_date if out_of_sample_start_date is not None else \
        (backtest_summary.start_date + (backtest_summary.end_date - backtest_summary.start_date) / 2)

    self._add_header()
    self._add_backtest_description()

    tickers_groups_for_stats_purposes = list(self.backtest_summary.tickers)
    # In case of > 1 ticker in the backtest summary, include also stats for all tickers if possible
    if len(self.backtest_summary.tickers) > 1:
        tickers_groups_for_stats_purposes = [self.backtest_summary.tickers] + \
            tickers_groups_for_stats_purposes

    for tickers in tickers_groups_for_stats_purposes:
        tickers, _ = convert_to_list(tickers, Ticker)
        self.document.add_element(NewPageElement())

        if backtest_summary.num_of_model_params == 1:
            self._add_line_plots(tickers)
        else:
            self._add_heat_maps(tickers)
def get_price(self, tickers: Union[Ticker, Sequence[Ticker]],
              fields: Union[PriceField, Sequence[PriceField]], start_date: datetime,
              end_date: datetime = None) -> Union[None, PricesSeries, PricesDataFrame, QFDataArray]:
    """
    Slice prices for the requested tickers / fields / date range out of the cached data bundle and
    normalize the result to the dimensionality of the request.
    """
    ticker_list, single_ticker = convert_to_list(tickers, Ticker)
    field_list, single_field = convert_to_list(fields, PriceField)
    single_date = start_date is not None and start_date == end_date

    # Availability check is optional and controlled by the provider's configuration flag
    if self._check_data_availability:
        self._check_if_cached_data_available(ticker_list, field_list, start_date, end_date)

    raw_slice = self._data_bundle.loc[start_date:end_date, ticker_list, field_list]
    return normalize_data_array(raw_slice, ticker_list, field_list, single_date, single_ticker,
                                single_field, use_prices_types=True)
def __init__(self, model_type: Type[AlphaModel], kwargs: Dict[str, Any],
             modeled_params: Union[str, Sequence[str]]):
    """Store the alpha model factory configuration; every modeled parameter must appear in kwargs."""
    self.model_type = model_type  # type: Type[AlphaModel]
    self.kwargs = kwargs
    self.model_parameters_names, _ = convert_to_list(modeled_params, str)

    # Equivalent subset check replacing the original filtered-set equality comparison
    assert set(self.model_parameters_names).issubset(self.kwargs.keys()), \
        "The modeled_params need to be passed in the kwargs"
def _get_history(self, convert_to_prices_types: bool,
                 tickers: Union[QuandlTicker, Sequence[QuandlTicker]],
                 fields: Union[None, str, Sequence[str], PriceField, Sequence[PriceField]] = None,
                 start_date: datetime = None, end_date: datetime = None) -> \
        Union[QFSeries, QFDataFrame, QFDataArray]:
    """
    NOTE: Only use one Quandl Database at the time.
    Do not mix multiple databases in one query - this is the natural limitation coming from the fact that
    column names (fields) are different across databases.
    """
    tickers, got_single_ticker = convert_to_list(tickers, QuandlTicker)
    got_single_date = start_date is not None and (start_date == end_date)

    if fields is not None:
        fields, got_single_field = convert_to_list(fields, (PriceField, str))
    else:
        got_single_field = False  # all existing fields will be present in the result

    result_dict = {}
    # NOTE(review): itertools.groupby only merges *adjacent* elements — tickers from the same database
    # that are not contiguous in the input trigger separate requests. result_dict.update still collects
    # everything, so the result is correct either way.
    for db_name, ticker_group in groupby(tickers, lambda t: t.database_name):
        ticker_group = list(ticker_group)

        partial_result_dict = self._get_result_for_single_database(convert_to_prices_types, ticker_group,
                                                                   fields, start_date, end_date)
        result_dict.update(partial_result_dict)

    if fields is None:
        # Derive the field list from whatever columns the provider actually returned
        fields = get_fields_from_tickers_data_dict(result_dict)

    result_data_array = tickers_dict_to_data_array(result_dict, tickers, fields)
    normalized_result = normalize_data_array(result_data_array, tickers, fields, got_single_date,
                                             got_single_ticker, got_single_field,
                                             use_prices_types=convert_to_prices_types)
    return normalized_result
def set_css_classes(self, css_classes: Union[str, Sequence[str]] = ""):
    """Set this element's CSS classes; accepts a single class name or a sequence of names."""
    class_list, _ = convert_to_list(css_classes, str)
    # Store the classes as a single space-separated string
    self._css_classes = " ".join(class_list)
def get_current_values(self, tickers: Union[BloombergTicker, Sequence[BloombergTicker]],
                       fields: Union[str, Sequence[str]]) -> Union[None, float, QFSeries, QFDataFrame]:
    """
    Gets the current values of fields for given tickers.

    Parameters
    ----------
    tickers: BloombergTicker, Sequence[BloombergTicker]
        tickers for securities which should be retrieved
    fields: str, Sequence[str]
        fields of securities which should be retrieved

    Returns
    -------
    QFDataFrame/QFSeries
        Either QFDataFrame with 2 dimensions: ticker, field or QFSeries with 1 dimension: ticker or field
        (depending on whether many tickers or fields were provided) is returned.

    Raises
    -------
    BloombergError
        When the data couldn't be fetched from the Bloomberg Service
    """
    self._connect_if_needed()
    self._assert_is_connected()

    tickers, got_single_ticker = convert_to_list(tickers, BloombergTicker)
    fields, got_single_field = convert_to_list(fields, (PriceField, str))

    data_frame = self._reference_data_provider.get(tickers, fields)

    # to keep the order of tickers and fields we reindex the data frame
    data_frame = data_frame.reindex(index=tickers, columns=fields)

    # squeeze unused dimensions: a single requested ticker/field selects row/column 0 instead of a slice
    tickers_indices = 0 if got_single_ticker else slice(None)
    fields_indices = 0 if got_single_field else slice(None)
    squeezed_result = data_frame.iloc[tickers_indices, fields_indices]

    # When both dimensions were squeezed the result is already a scalar and needs no cast; otherwise cast
    # the remaining pandas container to the proper QF type
    casted_result = cast_dataframe_to_proper_type(squeezed_result) if tickers_indices != 0 or fields_indices != 0 \
        else squeezed_result

    return casted_result
def __init__(self, tickers: Union[Ticker, Sequence[Ticker]], start_date: datetime, end_date: datetime,
             data_handler: DataHandler, alpha_models: Union[AlphaModel, Sequence[AlphaModel]],
             settings: Settings, pdf_exporter: PDFExporter, only_entry_signals: bool = True,
             title: str = "Signals Plotter"):
    """Document builder plotting the signals generated by the given alpha models for the given tickers."""
    super().__init__(settings, pdf_exporter, title)

    # Set the market open and close events in order to use the data handler (necessary to e.g. compute
    # the market close time of the previous day)
    MarketOpenEvent.set_trigger_time({"hour": 13, "minute": 30, "second": 0, "microsecond": 0})
    MarketCloseEvent.set_trigger_time({"hour": 20, "minute": 0, "second": 0, "microsecond": 0})

    self.tickers, _ = convert_to_list(tickers, Ticker)
    self.alpha_models, _ = convert_to_list(alpha_models, AlphaModel)
    self.start_date = start_date
    self.end_date = end_date

    self.data_handler = data_handler
    assert isinstance(self.data_handler.timer, SettableTimer)
    self.timer: SettableTimer = self.data_handler.timer
    self.only_entry_signals = only_entry_signals

    # Future tickers need a data provider in order to resolve their current specific contract
    for ticker in tickers:
        if isinstance(ticker, FutureTicker):
            ticker.initialize_data_provider(self.timer, self.data_handler.data_provider)
def get_price(tickers, fields, start_date, end_date, _): prices_bar = [5.0, 10.0, 1.0, 4.0, 50] # Open, High, Low, Close, Volume dates_index = pd.date_range(start_date, end_date, freq='B') tickers, got_single_ticker = convert_to_list(tickers, Ticker) fields, got_single_field = convert_to_list(fields, PriceField) got_single_date = len(dates_index) == 1 prices_df = pd.DataFrame( index=pd.Index(dates_index, name=TICKERS), columns=pd.Index(PriceField.ohlcv(), name=FIELDS), data=[prices_bar] * len(dates_index) ) data_array = tickers_dict_to_data_array({ ticker: prices_df for ticker in self.tickers }, self.tickers, PriceField.ohlcv()) return normalize_data_array(data_array.loc[start_date:end_date, tickers, fields], tickers, fields, got_single_date, got_single_ticker, got_single_field)
def get_current_bar(self, tickers: Union[Ticker, Sequence[Ticker]], frequency: Frequency = None) \
        -> Union[QFSeries, QFDataFrame]:
    """
    Gets the current bar(s) for given Ticker(s). If the bar is not available yet, None is returned.

    If the request for single Ticker was made, then the result is a QFSeries indexed with PriceFields
    (OHLCV). If the request for multiple Tickers was made, then the result has Tickers as an index and
    PriceFields as columns.

    In case of N minutes frequency, the current bar can be returned in the time between (inclusive)
    N minutes after MarketOpenEvent and the MarketCloseEvent).

    E.g. for 1 minute frequency, at 13:00 (if the market opens before 13:00), the 12:59 - 13:00 bar will
    be returned. In case of 15 minutes frequency, when the market opened less than 15 minutes ago,
    Nones will be returned. If current time ("now") contains non-zero seconds or microseconds, Nones will
    be returned.

    Parameters
    -----------
    tickers: Ticker, Sequence[Ticker]
        tickers of the securities which prices should be downloaded
    frequency: Frequency
        frequency of the data

    Returns
    -------
    QFSeries, QFDataFrame
        current bar
    """
    if not tickers:
        return QFSeries()

    # Fall back to the provider's fixed frequency, then to 1 minute
    frequency = frequency or self.fixed_data_provider_frequency or Frequency.MIN_1

    tickers, was_single_ticker_provided = convert_to_list(tickers, Ticker)

    current_datetime = self.timer.now()
    # The current bar covers the range [now - frequency, now) and is labelled with its start time
    start_date = current_datetime - frequency.time_delta()
    prices_data_array = self.get_price(tickers=tickers, fields=PriceField.ohlcv(),
                                       start_date=start_date, end_date=current_datetime,
                                       frequency=frequency)
    try:
        last_available_bars = cast_data_array_to_proper_type(prices_data_array.loc[start_date])
    except KeyError:
        # No bar labelled with start_date -> the current bar is not available (yet)
        return QFDataFrame(index=tickers, columns=PriceField.ohlcv())

    if was_single_ticker_provided:
        last_available_bars = last_available_bars.iloc[0, :]

    return last_available_bars
def _calculate_backtest_summary(self, tickers: Union[Ticker, Sequence[Ticker]],
                                config: FastAlphaModelTesterConfig, prices_data_array: QFDataArray,
                                open_to_open_returns_df: QFDataFrame,
                                exposure_values_df: QFDataFrame) -> BacktestSummaryElement:
    """Assemble a BacktestSummaryElement from portfolio returns and the trades implied by the exposures."""
    ticker_list, _ = convert_to_list(tickers, Ticker)

    portfolio_rets_tms = self._calculate_portfolio_returns_tms(ticker_list, open_to_open_returns_df,
                                                               exposure_values_df)
    trades = self._calculate_trades(prices_data_array, exposure_values_df)

    return BacktestSummaryElement(config.model_parameters(), config.model_parameters_names,
                                  portfolio_rets_tms, trades, ticker_list)
def __init__(self, data_provider: DataProvider, tickers: Union[Ticker, Sequence[Ticker]],
             fields: Union[PriceField, Sequence[PriceField]], start_date: datetime, end_date: datetime,
             frequency: Frequency):
    """Prefetch prices for all the tickers, expanding FutureTickers into their specific contracts."""
    # Convert fields into list in order to return a QFDataArray as the result of get_price function
    fields, _ = convert_to_list(fields, PriceField)
    # Convert the tickers to list and remove duplicates (order-preserving)
    tickers, _ = convert_to_list(tickers, Ticker)
    tickers = list(dict.fromkeys(tickers))

    future_tickers = [t for t in tickers if isinstance(t, FutureTicker)]
    plain_tickers = [t for t in tickers if not isinstance(t, FutureTicker)]

    exp_dates = None
    all_tickers = plain_tickers
    if future_tickers:
        exp_dates = data_provider.get_futures_chain_tickers(future_tickers,
                                                            ExpirationDateField.all_dates())
        # Skip the specific future contracts which expired before start_date
        for future_ticker in future_tickers:
            all_tickers.extend(chain_tickers_within_range(future_ticker, exp_dates[future_ticker],
                                                          start_date, end_date))

    data_array = data_provider.get_price(all_tickers, fields, start_date, end_date, frequency)
    super().__init__(data=data_array, exp_dates=exp_dates, start_date=start_date, end_date=end_date,
                     frequency=frequency)
def _get_single_date_price(self, tickers: Union[Ticker, Sequence[Ticker]], nans_allowed: bool,
                           frequency: Frequency = Frequency.DAILY) -> Union[float, QFSeries]:
    """
    Return the price for the current time for each ticker (a float for a single ticker, a QFSeries
    otherwise). When nans_allowed is False, missing prices are filled with the latest available ones
    from the last 7 days.
    """
    tickers, was_single_ticker_provided = convert_to_list(tickers, Ticker)

    # if an empty tickers list was supplied then return an empty result
    if not tickers:
        return QFSeries()

    # Compute the time ranges, used further by the get_price function
    current_datetime = self.timer.now()

    # We download the prices since the last 7 days. In case of getting the last available price, we assume
    # that within each 7 consecutive days, at least one price will occur. If not, in case e.g. future
    # contracts, we assume that the contract ended and we need to e.g. close the position for this ticker
    # in the portfolio, if open.
    start_date = current_datetime - RelativeDelta(days=7)
    current_date = self._zero_out_time_component(current_datetime)

    price_fields = [PriceField.Open, PriceField.Close]

    prices_data_array = self.data_provider.get_price(tickers, price_fields, start_date, current_date,
                                                     frequency)
    prices_df = self._data_array_to_dataframe(prices_data_array)

    # Keep only rows up to "now" so that future-dated rows cannot leak into the result
    prices_df = prices_df.loc[:current_datetime]

    try:
        prices_series = prices_df.loc[current_datetime, :]
    except KeyError:
        # No price exactly at the current time - start from an all-NaN series
        prices_series = QFSeries(index=tickers)

    prices_series.name = "Current asset prices"

    if not nans_allowed:
        # fill NaNs with latest available prices
        last_available_close_prices = prices_df.apply(func=lambda series: series.asof(current_datetime))

        if not last_available_close_prices.empty:
            unavailable_prices_tickers = prices_series.isnull()
            prices_series.loc[unavailable_prices_tickers] = \
                last_available_close_prices.loc[unavailable_prices_tickers]

        prices_series.name = "Last available asset prices"

    prices_series = cast_series(prices_series, QFSeries)
    if was_single_ticker_provided:
        return prices_series[0]
    else:
        return prices_series
def __init__(self, path: str, tickers: Union[Ticker, Sequence[Ticker]],
             fields: Union[PriceField, List[PriceField]], start_date: datetime, end_date: datetime,
             frequency: Frequency):
    # Preloads prices and contract details from files under `path` for the given tickers and fields,
    # expanding FutureTickers into the specific contracts active within the requested date range.
    self.logger = qf_logger.getChild(self.__class__.__name__)

    if frequency not in [Frequency.DAILY, Frequency.MIN_1]:
        raise NotImplementedError("{} supports only DAILY and MIN_1 bars loading".format(
            self.__class__.__name__))

    fields, _ = convert_to_list(fields, PriceField)
    # Convert to list and remove duplicates
    tickers, _ = convert_to_list(tickers, Ticker)
    tickers = list(dict.fromkeys(tickers))

    future_tickers = [ticker for ticker in tickers if isinstance(ticker, FutureTicker)]
    non_future_tickers = [ticker for ticker in tickers if not isinstance(ticker, FutureTicker)]

    exp_dates = None
    all_tickers = non_future_tickers
    if future_tickers:
        exp_dates = self._get_expiration_dates(path, future_tickers)
        # Filter out all these specific future contracts, which expired before start_date
        for ft in future_tickers:
            all_tickers.extend(chain_tickers_within_range(ft, exp_dates[ft], start_date, end_date))

    data_array, contracts_df = self._get_price_and_contracts(path, all_tickers, fields, start_date,
                                                             end_date, frequency)
    # All "got single" flags are False so the result keeps the full 3-D QFDataArray shape
    normalized_data_array = normalize_data_array(data_array, all_tickers, fields, False, False, False)
    self._contracts_df = contracts_df

    super().__init__(data=normalized_data_array, exp_dates=exp_dates, start_date=start_date,
                     end_date=end_date, frequency=frequency)
def use_data_bundle(self, tickers: Union[Ticker, Sequence[Ticker]],
                    fields: Union[PriceField, Sequence[PriceField]], start_date: datetime,
                    end_date: datetime, frequency: Frequency = Frequency.DAILY):
    """
    Optimises running of the backtest. All the data will be downloaded before the backtest.
    Note that requesting during the backtest any other ticker or price field than the ones in the params
    of this function will result in an Exception.

    Parameters
    ----------
    tickers: Ticker, Sequence[Ticker]
        ticker or sequence of tickers of the securities
    fields: PriceField, Sequence[PriceField]
        PriceField or sequence of PriceFields of the securities
    start_date: datetime
        initial date that should be downloaded
    end_date: datetime
        last date that should be downloaded
    frequency
        frequency of the data
    """
    assert not self.is_optimised, "Multiple calls on use_data_bundle() are forbidden"

    ticker_list, _ = convert_to_list(tickers, Ticker)
    field_list, _ = convert_to_list(fields, PriceField)

    self._check_frequency(frequency)
    self.fixed_data_provider_frequency = frequency

    self.data_provider = PrefetchingDataProvider(self.data_provider, ticker_list, field_list,
                                                 start_date, end_date, frequency)
    self.is_optimised = True
def tickers_dict_to_data_array(tickers_data_dict: Dict[Ticker, QFDataFrame],
                               requested_tickers: Union[Ticker, Sequence[Ticker]],
                               requested_fields) -> QFDataArray:
    """
    Converts a dictionary mapping tickers to DataFrame onto a QFDataArray.

    Parameters
    ----------
    tickers_data_dict: Dict[Ticker, QFDataFrame]
        Ticker -> QFDataFrame[dates, fields]
    requested_tickers: Sequence[Ticker]
    requested_fields

    Returns
    -------
    QFDataArray
    """
    requested_tickers, _ = convert_to_list(requested_tickers, Ticker)

    # return empty xr.DataArray if there is no data to be converted
    if not tickers_data_dict:
        return QFDataArray.create(dates=[], tickers=requested_tickers, fields=requested_fields)

    tickers = []
    data_arrays = []
    for ticker, df in tickers_data_dict.items():
        # if there is no data for a given ticker, skip it (proper column will be added afterwards anyway)
        if df.empty:
            continue

        # rename_axis returns a new frame, so the caller's DataFrame is NOT mutated
        # (previously `df.index.name = DATES` modified the input dict's frames in place)
        data_array = df.rename_axis(DATES).to_xarray().to_array(dim=FIELDS, name=ticker)
        data_array = data_array.transpose(DATES, FIELDS)
        tickers.append(ticker)
        data_arrays.append(data_array)

    if not data_arrays:
        return QFDataArray.create(dates=[], tickers=requested_tickers, fields=requested_fields)

    tickers_index = pd.Index(tickers, name=TICKERS)
    result = QFDataArray.concat(data_arrays, dim=tickers_index)

    if len(tickers) < len(requested_tickers):
        # add all-NaN entries for the tickers that had no data at all
        result = result.reindex(tickers=requested_tickers, fields=requested_fields)

    # the DataArray gets a name after the first ticker in the tickers_data_dict.keys() which is incorrect;
    # it should have no name
    result.name = None

    return result
def get_bar_for_today(self, tickers: Union[Ticker, Sequence[Ticker]]) -> Union[pd.Series, pd.DataFrame]:
    """
    Gets the bar(s) for given Ticker(s) for today. If the bar is not available yet, None is returned.

    If the request for single Ticker was made, then the result is a pandas.Series indexed with PriceFields
    (OHLCV). If the request for multiple Tickers was made, then the result has Tickers as an index and
    PriceFields as columns.

    Normally, on working days the method will return non-empty bars in the time between (inclusive)
    MarketCloseEvent and midnight (exclusive).

    On non-working days or on working days in the time between midnight (inclusive) and MarketClose
    (exclusive), e.g. 12:00, the returned bars will contain Nones as values.
    """
    if not tickers:
        return pd.Series()

    tickers, was_single_ticker_provided = convert_to_list(tickers, Ticker)

    current_datetime = self.timer.now()
    current_date = self._zero_out_time_component(current_datetime)
    start_date = current_date - RelativeDelta(days=7)

    # Today's bar exists only once today's MarketCloseEvent has occurred, i.e. when the latest market
    # close falls on or after today's midnight (current_date). BUG FIX: the comparison previously used
    # `current_datetime`, which is always later than any past event, so the condition was always True
    # and the method unconditionally returned empty bars, contradicting the documented behavior.
    if self.time_helper.datetime_of_latest_market_event(MarketCloseEvent) < current_date:
        last_available_bars = pd.DataFrame(index=tickers, columns=PriceField.ohlcv())
    else:
        prices_data_array = self.get_price(tickers=tickers, fields=PriceField.ohlcv(),
                                           start_date=start_date,
                                           end_date=current_date)  # type: QFDataArray

        if current_date in prices_data_array.dates.to_index():
            # to_index used in the line above as a fix (xarray doesn't handle the "contains" check
            # properly, it thinks that datetime(some_date) != numpy.datetime64(some_date) because of
            # different types)
            last_available_bars = prices_data_array.loc[current_date, :, :].to_pandas()
        else:
            return pd.DataFrame(index=tickers, columns=PriceField.ohlcv())

    if was_single_ticker_provided:
        last_available_bars = last_available_bars.iloc[0, :]

    return last_available_bars
def _get_single_date_price(self, tickers: Union[Ticker, Sequence[Ticker]], nans_allowed: bool) -> Union[float, pd.Series]: tickers, was_single_ticker_provided = convert_to_list(tickers, Ticker) # if an empty tickers list was supplied then return an empty result if not tickers: return pd.Series() current_datetime = self.timer.now() current_date = self._zero_out_time_component(current_datetime) start_date = current_date - RelativeDelta(days=7) price_fields = [PriceField.Open, PriceField.Close] prices_data_array = self.price_data_provider.get_price( tickers, price_fields, start_date, current_date) prices_df = self._data_array_to_dataframe(prices_data_array) prices_df = prices_df.loc[:current_datetime] try: prices_series = prices_df.loc[ current_datetime, :] # axes: date, ticker except KeyError: prices_series = pd.Series(index=tickers) prices_series.name = "Current asset prices" if not nans_allowed: # fill NaNs with latest available prices last_available_close_prices = prices_df.apply( func=lambda series: series.asof(current_datetime)) if not last_available_close_prices.empty: unavailable_prices_tickers = prices_series.isnull() prices_series.loc[unavailable_prices_tickers] = \ last_available_close_prices.loc[unavailable_prices_tickers] prices_series.name = "Last available asset prices" prices_series = cast_series(prices_series, pd.Series) if was_single_ticker_provided: return prices_series[0] else: return prices_series
def get_futures_chain_tickers(self, tickers: Union[FutureTicker, Sequence[FutureTicker]],
                              expiration_date_fields: Union[ExpirationDateField,
                                                            Sequence[ExpirationDateField]]) \
        -> Dict[FutureTicker, Union[QFSeries, QFDataFrame]]:
    """Return the cached expiration-dates container for each of the requested future tickers."""
    ticker_list, _ = convert_to_list(tickers, Ticker)

    # Check if the futures tickers are in the exp_dates keys
    uncached = set(ticker_list) - set(self._exp_dates.keys())
    if uncached:
        names = [t.name for t in ticker_list]
        raise ValueError("Tickers: {} are not available in the Data Bundle".format(names))

    return {ticker: self._exp_dates[ticker] for ticker in ticker_list}
def use_data_preloading(self, tickers: Union[Ticker, Sequence[Ticker]], time_delta: RelativeDelta = None):
    """Preload a price data bundle covering the backtest period plus a leading window (default 1 year)."""
    if time_delta is None:
        time_delta = RelativeDelta(years=1)
    data_start = self.start_date - time_delta

    # The tickers and price fields are sorted in order to always return the same hash of the data bundle
    # for the same set of tickers and fields
    ticker_list, _ = convert_to_list(tickers, Ticker)
    self.data_handler.use_data_bundle(sorted(ticker_list), sorted(PriceField.ohlcv()), data_start,
                                      self.end_date, self.frequency)

    self._hash_of_data_bundle = compute_container_hash(self.data_handler.data_provider.data_bundle)
    self.logger.info("Preloaded data hash value {}".format(self._hash_of_data_bundle))
def __init__(self, data: QFDataFrame = None, columns: Sequence[str] = None,
             css_classes: Union[str, Sequence[str]] = "table", title: str = "",
             grid_proportion: GridProportion = GridProportion.Eight):
    """Table document element backed by a ModelController built from the given data frame."""
    super().__init__(grid_proportion)

    # Default the columns to the data frame's own columns when not given explicitly
    self.model = ModelController(data=data, index=data.index,
                                 columns=columns if columns is not None else data.columns)

    # Set the initial Table css classes
    css_class_list, _ = convert_to_list(css_classes, str)
    self.model.table_styles.add_css_class(css_class_list)

    self.title = title
def create_trades_from_backtest_positions(self,
                                          positions: Union[BacktestPosition, Sequence[BacktestPosition]],
                                          portfolio_values: Optional[QFSeries] = None) \
        -> Union[Trade, Sequence[Trade]]:
    """
    Generates trades from BacktestPositions.

    Parameters
    ----------
    positions: BacktestPosition, Sequence[BacktestPosition]
        Position or positions that will be used to generate the trades
    portfolio_values: Optional[QFSeries]
        Series containing portfolio values at different points in time. It is optional and if provided,
        the percentage pnl value is set in the Trade.

    Returns
    --------
    Trade, Sequence[Trade]
        Generated Trade (in case of one BacktestPosition) or a sequence of Trades
    """
    positions, got_single_position = convert_to_list(positions, BacktestPosition)

    def percentage_pnl(position: BacktestPosition):
        # Percentage PnL is only computable when the portfolio value series was provided
        if portfolio_values is None:
            return None
        return position.total_pnl / portfolio_values.asof(position.start_time)

    trades = [Trade(p.start_time, p.end_time, p.contract(), p.total_pnl, p.total_commission(),
                    p.direction(), percentage_pnl(p)) for p in positions]

    return trades[0] if got_single_position else trades