Пример #1
0
    def test_compute_container_hash__data_array(self):
        """
        Checks compute_container_hash on data arrays: two arrays built from identically ordered
        dictionaries must hash equally, while the array built from the dictionary with the
        reversed insertion order is asserted to hash differently.
        """
        first_ticker = BloombergTicker("Example 1")
        second_ticker = BloombergTicker("Example 2")

        two_fields_prices = QFDataFrame(data={
            PriceField.Close: [1, 2, 3],
            PriceField.Open: [4, 5, 6]
        })
        close_only_prices = QFDataFrame(data={PriceField.Close: [5, 7, 8]})

        def build_data_array(tickers_to_prices):
            # Every array uses the same tickers and fields; only the input dictionary differs
            return tickers_dict_to_data_array(
                tickers_to_prices,
                [first_ticker, second_ticker],
                [PriceField.Open, PriceField.Close])

        reference_hash = compute_container_hash(build_data_array({
            first_ticker: two_fields_prices,
            second_ticker: close_only_prices
        }))
        same_order_hash = compute_container_hash(build_data_array({
            first_ticker: two_fields_prices,
            second_ticker: close_only_prices,
        }))
        reversed_order_hash = compute_container_hash(build_data_array({
            second_ticker: close_only_prices,
            first_ticker: two_fields_prices,
        }))

        self.assertEqual(reference_hash, same_order_hash)
        self.assertNotEqual(reference_hash, reversed_order_hash)
Пример #2
0
    def get_history(
            self, tickers: Union[CcyTicker, Sequence[CcyTicker]], fields: Union[None, str, Sequence[str]] = None,
            start_date: datetime = None, end_date: datetime = None, **kwargs) \
            -> Union[QFSeries, QFDataFrame, QFDataArray]:
        """
        Fetches historical data for the given tickers and returns it in normalized form.

        Parameters
        ----------
        tickers
            a single ticker or a sequence of tickers
        fields
            a single field, a sequence of fields or None; with None the fields are taken from the fetched data
        start_date
            beginning of the requested period
        end_date
            end of the requested period
        kwargs
            accepted but not used by this method

        Returns
        -------
        QFSeries, QFDataFrame, QFDataArray
            output of normalize_data_array for the fetched data
        """
        tickers, got_single_ticker = convert_to_list(tickers, CcyTicker)
        got_single_date = start_date is not None and (start_date == end_date)

        # Without explicitly requested fields all existing fields will be present in the result
        got_single_field = False
        if fields is not None:
            fields, got_single_field = convert_to_list(fields, (PriceField, str))

        # One session is shared by the requests of all tickers
        with Session() as session:
            data_per_ticker = {
                ticker: self._get_single_ticker(ticker, fields, start_date, end_date, session)
                for ticker in tickers
            }

        if fields is None:
            fields = get_fields_from_tickers_data_dict(data_per_ticker)

        data_array = tickers_dict_to_data_array(data_per_ticker, tickers, fields)
        return normalize_data_array(
            data_array, tickers, fields, got_single_date, got_single_ticker, got_single_field)
Пример #3
0
    def get_history(self, filepath: str) -> QFDataArray:
        """
        Parses a downloaded hapi response file and converts its history data into a QFDataArray.

        Parameters
        ----------
        filepath: str
            The full filepath with downloaded response

        Returns
        -------
        QFDataArray
            QFDataArray with history data
        """
        leading_columns = ["Ticker", "Error code", "Num flds", "Pricing Source", "Dates"]
        fields, content = self._get_fields_and_data_content(filepath, column_names=leading_columns)

        tickers = content["Ticker"].unique().tolist()
        content["Dates"] = to_datetime(content["Dates"], format="%Y%m%d")
        content = content.drop(columns=["Error code", "Num flds", "Pricing Source"])

        tickers_dict = {}
        for ticker in tickers:
            # Rows of a single ticker, indexed by date; rows without a date are discarded
            ticker_rows = content[content["Ticker"] == ticker]
            ticker_rows = ticker_rows.drop(columns="Ticker")
            ticker_rows = ticker_rows.dropna(subset=["Dates"])
            tickers_dict[ticker] = ticker_rows.set_index("Dates")

        return tickers_dict_to_data_array(tickers_dict, tickers, fields)
Пример #4
0
    def test_tickers_dict_to_data_array(self):
        """
        Checks that tickers_dict_to_data_array produces a float64 data array equal to the expected one,
        with NaN in place of the Open price which the second ticker's data frame does not contain.
        """
        first_ticker = BloombergTicker("Example 1")
        second_ticker = BloombergTicker("Example 2")
        fields = [PriceField.Open, PriceField.Close]
        index = self.index[:3]

        # Expected values laid out as [date][ticker][field], fields ordered as (Open, Close)
        data = [
            [[4., 1.], [nan, 5.]],
            [[5., 2.], [nan, 7.]],
            [[6., 3.], [nan, 8.]],
        ]
        expected_data_array = QFDataArray.create(index, [first_ticker, second_ticker], fields, data)

        full_prices = QFDataFrame(
            data={PriceField.Close: [1., 2., 3.], PriceField.Open: [4., 5., 6.]},
            index=index)
        close_only_prices = QFDataFrame(data={PriceField.Close: [5., 7., 8.]}, index=index)

        tickers_to_prices = {first_ticker: full_prices, second_ticker: close_only_prices}
        data_array = tickers_dict_to_data_array(tickers_to_prices, [first_ticker, second_ticker], fields)

        self.assertEqual(dtype("float64"), data_array.dtype)
        assert_equal(data_array, expected_data_array)
Пример #5
0
    def _get_price_and_contracts(self, path: str, tickers: Sequence[Ticker], fields: Sequence[PriceField],
                                 start_date: datetime, end_date: datetime, freq: Frequency):
        """
        Loads prices (and contract names, when available) of the given tickers from CSV files.

        The directory tree under path is searched recursively for files named "<ticker string>.csv".
        Tickers which are instances of FutureTicker are not looked up. A file is used only if its columns
        match the requested frequency: for Frequency.MIN_1 a "Time" column is required, for Frequency.DAILY
        the file needs a "Date" column and no "Time" column. Other files are logged and skipped.

        Parameters
        ----------
        path: str
            root directory which is searched for the CSV files
        tickers: Sequence[Ticker]
            tickers whose files should be loaded
        fields: Sequence[PriceField]
            price fields which should be kept in the result
        start_date: datetime
            first date of the returned range
        end_date: datetime
            last date of the returned range
        freq: Frequency
            frequency of the data; decides how the index is built and which volume column is used

        Returns
        -------
        tuple
            data array built by tickers_dict_to_data_array from the loaded prices and a QFDataFrame
            with the "Contract" column of each loaded ticker (empty series if the column is missing)
        """
        field_to_price_field_dict = {
            'Open': PriceField.Open,
            'High': PriceField.High,
            'Low': PriceField.Low,
            'Close': PriceField.Close,
            'LastPrice': PriceField.Close,
            'Date': 'dates',
            'Date_Time': 'dates'
        }

        # it is required to distinguish intraday and daily volume
        if freq == Frequency.MIN_1:
            field_to_price_field_dict['TradeVolume'] = PriceField.Volume  # for intraday
        elif freq == Frequency.DAILY:
            field_to_price_field_dict['Volume'] = PriceField.Volume  # for daily

        tickers_strings_to_tickers = {
            ticker.as_string(): ticker for ticker in tickers if not isinstance(ticker, FutureTicker)
        }

        root_dir = Path(path)
        csv_paths = [
            csv_path
            for ticker_str in tickers_strings_to_tickers
            for csv_path in root_dir.glob('**/{}.csv'.format(ticker_str))
        ]

        tickers_prices_dict = {}
        contracts_data = {}

        # A dedicated loop variable is used, so that the path parameter is not overwritten
        for csv_path in csv_paths:
            # stem removes only the final ".csv" suffix, so ticker strings containing ".csv" stay intact
            ticker = tickers_strings_to_tickers[csv_path.stem]
            # It is important to save the Time and Date as strings, in order to correctly infer the date format
            df = QFDataFrame(pd.read_csv(csv_path, dtype={"Time": str, "Date": str, "Date_Time": str}))

            if 'Time' in df and freq == Frequency.MIN_1:
                df.index = pd.to_datetime(df["Date"] + ' ' + df["Time"])
            elif 'Time' not in df and 'Date' in df and freq == Frequency.DAILY:
                df.index = pd.to_datetime(df['Date'])
            else:
                self.logger.info(f"Ticker {ticker} does not satisfy timing requirements. File path: {csv_path}")
                continue

            contracts_data[ticker] = df['Contract'] if 'Contract' in df.columns else QFSeries()

            df = df.rename(columns=field_to_price_field_dict)
            # Keep only the requested date range and the requested fields
            df = df.loc[start_date:end_date, df.columns.isin(fields)]
            fields_diff = set(fields).difference(df.columns)
            if fields_diff:
                self.logger.info("Not all fields are available for {}. Difference: {}".format(ticker, fields_diff))

            tickers_prices_dict[ticker] = QFDataFrame(df)

        contracts_df = QFDataFrame(contracts_data)
        return tickers_dict_to_data_array(tickers_prices_dict, list(tickers_prices_dict.keys()), fields), contracts_df
Пример #6
0
    def _setup_data_handler(self, volume_value: Optional[float]) -> DataHandler:
        """
        Creates a DailyDataHandler backed by preset data, in which the volume of self.ticker equals
        volume_value on every day between 2019-12-01 and 2020-01-30. The timer is set to the last of these days.
        """
        first_day = str_to_date("2019-12-01")
        last_day = str_to_date("2020-01-30")
        dates = pd.date_range(first_day, last_day, freq='D')

        constant_volumes = [volume_value] * len(dates)
        volumes_df = QFDataFrame(data={PriceField.Volume: constant_volumes}, index=dates)
        volumes_array = tickers_dict_to_data_array({self.ticker: volumes_df}, [self.ticker], [PriceField.Volume])

        timer = SettableTimer(dates[-1])
        data_provider = PresetDataProvider(volumes_array, dates[0], dates[-1], Frequency.DAILY)
        return DailyDataHandler(data_provider, timer)
Пример #7
0
    def _receive_historical_response(
            self, requested_tickers: Sequence[BloombergTicker],
            requested_fields: Sequence[str]):
        """
        Reads the response events of the session and combines the historical data of all tickers.

        Data of events which belong to the same ticker is concatenated. Events which raise a BloombergError
        are logged and skipped, as is data of securities which were not present in the request.

        Parameters
        ----------
        requested_tickers: Sequence[BloombergTicker]
            tickers which were requested; used to map the received security names back to tickers
        requested_fields: Sequence[str]
            names of the fields to read from each response element

        Returns
        -------
        data array built by tickers_dict_to_data_array, containing only the tickers for which data was received
        """
        # Imported locally: DataFrame.append was removed in pandas 2.0 and concat is its replacement
        from pandas import concat

        ticker_str_to_ticker: Dict[str, BloombergTicker] = {
            t.as_string(): t
            for t in requested_tickers
        }

        response_events = get_response_events(self._session)
        # The chunks of each ticker are collected and concatenated once at the end, which avoids copying
        # the accumulated data frame for each event
        frames_per_ticker = defaultdict(list)

        for event in response_events:
            try:
                check_event_for_errors(event)

                security_data = extract_security_data(event)
                check_security_data_for_errors(security_data)

                field_data_array = security_data.getElement(FIELD_DATA)
                dates = [
                    to_datetime(x.getElementAsDatetime(DATE))
                    for x in field_data_array.values()
                ]

                dates_fields_values = QFDataFrame(np.nan,
                                                  index=dates,
                                                  columns=requested_fields)

                for field_name in requested_fields:
                    dates_fields_values.loc[:, field_name] = [
                        self._get_float_or_nan(data_of_date_elem, field_name)
                        for data_of_date_elem in field_data_array.values()
                    ]
                security_name = security_data.getElementAsString(SECURITY)
            except BloombergError as e:
                self.logger.error(e)
                continue

            # Only the lookup is guarded, so that a KeyError raised elsewhere is not reported as unknown ticker
            try:
                ticker = ticker_str_to_ticker[security_name]
            except KeyError:
                self.logger.warning(
                    f"Received data for a ticker which was not present in the request: "
                    f"{security_name}. The data for that ticker will be excluded from parsing."
                )
                continue

            frames_per_ticker[ticker].append(dates_fields_values)

        tickers_data_dict = {
            ticker: QFDataFrame(concat(frames))
            for ticker, frames in frames_per_ticker.items()
        }

        return tickers_dict_to_data_array(tickers_data_dict,
                                          list(tickers_data_dict.keys()),
                                          requested_fields)
Пример #8
0
    def _receive_historical_response(self, requested_tickers,
                                     requested_fields):
        """
        Reads the response events of the session and builds a data array with the historical data.

        An error reported for the whole event is not handled here (it propagates from check_event_for_errors),
        while a BloombergError raised during processing of a security's data is logged and the security
        is left out of the collected data.

        Parameters
        ----------
        requested_tickers
            tickers passed to tickers_dict_to_data_array as the tickers of the result
        requested_fields
            names of the fields to read from each response element

        Returns
        -------
        data array built by tickers_dict_to_data_array
        """
        # mapping: ticker -> DataFrame[dates, fields]
        tickers_data_dict = {}  # type: Dict[BloombergTicker, pd.DataFrame]

        for event in get_response_events(self._session):
            check_event_for_errors(event)
            security_data = extract_security_data(event)
            ticker = BloombergTicker.from_string(security_data.getElementAsString(SECURITY))

            try:
                check_security_data_for_errors(security_data)
                field_data_array = security_data.getElement(FIELD_DATA)

                # One element per date; collect the elements together with their dates
                elements_of_dates = []
                dates = []
                for position in range(field_data_array.numValues()):
                    element = field_data_array.getValueAsElement(position)
                    elements_of_dates.append(element)
                    dates.append(pd.to_datetime(element.getElementAsDatetime(DATE)))

                # Start from a NaN-filled table and fill it column by column
                nan_values = np.full((len(dates), len(requested_fields)), np.nan)
                values_df = pd.DataFrame(nan_values, index=dates, columns=requested_fields)

                for field_name in requested_fields:
                    values_df.loc[:, field_name] = [
                        self._get_float_or_nan(element, field_name) for element in elements_of_dates
                    ]

                tickers_data_dict[ticker] = values_df
            except BloombergError:
                self.logger.exception("Error in the received historical response")

        return tickers_dict_to_data_array(tickers_data_dict, requested_tickers, requested_fields)
Пример #9
0
    def _get_history(
            self, convert_to_prices_types: bool, tickers: Union[QuandlTicker, Sequence[QuandlTicker]],
            fields: Union[None, str, Sequence[str], PriceField, Sequence[PriceField]] = None,
            start_date: datetime = None, end_date: datetime = None) -> \
            Union[QFSeries, QFDataFrame, QFDataArray]:
        """
        Fetches historical data for the given tickers, querying each database once, and normalizes the result.

        NOTE: Only use one Quandl Database at the time.
        Do not mix multiple databases in one query - this is the natural limitation coming from the fact that column
        names (fields) are different across databases.

        Parameters
        ----------
        convert_to_prices_types: bool
            forwarded to the database query and to normalize_data_array (as use_prices_types)
        tickers
            a single ticker or a sequence of tickers
        fields
            a single field, a sequence of fields or None; with None the fields are taken from the fetched data
        start_date
            beginning of the requested period
        end_date
            end of the requested period

        Returns
        -------
        QFSeries, QFDataFrame, QFDataArray
            output of normalize_data_array for the fetched data
        """
        tickers, got_single_ticker = convert_to_list(tickers, QuandlTicker)
        got_single_date = start_date is not None and (start_date == end_date)

        if fields is not None:
            fields, got_single_field = convert_to_list(fields,
                                                       (PriceField, str))
        else:
            got_single_field = False  # all existing fields will be present in the result

        # Group the tickers by database regardless of their position in the input. Grouping only consecutive
        # tickers would query the same database multiple times whenever its tickers are not adjacent.
        # NOTE(review): assumes database_name is hashable (e.g. a string) - confirm
        tickers_by_database = {}
        for ticker in tickers:
            tickers_by_database.setdefault(ticker.database_name, []).append(ticker)

        result_dict = {}
        for ticker_group in tickers_by_database.values():
            partial_result_dict = self._get_result_for_single_database(
                convert_to_prices_types, ticker_group, fields, start_date,
                end_date)

            result_dict.update(partial_result_dict)

        if fields is None:
            fields = get_fields_from_tickers_data_dict(result_dict)

        result_data_array = tickers_dict_to_data_array(result_dict, tickers,
                                                       fields)

        normalized_result = normalize_data_array(
            result_data_array,
            tickers,
            fields,
            got_single_date,
            got_single_ticker,
            got_single_field,
            use_prices_types=convert_to_prices_types)

        return normalized_result
Пример #10
0
    def _get_data_for_backtest(self) -> QFDataArray:
        """
        Creates a QFDataArray containing OHLCV values for all tickers passed to Fast Alpha Models Tester.

        The timer is set to the end date before the prices are loaded. Prices of future tickers are taken
        from a FuturesChain, prices of all other tickers directly from the data handler.
        """
        print("\nLoading all price values of tickers:")
        self._timer.set_current_time(self._end_date)

        def load_prices(ticker):
            # Regular tickers are served by the data handler, future tickers need a futures chain
            if not isinstance(ticker, FutureTicker):
                return self._data_handler.get_price(ticker, PriceField.ohlcv(), self._start_date, self._end_date)

            futures_chain = FuturesChain(ticker, self._data_handler)
            return futures_chain.get_price(PriceField.ohlcv(), self._start_date, self._end_date, Frequency.DAILY)

        prices_per_ticker = {
            ticker: load_prices(ticker) for ticker in tqdm(self._tickers, file=sys.stdout)
        }
        return tickers_dict_to_data_array(prices_per_ticker, self._tickers, PriceField.ohlcv())
Пример #11
0
        def get_price(tickers, fields, start_date, end_date, _):
            """
            Stub of a price getter: returns the same OHLCV bar for every business day between start_date
            and end_date, normalized for the requested tickers and fields. The last argument is ignored.
            """
            dates_index = pd.date_range(start_date, end_date, freq='B')
            tickers, got_single_ticker = convert_to_list(tickers, Ticker)
            fields, got_single_field = convert_to_list(fields, PriceField)
            got_single_date = len(dates_index) == 1

            constant_bar = [5.0, 10.0, 1.0, 4.0, 50]  # Open, High, Low, Close, Volume
            # NOTE(review): the dates index is named TICKERS - looks unintended, confirm
            rows_index = pd.Index(dates_index, name=TICKERS)
            columns_index = pd.Index(PriceField.ohlcv(), name=FIELDS)
            prices_df = pd.DataFrame(
                data=[constant_bar for _date in dates_index],
                index=rows_index,
                columns=columns_index
            )

            # Every ticker known to the enclosing object gets the same prices
            prices_per_ticker = {ticker: prices_df for ticker in self.tickers}
            data_array = tickers_dict_to_data_array(prices_per_ticker, self.tickers, PriceField.ohlcv())

            requested_part = data_array.loc[start_date:end_date, tickers, fields]
            return normalize_data_array(requested_part, tickers, fields,
                                        got_single_date, got_single_ticker, got_single_field)
Пример #12
0
    def _get_intraday_data(self, ref_data_service,
                           tickers: Sequence[BloombergTicker], fields,
                           start_date, end_date, frequency):
        """
        Sends one "IntradayBarRequest" per ticker, reads the response of each request right after sending it
        and combines the outputs of all tickers into a single data array.
        """
        responses_per_ticker = {}

        for ticker in tickers:
            bar_request = ref_data_service.createRequest("IntradayBarRequest")
            set_ticker(bar_request, ticker.as_string())
            self._set_intraday_time_period(bar_request, start_date, end_date, frequency)
            self._session.sendRequest(bar_request)

            # The response is consumed before the request for the next ticker is sent
            responses_per_ticker[ticker] = self._receive_intraday_response(ticker, fields)

        received_tickers = list(responses_per_ticker)
        return tickers_dict_to_data_array(responses_per_ticker, received_tickers, fields)