def validate_data_range(
    data: pd.DataFrame,
    start_date: date,
    end_date: date,
    abs_tol: Optional[int] = 0,
    rel_tol: Optional[float] = 0,
):
    """Check the dates present in ``data`` against the expected trading days.

    The expected dates are whatever ``generate_trading_days`` returns for
    ``start_date``..``end_date``. The received dates are the calendar dates
    of the entries of ``data.index``.

    Args:
        data: Frame whose index provides ``to_pydatetime()``.
        start_date: First day of the expected range.
        end_date: Last day of the expected range.
        abs_tol: Largest number of expected dates that may be absent. A
            falsy value (``0`` or ``None``) disables this check.
        rel_tol: Largest fraction of expected dates that may be absent. A
            falsy value (``0`` or ``None``) disables this check.

    Raises:
        ValueError: If ``data`` holds dates outside the expected set, or if
            more dates are missing than an enabled tolerance allows.
    """
    target_dates = set(
        generate_trading_days(start_date=start_date, end_date=end_date)
    )
    # A set already de-duplicates, so no separate uniqueness pass is needed.
    received_dates = {dt.date() for dt in data.index.to_pydatetime()}

    unexpected_dates = received_dates - target_dates
    if unexpected_dates:
        raise ValueError(f"Data has out-of-range dates: {unexpected_dates}.")

    missing_dates = target_dates - received_dates

    if abs_tol and len(missing_dates) > abs_tol:
        raise ValueError(
            f"Data has {len(missing_dates)} missing dates,"
            f" but abs_tol is {abs_tol}."
        )

    # With no expected dates nothing can be missing; skipping the check also
    # avoids dividing by zero.
    if rel_tol and target_dates:
        ratio = len(missing_dates) / len(target_dates)
        if ratio > rel_tol:
            # ``ratio`` is the missing fraction, so the message says so.
            raise ValueError(
                f"Data is missing {ratio} of the requested dates, but rel_tol"
                f" is {rel_tol}."
            )
def test_retrieving_intermittently_cached_intraday(tmpdir, provider):
    """Request two one-day windows, then the full span, and validate the result.

    Daily bars for "SPY" are retrieved three times through the same
    ``HistoricalRetriever``: for a single day near the start of the span, for
    a single day near its end, and finally for every trading day in the span.
    The frame returned by the last request is checked against the full span.
    """
    hist_retriever = HistoricalRetriever(
        provider=provider, hist_data_dir=tmpdir
    )
    trading_days = generate_trading_days(
        start_date=date.today() - timedelta(days=10),
        end_date=date.today() - timedelta(days=1),
    )
    bars = pd.DataFrame()

    # Two isolated single-day slices first, the whole span last.
    windows = (trading_days[1:2], trading_days[-3:-2], trading_days)
    for window in windows:
        bars = hist_retriever.retrieve_bar_data(
            symbol="SPY",
            start_date=window[0],
            end_date=window[-1],
            bar_size=timedelta(days=1),
        )

    validate_data_range(
        data=bars, start_date=trading_days[0], end_date=trading_days[-1]
    )
def test_retrieving_intermittently_cached_intraday(tmpdir, provider):
    """Request two one-day windows, then the full span, and validate the result.

    Daily bars for an "SPY" stock contract are retrieved three times through
    the same ``HistoricalRetriever``: a single day near the start of the
    span, a single day near its end, and finally the whole span. The test
    returns early, without validating, if the retriever raises
    ``NotImplementedError``.
    """
    hist_retriever = HistoricalRetriever(
        provider=provider, hist_data_dir=tmpdir
    )
    trading_days = generate_trading_days(
        start_date=date.today() - timedelta(days=10),
        end_date=date.today() - timedelta(days=1),
    )
    bars = pd.DataFrame()

    # Two isolated single-day slices first, the whole span last.
    windows = (trading_days[1:2], trading_days[-3:-2], trading_days)
    for window in windows:
        first_day, last_day = window[0], window[-1]
        spy = StockContract(symbol="SPY")
        try:
            bars = hist_retriever.retrieve_bar_data(
                contract=spy,
                start_date=first_day,
                end_date=last_day,
                bar_size=timedelta(days=1),
            )
        except NotImplementedError:  # todo: fix
            return

    validate_data_range(
        data=bars, start_date=trading_days[0], end_date=trading_days[-1]
    )
def test_retrieving_intermittently_cached_trades(tmpdir, provider):
    """Request the first day, the last day, then the full span of trades data.

    The span is the trading days between 2020-07-21 and 2020-07-23. Trades
    for an "SPY" stock contract are retrieved for each window through the
    same ``HistoricalRetriever``, and the frame from the final request is
    validated against the whole span. The test returns early, without
    validating, if the retriever raises ``NotImplementedError``.
    """
    hist_retriever = HistoricalRetriever(
        provider=provider, hist_data_dir=tmpdir
    )
    trading_days = generate_trading_days(
        start_date=date(2020, 7, 21), end_date=date(2020, 7, 23),
    )
    trades = pd.DataFrame()

    # First day only, last day only, then everything.
    windows = (trading_days[:1], trading_days[-1:], trading_days)
    for window in windows:
        first_day, last_day = window[0], window[-1]
        spy = StockContract(symbol="SPY")
        try:
            trades = hist_retriever.retrieve_trades_data(
                contract=spy, start_date=first_day, end_date=last_day,
            )
        except NotImplementedError:
            return

    validate_data_range(
        data=trades, start_date=trading_days[0], end_date=trading_days[-1]
    )
def download_bars_data(
    self,
    contract: AContract,
    start_date: date,
    end_date: date,
    bar_size: timedelta,
    rth: bool,
    **kwargs,
) -> pd.DataFrame:
    """Download historical "TRADES" bars for ``contract`` over a date range.

    A single ``reqHistoricalData`` request is made, ending at ``end_date``
    and spanning as many days as there are trading days between
    ``start_date`` and ``end_date``.

    Args:
        contract: Contract to fetch; converted with ``_to_ib_contract``.
        start_date: First day of the requested range.
        end_date: Last day of the requested range.
        bar_size: Bar width; converted with ``_to_ib_bar_size``.
        rth: Forwarded as ``useRTH`` on the request.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        The bars passed through ``_format_data``, or an empty frame when the
        range has no trading days or nothing was returned.
    """
    ib_contract = self._to_ib_contract(contract=contract)
    dates = generate_trading_days(start_date=start_date, end_date=end_date)
    if len(dates) == 0:
        # Nothing to ask for; avoids sending a "0 D" duration.
        return pd.DataFrame()

    bar_data = self._ib_conn.reqHistoricalData(
        contract=ib_contract,
        endDateTime=end_date,
        durationStr=f"{len(dates)} D",
        barSizeSetting=self._to_ib_bar_size(bar_size=bar_size),
        whatToShow="TRADES",
        # Honour the caller's choice instead of always sending False.
        useRTH=rth,
    )

    data = util.df(objs=bar_data)
    if data is None or len(data) == 0:
        return pd.DataFrame()
    return self._format_data(data=data)
def _get_missing_date_ranges(
    data: pd.DataFrame, start_date: date, end_date: date,
) -> List[List[date]]:
    """Group the trading days that are absent from ``data`` into runs.

    The trading days of ``start_date``..``end_date`` come from
    ``generate_trading_days``. Every day whose date does not occur in
    ``data.index.date`` is missing; missing days that are adjacent in the
    trading-day sequence are collected into one list.

    Dates in ``data`` that are not among the trading days are ignored, so
    they can no longer cause a trailing run of missing days to be dropped.

    Args:
        data: Frame indexed by datetimes (its index must expose ``.date``
            whenever the frame is non-empty).
        start_date: First day of the range to inspect.
        end_date: Last day of the range to inspect.

    Returns:
        A list of runs, each a non-empty list of missing days in order. The
        list is empty when nothing is missing, including when the range
        holds no trading days at all.
    """
    dates = generate_trading_days(start_date=start_date, end_date=end_date)
    # An empty frame may not carry a datetime index, so do not touch
    # ``.date`` on it.
    present = set(data.index.date) if len(data) != 0 else set()

    date_ranges = []
    current_run = []
    for date_ in dates:
        if date_ in present:
            # A present day closes whatever run was open.
            if current_run:
                date_ranges.append(current_run)
                current_run = []
        else:
            current_run.append(date_)

    # Flush the run that reaches the end of the range.
    if current_run:
        date_ranges.append(current_run)
    return date_ranges
def download_bars_data(
    self,
    contract: AContract,
    start_date: date,
    end_date: date,
    bar_size: timedelta,
    rth: bool,
    **kwargs,
):
    """Download bars for ``contract.symbol``, one request per trading day.

    Args:
        contract: Contract whose ``symbol`` is requested.
        start_date: First day of the requested range.
        end_date: Last day of the requested range.
        bar_size: Bar width; also selects daily vs. intraday formatting.
        rth: Currently unused by this method (see TODO below).
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        The per-day results stacked into one frame and formatted with
        ``_format_daily_data`` or ``_format_intraday_data``; an empty frame
        is returned unformatted.
    """
    # TODO: test rth
    dates = generate_trading_days(start_date=start_date, end_date=end_date)
    # NOTE(review): assumes download_stock_data returns a DataFrame - confirm.
    day_frames = [
        self._conn.download_stock_data(
            symbol=contract.symbol, request_date=date_, bar_size=bar_size,
        )
        for date_ in dates
    ]

    # One concat instead of repeated DataFrame.append, which pandas 2.0
    # removed and which copied the accumulated frame on every iteration.
    if day_frames:
        data = pd.concat(day_frames, ignore_index=True)
    else:
        data = pd.DataFrame()

    if len(data) != 0:
        if is_daily(bar_size):
            data = self._format_daily_data(data=data)
        else:
            data = self._format_intraday_data(data=data)

    return data
def hist_file_names(
    start_date: date, end_date: date, bar_size: timedelta,
):
    """Return the CSV file names that cover a date range at ``bar_size``.

    Daily bar sizes map to the single name ``"daily.csv"``. Any other bar
    size yields one name per trading day in ``start_date``..``end_date``,
    built from the day formatted with ``DATE_FORMAT``.
    """
    if not is_daily(bar_size=bar_size):
        trading_days = generate_trading_days(
            start_date=start_date, end_date=end_date
        )
        return [f"{day.strftime(DATE_FORMAT)}.csv" for day in trading_days]

    return ["daily.csv"]
def download_data(
    self,
    symbol: str,
    start_date: date,
    end_date: date,
    bar_size: timedelta,
    **kwargs,
):
    """Download daily or 1-minute data for ``symbol``, one request per day.

    Daily bar sizes use the "chart" request type; a bar size of exactly one
    minute uses "intraday-prices". Each trading day in
    ``start_date``..``end_date`` is fetched from
    ``{base_url}/stock/{symbol}/batch`` with that day as ``exactDate``.

    Args:
        symbol: Ticker; lower-cased when building the URL.
        start_date: First day of the requested range.
        end_date: Last day of the requested range.
        bar_size: Daily, or exactly ``timedelta(minutes=1)``.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        The per-day results stacked into one frame and formatted with
        ``_format_daily_data`` or ``_format_intraday_data``; an empty frame
        is returned unformatted.

    Raises:
        ValueError: If ``bar_size`` is neither daily nor one minute.
    """
    params = {"token": self._api_token}

    if is_daily(bar_size=bar_size):
        request_type = "chart"
        params["chartByDay"] = True
        params["range"] = "date"
    elif bar_size == timedelta(minutes=1):
        request_type = "intraday-prices"
        params["range"] = "1d"
    else:
        raise ValueError(
            f"{type(self)} can only download historical data or"
            f" 1-minute bars. Got a bar size of {bar_size}."
        )

    params["types"] = [request_type]
    url = f"{self._base_url}/stock/{symbol.lower()}/batch"

    day_frames = []
    dates = generate_trading_days(start_date=start_date, end_date=end_date)
    for date_ in dates:
        params["exactDate"] = date_.strftime(self._REQ_DATE_FORMAT)
        r = requests.get(url=url, params=params)
        json_data = json.loads(r.text)
        day_frames.append(pd.DataFrame(data=json_data[request_type]))

    # One concat instead of repeated DataFrame.append, which pandas 2.0
    # removed and which copied the accumulated frame on every iteration.
    if day_frames:
        data = pd.concat(day_frames, ignore_index=True)
    else:
        data = pd.DataFrame()

    if len(data) != 0:
        if is_daily(bar_size):
            data = self._format_daily_data(data=data)
        else:
            data = self._format_intraday_data(data=data)

    return data
def download_trades_data(
    self,
    contract: AContract,
    start_date: date,
    end_date: date,
    rth: bool,
    **kwargs,
) -> pd.DataFrame:
    """Download trades for ``contract.symbol``, one request per trading day.

    Args:
        contract: Contract whose ``symbol`` is requested.
        start_date: First day of the requested range.
        end_date: Last day of the requested range.
        rth: Forwarded to the connection's ``download_trades_data``.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        The per-day results stacked into one frame and passed through
        ``_format_trades_data`` (which is called even when the frame is
        empty).
    """
    dates = generate_trading_days(start_date=start_date, end_date=end_date)
    # NOTE(review): assumes the connection returns a DataFrame per day - confirm.
    day_frames = [
        self._conn.download_trades_data(
            symbol=contract.symbol, request_date=date_, rth=rth
        )
        for date_ in dates
    ]

    # One concat instead of repeated DataFrame.append, which pandas 2.0
    # removed and which copied the accumulated frame on every iteration.
    if day_frames:
        data = pd.concat(day_frames, ignore_index=True)
    else:
        data = pd.DataFrame()

    data = self._format_trades_data(data=data)

    return data
prepare_dataset_intra_single_day(data_dir=data_dir, case=case) broker = get_sme_sim_broker_intra_single_day(data_dir=data_dir) broker.run_sim(cache_only=True) assert broker.acc_cash == expected_acc_cash assert broker.get_position("TEST") == expected_pos def prepare_dataset_daily(data_dir: Path, case: int): f_dir = data_dir / "TEST" os.makedirs(str(f_dir)) f_path = f_dir / "daily.csv" dates = generate_trading_days( start_date=datetime(2020, 2, 1), end_date=datetime(2020, 3, 31), ) index = pd.DatetimeIndex(dates, name="datetime") data = pd.DataFrame( data={ "open": np.full(len(index), 100), "high": np.full(len(index), 100), "low": np.full(len(index), 100), "close": np.full(len(index), 100), "volume": np.full(len(index), 1000), }, index=index, ) apply_case(data=data, case=case)
retriever = HistoricalRetriever(provider=provider, hist_data_dir=tmpdir) contract = StockContract(symbol="SPY") try: data = retriever.retrieve_trades_data( contract=contract, start_date=start_date, end_date=end_date, ) except NotImplementedError: return validate_data_range(data=data, start_date=start_date, end_date=end_date) @pytest.mark.skipif( len(generate_trading_days(start_date=date.today(), end_date=date.today())) == 0, reason="Today is not a trading day.", ) @pytest.mark.parametrize("provider", [provider for provider in HIST_PROVIDERS]) def test_retrieve_non_cached_trades_data_today_partial(tmpdir, provider): end_date = date.today() start_date = end_date - timedelta(days=1) retriever = HistoricalRetriever(provider=provider, hist_data_dir=tmpdir) contract = StockContract(symbol="SPY") try: data = retriever.retrieve_trades_data( contract=contract, start_date=start_date,