def aud_usd_data_loader():
    from nautilus_trader.backtest.data.providers import TestInstrumentProvider
    from tests.test_kit.stubs import TestStubs
    from tests.unit_tests.backtest.test_backtest_config import TEST_DATA_DIR

    instrument = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))

    def parse_csv_tick(df, instrument_id):
        # Yield the instrument first so it is written into the catalog
        # alongside the ticks it belongs to
        yield instrument
        for r in df.values:
            # TrueFX CSV rows: timestamp, bid, ask
            ts = secs_to_nanos(pd.Timestamp(r[0]).timestamp())
            tick = QuoteTick(
                instrument_id=instrument_id,
                bid=Price.from_str(str(r[1])),
                ask=Price.from_str(str(r[2])),
                bid_size=Quantity.from_int(1_000_000),
                ask_size=Quantity.from_int(1_000_000),
                ts_event=ts,
                ts_init=ts,
            )
            yield tick

    catalog = DataCatalog.from_env()
    instrument_provider = InstrumentProvider()
    instrument_provider.add(instrument)
    process_files(
        glob_path=f"{TEST_DATA_DIR}/truefx-audusd-ticks.csv",
        reader=CSVReader(
            block_parser=partial(parse_csv_tick, instrument_id=TestStubs.audusd_id()),
            as_dataframe=True,
        ),
        instrument_provider=instrument_provider,
        catalog=catalog,
    )

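# Usage sketch (illustrative, not part of the original test kit): after the
# loader has run, the ticks can be read back from the same catalog. This
# assumes the environment that DataCatalog.from_env() resolves still points
# at the catalog the loader wrote to.
def example_read_back_loaded_ticks():
    aud_usd_data_loader()
    catalog = DataCatalog.from_env()
    ticks = catalog.quote_ticks(as_nautilus=True)
    assert all(isinstance(t, QuoteTick) for t in ticks)
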
def test_batch_generic_data(self):
    # Arrange
    TestStubs.setup_news_event_persistence()
    process_files(
        glob_path=f"{PACKAGE_ROOT}/data/news_events.csv",
        reader=CSVReader(block_parser=TestStubs.news_event_parser),
        catalog=self.catalog,
    )
    data_config = BacktestDataConfig(
        catalog_path="/root/",
        catalog_fs_protocol="memory",
        data_cls_path=f"{NewsEventData.__module__}.NewsEventData",
        client_id="NewsClient",
    )
    # Add some arbitrary instrument data to appease BacktestEngine
    instrument_data_config = BacktestDataConfig(
        catalog_path="/root/",
        catalog_fs_protocol="memory",
        instrument_id=self.catalog.instruments(as_nautilus=True)[0].id.value,
        data_cls_path=f"{InstrumentStatusUpdate.__module__}.InstrumentStatusUpdate",
    )
    run_config = BacktestRunConfig(
        data=[data_config, instrument_data_config],
        persistence=BetfairTestStubs.persistence_config(catalog_path=self.catalog.path),
        venues=[BetfairTestStubs.betfair_venue_config()],
        strategies=[],
        batch_size_bytes=parse_bytes("1mib"),
    )

    # Act
    node = BacktestNode()
    node.run_sync([run_config])

    # Assert
    assert node

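# Note: parse_bytes above is dask's helper (from dask.utils import parse_bytes);
# it converts a human-readable size string into an integer byte count, which
# BacktestRunConfig uses to batch the streamed data. A quick sanity check:
def example_parse_bytes():
    from dask.utils import parse_bytes

    assert parse_bytes("1mib") == 1024 * 1024  # the batch size used above
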
def test_catalog_generic_data_not_overwritten(self):
    # Arrange
    TestPersistenceStubs.setup_news_event_persistence()
    process_files(
        glob_path=f"{TEST_DATA_DIR}/news_events.csv",
        reader=CSVReader(block_parser=TestPersistenceStubs.news_event_parser),
        catalog=self.catalog,
    )
    objs = self.catalog.generic_data(
        cls=NewsEventData,
        filter_expr=ds.field("currency") == "USD",
        as_nautilus=True,
    )

    # Clear the catalog again
    data_catalog_setup()
    self.catalog = DataCatalog.from_env()
    assert (
        len(self.catalog.generic_data(NewsEventData, raise_on_empty=False, as_nautilus=True)) == 0
    )

    # Overlapping chunks: the second write should add only the 5 new objects
    chunk1, chunk2 = objs[:10], objs[5:15]

    # Act, Assert
    write_objects(catalog=self.catalog, chunk=chunk1)
    assert len(self.catalog.generic_data(NewsEventData)) == 10

    write_objects(catalog=self.catalog, chunk=chunk2)
    assert len(self.catalog.generic_data(NewsEventData)) == 15

def _loaded_data_into_catalog(self):
    self.instrument_provider = BetfairInstrumentProvider.from_instruments([])
    process_files(
        glob_path=PACKAGE_ROOT + "/data/1.166564490.bz2",
        reader=BetfairTestStubs.betfair_reader(instrument_provider=self.instrument_provider),
        instrument_provider=self.instrument_provider,
        catalog=self.catalog,
    )

def test_data_catalog_generic_data(self):
    TestStubs.setup_news_event_persistence()
    process_files(
        glob_path=f"{TEST_DATA_DIR}/news_events.csv",
        reader=CSVReader(block_parser=TestStubs.news_event_parser),
        catalog=self.catalog,
    )
    df = self.catalog.generic_data(cls=NewsEventData, filter_expr=ds.field("currency") == "USD")
    assert len(df) == 22925

    data = self.catalog.generic_data(
        cls=NewsEventData,
        filter_expr=ds.field("currency") == "CHF",
        as_nautilus=True,
    )
    assert len(data) == 2745
    assert isinstance(data[0], GenericData)

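# Note on the block_parser contract used throughout these tests: CSVReader
# hands the parser one parsed block at a time (a DataFrame -- as the bar tests
# below suggest, as_dataframe defaults to True) and the parser either yields
# nautilus objects one by one (as parse_csv_tick does above) or returns a list
# of them (as the bar parser further below does); process_files then writes
# whatever comes back into the catalog.
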
def test_backtest_data_config_status_updates(self):
    # Arrange
    process_files(
        glob_path=PACKAGE_ROOT + "/data/1.166564490.bz2",
        reader=BetfairTestStubs.betfair_reader(),
        catalog=self.catalog,
    )
    c = BacktestDataConfig(
        catalog_path="/root/",
        catalog_fs_protocol="memory",
        data_cls=InstrumentStatusUpdate,
    )

    # Act
    result = c.load()

    # Assert
    assert len(result["data"]) == 2
    assert result["instrument"] is None
    assert result["client_id"] is None

def test_csv_reader_dataframe_separator(self):
    # Arrange
    bar_type = TestDataStubs.bartype_adabtc_binance_1min_last()
    instrument = TestInstrumentProvider.adabtc_binance()
    wrangler = BarDataWrangler(bar_type, instrument)

    def parser(data):
        data["timestamp"] = data["timestamp"].astype("datetime64[ms]")
        bars = wrangler.process(data.set_index("timestamp"))
        return bars

    # Header supplied explicitly since the raw files contain no header row
    binance_spot_header = [
        "timestamp",
        "open",
        "high",
        "low",
        "close",
        "volume",
        "ts_close",
        "quote_volume",
        "n_trades",
        "taker_buy_base_volume",
        "taker_buy_quote_volume",
        "ignore",
    ]
    reader = CSVReader(block_parser=parser, header=binance_spot_header, separator="|")

    # Act
    in_ = process_files(
        glob_path=f"{TEST_DATA_DIR}/ADABTC_pipe_separated-1m-2021-11-*.csv",
        reader=reader,
        catalog=self.catalog,
    )

    # Assert
    assert sum(in_.values()) == 10

def test_backtest_data_config_filters(self):
    # Arrange
    TestPersistenceStubs.setup_news_event_persistence()
    process_files(
        glob_path=f"{TEST_DATA_DIR}/news_events.csv",
        reader=CSVReader(block_parser=TestPersistenceStubs.news_event_parser),
        catalog=self.catalog,
    )
    c = BacktestDataConfig(
        catalog_path="/root/",
        catalog_fs_protocol="memory",
        data_cls=NewsEventData,
        filter_expr="field('currency') == 'CHF'",
        client_id="NewsClient",
    )

    # Act
    result = c.load()

    # Assert
    assert len(result["data"]) == 2745

def test_backtest_data_config_generic_data(self):
    # Arrange
    TestPersistenceStubs.setup_news_event_persistence()
    process_files(
        glob_path=f"{TEST_DATA_DIR}/news_events.csv",
        reader=CSVReader(block_parser=TestPersistenceStubs.news_event_parser),
        catalog=self.catalog,
    )
    c = BacktestDataConfig(
        catalog_path="/root/",
        catalog_fs_protocol="memory",
        data_cls=NewsEventData,
        client_id="NewsClient",
    )

    # Act
    result = c.load()

    # Assert
    assert len(result["data"]) == 86985
    assert result["instrument"] is None
    assert result["client_id"] == ClientId("NewsClient")

def test_split_and_serialize_generic_data_gets_correct_class(self):
    # Arrange
    TestPersistenceStubs.setup_news_event_persistence()
    process_files(
        glob_path=f"{TEST_DATA_DIR}/news_events.csv",
        reader=CSVReader(block_parser=TestPersistenceStubs.news_event_parser),
        catalog=self.catalog,
    )
    objs = self.catalog.generic_data(
        cls=NewsEventData,
        filter_expr=ds.field("currency") == "USD",
        as_nautilus=True,
    )

    # Act
    split = split_and_serialize(objs)

    # Assert
    assert NewsEventData in split
    assert None in split[NewsEventData]  # keyed by instrument_id; None for generic data
    assert len(split[NewsEventData][None]) == 22941

def test_feather_writer_generic_data(self):
    # Arrange
    TestPersistenceStubs.setup_news_event_persistence()
    process_files(
        glob_path=f"{PACKAGE_ROOT}/data/news_events.csv",
        reader=CSVReader(block_parser=TestPersistenceStubs.news_event_parser),
        catalog=self.catalog,
    )
    data_config = BacktestDataConfig(
        catalog_path="/root/",
        catalog_fs_protocol="memory",
        data_cls=NewsEventData,
        client_id="NewsClient",
    )
    # Add some arbitrary instrument data to appease BacktestEngine
    instrument_data_config = BacktestDataConfig(
        catalog_path="/root/",
        catalog_fs_protocol="memory",
        data_cls=InstrumentStatusUpdate,
    )
    run_config = BacktestRunConfig(
        data=[data_config, instrument_data_config],
        persistence=BetfairTestStubs.persistence_config(catalog_path=self.catalog.path),
        venues=[BetfairTestStubs.betfair_venue_config()],
        strategies=[],
    )

    # Act
    node = BacktestNode()
    node.run_sync([run_config])

    # Assert
    result = self.catalog.read_backtest(
        backtest_run_id=run_config.id,
        raise_on_failed_deserialize=True,
    )
    result = Counter([r.__class__.__name__ for r in result])
    assert result["NewsEventData"] == 86985

def _loaded_data_into_catalog(self):
    self.instrument_provider = BetfairInstrumentProvider.from_instruments([])
    result = process_files(
        glob_path=PACKAGE_ROOT + "/data/1.166564490*.bz2",
        reader=BetfairTestStubs.betfair_reader(instrument_provider=self.instrument_provider),
        instrument_provider=self.instrument_provider,
        catalog=self.catalog,
    )
    assert result
    data = (
        self.catalog.instruments(as_nautilus=True)
        + self.catalog.instrument_status_updates(as_nautilus=True)
        + self.catalog.trade_ticks(as_nautilus=True)
        + self.catalog.order_book_deltas(as_nautilus=True)
        + self.catalog.tickers(as_nautilus=True)
    )
    return data

def test_load_text_betfair(self):
    # Arrange
    instrument_provider = BetfairInstrumentProvider.from_instruments([])

    # Act
    files = process_files(
        glob_path=f"{TEST_DATA_DIR}/**.bz2",
        reader=BetfairTestStubs.betfair_reader(instrument_provider=instrument_provider),
        catalog=self.catalog,
        instrument_provider=instrument_provider,
    )

    # Assert
    assert files == {
        TEST_DATA_DIR + "/1.166564490.bz2": 2908,
        TEST_DATA_DIR + "/betfair/1.180305278.bz2": 17085,
        TEST_DATA_DIR + "/betfair/1.166811431.bz2": 22692,
    }

def test_load_dask_distributed_client(self):
    # Arrange
    from distributed import Client

    instrument_provider = BetfairInstrumentProvider.from_instruments([])

    with Client(processes=False, threads_per_worker=1) as c:
        tasks = process_files(
            glob_path=f"{TEST_DATA_DIR}/1.166564490*",
            reader=make_betfair_reader(instrument_provider),
            catalog=self.catalog,
            instrument_provider=instrument_provider,
        )

        # Act
        results = c.gather(c.compute(tasks))

    # Assert
    expected = {TEST_DATA_DIR + "/1.166564490.bz2": 2908}
    assert results == expected

def test_data_catalog_bars(self):
    # Arrange
    bar_type = TestDataStubs.bartype_adabtc_binance_1min_last()
    instrument = TestInstrumentProvider.adabtc_binance()
    wrangler = BarDataWrangler(bar_type, instrument)

    def parser(data):
        data["timestamp"] = data["timestamp"].astype("datetime64[ms]")
        bars = wrangler.process(data.set_index("timestamp"))
        return bars

    binance_spot_header = [
        "timestamp",
        "open",
        "high",
        "low",
        "close",
        "volume",
        "ts_close",
        "quote_volume",
        "n_trades",
        "taker_buy_base_volume",
        "taker_buy_quote_volume",
        "ignore",
    ]
    reader = CSVReader(block_parser=parser, header=binance_spot_header)

    # Act
    _ = process_files(
        glob_path=f"{TEST_DATA_DIR}/ADABTC-1m-2021-11-*.csv",
        reader=reader,
        catalog=self.catalog,
    )

    # Assert
    bars = self.catalog.bars()
    assert len(bars) == 21