def setup(self):
    """
    Prepare a clean in-memory catalog and a market order in each life-cycle state.

    Each later state is a shallow copy of the previous one with the next
    order event applied, so every intermediate state stays available.
    """
    # Fixture Setup
    _reset()
    self.catalog = DataCatalog(path="/root", fs_protocol="memory")

    factory = OrderFactory(
        trader_id=TraderId("T-001"),
        strategy_id=StrategyId("S-001"),
        clock=TestClock(),
    )
    self.order_factory = factory

    initial = factory.market(
        AUDUSD_SIM.id,
        OrderSide.BUY,
        Quantity.from_int(100000),
    )

    # Walk the order through submitted -> accepted -> pending cancel -> cancelled
    submitted = copy.copy(initial)
    submitted.apply(TestStubs.event_order_submitted(initial))

    accepted = copy.copy(submitted)
    accepted.apply(TestStubs.event_order_accepted(submitted))

    pending_cancel = copy.copy(accepted)
    pending_cancel.apply(TestStubs.event_order_pending_cancel(accepted))

    cancelled = copy.copy(pending_cancel)
    cancelled.apply(TestStubs.event_order_canceled(pending_cancel))

    self.order = initial
    self.order_submitted = submitted
    self.order_accepted = accepted
    self.order_pending_cancel = pending_cancel
    self.order_cancelled = cancelled
def test_catalog_generic_data_not_overwritten(self):
    """Writing two overlapping chunks of generic data must not lose or duplicate rows."""
    # Arrange
    TestPersistenceStubs.setup_news_event_persistence()
    news_reader = CSVReader(block_parser=TestPersistenceStubs.news_event_parser)
    process_files(
        glob_path=f"{TEST_DATA_DIR}/news_events.csv",
        reader=news_reader,
        catalog=self.catalog,
    )
    usd_only = ds.field("currency") == "USD"
    events = self.catalog.generic_data(
        cls=NewsEventData,
        filter_expr=usd_only,
        as_nautilus=True,
    )

    # Clear the catalog again
    data_catalog_setup()
    self.catalog = DataCatalog.from_env()
    leftover = self.catalog.generic_data(
        NewsEventData,
        raise_on_empty=False,
        as_nautilus=True,
    )
    assert len(leftover) == 0

    # The two slices share elements 5..9
    first_chunk = events[:10]
    second_chunk = events[5:15]

    # Act, Assert
    write_objects(catalog=self.catalog, chunk=first_chunk)
    after_first = self.catalog.generic_data(NewsEventData)
    assert len(after_first) == 10

    write_objects(catalog=self.catalog, chunk=second_chunk)
    after_second = self.catalog.generic_data(NewsEventData)
    assert len(after_second) == 15
def test_data_catalog_instrument_ids_correctly_unmapped(self):
    """Instrument ids read back from the catalog keep their original `AUD/USD.SIM` form."""
    # Arrange
    catalog = DataCatalog.from_env()
    audusd = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))
    tick = TradeTick(
        instrument_id=audusd.id,
        price=Price.from_str("2.0"),
        size=Quantity.from_int(10),
        aggressor_side=AggressorSide.UNKNOWN,
        trade_id=TradeId("1"),
        ts_event=0,
        ts_init=0,
    )
    write_objects(catalog=catalog, chunk=[audusd, tick])

    # Act
    loaded_instruments = catalog.instruments(instrument_ids=["AUD/USD.SIM"], as_nautilus=True)
    loaded_ticks = catalog.trade_ticks(instrument_ids=["AUD/USD.SIM"], as_nautilus=True)
    instrument = loaded_instruments[0]
    trade_tick = loaded_ticks[0]

    # Assert
    assert instrument.id.value == "AUD/USD.SIM"
    assert trade_tick.instrument_id.value == "AUD/USD.SIM"
def test_write_parquet_partitions(self):
    """Partitioning on `instrument_id` yields one directory per id and drops the column."""
    # Arrange
    catalog = DataCatalog.from_env()
    fs = catalog.fs
    root = catalog.path
    path = "sample.parquet"
    frame = pd.DataFrame(
        {
            "value": np.random.random(5),
            "instrument_id": ["a", "a", "a", "b", "b"],
        }
    )
    schema = pa.schema({"value": pa.float64(), "instrument_id": pa.string()})

    # Act
    write_parquet(
        fs=fs,
        path=f"{root}/{path}",
        df=frame,
        schema=schema,
        partition_cols=["instrument_id"],
    )
    dataset = ds.dataset(str(root.joinpath("sample.parquet")), filesystem=fs)
    table = dataset.to_table()
    result = table.to_pandas()

    # Assert
    # instrument_id is a partition now, so only the value column is read back
    assert result.equals(frame[["value"]])
    first_file, second_file = dataset.files[0], dataset.files[1]
    assert first_file.startswith("/root/sample.parquet/instrument_id=a/")
    assert second_file.startswith("/root/sample.parquet/instrument_id=b/")
def test_write_parquet_no_partitions(self):
    """Without partition columns the frame round-trips through parquet unchanged."""
    # Arrange
    frame = pd.DataFrame(
        {
            "value": np.random.random(5),
            "instrument_id": ["a", "a", "a", "b", "b"],
        }
    )
    catalog = DataCatalog.from_env()
    fs = catalog.fs
    root = catalog.path
    schema = pa.schema({"value": pa.float64(), "instrument_id": pa.string()})

    # Act
    write_parquet(
        fs=fs,
        path=f"{root}/sample.parquet",
        df=frame,
        schema=schema,
        partition_cols=None,
    )
    dataset = ds.dataset(str(root.joinpath("sample.parquet")), filesystem=fs)
    result = dataset.to_table().to_pandas()

    # Assert
    assert result.equals(frame)
def setup(self):
    """Reset the catalog, load the AUD/USD ticks and build a single backtest run config."""
    data_catalog_setup()
    dask.config.set(scheduler="single-threaded")
    aud_usd_data_loader()
    self.catalog = DataCatalog.from_env()

    engine_config = BacktestEngineConfig()
    sim_venue = BacktestVenueConfig(
        name="SIM",
        oms_type="HEDGING",
        account_type="MARGIN",
        base_currency="USD",
        starting_balances=["1000000 USD"],
        # fill_model=fill_model,  # TODO(cs): Implement next iteration
    )
    quote_data = BacktestDataConfig(
        catalog_path="/root",
        catalog_fs_protocol="memory",
        data_cls=QuoteTick,
        instrument_id="AUD/USD.SIM",
        start_time=1580398089820000000,
        end_time=1580504394501000000,
    )
    self.backtest_config = BacktestRunConfig(
        engine=engine_config,
        venues=[sim_venue],
        data=[quote_data],
    )
def aud_usd_data_loader():
    """
    Load the TrueFX AUD/USD tick CSV from the test data directory into the
    catalog configured through the environment.
    """
    from nautilus_trader.backtest.data.providers import TestInstrumentProvider
    from tests.test_kit.stubs import TestStubs
    from tests.unit_tests.backtest.test_backtest_config import TEST_DATA_DIR

    instrument = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))

    def parse_csv_tick(df, instrument_id):
        # Emit the instrument itself ahead of the ticks parsed from this block
        yield instrument
        for row in df.values:
            # Column 0 is the timestamp, columns 1 and 2 the bid and ask
            ts = secs_to_nanos(pd.Timestamp(row[0]).timestamp())
            yield QuoteTick(
                instrument_id=instrument_id,
                bid=Price.from_str(str(row[1])),
                ask=Price.from_str(str(row[2])),
                bid_size=Quantity.from_int(1_000_000),
                ask_size=Quantity.from_int(1_000_000),
                ts_event=ts,
                ts_init=ts,
            )

    catalog = DataCatalog.from_env()
    instrument_provider = InstrumentProvider()
    instrument_provider.add(instrument)

    tick_parser = partial(parse_csv_tick, instrument_id=TestStubs.audusd_id())
    csv_reader = CSVReader(block_parser=tick_parser, as_dataframe=True)
    process_files(
        glob_path=f"{TEST_DATA_DIR}/truefx-audusd-ticks.csv",
        reader=csv_reader,
        instrument_provider=instrument_provider,
        catalog=catalog,
    )
def catalog(self):
    """Build a `DataCatalog` from this object's path, protocol and storage options."""
    # Imported locally, as in the original implementation
    from nautilus_trader.persistence.catalog import DataCatalog

    catalog_kwargs = {
        "path": self.catalog_path,
        "fs_protocol": self.catalog_fs_protocol,
        "fs_storage_options": self.catalog_fs_storage_options,
    }
    return DataCatalog(**catalog_kwargs)
def _reset():
    """
    Clean up resources before each test run.

    Points `NAUTILUS_CATALOG` at the in-memory filesystem, wipes it and
    recreates an empty `/root/data` directory.
    """
    os.environ["NAUTILUS_CATALOG"] = "memory:///root/"
    memory_fs = DataCatalog.from_env().fs
    assert isinstance(memory_fs, MemoryFileSystem)

    try:
        memory_fs.rm("/", recursive=True)
    except FileNotFoundError:
        # Nothing stored yet, so there is nothing to remove
        pass

    memory_fs.mkdir("/root/data")
    assert memory_fs.exists("/root/")
def test_data_catalog_currency_with_null_max_price_loads(self):
    """An instrument read back from the catalog reports `max_price` as None."""
    # Arrange
    catalog = DataCatalog.from_env()
    audusd = TestInstrumentProvider.default_fx_ccy("AUD/USD", venue=Venue("SIM"))
    write_objects(catalog=catalog, chunk=[audusd])

    # Act
    loaded = catalog.instruments(instrument_ids=["AUD/USD.SIM"], as_nautilus=True)
    instrument = loaded[0]

    # Assert
    assert instrument.max_price is None
def merge_existing_data(catalog: DataCatalog, cls: type, df: pd.DataFrame) -> pd.DataFrame:
    """
    Handle existing data for instrument subclasses.

    Instruments all live in a single file, so merge with existing data.
    For all other classes, simply return data unchanged.

    Parameters
    ----------
    catalog : DataCatalog
        The catalog whose stored instruments are merged with `df`.
    cls : type
        The class of the objects contained in `df`.
    df : pd.DataFrame
        The new data about to be written. For instrument classes it must
        contain a "type" column, which is dropped before merging.

    Returns
    -------
    pd.DataFrame
        `df` unchanged for non-instrument classes (or when the existing
        instruments cannot be read), otherwise the de-duplicated rows of the
        existing instruments followed by the new ones.
    """
    # NOTE: `__subclasses__()` lists direct subclasses only, so a class that
    # derives from `Instrument` indirectly is treated as non-instrument data.
    if cls not in Instrument.__subclasses__():
        return df

    try:
        existing = catalog.instruments(instrument_type=cls)
        # `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0;
        # `pd.concat` is the equivalent, supported spelling.
        merged = pd.concat([existing, df.drop(["type"], axis=1)])
        return merged.drop_duplicates()
    except pa.lib.ArrowInvalid:
        # Fall back to the new data alone when the stored instruments cannot be read
        return df
def data_catalog_setup():
    """
    Reset the filesystem and DataCatalog to a clean state.

    Clears the `DataCatalog` singleton instances, points `NAUTILUS_CATALOG`
    at the in-memory filesystem, wipes it and recreates an empty
    `/root/data` directory.
    """
    clear_singleton_instances(DataCatalog)

    os.environ["NAUTILUS_CATALOG"] = "memory:///root/"
    memory_fs = DataCatalog.from_env().fs
    assert isinstance(memory_fs, MemoryFileSystem)

    try:
        memory_fs.rm("/", recursive=True)
    except FileNotFoundError:
        # Nothing stored yet, so there is nothing to remove
        pass

    memory_fs.mkdir("/root/data")
    assert memory_fs.exists("/root/")
    assert not memory_fs.ls("/root/data")
def test_repartition_dataset(self):
    """Validating a dataset written out of order leaves one date-named file per partition and start date."""
    # Arrange
    catalog = DataCatalog.from_env()
    fs = catalog.fs
    root = catalog.path
    path = "sample.parquet"
    dataset_path = f"{root}/{path}"
    partition_glob = f"{root}/{path}/**/*.parquet"
    schema = pa.schema(
        {"value": pa.float64(), "instrument_id": pa.string(), "ts_init": pa.int64()}
    )

    # Write some out of order, overlapping
    for start_date in ("2020-01-01", "2020-01-8", "2020-01-04"):
        days = pd.date_range(start_date, periods=5, tz="UTC")
        frame = pd.DataFrame(
            {
                "value": np.arange(5),
                "instrument_id": ["a", "a", "a", "b", "b"],
                "ts_init": [int(day.to_datetime64()) for day in days],
            }
        )
        write_parquet(
            fs=fs,
            path=dataset_path,
            df=frame,
            schema=schema,
            partition_cols=["instrument_id"],
        )

    original_partitions = fs.glob(partition_glob)

    # Act
    _validate_dataset(catalog=catalog, path=dataset_path)
    new_partitions = fs.glob(partition_glob)

    # Assert
    assert len(original_partitions) == 6
    expected = [
        "/root/sample.parquet/instrument_id=a/20200101.parquet",
        "/root/sample.parquet/instrument_id=a/20200104.parquet",
        "/root/sample.parquet/instrument_id=a/20200108.parquet",
        "/root/sample.parquet/instrument_id=b/20200101.parquet",
        "/root/sample.parquet/instrument_id=b/20200104.parquet",
        "/root/sample.parquet/instrument_id=b/20200108.parquet",
    ]
    assert new_partitions == expected
def build_filenames(catalog: DataCatalog, data_configs: List[BacktestDataConfig]) -> List[FileMeta]:
    """
    Describe the catalog paths that back each data config.

    Configs whose path does not exist on the catalog filesystem are skipped.
    """
    found = []
    for cfg in data_configs:
        location = catalog._make_path(cls=cfg.data_type)
        if cfg.instrument_id:
            # Narrow the path down to the partition of the requested instrument
            location = location + f"/instrument_id={clean_key(cfg.instrument_id)}"
        if catalog.fs.exists(location):
            meta = FileMeta(
                filename=location,
                datatype=cfg.data_type,
                instrument_id=cfg.instrument_id,
                client_id=cfg.client_id,
                start=cfg.start_time_nanos,
                end=cfg.end_time_nanos,
            )
            found.append(meta)
    return found
def main(catalog: DataCatalog):
    """
    Rename match_id to trade_id in TradeTick.

    For every instrument id, the trades are loaded with `match_id` projected
    to `trade_id`, the instrument's partition is moved aside, and the trades
    are written to a sibling "_tmp" catalog. If writing or re-reading fails,
    a warning is issued and the partition is moved back.
    """
    fs: fsspec.AbstractFileSystem = catalog.fs

    print("Loading instrument ids")
    id_frame = catalog.query(TradeTick, table_kwargs={"columns": ["instrument_id"]})
    instrument_ids = id_frame["instrument_id"].unique()

    tmp_catalog = DataCatalog(str(catalog.path) + "_tmp")
    tmp_catalog.fs = catalog.fs

    for ins_id in tqdm(instrument_ids):
        partition_dir = f"{catalog.path}/data/trade_tick.parquet/instrument_id={ins_id}"
        backup_dir = f"{catalog.path}/data/trade_tick.parquet_tmp/instrument_id={ins_id}"

        # Load trades for instrument
        trades = catalog.trade_ticks(
            instrument_ids=[ins_id],
            projections={"trade_id": ds.field("match_id")},
            as_nautilus=True,
        )

        # Create temp parquet in case of error
        fs.move(partition_dir, backup_dir, recursive=True)

        try:
            # Rewrite to new catalog
            write_objects(tmp_catalog, trades)
            # Ensure we can query again
            tmp_catalog.trade_ticks(instrument_ids=[ins_id], as_nautilus=True)
            # Clear temp parquet
            fs.rm(backup_dir, recursive=True)
        except Exception:
            warnings.warn(f"Failed to write or read instrument_id {ins_id}")
            # Put the partition that was moved aside back in place
            fs.move(backup_dir, partition_dir, recursive=True)
def setup(self):
    """Reset the catalog and build the venue, data, run and strategy configs used by the tests."""
    data_catalog_setup()
    self.catalog = DataCatalog.from_env()

    venue = BacktestVenueConfig(
        name="SIM",
        venue_type="ECN",
        oms_type="HEDGING",
        account_type="MARGIN",
        base_currency="USD",
        starting_balances=["1000000 USD"],
        # fill_model=fill_model,  # TODO(cs): Implement next iteration
    )
    self.venue_config = venue

    quotes = BacktestDataConfig(
        catalog_path="/root",
        catalog_fs_protocol="memory",
        data_cls_path="nautilus_trader.model.data.tick.QuoteTick",
        instrument_id="AUD/USD.SIM",
        start_time=1580398089820000000,
        end_time=1580504394501000000,
    )
    self.data_config = quotes

    run_config = BacktestRunConfig(
        engine=BacktestEngineConfig(),
        venues=[venue],
        data=[quotes],
    )
    self.backtest_configs = [run_config]

    ema_cross = EMACrossConfig(
        instrument_id="AUD/USD.SIM",
        bar_type="AUD/USD.SIM-100-TICK-MID-INTERNAL",
        fast_ema_period=10,
        slow_ema_period=20,
        trade_size=Decimal(1_000_000),
        order_id_tag="001",
    )
    self.strategies = [
        ImportableStrategyConfig(
            path="nautilus_trader.examples.strategies.ema_cross:EMACross",
            config=ema_cross,
        )
    ]
def setup(self):
    """Reset the catalog, keep a handle on its filesystem and load the test data into it."""
    data_catalog_setup()
    fresh_catalog = DataCatalog.from_env()
    self.catalog = fresh_catalog
    self.fs: fsspec.AbstractFileSystem = fresh_catalog.fs
    self._loaded_data_into_catalog()
def setup(self):
    """Reset the catalog and prepare the Betfair reader used by the tests."""
    data_catalog_setup()
    fresh_catalog = DataCatalog.from_env()
    self.catalog = fresh_catalog
    self.fs = fresh_catalog.fs
    self.reader = BetfairTestStubs.betfair_reader()
class TestParquetSerializer:
    """
    Round-trip tests for `ParquetSerializer`.

    Each test serializes an object, deserializes it again, compares the result
    with the original and writes the object to an in-memory `DataCatalog`.
    """

    def setup(self):
        """Prepare a clean in-memory catalog and a market order in each life-cycle state."""
        # Fixture Setup
        _reset()
        self.catalog = DataCatalog(path="/root", fs_protocol="memory")
        self.order_factory = OrderFactory(
            trader_id=TraderId("T-001"),
            strategy_id=StrategyId("S-001"),
            clock=TestClock(),
        )
        self.order = self.order_factory.market(
            AUDUSD_SIM.id,
            OrderSide.BUY,
            Quantity.from_int(100000),
        )

        # Each later state is a shallow copy of the previous one with the next event applied
        self.order_submitted = copy.copy(self.order)
        self.order_submitted.apply(TestStubs.event_order_submitted(self.order))

        self.order_accepted = copy.copy(self.order_submitted)
        self.order_accepted.apply(TestStubs.event_order_accepted(self.order_submitted))

        self.order_pending_cancel = copy.copy(self.order_accepted)
        self.order_pending_cancel.apply(TestStubs.event_order_pending_cancel(self.order_accepted))

        self.order_cancelled = copy.copy(self.order_pending_cancel)
        self.order_cancelled.apply(TestStubs.event_order_canceled(self.order_pending_cancel))

    def _test_serialization(self, obj: Any):
        """
        Round-trip `obj` through the serializer and the catalog.

        Asserts that deserializing the serialized form gives back `obj`, that
        the catalog holds exactly one row for its class after writing it, and
        that the object read back has `ts_init == 0`.

        Returns
        -------
        bool
            Always True when no assertion fails, so callers may `assert` on it.
        """
        cls = type(obj)
        serialized = ParquetSerializer.serialize(obj)
        # Normalise a single serialized item to a chunk before deserializing
        if not isinstance(serialized, list):
            serialized = [serialized]
        deserialized = ParquetSerializer.deserialize(cls=cls, chunk=serialized)

        # Assert
        expected = obj
        if isinstance(deserialized, list) and not isinstance(expected, list):
            expected = [expected]
        assert deserialized == expected

        write_objects(catalog=self.catalog, chunk=[obj])
        df = self.catalog._query(cls=cls)
        assert len(df) == 1
        nautilus = self.catalog._query(cls=cls, as_dataframe=False)[0]
        assert nautilus.ts_init == 0
        return True

    @pytest.mark.parametrize(
        "tick",
        [
            TestStubs.ticker(),
            TestStubs.quote_tick_5decimal(),
            TestStubs.trade_tick_5decimal(),
        ],
    )
    def test_serialize_and_deserialize_tick(self, tick):
        self._test_serialization(obj=tick)

    def test_serialize_and_deserialize_order_book_delta(self):
        delta = OrderBookDelta(
            instrument_id=TestStubs.audusd_id(),
            book_type=BookType.L2_MBP,
            action=BookAction.CLEAR,
            order=None,
            ts_event=0,
            ts_init=0,
        )

        serialized = ParquetSerializer.serialize(delta)
        [deserialized] = ParquetSerializer.deserialize(cls=OrderBookDelta, chunk=serialized)

        # Assert
        # The expected round-trip result is an OrderBookDeltas wrapping the single delta
        expected = OrderBookDeltas(
            instrument_id=TestStubs.audusd_id(),
            book_type=BookType.L2_MBP,
            deltas=[delta],
            ts_event=0,
            ts_init=0,
        )
        assert deserialized == expected
        write_objects(catalog=self.catalog, chunk=[delta])

    def test_serialize_and_deserialize_order_book_deltas(self):
        kw = {
            "instrument_id": "AUD/USD.SIM",
            "ts_event": 0,
            "ts_init": 0,
            "book_type": "L2_MBP",
        }
        deltas = OrderBookDeltas(
            instrument_id=TestStubs.audusd_id(),
            book_type=BookType.L2_MBP,
            deltas=[
                OrderBookDelta.from_dict(
                    {
                        "action": "ADD",
                        "order_side": "BUY",
                        "order_price": 8.0,
                        "order_size": 30.0,
                        "order_id": "e0364f94-8fcb-0262-cbb3-075c51ee4917",
                        **kw,
                    }
                ),
                OrderBookDelta.from_dict(
                    {
                        "action": "ADD",
                        "order_side": "SELL",
                        "order_price": 15.0,
                        "order_size": 10.0,
                        "order_id": "cabec174-acc6-9204-9ebf-809da3896daf",
                        **kw,
                    }
                ),
            ],
            ts_event=0,
            ts_init=0,
        )

        serialized = ParquetSerializer.serialize(deltas)
        deserialized = ParquetSerializer.deserialize(cls=OrderBookDeltas, chunk=serialized)

        # Assert
        assert deserialized == [deltas]
        write_objects(catalog=self.catalog, chunk=[deltas])

    def test_serialize_and_deserialize_order_book_deltas_grouped(self):
        kw = {
            "instrument_id": "AUD/USD.SIM",
            "ts_event": 0,
            "ts_init": 0,
            "book_type": "L2_MBP",
        }
        deltas = [
            {
                "action": "ADD",
                "order_side": "SELL",
                "order_price": 0.9901,
                "order_size": 327.25,
                "order_id": "1",
            },
            {
                "action": "CLEAR",
                "order_side": None,
                "order_price": None,
                "order_size": None,
                "order_id": None,
            },
            {
                "action": "ADD",
                "order_side": "SELL",
                "order_price": 0.98039,
                "order_size": 27.91,
                "order_id": "2",
            },
            {
                "action": "ADD",
                "order_side": "SELL",
                "order_price": 0.97087,
                "order_size": 14.43,
                "order_id": "3",
            },
        ]
        deltas = OrderBookDeltas(
            instrument_id=TestStubs.audusd_id(),
            book_type=BookType.L2_MBP,
            deltas=[OrderBookDelta.from_dict({**kw, **d}) for d in deltas],
            ts_event=0,
            ts_init=0,
        )

        serialized = ParquetSerializer.serialize(deltas)
        [deserialized] = ParquetSerializer.deserialize(cls=OrderBookDeltas, chunk=serialized)

        # Assert
        assert deserialized == deltas
        write_objects(catalog=self.catalog, chunk=[deserialized])
        # The order of the actions must survive the round trip
        assert [d.action for d in deserialized.deltas] == [
            BookAction.ADD,
            BookAction.CLEAR,
            BookAction.ADD,
            BookAction.ADD,
        ]

    def test_serialize_and_deserialize_order_book_snapshot(self):
        book = TestStubs.order_book_snapshot()

        serialized = ParquetSerializer.serialize(book)
        deserialized = ParquetSerializer.deserialize(cls=OrderBookSnapshot, chunk=serialized)

        # Assert
        assert deserialized == [book]
        write_objects(catalog=self.catalog, chunk=[book])

    def test_serialize_and_deserialize_component_state_changed(self):
        event = TestStubs.event_component_state_changed()

        serialized = ParquetSerializer.serialize(event)
        [deserialized] = ParquetSerializer.deserialize(
            cls=ComponentStateChanged, chunk=[serialized]
        )

        # Assert
        assert deserialized == event
        write_objects(catalog=self.catalog, chunk=[event])

    def test_serialize_and_deserialize_trading_state_changed(self):
        event = TestStubs.event_trading_state_changed()

        serialized = ParquetSerializer.serialize(event)
        [deserialized] = ParquetSerializer.deserialize(cls=TradingStateChanged, chunk=[serialized])

        # Assert
        assert deserialized == event
        write_objects(catalog=self.catalog, chunk=[event])

    def test_serialize_and_deserialize_account_state(self):
        event = TestStubs.event_cash_account_state()

        serialized = ParquetSerializer.serialize(event)
        [deserialized] = ParquetSerializer.deserialize(cls=AccountState, chunk=serialized)

        # Assert
        assert deserialized == event
        write_objects(catalog=self.catalog, chunk=[event])

    @pytest.mark.parametrize(
        "event_func",
        [
            TestStubs.event_order_accepted,
            TestStubs.event_order_rejected,
            TestStubs.event_order_submitted,
        ],
    )
    def test_serialize_and_deserialize_order_events_base(self, event_func):
        order = TestStubs.limit_order()
        event = event_func(order=order)
        self._test_serialization(obj=event)

    # `TestStubs.event_order_canceled` used to be listed twice here, which ran
    # the identical case twice; each event stub now appears exactly once.
    @pytest.mark.parametrize(
        "event_func",
        [
            TestStubs.event_order_submitted,
            TestStubs.event_order_accepted,
            TestStubs.event_order_canceled,
            TestStubs.event_order_pending_update,
            TestStubs.event_order_pending_cancel,
            TestStubs.event_order_triggered,
            TestStubs.event_order_expired,
            TestStubs.event_order_rejected,
        ],
    )
    def test_serialize_and_deserialize_order_events_post_accepted(self, event_func):
        # Act
        event = event_func(order=self.order_accepted)
        assert self._test_serialization(obj=event)

    @pytest.mark.parametrize(
        "event_func",
        [
            TestStubs.event_order_filled,
        ],
    )
    def test_serialize_and_deserialize_order_events_filled(self, event_func):
        # Act
        event = event_func(order=self.order_accepted, instrument=AUDUSD_SIM)
        self._test_serialization(obj=event)

    @pytest.mark.parametrize(
        "position_func",
        [
            TestStubs.event_position_opened,
            TestStubs.event_position_changed,
        ],
    )
    def test_serialize_and_deserialize_position_events_open_changed(self, position_func):
        instrument = TestInstrumentProvider.default_fx_ccy("GBPUSD")

        order3 = self.order_factory.market(
            instrument.id,
            OrderSide.BUY,
            Quantity.from_int(100000),
        )
        fill3 = TestStubs.event_order_filled(
            order3,
            instrument=instrument,
            position_id=PositionId("P-3"),
            strategy_id=StrategyId("S-1"),
            last_px=Price.from_str("1.00000"),
        )

        position = Position(instrument=instrument, fill=fill3)

        event = position_func(position=position)
        self._test_serialization(obj=event)

    @pytest.mark.parametrize(
        "position_func",
        [
            TestStubs.event_position_closed,
        ],
    )
    def test_serialize_and_deserialize_position_events_closed(self, position_func):
        instrument = TestInstrumentProvider.default_fx_ccy("GBPUSD")

        open_order = self.order_factory.market(
            instrument.id,
            OrderSide.BUY,
            Quantity.from_int(100000),
        )
        open_fill = TestStubs.event_order_filled(
            open_order,
            instrument=instrument,
            position_id=PositionId("P-3"),
            strategy_id=StrategyId("S-1"),
            last_px=Price.from_str("1.00000"),
        )
        close_order = self.order_factory.market(
            instrument.id,
            OrderSide.SELL,
            Quantity.from_int(100000),
        )
        close_fill = TestStubs.event_order_filled(
            close_order,
            instrument=instrument,
            position_id=PositionId("P-3"),
            strategy_id=StrategyId("S-1"),
            last_px=Price.from_str("1.20000"),
        )

        # Apply the opposite-side fill of the same size under the same position id
        position = Position(instrument=instrument, fill=open_fill)
        position.apply(close_fill)

        event = position_func(position=position)
        self._test_serialization(obj=event)

    @pytest.mark.parametrize(
        "instrument",
        [
            TestInstrumentProvider.btcusdt_binance(),
            TestInstrumentProvider.aapl_equity(),
            TestInstrumentProvider.es_future(),
            TestInstrumentProvider.aapl_option(),
        ],
    )
    def test_serialize_and_deserialize_instruments(self, instrument):
        serialized = ParquetSerializer.serialize(instrument)
        assert serialized
        deserialized = ParquetSerializer.deserialize(cls=type(instrument), chunk=[serialized])

        # Assert
        assert deserialized == [instrument]
        write_objects(catalog=self.catalog, chunk=[instrument])
        df = self.catalog.instruments()
        assert len(df) == 1

    @pytest.mark.parametrize(
        "name, obj", [(obj.__class__.__name__, obj) for obj in nautilus_objects()]
    )
    def test_serialize_and_deserialize_all(self, name, obj):
        # Arrange, Act
        assert self._test_serialization(obj)
def setup(self):
    """Reset the catalog and create the mock reader and line preprocessor used by the tests."""
    data_catalog_setup()
    fresh_catalog = DataCatalog.from_env()
    mock_reader = MockReader()
    preprocessor = TestLineProcessor()

    self.catalog = fresh_catalog
    self.reader = mock_reader
    self.line_preprocessor = preprocessor
def setup(self):
    """Reset the catalog, keep a handle on its filesystem and load the test data into it."""
    data_catalog_setup()
    fresh_catalog = DataCatalog.from_env()
    self.catalog = fresh_catalog
    self.fs = fresh_catalog.fs
    self._loaded_data_into_catalog()
def as_catalog(self) -> DataCatalog:
    """Build a `DataCatalog` from this object's path, protocol and storage options."""
    catalog_kwargs = {
        "path": self.catalog_path,
        "fs_protocol": self.fs_protocol,
        "fs_storage_options": self.fs_storage_options,
    }
    return DataCatalog(**catalog_kwargs)
def setup(self):
    """Reset the catalog and create the mock reader used by the tests."""
    data_catalog_setup()
    fresh_catalog = DataCatalog.from_env()
    self.catalog = fresh_catalog
    self.fs = fresh_catalog.fs
    self.reader = MockReader()
def validate_data_catalog(catalog: DataCatalog, **kwargs):
    """
    Validate the parquet dataset of every data type listed by the catalog.

    Extra keyword arguments are forwarded unchanged to `_validate_dataset`.
    """
    for type_name in catalog.list_data_types():
        dataset_path = f"{catalog.path}/data/{type_name}.parquet"
        _validate_dataset(catalog=catalog, path=dataset_path, **kwargs)