def test_submit(mock_order_class, mock_exchange_class):
    """Submitting an order should place it on the broker's unexecuted list."""
    broker = Broker(mock_exchange_class.return_value)
    pending_order = mock_order_class.return_value

    assert broker.unexecuted == []

    broker.submit(pending_order)

    assert pending_order in broker.unexecuted
def __init__(self,
             portfolio: Union[Portfolio, str],
             action_scheme: Union[ActionScheme, str],
             reward_scheme: Union[RewardScheme, str],
             feed: DataFeed = None,
             window_size: int = 1,
             use_internal: bool = True,
             **kwargs):
    """
    Arguments:
        portfolio: The `Portfolio` of wallets used to submit and execute orders from.
        action_scheme: The component for transforming an action into an `Order` at each timestep.
        reward_scheme: The component for determining the reward at each timestep.
        feed (optional): The pipeline of features to pass the observations through.
        window_size (optional): Number of past observations stacked into each observation (default 1).
        use_internal (optional): Whether internally-generated feed keys are included in observations.
        kwargs (optional): Additional arguments for tuning the environments, logging, etc.
    """
    super().__init__()
    self.portfolio = portfolio
    self.action_scheme = action_scheme
    self.reward_scheme = reward_scheme
    self.feed = feed
    self.window_size = window_size
    self.use_internal = use_internal

    if self.feed:
        # Pull one row only to record the externally-provided keys, then
        # rewind the feed so no data is actually consumed.
        self._external_keys = self.feed.next().keys()
        self.feed.reset()

    self.history = ObservationHistory(window_size=window_size)

    self._broker = Broker(exchanges=self.portfolio.exchanges)
    self.clock = Clock()

    # Populated later (presumably by self.compile()) — TODO confirm.
    self.action_space = None
    self.observation_space = None
    self.viewer = None

    self._enable_logger = kwargs.get('enable_logger', False)
    self._observation_dtype = kwargs.get('dtype', np.float32)
    self._observation_lows = kwargs.get('observation_lows', 0)
    self._observation_highs = kwargs.get('observation_highs', 1)

    if self._enable_logger:
        self.logger = logging.getLogger(kwargs.get('logger_name', __name__))
        self.logger.setLevel(kwargs.get('log_level', logging.DEBUG))

    # Silence TensorFlow's logger unless explicitly re-enabled.
    logging.getLogger('tensorflow').disabled = kwargs.get(
        'disable_tensorflow_logger', True)

    self.compile()
def test_update_on_single_exchange_with_single_order(mock_order_class, mock_exchange_class):
    """An unexecutable order stays queued; once executable it moves to executed
    and is attached to the broker exactly once."""
    exchange = mock_exchange_class.return_value
    broker = Broker(exchange)

    order = mock_order_class.return_value
    order.id = "fake_id"
    order.start = 0
    order.end = None
    # Not executable on the first broker.update(), executable on the second.
    order.is_executable_on = mock.Mock(side_effect=[False, True])
    order.attach = mock.Mock(return_value=None)

    broker.submit(order)

    # Test order does not execute on first update
    broker.update()
    assert order in broker.unexecuted
    assert order.id not in broker.executed

    # Test order does execute on second update
    broker.update()
    assert order not in broker.unexecuted
    assert order.id in broker.executed

    # Execution must attach the broker as the order's listener, once only.
    order.attach.assert_called_once_with(broker)
def test_update_on_single_exchange_with_multiple_orders(mock_exchange_class):
    """Orders become executable independently: each moves from unexecuted to
    executed on whichever update its is_executable_on first returns True."""
    exchange = mock_exchange_class.return_value
    exchange.id = "fake_exchange_id"
    exchange.name = "coinbase"

    wallets = [Wallet(exchange, 10000 * USD), Wallet(exchange, 0 * BTC)]
    portfolio = Portfolio(USD, wallets)
    broker = Broker(exchange)

    # Submit order 1 — executable only on the second update.
    o1 = Order(step=0,
               exchange_name="coinbase",
               side=TradeSide.BUY,
               trade_type=TradeType.MARKET,
               pair=USD / BTC,
               quantity=5200.00 * USD,
               portfolio=portfolio,
               price=7000.00)
    o1.is_executable_on = mock.MagicMock(side_effect=[False, True])
    broker.submit(o1)

    # Submit order 2 — executable on the first update.
    o2 = Order(step=0,
               exchange_name="coinbase",
               side=TradeSide.BUY,
               trade_type=TradeType.MARKET,
               pair=USD / BTC,
               quantity=230.00 * USD,
               portfolio=portfolio,
               price=7300.00)
    o2.is_executable_on = mock.MagicMock(side_effect=[True, False])
    broker.submit(o2)

    # No updates have been made yet
    assert o1 in broker.unexecuted and o1 not in broker.executed
    assert o2 in broker.unexecuted and o2 not in broker.executed

    # First update: only o2 executes.
    broker.update()
    assert o1 in broker.unexecuted and o1.id not in broker.executed
    assert o2 not in broker.unexecuted and o2.id in broker.executed

    # Second update: o1 executes as well.
    broker.update()
    assert o1 not in broker.unexecuted and o1.id in broker.executed
    assert o2 not in broker.unexecuted and o2.id in broker.executed
def test_on_fill(mock_trade_class, mock_exchange_class):
    """Filling an executed order transitions it OPEN -> FILLED, zeroes the
    remaining size, and records the trade on the broker (legacy pair API)."""
    exchange = mock_exchange_class.return_value
    exchange.id = "fake_exchange_id"
    broker = Broker(exchange)

    wallets = [Wallet(exchange, 10000 * USD), Wallet(exchange, 0 * BTC)]
    portfolio = Portfolio(USD, wallets)
    order = Order(side=TradeSide.BUY,
                  trade_type=TradeType.MARKET,
                  pair=USD / BTC,
                  quantity=5200.00 * USD,
                  portfolio=portfolio,
                  price=7000.00)

    order.attach(broker)
    order.execute(exchange)

    # The broker normally registers executed orders itself; done by hand here.
    broker._executed[order.id] = order

    # Fake trade covering the whole order minus commission.
    trade = mock_trade_class.return_value
    trade.size = 5197.00
    trade.commission = 3.00 * USD
    trade.order_id = order.id

    assert order.status == OrderStatus.OPEN
    order.fill(exchange, trade)
    assert order.status == OrderStatus.FILLED

    assert order.remaining_size == 0
    assert trade in broker.trades[order.id]
def test_init(mock_exchange_class):
    """A broker built from a single exchange or a list of exchanges starts empty."""
    single_exchange = mock_exchange_class.return_value

    broker = Broker(single_exchange)
    assert broker
    assert broker.exchanges == [single_exchange]
    assert broker.unexecuted == []
    assert broker.executed == {}
    assert broker.trades == {}

    exchange_list = [mock_exchange_class.return_value for _ in range(3)]

    broker = Broker(exchange_list)
    assert broker
    assert broker.exchanges == exchange_list
    assert broker.unexecuted == []
    assert broker.executed == {}
    assert broker.trades == {}
def test_on_fill(mock_trade_class, mock_exchange_class):
    """Filling an executed order transitions it OPEN -> FILLED, zeroes the
    remainder, and records the trade on the broker (ExchangePair API)."""
    exchange = mock_exchange_class.return_value
    exchange.options.max_trade_size = 1e6
    exchange.id = "fake_exchange_id"
    exchange.name = "coinbase"
    # Fixed quote so the order always prices at 7000 USD/BTC.
    exchange.quote_price = lambda pair: Decimal(7000.00)

    broker = Broker()
    broker.exchanges = [exchange]

    wallets = [Wallet(exchange, 10000 * USD), Wallet(exchange, 0 * BTC)]
    portfolio = Portfolio(USD, wallets)
    order = Order(step=0,
                  exchange_pair=ExchangePair(exchange, USD / BTC),
                  side=TradeSide.BUY,
                  trade_type=TradeType.MARKET,
                  quantity=5200.00 * USD,
                  portfolio=portfolio,
                  price=7000.00)

    order.attach(broker)
    order.execute()

    # The broker normally registers executed orders itself; done by hand here.
    broker._executed[order.id] = order

    # Fake trade covering the whole order minus commission.
    trade = mock_trade_class.return_value
    trade.quantity = 5197.00 * USD
    trade.commission = 3.00 * USD
    trade.order_id = order.id

    assert order.status == OrderStatus.OPEN
    order.fill(trade)
    assert order.status == OrderStatus.FILLED

    assert order.remaining == 0
    assert trade in broker.trades[order.id]
def test_reset(mock_exchange_class):
    """reset() must clear the unexecuted, executed, and trade records."""
    fake_exchange = mock_exchange_class.return_value
    fake_exchange.id = "fake_exchange_id"

    broker = Broker()
    broker.exchanges = [fake_exchange]

    # Seed internal state directly so the clearing is observable.
    broker._unexecuted = [78, 98, 100]
    broker._executed = {'a': 1, 'b': 2}
    broker._trades = {'a': 2, 'b': 3}

    broker.reset()

    assert broker.unexecuted == []
    assert broker.executed == {}
    assert broker.trades == {}
def test_cancel_executed_order(mock_order_class, mock_exchange_class):
    """Cancelling an already-cancelled order must raise a Warning."""
    mock_exchange = mock_exchange_class.return_value
    mock_exchange.options.max_trade_size = 1e6

    broker = Broker()
    broker.exchanges = [mock_exchange]

    submitted = mock_order_class.return_value
    submitted.cancel = mock.Mock(return_value=None)
    broker.submit(submitted)
    assert submitted in broker.unexecuted

    submitted.status = OrderStatus.CANCELLED
    with pytest.raises(Warning):
        broker.cancel(submitted)
def test_cancel_unexecuted_order(mock_order_class, mock_exchange_class):
    """Cancelling a PENDING order removes it from the queue and delegates
    to the order's own cancel() exactly once."""
    broker = Broker()
    broker.exchanges = [mock_exchange_class.return_value]

    pending = mock_order_class.return_value
    pending.cancel = mock.Mock(return_value=None)
    pending.status = OrderStatus.PENDING

    broker.submit(pending)
    assert pending in broker.unexecuted

    broker.cancel(pending)
    assert pending not in broker.unexecuted
    pending.cancel.assert_called_once_with()
def test_on_fill_with_complex_order(mock_trade_class, mock_exchange_class):
    """Filling an order that carries an attached risk-management OrderSpec
    records the trade and leaves the follow-up order queued as unexecuted."""
    exchange = mock_exchange_class.return_value
    exchange.options.max_trade_size = 1e6
    exchange.id = "fake_exchange_id"
    exchange.name = "coinbase"
    exchange.quote_price = lambda pair: Decimal(7000.00)

    broker = Broker()
    broker.exchanges = [exchange]

    wallets = [Wallet(exchange, 10000 * USD), Wallet(exchange, 0 * BTC)]
    portfolio = Portfolio(USD, wallets)

    side = TradeSide.BUY
    order = Order(step=0,
                  exchange_pair=ExchangePair(exchange, USD / BTC),
                  side=TradeSide.BUY,
                  trade_type=TradeType.MARKET,
                  quantity=5200.00 * USD,
                  portfolio=portfolio,
                  price=Decimal(7000.00))

    # Exit criteria: stop out on a 3% drop OR take profit on a 2% rise.
    risk_criteria = Stop("down", 0.03) ^ Stop("up", 0.02)

    # Opposite-side follow-up order that fires once the criteria are met.
    risk_management = OrderSpec(
        side=TradeSide.SELL if side == TradeSide.BUY else TradeSide.BUY,
        trade_type=TradeType.MARKET,
        exchange_pair=ExchangePair(exchange, USD / BTC),
        criteria=risk_criteria)

    order += risk_management

    order.attach(broker)
    order.execute()

    # The broker normally registers executed orders itself; done by hand here.
    broker._executed[order.id] = order

    # Execute fake trade
    price = Decimal(7000.00)
    scale = order.price / price
    commission = 3.00 * USD

    base_size = scale * order.size - commission.size

    trade = mock_trade_class.return_value
    trade.order_id = order.id
    trade.size = base_size
    trade.quantity = base_size * USD
    trade.price = price
    trade.commission = commission

    base_wallet = portfolio.get_wallet(exchange.id, USD)
    quote_wallet = portfolio.get_wallet(exchange.id, BTC)

    # Mirror the wallet transfers the exchange would perform on a real fill.
    base_size = trade.size + trade.commission.size
    quote_size = (order.price / trade.price) * (trade.size / trade.price)

    base_wallet.withdraw(quantity=Quantity(USD, size=base_size, path_id=order.path_id),
                         reason="test")
    quote_wallet.deposit(quantity=Quantity(BTC, size=quote_size, path_id=order.path_id),
                         reason="test")

    assert trade.order_id in broker.executed.keys()
    assert trade not in broker.trades
    assert broker.unexecuted == []

    order.fill(trade)

    assert order.remaining == 0
    assert trade in broker.trades[order.id]
    # The risk-management follow-up should now be waiting in the queue.
    assert broker.unexecuted != []
class TradingEnvironment(gym.Env, TimeIndexed):
    """A Gym-compatible trading environment driven by an `env_config` dict.

    Wires together a portfolio, an exchange, an action scheme, and a reward
    scheme, and exposes the standard gym `step`/`reset`/`render`/`close` API.
    """

    def __init__(self, env_config):
        """Build the environment from a single configuration dictionary.

        Required keys: 'portfolio', 'exchange', 'action_scheme',
        'reward_scheme'. All remaining keys are optional tuning/logging
        parameters read via `.get()` below.
        """
        super().__init__()
        self.portfolio = env_config['portfolio']
        self.exchange = env_config['exchange']
        self.action_scheme = env_config['action_scheme']
        self.reward_scheme = env_config['reward_scheme']
        self.feature_pipeline = env_config.get('feature_pipeline', [])
        self._window_size = env_config.get('window_size', 10)
        self._dtype = env_config.get('dtype', np.float32)
        self._observation_lows = env_config.get('observation_lows', 0)
        self._observation_highs = env_config.get('observation_highs', 1)
        self._observe_wallets = env_config.get('observe_wallets', None)
        # Alias kept so the remaining kwargs-style lookups read uniformly.
        kwargs = env_config

        # A single 'observe_wallets' list covers both unlocked and locked
        # balances; otherwise each list can be configured independently.
        if isinstance(self._observe_wallets, list):
            self._observe_unlocked_balances = self._observe_wallets
            self._observe_locked_balances = self._observe_wallets
        else:
            self._observe_unlocked_balances = kwargs.get(
                'observe_unlocked_balances', [])
            self._observe_locked_balances = kwargs.get(
                'observe_locked_balances', [])

        self.render_benchmarks: List[Dict] = kwargs.get(
            'render_benchmarks', [])
        self.viewer = None

        self._enable_logger = kwargs.get('enable_logger', True)

        if self._enable_logger:
            self.logger = logging.getLogger(kwargs.get('logger_name', __name__))
            self.logger.setLevel(kwargs.get('log_level', logging.DEBUG))

        # Silence TensorFlow's logger unless explicitly re-enabled.
        logging.getLogger('tensorflow').disabled = kwargs.get(
            'disable_tensorflow_logger', True)

        self._initial_balances = None

        self.reset()

    @property
    def window_size(self) -> int:
        """The length of the observation window in the `observation_space`."""
        return self._window_size

    @window_size.setter
    def window_size(self, window_size: int):
        self._window_size = window_size

    @property
    def portfolio(self) -> Portfolio:
        """The portfolio of instruments currently held on this exchange."""
        return self._portfolio

    @portfolio.setter
    def portfolio(self, portfolio: Union[Portfolio, str]):
        # A string is resolved through the `wallets` registry.
        self._portfolio = wallets.get(portfolio) if isinstance(
            portfolio, str) else portfolio

    @property
    def exchange(self) -> Exchange:
        """The `Exchange` that will be used to feed data from and execute trades within."""
        return self._exchange

    @exchange.setter
    def exchange(self, exchange: Union[Exchange, str]):
        # A string is resolved through the `exchanges` registry. Assigning a
        # new exchange also rebuilds the broker bound to it.
        self._exchange = exchanges.get(exchange) if isinstance(
            exchange, str) else exchange
        self._broker = Broker(self._exchange)

    @property
    def broker(self) -> Broker:
        """The broker used to execute orders within the environment."""
        return self._broker

    @property
    def episode_trades(self) -> Dict[str, 'Trade']:
        """A dictionary of trades made this episode, organized by order id."""
        return self._broker.trades

    @property
    def action_scheme(self) -> ActionScheme:
        """The component for transforming an action into an `Order` at each time step."""
        return self._action_scheme

    @action_scheme.setter
    def action_scheme(self, action_scheme: Union[ActionScheme, str]):
        self._action_scheme = actions.get(action_scheme) if isinstance(
            action_scheme, str) else action_scheme

    @property
    def reward_scheme(self) -> RewardScheme:
        """The component for determining the reward at each time step."""
        return self._reward_scheme

    @reward_scheme.setter
    def reward_scheme(self, reward_scheme: Union[RewardScheme, str]):
        self._reward_scheme = rewards.get(reward_scheme) if isinstance(
            reward_scheme, str) else reward_scheme

    @property
    def feature_pipeline(self) -> FeaturePipeline:
        """The pipeline of feature transformations to pass the observations through at each time step."""
        return self._feature_pipeline

    @feature_pipeline.setter
    def feature_pipeline(self, feature_pipeline: Union[FeaturePipeline, str] = None):
        self._feature_pipeline = features.get(feature_pipeline) if isinstance(
            feature_pipeline, str) else feature_pipeline

    @property
    def wallet_columns(self) -> List[str]:
        """The list of wallet columns provided by the portfolio.

        Raises:
            ValueError: If either balance list is not a `List[Instrument]`.
        """
        if not isinstance(self._observe_unlocked_balances, list) or not all(
                isinstance(balance, Instrument)
                for balance in self._observe_unlocked_balances):
            raise ValueError(
                'If used, the `self._observe_wallets` or `self._observe_unlocked_balances` parameter must be of type: List[Instrument]'
            )

        if not isinstance(self._observe_locked_balances, list) or not all(
                isinstance(balance, Instrument)
                for balance in self._observe_locked_balances):
            raise ValueError(
                'If used, the `self._observe_wallets` or `self._observe_locked_balances` parameter must be of type: List[Instrument]'
            )

        unlocked_columns = [
            instrument.symbol
            for instrument in self._observe_unlocked_balances
        ]
        # Locked balances are suffixed so they never collide with unlocked.
        locked_columns = [
            '{}_pending'.format(instrument.symbol)
            for instrument in self._observe_locked_balances
        ]

        return unlocked_columns + locked_columns

    @property
    def observation_columns(self) -> List[str]:
        """The final list of columns in the observation space."""
        if not self.wallet_columns:
            return self._exchange.observation_columns

        return np.concatenate(
            [self._exchange.observation_columns, self.wallet_columns])

    @property
    def action_space(self) -> Discrete:
        # Delegates entirely to the action scheme.
        return self._action_scheme.action_space

    @property
    def observation_space(self) -> Box:
        """The final shape of the observations generated by the exchange each timestep, after any feature transformations.

        Raises:
            ValueError: If list-valued lows/highs don't match the column count.
        """
        n_features = len(self.observation_columns)

        if isinstance(self._observation_lows, list) and len(self._observation_lows) != n_features:
            raise ValueError(
                'The length of `observation_lows` provided to the exchange must match the length of `observation_columns`.'
            )

        if isinstance(self._observation_highs, list) and len(self._observation_highs) != n_features:
            raise ValueError(
                'The length of `observation_highs` provided to the exchange must match the length of `observation_columns`.'
            )

        # Scalar bounds are broadcast across all feature columns.
        low = self._observation_lows if isinstance(
            self._observation_lows,
            list) else np.tile(self._observation_lows, n_features)
        high = self._observation_highs if isinstance(
            self._observation_highs, list) else np.tile(
                self._observation_highs, n_features)

        if self._window_size > 1:
            low = np.tile(low, self._window_size).reshape(
                (self._window_size, n_features))
            high = np.tile(high, self._window_size).reshape(
                (self._window_size, n_features))

        # NOTE(review): the computed low/high bounds above are discarded and
        # an unbounded Box is returned instead — confirm this is intentional.
        # return Box(low=low, high=high, dtype=self._dtype)
        return Box(low=-np.inf, high=np.inf, shape=(self._window_size, n_features), dtype=self._dtype)

    def wallet(self, instrument: Instrument) -> 'Wallet':
        # Look up the wallet holding `instrument` on the active exchange.
        wallet = self._portfolio.get_wallet(self.exchange.id, instrument)
        return wallet

    def balance(self, instrument: Instrument) -> 'Quantity':
        # Unlocked (spendable) balance for `instrument`.
        wallet = self.wallet(instrument=instrument)
        return wallet.balance

    def locked_balance(self, instrument: Instrument) -> 'Quantity':
        # Balance locked by open orders for `instrument`.
        wallet = self.wallet(instrument=instrument)
        return wallet.locked_balance

    def observe_balances(self) -> pd.DataFrame:
        # Single-row frame of the configured wallet balances, one column per
        # instrument (locked columns carry the '_pending' suffix).
        wallets = pd.DataFrame([], columns=self.wallet_columns)

        for instrument in self._observe_unlocked_balances:
            wallets[instrument.symbol] = [self.balance(instrument).size]

        for instrument in self._observe_locked_balances:
            wallets['{}_pending'.format(
                instrument.symbol)] = [self.locked_balance(instrument).size]

        return wallets

    def _take_action(self, action: int) -> Order:
        """Determines a specific trade to be taken and executes it within the exchange.

        Arguments:
            action: The int provided by the agent to map to a trade action for this timestep.

        Returns:
            The order created by the agent this time step, if any.
        """
        order = self._action_scheme.get_order(action, self._exchange, self._portfolio)

        if order:
            self._broker.submit(order)

        # The broker and portfolio tick forward even when no order was made.
        self._broker.update()
        self._portfolio.update()

        return order

    def _next_observation(self) -> np.ndarray:
        """Returns the next observation from the exchange.

        Returns:
            The observation provided by the environments's exchange, often OHLCV or tick trade history data points.
        """
        observation = self._exchange.next_observation(self._window_size)

        if self._observe_locked_balances or self._observe_unlocked_balances:
            # Inject current wallet balances into the first observation row.
            wallet_balances = self.observe_balances()

            for column in list(wallet_balances.columns):
                observation.loc[observation.index[0],
                                column] = wallet_balances[column].values

        if self._feature_pipeline is not None:
            observation = self._feature_pipeline.transform(observation)

        # Drop non-numeric columns, then convert to a clean ndarray.
        observation = observation.select_dtypes(include='number')

        if isinstance(observation, pd.DataFrame):
            observation = observation.fillna(0, axis=1)
            observation = observation.values

        observation = np.nan_to_num(observation)

        return observation

    def _get_reward(self) -> float:
        """Returns the reward for the current timestep.

        Returns:
            A float corresponding to the benefit earned by the action taken this step.

        Raises:
            ValueError: If the reward scheme produces a non-finite value.
        """
        reward = self._reward_scheme.get_reward(self._portfolio)
        reward = np.nan_to_num(reward)

        if np.bitwise_not(np.isfinite(reward)):
            raise ValueError(
                'Reward returned by the reward scheme must by a finite float.')

        return reward

    def _done(self) -> bool:
        """Returns whether or not the environments is done and should be restarted.

        The episode ends on >=90% loss of net worth or data exhaustion.

        Returns:
            A boolean signaling whether the environments is done and should be restarted.
        """
        lost_90_percent_net_worth = self._portfolio.profit_loss < 0.1

        return lost_90_percent_net_worth or not self._exchange.has_next_observation(
            window_size=self._window_size)

    def _info(self, order: Order) -> dict:
        """Returns any auxiliary, diagnostic, or debugging information for the current timestep.

        Args:
            order: The order created during the current timestep.

        Returns:
            info: A dictionary containing the exchange used, the portfolio, the broker,
                the current timestep, and any order executed this time step.
        """
        return {
            'current_step': self.clock.step,
            'portfolio': self._portfolio,
            'broker': self._broker,
            'exchange': self._exchange,
            'order': order,
        }

    def step(self, action) -> Tuple[pd.DataFrame, float, bool, dict]:
        """Run one timestep within the environments based on the specified action.

        Arguments:
            action: The trade action provided by the agent for this timestep.

        Returns:
            observation (pandas.DataFrame): Provided by the environments's exchange, often OHLCV or tick trade history data points.
            reward (float): An size corresponding to the benefit earned by the action taken this timestep.
            done (bool): If `True`, the environments is complete and should be restarted.
            info (dict): Any auxiliary, diagnostic, or debugging information to output.
        """
        order = self._take_action(action)

        observation = self._next_observation()
        reward = self._get_reward()
        done = self._done()
        info = self._info(order)

        if self._enable_logger:
            self.logger.debug('Order: {}'.format(order))
            self.logger.debug('P/L: {}'.format(self._portfolio.profit_loss))
            self.logger.debug('Reward ({}): {}'.format(self.clock.step, reward))
            self.logger.debug('Performance: {}'.format(
                self._portfolio.performance.tail(1)))

        self.clock.increment()

        return observation, reward, done, info

    def reset(self) -> pd.DataFrame:
        """Resets the state of the environments and returns an initial observation.

        Returns:
            The episode's initial observation.
        """
        self.clock.reset()

        if not self._exchange.is_live:
            if self._initial_balances is not None:
                # Restore the wallets captured on the first reset.
                self._portfolio._wallets = {}

                for balance in self._initial_balances:
                    self._portfolio.add(
                        (self._exchange, balance.instrument, balance.size))
            else:
                # First reset: snapshot the starting balances for later runs.
                self._initial_balances = self._portfolio.total_balances

        self._action_scheme.reset()
        self._reward_scheme.reset()
        self._exchange.reset()
        self._portfolio.reset()
        self._broker.reset()

        observation = self._next_observation()

        self.clock.increment()

        return observation

    def render(self, mode='none'):
        """Renders the environment via matplotlib ('chart') or the logger ('log')."""
        if mode == 'log':
            self.logger.info('Performance: ' +
                             str(self._portfolio.performance))
        elif mode == 'chart':
            if self.viewer is None and hasattr(self.exchange,
                                               '_pre_transformed_data'):
                # Lazily build the chart from the exchange's raw data.
                self.viewer = MatplotlibTradingChart(
                    self.exchange._pre_transformed_data)

            if self.viewer is not None:
                self.viewer.render(
                    self.clock.step - 1,
                    self._portfolio.performance['net_worth'].values,
                    self.render_benchmarks, self._broker.trades)

    def close(self):
        """Utility method to clean environment before closing."""
        if self.viewer is not None:
            self.viewer.close()
def exchange(self, exchange: Union[Exchange, str]):
    """Assign the active exchange (object, or registry name) and rebuild the broker."""
    if isinstance(exchange, str):
        self._exchange = exchanges.get(exchange)
    else:
        self._exchange = exchange
    self._broker = Broker(self._exchange)
from tensortrade.exchanges.simulated import SimulatedExchange
from tensortrade.orders import Order, Broker
from tensortrade.orders.recipe import Recipe
from tensortrade.orders.criteria import StopLoss, StopDirection
from tensortrade.wallets import Portfolio, Wallet
from tensortrade.trades import TradeSide, TradeType

# Column of the input CSV used as the quote price.
PRICE_COLUMN = "close"

# Shared fixture data: hourly BTC/USD candles with normalized column names.
data_frame = pd.read_csv("tests/data/input/coinbase-1h-btc-usd.csv")
data_frame.columns = map(str.lower, data_frame.columns)
data_frame = data_frame.rename(columns={'volume btc': 'volume'})

# Module-level exchange/broker shared by the tests below.
exchange = SimulatedExchange(data_frame=data_frame,
                             price_column=PRICE_COLUMN,
                             randomize_time_slices=True)

broker = Broker(exchange)
broker.reset()


def test_init():
    """A market BUY order for 10% of the base wallet can be constructed."""
    wallets = [Wallet(exchange, 10000 * USD), Wallet(exchange, 0 * BTC)]
    portfolio = Portfolio(base_instrument=USD, wallets=wallets)

    base_wallet = portfolio.get_wallet(exchange.id, USD)

    # Spend a tenth of the available USD balance.
    quantity = (1 / 10) * base_wallet.balance

    order = Order(side=TradeSide.BUY,
                  trade_type=TradeType.MARKET,
                  pair=USD / BTC,
                  quantity=quantity,
                  portfolio=portfolio)

    assert order
def test_cancel_executed_order(mock_order_class, mock_exchange_class):
    """Every non-pending status must make broker.cancel() raise a Warning."""
    broker = Broker(mock_exchange_class.return_value)

    order = mock_order_class.return_value
    order.cancel = mock.Mock(return_value=None)

    broker.submit(order)
    assert order in broker.unexecuted

    non_cancellable_statuses = (
        OrderStatus.OPEN,
        OrderStatus.PARTIALLY_FILLED,
        OrderStatus.FILLED,
        OrderStatus.CANCELLED,
    )

    for status in non_cancellable_statuses:
        order.status = status
        with pytest.raises(Warning):
            broker.cancel(order)
def test_on_fill_with_complex_order(mock_trade_class, mock_exchange_class):
    """Filling an order with an attached risk-management OrderSpec records the
    trade and leaves the follow-up order queued (legacy pair API)."""
    exchange = mock_exchange_class.return_value
    exchange.id = "fake_exchange_id"
    broker = Broker(exchange)

    wallets = [Wallet(exchange, 10000 * USD), Wallet(exchange, 0 * BTC)]
    portfolio = Portfolio(USD, wallets)

    side = TradeSide.BUY
    order = Order(step=0,
                  side=TradeSide.BUY,
                  trade_type=TradeType.MARKET,
                  pair=USD / BTC,
                  quantity=5200.00 * USD,
                  portfolio=portfolio,
                  price=7000.00)

    # Exit criteria: stop out on a 3% drop OR take profit on a 2% rise.
    risk_criteria = Stop("down", 0.03) ^ Stop("up", 0.02)

    # Opposite-side follow-up order that fires once the criteria are met.
    risk_management = OrderSpec(
        side=TradeSide.SELL if side == TradeSide.BUY else TradeSide.BUY,
        trade_type=TradeType.MARKET,
        pair=USD / BTC,
        criteria=risk_criteria)

    order += risk_management

    order.attach(broker)
    order.execute(exchange)

    # The broker normally registers executed orders itself; done by hand here.
    broker._executed[order.id] = order

    # Execute fake trade
    price = 7000.00
    scale = order.price / price
    commission = 3.00 * USD

    base_size = scale * order.size - commission.size

    trade = mock_trade_class.return_value
    trade.order_id = order.id
    trade.size = base_size
    trade.price = price
    trade.commission = commission

    base_wallet = portfolio.get_wallet(exchange.id, USD)
    quote_wallet = portfolio.get_wallet(exchange.id, BTC)

    # Mirror the wallet transfers the exchange would perform on a real fill.
    base_size = trade.size + trade.commission.size
    quote_size = (order.price / trade.price) * (trade.size / trade.price)

    base_wallet -= Quantity(USD, size=base_size, path_id=order.path_id)
    quote_wallet += Quantity(BTC, size=quote_size, path_id=order.path_id)

    assert trade.order_id in broker.executed.keys()
    assert trade not in broker.trades
    assert broker.unexecuted == []

    order.fill(exchange, trade)

    assert order.remaining_size == 0
    assert trade in broker.trades[order.id]
    # The risk-management follow-up should now be waiting in the queue.
    assert broker.unexecuted != []
class TradingEnvironment(gym.Env, TimeIndexed):
    """A trading environments made for use with Gym-compatible reinforcement learning algorithms."""

    agent_id: str = None
    episode_id: str = None

    def __init__(self,
                 portfolio: Union[Portfolio, str],
                 action_scheme: Union[ActionScheme, str],
                 reward_scheme: Union[RewardScheme, str],
                 feed: DataFeed = None,
                 window_size: int = 1,
                 use_internal: bool = True,
                 renderers: Union[str, List[str], List['BaseRenderer']] = 'screenlog',
                 **kwargs):
        """
        Arguments:
            portfolio: The `Portfolio` of wallets used to submit and execute orders from.
            action_scheme: The component for transforming an action into an `Order` at each timestep.
            reward_scheme: The component for determining the reward at each timestep.
            feed (optional): The pipeline of features to pass the observations through.
            window_size (optional): Number of past observations stacked into each observation (default 1).
            use_internal (optional): Whether internally-generated feed keys are included in observations.
            renderers (optional): single or list of renderers for output by name or as objects.
                String Values: 'screenlog', 'filelog', or 'plotly'. None for no rendering.
            price_history (optional): OHLCV price history feed used for rendering
                the chart. Required if render_mode is 'plotly'.
            kwargs (optional): Additional arguments for tuning the environments, logging, etc.
        """
        super().__init__()
        self.portfolio = portfolio
        self.action_scheme = action_scheme
        self.reward_scheme = reward_scheme
        self.feed = feed
        self.window_size = window_size
        self.use_internal = use_internal
        self._price_history: pd.DataFrame = kwargs.get('price_history', None)

        if self.feed:
            # Pull one row only to record the externally-provided keys, then
            # rewind the feed so no data is actually consumed.
            self._external_keys = self.feed.next().keys()
            self.feed.reset()

        self.history = ObservationHistory(window_size=window_size)

        self._broker = Broker(exchanges=self.portfolio.exchanges)
        self.clock = Clock()

        # Populated by self.compile() below.
        self.action_space = None
        self.observation_space = None

        # Normalize `renderers` to a list of renderer objects, resolving
        # string names through the renderer registry.
        if not renderers:
            renderers = []
        elif type(renderers) is not list:
            renderers = [renderers]

        self._renderers = []
        for renderer in renderers:
            if isinstance(renderer, str):
                renderer = get(renderer)
            self._renderers.append(renderer)

        self._enable_logger = kwargs.get('enable_logger', False)
        self._observation_dtype = kwargs.get('dtype', np.float32)
        self._observation_lows = kwargs.get('observation_lows', -np.iinfo(np.int64).max)
        self._observation_highs = kwargs.get('observation_highs', np.iinfo(np.int64).max)
        # Episode ends once profit/loss falls below this fraction (10% by default).
        self._max_allowed_loss = kwargs.get('max_allowed_loss', 0.1)

        if self._enable_logger:
            self.logger = logging.getLogger(kwargs.get('logger_name', __name__))
            self.logger.setLevel(kwargs.get('log_level', logging.DEBUG))

        self._max_episodes = None
        self._max_steps = None

        # Silence TensorFlow's logger unless explicitly re-enabled.
        logging.getLogger('tensorflow').disabled = kwargs.get('disable_tensorflow_logger', True)

        self.compile()

    @property
    def max_episodes(self) -> int:
        # Upper bound on episodes, used for rendering progress; None = unbounded.
        return self._max_episodes

    @max_episodes.setter
    def max_episodes(self, max_episodes: int):
        self._max_episodes = max_episodes

    @property
    def max_steps(self) -> int:
        # Upper bound on steps per episode, used for rendering progress.
        return self._max_steps

    @max_steps.setter
    def max_steps(self, max_steps: int):
        self._max_steps = max_steps

    def compile(self):
        """
        Sets the observation space and the action space of the environment.
        Creates the internal feed and sets initialization for different components.
        """
        components = [self._broker, self.portfolio, self.action_scheme,
                      self.reward_scheme] + self.portfolio.exchanges

        # Every time-aware component shares the environment's single clock.
        for component in components:
            component.clock = self.clock

        self.action_scheme.exchange_pairs = self.portfolio.exchange_pairs
        self.action_scheme.compile()
        self.action_space = self.action_scheme.action_space

        # Always append the internally-generated portfolio feed.
        if not self.feed:
            self.feed = create_internal_feed(self.portfolio)
        else:
            self.feed = self.feed + create_internal_feed(self.portfolio)

        # Probe one row to size the observation space, then rewind.
        initial_obs = self.feed.next()
        n_features = len(initial_obs.keys()) if self.use_internal else len(self._external_keys)

        self.observation_space = Box(
            low=self._observation_lows,
            high=self._observation_highs,
            shape=(self.window_size, n_features),
            dtype=self._observation_dtype
        )

        self.feed.reset()

    @property
    def portfolio(self) -> Portfolio:
        """The portfolio of instruments currently held on this exchange."""
        return self._portfolio

    @portfolio.setter
    def portfolio(self, portfolio: Union[Portfolio, str]):
        # A string is resolved through the `wallets` registry.
        self._portfolio = wallets.get(portfolio) if isinstance(portfolio, str) else portfolio

    @property
    def broker(self) -> Broker:
        """The broker used to execute orders within the environment."""
        return self._broker

    @property
    def episode_trades(self) -> Dict[str, 'Trade']:
        """A dictionary of trades made this episode, organized by order id."""
        return self._broker.trades

    @property
    def action_scheme(self) -> ActionScheme:
        """The component for transforming an action into an `Order` at each time step."""
        return self._action_scheme

    @action_scheme.setter
    def action_scheme(self, action_scheme: Union[ActionScheme, str]):
        self._action_scheme = actions.get(action_scheme) if isinstance(
            action_scheme, str) else action_scheme

    @property
    def reward_scheme(self) -> RewardScheme:
        """The component for determining the reward at each time step."""
        return self._reward_scheme

    @reward_scheme.setter
    def reward_scheme(self, reward_scheme: Union[RewardScheme, str]):
        self._reward_scheme = rewards.get(reward_scheme) if isinstance(
            reward_scheme, str) else reward_scheme

    @property
    def price_history(self) -> pd.DataFrame:
        # OHLCV frame used only for chart rendering; may be None.
        return self._price_history

    @price_history.setter
    def price_history(self, price_history):
        self._price_history = price_history

    def step(self, action: int) -> Tuple[np.array, float, bool, dict]:
        """Run one timestep within the environments based on the specified action.

        Arguments:
            action: The trade action provided by the agent for this timestep.

        Returns:
            observation (pandas.DataFrame): Provided by the environments's exchange, often OHLCV or tick trade history data points.
            reward (float): An size corresponding to the benefit earned by the action taken this timestep.
            done (bool): If `True`, the environments is complete and should be restarted.
            info (dict): Any auxiliary, diagnostic, or debugging information to output.
        """
        order = self.action_scheme.get_order(action, self.portfolio)

        if order:
            self._broker.submit(order)

        self._broker.update()

        obs_row = self.feed.next()

        if not self.use_internal:
            # Strip internally-generated keys, keeping only external features.
            obs_row = {k: obs_row[k] for k in self._external_keys}

        self.history.push(obs_row)

        obs = self.history.observe()
        obs = obs.astype(self._observation_dtype)

        reward = self.reward_scheme.get_reward(self._portfolio)
        reward = np.nan_to_num(reward)

        if np.bitwise_not(np.isfinite(reward)):
            raise ValueError('Reward returned by the reward scheme must by a finite float.')

        # Episode ends on excessive loss or when the feed runs dry.
        done = (self.portfolio.profit_loss < self._max_allowed_loss) or not self.feed.has_next()

        info = {
            'step': self.clock.step,
            'portfolio': self.portfolio,
            'broker': self._broker,
            'order': order,
        }

        if self._enable_logger:
            self.logger.debug('Order: {}'.format(order))
            self.logger.debug('Observation: {}'.format(obs))
            self.logger.debug('P/L: {}'.format(self._portfolio.profit_loss))
            self.logger.debug('Reward ({}): {}'.format(self.clock.step, reward))
            self.logger.debug('Performance: {}'.format(self._portfolio.performance.tail(1)))

        self.clock.increment()

        return obs, reward, done, info

    def reset(self) -> np.array:
        """Resets the state of the environments and returns an initial observation.

        Returns:
            The episode's initial observation.
        """
        self.episode_id = uuid.uuid4()

        self.clock.reset()
        self.feed.reset()
        self.action_scheme.reset()
        self.reward_scheme.reset()
        self.portfolio.reset()
        self.history.reset()
        self._broker.reset()

        for renderer in self._renderers:
            renderer.reset()

        obs_row = self.feed.next()

        if not self.use_internal:
            obs_row = {k: obs_row[k] for k in self._external_keys}

        self.history.push(obs_row)

        obs = self.history.observe()

        self.clock.increment()

        return obs

    def render(self, episode: int = None):
        """Renders the environment.

        Arguments:
            episode: Current episode number (0-based).
        """
        current_step = self.clock.step - 1

        for renderer in self._renderers:
            # Only show history up to (not including) the current step.
            price_history = None if self._price_history is None else self._price_history[self._price_history.index < current_step]
            renderer.render(episode=episode,
                            max_episodes=self._max_episodes,
                            step=current_step,
                            max_steps=self._max_steps,
                            price_history=price_history,
                            net_worth=self._portfolio.performance.net_worth,
                            performance=self._portfolio.performance.drop(columns=['base_symbol']),
                            trades=self._broker.trades)

    def save(self):
        """Saves the rendered output of every attached renderer."""
        for renderer in self._renderers:
            renderer.save()

    def close(self):
        """Utility method to clean environment before closing."""
        for renderer in self._renderers:
            # NOTE(review): `callable(hasattr(renderer, 'close'))` is always
            # False (`callable` of a bool), so renderer.close() is never
            # called — likely intended: callable(getattr(renderer, 'close', None)).
            if callable(hasattr(renderer, 'close')):
                renderer.close()  # pylint: disable=no-member
def __init__(self,
             portfolio: Union[Portfolio, str],
             action_scheme: Union[ActionScheme, str],
             reward_scheme: Union[RewardScheme, str],
             feed: DataFeed = None,
             window_size: int = 1,
             use_internal: bool = True,
             renderers: Union[str, List[str], List['BaseRenderer']] = 'screenlog',
             **kwargs):
    """
    Arguments:
        portfolio: The `Portfolio` of wallets used to submit and execute orders from.
        action_scheme: The component for transforming an action into an `Order` at each timestep.
        reward_scheme: The component for determining the reward at each timestep.
        feed (optional): The pipeline of features to pass the observations through.
        renderers (optional): single or list of renderers for output by name or as objects.
            String values: 'screenlog', 'filelog', or 'plotly'. None for no rendering.
        price_history (optional): OHLCV price history feed used for rendering the chart.
            Required if the 'plotly' renderer is used. (Passed through **kwargs.)
        kwargs (optional): Additional arguments for tuning the environments, logging, etc.
    """
    super().__init__()

    self.portfolio = portfolio
    self.action_scheme = action_scheme
    self.reward_scheme = reward_scheme
    self.feed = feed
    self.window_size = window_size
    self.use_internal = use_internal
    self._price_history: pd.DataFrame = kwargs.get('price_history', None)

    if self.feed:
        # Record the external feature keys so internal features can later be
        # filtered out of observations when use_internal is False.
        self._external_keys = self.feed.next().keys()
        self.feed.reset()

    self.history = ObservationHistory(window_size=window_size)
    self._broker = Broker(exchanges=self.portfolio.exchanges)
    self.clock = Clock()
    self.action_space = None
    self.observation_space = None

    # Normalize `renderers` into a list of renderer instances.
    if not renderers:
        renderers = []
    elif not isinstance(renderers, list):
        # FIX: `type(renderers) is not list` rejected list subclasses;
        # `isinstance` is the idiomatic type check.
        renderers = [renderers]

    self._renderers = []
    for renderer in renderers:
        if isinstance(renderer, str):
            # Resolve a registered renderer by its name.
            renderer = get(renderer)
        self._renderers.append(renderer)

    self._enable_logger = kwargs.get('enable_logger', False)
    self._observation_dtype = kwargs.get('dtype', np.float32)
    self._observation_lows = kwargs.get('observation_lows', -np.iinfo(np.int64).max)
    self._observation_highs = kwargs.get('observation_highs', np.iinfo(np.int64).max)
    # Episode terminates when portfolio profit/loss drops below this value.
    self._max_allowed_loss = kwargs.get('max_allowed_loss', 0.1)

    if self._enable_logger:
        self.logger = logging.getLogger(kwargs.get('logger_name', __name__))
        self.logger.setLevel(kwargs.get('log_level', logging.DEBUG))

    self._max_episodes = None
    self._max_steps = None

    logging.getLogger('tensorflow').disabled = kwargs.get('disable_tensorflow_logger', True)

    self.compile()
class TradingEnvironment(gym.Env, TimeIndexed):
    """A trading environments made for use with Gym-compatible reinforcement
    learning algorithms."""

    # Identifier of the agent currently interacting with the environment.
    agent_id: str = None
    # Identifier of the current episode (set on `reset`).
    episode_id: str = None

    def __init__(self,
                 portfolio: Union[Portfolio, str],
                 action_scheme: Union[ActionScheme, str],
                 reward_scheme: Union[RewardScheme, str],
                 feed: DataFeed = None,
                 window_size: int = 1,
                 use_internal=True,
                 **kwargs):
        """
        Arguments:
            portfolio: The `Portfolio` of wallets used to submit and execute orders from.
            action_scheme: The component for transforming an action into an `Order` at each timestep.
            reward_scheme: The component for determining the reward at each timestep.
            feed (optional): The pipeline of features to pass the observations through.
            kwargs (optional): Additional arguments for tuning the environments,
                logging, etc. Recognized keys: 'enable_logger', 'dtype',
                'observation_lows', 'observation_highs', 'max_allowed_loss',
                'logger_name', 'log_level', 'disable_tensorflow_logger'.
        """
        super().__init__()

        self.portfolio = portfolio
        self.action_scheme = action_scheme
        self.reward_scheme = reward_scheme
        self.feed = feed
        self.window_size = window_size
        self.use_internal = use_internal

        if self.feed:
            # Record the external feature keys so internal features can later
            # be filtered out of observations when use_internal is False.
            self._external_keys = self.feed.next().keys()
            self.feed.reset()

        self.history = ObservationHistory(window_size=window_size)
        self._broker = Broker(exchanges=self.portfolio.exchanges)
        self.clock = Clock()
        self.action_space = None
        self.observation_space = None
        self.viewer = None

        self._enable_logger = kwargs.get('enable_logger', False)
        self._observation_dtype = kwargs.get('dtype', np.float32)
        self._observation_lows = kwargs.get('observation_lows', 0)
        self._observation_highs = kwargs.get('observation_highs', 1)
        # Episode terminates once profit/loss falls below this threshold.
        # Generalized from a hard-coded 0.1 in `step`; the default preserves
        # the original behavior.
        self._max_allowed_loss = kwargs.get('max_allowed_loss', 0.1)

        if self._enable_logger:
            self.logger = logging.getLogger(kwargs.get('logger_name', __name__))
            self.logger.setLevel(kwargs.get('log_level', logging.DEBUG))

        logging.getLogger('tensorflow').disabled = kwargs.get('disable_tensorflow_logger', True)

        self.compile()

    def compile(self):
        """Sets the observation space and the action space of the environment.

        Creates the internal feed and sets initialization for different
        components.
        """
        for component in [self._broker, self.portfolio, self.action_scheme, self.reward_scheme]:
            component.clock = self.clock

        self.action_scheme.set_pairs(exchange_pairs=self.portfolio.exchange_pairs)
        self.action_space = Discrete(len(self.action_scheme))

        # NOTE(review): the extracted source read as two unconditional
        # assignments, which would append the internal feed twice when no
        # external feed was given; an if/else split is presumably what was
        # intended -- confirm against upstream history.
        if not self.feed:
            self.feed = create_internal_feed(self.portfolio)
        else:
            self.feed = self.feed + create_internal_feed(self.portfolio)

        initial_obs = self.feed.next()
        n_features = len(initial_obs.keys()) if self.use_internal else len(self._external_keys)

        self.observation_space = Box(
            low=self._observation_lows,
            high=self._observation_highs,
            shape=(self.window_size, n_features),
            dtype=self._observation_dtype
        )

        self.feed.reset()

    @property
    def portfolio(self) -> Portfolio:
        """The portfolio of instruments currently held on this exchange."""
        return self._portfolio

    @portfolio.setter
    def portfolio(self, portfolio: Union[Portfolio, str]):
        # Accept either a Portfolio instance or a registered name.
        self._portfolio = wallets.get(portfolio) if isinstance(portfolio, str) else portfolio

    @property
    def broker(self) -> Broker:
        """The broker used to execute orders within the environment."""
        return self._broker

    @property
    def episode_trades(self) -> Dict[str, 'Trade']:
        """A dictionary of trades made this episode, organized by order id."""
        return self._broker.trades

    @property
    def action_scheme(self) -> ActionScheme:
        """The component for transforming an action into an `Order` at each time step."""
        return self._action_scheme

    @action_scheme.setter
    def action_scheme(self, action_scheme: Union[ActionScheme, str]):
        self._action_scheme = actions.get(action_scheme) if isinstance(
            action_scheme, str) else action_scheme

    @property
    def reward_scheme(self) -> RewardScheme:
        """The component for determining the reward at each time step."""
        return self._reward_scheme

    @reward_scheme.setter
    def reward_scheme(self, reward_scheme: Union[RewardScheme, str]):
        self._reward_scheme = rewards.get(reward_scheme) if isinstance(
            reward_scheme, str) else reward_scheme

    def step(self, action: int) -> Tuple[np.array, float, bool, dict]:
        """Run one timestep within the environments based on the specified action.

        Arguments:
            action: The trade action provided by the agent for this timestep.

        Returns:
            observation (pandas.DataFrame): Provided by the environments's
                exchange, often OHLCV or tick trade history data points.
            reward (float): An size corresponding to the benefit earned by the
                action taken this timestep.
            done (bool): If `True`, the environments is complete and should be
                restarted.
            info (dict): Any auxiliary, diagnostic, or debugging information to
                output.
        """
        order = self.action_scheme.get_order(action, self.portfolio)

        if order:
            self._broker.submit(order)

        self._broker.update()

        obs_row = self.feed.next()
        if not self.use_internal:
            obs_row = {k: obs_row[k] for k in self._external_keys}

        self.history.push(obs_row)
        obs = self.history.observe()
        # NOTE(review): unlike later revisions, this version does not cast
        # `obs` to `self._observation_dtype` -- kept as-is to preserve behavior.

        reward = self.reward_scheme.get_reward(self._portfolio)
        reward = np.nan_to_num(reward)

        # FIX: the original used `np.bitwise_not(np.isfinite(reward))` -- a
        # bitwise integer operation, not a logical negation. Plain `not` is
        # the correct check for a scalar.
        if not np.isfinite(reward):
            raise ValueError('Reward returned by the reward scheme must be a finite float.')

        done = (self.portfolio.profit_loss < self._max_allowed_loss) or not self.feed.has_next()

        info = {
            'step': self.clock.step,
            'portfolio': self.portfolio,
            'broker': self._broker,
            'order': order,
        }

        if self._enable_logger:
            self.logger.debug('Order: {}'.format(order))
            self.logger.debug('Observation: {}'.format(obs))
            self.logger.debug('P/L: {}'.format(self._portfolio.profit_loss))
            self.logger.debug('Reward ({}): {}'.format(self.clock.step, reward))
            self.logger.debug('Performance: {}'.format(self._portfolio.performance.tail(1)))

        self.clock.increment()

        return obs, reward, done, info

    def reset(self) -> np.array:
        """Resets the state of the environments and returns an initial observation.

        Returns:
            The episode's initial observation.
        """
        self.episode_id = uuid.uuid4()

        self.clock.reset()
        self.feed.reset()
        self.action_scheme.reset()
        self.reward_scheme.reset()
        self.portfolio.reset()
        self.history.reset()
        self._broker.reset()

        obs_row = self.feed.next()
        if not self.use_internal:
            obs_row = {k: obs_row[k] for k in self._external_keys}

        self.history.push(obs_row)
        obs = self.history.observe()

        self.clock.increment()

        return obs

    def render(self, mode='none'):
        """Renders the environment via matplotlib.

        Arguments:
            mode: 'log' to log performance (requires enable_logger), 'chart'
                to render via the viewer, anything else is a no-op.
        """
        if mode == 'log':
            # NOTE(review): `self.logger` only exists when 'enable_logger'
            # was passed -- confirm callers guard this mode accordingly.
            self.logger.info('Performance: ' + str(self._portfolio.performance))
        elif mode == 'chart':
            if self.viewer is None:
                raise NotImplementedError()
            self.viewer.render(self.clock.step - 1,
                               self._portfolio.performance,
                               self._broker.trades)

    def close(self):
        """Utility method to clean environment before closing."""
        if self.viewer is not None:
            self.viewer.close()
def __init__(self,
             portfolio: Union[Portfolio, str],
             action_scheme: Union[ActionScheme, str],
             reward_scheme: Union[RewardScheme, str],
             feed: DataFeed = None,
             window_size: int = 1,
             use_internal: bool = True,
             renderer: Union[str, List['AbstractRenderer']] = 'human',
             **kwargs):
    """
    Arguments:
        portfolio: The `Portfolio` of wallets used to submit and execute orders from.
        action_scheme: The component for transforming an action into an `Order` at each timestep.
        reward_scheme: The component for determining the reward at each timestep.
        feed (optional): The pipeline of features to pass the observations through.
        renderer (optional): rendering mode, 'human' for a Plotly chart, or a
            list of renderer objects. None for no rendering.
        chart_height (optional): int, the chart height for 'human' mode.
            (Passed through **kwargs; presumably consumed by the chart
            renderer -- confirm.)
        price_history (optional): OHLCV price history feed used for rendering
            the chart. Required if renderer is 'human'.
        kwargs (optional): Additional arguments for tuning the environments, logging, etc.
    """
    super().__init__()

    self.portfolio = portfolio
    self.action_scheme = action_scheme
    self.reward_scheme = reward_scheme
    self.feed = feed
    self.window_size = window_size
    self.use_internal = use_internal
    self._price_history: pd.DataFrame = kwargs.get('price_history', None)

    if self.feed:
        # Record the external feature keys so internal features can later be
        # filtered out of observations when use_internal is False.
        self._external_keys = self.feed.next().keys()
        self.feed.reset()

    self.history = ObservationHistory(window_size=window_size)
    self._broker = Broker(exchanges=self.portfolio.exchanges)
    self.clock = Clock()
    self.action_space = None
    self.observation_space = None

    if renderer == 'human':
        self._renderer = [PlotlyTradingChart()]
    else:
        # NOTE(review): a single renderer object (or a string other than
        # 'human') is stored unwrapped rather than as a list -- confirm
        # downstream iteration handles this.
        self._renderer = renderer if renderer else []

    self._enable_logger = kwargs.get('enable_logger', False)
    self._observation_dtype = kwargs.get('dtype', np.float32)
    self._observation_lows = kwargs.get('observation_lows', -np.iinfo(np.int32).max)
    self._observation_highs = kwargs.get('observation_highs', np.iinfo(np.int32).max)
    # Episode terminates when portfolio profit/loss drops below this value.
    self._max_allowed_loss = kwargs.get('max_allowed_loss', 0.1)

    if self._enable_logger:
        self.logger = logging.getLogger(kwargs.get('logger_name', __name__))
        self.logger.setLevel(kwargs.get('log_level', logging.DEBUG))

    self._max_episodes = None
    self._max_steps = None

    logging.getLogger('tensorflow').disabled = kwargs.get(
        'disable_tensorflow_logger', True)

    self.compile()