def test_get_tick_history():
    """
    Test case to query Arctic TickStore
    :return:
    """
    start_time = dt.now(TIMEZONE)

    sim = Simulator(use_arctic=True)
    query = {'ccy': ['BTC-USD'], 'start_date': 20181231, 'end_date': 20190102}
    tick_history = sim.get_tick_history(query=query)
    print('\n{}\n'.format(tick_history))

    elapsed = (dt.now(TIMEZONE) - start_time).seconds
    print('Completed %s in %i seconds' % (__name__, elapsed))
    print('DONE. EXITING %s' % __name__)
def test_extract_features() -> None:
    """
    Test case to export *multiple* testing/training data sets for
    reinforcement learning
    """
    start_time = dt.now(tz=TIMEZONE)

    sim = Simulator()

    for ccy in ['ETH-USD']:
        # for ccy, ccy2 in [('LTC-USD', 'tLTCUSD')]:
        query = {
            'ccy': [ccy],  # ccy2],  # parameter must be a list
            'start_date': 20191208,  # parameter format for dates
            'end_date': 20191209,  # parameter format for dates
        }
        sim.extract_features(query)

    elapsed = (dt.now(tz=TIMEZONE) - start_time).seconds
    print('Completed %s in %i seconds' % (__name__, elapsed))
    print('DONE. EXITING %s' % __name__)
def test_get_orderbook_snapshot_history():
    """
    Test case to export testing/training data for reinforcement learning
    :return:
    """
    start_time = dt.now(TIMEZONE)

    sim = Simulator(use_arctic=True)
    query = {'ccy': ['LTC-USD'], 'start_date': 20190406, 'end_date': 20190407}
    orderbook_snapshot_history = sim.get_orderbook_snapshot_history(query=query)

    filename = '{}_{}'.format(query['ccy'][0], query['start_date'])
    sim.export_to_csv(data=orderbook_snapshot_history,
                      filename=filename, compress=False)

    elapsed = (dt.now(TIMEZONE) - start_time).seconds
    print('Completed %s in %i seconds' % (__name__, elapsed))
    print('DONE. EXITING %s' % __name__)
def test_extract_features():
    """
    Test case to export multiple testing/training data sets for
    reinforcement learning
    :return:
    """
    start_time = dt.now(TIMEZONE)

    sim = Simulator(use_arctic=True)

    # for ccy in ['BTC-USD', 'ETH-USD', 'LTC-USD']:  # , 'BCH-USD']:
    for ccy, ccy2 in [('LTC-USD', 'tLTCUSD')]:
        query = {
            'ccy': [ccy, ccy2],
            'start_date': 20190314,
            'end_date': 20190317
        }
        sim.extract_features(query)

    elapsed = (dt.now(TIMEZONE) - start_time).seconds
    print('Completed %s in %i seconds' % (__name__, elapsed))
    print('DONE. EXITING %s' % __name__)
def test_get_orderbook_snapshot_history() -> None:
    """
    Test case to export testing/training data for reinforcement learning
    """
    start_time = dt.now(tz=TIMEZONE)

    sim = Simulator()
    query = {'ccy': ['LTC-USD'], 'start_date': 20190926, 'end_date': 20190928}
    orderbook_snapshot_history = sim.get_orderbook_snapshot_history(query=query)
    if orderbook_snapshot_history is None:
        print('Exiting: orderbook_snapshot_history is NONE')
        return

    filename = 'test_' + '{}_{}'.format(query['ccy'][0], query['start_date'])
    sim.export_to_csv(data=orderbook_snapshot_history,
                      filename=filename, compress=False)

    elapsed = (dt.now(tz=TIMEZONE) - start_time).seconds
    print('Completed %s in %i seconds' % (__name__, elapsed))
    print('DONE. EXITING %s' % __name__)
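For completeness, a minimal runner for the test cases above might look like the sketch below. It assumes the module-level imports already used by these functions (dt, TIMEZONE, Simulator) and simply invokes the tests in sequence; the runner itself is illustrative and not part of the original module.

if __name__ == '__main__':
    # Hypothetical convenience runner: execute whichever test case is needed.
    # Each test prints the queried data (or exports it) and its elapsed time.
    test_get_tick_history()
    test_get_orderbook_snapshot_history()
    test_extract_features()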
class BaseEnvironment(Env, ABC): metadata = {'render.modes': ['human']} # Index of specific data points used to generate the observation space # Turn to true if Bitifinex is in the dataset (e.g., include_bitfinex=True) features = Sim.get_feature_labels(include_system_time=False, include_bitfinex=False, include_imbalances=True, include_ema=False, include_spread=True) best_bid_index = features.index('coinbase_bid_distance_0') best_ask_index = features.index('coinbase_ask_distance_0') notional_bid_index = features.index('coinbase_bid_notional_0') notional_ask_index = features.index('coinbase_ask_notional_0') buy_trade_index = features.index('coinbase_buys') sell_trade_index = features.index('coinbase_sells') def __init__(self, fitting_file='BTC-USD_2019-04-07.csv.xz', testing_file='BTC-USD_2019-04-08.csv.xz', step_size=1, max_position=5, window_size=10, seed=1, action_repeats=10, training=True, format_3d=True, z_score=True, reward_type='default', scale_rewards=True, ema_alpha=EMA_ALPHA): """ Base class for creating environments extending OpenAI's GYM framework. :param fitting_file: historical data used to fit environment data (i.e., previous trading day) :param testing_file: historical data used in environment :param step_size: increment size for steps (NOTE: leave a 1, otherwise market transaction data will be overlooked) :param max_position: maximum number of positions able to hold in inventory :param window_size: number of lags to include in observation space :param seed: random seed number :param action_repeats: number of steps to take in environment after a given action :param training: if TRUE, then randomize starting point in environment :param format_3d: if TRUE, reshape observation space from matrix to tensor :param z_score: if TRUE, normalize data set with Z-Score, otherwise use Min-Max (i.e., range of 0 to 1) :param reward_type: method for calculating the environment's reward: 1) 'trade_completion' --> reward is generated per trade's round trip 2) 'continuous_total_pnl' --> change in realized & unrealized pnl between time steps 3) 'continuous_realized_pnl' --> change in realized pnl between time steps 4) 'continuous_unrealized_pnl' --> change in unrealized pnl between time steps 5) 'normed' --> refer to https://arxiv.org/abs/1804.04216v1 6) 'div' --> reward is generated per trade's round trip divided by inventory count (again, refer to https://arxiv.org/abs/1804.04216v1) 7) 'asymmetrical' --> extended version of *default* and enhanced with a reward for being filled above/below midpoint, and returns only negative rewards for Unrealized PnL to discourage long-term speculation. 8) 'asymmetrical_adj' --> extended version of *default* and enhanced with a reward for being filled above/below midpoint, and weighted up/down unrealized returns. 9) 'default' --> Pct change in Unrealized PnL + Realized PnL of respective time step. 
:param ema_alpha: decay factor for EMA, usually between 0.9 and 0.9999; if NONE, raw values are returned in place of smoothed values """ # properties required for instantiation self.action_repeats = action_repeats self._seed = seed self._random_state = np.random.RandomState(seed=self._seed) self.training = training self.step_size = step_size self.max_position = max_position self.window_size = window_size self.reward_type = reward_type self.format_3d = format_3d # e.g., [window, features, *NEW_AXIS*] self.sym = testing_file[:7] # slice the CCY from the filename self.scale_rewards = scale_rewards # properties that get reset() self.reward = 0.0 self.done = False self.local_step_number = 0 self.midpoint = 0.0 self.observation = None self.action = 0 self.last_pnl = 0. self.last_midpoint = None self.midpoint_change = None # properties to override in sub-classes self.actions = None self.broker = None self.action_space = None self.observation_space = None # get historical data for simulations self.sim = Sim(z_score=z_score, alpha=ema_alpha) self.prices_, self.data, self.normalized_data = self.sim.load_environment_data( fitting_file=fitting_file, testing_file=testing_file, include_imbalances=True, as_pandas=False) self.best_bid = self.best_ask = None self.max_steps = self.data.shape[ 0] - self.step_size * self.action_repeats - 1 # load indicators into the indicator manager self.tns = IndicatorManager() self.rsi = IndicatorManager() for window in INDICATOR_WINDOW: self.tns.add( ('tns_{}'.format(window), TnS(window=window, alpha=ema_alpha))) self.rsi.add( ('rsi_{}'.format(window), RSI(window=window, alpha=ema_alpha))) # conditionally load PnlNorm, since it calculates in O(n) time complexity self.pnl_norm = PnlNorm( window=INDICATOR_WINDOW[0], alpha=None) if self.reward_type == 'normed' else None # rendering class self._render = TradingGraph(sym=self.sym) # graph midpoint prices self._render.reset_render_data( y_vec=self.prices_[:np.shape(self._render.x_vec)[0]]) # buffer for appending lags self.data_buffer = list() @abstractmethod def map_action_to_broker(self, action: int): """ Translate agent's action into an order and submit order to broker. :param action: (int) agent's action for current step :return: (tuple) reward, pnl """ return 0., 0. @abstractmethod def _create_position_features(self): """ Create agent space feature set reflecting the positions held in inventory. :return: (np.array) position features """ return np.array([np.nan], dtype=np.float32) @staticmethod def _trade_completion_reward(step_pnl: float): """ Alternate approach for reward calculation which places greater importance on trades that have returned at least a 1:1 profit-to-loss ratio after transaction fees. :param step_pnl: limit order pnl and any penalties for bad actions :return: normalized reward (-0.1 to 0.1) range, which can be scaled to (-1, 1) in self._get_step_reward() method """ reward = 0.0 if step_pnl > MARKET_ORDER_FEE * 2: # e.g., 2:1 profit to loss ratio reward += 1.0 elif step_pnl > 0.0: reward += step_pnl elif step_pnl < -MARKET_ORDER_FEE: # skew penalty so reward -= 1.0 else: reward -= step_pnl return reward def _asymmetrical_reward(self, long_filled: bool, short_filled: bool, step_pnl: float, dampening=0.15): """ Asymmetrical reward type for environments, which is derived from percentage changes and notional values. The inputs are as follows: (1) Change in exposure value between time steps, in percentage terms; and, (2) Realized PnL from a open order being filled between time steps, in dollar terms. 
:param long_filled: TRUE if long order is filled within same time step :param short_filled: TRUE if short order is filled within same time step :param step_pnl: limit order pnl and any penalties for bad actions :param dampening: discount factor towards pnl change between time steps :return: (float) """ exposure_change = self.broker.total_inventory_count * self.midpoint_change long_fill_reward = short_fill_reward = 0. if long_filled: long_fill_reward += ((self.midpoint / self.best_bid) - 1.) print("long_fill_reward: {:.6f}".format(long_fill_reward)) if short_filled: short_fill_reward += ((self.best_ask / self.midpoint) - 1.) print("short_fill_reward: {:.6f}".format(short_fill_reward)) reward = (long_fill_reward + short_fill_reward) + \ min(0., exposure_change * dampening) if long_filled: reward += step_pnl if short_filled: reward += step_pnl return reward def _asymmetrical_reward_adj(self, long_filled: bool, short_filled: bool, step_pnl: float, dampening=0.25): """ Asymmetrical reward type for environments with balanced feedback, which is derived from percentage changes and notional values. The inputs are as follows: (1) Change in exposure value between time steps, in percentage terms; and, (2) Realized PnL from a open order being filled between time steps, in dollar terms. :param long_filled: TRUE if long order is filled within same time step :param short_filled: TRUE if short order is filled within same time step :param step_pnl: limit order pnl and any penalties for bad actions :param dampening: discount factor towards pnl change between time steps :return: (float) """ exposure_change = self.broker.total_inventory_count * self.midpoint_change long_fill_reward = short_fill_reward = 0. if long_filled: long_fill_reward += ((self.midpoint / self.best_bid) - 1.) print("long_fill_reward: {:.6f}".format(long_fill_reward)) if short_filled: short_fill_reward += ((self.best_ask / self.midpoint) - 1.) print("short_fill_reward: {:.6f}".format(short_fill_reward)) reward = (long_fill_reward + short_fill_reward) + \ min(0., exposure_change * (1. - dampening)*0.1) + \ max(0., exposure_change * dampening*0.1) if long_filled: reward += step_pnl if short_filled: reward += step_pnl return reward def _default_reward(self, long_filled: bool, short_filled: bool, step_pnl: float): """ Default reward type for environments, which is derived from PnL and order quantity. The inputs are as follows: (1) Change in exposure value between time steps, in dollar terms; and, (2) Realized PnL from a open order being filled between time steps, in dollar terms. :param long_filled: TRUE if long order is filled within same time step :param short_filled: TRUE if short order is filled within same time step :param step_pnl: limit order pnl and any penalties for bad actions :return: """ reward = self.broker.total_inventory_count * self.midpoint_change if long_filled: reward += step_pnl if short_filled: reward += step_pnl return reward def _get_step_reward(self, step_pnl: float, long_filled: bool, short_filled: bool): """ Get reward for current time step. Note: 'reward_type' is set during environment instantiation. 
:param step_pnl: (float) PnL accrued from order fills at current time step :return: (float) reward """ reward = 0.0 if self.reward_type == 'default': # pnl in dollar terms reward += self._default_reward(long_filled, short_filled, step_pnl) elif self.reward_type == 'asymmetrical': reward += self._asymmetrical_reward(long_filled=long_filled, short_filled=short_filled, step_pnl=step_pnl) elif self.reward_type == 'asymmetrical_adj': reward += self._asymmetrical_reward_adj(long_filled=long_filled, short_filled=short_filled, step_pnl=step_pnl) elif self.reward_type == 'trade_completion': # reward is [-1,1] reward += self._trade_completion_reward(step_pnl=step_pnl) # Note: we do not need to update last_pnl for this reward approach elif self.reward_type == 'continuous_total_pnl': # pnl in percentage new_pnl = self.broker.get_total_pnl(self.best_bid, self.best_ask) difference = new_pnl - self.last_pnl # Difference in PnL over time step # include step_pnl to net out drops in unrealized PnL from position closing reward += difference + step_pnl self.last_pnl = new_pnl elif self.reward_type == 'continuous_realized_pnl': new_pnl = self.broker.realized_pnl reward += new_pnl - self.last_pnl # Difference in PnL self.last_pnl = new_pnl elif self.reward_type == 'continuous_unrealized_pnl': new_pnl = self.broker.get_unrealized_pnl(self.best_bid, self.best_ask) difference = new_pnl - self.last_pnl # Difference in PnL over time step # include step_pnl to net out drops in unrealized PnL from position closing reward += difference + step_pnl self.last_pnl = new_pnl elif self.reward_type == 'normed': # refer to https://arxiv.org/abs/1804.04216v1 new_pnl = self.pnl_norm.raw_value reward += new_pnl - self.last_pnl # Difference in PnL self.last_pnl = new_pnl elif self.reward_type == 'div': reward += step_pnl / max(self.broker.total_inventory_count, 1) else: # Default implementation reward += self._default_reward(long_filled, short_filled, step_pnl) if self.scale_rewards: reward *= 100. # multiply to avoid division error return reward def step(self, action: int): """ Step through environment with action :param action: (int) action to take in environment :return: (tuple) observation, reward, is_done, and empty `dict` """ for current_step in range(self.action_repeats): if self.done: self.reset() return self.observation, self.reward, self.done # reset the reward if there ARE action repeats if current_step == 0: self.reward = 0. step_action = action else: step_action = 0 # Get current step's midpoint self.midpoint = self.prices_[self.local_step_number] self.midpoint_change = (self.midpoint / self.last_midpoint) - 1. 
# Pass current time step bid/ask prices to broker to calculate PnL, # or if any open orders are to be filled self.best_bid, self.best_ask = self._get_nbbo() buy_volume = self._get_book_data(BaseEnvironment.buy_trade_index) sell_volume = self._get_book_data(BaseEnvironment.sell_trade_index) # Update indicators self.tns.step(buys=buy_volume, sells=sell_volume) self.rsi.step(price=self.midpoint) # Get PnL from any filled LIMIT orders limit_pnl, long_filled, short_filled = self.broker.step_limit_order_pnl( bid_price=self.best_bid, ask_price=self.best_ask, buy_volume=buy_volume, sell_volume=sell_volume, step=self.local_step_number) # Get PnL from any filled MARKET orders AND action penalties for invalid # actions made by the agent for future discouragement step_reward, market_pnl = self.map_action_to_broker( action=step_action) step_pnl = limit_pnl + step_reward + market_pnl # step thru pnl_norm if not None if self.pnl_norm: self.pnl_norm.step(pnl=self.broker.get_unrealized_pnl( bid_price=self.best_bid, ask_price=self.best_ask)) self.reward += self._get_step_reward(step_pnl=step_pnl, long_filled=long_filled, short_filled=short_filled) step_observation = self._get_step_observation(action=action) self.data_buffer.append(step_observation) if len(self.data_buffer) > self.window_size: del self.data_buffer[0] self.local_step_number += self.step_size self.last_midpoint = self.midpoint self.observation = self._get_observation() if self.local_step_number > self.max_steps: self.done = True flatten_pnl = self.broker.flatten_inventory( self.best_bid, self.best_ask) self.reward += self._get_step_reward(step_pnl=flatten_pnl, long_filled=False, short_filled=False) return self.observation, self.reward, self.done, {} def reset(self): """ Reset the environment. :return: (np.array) Observation at first step """ if self.training: self.local_step_number = self._random_state.randint( low=0, high=self.data.shape[0] // 5) else: self.local_step_number = 0 msg = (' {}-{} reset. Episode pnl: {:.4f} with {} trades. ' 'Avg. Trade PnL: {:.4f}. First step: {}').format( self.sym, self._seed, self.broker.realized_pnl, self.broker.total_trade_count, self.broker.average_trade_pnl, self.local_step_number) print(msg) self.reward = 0.0 self.done = False self.broker.reset() self.data_buffer.clear() self.rsi.reset() self.tns.reset() if self.pnl_norm: self.pnl_norm.reset() for step in range(self.window_size + INDICATOR_WINDOW_MAX + 1): self.midpoint = self.prices_[self.local_step_number] self.best_bid, self.best_ask = self._get_nbbo() step_buy_volume = self._get_book_data( BaseEnvironment.buy_trade_index) step_sell_volume = self._get_book_data( BaseEnvironment.sell_trade_index) self.tns.step(buys=step_buy_volume, sells=step_sell_volume) self.rsi.step(price=self.midpoint) # step thru pnl_norm if not None if self.pnl_norm: self.pnl_norm.step(pnl=self.broker.get_unrealized_pnl( bid_price=self.best_bid, ask_price=self.best_ask)) step_observation = self._get_step_observation(action=0) self.data_buffer.append(step_observation) self.local_step_number += self.step_size self.last_midpoint = self.midpoint if len(self.data_buffer) > self.window_size: del self.data_buffer[0] self.midpoint_change = (self.midpoint / self.last_midpoint) - 1. self.observation = self._get_observation() return self.observation def render(self, mode='human'): """ Render midpoint prices :param mode: (str) flag for type of rendering. Only 'human' supported. 
:return: (void) """ self._render.render(midpoint=self.midpoint, mode=mode) def close(self): """ Free clear memory when closing environment :return: (void) """ self.data = None self.normalized_data = None self.prices_ = None self.broker.reset() self.data_buffer.clear() self.sim = None self.tns = None self.rsi = None self.pnl_norm = None def seed(self, seed=1): """ Set random seed in environment :param seed: (int) random seed number :return: (list) seed number in a list """ self._random_state = np.random.RandomState(seed=seed) self._seed = seed return [seed] @staticmethod def _process_data(_next_state): """ Reshape observation for function approximator :param _next_state: observation space :return: (np.array) clipped observation space """ return np.clip(_next_state.reshape((1, -1)), -10, 10) def _create_action_features(self, action): """ Create a features array for the current time step's action. :param action: (int) action number :return: (np.array) One-hot of current action """ return self.actions[action] def _create_indicator_features(self): """ Create features vector with environment indicators. :return: (np.array) Indicator values for current time step """ return np.array((*self.tns.get_value(), *self.rsi.get_value()), dtype=np.float32).reshape(1, -1) def _get_nbbo(self): """ Get best bid and offer :return: (tuple) best bid and offer """ best_bid = round( self.midpoint - self._get_book_data(BaseEnvironment.best_bid_index), 2) best_ask = round( self.midpoint + self._get_book_data(BaseEnvironment.best_ask_index), 2) return best_bid, best_ask def _get_book_data(self, index=0): """ Return step 'n' of order book snapshot data :param index: (int) step 'n' to look up in order book snapshot history :return: (np.array) order book snapshot vector """ return self.data[self.local_step_number][index] def _get_step_observation(self, action=0): """ Current step observation, NOT including historical data. :param action: (int) current step action :return: (np.array) Current step observation """ step_position_features = self._create_position_features() step_action_features = self._create_action_features(action=action) step_indicator_features = self._create_indicator_features() return np.concatenate( (self._process_data(self.normalized_data[self.local_step_number]), step_indicator_features, step_position_features, step_action_features, np.array([self.reward], dtype=np.float32)), axis=None) def _get_observation(self): """ Current step observation, including historical data. If format_3d is TRUE: Expand the observation space from 2 to 3 dimensions. (note: This is necessary for conv nets in Baselines.) :return: (np.array) Observation state for current time step """ # Note: reversing the data to chronological order is actually faster when # making an array in Python / Numpy, which is odd. #timeit observation = np.asarray(self.data_buffer, dtype=np.float32) if self.format_3d: observation = np.expand_dims(observation, axis=-1) return observation
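BaseEnvironment leaves map_action_to_broker() and _create_position_features() abstract and expects sub-classes to set self.actions, self.broker, self.action_space, and self.observation_space. The sketch below shows only the pieces needed to make a concrete sub-class work; the class name ExampleEnv and its three-action layout are hypothetical, and the Broker/spaces usage mirrors the concrete environments elsewhere in this repository rather than a prescribed API.

class ExampleEnv(BaseEnvironment):
    """Illustrative sub-class sketch; not part of the repository."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # properties BaseEnvironment expects sub-classes to override
        self.actions = np.eye(3, dtype=np.float32)  # e.g., no-op / buy / sell
        self.broker = Broker(max_position=self.max_position)
        self.action_space = spaces.Discrete(len(self.actions))

    def map_action_to_broker(self, action: int):
        # Translate the discrete action into orders for the broker and
        # return the (reward, pnl) tuple the base class expects.
        if action == 0:  # no-op
            return 0., 0.
        # ...submit limit/market orders to self.broker here...
        return 0., 0.

    def _create_position_features(self):
        # Inventory counts normalized by the maximum allowed position size.
        return np.array(
            (self.broker.long_inventory_count / self.max_position,
             self.broker.short_inventory_count / self.max_position),
            dtype=np.float32)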
class MarketMaker(Env): # gym.env required metadata = {'render.modes': ['human']} id = 'market-maker-v0' # constants inventory_features = [ 'long_inventory', 'short_inventory', 'total_unrealized_and_realized_pnl', 'long_unrealized_pnl', 'short_unrealized_pnl', 'buy_distance_to_midpoint', 'short_distance_to_midpoint', 'buy_queue_vol', 'short_queue_vol' ] # Turn to true if Bitifinex is in the dataset (e.g., include_bitfinex=True) features = Sim.get_feature_labels(include_system_time=False, include_bitfinex=False) indicator_features = ['tns', 'rsi'] best_bid_index = features.index('coinbase-bid-distance-0') best_ask_index = features.index('coinbase-ask-distance-0') notional_bid_index = features.index('coinbase-bid-notional-0') notional_ask_index = features.index('coinbase-ask-notional-0') buy_trade_index = features.index('coinbase-buys') sell_trade_index = features.index('coinbase-sells') target_pnl = BROKER_FEE * 10 * 5 # e.g., 5 for max_positions def __init__(self, *, training=True, fitting_file='ETH-USD_2018-12-31.xz', testing_file='ETH-USD_2019-01-01.xz', step_size=1, max_position=5, window_size=10, seed=1, action_repeats=10, format_3d=False): # properties required for instantiation self.action_repeats = action_repeats self._seed = seed self._random_state = np.random.RandomState(seed=self._seed) self.training = training self.step_size = step_size self.max_position = max_position self.window_size = window_size self.format_3d = format_3d # e.g., [window, features, *NEW_AXIS*] self.action = 0 # derive gym.env properties self.actions = np.eye(17) self.sym = testing_file[:7] # slice the CCY from the filename # properties that get reset() self.reward = 0.0 self.done = False self.local_step_number = 0 self.midpoint = 0.0 self.observation = None # get Broker class to keep track of PnL and orders self.broker = Broker(max_position=max_position) # get historical data for simulations self.sim = Sim(use_arctic=False) fitting_data_filepath = '{}/data_exports/{}'.format( self.sim.cwd, fitting_file) data_used_in_environment = '{}/data_exports/{}'.format( self.sim.cwd, testing_file) # print('Fitting data: {}\nTesting Data: {}'.format(fitting_data_filepath, # data_used_in_environment)) fitting_data = self.sim.import_csv(filename=fitting_data_filepath) fitting_data['coinbase_midpoint'] = np.log( fitting_data['coinbase_midpoint'].values) fitting_data['coinbase_midpoint'] = ( fitting_data['coinbase_midpoint'] - fitting_data['coinbase_midpoint'].shift(1)).fillna(method='bfill') self.sim.fit_scaler(fitting_data) del fitting_data self.data = self.sim.import_csv(filename=data_used_in_environment) self.prices_ = self.data[ 'coinbase_midpoint'].values # used to calculate PnL self.normalized_data = self.data.copy() self.data = self.data.values self.max_steps = self.data.shape[0] - self.step_size * \ self.action_repeats - 1 self.normalized_data['coinbase_midpoint'] = \ np.log(self.normalized_data['coinbase_midpoint'].values) self.normalized_data['coinbase_midpoint'] = ( self.normalized_data['coinbase_midpoint'] - self.normalized_data['coinbase_midpoint'].shift(1)).fillna( method='bfill') self.tns = TnS() self.rsi = RSI() logger.info("Pre-scaling {}-{} data...".format(self.sym, self._seed)) self.normalized_data = self.normalized_data.apply(self.sim.z_score, axis=1).values logger.info("...{}-{} pre-scaling complete.".format( self.sym, self._seed)) # rendering class self._render = TradingGraph(sym=self.sym) # graph midpoint prices self._render.reset_render_data( y_vec=self.prices_[:np.shape(self._render.x_vec)[0]]) 
self.data_buffer = list() self.action_space = spaces.Discrete(len(self.actions)) variable_features_count = len(self.inventory_features) + len(self.actions) + 1 + \ len(MarketMaker.indicator_features) if self.format_3d: shape = (self.window_size, len(MarketMaker.features) + variable_features_count, 1) else: shape = (self.window_size, len(MarketMaker.features) + variable_features_count) self.observation_space = spaces.Box(low=self.data.min(), high=self.data.max(), shape=shape, dtype=np.int) print( 'MarketMaker #{} instantiated.\nself.observation_space.shape : {}'. format(self._seed, self.observation_space.shape)) def __str__(self): return '{} | {}-{}'.format(MarketMaker.id, self.sym, self._seed) def step(self, action): for current_step in range(self.action_repeats): if self.done: self.reset() return self.observation, self.reward, self.done # reset the reward if there ARE action repeats if current_step == 0: self.reward = 0. step_action = action else: step_action = 0 # Get current step's midpoint self.midpoint = self.prices_[self.local_step_number] # Pass current time step midpoint to broker to calculate PnL, # or if any open orders are to be filled step_best_bid, step_best_ask = self._get_nbbo() buy_volume = self._get_book_data(MarketMaker.buy_trade_index) sell_volume = self._get_book_data(MarketMaker.sell_trade_index) self.tns.step(buys=buy_volume, sells=sell_volume) self.rsi.step(price=self.midpoint) step_reward = self.broker.step(bid_price=step_best_bid, ask_price=step_best_ask, buy_volume=buy_volume, sell_volume=sell_volume, step=self.local_step_number) self.reward += self._send_to_broker_and_get_reward(step_action) self.reward += step_reward step_position_features = self._create_position_features() step_action_features = self._create_action_features( action=step_action) step_indicator_features = self._create_indicator_features() step_observation = np.concatenate( (self.process_data( self.normalized_data[self.local_step_number]), step_indicator_features, step_position_features, step_action_features, np.array([self.reward], dtype=np.float32)), axis=None) self.data_buffer.append(step_observation) if len(self.data_buffer) > self.window_size: del self.data_buffer[0] self.local_step_number += self.step_size self.observation = np.array(self.data_buffer, dtype=np.float32) # Expand the observation space from 2 to 3 dimensions. # This is necessary for conv nets in Baselines. if self.format_3d: self.observation = np.expand_dims(self.observation, axis=-1) if self.local_step_number > self.max_steps: self.done = True self.reward += self.broker.flatten_inventory(*self._get_nbbo()) return self.observation, self.reward, self.done, {} def reset(self): if self.training: self.local_step_number = self._random_state.randint( low=1, high=self.data.shape[0] // 4) else: self.local_step_number = 0 msg = ' {}-{} reset. 
Episode pnl: {:.4f} with {} trades | First step: {}'.format( self.sym, self._seed, self.broker.get_total_pnl(midpoint=self.midpoint), self.broker.get_total_trade_count(), self.local_step_number) logger.info(msg) self.reward = 0.0 self.done = False self.broker.reset() self.data_buffer.clear() self.rsi.reset() self.tns.reset() for step in range(self.window_size + self.tns.window): self.midpoint = self.prices_[self.local_step_number] step_buy_volume = self._get_book_data(MarketMaker.buy_trade_index) step_sell_volume = self._get_book_data( MarketMaker.sell_trade_index) self.tns.step(buys=step_buy_volume, sells=step_sell_volume) self.rsi.step(price=self.midpoint) step_position_features = self._create_position_features() step_action_features = self._create_action_features(action=0) step_indicator_features = self._create_indicator_features() step_observation = np.concatenate( (self.process_data( self.normalized_data[self.local_step_number]), step_indicator_features, step_position_features, step_action_features, np.array([self.reward])), axis=None) self.data_buffer.append(step_observation) self.local_step_number += self.step_size if len(self.data_buffer) > self.window_size: del self.data_buffer[0] self.observation = np.array(self.data_buffer, dtype=np.float32) # Expand the observation space from 2 to 3 dimensions. # This is necessary for conv nets in Baselines. if self.format_3d: self.observation = np.expand_dims(self.observation, axis=-1) return self.observation def render(self, mode='human'): self._render.render(midpoint=self.midpoint, mode=mode) def close(self): logger.info('{}-{} is being closed.'.format(self.id, self.sym)) self.data = None self.normalized_data = None self.prices_ = None self.broker = None self.sim = None self.data_buffer = None self.tns = None self.rsi = None return def seed(self, seed=1): self._random_state = np.random.RandomState(seed=seed) self._seed = seed logger.info('Setting seed in MarketMaker.seed({})'.format(seed)) return [seed] @staticmethod def process_data(_next_state): return np.clip(_next_state.reshape((1, -1)), -10., 10.) 
# def process_data(self, _next_state): # # return self.sim.scale_state(_next_state).values.reshape((1, -1)) # return np.reshape(_next_state, (1, -1)) def _send_to_broker_and_get_reward(self, action): reward = 0.0 discouragement = 0.000000000001 if action == 0: # do nothing reward += discouragement elif action == 1: reward += self._create_order_at_level(reward, discouragement, level=0, side='long') reward += self._create_order_at_level(reward, discouragement, level=4, side='short') elif action == 2: reward += self._create_order_at_level(reward, discouragement, level=0, side='long') reward += self._create_order_at_level(reward, discouragement, level=9, side='short') elif action == 3: reward += self._create_order_at_level(reward, discouragement, level=0, side='long') reward += self._create_order_at_level(reward, discouragement, level=14, side='short') elif action == 4: reward += self._create_order_at_level(reward, discouragement, level=4, side='long') reward += self._create_order_at_level(reward, discouragement, level=0, side='short') elif action == 5: reward += self._create_order_at_level(reward, discouragement, level=4, side='long') reward += self._create_order_at_level(reward, discouragement, level=4, side='short') elif action == 6: reward += self._create_order_at_level(reward, discouragement, level=4, side='long') reward += self._create_order_at_level(reward, discouragement, level=9, side='short') elif action == 7: reward += self._create_order_at_level(reward, discouragement, level=4, side='long') reward += self._create_order_at_level(reward, discouragement, level=14, side='short') elif action == 8: reward += self._create_order_at_level(reward, discouragement, level=9, side='long') reward += self._create_order_at_level(reward, discouragement, level=0, side='short') elif action == 9: reward += self._create_order_at_level(reward, discouragement, level=9, side='long') reward += self._create_order_at_level(reward, discouragement, level=4, side='short') elif action == 10: reward += self._create_order_at_level(reward, discouragement, level=9, side='long') reward += self._create_order_at_level(reward, discouragement, level=9, side='short') elif action == 11: reward += self._create_order_at_level(reward, discouragement, level=9, side='long') reward += self._create_order_at_level(reward, discouragement, level=14, side='short') elif action == 12: reward += self._create_order_at_level(reward, discouragement, level=14, side='long') reward += self._create_order_at_level(reward, discouragement, level=0, side='short') elif action == 13: reward += self._create_order_at_level(reward, discouragement, level=14, side='long') reward += self._create_order_at_level(reward, discouragement, level=4, side='short') elif action == 14: reward += self._create_order_at_level(reward, discouragement, level=14, side='long') reward += self._create_order_at_level(reward, discouragement, level=9, side='short') elif action == 15: reward += self._create_order_at_level(reward, discouragement, level=14, side='long') reward += self._create_order_at_level(reward, discouragement, level=14, side='short') elif action == 16: reward += self.broker.flatten_inventory(*self._get_nbbo()) else: logger.info("L'action n'exist pas ! 
Il faut faire attention !") return reward def _create_position_features(self): return np.array( (self.broker.long_inventory.position_count / self.max_position, self.broker.short_inventory.position_count / self.max_position, self.broker.get_total_pnl(midpoint=self.midpoint) / MarketMaker.target_pnl, self.broker.long_inventory.get_unrealized_pnl(self.midpoint) / self.broker.reward_scale, self.broker.short_inventory.get_unrealized_pnl(self.midpoint) / self.broker.reward_scale, self.broker.get_long_order_distance_to_midpoint( midpoint=self.midpoint), self.broker.get_short_order_distance_to_midpoint( midpoint=self.midpoint), *self.broker.get_queues_ahead_features()), dtype=np.float32) def _create_action_features(self, action): return self.actions[action] def _create_indicator_features(self): return np.array((self.tns.get_value(), self.rsi.get_value()), dtype=np.float32) def _create_order_at_level(self, reward, discouragement, level=0, side='long'): adjustment = 1 if level > 0 else 0 if side == 'long': best_bid = self._get_book_data(MarketMaker.best_bid_index + level) above_best_bid = round( self._get_book_data(MarketMaker.best_bid_index + level - adjustment), 2) price_improvement_bid = round(best_bid + 0.01, 2) if above_best_bid == price_improvement_bid: bid_price = round(self.midpoint - best_bid, 2) bid_queue_ahead = self._get_book_data( MarketMaker.notional_bid_index) else: bid_price = round(self.midpoint - price_improvement_bid, 2) bid_queue_ahead = 0. bid_order = Order(ccy=self.sym, side='long', price=bid_price, step=self.local_step_number, queue_ahead=bid_queue_ahead) if self.broker.add(order=bid_order) is False: reward -= discouragement else: reward += discouragement if side == 'short': best_ask = self._get_book_data(MarketMaker.best_bid_index + level) above_best_ask = round( self._get_book_data(MarketMaker.best_ask_index + level - adjustment), 2) price_improvement_ask = round(best_ask - 0.01, 2) if above_best_ask == price_improvement_ask: ask_price = round(self.midpoint + best_ask, 2) ask_queue_ahead = self._get_book_data( MarketMaker.notional_ask_index) else: ask_price = round(self.midpoint + price_improvement_ask, 2) ask_queue_ahead = 0. ask_order = Order(ccy=self.sym, side='short', price=ask_price, step=self.local_step_number, queue_ahead=ask_queue_ahead) if self.broker.add(order=ask_order) is False: reward -= discouragement else: reward += discouragement return reward def _get_nbbo(self): best_bid = round( self.midpoint - self._get_book_data(MarketMaker.best_bid_index), 2) best_ask = round( self.midpoint + self._get_book_data(MarketMaker.best_ask_index), 2) return best_bid, best_ask def _get_book_data(self, index=0): return self.data[self.local_step_number][index]
class BaseEnvironment(Env, ABC): metadata = {'render.modes': ['human']} # Index of specific data points used to generate the observation space # Turn to true if Bitifinex is in the dataset (e.g., include_bitfinex=True) features = Sim.get_feature_labels(include_system_time=False, include_bitfinex=False) best_bid_index = features.index('coinbase_bid_distance_0') best_ask_index = features.index('coinbase_ask_distance_0') notional_bid_index = features.index('coinbase_bid_notional_0') notional_ask_index = features.index('coinbase_ask_notional_0') buy_trade_index = features.index('coinbase_buys') sell_trade_index = features.index('coinbase_sells') # Constants for scaling data target_pnl = 0.03 # 3.0% gain per episode (i.e., day) def __init__(self, fitting_file='LTC-USD_2019-04-07.csv.xz', testing_file='LTC-USD_2019-04-08.csv.xz', step_size=1, max_position=5, window_size=10, seed=1, action_repeats=10, training=True, format_3d=False, z_score=True, reward_type='trade_completion', scale_rewards=True): """ Base class for creating environments extending OpenAI's GYM framework. :param fitting_file: historical data used to fit environment data (i.e., previous trading day) :param testing_file: historical data used in environment :param step_size: increment size for steps (NOTE: leave a 1, otherwise market transaction data will be overlooked) :param max_position: maximum number of positions able to hold in inventory :param window_size: number of lags to include in observation space :param seed: random seed number :param action_repeats: number of steps to take in environment after a given action :param training: if TRUE, then randomize starting point in environment :param format_3d: if TRUE, reshape observation space from matrix to tensor :param z_score: if TRUE, normalize data set with Z-Score, otherwise use Min-Max (i.e., range of 0 to 1) :param reward_type: method for calculating the environment's reward: 1) 'trade_completion' --> reward is generated per trade's round trip 2) 'continuous_total_pnl' --> change in realized & unrealized pnl between time steps 3) 'continuous_realized_pnl' --> change in realized pnl between time steps 4) 'continuous_unrealized_pnl' --> change in unrealized pnl between time steps """ # properties required for instantiation self.action_repeats = action_repeats self._seed = seed self._random_state = np.random.RandomState(seed=self._seed) self.training = training self.step_size = step_size self.max_position = max_position self.window_size = window_size self.reward_type = reward_type self.format_3d = format_3d # e.g., [window, features, *NEW_AXIS*] self.sym = testing_file[:7] # slice the CCY from the filename self.scale_rewards = scale_rewards # properties that get reset() self.reward = 0.0 self.done = False self.local_step_number = 0 self.midpoint = 0.0 self.observation = None self.action = 0 self.last_pnl = 0. 
# properties to override in sub-classes self.actions = None self.broker = None self.action_space = None self.observation_space = None # get historical data for simulations self.sim = Sim(use_arctic=False, z_score=z_score) self.prices_, self.data, self.normalized_data = self.sim.load_environment_data( fitting_file, testing_file) self.best_bid = self.best_ask = None self.max_steps = self.data.shape[ 0] - self.step_size * self.action_repeats - 1 # load indicators into the indicator manager self.tns = IndicatorManager() self.rsi = IndicatorManager() for window in INDICATOR_WINDOW: self.tns.add(('tns_{}'.format(window), TnS(window=window))) self.rsi.add(('rsi_{}'.format(window), RSI(window=window))) # rendering class self._render = TradingGraph(sym=self.sym) # graph midpoint prices self._render.reset_render_data( y_vec=self.prices_[:np.shape(self._render.x_vec)[0]]) # buffer for appending lags self.data_buffer = list() @abstractmethod def map_action_to_broker(self, action: int): """ Translate agent's action into an order and submit order to broker. :param action: (int) agent's action for current step :return: (tuple) reward, pnl """ return 0., 0. @abstractmethod def _create_position_features(self): """ Create agent space feature set reflecting the positions held in inventory. :return: (np.array) position features """ return np.array([np.nan], dtype=np.float32) def _get_step_reward(self, step_pnl: float): """ Get reward for current time step. Note: 'reward_type' is set during environment instantiation. :param step_pnl: (float) PnL accrued from order fills at current time step :return: (float) reward """ reward = 0. if self.reward_type == 'trade_completion': reward += step_pnl # Note: we do not need to update last_pnl for this reward approach elif self.reward_type == 'continuous_total_pnl': new_pnl = self.broker.get_total_pnl(self.best_bid, self.best_ask) reward += new_pnl - self.last_pnl # Difference in PnL self.last_pnl = new_pnl elif self.reward_type == 'continuous_realized_pnl': new_pnl = self.broker.realized_pnl reward += new_pnl - self.last_pnl # Difference in PnL self.last_pnl = new_pnl elif self.reward_type == 'continuous_unrealized_pnl': new_pnl = self.broker.get_unrealized_pnl(self.best_bid, self.best_ask) reward += new_pnl - self.last_pnl # Difference in PnL self.last_pnl = new_pnl else: print("_get_step_reward() Unknown reward_type: {}".format( self.reward_type)) if self.scale_rewards: reward /= self.broker.reward_scale return reward def step(self, action: int): """ Step through environment with action :param action: (int) action to take in environment :return: (tuple) observation, reward, is_done, and empty `dict` """ for current_step in range(self.action_repeats): if self.done: self.reset() return self.observation, self.reward, self.done # reset the reward if there ARE action repeats if current_step == 0: self.reward = 0. 
step_action = action else: step_action = 0 # Get current step's midpoint self.midpoint = self.prices_[self.local_step_number] # Pass current time step bid/ask prices to broker to calculate PnL, # or if any open orders are to be filled self.best_bid, self.best_ask = self._get_nbbo() buy_volume = self._get_book_data(BaseEnvironment.buy_trade_index) sell_volume = self._get_book_data(BaseEnvironment.sell_trade_index) # Update indicators self.tns.step(buys=buy_volume, sells=sell_volume) self.rsi.step(price=self.midpoint) # Get PnL from any filled LIMIT orders limit_pnl = self.broker.step_limit_order_pnl( bid_price=self.best_bid, ask_price=self.best_ask, buy_volume=buy_volume, sell_volume=sell_volume, step=self.local_step_number) # Get PnL from any filled MARKET orders AND action penalties for invalid # actions made by the agent for future discouragement step_reward, market_pnl = self.map_action_to_broker( action=step_action) step_pnl = limit_pnl + step_reward + market_pnl self.reward += self._get_step_reward(step_pnl=step_pnl) step_observation = self._get_step_observation(action=action) self.data_buffer.append(step_observation) if len(self.data_buffer) > self.window_size: del self.data_buffer[0] self.local_step_number += self.step_size self.observation = self._get_observation() if self.local_step_number > self.max_steps: self.done = True flatten_pnl = self.broker.flatten_inventory( self.best_bid, self.best_ask) self.reward += self._get_step_reward(step_pnl=flatten_pnl) return self.observation, self.reward, self.done, {} def reset(self): """ Reset the environment. :return: (np.array) Observation at first step """ if self.training: self.local_step_number = self._random_state.randint( low=0, high=self.data.shape[0] // 4) else: self.local_step_number = 0 msg = ' {}-{} reset. Episode pnl: {:.4f} with {} trades. First step: {}'.format( self.sym, self._seed, self.broker.get_total_pnl(self.best_bid, self.best_ask), self.broker.total_trade_count, self.local_step_number) print(msg) self.reward = 0.0 self.done = False self.broker.reset() self.data_buffer.clear() self.rsi.reset() self.tns.reset() for step in range(self.window_size + INDICATOR_WINDOW_MAX): self.midpoint = self.prices_[self.local_step_number] self.best_bid, self.best_ask = self._get_nbbo() step_buy_volume = self._get_book_data( BaseEnvironment.buy_trade_index) step_sell_volume = self._get_book_data( BaseEnvironment.sell_trade_index) self.tns.step(buys=step_buy_volume, sells=step_sell_volume) self.rsi.step(price=self.midpoint) step_observation = self._get_step_observation(action=0) self.data_buffer.append(step_observation) self.local_step_number += self.step_size if len(self.data_buffer) > self.window_size: del self.data_buffer[0] self.observation = self._get_observation() return self.observation def render(self, mode='human'): """ Render midpoint prices :param mode: (str) flag for type of rendering. Only 'human' supported. 
:return: (void) """ self._render.render(midpoint=self.midpoint, mode=mode) def close(self): """ Free clear memory when closing environment :return: (void) """ self.data = None self.normalized_data = None self.prices_ = None self.broker = None self.sim = None self.data_buffer = None self.tns = None self.rsi = None def seed(self, seed=1): """ Set random seed in environment :param seed: (int) random seed number :return: (list) seed number in a list """ self._random_state = np.random.RandomState(seed=seed) self._seed = seed return [seed] @staticmethod def _process_data(_next_state): """ Reshape observation for function approximator :param _next_state: observation space :return: (np.array) clipped observation space """ return _next_state.reshape((1, -1)) def _create_action_features(self, action): """ Create a features array for the current time step's action. :param action: (int) action number :return: (np.array) One-hot of current action """ return self.actions[action] def _create_indicator_features(self): """ Create features vector with environment indicators. :return: (np.array) Indicator values for current time step """ return np.array((*self.tns.get_value(), *self.rsi.get_value()), dtype=np.float32) def _get_nbbo(self): """ Get best bid and offer :return: (tuple) best bid and offer """ best_bid = round( self.midpoint - self._get_book_data(BaseEnvironment.best_bid_index), 2) best_ask = round( self.midpoint + self._get_book_data(BaseEnvironment.best_ask_index), 2) return best_bid, best_ask def _get_book_data(self, index=0): """ Return step 'n' of order book snapshot data :param index: (int) step 'n' to look up in order book snapshot history :return: (np.array) order book snapshot vector """ return self.data[self.local_step_number][index] def _get_step_observation(self, action=0): """ Current step observation, NOT including historical data. :param action: (int) current step action :return: (np.array) Current step observation """ step_position_features = self._create_position_features() step_action_features = self._create_action_features(action=action) step_indicator_features = self._create_indicator_features() return np.concatenate( (self._process_data(self.normalized_data[self.local_step_number]), step_indicator_features, step_position_features, step_action_features, np.array([self.reward])), axis=None) def _get_observation(self): """ Current step observation, including historical data. If format_3d is TRUE: Expand the observation space from 2 to 3 dimensions. (note: This is necessary for conv nets in Baselines.) :return: (np.array) Observation state for current time step """ observation = np.array(self.data_buffer, dtype=np.float32) if self.format_3d: observation = np.expand_dims(observation, axis=-1) return observation
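The z_score flag documented in the constructors above chooses between two standard normalizations for the feature set. The functions below are a generic sketch of those transforms, kept separate from the Sim class's actual implementation, which is not shown here.

import numpy as np

def z_score_normalize(x: np.ndarray) -> np.ndarray:
    # Z-score: rescale each feature column to zero mean and unit variance.
    return (x - x.mean(axis=0)) / x.std(axis=0)

def min_max_normalize(x: np.ndarray) -> np.ndarray:
    # Min-Max: rescale each feature column to the [0, 1] range.
    return (x - x.min(axis=0)) / (x.max(axis=0) - x.min(axis=0))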
class PriceJump(Env): metadata = {'render.modes': ['human']} id = 'long-short-v0' action_repeats = 4 inventory_features = [ 'long_inventory', 'short_inventory', 'total_unrealized_and_realized_pnl', 'long_unrealized_pnl', 'short_unrealized_pnl' ] # Turn to true if Bitifinex is in the dataset (e.g., include_bitfinex=True) features = Sim.get_feature_labels(include_system_time=False, include_bitfinex=False) indicator_features = ['tns', 'rsi'] best_bid_index = features.index('coinbase-bid-distance-0') best_ask_index = features.index('coinbase-ask-distance-0') notional_bid_index = features.index('coinbase-bid-notional-0') notional_ask_index = features.index('coinbase-ask-notional-0') buy_trade_index = features.index('coinbase-buys') sell_trade_index = features.index('coinbase-sells') instance_count = 0 def __init__(self, *, training=True, fitting_file='ETH-USD_2018-12-31.xz', testing_file='ETH-USD_2019-01-01.xz', step_size=1, max_position=5, window_size=4, frame_stack=False): # properties required for instantiation PriceJump.instance_count += 1 self._seed = int(PriceJump.instance_count) # seed self._random_state = np.random.RandomState(seed=self._seed) self.training = training self.step_size = step_size self.fee = BROKER_FEE self.max_position = max_position self.window_size = window_size self.frame_stack = frame_stack self.frames_to_add = 3 if self.frame_stack else 0 self.action = 0 # derive gym.env properties self.actions = np.eye(3) self.sym = testing_file[:7] # slice the CCY from the filename # properties that get reset() self.reward = 0.0 self.done = False self.local_step_number = 0 self.midpoint = 0.0 self.observation = None # get Broker class to keep track of PnL and orders self.broker = Broker(max_position=max_position) # get historical data for simulations self.sim = Sim(use_arctic=False) fitting_data_filepath = '{}/data_exports/{}'.format( self.sim.cwd, fitting_file) data_used_in_environment = '{}/data_exports/{}'.format( self.sim.cwd, testing_file) # print('Fitting data: {}\nTesting Data: {}'.format(fitting_data_filepath, # data_used_in_environment)) fitting_data = self.sim.import_csv(filename=fitting_data_filepath) fitting_data['coinbase_midpoint'] = np.log( fitting_data['coinbase_midpoint'].values) fitting_data['coinbase_midpoint'] = fitting_data['coinbase_midpoint']. 
\ pct_change().fillna(method='bfill') self.sim.fit_scaler(fitting_data) del fitting_data self.data = self.sim.import_csv(filename=data_used_in_environment) self.prices_ = self.data[ 'coinbase_midpoint'].values # used to calculate PnL self.normalized_data = self.data.copy() self.data = self.data.values self.normalized_data['coinbase_midpoint'] = np.log( self.normalized_data['coinbase_midpoint'].values) self.normalized_data['coinbase_midpoint'] = ( self.normalized_data['coinbase_midpoint'] - self.normalized_data['coinbase_midpoint'].shift(1)).fillna( method='bfill') self.tns = TnS() self.rsi = RSI() logger.info("Pre-scaling {}-{} data...".format(self.sym, self._seed)) self.normalized_data = self.normalized_data.apply(self.sim.z_score, axis=1).values logger.info("...{}-{} pre-scaling complete.".format( self.sym, self._seed)) # rendering class self._render = TradingGraph(sym=self.sym) # graph midpoint prices self._render.reset_render_data( y_vec=self.prices_[:np.shape(self._render.x_vec)[0]]) self.data_buffer, self.frame_stacker = list(), list() self.action_space = spaces.Discrete(len(self.actions)) variable_features_count = len(self.inventory_features) + len(self.actions) + 1 + \ len(PriceJump.indicator_features) if self.frame_stack: shape = (4, len(PriceJump.features) + variable_features_count, self.window_size) else: shape = (self.window_size, len(PriceJump.features) + variable_features_count) self.observation_space = spaces.Box(low=self.data.min(), high=self.data.max(), shape=shape, dtype=np.int) print('PriceJump #{} instantiated.\nself.observation_space.shape : {}'. format(PriceJump.instance_count, self.observation_space.shape)) def __str__(self): return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed) def step(self, action): for current_step in range(PriceJump.action_repeats): if self.done: self.reset() return self.observation, self.reward, self.done # reset the reward if there ARE action repeats if current_step == 0: self.reward = 0. step_action = action else: step_action = 0 # Get current step's midpoint self.midpoint = self.prices_[self.local_step_number] # Pass current time step midpoint to broker to calculate PnL, # or if any open orders are to be filled buy_volume = self._get_book_data(PriceJump.buy_trade_index) sell_volume = self._get_book_data(PriceJump.sell_trade_index) self.tns.step(buys=buy_volume, sells=sell_volume) self.rsi.step(price=self.midpoint) self.broker.step(midpoint=self.midpoint) self.reward += self._send_to_broker_and_get_reward( action=step_action) step_position_features = self._create_position_features() step_action_features = self._create_action_features( action=step_action) step_indicator_features = self._create_indicator_features() step_observation = np.concatenate( (self.process_data( self.normalized_data[self.local_step_number]), step_indicator_features, step_position_features, step_action_features, np.array([self.reward], dtype=np.float32)), axis=None) self.data_buffer.append(step_observation) if len(self.data_buffer) >= self.window_size: self.frame_stacker.append( np.array(self.data_buffer, dtype=np.float32)) del self.data_buffer[0] if len(self.frame_stacker) > self.frames_to_add + 1: del self.frame_stacker[0] self.local_step_number += self.step_size self.observation = np.array(self.frame_stacker, dtype=np.float32) # This removes a dimension to be compatible with the Keras-rl module # because Keras-rl uses its own frame-stacker. There are future # plans to integrate this repository with more reinforcement learning # packages, such as baselines. 
if self.frame_stack is False: self.observation = np.squeeze(self.observation, axis=0) if self.local_step_number > self.data.shape[0] - 40: self.done = True order = Order(ccy=self.sym, side=None, price=self.midpoint, step=self.local_step_number) self.reward = self.broker.flatten_inventory(order=order) return self.observation, self.reward, self.done, {} def reset(self): if self.training: self.local_step_number = self._random_state.randint( low=1, high=self.data.shape[0] // 4) else: self.local_step_number = 0 logger.info(' {}-{} reset. Episode pnl: {} | First step: {}'.format( self.sym, self._seed, self.broker.get_total_pnl(midpoint=self.midpoint), self.local_step_number)) self.reward = 0.0 self.done = False self.broker.reset() self.data_buffer.clear() self.frame_stacker.clear() self.rsi.reset() self.tns.reset() for step in range(self.window_size + self.frames_to_add + self.tns.window): self.midpoint = self.prices_[self.local_step_number] step_buy_volume = self._get_book_data(PriceJump.buy_trade_index) step_sell_volume = self._get_book_data(PriceJump.sell_trade_index) self.tns.step(buys=step_buy_volume, sells=step_sell_volume) self.rsi.step(price=self.midpoint) step_position_features = self._create_position_features() step_action_features = self._create_action_features(action=0) step_indicator_features = self._create_indicator_features() step_observation = np.concatenate( (self.process_data( self.normalized_data[self.local_step_number]), step_indicator_features, step_position_features, step_action_features, np.array([self.reward])), axis=None) self.data_buffer.append(step_observation) self.local_step_number += self.step_size if step >= self.window_size - 1: self.frame_stacker.append( np.array(self.data_buffer, dtype=np.float32)) del self.data_buffer[0] if len(self.frame_stacker) > self.frames_to_add + 1: del self.frame_stacker[0] self.observation = np.array(self.frame_stacker, dtype=np.float32) # This removes a dimension to be compatible with the Keras-rl module # because Keras-rl uses its own frame-stacker. There are future plans # to integrate this repository with more reinforcement learning packages, # such as baselines. if self.frame_stack is False: self.observation = np.squeeze(self.observation, axis=0) return self.observation def render(self, mode='human'): self._render.render(midpoint=self.midpoint, mode=mode) def close(self): logger.info('{}-{} is being closed.'.format(self.id, self.sym)) self.data = None self.normalized_data = None self.prices_ = None self.broker = None self.sim = None self.data_buffer = None self.tns = None self.rsi = None return def seed(self, seed=1): self._random_state = np.random.RandomState(seed=seed) self._seed = seed logger.info('PriceJump.seed({})'.format(seed)) return [seed] @staticmethod def process_data(_next_state): return np.clip(_next_state.reshape((1, -1)), -10., 10.) 
# def process_data(self, _next_state): # # return self.sim.scale_state(_next_state).values.reshape((1, -1)) # return np.reshape(_next_state, (1, -1)) def _send_to_broker_and_get_reward(self, action): reward = 0.0 discouragement = 0.000000000001 if action == 0: # do nothing pass elif action == 1: # buy price_fee_adjusted = self.midpoint + (self.fee * self.midpoint) if self.broker.short_inventory_count > 0: order = Order(ccy=self.sym, side='short', price=price_fee_adjusted, step=self.local_step_number) self.broker.remove(order=order) reward += self.broker.get_reward(side=order.side) elif self.broker.long_inventory_count >= 0: order = Order(ccy=self.sym, side='long', price=price_fee_adjusted, step=self.local_step_number) if self.broker.add(order=order) is False: reward -= discouragement else: logger.info( ('gym_trading.get_reward() ' + 'Error for action #{} - ' + 'unable to place an order with broker').format(action)) elif action == 2: # sell price_fee_adjusted = self.midpoint - (self.fee * self.midpoint) if self.broker.long_inventory_count > 0: order = Order(ccy=self.sym, side='long', price=price_fee_adjusted, step=self.local_step_number) self.broker.remove(order=order) reward += self.broker.get_reward(side=order.side) elif self.broker.short_inventory_count >= 0: order = Order(ccy=self.sym, side='short', price=price_fee_adjusted, step=self.local_step_number) if self.broker.add(order=order) is False: reward -= discouragement else: logger.info( 'gym_trading.get_reward() ' + 'Error for action #{} - ' + 'unable to place an order with broker'.format(action)) else: logger.info( ('Unknown action to take in get_reward(): ' + 'action={} | midpoint={}').format(action, self.midpoint)) return reward def _create_position_features(self): return np.array( (self.broker.long_inventory.position_count / self.max_position, self.broker.short_inventory.position_count / self.max_position, self.broker.get_total_pnl(midpoint=self.midpoint), self.broker.long_inventory.get_unrealized_pnl(self.midpoint), self.broker.short_inventory.get_unrealized_pnl(self.midpoint))) def _create_action_features(self, action): return self.actions[action] def _create_indicator_features(self): return np.array((self.tns.get_value(), self.rsi.get_value()), dtype=np.float32) def _get_nbbo(self): best_bid = round( self.midpoint - self._get_book_data(PriceJump.best_bid_index), 2) best_ask = round( self.midpoint + self._get_book_data(PriceJump.best_ask_index), 2) return best_bid, best_ask def _get_book_data(self, index=0): return self.data[self.local_step_number][index]
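# Illustrative usage sketch (not part of the original source): a minimal random-agent
# loop against the PriceJump environment defined above. The import location of
# PriceJump is an assumption and is left commented out; only the constructor
# arguments, reset()/step()/close(), and the Discrete action_space shown above are used.
import numpy as np

# from gym_trading import PriceJump  # hypothetical import path


def run_random_episode(env, max_steps=1000):
    """Step through one episode with uniformly random actions and return total reward."""
    observation = env.reset()
    total_reward = 0.
    for _ in range(max_steps):
        action = np.random.randint(env.action_space.n)  # 0 = hold, 1 = buy, 2 = sell
        observation, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break
    env.close()
    return total_reward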
class PriceJump(Env): metadata = {'render.modes': ['human']} id = 'long-short-v0' # Turn to true if Bitifinex is in the dataset (e.g., include_bitfinex=True) features = Sim.get_feature_labels(include_system_time=False, include_bitfinex=False) best_bid_index = features.index('coinbase_bid_distance_0') best_ask_index = features.index('coinbase_ask_distance_0') notional_bid_index = features.index('coinbase_bid_notional_0') notional_ask_index = features.index('coinbase_ask_notional_0') buy_trade_index = features.index('coinbase_buys') sell_trade_index = features.index('coinbase_sells') target_pnl = 0.03 # 3.0% gain per episode (i.e., day) fee = MARKET_ORDER_FEE def __init__(self, *, fitting_file='LTC-USD_2019-04-07.csv.xz', testing_file='LTC-USD_2019-04-08.csv.xz', step_size=1, max_position=5, window_size=10, seed=1, action_repeats=10, training=True, format_3d=False, z_score=True): # properties required for instantiation self.action_repeats = action_repeats self._seed = seed self._random_state = np.random.RandomState(seed=self._seed) self.training = training self.step_size = step_size self.max_position = max_position self.window_size = window_size self.format_3d = format_3d # e.g., [window, features, *NEW_AXIS*] self.action = 0 # derive gym.env properties self.actions = np.eye(3, dtype=np.float32) self.sym = testing_file[:7] # slice the CCY from the filename # properties that get reset() self.reward = 0.0 self.done = False self.local_step_number = 0 self.midpoint = 0.0 self.observation = None # get Broker class to keep track of PnL and orders self.broker = Broker(max_position=max_position) # get historical data for simulations self.sim = Sim(use_arctic=False, z_score=z_score) self.prices_, self.data, self.normalized_data = self.sim.load_environment_data( fitting_file, testing_file) self.max_steps = self.data.shape[0] - self.step_size * \ self.action_repeats - 1 # load indicators into the indicator manager self.tns = IndicatorManager() self.rsi = IndicatorManager() for window in INDICATOR_WINDOW: self.tns.add(('tns_{}'.format(window), TnS(window=window))) self.rsi.add(('rsi_{}'.format(window), RSI(window=window))) # rendering class self._render = TradingGraph(sym=self.sym) # graph midpoint prices self._render.reset_render_data( y_vec=self.prices_[:np.shape(self._render.x_vec)[0]]) # buffer for appending lags self.data_buffer = list() self.action_space = spaces.Discrete(len(self.actions)) self.reset() # reset to load observation.shape self.observation_space = spaces.Box(low=-10, high=10, shape=self.observation.shape, dtype=np.float32) print( '{} PriceJump #{} instantiated.\nself.observation_space.shape : {}' .format(self.sym, self._seed, self.observation_space.shape)) def __str__(self): return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed) def step(self, action: int): for current_step in range(self.action_repeats): if self.done: self.reset() return self.observation, self.reward, self.done # reset the reward if there ARE action repeats if current_step == 0: self.reward = 0. 
step_action = action else: step_action = 0 # Get current step's midpoint self.midpoint = self.prices_[self.local_step_number] # Pass current time step midpoint to broker to calculate PnL, # or if any open orders are to be filled buy_volume = self._get_book_data(PriceJump.buy_trade_index) sell_volume = self._get_book_data(PriceJump.sell_trade_index) self.tns.step(buys=buy_volume, sells=sell_volume) self.rsi.step(price=self.midpoint) self.broker.step(midpoint=self.midpoint) self.reward += self._send_to_broker_and_get_reward( action=step_action) step_observation = self._get_step_observation(action=action) self.data_buffer.append(step_observation) if len(self.data_buffer) > self.window_size: del self.data_buffer[0] self.local_step_number += self.step_size self.observation = self._get_observation() if self.local_step_number > self.max_steps: self.done = True order = Order(ccy=self.sym, side=None, price=self.midpoint, step=self.local_step_number) self.reward = self.broker.flatten_inventory(order=order) return self.observation, self.reward, self.done, {} def reset(self): if self.training: self.local_step_number = self._random_state.randint( low=1, high=self.data.shape[0] // 4) else: self.local_step_number = 0 msg = ' {}-{} reset. Episode pnl: {:.4f} with {} trades. First step: {}'.format( self.sym, self._seed, self.broker.get_total_pnl(midpoint=self.midpoint), self.broker.get_total_trade_count(), self.local_step_number) logger.info(msg) self.reward = 0.0 self.done = False self.broker.reset() self.data_buffer.clear() self.rsi.reset() self.tns.reset() for step in range(self.window_size + INDICATOR_WINDOW_MAX): self.midpoint = self.prices_[self.local_step_number] step_buy_volume = self._get_book_data(PriceJump.buy_trade_index) step_sell_volume = self._get_book_data(PriceJump.sell_trade_index) self.tns.step(buys=step_buy_volume, sells=step_sell_volume) self.rsi.step(price=self.midpoint) step_observation = self._get_step_observation(action=0) self.data_buffer.append(step_observation) self.local_step_number += self.step_size if len(self.data_buffer) > self.window_size: del self.data_buffer[0] self.observation = self._get_observation() return self.observation def render(self, mode='human'): self._render.render(midpoint=self.midpoint, mode=mode) def close(self): logger.info('{}-{} is being closed.'.format(self.id, self.sym)) self.data = None self.normalized_data = None self.prices_ = None self.broker = None self.sim = None self.data_buffer = None self.tns = None self.rsi = None return def seed(self, seed=1): self._random_state = np.random.RandomState(seed=seed) self._seed = seed logger.info('Setting seed in PriceJump.seed({})'.format(seed)) return [seed] @staticmethod def _process_data(_next_state): """ Reshape observation and clip outliers (values +/- 10) :param _next_state: observation space :return: (np.array) clipped observation space """ return np.clip(_next_state.reshape((1, -1)), -10., 10.) def _send_to_broker_and_get_reward(self, action: int): """ Create or adjust orders per a specified action and adjust for penalties. 
:param action: (int) current step's action :return: (float) reward """ reward = 0.0 discouragement = 0.000000000001 if action == 0: # do nothing reward += discouragement elif action == 1: # buy price_fee_adjusted = self.midpoint + (PriceJump.fee * self.midpoint) if self.broker.short_inventory_count > 0: order = Order(ccy=self.sym, side='short', price=price_fee_adjusted, step=self.local_step_number) self.broker.remove(order=order) reward += self.broker.get_reward(side=order.side) / \ self.broker.reward_scale # scale realized PnL elif self.broker.long_inventory_count >= 0: order = Order(ccy=self.sym, side='long', price=price_fee_adjusted, step=self.local_step_number) if self.broker.add(order=order) is False: reward -= discouragement else: logger.info( ('gym_trading.get_reward() ' + 'Error for action #{} - ' + 'unable to place an order with broker').format(action)) elif action == 2: # sell price_fee_adjusted = self.midpoint - (PriceJump.fee * self.midpoint) if self.broker.long_inventory_count > 0: order = Order(ccy=self.sym, side='long', price=price_fee_adjusted, step=self.local_step_number) self.broker.remove(order=order) reward += self.broker.get_reward(side=order.side) / \ self.broker.reward_scale # scale realized PnL elif self.broker.short_inventory_count >= 0: order = Order(ccy=self.sym, side='short', price=price_fee_adjusted, step=self.local_step_number) if self.broker.add(order=order) is False: reward -= discouragement else: logger.info( ('gym_trading.get_reward() ' + 'Error for action #{} - ' + 'unable to place an order with broker').format(action)) else: logger.info( ('Unknown action to take in get_reward(): ' + 'action={} | midpoint={}').format(action, self.midpoint)) return reward def _create_position_features(self): """ Create an array with features related to the agent's inventory :return: (np.array) normalized position features """ return np.array( (self.broker.long_inventory.position_count / self.max_position, self.broker.short_inventory.position_count / self.max_position, self.broker.get_total_pnl(midpoint=self.midpoint) / PriceJump.target_pnl, self.broker.long_inventory.get_unrealized_pnl(self.midpoint) / self.broker.reward_scale, self.broker.short_inventory.get_unrealized_pnl(self.midpoint) / self.broker.reward_scale), dtype=np.float32) def _create_action_features(self, action): """ Create a features array for the current time step's action. :param action: (int) action number :return: (np.array) One-hot of current action """ return self.actions[action] def _create_indicator_features(self): """ Create features vector with environment indicators. :return: (np.array) Indicator values for current time step """ return np.array((*self.tns.get_value(), *self.rsi.get_value()), dtype=np.float32) def _get_nbbo(self): """ Get best bid and offer :return: (tuple) best bid and offer """ best_bid = round( self.midpoint - self._get_book_data(PriceJump.best_bid_index), 2) best_ask = round( self.midpoint + self._get_book_data(PriceJump.best_ask_index), 2) return best_bid, best_ask def _get_book_data(self, index=0): """ Return step 'n' of order book snapshot data :param index: (int) step 'n' to look up in order book snapshot history :return: (np.array) order book snapshot vector """ return self.data[self.local_step_number][index] def _get_step_observation(self, action=0): """ Current step observation, NOT including historical data. 
:param action: (int) current step action :return: (np.array) Current step observation """ step_position_features = self._create_position_features() step_action_features = self._create_action_features(action=action) step_indicator_features = self._create_indicator_features() return np.concatenate( (self._process_data(self.normalized_data[self.local_step_number]), step_indicator_features, step_position_features, step_action_features, np.array([self.reward])), axis=None) def _get_observation(self): """ Current step observation, including historical data. If format_3d is TRUE: Expand the observation space from 2 to 3 dimensions. (note: This is necessary for conv nets in Baselines.) :return: (np.array) Observation state for current time step """ observation = np.array(self.data_buffer, dtype=np.float32) if self.format_3d: observation = np.expand_dims(observation, axis=-1) return observation
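# Self-contained sketch (assumption: plain NumPy only) of the rolling-window logic used
# by _get_step_observation()/_get_observation() above: keep the most recent window_size
# step vectors in a FIFO buffer, then optionally append a channel axis for conv nets.
import numpy as np


def rolling_observation(step_vectors, window_size=10, format_3d=False):
    """Stack the latest `window_size` step vectors into one observation array."""
    buffer = []
    for vector in step_vectors:
        buffer.append(vector)
        if len(buffer) > window_size:
            del buffer[0]  # same FIFO trimming as the environment's data_buffer
    observation = np.array(buffer, dtype=np.float32)
    if format_3d:
        observation = np.expand_dims(observation, axis=-1)  # [window, features, 1]
    return observation


# e.g., 15 fake step vectors with 8 features each -> (10, 8) or (10, 8, 1)
example = [np.random.randn(8) for _ in range(15)]
print(rolling_observation(example, window_size=10, format_3d=True).shape)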
class MarketMaker(Env): # gym.env required metadata = {'render.modes': ['human']} id = 'market-maker-v0' # Turn to true if Bitifinex is in the dataset (e.g., include_bitfinex=True) features = Sim.get_feature_labels(include_system_time=False, include_bitfinex=False) best_bid_index = features.index('coinbase_bid_distance_0') best_ask_index = features.index('coinbase_ask_distance_0') notional_bid_index = features.index('coinbase_bid_notional_0') notional_ask_index = features.index('coinbase_ask_notional_0') buy_trade_index = features.index('coinbase_buys') sell_trade_index = features.index('coinbase_sells') target_pnl = 0.03 # 3.0% gain per episode (i.e., day) def __init__(self, *, fitting_file='LTC-USD_2019-04-07.csv.xz', testing_file='LTC-USD_2019-04-08.csv.xz', step_size=1, max_position=5, window_size=10, seed=1, action_repeats=10, training=True, format_3d=False, z_score=True): # properties required for instantiation self.action_repeats = action_repeats self._seed = seed self._random_state = np.random.RandomState(seed=self._seed) self.training = training self.step_size = step_size self.max_position = max_position self.window_size = window_size self.format_3d = format_3d # e.g., [window, features, *NEW_AXIS*] self.action = 0 # derive gym.env properties self.actions = np.eye(17, dtype=np.float32) self.sym = testing_file[:7] # slice the CCY from the filename # properties that get reset() self.reward = 0.0 self.done = False self.local_step_number = 0 self.midpoint = 0.0 self.observation = None # get Broker class to keep track of PnL and orders self.broker = Broker(max_position=max_position) # get historical data for simulations self.sim = Sim(use_arctic=False, z_score=z_score) self.prices_, self.data, self.normalized_data = self.sim.load_environment_data( fitting_file, testing_file) self.max_steps = self.data.shape[0] - self.step_size * \ self.action_repeats - 1 # load indicators into the indicator manager self.tns = IndicatorManager() self.rsi = IndicatorManager() for window in INDICATOR_WINDOW: self.tns.add(('tns_{}'.format(window), TnS(window=window))) self.rsi.add(('rsi_{}'.format(window), RSI(window=window))) # rendering class self._render = TradingGraph(sym=self.sym) # graph midpoint prices self._render.reset_render_data( y_vec=self.prices_[:np.shape(self._render.x_vec)[0]]) # buffer for appending lags self.data_buffer = list() self.action_space = spaces.Discrete(len(self.actions)) self.reset() # reset to load observation.shape self.observation_space = spaces.Box(low=-10, high=10, shape=self.observation.shape, dtype=np.float32) print( '{} MarketMaker #{} instantiated\nself.observation_space.shape: {}' .format(self.sym, self._seed, self.observation_space.shape)) def __str__(self): return '{} | {}-{}'.format(MarketMaker.id, self.sym, self._seed) def step(self, action: int): for current_step in range(self.action_repeats): if self.done: self.reset() return self.observation, self.reward, self.done # reset the reward if there ARE action repeats if current_step == 0: self.reward = 0. 
step_action = action else: step_action = 0 # Get current step's midpoint self.midpoint = self.prices_[self.local_step_number] # Pass current time step midpoint to broker to calculate PnL, # or if any open orders are to be filled step_best_bid, step_best_ask = self._get_nbbo() buy_volume = self._get_book_data(MarketMaker.buy_trade_index) sell_volume = self._get_book_data(MarketMaker.sell_trade_index) self.tns.step(buys=buy_volume, sells=sell_volume) self.rsi.step(price=self.midpoint) step_reward = self.broker.step(bid_price=step_best_bid, ask_price=step_best_ask, buy_volume=buy_volume, sell_volume=sell_volume, step=self.local_step_number) self.reward += self._send_to_broker_and_get_reward( action=step_action) self.reward += step_reward step_observation = self._get_step_observation(action=action) self.data_buffer.append(step_observation) if len(self.data_buffer) > self.window_size: del self.data_buffer[0] self.local_step_number += self.step_size self.observation = self._get_observation() if self.local_step_number > self.max_steps: self.done = True self.reward += self.broker.flatten_inventory(*self._get_nbbo()) return self.observation, self.reward, self.done, {} def reset(self): if self.training: self.local_step_number = self._random_state.randint( low=1, high=self.data.shape[0] // 4) else: self.local_step_number = 0 msg = ' {}-{} reset. Episode pnl: {:.4f} with {} trades | First step: {}'.format( self.sym, self._seed, self.broker.get_total_pnl(midpoint=self.midpoint), self.broker.get_total_trade_count(), self.local_step_number) logger.info(msg) self.reward = 0.0 self.done = False self.broker.reset() self.data_buffer.clear() self.rsi.reset() self.tns.reset() for step in range(self.window_size + INDICATOR_WINDOW_MAX): self.midpoint = self.prices_[self.local_step_number] step_buy_volume = self._get_book_data(MarketMaker.buy_trade_index) step_sell_volume = self._get_book_data( MarketMaker.sell_trade_index) self.tns.step(buys=step_buy_volume, sells=step_sell_volume) self.rsi.step(price=self.midpoint) step_observation = self._get_step_observation(action=0) self.data_buffer.append(step_observation) self.local_step_number += self.step_size if len(self.data_buffer) > self.window_size: del self.data_buffer[0] self.observation = self._get_observation() return self.observation def render(self, mode='human'): self._render.render(midpoint=self.midpoint, mode=mode) def close(self): logger.info('{}-{} is being closed.'.format(self.id, self.sym)) self.data = None self.normalized_data = None self.prices_ = None self.broker = None self.sim = None self.data_buffer = None self.tns = None self.rsi = None return def seed(self, seed=1): self._random_state = np.random.RandomState(seed=seed) self._seed = seed logger.info('Setting seed in MarketMaker.seed({})'.format(seed)) return [seed] @staticmethod def _process_data(_next_state): """ Reshape observation and clip outliers (values +/- 10) :param _next_state: observation space :return: (np.array) clipped observation space """ return np.clip(_next_state.reshape((1, -1)), -10., 10.) def _send_to_broker_and_get_reward(self, action: int): """ Create or adjust orders per a specified action and adjust for penalties. 
:param action: (int) current step's action :return: (float) reward """ reward = 0.0 discouragement = 0.000000000001 if action == 0: # do nothing reward += discouragement elif action == 1: reward += self._create_order_at_level(reward, discouragement, level=0, side='long') reward += self._create_order_at_level(reward, discouragement, level=4, side='short') elif action == 2: reward += self._create_order_at_level(reward, discouragement, level=0, side='long') reward += self._create_order_at_level(reward, discouragement, level=9, side='short') elif action == 3: reward += self._create_order_at_level(reward, discouragement, level=0, side='long') reward += self._create_order_at_level(reward, discouragement, level=14, side='short') elif action == 4: reward += self._create_order_at_level(reward, discouragement, level=4, side='long') reward += self._create_order_at_level(reward, discouragement, level=0, side='short') elif action == 5: reward += self._create_order_at_level(reward, discouragement, level=4, side='long') reward += self._create_order_at_level(reward, discouragement, level=4, side='short') elif action == 6: reward += self._create_order_at_level(reward, discouragement, level=4, side='long') reward += self._create_order_at_level(reward, discouragement, level=9, side='short') elif action == 7: reward += self._create_order_at_level(reward, discouragement, level=4, side='long') reward += self._create_order_at_level(reward, discouragement, level=14, side='short') elif action == 8: reward += self._create_order_at_level(reward, discouragement, level=9, side='long') reward += self._create_order_at_level(reward, discouragement, level=0, side='short') elif action == 9: reward += self._create_order_at_level(reward, discouragement, level=9, side='long') reward += self._create_order_at_level(reward, discouragement, level=4, side='short') elif action == 10: reward += self._create_order_at_level(reward, discouragement, level=9, side='long') reward += self._create_order_at_level(reward, discouragement, level=9, side='short') elif action == 11: reward += self._create_order_at_level(reward, discouragement, level=9, side='long') reward += self._create_order_at_level(reward, discouragement, level=14, side='short') elif action == 12: reward += self._create_order_at_level(reward, discouragement, level=14, side='long') reward += self._create_order_at_level(reward, discouragement, level=0, side='short') elif action == 13: reward += self._create_order_at_level(reward, discouragement, level=14, side='long') reward += self._create_order_at_level(reward, discouragement, level=4, side='short') elif action == 14: reward += self._create_order_at_level(reward, discouragement, level=14, side='long') reward += self._create_order_at_level(reward, discouragement, level=9, side='short') elif action == 15: reward += self._create_order_at_level(reward, discouragement, level=14, side='long') reward += self._create_order_at_level(reward, discouragement, level=14, side='short') elif action == 16: reward += self.broker.flatten_inventory(*self._get_nbbo()) else: logger.info("The action does not exist! Please be careful when selecting an action.")
return reward def _create_position_features(self): """ Create an array with features related to the agent's inventory :return: (np.array) normalized position features """ return np.array( (self.broker.long_inventory.position_count / self.max_position, self.broker.short_inventory.position_count / self.max_position, self.broker.get_total_pnl(midpoint=self.midpoint) / MarketMaker.target_pnl, self.broker.long_inventory.get_unrealized_pnl(self.midpoint) / self.broker.reward_scale, self.broker.short_inventory.get_unrealized_pnl(self.midpoint) / self.broker.reward_scale, self.broker.get_long_order_distance_to_midpoint( midpoint=self.midpoint), self.broker.get_short_order_distance_to_midpoint( midpoint=self.midpoint), *self.broker.get_queues_ahead_features()), dtype=np.float32) def _create_action_features(self, action): """ Create a features array for the current time step's action. :param action: (int) action number :return: (np.array) One-hot of current action """ return self.actions[action] def _create_indicator_features(self): """ Create features vector with environment indicators. :return: (np.array) Indicator values for current time step """ return np.array((*self.tns.get_value(), *self.rsi.get_value()), dtype=np.float32) def _create_order_at_level(self, reward: float, discouragement: float, level=0, side='long'): """ Create a new order at a specified LOB level :param reward: (float) current step reward :param discouragement: (float) penalty deducted from reward for erroneous actions :param level: (int) level in the limit order book :param side: (str) direction of trade e.g., 'long' or 'short' :return: (float) reward with penalties added """ adjustment = 1 if level > 0 else 0 if side == 'long': best = self._get_book_data(MarketMaker.best_bid_index - level) denormalized_best = round(self.midpoint * (best + 1), 2) inside_best = self._get_book_data(MarketMaker.best_bid_index - level + adjustment) denormalized_inside_best = round(self.midpoint * (inside_best + 1), 2) plus_one = denormalized_best + 0.01 if denormalized_inside_best == plus_one: # stick to best bid bid_price = denormalized_best # since LOB is rendered as cumulative notional, deduct the prior price # level to derive the notional value of orders ahead in the queue bid_queue_ahead = self._get_book_data( MarketMaker.notional_bid_index - level) - self._get_book_data( MarketMaker.notional_bid_index - level + adjustment) else: # insert a cent ahead to jump a queue bid_price = plus_one bid_queue_ahead = 0. bid_order = Order(ccy=self.sym, side='long', price=bid_price, step=self.local_step_number, queue_ahead=bid_queue_ahead) if self.broker.add(order=bid_order) is False: reward -= discouragement else: reward += discouragement if side == 'short': best = self._get_book_data(MarketMaker.best_ask_index + level) denormalized_best = round(self.midpoint * (best + 1), 2) inside_best = self._get_book_data(MarketMaker.best_ask_index + level - adjustment) denormalized_inside_best = round(self.midpoint * (inside_best + 1), 2) plus_one = denormalized_best + 0.01 if denormalized_inside_best == plus_one: ask_price = denormalized_best # since LOB is rendered as cumulative notional, deduct the prior price # level to derive the notional value of orders ahead in the queue ask_queue_ahead = self._get_book_data( MarketMaker.notional_ask_index + level) - self._get_book_data( MarketMaker.notional_ask_index + level - adjustment) else: ask_price = plus_one ask_queue_ahead = 0.
ask_order = Order(ccy=self.sym, side='short', price=ask_price, step=self.local_step_number, queue_ahead=ask_queue_ahead) if self.broker.add(order=ask_order) is False: reward -= discouragement else: reward += discouragement return reward def _get_nbbo(self): """ Get best bid and offer :return: (tuple) best bid and offer """ best_bid = round( self.midpoint - self._get_book_data(MarketMaker.best_bid_index), 2) best_ask = round( self.midpoint + self._get_book_data(MarketMaker.best_ask_index), 2) return best_bid, best_ask def _get_book_data(self, index=0): """ Return step 'n' of order book snapshot data :param index: (int) step 'n' to look up in order book snapshot history :return: (np.array) order book snapshot vector """ return self.data[self.local_step_number][index] def _get_step_observation(self, action=0): """ Current step observation, NOT including historical data. :param action: (int) current step action :return: (np.array) Current step observation """ step_position_features = self._create_position_features() step_action_features = self._create_action_features(action=action) step_indicator_features = self._create_indicator_features() return np.concatenate( (self._process_data(self.normalized_data[self.local_step_number]), step_indicator_features, step_position_features, step_action_features, np.array([self.reward])), axis=None) def _get_observation(self): """ Current step observation, including historical data. If format_3d is TRUE: Expand the observation space from 2 to 3 dimensions. (note: This is necessary for conv nets in Baselines.) :return: (np.array) Observation state for current time step """ observation = np.array(self.data_buffer, dtype=np.float32) if self.format_3d: observation = np.expand_dims(observation, axis=-1) return observation
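# Reference sketch (not from the original source) of the 17-action layout dispatched in
# MarketMaker._send_to_broker_and_get_reward() above: action 0 is a no-op, action 16
# flattens inventory, and actions 1-15 quote a long/short pair at LOB levels drawn from
# {0, 4, 9, 14} -- every combination except quoting both sides at level 0.
from itertools import product

LEVELS = (0, 4, 9, 14)
ACTION_TO_LEVELS = {
    action: levels
    for action, levels in enumerate(
        (pair for pair in product(LEVELS, LEVELS) if pair != (0, 0)), start=1)
}
assert ACTION_TO_LEVELS[1] == (0, 4)     # long at level 0, short at level 4
assert ACTION_TO_LEVELS[15] == (14, 14)  # long and short both at level 14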
class MarketMaker(Env): metadata = {'render.modes': ['human']} id = 'market-maker-v0' action_repeats = 4 bid_price_features = ['coinbase-bid-distance-0', 'coinbase-bid-distance-1', 'coinbase-bid-distance-2', 'coinbase-bid-distance-3', 'coinbase-bid-distance-4', 'coinbase-bid-distance-5', 'coinbase-bid-distance-6', 'coinbase-bid-distance-7', 'coinbase-bid-distance-8', 'coinbase-bid-distance-9'] ask_price_features = ['coinbase-ask-distance-0', 'coinbase-ask-distance-1', 'coinbase-ask-distance-2', 'coinbase-ask-distance-3', 'coinbase-ask-distance-4', 'coinbase-ask-distance-5', 'coinbase-ask-distance-6', 'coinbase-ask-distance-7', 'coinbase-ask-distance-8', 'coinbase-ask-distance-9'] bid_notional_features = ['coinbase-bid-notional-0', 'coinbase-bid-notional-1', 'coinbase-bid-notional-2', 'coinbase-bid-notional-3', 'coinbase-bid-notional-4', 'coinbase-bid-notional-5', 'coinbase-bid-notional-6', 'coinbase-bid-notional-7', 'coinbase-bid-notional-8', 'coinbase-bid-notional-9'] ask_notional_features = ['coinbase-ask-notional-0', 'coinbase-ask-notional-1', 'coinbase-ask-notional-2', 'coinbase-ask-notional-3', 'coinbase-ask-notional-4', 'coinbase-ask-notional-5', 'coinbase-ask-notional-6', 'coinbase-ask-notional-7', 'coinbase-ask-notional-8', 'coinbase-ask-notional-9'] def __init__(self, training=True, fitting_file='ETH-USD_2018-12-31.xz', testing_file='ETH-USD_2019-01-01.xz', step_size=1, max_position=5, window_size=50, seed=1, frame_stack=False): # properties required for instantiation self._random_state = np.random.RandomState(seed=seed) self._seed = seed self.training = training self.step_size = step_size self.fee = BROKER_FEE self.max_position = max_position self.window_size = window_size self.frame_stack = frame_stack self.frames_to_add = 3 if self.frame_stack else 0 self.inventory_features = ['long_inventory', 'short_inventory', 'long_unrealized_pnl', 'short_unrealized_pnl', 'buy_distance_to_midpoint', 'short_distance_to_midpoint'] self._action = 0 # derive gym.env properties self.actions = np.eye(24) self.sym = testing_file[:7] # slice the CCY from the filename # properties that get reset() self.reward = 0.0 self.done = False self._local_step_number = 0 self.midpoint = 0.0 self.observation = None # get historical data for simulations self.broker = Broker(max_position=max_position) self.sim = Sim(use_arctic=False) # Turn to true if Bitifinex is in the dataset (e.g., include_bitfinex=True) self.features = self.sim.get_feature_labels(include_system_time=False, include_bitfinex=False) fitting_data_filepath = '{}/data_exports/{}'.format(self.sim.cwd, fitting_file) data_used_in_environment = '{}/data_exports/{}'.format(self.sim.cwd, testing_file) # print('Fitting data: {}\nTesting Data: {}'.format(fitting_data_filepath, # data_used_in_environment)) self.sim.fit_scaler(self.sim.import_csv(filename=fitting_data_filepath)) self.data = self.sim.import_csv(filename=data_used_in_environment) self.prices = self.data['coinbase_midpoint'].values # used to calculate PnL self.bid_prices = self.data[MarketMaker.bid_price_features].values # used for LOB placement self.ask_prices = self.data[MarketMaker.ask_price_features].values # used for LOB placement self.bid_notionals = self.data[MarketMaker.bid_notional_features].values # used for LOB placement self.ask_notionals = self.data[MarketMaker.ask_notional_features].values # used for LOB placement # self.data = self.data.apply(self.sim.z_score, axis=1) self.data_ = self.data.copy() # used for rendering data self.data = self.data.values # used for the observation 
space # self.data = None self.data_buffer, self.frame_stacker = list(), list() self.action_space = spaces.Discrete(len(self.actions)) variable_features_count = len(self.inventory_features) + len(self.actions) + 1 if self.frame_stack is False: shape = (len(self.features) + variable_features_count, self.window_size) else: shape = (len(self.features) + variable_features_count, self.window_size, 4) self.observation_space = spaces.Box(low=self.data.min(), high=self.data.max(), shape=shape, dtype=np.int) # attributes for rendering self.line1 = [] self.screen_size = 200 self.y_vec = None self.x_vec = None self._reset_render_data() self.reset() # print('MarketMaker instantiated. ' + # '\nself.observation_space.shape : {}'.format( # self.observation_space.shape)) def __str__(self): return '{} | {}-{}'.format(MarketMaker.id, self.sym, self.seed) def _reset_render_data(self): self.x_vec = np.linspace(0, self.screen_size * 10, self.screen_size + 1)[0:-1] self.y_vec = np.array(self.prices[:np.shape(self.x_vec)[0]]) self.line1 = [] @property def step_number(self): return self._local_step_number def step(self, action_): for current_step in range(MarketMaker.action_repeats): if self.done: self.reset() return self.observation, self.reward, self.done # reset the reward if there are action repeats if current_step == 0: self.reward = 0. action = action_ else: action = 0 # Get current step's midpoint to calculate PnL, or if # an open order got filled. self.midpoint = self.prices[self._local_step_number] _step_reward = self.broker.step( bid_price=self.midpoint - self.bid_prices[self._local_step_number][0], ask_price=self.midpoint + self.ask_prices[self._local_step_number][0], buy_volume=self.data[self._local_step_number][-2], sell_volume=self.data[self._local_step_number][-1], step=self._local_step_number ) self.reward += self._send_to_broker_and_get_reward(action) + _step_reward position_features = self._create_position_features() action_features = self._create_action_features(action=action) _observation = np.concatenate((self.process_data(self.data[self._local_step_number]), position_features, action_features, np.array([self.reward])), axis=None) self.data_buffer.append(_observation) if len(self.data_buffer) >= self.window_size: self.frame_stacker.append(np.array(self.data_buffer, dtype=np.float32)) del self.data_buffer[0] if len(self.frame_stacker) > self.frames_to_add + 1: del self.frame_stacker[0] self._local_step_number += self.step_size # output shape is [n_features, window_size, frames_to_add] e.g., [40, 100, 1] self.observation = np.array(self.frame_stacker, dtype=np.float32).transpose() # This removes a dimension to be compatible with the Keras-rl module # because Keras-rl uses its own frame-stacker. There are future plans to integrate # this repository with more reinforcement learning packages, such as baselines. if self.frame_stack is False: self.observation = self.observation.reshape(self.observation.shape[0], -1) if self._local_step_number > self.data.shape[0] - 8: self.done = True best_bid = round(self.midpoint + self.bid_prices[self._local_step_number][0], 2) best_ask = round(self.midpoint + self.ask_prices[self._local_step_number][0], 2) self.reward += self.broker.flatten_inventory(bid_price=best_bid, ask_price=best_ask) return self.observation, self.reward, self.done, {} def reset(self): if self.training: self._local_step_number = self._random_state.randint(low=1, high=5000) else: self._local_step_number = 0 logger.info(' {}-{} reset. 
Episode pnl: {} | First step: {}, max_pos: {}'.format( self.sym, self._seed, self.broker.get_total_pnl(midpoint=self.midpoint), self._local_step_number, self.max_position)) self.reward = 0.0 self.done = False self.broker.reset() self.data_buffer.clear() self.frame_stacker.clear() self._reset_render_data() for step in range(self.window_size + self.frames_to_add): position_features = self._create_position_features() action_features = self._create_action_features(action=0) _observation = np.concatenate((self.process_data(self.data[self._local_step_number]), position_features, action_features, np.array([self.reward])), axis=None) self.data_buffer.append(_observation) self._local_step_number += self.step_size if step >= self.window_size - 1: self.frame_stacker.append(np.array(self.data_buffer, dtype=np.float32)) del self.data_buffer[0] if len(self.frame_stacker) > self.frames_to_add + 1: del self.frame_stacker[0] # output shape is [n_features, window_size, frames_to_add] eg [40, 100, 1] self.observation = np.array(self.frame_stacker, dtype=np.float32).transpose() # This removes a dimension to be compatible with the Keras-rl module # because Keras-rl uses its own frame-stacker. There are future plans to integrate # this repository with more reinforcement learning packages, such as baselines. if self.frame_stack is False: self.observation = self.observation.reshape(self.observation.shape[0], -1) return self.observation def render(self, mode='human'): if mode == 'human': self.line1 = _live_plotter(self.x_vec, self.y_vec, self.line1, identifier=self.sym) self.y_vec = np.append(self.y_vec[1:], self.midpoint) def close(self): logger.info('{}-{} is being closed.'.format(self.id, self.sym)) self.data = None self.broker = None self.sim = None self.data_buffer = None plt.close() return def seed(self, seed=1): self._random_state = np.random.RandomState(seed=seed) self._seed = seed return [seed] # @staticmethod # def process_data(_next_state): # return np.clip(_next_state.reshape((1, -1)), -10., 10.) 
def process_data(self, _next_state): # return self.sim.scale_state(_next_state).values.reshape((1, -1)) return np.reshape(_next_state, (1, -1)) def _send_to_broker_and_get_reward(self, action): reward = 0.0 discouragement = 0.000000000001 if action == 0: # do nothing reward += discouragement elif action == 1: # set bid to inside spread or [ask_price - 0.01] best_bid = self.bid_prices[self._local_step_number][0] best_ask = self.ask_prices[self._local_step_number][0] price = round(max(self.midpoint - best_bid, self.midpoint + best_ask - 0.01), 2) order = Order(ccy=self.sym, side='long', price=price, step=self._local_step_number) if self.broker.add(order=order) is False: reward -= discouragement elif action == 2: # set bid to best_bid - row 0 reward = self._create_bid_order_at_level(reward, discouragement, 0) elif action == 3: # set bid to best_bid - row 1 reward = self._create_bid_order_at_level(reward, discouragement, 1) elif action == 4: # set bid to best_bid - row 2 reward = self._create_bid_order_at_level(reward, discouragement, 2) elif action == 5: # set bid to best_bid - row 3 reward = self._create_bid_order_at_level(reward, discouragement, 3) if action == 6: # set bid to best_bid - row 4 reward = self._create_bid_order_at_level(reward, discouragement, 4) elif action == 7: # set bid to best_bid - row 5 reward = self._create_bid_order_at_level(reward, discouragement, 5) elif action == 8: # set bid to best_bid - row 6 reward = self._create_bid_order_at_level(reward, discouragement, 6) elif action == 9: # set bid to best_bid - row 7 reward = self._create_bid_order_at_level(reward, discouragement, 7) if action == 10: # set bid to best_bid - row 8 reward = self._create_bid_order_at_level(reward, discouragement, 8) elif action == 11: # set bid to best_bid - row 9 reward = self._create_bid_order_at_level(reward, discouragement, 9) elif action == 12: # set ask to inside spread or [bid_price + 0.01] best_bid = self.bid_prices[self._local_step_number][0] best_ask = self.ask_prices[self._local_step_number][0] price = round(min(best_ask + self.midpoint, self.midpoint - best_bid + 0.01), 2) order = Order(ccy=self.sym, side='long', price=price, step=self._local_step_number) if self.broker.add(order=order) is False: reward -= discouragement if action == 13: # set ask to best_bid - row 0 reward = self._create_ask_order_at_level(reward, discouragement, 0) elif action == 14: # set ask to best_bid - row 1 reward = self._create_ask_order_at_level(reward, discouragement, 1) elif action == 15: # set ask to best_bid - row 2 reward = self._create_ask_order_at_level(reward, discouragement, 2) if action == 16: # set ask to best_bid - row 3 reward = self._create_ask_order_at_level(reward, discouragement, 3) elif action == 17: # set ask to best_bid - row 4 reward = self._create_ask_order_at_level(reward, discouragement, 4) elif action == 18: # set ask to best_bid - row 5 reward = self._create_ask_order_at_level(reward, discouragement, 5) if action == 19: # set ask to best_bid - row 6 reward = self._create_ask_order_at_level(reward, discouragement, 6) elif action == 20: # set ask to best_bid - row 7 reward = self._create_ask_order_at_level(reward, discouragement, 7) elif action == 21: # set ask to best_bid - row 8 reward = self._create_ask_order_at_level(reward, discouragement, 8) elif action == 22: # set ask to best_bid - row 9 reward = self._create_ask_order_at_level(reward, discouragement, 9) if action == 23: # flatten all positions best_bid = round(self.midpoint + self.bid_prices[self._local_step_number][0], 
2) best_ask = round(self.midpoint + self.ask_prices[self._local_step_number][0], 2) reward += self.broker.flatten_inventory(bid_price=best_bid, ask_price=best_ask) elif action == 24: # logger.info("Action no. 25 does not exist! Be careful!") pass return reward def _create_position_features(self): return np.array((self.broker.long_inventory.position_count / self.max_position, self.broker.short_inventory.position_count / self.max_position, self.broker.long_inventory.get_unrealized_pnl(self.midpoint), self.broker.short_inventory.get_unrealized_pnl(self.midpoint), self.broker.get_long_order_distance_to_midpoint(midpoint=self.midpoint), self.broker.get_short_order_distance_to_midpoint(midpoint=self.midpoint))) def _create_action_features(self, action): return self.actions[action] def _create_bid_order_at_level(self, reward, discouragement, level=0): if level > 0: above_best_bid = self.bid_prices[self._local_step_number][level-1] best_bid = self.bid_prices[self._local_step_number][level] if round(above_best_bid, 2) == round(best_bid + 0.01, 2): price = round(self.midpoint - best_bid, 2) queue_ahead = self.bid_notionals[self._local_step_number][level] else: price = round(self.midpoint - best_bid + 0.01, 2) queue_ahead = 0. order = Order(ccy=self.sym, side='long', price=price, step=self._local_step_number, queue_ahead=queue_ahead) if self.broker.add(order=order) is False: reward -= discouragement else: best_bid = self.bid_prices[self._local_step_number][level] price = round(self.midpoint - best_bid, 2) queue_ahead = self.bid_notionals[self._local_step_number][level] order = Order(ccy=self.sym, side='long', price=price, step=self._local_step_number, queue_ahead=queue_ahead) if self.broker.add(order=order) is False: reward -= discouragement return reward def _create_ask_order_at_level(self, reward, discouragement, level=0): if level > 0: above_best_ask = self.ask_prices[self._local_step_number][level - 1] best_ask = self.ask_prices[self._local_step_number][level] if round(above_best_ask, 2) == round(best_ask - 0.01, 2): price = round(best_ask + self.midpoint, 2) queue_ahead = self.ask_notionals[self._local_step_number][level] else: price = round(best_ask + 0.01 + self.midpoint, 2) queue_ahead = 0. order = Order(ccy=self.sym, side='short', price=price, step=self._local_step_number, queue_ahead=queue_ahead) if self.broker.add(order=order) is False: reward -= discouragement else: best_ask = self.ask_prices[self._local_step_number][level] price = round(best_ask + self.midpoint, 2) queue_ahead = self.ask_notionals[self._local_step_number][level] order = Order(ccy=self.sym, side='short', price=price, step=self._local_step_number, queue_ahead=queue_ahead) if self.broker.add(order=order) is False: reward -= discouragement return reward
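# Reference sketch (not from the original source) describing the 24-action layout handled
# by this older MarketMaker version's _send_to_broker_and_get_reward() above: 0 = no-op,
# 1 = bid at the inside spread, 2-11 = bid at LOB rows 0-9, 12 = ask at the inside spread,
# 13-22 = ask at LOB rows 0-9, and 23 = flatten all open positions.
def describe_action(action: int) -> str:
    """Map an action index to a human-readable description of the quote it places."""
    if action == 0:
        return 'no-op'
    if action == 1:
        return 'bid at inside spread'
    if 2 <= action <= 11:
        return 'bid at LOB row {}'.format(action - 2)
    if action == 12:
        return 'ask at inside spread'
    if 13 <= action <= 22:
        return 'ask at LOB row {}'.format(action - 13)
    if action == 23:
        return 'flatten inventory'
    return 'undefined action'


print([describe_action(a) for a in range(24)])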
def __init__(self, fitting_file='LTC-USD_2019-04-07.csv.xz', testing_file='LTC-USD_2019-04-08.csv.xz', step_size=1, max_position=5, window_size=10, seed=1, action_repeats=10, training=True, format_3d=False, z_score=True, reward_type='trade_completion', scale_rewards=True): """ Base class for creating environments extending OpenAI's GYM framework. :param fitting_file: historical data used to fit environment data (i.e., previous trading day) :param testing_file: historical data used in environment :param step_size: increment size for steps (NOTE: leave a 1, otherwise market transaction data will be overlooked) :param max_position: maximum number of positions able to hold in inventory :param window_size: number of lags to include in observation space :param seed: random seed number :param action_repeats: number of steps to take in environment after a given action :param training: if TRUE, then randomize starting point in environment :param format_3d: if TRUE, reshape observation space from matrix to tensor :param z_score: if TRUE, normalize data set with Z-Score, otherwise use Min-Max (i.e., range of 0 to 1) :param reward_type: method for calculating the environment's reward: 1) 'trade_completion' --> reward is generated per trade's round trip 2) 'continuous_total_pnl' --> change in realized & unrealized pnl between time steps 3) 'continuous_realized_pnl' --> change in realized pnl between time steps 4) 'continuous_unrealized_pnl' --> change in unrealized pnl between time steps """ # properties required for instantiation self.action_repeats = action_repeats self._seed = seed self._random_state = np.random.RandomState(seed=self._seed) self.training = training self.step_size = step_size self.max_position = max_position self.window_size = window_size self.reward_type = reward_type self.format_3d = format_3d # e.g., [window, features, *NEW_AXIS*] self.sym = testing_file[:7] # slice the CCY from the filename self.scale_rewards = scale_rewards # properties that get reset() self.reward = 0.0 self.done = False self.local_step_number = 0 self.midpoint = 0.0 self.observation = None self.action = 0 self.last_pnl = 0. # properties to override in sub-classes self.actions = None self.broker = None self.action_space = None self.observation_space = None # get historical data for simulations self.sim = Sim(use_arctic=False, z_score=z_score) self.prices_, self.data, self.normalized_data = self.sim.load_environment_data( fitting_file, testing_file) self.best_bid = self.best_ask = None self.max_steps = self.data.shape[ 0] - self.step_size * self.action_repeats - 1 # load indicators into the indicator manager self.tns = IndicatorManager() self.rsi = IndicatorManager() for window in INDICATOR_WINDOW: self.tns.add(('tns_{}'.format(window), TnS(window=window))) self.rsi.add(('rsi_{}'.format(window), RSI(window=window))) # rendering class self._render = TradingGraph(sym=self.sym) # graph midpoint prices self._render.reset_render_data( y_vec=self.prices_[:np.shape(self._render.x_vec)[0]]) # buffer for appending lags self.data_buffer = list()
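# Hedged sketch (assumption: the PnL figures are supplied by the Broker class referenced
# above) of how the continuous reward_type options listed in the docstring can be computed
# as step-over-step deltas; last_pnl mirrors the attribute the base class initializes to 0.
def continuous_reward(reward_type, realized_pnl, unrealized_pnl, last_pnl):
    """Return (reward, new_last_pnl) for the delta-style reward types."""
    if reward_type == 'continuous_total_pnl':
        current = realized_pnl + unrealized_pnl
    elif reward_type == 'continuous_realized_pnl':
        current = realized_pnl
    elif reward_type == 'continuous_unrealized_pnl':
        current = unrealized_pnl
    else:  # e.g., 'trade_completion' is granted per round trip, not per step
        return 0., last_pnl
    return current - last_pnl, current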
class PriceJump(Env): metadata = {'render.modes': ['human']} id = 'long-short-v0' # Turn to true if Bitifinex is in the dataset (e.g., include_bitfinex=True) features = Sim.get_feature_labels(include_system_time=False, include_bitfinex=False) best_bid_index = features.index('coinbase-bid-distance-0') best_ask_index = features.index('coinbase-ask-distance-0') notional_bid_index = features.index('coinbase-bid-notional-0') notional_ask_index = features.index('coinbase-ask-notional-0') buy_trade_index = features.index('coinbase-buys') sell_trade_index = features.index('coinbase-sells') target_pnl = BROKER_FEE * 10 * 5 # e.g., 5 for max_positions fee = BROKER_FEE def __init__(self, *, fitting_file='ETH-USD_2018-12-31.xz', testing_file='ETH-USD_2019-01-01.xz', step_size=1, max_position=5, window_size=10, seed=1, action_repeats=10, training=True, format_3d=False, z_score=True): # properties required for instantiation self.action_repeats = action_repeats self._seed = seed self._random_state = np.random.RandomState(seed=self._seed) self.training = training self.step_size = step_size self.max_position = max_position self.window_size = window_size self.format_3d = format_3d # e.g., [window, features, *NEW_AXIS*] self.action = 0 # derive gym.env properties self.actions = np.eye(3) self.sym = testing_file[:7] # slice the CCY from the filename # properties that get reset() self.reward = 0.0 self.done = False self.local_step_number = 0 self.midpoint = 0.0 self.observation = None # get Broker class to keep track of PnL and orders self.broker = Broker(max_position=max_position) # get historical data for simulations self.sim = Sim(use_arctic=False) self.data = self._load_environment_data(fitting_file, testing_file) self.prices_ = self.data[ 'coinbase_midpoint'].values # used to calculate PnL self.normalized_data = self.data.copy() self.data = self.data.values self.max_steps = self.data.shape[0] - self.step_size * \ self.action_repeats - 1 # normalize midpoint data self.normalized_data['coinbase_midpoint'] = \ np.log(self.normalized_data['coinbase_midpoint'].values) self.normalized_data['coinbase_midpoint'] = ( self.normalized_data['coinbase_midpoint'] - self.normalized_data['coinbase_midpoint'].shift(1)).fillna(0.) 
# load indicators into the indicator manager self.tns = IndicatorManager() self.rsi = IndicatorManager() for window in INDICATOR_WINDOW: self.tns.add(('tns_{}'.format(window), TnS(window=window))) self.rsi.add(('rsi_{}'.format(window), RSI(window=window))) if z_score: logger.info("Pre-scaling {}-{} data...".format( self.sym, self._seed)) self.normalized_data = self.normalized_data.apply(self.sim.z_score, axis=1).values logger.info("...{}-{} pre-scaling complete.".format( self.sym, self._seed)) else: self.normalized_data = self.normalized_data.values # rendering class self._render = TradingGraph(sym=self.sym) # graph midpoint prices self._render.reset_render_data( y_vec=self.prices_[:np.shape(self._render.x_vec)[0]]) # buffer for appending lags self.data_buffer = list() self.action_space = spaces.Discrete(len(self.actions)) self.reset() # reset to load observation.shape self.observation_space = spaces.Box(low=-10, high=10, shape=self.observation.shape, dtype=np.float32) print( '{} PriceJump #{} instantiated.\nself.observation_space.shape : {}' .format(self.sym, self._seed, self.observation_space.shape)) def __str__(self): return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed) def step(self, action: int): for current_step in range(self.action_repeats): if self.done: self.reset() return self.observation, self.reward, self.done # reset the reward if there ARE action repeats if current_step == 0: self.reward = 0. step_action = action else: step_action = 0 # Get current step's midpoint self.midpoint = self.prices_[self.local_step_number] # Pass current time step midpoint to broker to calculate PnL, # or if any open orders are to be filled buy_volume = self._get_book_data(PriceJump.buy_trade_index) sell_volume = self._get_book_data(PriceJump.sell_trade_index) self.tns.step(buys=buy_volume, sells=sell_volume) self.rsi.step(price=self.midpoint) self.broker.step(midpoint=self.midpoint) self.reward += self._send_to_broker_and_get_reward( action=step_action) step_observation = self._get_step_observation(action=action) self.data_buffer.append(step_observation) if len(self.data_buffer) > self.window_size: del self.data_buffer[0] self.local_step_number += self.step_size self.observation = self._get_observation() if self.local_step_number > self.max_steps: self.done = True order = Order(ccy=self.sym, side=None, price=self.midpoint, step=self.local_step_number) self.reward = self.broker.flatten_inventory(order=order) return self.observation, self.reward, self.done, {} def reset(self): if self.training: self.local_step_number = self._random_state.randint( low=1, high=self.data.shape[0] // 4) else: self.local_step_number = 0 msg = ' {}-{} reset. 
Episode pnl: {:.4f} with {} trades | First step: {}'.format( self.sym, self._seed, self.broker.get_total_pnl(midpoint=self.midpoint), self.broker.get_total_trade_count(), self.local_step_number) logger.info(msg) self.reward = 0.0 self.done = False self.broker.reset() self.data_buffer.clear() self.rsi.reset() self.tns.reset() for step in range(self.window_size + INDICATOR_WINDOW_MAX): self.midpoint = self.prices_[self.local_step_number] step_buy_volume = self._get_book_data(PriceJump.buy_trade_index) step_sell_volume = self._get_book_data(PriceJump.sell_trade_index) self.tns.step(buys=step_buy_volume, sells=step_sell_volume) self.rsi.step(price=self.midpoint) step_observation = self._get_step_observation(action=0) self.data_buffer.append(step_observation) self.local_step_number += self.step_size if len(self.data_buffer) > self.window_size: del self.data_buffer[0] self.observation = self._get_observation() return self.observation def render(self, mode='human'): self._render.render(midpoint=self.midpoint, mode=mode) def close(self): logger.info('{}-{} is being closed.'.format(self.id, self.sym)) self.data = None self.normalized_data = None self.prices_ = None self.broker = None self.sim = None self.data_buffer = None self.tns = None self.rsi = None return def seed(self, seed=1): self._random_state = np.random.RandomState(seed=seed) self._seed = seed logger.info('Setting seed in PriceJump.seed({})'.format(seed)) return [seed] @staticmethod def _process_data(_next_state): return np.clip(_next_state.reshape((1, -1)), -10., 10.) # def _process_data(self, _next_state): # # return self.sim.scale_state(_next_state).values.reshape((1, -1)) # return np.reshape(_next_state, (1, -1)) def _send_to_broker_and_get_reward(self, action): reward = 0.0 discouragement = 0.000000000001 if action == 0: # do nothing reward += discouragement elif action == 1: # buy price_fee_adjusted = self.midpoint + (PriceJump.fee * self.midpoint) if self.broker.short_inventory_count > 0: order = Order(ccy=self.sym, side='short', price=price_fee_adjusted, step=self.local_step_number) self.broker.remove(order=order) reward += self.broker.get_reward(side=order.side) elif self.broker.long_inventory_count >= 0: order = Order(ccy=self.sym, side='long', price=price_fee_adjusted, step=self.local_step_number) if self.broker.add(order=order) is False: reward -= discouragement else: logger.info( ('gym_trading.get_reward() ' + 'Error for action #{} - ' + 'unable to place an order with broker').format(action)) elif action == 2: # sell price_fee_adjusted = self.midpoint - (PriceJump.fee * self.midpoint) if self.broker.long_inventory_count > 0: order = Order(ccy=self.sym, side='long', price=price_fee_adjusted, step=self.local_step_number) self.broker.remove(order=order) reward += self.broker.get_reward(side=order.side) elif self.broker.short_inventory_count >= 0: order = Order(ccy=self.sym, side='short', price=price_fee_adjusted, step=self.local_step_number) if self.broker.add(order=order) is False: reward -= discouragement else: logger.info( 'gym_trading.get_reward() ' + 'Error for action #{} - ' + 'unable to place an order with broker'.format(action)) else: logger.info( ('Unknown action to take in get_reward(): ' + 'action={} | midpoint={}').format(action, self.midpoint)) return reward def _create_position_features(self): return np.array( (self.broker.long_inventory.position_count / self.max_position, self.broker.short_inventory.position_count / self.max_position, self.broker.get_total_pnl(midpoint=self.midpoint) / PriceJump.target_pnl, 
             self.broker.long_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale,
             self.broker.short_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale),
            dtype=np.float32)

    def _create_action_features(self, action):
        return self.actions[action]

    def _create_indicator_features(self):
        return np.array((*self.tns.get_value(), *self.rsi.get_value()),
                        dtype=np.float32)

    def _get_nbbo(self):
        best_bid = round(
            self.midpoint - self._get_book_data(PriceJump.best_bid_index), 2)
        best_ask = round(
            self.midpoint + self._get_book_data(PriceJump.best_ask_index), 2)
        return best_bid, best_ask

    def _get_book_data(self, index=0):
        return self.data[self.local_step_number][index]

    def _get_step_observation(self, action=0):
        step_position_features = self._create_position_features()
        step_action_features = self._create_action_features(action=action)
        step_indicator_features = self._create_indicator_features()
        return np.concatenate(
            (self._process_data(self.normalized_data[self.local_step_number]),
             step_indicator_features,
             step_position_features,
             step_action_features,
             np.array([self.reward])),
            axis=None)

    def _get_observation(self):
        observation = np.array(self.data_buffer, dtype=np.float32)
        # Expand the observation space from 2 to 3 dimensions.
        # This is necessary for conv nets in Baselines.
        if self.format_3d:
            observation = np.expand_dims(observation, axis=-1)
        return observation

    def _load_environment_data(self, fitting_file, testing_file):
        fitting_data_filepath = '{}/data_exports/{}'.format(self.sim.cwd, fitting_file)
        data_used_in_environment = '{}/data_exports/{}'.format(self.sim.cwd, testing_file)

        fitting_data = self.sim.import_csv(filename=fitting_data_filepath)
        fitting_data['coinbase_midpoint'] = np.log(
            fitting_data['coinbase_midpoint'].values)
        fitting_data['coinbase_midpoint'] = (
            fitting_data['coinbase_midpoint'] -
            fitting_data['coinbase_midpoint'].shift(1)).fillna(method='bfill')
        self.sim.fit_scaler(fitting_data)
        del fitting_data

        return self.sim.import_csv(filename=data_used_in_environment)
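# Illustrative usage sketch (not part of the project): drive the environment
# defined above with a random policy. This assumes the exported CSVs named
# below exist in data_exports/ and that the PriceJump class above is in scope;
# the filenames are placeholders, so substitute your own exports.
import numpy as np

if __name__ == '__main__':
    env = PriceJump(fitting_file='ETH-USD_2018-12-31.xz',
                    testing_file='ETH-USD_2019-01-01.xz',
                    training=False)
    observation = env.reset()
    done, total_reward = False, 0.0
    while not done:
        action = np.random.randint(len(env.actions))  # 0 = hold, 1 = buy, 2 = sell
        observation, reward, done, info = env.step(action)
        total_reward += reward
    print('Episode finished with total reward: {:.6f}'.format(total_reward))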
    def __init__(self, *,
                 fitting_file='ETH-USD_2018-12-31.xz',
                 testing_file='ETH-USD_2019-01-01.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=False,
                 z_score=True):
        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]
        self.action = 0

        # derive gym.env properties
        self.actions = np.eye(3)
        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)

        # get historical data for simulations
        self.sim = Sim(use_arctic=False)
        self.data = self._load_environment_data(fitting_file, testing_file)
        self.prices_ = self.data['coinbase_midpoint'].values  # used to calculate PnL

        self.normalized_data = self.data.copy()
        self.data = self.data.values
        self.max_steps = self.data.shape[0] - self.step_size * self.action_repeats - 1

        # normalize midpoint data
        self.normalized_data['coinbase_midpoint'] = np.log(
            self.normalized_data['coinbase_midpoint'].values)
        self.normalized_data['coinbase_midpoint'] = (
            self.normalized_data['coinbase_midpoint'] -
            self.normalized_data['coinbase_midpoint'].shift(1)).fillna(0.)

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window)))

        if z_score:
            logger.info("Pre-scaling {}-{} data...".format(self.sym, self._seed))
            self.normalized_data = self.normalized_data.apply(
                self.sim.z_score, axis=1).values
            logger.info("...{}-{} pre-scaling complete.".format(self.sym, self._seed))
        else:
            self.normalized_data = self.normalized_data.values

        # rendering class
        self._render = TradingGraph(sym=self.sym)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        # buffer for appending lags
        self.data_buffer = list()

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10, high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print('{} PriceJump #{} instantiated.\nself.observation_space.shape : {}'.format(
            self.sym, self._seed, self.observation_space.shape))
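# Illustrative sketch of the midpoint normalization performed in __init__ above:
# log-transform the midpoint, take first differences (log returns), fill the
# leading NaN, then z-score. The toy prices are made up, and the plain z-score
# below is only a stand-in for self.sim.z_score, which uses a scaler fit on the
# fitting file.
import numpy as np
import pandas as pd

midpoint = pd.Series([100.0, 100.5, 100.25, 101.0], name='coinbase_midpoint')
log_mid = pd.Series(np.log(midpoint.values))
log_returns = (log_mid - log_mid.shift(1)).fillna(0.)
z_scored = (log_returns - log_returns.mean()) / log_returns.std()
print(log_returns.round(6).tolist())  # [0.0, 0.004988, -0.002491, 0.007453]
print(z_scored.round(3).tolist())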
class PriceJump(Env):
    metadata = {'render.modes': ['human']}
    id = 'long-short-v0'
    action_repeats = 4

    def __init__(self, training=True,
                 fitting_file='ETH-USD_2018-12-31.xz',
                 testing_file='ETH-USD_2019-01-01.xz',
                 step_size=1,
                 max_position=1,
                 window_size=50,
                 seed=1,
                 frame_stack=False):
        # properties required for instantiation
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        self.training = training
        self.step_size = step_size
        self.fee = BROKER_FEE
        self.max_position = max_position
        self.window_size = window_size
        self.frame_stack = frame_stack
        self.frames_to_add = 3 if self.frame_stack else 0
        self.inventory_features = ['long_inventory', 'short_inventory',
                                   'long_unrealized_pnl', 'short_unrealized_pnl']
        self._action = 0

        # derive gym.env properties
        self.actions = ((1, 0, 0),  # 0. do nothing
                        (0, 1, 0),  # 1. buy
                        (0, 0, 1)   # 2. sell
                        )
        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self._local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get historical data for simulations
        self.broker = Broker(max_position=max_position)
        self.sim = Sim(use_arctic=False)

        # Set include_bitfinex=True if Bitfinex data is in the dataset
        self.features = self.sim.get_feature_labels(include_system_time=False,
                                                    include_bitfinex=False)

        fitting_data_filepath = '{}/data_exports/{}'.format(self.sim.cwd, fitting_file)
        data_used_in_environment = '{}/data_exports/{}'.format(self.sim.cwd, testing_file)
        print('Fitting data: {}\nTesting Data: {}'.format(fitting_data_filepath,
                                                          data_used_in_environment))

        self.sim.fit_scaler(self.sim.import_csv(filename=fitting_data_filepath))
        self.data = self.sim.import_csv(filename=data_used_in_environment)
        self.prices = self.data['coinbase_midpoint'].values
        self.data = self.data.apply(self.sim.z_score, axis=1)
        self.data = self.data.values

        self.data_buffer, self.frame_stacker = list(), list()
        self.action_space = spaces.Discrete(len(self.actions))
        variable_features_count = len(self.inventory_features) + len(self.actions) + 1

        if self.frame_stack is False:
            shape = (len(self.features) + variable_features_count, self.window_size)
        else:
            shape = (len(self.features) + variable_features_count, self.window_size, 4)

        self.observation_space = spaces.Box(low=self.data.min(),
                                            high=self.data.max(),
                                            shape=shape,
                                            dtype=np.float32)  # observations are floats

        self.reset()
        # print('PriceJump instantiated. ' +
        #       '\nself.observation_space.shape : {}'.format(
        #           self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed)

    @property
    def step_number(self):
        return self._local_step_number

    def step(self, action):
        for current_step in range(PriceJump.action_repeats):

            if self.done:
                self.reset()
                # early exit also returns the (obs, reward, done, info)
                # four-tuple expected by the gym API
                return self.observation, self.reward, self.done, {}

            position_features = self._create_position_features()
            action_features = self._create_action_features(action=action)

            self.midpoint = self.prices[self._local_step_number]
            self.broker.step(midpoint=self.midpoint)

            if current_step == 0:
                self.reward = 0.
            self.reward += self._send_to_broker_and_get_reward(action=action)

            _observation = np.concatenate((
                self.process_data(self.data[self._local_step_number]),
                position_features,
                action_features,
                np.array([self.reward])),
                axis=None)
            self.data_buffer.append(_observation)

            if len(self.data_buffer) >= self.window_size:
                self.frame_stacker.append(np.array(self.data_buffer, dtype=np.float32))
                del self.data_buffer[0]

                if len(self.frame_stacker) > self.frames_to_add + 1:
                    del self.frame_stacker[0]

            self._local_step_number += self.step_size

        # output shape is [n_features, window_size, frames_to_add], e.g., [40, 100, 1]
        self.observation = np.array(self.frame_stacker, dtype=np.float32).transpose()

        # This removes a dimension to be compatible with the Keras-rl module,
        # because Keras-rl uses its own frame-stacker. There are future plans to
        # integrate this repository with more reinforcement learning packages,
        # such as baselines.
        if self.frame_stack is False:
            self.observation = self.observation.reshape(self.observation.shape[0], -1)

        if self._local_step_number > self.data.shape[0] - 8:
            self.done = True
            order = Order(ccy=self.sym, side=None, price=self.midpoint,
                          step=self._local_step_number)
            self.reward = self.broker.flatten_inventory(order=order)

        return self.observation, self.reward, self.done, {}

    def reset(self):
        if self.training:
            self._local_step_number = self._random_state.randint(low=1, high=5000)
        else:
            self._local_step_number = 0

        logger.info(' %s-%i reset. Episode pnl: %.4f | First step: %i, max_pos: %i' %
                    (self.sym, self._seed,
                     self.broker.get_total_pnl(midpoint=self.midpoint),
                     self._local_step_number, self.max_position))

        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.frame_stacker.clear()

        for step in range(self.window_size + self.frames_to_add):
            position_features = self._create_position_features()
            action_features = self._create_action_features(action=0)

            _observation = np.concatenate((
                self.process_data(self.data[self._local_step_number]),
                position_features,
                action_features,
                np.array([self.reward])),
                axis=None)
            self.data_buffer.append(_observation)

            self._local_step_number += self.step_size

            if step >= self.window_size - 1:
                self.frame_stacker.append(np.array(self.data_buffer, dtype=np.float32))
                del self.data_buffer[0]

                if len(self.frame_stacker) > self.frames_to_add + 1:
                    del self.frame_stacker[0]

        # output shape is [n_features, window_size, frames_to_add], e.g., [40, 100, 1]
        self.observation = np.array(self.frame_stacker, dtype=np.float32).transpose()

        # This removes a dimension to be compatible with the Keras-rl module,
        # because Keras-rl uses its own frame-stacker. There are future plans to
        # integrate this repository with more reinforcement learning packages,
        # such as baselines.
        if self.frame_stack is False:
            self.observation = self.observation.reshape(self.observation.shape[0], -1)

        return self.observation

    def render(self, mode='human'):
        pass

    def close(self):
        logger.info('{}-{} is being closed.'.format(self.id, self.sym))
        self.data = None
        self.broker = None
        self.sim = None
        self.data_buffer = None
        return

    def seed(self, seed=1):
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        return [seed]

    @staticmethod
    def process_data(_next_state):
        # return self.sim.scale_state(_next_state).values.reshape((1, -1))
        return np.clip(_next_state.reshape((1, -1)), -10., 10.)
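# Illustrative sketch (not part of the project) of the observation shapes that
# the frame_stacker logic above produces. With frame_stack=False only one frame
# is kept, so the transposed array is [n_features, window_size, 1] and is then
# flattened to 2-D; the sizes below are arbitrary examples.
import numpy as np

n_features, window_size, n_frames = 40, 100, 1
frame_stacker = [np.random.randn(window_size, n_features).astype(np.float32)
                 for _ in range(n_frames)]
observation = np.array(frame_stacker, dtype=np.float32).transpose()
print(observation.shape)  # (40, 100, 1) -> [n_features, window_size, frames]
observation_2d = observation.reshape(observation.shape[0], -1)
print(observation_2d.shape)  # (40, 100) when frame_stack is False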
    def _send_to_broker_and_get_reward(self, action):
        reward = 0.0

        if action == 0:  # do nothing
            pass

        elif action == 1:  # buy
            price_fee_adjusted = self.midpoint + (self.fee * self.midpoint)
            if self.broker.short_inventory_count > 0:
                order = Order(ccy=self.sym, side='short',
                              price=price_fee_adjusted,
                              step=self._local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side)
            elif self.broker.long_inventory_count >= 0:
                order = Order(ccy=self.sym, side='long',
                              price=price_fee_adjusted,
                              step=self._local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= 0.00000001
            else:
                logger.warning(('gym_trading.get_reward() ' +
                                'Error for action #{} - ' +
                                'unable to place an order with broker').format(action))

        elif action == 2:  # sell
            price_fee_adjusted = self.midpoint - (self.fee * self.midpoint)
            if self.broker.long_inventory_count > 0:
                order = Order(ccy=self.sym, side='long',
                              price=price_fee_adjusted,
                              step=self._local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side)
            elif self.broker.short_inventory_count >= 0:
                order = Order(ccy=self.sym, side='short',
                              price=price_fee_adjusted,
                              step=self._local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= 0.00000001
            else:
                logger.warning(('gym_trading.get_reward() ' +
                                'Error for action #{} - ' +
                                'unable to place an order with broker').format(action))

        else:
            logger.warning(('Unknown action to take in get_reward(): ' +
                            'action={} | midpoint={}').format(action, self.midpoint))

        return reward

    def _create_position_features(self):
        return np.array((self.broker.long_inventory.position_count / self.max_position,
                         self.broker.short_inventory.position_count / self.max_position,
                         self.broker.long_inventory.get_unrealized_pnl(self.midpoint),
                         self.broker.short_inventory.get_unrealized_pnl(self.midpoint)))

    def _create_action_features(self, action):
        return np.array(self.actions[action])
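# Illustrative arithmetic (not part of the project) for the fee-adjusted fill
# prices used in _send_to_broker_and_get_reward above: buys are penalized by
# adding the fee, sells by subtracting it. The fee value is a made-up
# placeholder, not the project's BROKER_FEE constant.
fee = 0.003
midpoint = 4000.00
buy_price_fee_adjusted = midpoint + (fee * midpoint)    # 4012.0
sell_price_fee_adjusted = midpoint - (fee * midpoint)   # 3988.0
print(buy_price_fee_adjusted, sell_price_fee_adjusted)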
    def __init__(self, *,
                 training=True,
                 fitting_file='ETH-USD_2018-12-31.xz',
                 testing_file='ETH-USD_2019-01-01.xz',
                 step_size=1,
                 max_position=5,
                 window_size=4,
                 frame_stack=False):
        # properties required for instantiation
        PriceJump.instance_count += 1
        self._seed = int(PriceJump.instance_count)  # seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.fee = BROKER_FEE
        self.max_position = max_position
        self.window_size = window_size
        self.frame_stack = frame_stack
        self.frames_to_add = 3 if self.frame_stack else 0
        self.action = 0

        # derive gym.env properties
        self.actions = np.eye(3)
        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)

        # get historical data for simulations
        self.sim = Sim(use_arctic=False)

        fitting_data_filepath = '{}/data_exports/{}'.format(self.sim.cwd, fitting_file)
        data_used_in_environment = '{}/data_exports/{}'.format(self.sim.cwd, testing_file)
        # print('Fitting data: {}\nTesting Data: {}'.format(fitting_data_filepath,
        #                                                   data_used_in_environment))

        fitting_data = self.sim.import_csv(filename=fitting_data_filepath)
        fitting_data['coinbase_midpoint'] = np.log(
            fitting_data['coinbase_midpoint'].values)
        fitting_data['coinbase_midpoint'] = \
            fitting_data['coinbase_midpoint'].pct_change().fillna(method='bfill')
        self.sim.fit_scaler(fitting_data)
        del fitting_data

        self.data = self.sim.import_csv(filename=data_used_in_environment)
        self.prices_ = self.data['coinbase_midpoint'].values  # used to calculate PnL
        self.normalized_data = self.data.copy()
        self.data = self.data.values

        self.normalized_data['coinbase_midpoint'] = np.log(
            self.normalized_data['coinbase_midpoint'].values)
        self.normalized_data['coinbase_midpoint'] = (
            self.normalized_data['coinbase_midpoint'] -
            self.normalized_data['coinbase_midpoint'].shift(1)).fillna(method='bfill')

        self.tns = TnS()
        self.rsi = RSI()

        logger.info("Pre-scaling {}-{} data...".format(self.sym, self._seed))
        self.normalized_data = self.normalized_data.apply(self.sim.z_score,
                                                          axis=1).values
        logger.info("...{}-{} pre-scaling complete.".format(self.sym, self._seed))

        # rendering class
        self._render = TradingGraph(sym=self.sym)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        self.data_buffer, self.frame_stacker = list(), list()

        self.action_space = spaces.Discrete(len(self.actions))
        variable_features_count = len(self.inventory_features) + len(self.actions) + \
            1 + len(PriceJump.indicator_features)

        if self.frame_stack:
            shape = (4, len(PriceJump.features) + variable_features_count,
                     self.window_size)
        else:
            shape = (self.window_size,
                     len(PriceJump.features) + variable_features_count)

        self.observation_space = spaces.Box(low=self.data.min(),
                                            high=self.data.max(),
                                            shape=shape,
                                            dtype=np.float32)  # observations are floats

        print('PriceJump #{} instantiated.\nself.observation_space.shape : {}'.format(
            PriceJump.instance_count, self.observation_space.shape))
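# Illustrative sketch (not part of the project) of the observation_space shape
# arithmetic in the __init__ above. The feature counts are made-up placeholders
# for len(PriceJump.features), len(self.inventory_features) and
# len(PriceJump.indicator_features).
n_market_features = 40       # stand-in for len(PriceJump.features)
n_inventory_features = 4     # stand-in for len(self.inventory_features)
n_actions = 3                # do nothing, buy, sell
n_indicator_features = 2     # stand-in for len(PriceJump.indicator_features)
window_size = 4

variable_features_count = (n_inventory_features + n_actions + 1 +
                           n_indicator_features)
shape_stacked = (4, n_market_features + variable_features_count, window_size)
shape_flat = (window_size, n_market_features + variable_features_count)
print(shape_stacked)  # (4, 50, 4) when frame_stack=True
print(shape_flat)     # (4, 50) otherwise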