def _reset(self, t, i):
    """Start a fresh episode at time ``t`` with inventory ``i``.

    Picks a random order book state, builds the feature for it, and stores
    the resulting ActionState on the instance. Any previous execution is
    discarded.

    Returns:
        The new state converted with ``ActionState.toArray()``.
    """
    # Only the index of the randomly chosen order book state is used here.
    _, start_index = self._get_random_orderbook_state()
    features = self._makeFeature(orderbookIndex=start_index, qty=i)
    feature_map = {self.featureType.value: features}
    initial_state = ActionState(t, i, feature_map)

    # No execution exists until the first action is taken.
    self.execution = None
    self.orderbookIndex = start_index
    self.actionState = initial_state
    return initial_state.toArray()
def _reset(self, t, i):
    """Start a fresh episode at time ``t`` with inventory ``i``.

    Picks a random order book state and initialises the buy side and the
    sell side with the same order book index and the same ActionState.

    Returns:
        The new state converted with ``ActionState.toArray()``.
    """
    # TODO: restoring the order book via
    # copy.deepcopy(self.orderbookOriginal) is disabled here. The original
    # note called it slow but required after every episode because matching
    # changes order book states -- confirm whether a reset is still needed.

    # Only the index of the randomly chosen order book state is used here.
    _, start_index = self._get_random_orderbook_state()
    bid_ask = self._makeFeature(orderbookIndex=start_index)
    initial_state = ActionState(t, i, {'bidask': bid_ask})

    # Neither side has an execution until the first action is taken.
    self.executionBuy = self.executionSell = None
    self.orderbookIndexBuy = self.orderbookIndexSell = start_index
    # Both sides reference the very same ActionState instance.
    self.actionStateBuy = self.actionStateSell = initial_state
    return initial_state.toArray()
def step(self, action):
    """Advance the environment by one step using the chosen action index.

    ``action`` is used as an index into ``self.levels``. The looked-up
    level is appended to ``self.episodeActions`` and is used to create the
    execution (when ``self.execution`` is None) or to update the existing
    one. The execution is then run against ``self.orderbook`` and the next
    state is derived from the result.

    Args:
        action: Index into ``self.levels``.

    Returns:
        A 4-tuple ``(state_array, reward, done, info)``: ``state_array`` is
        ``toArray()`` of the next ActionState, ``done`` is true when the
        execution is filled or the next inventory equals 0, and ``info``
        is always an empty dict.
    """
    # NOTE(review): this counter is incremented on every call to step(),
    # and is later passed to on_episode_end as the episode number --
    # confirm that per-step counting is intended.
    self.episode += 1
    action = self.levels[action]
    self.episodeActions.append(action)

    if self.execution is None:
        self.execution = self._create_execution(action)
    else:
        self.execution = self._update_execution(self.execution, action)

    logging.info(
        'Created/Updated execution.'
        '\nAction: %s (%s)'
        '\nt: %s'
        '\nruntime: %s'
        '\ni: %s',
        action,
        self.execution.getOrder().getType(),
        self.actionState.getT(),
        self.execution.getRuntime(),
        self.actionState.getI(),
    )
    self.execution, counterTrades = self.execution.run(self.orderbook)

    i_next = self._determine_next_inventory(self.execution)
    t_next = self._determine_next_time(self.execution.getState().getT())
    feature = self._makeFeature(
        orderbookIndex=self.execution.getOrderbookIndex(), qty=i_next)
    state_next = ActionState(
        t_next, i_next, {self.featureType.value: feature})

    done = self.execution.isFilled() or state_next.getI() == 0
    if done:
        reward = self.execution.getReward()
        volumeRatio = 1.0
        # The previous check, `self.callbacks is not []`, compared identity
        # against a brand-new list and was therefore always True. A
        # truthiness test skips the loop for an empty list and for None.
        if self.callbacks:
            for cb in self.callbacks:
                cb.on_episode_end(
                    self.episode,
                    {
                        'episode_reward': reward,
                        'episode_actions': self.episodeActions,
                    })
        # Start collecting actions afresh for the next episode.
        self.episodeActions = []
    else:
        reward, volumeRatio = self.execution.calculateRewardWeighted(
            counterTrades, self.I[-1])

    logging.info(
        'Run execution.'
        '\nTrades: %s'
        '\nReward: %s (Ratio: %s)'
        '\nDone: %s',
        len(counterTrades),
        reward,
        volumeRatio,
        done,
    )
    self.orderbookIndex = self.execution.getOrderbookIndex()
    self.actionState = state_next
    return state_next.toArray(), reward, done, {}