def _reset(self, t, i):
    """Reset the environment for a new episode starting at time t with inventory i.

    Restores a pristine copy of the order book, samples a random order book
    position, builds the initial bid/ask feature, and returns the starting
    state as an array.
    """
    # Matching mutates order book state during an episode, so restore a
    # deep copy every reset. TODO: slow, but currently required.
    self.orderbook = copy.deepcopy(self.orderbookOriginal)
    _, startIndex = self._get_random_orderbook_state()
    feature = self._makeFeature(orderbookIndex=startIndex)
    initialState = ActionState(t, i, {'bidask': feature})
    self.execution = None
    self.orderbookIndex = startIndex
    self.actionState = initialState
    return initialState.toArray()
# --- Example #2 (示例#2), score: 0 ---
 def update(self, t, i, force_execution=False):
     """Run one Q-learning step at (t, i) and return the successor pair.

     The agent picks an action level, the environment executes it against
     the order book, and the observed reward is fed back into the learner.

     Returns a ``(t_next, i_next)`` tuple describing the next time step
     and remaining inventory.
     """
     currentState = ActionState(t, i)
     chosenLevel = self.ai.chooseAction(currentState)
     action = self.env.createAction(level=chosenLevel,
                                    state=currentState,
                                    force_execution=force_execution)
     action.run(self.env.orderbook)
     nextInventory = self.env.determineNextInventory(action)
     nextTime = self.env.determineNextTime(t)
     reward = action.getReward()
     # Successor keeps the executed action's market features but advances
     # time and inventory to the post-execution values.
     successor = ActionState(action.getState().getT(),
                             action.getState().getI(),
                             action.getState().getMarket())
     successor.setT(nextTime)
     successor.setI(nextInventory)
     self.ai.learn(state1=action.getState(),
                   action1=action.getA(),
                   reward=reward,
                   state2=successor)
     return (nextTime, nextInventory)
    def step(self, action):
        """Advance the environment by one agent action.

        Parameters
        ----------
        action : int
            Index into ``self.levels``; translated to an order level.

        Returns
        -------
        tuple
            ``(observation, reward, done, info)`` in the Gym convention,
            where ``observation`` is ``ActionState.toArray()``.
        """
        # Translate the discrete action index into an order level.
        # (The original shadowed the `action` parameter here.)
        level = self.levels[action]
        # First step of an episode creates the execution; subsequent steps
        # re-parameterize the running one with the new level.
        if self.execution is None:
            self.execution = self._create_execution(level)
        else:
            self.execution = self._update_execution(self.execution, level)

        # Lazy %-style args: the message is only formatted if INFO is enabled.
        logging.info(
            'Created/Updated execution.'
            '\nAction: %s (%s)'
            '\nt: %s'
            '\nruntime: %s'
            '\ni: %s',
            level,
            self.execution.getOrder().getType(),
            self.actionState.getT(),
            self.execution.getRuntime(),
            self.actionState.getI()
        )
        self.execution, counterTrades = self.execution.run(self.orderbook)

        i_next = self._determine_next_inventory(self.execution)
        t_next = self._determine_next_time(self.execution.getState().getT())

        bidAskFeature = self._makeFeature(orderbookIndex=self.execution.getOrderbookIndex())
        state_next = ActionState(t_next, i_next, {'bidask': bidAskFeature})
        # Episode ends once the order is filled or inventory is exhausted.
        done = self.execution.isFilled() or state_next.getI() == 0
        reward, volumeRatio = self.execution.calculateRewardWeighted(counterTrades, self.I[-1])

        logging.info(
            'Run execution.'
            '\nTrades: %s'
            '\nReward: %s (Ratio: %s)'
            '\nDone: %s',
            len(counterTrades), reward, volumeRatio, done
        )
        self.orderbookIndex = self.execution.getOrderbookIndex()
        self.actionState = state_next
        return state_next.toArray(), reward, done, {}
# --- Example #4 (示例#4), score: 0 ---
 def testStateEquality(self):
     """Two ActionStates built from identical (t, i, market) values must act
     as the same Q-table key, so a value learned via one is retrievable
     through the other."""
     agent = QLearn([-1, 0, 1])
     first = ActionState(1.0, 1.0, {'vol60': 1})
     second = ActionState(1.0, 1.0, {'vol60': 1})
     agent.learn(first, 1, 1.0, second)
     self.assertEqual(agent.getQAction(second), 1)
# --- Example #5 (示例#5), score: 0 ---
import unittest
from qlearn import QLearn
from action_state import ActionState
import numpy as np


class QlearnTest(unittest.TestCase):
    """Unit tests for the QLearn agent's state handling."""

    def testStateEquality(self):
        """Identical ActionStates must hash/compare equal as Q-table keys."""
        agent = QLearn([-1, 0, 1])
        lhs = ActionState(1.0, 1.0, {'vol60': 1})
        rhs = ActionState(1.0, 1.0, {'vol60': 1})
        agent.learn(lhs, 1, 1.0, rhs)
        self.assertEqual(agent.getQAction(rhs), 1)

    #def testQTableLookup(self):
# Ad-hoc check: load a previously pickled Q-table and print the best action
# for one representative state.
actions = [5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -7, -10, -15, -20]
ai = QLearn(actions)
# np.load refuses pickled objects by default since NumPy 1.16.3; the stored
# Q-table is a pickled dict, so allow_pickle=True is required here.
ai.q = np.load('test_q.npy', allow_pickle=True).item()
state = ActionState(30, 0.9, {})
print(ai.getQAction(state))
# --- Example #6 (示例#6), score: 0 ---
    def backtest(self, q=None, episodes=10, average=False, fixed_a=None):
        """Replay the learned (or provided) policy over the full horizon.

        Parameters
        ----------
        q : dict, optional
            Q-table to use; defaults to the agent's current table.
        episodes : int
            Unused here; kept for interface compatibility.
        average : bool
            If True, return the averaged backtest via ``averageBacktest``.
        fixed_a : optional
            If given, always take this action instead of the Q-policy.

        Returns
        -------
        list
            ``[state, midPrice, actions, price, profit]`` rows, or the
            averaged result when ``average`` is True.

        Raises
        ------
        Exception
            If the Q-table is empty.
        """
        if q is None:
            q = self.ai.q
        else:
            self.ai.q = q

        if not q:
            raise Exception('Q-Table is empty, please train first.')

        Ms = []
        # Only the longest horizon and the full inventory are backtested.
        for t in [self.T[-1]]:
            logging.info("\n" + "t==" + str(t))
            for i in [self.I[-1]]:
                logging.info("     i==" + str(i))
                actions = []
                state = ActionState(t, i, {})
                if fixed_a is not None:
                    a = fixed_a
                else:
                    try:
                        a = self.ai.getQAction(state, 0)
                    # Narrowed from a bare `except:` so SystemExit and
                    # KeyboardInterrupt are no longer swallowed.
                    except Exception:
                        # State might not be in Q-Table yet; more training required.
                        logging.info("State " + str(state) +
                                     " not in Q-Table.")
                        break
                actions.append(a)
                action = self.createAction(level=a,
                                           state=state,
                                           force_execution=False)
                midPrice = action.getReferencePrice()

                action.run(self.orderbook)
                i_next = self.determineNextInventory(action)
                t_next = self.determineNextTime(t)
                # Keep acting until the inventory is fully executed.
                while i_next != 0:
                    state_next = ActionState(t_next, i_next, {})
                    if fixed_a is not None:
                        a_next = fixed_a
                    else:
                        try:
                            a_next = self.ai.getQAction(state_next, 0)
                            print("t: " + str(t_next))
                            print("i: " + str(i_next))
                            print("Action: " + str(a_next))
                        except Exception:
                            # State might not be in Q-Table yet; more training required.
                            break
                    actions.append(a_next)
                    runtime_next = self.determineRuntime(t_next)
                    action.setState(state_next)
                    action.update(a_next, runtime_next)
                    action.run(self.orderbook)
                    i_next = self.determineNextInventory(action)
                    t_next = self.determineNextTime(t_next)

                price = action.getAvgPrice()
                # TODO: last column is for the BUY scenario only
                if action.getOrder().getSide() == OrderSide.BUY:
                    profit = midPrice - price
                else:
                    profit = price - midPrice
                Ms.append([state, midPrice, actions, price, profit])
        if not average:
            return Ms
        return self.averageBacktest(Ms)
# --- Example #7 (示例#7), score: 0 ---
 def add_pair(self, state, prob, count):
     """Wrap (state, prob, count) in an ActionState and record it."""
     self.action_states.append(ActionState(state, prob, count))