Example #1
import unittest
from qlearn import QLearn
from action_state import ActionState
import numpy as np


class QlearnTest(unittest.TestCase):
    def testStateEquality(self):
        ai = QLearn([-1, 0, 1])
        a1 = ActionState(1.0, 1.0, {'vol60': 1})
        a2 = ActionState(1.0, 1.0, {'vol60': 1})
        ai.learn(a1, 1, 1.0, a2)
        self.assertEqual(ai.getQAction(a2), 1)

    def testQTableLookup(self):
        actions = [5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -7, -10, -15, -20]
        ai = QLearn(actions)
        # Load a pre-trained Q-table (a dict serialized with np.save);
        # allow_pickle=True is required for np.load to deserialize it.
        ai.q = np.load('test_q.npy', allow_pickle=True).item()
        state = ActionState(30, 0.9, {})
        # Inspect the stored Q-value for one (state, action) pair and the
        # greedy action for that state.
        print(ai.q.get((state, -10)))
        print(ai.getQAction(state))
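
The module above can also be executed directly; a standard unittest entry point (an addition for illustration, not part of the original snippet) would be:

if __name__ == '__main__':
    unittest.main()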
Example #2
import logging
import numpy as np
from qlearn import QLearn
from action_state import ActionState
from order_side import OrderSide  # module path assumed; adjust to the project layout


class AgentQlearn:
    def __init__(self, env, levels):
        # `levels` is the grid of candidate order levels available to the Q-learner.
        self.env = env
        self.levels = levels
        self.ai = QLearn(self.levels)

    def update(self, t, i, force_execution=False):
        aiState = ActionState(t, i)
        a = self.ai.chooseAction(aiState)
        # print('Chosen action: ' + str(a) + ' for state: ' + str(aiState))
        action = self.env.createAction(level=a,
                                       state=aiState,
                                       force_execution=force_execution)
        action.run(self.env.orderbook)
        i_next = self.env.determineNextInventory(action)
        t_next = self.env.determineNextTime(t)
        reward = action.getReward()
        state_next = ActionState(t_next, i_next,
                                 action.getState().getMarket())
        #print("Reward " + str(reward) + ": " + str(action.getState()) + " with " + str(action.getA()) + " -> " + str(state_next))
        self.ai.learn(state1=action.getState(),
                      action1=action.getA(),
                      reward=reward,
                      state2=state_next)
        return (t_next, i_next)

    def train(self, episodes=1, force_execution=False):
        for episode in range(int(episodes)):
            for t in self.env.T:
                logging.info("\n" + "t==" + str(t))
                for i in self.env.I:
                    logging.info("     i==" + str(i))
                    logging.info("Action run " + str((t, i)))
                    (t_next, i_next) = self.update(t, i, force_execution)
                    while i_next != 0:
                        if force_execution:
                            raise Exception("Enforced execution left " +
                                            str(i_next) + " unexecuted.")
                        logging.info("Action transition " + str((t, i)) +
                                     " -> " + str((t_next, i_next)))
                        (t_next, i_next) = self.update(t_next, i_next,
                                                       force_execution)

    def backtest(self, q=None, episodes=10, average=False, fixed_a=None):
        if q is None:
            q = self.ai.q
        else:
            self.ai.q = q

        if not q:
            raise Exception('Q-Table is empty, please train first.')

        Ms = []
        #T = self.T[1:len(self.T)]
        for t in [self.env.T[-1]]:
            logging.info("\n" + "t==" + str(t))
            for i in [self.env.I[-1]]:
                logging.info("     i==" + str(i))
                actions = []
                state = ActionState(t, i, {})
                #print(state)
                if fixed_a is not None:
                    a = fixed_a
                else:
                    try:
                        a = self.ai.getQAction(state, 0)
                        print("t: " + str(t))
                        print("i: " + str(i))
                        print("Action: " + str(a))
                        # print("Q action for state " + str(state) + ": " + str(a))
                    except Exception:
                        # State might not be in the Q-table yet; more training required.
                        logging.info("State " + str(state) +
                                     " not in Q-Table.")
                        break
                actions.append(a)
                action = self.env.createAction(level=a,
                                               state=state,
                                               force_execution=False)
                midPrice = action.getReferencePrice()

                #print("before...")
                #print(action)
                action.run(self.env.orderbook)
                #print("after...")
                #print(action)
                i_next = self.env.determineNextInventory(action)
                t_next = self.env.determineNextTime(t)
                # print("i_next: " + str(i_next))
                while i_next != 0:
                    state_next = ActionState(t_next, i_next, {})
                    if fixed_a is not None:
                        a_next = fixed_a
                    else:
                        try:
                            a_next = self.ai.getQAction(state_next, 0)
                            print("t: " + str(t_next))
                            print("i: " + str(i_next))
                            print("Action: " + str(a_next))
                            # print("Q action for next state " + str(state_next) + ": " + str(a_next))
                        except Exception:
                            # State might not be in the Q-table yet; more training required.
                            # print("State " + str(state_next) + " not in Q-Table.")
                            break
                    actions.append(a_next)
                    #print("Action transition " + str((t, i)) + " -> " + str(aiState_next) + " with " + str(runtime_next) + "s runtime.")

                    runtime_next = self.env.determineRuntime(t_next)
                    action.setState(state_next)
                    action.update(a_next, runtime_next)
                    action.run(self.env.orderbook)
                    #print(action)
                    i_next = self.env.determineNextInventory(action)
                    t_next = self.env.determineNextTime(t_next)

                price = action.getAvgPrice()
                # TODO: last column is for the BUY scenario only
                if action.getOrder().getSide() == OrderSide.BUY:
                    profit = midPrice - price
                else:
                    profit = price - midPrice
                Ms.append([state, midPrice, actions, price, profit])
        if not average:
            return Ms
        return self.averageBacktest(Ms)

    def averageBacktest(self, M):
        # Average states within M
        N = []
        observed = []
        for x in M:
            state = x[0]
            if state in observed:
                continue
            observed.append(state)
            paid = []
            reward = []
            for y in M:
                if y[0] == state:
                    paid.append(y[3])
                    reward.append(y[4])
            N.append([state, x[1], x[2], np.average(paid), np.average(reward)])
        return N

    def run(self, epochs_train=1, epochs_test=10):
        if epochs_train > 0:
            self.train(episodes=epochs_train)
        M = self.backtest(episodes=epochs_test, average=False)
        M = np.array(M)
        return np.mean(M[:, 4])

    def simulate(self, epochs_train=1, epochs_test=10, interval=100):
        from agent_utils.ui import UI
        UI.animate(lambda: self.run(epochs_train, epochs_test),
                   interval=interval)
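
As a rough usage sketch: the agent needs an order-book environment exposing the attributes and methods used above (T, I, orderbook, createAction, determineNextInventory, determineNextTime, determineRuntime). The environment constructor below (load_environment) and the level grid are assumptions for illustration, not part of the project API.

# Hypothetical wiring; load_environment() is a placeholder for however the
# project builds its order-book environment.
levels = [5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -7, -10, -15, -20]
env = load_environment()
agent = AgentQlearn(env, levels)
agent.train(episodes=10)
# Averaged backtest rows are [state, mid price, actions taken, avg fill price, profit].
for state, mid_price, actions, avg_price, profit in agent.backtest(average=True):
    print(state, mid_price, actions, avg_price, profit)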