Example #1
    def __init__(self, config):
        self._config = config
        self._eps_schedule = LinearSchedule(self._config.eps_begin,
                                            self._config.eps_end,
                                            self._config.nsteps)
        self._lr_schedule = LinearSchedule(self._config.lr_begin,
                                           self._config.lr_end,
                                           self._config.lr_nsteps)
        self._oq = Order_Queue(self._config.order_path)
        self._mq = Message_Queue(self._config.message_path)
        self._bf = ReplayBuffer(1000000, config)

        self._action_fn = self.get_action_fn()

        self.build()
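
# LinearSchedule is not defined in these examples. Below is a minimal sketch of
# what it plausibly implements, assuming only the constructor arguments above
# and the get_epsilon() call seen in Example #3 (the per-call step update is an
# assumption):
class LinearSchedule(object):
    def __init__(self, begin, end, nsteps):
        self._begin = begin
        self._end = end
        self._nsteps = nsteps
        self._t = 0

    def get_epsilon(self):
        # Interpolate linearly from begin to end over nsteps calls, then hold.
        frac = min(1.0, 1.0 * self._t / self._nsteps)
        self._t += 1
        return self._begin + frac * (self._end - self._begin)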
Example #2
def main():
    config = Config()
    config.mode = 'test'
    config.dropout = 1.0  # presumably a keep probability: no dropout at test time
    model = Neural_DQN(config)
    #model = DQN(config)
    model.initialize()
    oq = Order_Queue(config.order_path)
    mq = Message_Queue(config.message_path)
    rewards = evaluate_policy(model, oq, mq)
    print(np.mean(rewards))
Example #3
class model(object):
    def __init__(self, config):
        self._config = config
        self._eps_schedule = LinearSchedule(self._config.eps_begin,
                                            self._config.eps_end,
                                            self._config.nsteps)
        self._lr_schedule = LinearSchedule(self._config.lr_begin,
                                           self._config.lr_end,
                                           self._config.lr_nsteps)
        self._oq = Order_Queue(self._config.order_path)
        self._mq = Message_Queue(self._config.message_path)
        self._bf = ReplayBuffer(1000000, config)

        self._action_fn = self.get_action_fn()

        self.build()

    def build(self):
        pass

    def initialize(self):
        pass

    def get_random_action(self, state):
        # returns (action, q_value); implemented by subclasses
        pass

    def get_best_action(self, state):
        # returns (action, q_value); implemented by subclasses
        pass

    def get_action(self, state):
        # Epsilon-greedy: act randomly with probability given by the schedule.
        if np.random.random() < self._eps_schedule.get_epsilon():
            return self.get_random_action(state)[0]
        else:
            return self.get_best_action(state)[0]

    def get_random_action_fn(self):
        def random_action_fn(t, amount, state, mid_price):
            # Sample uniformly from the L limit-order actions {0, ..., L-1};
            # in the encoding used here, action == L denotes a market order
            # and is never drawn at random.
            action = np.random.randint(self._config.L)
            price = (action -
                     self._config.L // 2) * self._config.base_point + mid_price
            return (price, action)

        return random_action_fn

    def get_action_fn(self):
        def action_fn(t, amount, state, mid_price):
            action = self.get_action(state)
            price = (action -
                     self._config.L // 2) * self._config.base_point + mid_price
            return (price, action)

        return action_fn
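    # Worked example (values assumed for illustration): with L = 10 and
    # base_point = 100, an action a in {0, ..., 9} maps to
    # price = (a - 5) * 100 + mid_price, i.e. limit prices from
    # mid_price - 500 to mid_price + 400 in steps of 100, while action == L
    # (10) encodes a market order.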

    def pad_state(self, states, state_history):
        tmp_states, tmp_its = zip(*states)
        tmp_state = np.concatenate(
            [np.expand_dims(state, -1) for state in tmp_states], axis=-1)
        tmp_state = np.pad(tmp_state,
                           ((0, 0), (0, 0),
                            (state_history - tmp_state.shape[-1], 0)),
                           'constant',
                           constant_values=0)
        tmp_it = tmp_its[-1]
        return ([tmp_state], [tmp_it])
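    # Shape sketch for pad_state: each processed book state is a (rows, cols)
    # array; expand_dims makes it (rows, cols, 1), concatenation stacks the k
    # most recent states into (rows, cols, k), and the zero padding left-pads
    # the last axis to (rows, cols, state_history) when fewer than
    # state_history states have been observed.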

    def simulate_an_episode(self, amount, T, H, start_time, order_direction,
                            action_fn, depth):
        dH = H // T  # time between decisions: T adjustments over horizon H
        self._mq.reset()
        lob_data = self._oq.create_orderbook_time(start_time, self._mq)
        lob = Limit_Order_book(**lob_data,
                               own_amount_to_trade=0,
                               own_init_price=-order_direction *
                               Limit_Order_book._DUMMY_VARIABLE,
                               own_trade_type=order_direction)
        rewards = []
        states = []
        actions = []
        done_mask = []

        amount_remain = amount
        cum_reward = 0

        for t in range(start_time, start_time + H - dH, dH):
            tmp1 = 1.0 * amount_remain / amount  # amount remain
            tmp2 = 1.0 * (start_time + H - t) / H  # time remain
            state = (lob.display_book(depth),
                     np.array([tmp1, tmp2], dtype=float))
            state = self.process_state(state)
            states.append(state)

            mid_price = lob.get_mid_price()
            state_input = self.pad_state(states[-self._config.state_history:],
                                         self._config.state_history)
            price, action = action_fn(start_time + H - t, amount_remain,
                                      state_input, mid_price)
            actions.append(action)
            done_mask.append(False)

            lob.update_own_order(price, amount_remain)

            for idx, message in self._mq.pop_to_next_time(t + dH):
                lob.process(**message)
                if lob.own_amount_to_trade == 0:
                    done_mask.append(True)
                    state = (lob.display_book(depth),
                             np.array([
                                 0, 1.0 * (start_time + H - self._mq._time) / H
                             ],
                                      dtype=float))
                    state = self.process_state(state)
                    states.append(state)
                    rewards.append(lob.own_reward - cum_reward)
                    break
            if done_mask[-1]:
                break
            else:
                # Order not fully executed in this interval: record the
                # incremental reward, then carry the cumulative reward and the
                # remaining amount into the next decision step.
                rewards.append(lob.own_reward - cum_reward)
                cum_reward = lob.own_reward
                amount_remain = lob.own_amount_to_trade

        if not done_mask[-1]:
            tmp1 = 1.0 * amount_remain / amount
            tmp2 = 1.0 * (start_time + H - t - dH) / H
            state = (lob.display_book(depth),
                     np.array([tmp1, tmp2], dtype=float))
            state = self.process_state(state)
            states.append(state)
            done_mask.append(False)

            # Force the remainder out as a market order at the dummy
            # (maximally aggressive) price.
            lob.update_own_order(lob.own_trade_type *
                                 Limit_Order_book._DUMMY_VARIABLE)
            if lob.own_amount_to_trade == 0:
                rewards.append(lob.own_reward - cum_reward)
            else:
                # Even the market order could not fill: assign a large
                # negative reward.
                rewards.append(-Limit_Order_book._DUMMY_VARIABLE)
            tmp1 = 1.0 * lob.own_amount_to_trade / amount
            state = (lob.display_book(depth), np.array([tmp1, 0], dtype=float))
            state = self.process_state(state)
            states.append(state)
            actions.append(self._config.L)  # action index L encodes the market order
            done_mask.append(True)
        return (states, rewards, actions, done_mask[1:])

    def sampling_buffer(self):
        for start_time in range(self._config.train_start,
                                self._config.train_end, self._config.H):
            states, rewards, actions, done_mask = self.simulate_an_episode(
                self._config.I, self._config.T, self._config.H, start_time,
                self._config.direction, self._action_fn, self._config.depth)
            self._bf.store(states, actions, rewards, done_mask)

    def process_state(self, state):
        # Rescale the book so prices and sizes have comparable magnitude;
        # column layout (bid price, bid size, ask price, ask size) inferred
        # from the book construction in Example #6.
        state_book, state_it = state
        state_book = state_book.astype('float32')
        state_book[:, 0] /= 1.e6  # bid prices
        state_book[:, 1] /= 1.e2  # bid sizes
        state_book[:, 2] /= 1.e6  # ask prices
        state_book[:, 3] /= 1.e2  # ask sizes
        return (state_book, state_it)
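
# ReplayBuffer is also external to these examples. A minimal sketch consistent
# with the calls seen above, ReplayBuffer(1000000, config) and
# store(states, actions, rewards, done_mask); the deque storage and the
# sample() helper are assumptions:
import collections
import random

class ReplayBuffer(object):
    def __init__(self, size, config):
        self._buffer = collections.deque(maxlen=size)
        self._config = config

    def store(self, states, actions, rewards, done_mask):
        # simulate_an_episode returns len(actions) + 1 states, so states[i + 1]
        # is the successor of states[i].
        for i in range(len(actions)):
            self._buffer.append((states[i], actions[i], rewards[i],
                                 states[i + 1], done_mask[i]))

    def sample(self, batch_size):
        # Assumed training-side helper: uniform minibatch of transitions.
        return random.sample(self._buffer, batch_size)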
Example #4
# The opening of this helper was truncated in the source; the signature below
# is a hypothetical reconstruction from the variables used in the body.
def simulate_order(time, next_time, lob, a_price, amount, mq):
    # Replay messages from `time` to `next_time` against a copy of the book
    # with our order placed at a_price, and report what remains.
    mq.reset()
    mq.jump_to_time(time)

    lob_copy = copy.deepcopy(lob)
    lob_copy.update_own_order(a_price, amount)

    for idx, message in mq.pop_to_next_time(next_time):
        lob_copy.process(**message)
        if lob_copy.own_amount_to_trade == 0:
            break

    return [lob_copy.own_amount_to_trade, lob_copy.own_reward]

path_target = '../data/%s_Q_dp_%s.npy' % (args.tic, args.V)
oq = Order_Queue(file_order)
mq = Message_Queue(file_msg)

if args.mode == 'train':
    np.save(path_target,
            Calculate_Q(args.V, args.H, args.T, args.I, args.L, oq, mq))
elif args.mode == 'test':
    Q = np.load(path_target)
    Optimal_action = Optimal_strategy(Q)
    rewards = evaluate_policy(args.test_start, args.test_end,
                              args.order_direction, args.V, args.H, args.T,
                              oq, mq, Optimal_action)
    print(rewards)
    print(np.mean(rewards))
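
# Calculate_Q and Optimal_strategy are defined elsewhere. If Q is a table whose
# last axis ranges over the actions (an assumption; the real layout is not
# shown here), the greedy strategy reduces to an argmax. A hypothetical sketch:
def Optimal_strategy(Q):
    return np.argmax(Q, axis=-1)  # greedy action for every state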
Example #5
parser.add_argument('--order_direction',
                    default=1,
                    help='Buy 1, Sell -1',
                    type=int)
parser.add_argument('--start', default=34200, help='Start Time', type=float)
parser.add_argument('--end', default=34500, help='End Time', type=float)
parser.add_argument('--adj_freq',
                    default=100,
                    help='Adjustment Frequency',
                    type=float)
parser.add_argument('--tol',
                    default=1e-8,
                    help='Remaining Time To Submit Market Order',
                    type=float)
parser.add_argument('--base_point', default=100, help='Base Point', type=int)
args = parser.parse_args()

mq = Message_Queue(args.file_msg)
lob = Limit_Order_book(own_amount_to_trade=args.order_size,
                       own_init_price=-args.order_direction *
                       Limit_Order_book._DUMMY_VARIABLE,
                       own_trade_type=args.order_direction)
for idx, message in mq.pop_to_next_time(args.start):
    lob.process(**message)


def optimal(time, lob, mq):
    if time == (
            args.end - args.tol
    ):  # this forces (args.end - args.start) to be a multiple of args.tol
        if lob.own_amount_to_trade == 0:
            return lob.own_reward
        else:
Example #6
        print('Add Buy Order %s' % status)
    elif idx in [41]:
        print('Add Sell Order %s' % status)
    elif idx in [5]:
        print('Execute Hidden Order %s' % status)
    elif idx in [46]:
        print('Delete Buy Order %s' % status)
    elif idx in [47]:
        print('Delete Sell Order %s' % status)

    if status == '[FAIL]':
        print('ERROR! idx %d msg %s' % (idx, str(msg)))


message_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_message_10.csv'
mq = Message_Queue(message_path)

book_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_orderbook_10.csv'
df_book = pd.read_csv(book_path, header=None)
level = 10
ask_book = df_book[np.arange(level) * 4].values
ask_size_book = df_book[1 + np.arange(level) * 4].values
bid_book = df_book[2 + np.arange(level) * 4].values
bid_size_book = df_book[3 + np.arange(level) * 4].values
book = np.concatenate([
    tmp[:, :, np.newaxis]
    for tmp in [bid_book, bid_size_book, ask_book, ask_size_book]
],
                      axis=2)  # shape: (num_snapshots, level, 4)

for idx, message in mq.iterate_queue():
Example #7
parser.add_argument('--order_direction',
                    default=1,
                    help='Buy 1, Sell -1',
                    type=int)
parser.add_argument('--start', default=34200, help='Start Time', type=float)
parser.add_argument('--end', default=34500, help='End Time', type=float)
parser.add_argument('--adj_freq',
                    default=100,
                    help='Adjustment Frequency',
                    type=float)
parser.add_argument('--tol',
                    default=1e-8,
                    help='Remaining Time To Submit Market Order',
                    type=float)
args = parser.parse_args()

mq = Message_Queue(args.file_msg)
lob = Limit_Order_book(own_amount_to_trade=args.order_size,
                       own_init_price=-args.order_direction *
                       Limit_Order_book._DUMMY_VARIABLE,
                       own_trade_type=args.order_direction)
for idx, message in mq.pop_to_next_time(args.start):
    lob.process(**message)

# Submit the full order at the dummy (maximally aggressive) price, i.e. as a
# market order.
lob.update_own_order(args.order_direction * Limit_Order_book._DUMMY_VARIABLE)

current_time = args.start
while lob.own_amount_to_trade > 0 and not mq.finished():
    current_time += args.adj_freq
    for idx, message in mq.pop_to_next_time(current_time):
        lob.process(**message)
        if lob.own_amount_to_trade == 0:
            break
Example #8
parser.add_argument('--adj_freq',
                    help='Adjustment Frequency',
                    type=float)
parser.add_argument('--tol',
                    default=100,
                    help='Remaining Time To Submit Market Order',
                    type=float)
parser.add_argument('--num',
                    default=10,
                    help='The number of base points to go',
                    type=int)
args = parser.parse_args()
# Use the [train_start, train_end] window to find the best num; H is the total
# amount of time allowed to execute the order.
file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % (
    args.tic)

mq = Message_Queue(file_msg)
lob = Limit_Order_book(own_amount_to_trade=args.order_size,
                       own_init_price=-args.order_direction *
                       Limit_Order_book._DUMMY_VARIABLE,
                       own_trade_type=args.order_direction)
for idx, message in mq.pop_to_next_time(args.train_start):
    lob.process(**message)

# Candidate initial prices: a grid of base_point offsets around the current
# mid price, keeping only positive prices.
current_mid_price = lob.bid[0] + (lob.ask[0] - lob.bid[0]) // 2
init_price = np.arange(current_mid_price - args.num * args.base_point,
                       current_mid_price + args.num * args.base_point,
                       args.base_point)
init_price = init_price[init_price > 0]

reward = np.zeros(init_price.shape)  # reward for each candidate initial price