Exemplo n.º 1
0
def books_from_lines_v1(lines, debug=False, end=None, drop_out_of_order=False):
    """Build a list of order books from an iterable of text lines.

    A line whose first nine characters are ``ORDERBOOK`` starts a new book;
    the text from index 10 onwards is parsed with ``parse_datetime_opt`` and
    becomes the book's ``lastUpdateTime``.  Every other line is split on
    commas and turned into an ``Order`` that is added to the current book as
    a bid or an offer, depending on its side column.

    lines              -- iterable of strings
    debug              -- accepted but not used
    end                -- when truthy, stop reading once more than ``end``
                          headers have been seen
    drop_out_of_order  -- when true, a finished book is kept only if its
                          ``lastUpdateTime`` equals the largest header
                          timestamp seen up to that book

    Returns the list of finished books.  A book is appended only when the
    next header is encountered, so the book still open when the input ends
    is not part of the result.
    """
    keep_out_of_order = not drop_out_of_order
    finished = []
    open_book = None
    newest_stamp = None
    headers_seen = 0

    for text in lines:
        if end and headers_seen > end:
            break

        if text[0:9] != "ORDERBOOK":
            # data row: one order belonging to the book that is currently open
            fields = text.split(',')
            side = fields[obc.SIDE]
            # the orderdepthcount and currency columns are deliberately not read
            order = Order(
                timestamp=parse_datetime_opt(fields[obc.TIMESTAMP]),
                side=side,
                level=int(fields[obc.LEVEL]),
                price=float(fields[obc.PRICE]),
                size=long(fields[obc.SIZE]),
            )
            if side == obc.BID:
                open_book.add_bid(order)
            elif side == obc.OFFER:
                open_book.add_offer(order)
            continue

        # header row: close the previous book, then open a fresh one
        headers_seen += 1
        if open_book is not None:
            if keep_out_of_order or open_book.lastUpdateTime == newest_stamp:
                finished.append(open_book)
        stamp = parse_datetime_opt(text[10:])
        if newest_stamp is None or stamp > newest_stamp:
            newest_stamp = stamp
        open_book = OB(lastUpdateTime=stamp)

    return finished
Exemplo n.º 2
0
    def start_new_orderbook(self, line):
        """Finish the book in progress and open a new one from a header line.

        ``line`` must split on ',' into exactly four fields.  The third field
        is the monotonic clock and the fourth the exchange clock, each
        written as ``seconds:nanoseconds``.  The exchange clock is split into
        a day index (whole multiples of ``SECONDS_PER_DAY``) and a
        millisecond offset within that day; the monotonic clock is converted
        to milliseconds.

        The actions collected since the previous header are handed to the
        new book and ``self.actions`` is reset to a fresh list.

        Raises ValueError if the line does not have four comma-separated
        fields or a clock component is not an integer.
        """
        self.at_start_of_file = False
        # periodically clear the order cache so it doesn't eat all the memory
        if len(self.order_cache) > 5000:
            self.order_cache.clear()

        if self.currBook:
            self.books.append(self.currBook)

        _, _, monotonic_time, exchange_time = line.split(',')
        monotonic_seconds, _, monotonic_nanoseconds = monotonic_time.partition(':')
        epoch_seconds, _, exchange_nanoseconds = exchange_time.partition(':')

        # divmod and // keep every value integral even when true division is
        # in effect (the plain `/` operator only floors for Python 2 ints).
        exchange_day, exchange_seconds = divmod(long(epoch_seconds),
                                                self.SECONDS_PER_DAY)
        update_time = exchange_seconds * 1000 + long(exchange_nanoseconds) // 10**6
        monotonic_time = (long(monotonic_seconds) * 1000 +
                          long(monotonic_nanoseconds) // 10**6)

        self.currBook = OB(day=exchange_day,
                           lastUpdateTime=update_time,
                           lastUpdateMonotonic=monotonic_time,
                           actions=self.actions)
        self.actions = []
Exemplo n.º 3
0
    def start_new_orderbook(self, line):
        """Finish the book in progress and open a new one from a header line.

        ``line`` must split on ',' into exactly four fields.  The third field
        is the monotonic clock and the fourth the exchange clock, each
        written as ``seconds:nanoseconds``.  The exchange clock is split into
        a day index (whole multiples of ``SECONDS_PER_DAY``) and a
        millisecond offset within that day; the monotonic clock is converted
        to milliseconds.

        The actions collected since the previous header are handed to the
        new book and ``self.actions`` is reset to a fresh list.

        Raises ValueError if the line does not have four comma-separated
        fields or a clock component is not an integer.
        """
        self.at_start_of_file = False
        # periodically clear the order cache so it doesn't eat all the memory
        if len(self.order_cache) > 5000:
            self.order_cache.clear()

        if self.currBook:
            self.books.append(self.currBook)

        _, _, monotonic_time, exchange_time = line.split(',')
        monotonic_seconds, _, monotonic_nanoseconds = monotonic_time.partition(':')
        epoch_seconds, _, exchange_nanoseconds = exchange_time.partition(':')

        # divmod and // keep every value integral even when true division is
        # in effect (the plain `/` operator only floors for Python 2 ints).
        exchange_day, exchange_seconds = divmod(long(epoch_seconds),
                                                self.SECONDS_PER_DAY)
        update_time = exchange_seconds * 1000 + long(exchange_nanoseconds) // 10**6
        monotonic_time = (long(monotonic_seconds) * 1000 +
                          long(monotonic_nanoseconds) // 10**6)

        self.currBook = OB(
            day=exchange_day,
            lastUpdateTime=update_time,
            lastUpdateMonotonic=monotonic_time,
            actions=self.actions,
        )
        self.actions = []
Exemplo n.º 4
0
def books_from_lines_v1(lines, debug=False, end=None, drop_out_of_order=False):
    """Parse ``lines`` into a list of order books.

    Header lines (first nine characters equal to ``ORDERBOOK``) open a new
    book whose ``lastUpdateTime`` is ``parse_datetime_opt(line[10:])``.  All
    other lines are comma-separated order rows that are attached to the book
    currently open, as a bid or an offer according to their side column.

    Arguments:
        lines: iterable of strings.
        debug: accepted for interface compatibility; not used.
        end: when truthy, reading stops once more than ``end`` headers have
            been seen.
        drop_out_of_order: when true, a completed book is kept only if its
            ``lastUpdateTime`` equals the largest header timestamp seen up
            to that book.

    Returns:
        The list of completed books.  Completion happens when the following
        header is read, so the book still open at the end of the input is
        not included.
    """
    keep_out_of_order = not drop_out_of_order
    result = []
    book = None
    max_stamp = None
    n_headers = 0

    for raw in lines:
        if end and n_headers > end:
            break

        is_header = raw[0:9] == "ORDERBOOK"
        if is_header:
            n_headers += 1
            # the previous book is complete now that a new header has arrived
            if book is not None and (keep_out_of_order
                                     or book.lastUpdateTime == max_stamp):
                result.append(book)
            header_time = parse_datetime_opt(raw[10:])
            if max_stamp is None or header_time > max_stamp:
                max_stamp = header_time
            book = OB(lastUpdateTime=header_time)
        else:
            cols = raw.split(',')
            side = cols[obc.SIDE]
            # the orderdepthcount and currency columns are deliberately not read
            order = Order(
                timestamp=parse_datetime_opt(cols[obc.TIMESTAMP]),
                side=side,
                level=int(cols[obc.LEVEL]),
                price=float(cols[obc.PRICE]),
                size=long(cols[obc.SIZE]),
            )
            if side == obc.BID:
                book.add_bid(order)
            elif side == obc.OFFER:
                book.add_offer(order)

    return result
Exemplo n.º 5
0
class V3_Parser:
    """Line-oriented parser for 'V3' order-book log files.

    ``parse`` dispatches each line on its first character: ``V`` is the file
    header, ``A`` / ``D`` / ``M`` are add / delete / modify actions, ``O``
    starts a new order book, ``Q`` is an order entry for the current book,
    and ``#`` lines and blank lines are ignored.  Finished books accumulate
    in ``self.books``.
    """

    def __init__(self):
        self.header = {}
        # sometimes the files are missing their first line,
        # in which case we need to reconstruct the header info
        self.done_with_header = False
        self.currBook = None
        self.books = []
        # keep this around for printing debug info on restarts
        self.actions = []
        self.SECONDS_PER_DAY = 60 * 60 * 24
        # raw 'Q' line -> Order, so identical lines reuse one Order object
        self.order_cache = {}
        # action tuple -> itself, so equal actions share one tuple object
        self.action_cache = {}

    def header_ok(self, f):
        """Return True if the file ``f`` starts like a parseable V3 log.

        The first line must either be a ``V3`` header with three
        comma-separated fields, or a ``RESTART`` line that is eventually
        followed by a line starting with ``OB``.  Anything else, including
        an empty file, yields False.

        The read position of ``f`` is restored before returning (and also if
        the header split raises ValueError), so the call leaves no
        externally visible change.
        """
        old_pos = f.tell()
        try:
            # readline() returns '' at end of file; it never raises
            # StopIteration, so EOF has to be tested explicitly.
            line = f.readline()
            if line.startswith('V3'):
                v, _, _ = line.split(',')
                return v == 'V3'
            if line.startswith('RESTART'):
                while line and not line.startswith('OB'):
                    line = f.readline()
                # found an order book, so file is probably OK
                return bool(line)
            return False
        finally:
            f.seek(old_pos)

    def parse_header(self, line):
        """Parse the ``V3,<ccy>,<...>`` header line into ``self.header``.

        Asserts that nothing else has been parsed yet and that the line has
        exactly three comma-separated fields starting with ``V3``.
        """
        # make sure nothing else has been parsed yet
        assert self.currBook is None
        assert len(self.books) == 0
        assert self.actions == []
        assert len(self.header) == 0
        fields = line.split(',')
        assert len(fields) == 3
        v, ccy_str, _ = fields
        assert v == 'V3'
        self.header['ccy'] = parse_ccy(ccy_str)
        self.done_with_header = True

    def _record_action(self, action_type, line):
        """Parse one action line and append ``(type, side, price, volume)``.

        ``line`` must have five comma-separated fields; the second is the
        side (``'1'`` means offer, anything else bid), the third the volume
        and the fourth the price.  All four tuple members are plain scalars:
        the add and delete parsers used to wrap the type and/or side in
        1-tuples through stray trailing commas, which made their tuples
        differ in shape from the ones produced for modify actions.
        """
        _, side, volume, price, _ = line.split(',')
        side = OFFER_SIDE if side == '1' else BID_SIDE
        price = parse_float(price)
        # I think I saw trailing decimal points in some logs,
        # so parse as float just in case
        volume = parse_int_with_decimal_point(volume)
        action = (action_type, side, price, volume)
        # reuse the already stored tuple when an equal action was seen before
        action = self.action_cache.setdefault(action, action)
        self.actions.append(action)

    def parse_add_action(self, line):
        """Record an add action from an ``A`` line."""
        self._record_action(ADD_ACTION_TYPE, line)

    def parse_delete_action(self, line):
        """Record a delete action from a ``D`` line."""
        self._record_action(DELETE_ACTION_TYPE, line)

    def parse_modify_action(self, line):
        """Record a modify action from an ``M`` line."""
        self._record_action(MODIFY_ACTION_TYPE, line)

    def start_new_orderbook(self, line):
        """Finish the book in progress and open a new one from an ``O`` line.

        ``line`` must split on ',' into exactly four fields; the third is
        the monotonic clock and the fourth the exchange clock, each written
        as ``seconds:nanoseconds``.  The exchange clock is split into a day
        index and a millisecond offset within that day; the monotonic clock
        is converted to milliseconds.  The actions collected since the
        previous book are handed to the new book.
        """
        self.at_start_of_file = False
        # periodically clear the order cache so it doesn't eat all the memory
        if len(self.order_cache) > 5000:
            self.order_cache.clear()

        if self.currBook:
            self.books.append(self.currBook)

        _, _, monotonic_time, exchange_time = line.split(',')
        monotonic_seconds, _, monotonic_nanoseconds = monotonic_time.partition(':')
        epoch_seconds, _, exchange_nanoseconds = exchange_time.partition(':')

        # divmod and // keep every value integral even under true division
        exchange_day, exchange_seconds = divmod(long(epoch_seconds),
                                                self.SECONDS_PER_DAY)
        update_time = exchange_seconds * 1000 + long(exchange_nanoseconds) // 10**6
        monotonic_time = (long(monotonic_seconds) * 1000 +
                          long(monotonic_nanoseconds) // 10**6)

        self.currBook = OB(
            day=exchange_day,
            lastUpdateTime=update_time,
            lastUpdateMonotonic=monotonic_time,
            actions=self.actions,
        )
        self.actions = []

    def unsupported(self, line):
        """Handler for line types that must never occur; always asserts."""
        assert False

    def ignore(self, line):
        """Handler for lines that carry no data."""
        pass

    def parse(self, f, debug=False, end=None, drop_out_of_order=False):
        """Parse the open file ``f`` and return ``(self.header, self.books)``.

        ``debug``, ``end`` and ``drop_out_of_order`` are accepted but not
        used by this implementation.

        Errors raised while handling a line are tolerated in two cases: when
        nothing is left to read and at least one book has been collected
        (parsing stops), and when the line contains ``RESTART`` (parsing
        resumes, winding forward to the first ``OB`` line if no header has
        been parsed yet).  Any other error is reported and re-raised.

        Books are appended to ``self.books`` only when the next book starts,
        so the book still open when the input ends is not in the result.
        """
        dispatch_table = {
            'V': self.parse_header,
            'A': self.parse_add_action,
            'D': self.parse_delete_action,
            'M': self.parse_modify_action,
            'O': self.start_new_orderbook,
            '#': self.ignore,
            '\n': self.ignore,
        }
        for line in f:
            try:
                # this loop used to only dispatch on the first char
                # but I inlined the most common function 'build_orderbook_entry'
                # for slight performance boost
                if line[0] == 'Q':
                    if line in self.order_cache:
                        order = self.order_cache[line]
                    else:
                        _, monotonic_timestamp, side, level, price, size, _ = line.split(',')
                        seconds, _, nanoseconds = monotonic_timestamp.partition(':')
                        order = Order(
                            timestamp=int(seconds) * 1000 + int(nanoseconds) // 1000000,
                            side=(side == '1'),
                            level=int(level),
                            price=float(price),
                            size=int(size),
                        )
                        self.order_cache[line] = order
                    self.currBook.add_order(order)
                else:
                    dispatch_table[line[0]](line)

            except Exception as inst:
                # sometimes the collector doesn't finish printing
                # the last orderbook
                # so skip exceptions at the end of a file
                # NOTE(review): tell() is called while iterating over f;
                # confirm it reports the position after the current line and
                # not the end of a read-ahead buffer.
                curr_pos = f.tell()
                peek_str = f.read(100)
                f.seek(curr_pos)
                if peek_str == '' and len(self.books) > 0:
                    print("At last line of data file, ignoring error...")
                    break
                elif "RESTART" in line:
                    print("Found restart without preceding newline")
                    # two spaces on purpose: same text the old
                    # two-argument print statement produced
                    print(">>  %s" % (line,))
                    # had to inline restarts since, if they happen at the
                    # beginning of the file, they force us to wind forward to
                    # the first orderbook
                    if self.done_with_header:
                        continue
                    assert self.currBook is None
                    assert len(self.books) == 0
                    assert len(self.header) == 0
                    # if we haven't parsed a header yet then skip to first
                    # orderbook we can find
                    line = next(f)
                    while not line.startswith('OB'):
                        line = next(f)
                    fields = line.split(',')
                    assert len(fields) >= 2
                    ccy_str = fields[1]
                    self.header['ccy'] = parse_ccy(ccy_str)
                    # call the normal routine to start an orderbook
                    self.start_new_orderbook(line)
                    self.done_with_header = True
                    continue
                else:
                    print("Encountered error at line: %s" % (line,))
                    print(type(inst))  # the exception instance
                    print(inst)  # __str__ allows args to printed directly
                    print("Unrecoverable!")
                    raise

        return self.header, self.books
Exemplo n.º 6
0
class V3_Parser:
    """Line-oriented parser for 'V3' order-book log files.

    ``parse`` dispatches each line on its first character: ``V`` is the file
    header, ``A`` / ``D`` / ``M`` are add / delete / modify actions, ``O``
    starts a new order book, ``Q`` is an order entry for the current book,
    and ``#`` lines and blank lines are ignored.  Finished books accumulate
    in ``self.books``.
    """

    def __init__(self):
        self.header = {}
        # sometimes the files are missing their first line,
        # in which case we need to reconstruct the header info
        self.done_with_header = False
        self.currBook = None
        self.books = []
        # keep this around for printing debug info on restarts
        self.actions = []
        self.SECONDS_PER_DAY = 60 * 60 * 24
        # raw 'Q' line -> Order, so identical lines reuse one Order object
        self.order_cache = {}
        # action tuple -> itself, so equal actions share one tuple object
        self.action_cache = {}

    def header_ok(self, f):
        """Return True if the file ``f`` starts like a parseable V3 log.

        The first line must either be a ``V3`` header with three
        comma-separated fields, or a ``RESTART`` line that is eventually
        followed by a line starting with ``OB``.  Anything else, including
        an empty file, yields False.

        The read position of ``f`` is restored before returning (and also if
        the header split raises ValueError), so the call leaves no
        externally visible change.
        """
        old_pos = f.tell()
        try:
            # readline() returns '' at end of file; it never raises
            # StopIteration, so EOF has to be tested explicitly.
            line = f.readline()
            if line.startswith('V3'):
                v, _, _ = line.split(',')
                return v == 'V3'
            if line.startswith('RESTART'):
                while line and not line.startswith('OB'):
                    line = f.readline()
                # found an order book, so file is probably OK
                return bool(line)
            return False
        finally:
            f.seek(old_pos)

    def parse_header(self, line):
        """Parse the ``V3,<ccy>,<...>`` header line into ``self.header``.

        Asserts that nothing else has been parsed yet and that the line has
        exactly three comma-separated fields starting with ``V3``.
        """
        # make sure nothing else has been parsed yet
        assert self.currBook is None
        assert len(self.books) == 0
        assert self.actions == []
        assert len(self.header) == 0
        fields = line.split(',')
        assert len(fields) == 3
        v, ccy_str, _ = fields
        assert v == 'V3'
        self.header['ccy'] = parse_ccy(ccy_str)
        self.done_with_header = True

    def _record_action(self, action_type, line):
        """Parse one action line and append ``(type, side, price, volume)``.

        ``line`` must have five comma-separated fields; the second is the
        side (``'1'`` means offer, anything else bid), the third the volume
        and the fourth the price.  All four tuple members are plain scalars:
        the add and delete parsers used to wrap the type and/or side in
        1-tuples through stray trailing commas, which made their tuples
        differ in shape from the ones produced for modify actions.
        """
        _, side, volume, price, _ = line.split(',')
        side = OFFER_SIDE if side == '1' else BID_SIDE
        price = parse_float(price)
        # I think I saw trailing decimal points in some logs,
        # so parse as float just in case
        volume = parse_int_with_decimal_point(volume)
        action = (action_type, side, price, volume)
        # reuse the already stored tuple when an equal action was seen before
        action = self.action_cache.setdefault(action, action)
        self.actions.append(action)

    def parse_add_action(self, line):
        """Record an add action from an ``A`` line."""
        self._record_action(ADD_ACTION_TYPE, line)

    def parse_delete_action(self, line):
        """Record a delete action from a ``D`` line."""
        self._record_action(DELETE_ACTION_TYPE, line)

    def parse_modify_action(self, line):
        """Record a modify action from an ``M`` line."""
        self._record_action(MODIFY_ACTION_TYPE, line)

    def start_new_orderbook(self, line):
        """Finish the book in progress and open a new one from an ``O`` line.

        ``line`` must split on ',' into exactly four fields; the third is
        the monotonic clock and the fourth the exchange clock, each written
        as ``seconds:nanoseconds``.  The exchange clock is split into a day
        index and a millisecond offset within that day; the monotonic clock
        is converted to milliseconds.  The actions collected since the
        previous book are handed to the new book.
        """
        self.at_start_of_file = False
        # periodically clear the order cache so it doesn't eat all the memory
        if len(self.order_cache) > 5000:
            self.order_cache.clear()

        if self.currBook:
            self.books.append(self.currBook)

        _, _, monotonic_time, exchange_time = line.split(',')
        monotonic_seconds, _, monotonic_nanoseconds = monotonic_time.partition(
            ':')
        epoch_seconds, _, exchange_nanoseconds = exchange_time.partition(':')

        # divmod and // keep every value integral even under true division
        exchange_day, exchange_seconds = divmod(long(epoch_seconds),
                                                self.SECONDS_PER_DAY)
        update_time = exchange_seconds * 1000 + long(
            exchange_nanoseconds) // 10**6
        monotonic_time = long(monotonic_seconds) * 1000 + long(
            monotonic_nanoseconds) // 10**6

        self.currBook = OB(day=exchange_day,
                           lastUpdateTime=update_time,
                           lastUpdateMonotonic=monotonic_time,
                           actions=self.actions)
        self.actions = []

    def unsupported(self, line):
        """Handler for line types that must never occur; always asserts."""
        assert False

    def ignore(self, line):
        """Handler for lines that carry no data."""
        pass

    def parse(self, f, debug=False, end=None, drop_out_of_order=False):
        """Parse the open file ``f`` and return ``(self.header, self.books)``.

        ``debug``, ``end`` and ``drop_out_of_order`` are accepted but not
        used by this implementation.

        Errors raised while handling a line are tolerated in two cases: when
        nothing is left to read and at least one book has been collected
        (parsing stops), and when the line contains ``RESTART`` (parsing
        resumes, winding forward to the first ``OB`` line if no header has
        been parsed yet).  Any other error is reported and re-raised.

        Books are appended to ``self.books`` only when the next book starts,
        so the book still open when the input ends is not in the result.
        """
        dispatch_table = {
            'V': self.parse_header,
            'A': self.parse_add_action,
            'D': self.parse_delete_action,
            'M': self.parse_modify_action,
            'O': self.start_new_orderbook,
            '#': self.ignore,
            '\n': self.ignore,
        }
        for line in f:
            try:
                # this loop used to only dispatch on the first char
                # but I inlined the most common function 'build_orderbook_entry'
                # for slight performance boost
                if line[0] == 'Q':
                    if line in self.order_cache:
                        order = self.order_cache[line]
                    else:
                        _, monotonic_timestamp, side, level, price, size, _ = line.split(
                            ',')
                        seconds, _, nanoseconds = monotonic_timestamp.partition(
                            ':')
                        order = Order(timestamp=int(seconds) * 1000 +
                                      int(nanoseconds) // 1000000,
                                      side=(side == '1'),
                                      level=int(level),
                                      price=float(price),
                                      size=int(size))
                        self.order_cache[line] = order
                    self.currBook.add_order(order)
                else:
                    dispatch_table[line[0]](line)

            except Exception as inst:
                # sometimes the collector doesn't finish printing
                # the last orderbook
                # so skip exceptions at the end of a file
                # NOTE(review): tell() is called while iterating over f;
                # confirm it reports the position after the current line and
                # not the end of a read-ahead buffer.
                curr_pos = f.tell()
                peek_str = f.read(100)
                f.seek(curr_pos)
                if peek_str == '' and len(self.books) > 0:
                    print("At last line of data file, ignoring error...")
                    break
                elif "RESTART" in line:
                    print("Found restart without preceding newline")
                    # two spaces on purpose: same text the old
                    # two-argument print statement produced
                    print(">>  %s" % (line,))
                    # had to inline restarts since, if they happen at the
                    # beginning of the file, they force us to wind forward to
                    # the first orderbook
                    if self.done_with_header:
                        continue
                    assert self.currBook is None
                    assert len(self.books) == 0
                    assert len(self.header) == 0
                    # if we haven't parsed a header yet then skip to first
                    # orderbook we can find
                    line = next(f)
                    while not line.startswith('OB'):
                        line = next(f)
                    fields = line.split(',')
                    assert len(fields) >= 2
                    ccy_str = fields[1]
                    self.header['ccy'] = parse_ccy(ccy_str)
                    # call the normal routine to start an orderbook
                    self.start_new_orderbook(line)
                    self.done_with_header = True
                    continue
                else:
                    print("Encountered error at line: %s" % (line,))
                    print(type(inst))  # the exception instance
                    print(inst)  # __str__ allows args to printed directly
                    print("Unrecoverable!")
                    raise

        return self.header, self.books