def __try_read_record(self):
    """Attempt to read one physical record at the reader's current offset.

    Returns:
      (data, record_type) tuple. When fewer than _HEADER_LENGTH bytes remain
      in the current block, or the record read has type _RECORD_TYPE_NONE,
      the data element is an empty string.

    Raises:
      EOFError: when the header or the payload could not be read in full.
      InvalidRecordError: when the declared length does not fit into the
        remainder of the current block, or the stored crc does not match the
        crc computed over the record type and payload.
    """
    position_in_block = self.__reader.tell() % _BLOCK_SIZE
    space_left = _BLOCK_SIZE - position_in_block
    if space_left < _HEADER_LENGTH:
        # Too few bytes remain in this block to hold a header.
        return ('', _RECORD_TYPE_NONE)

    raw_header = self.__reader.read(_HEADER_LENGTH)
    header_size = len(raw_header)
    if header_size != _HEADER_LENGTH:
        raise EOFError(
            'Read %s bytes instead of %s' % (header_size, _HEADER_LENGTH))

    masked_crc, payload_length, record_type = struct.unpack(
        _HEADER_FORMAT, raw_header)
    expected_crc = _unmask_crc(masked_crc)

    # Header plus payload must fit into what is left of the current block.
    if payload_length + _HEADER_LENGTH > space_left:
        raise errors.InvalidRecordError('Length is too big')

    payload = self.__reader.read(payload_length)
    payload_size = len(payload)
    if payload_size != payload_length:
        raise EOFError(
            'Not enough data read. Expected: %s but got %s' %
            (payload_length, payload_size))

    if record_type == _RECORD_TYPE_NONE:
        # The payload has been consumed, but no crc check is done for it.
        return ('', record_type)

    # The checksum covers the record type followed by the payload.
    computed_crc = crc32c.crc_finalize(
        crc32c.crc_update(
            crc32c.crc_update(crc32c.CRC_INIT, [record_type]),
            payload))
    if computed_crc != expected_crc:
        raise errors.InvalidRecordError('Data crc does not match')

    return (payload, record_type)
def read(self):
    """Reads record from current position in reader.

    Physical records are read one at a time until a complete logical record
    is assembled: either a single FULL chunk, or a FIRST chunk followed by
    any number of MIDDLE chunks and a terminating LAST chunk. Out-of-order
    chunks are logged as ordering corruption; invalid records are logged,
    any partially assembled data is discarded and the reader is re-synced
    before reading continues.

    Returns:
      original bytes stored in a single record.

    Raises:
      EOFError: propagated from the low-level record read when the header
        or payload cannot be read in full; only InvalidRecordError is
        handled here.
    """
    data = None
    while True:
        # Remember where this physical record starts, for diagnostics.
        last_offset = self.tell()
        try:
            (chunk, record_type) = self.__try_read_record()
            if record_type == _RECORD_TYPE_NONE:
                # Nothing usable here; presumably __sync() moves the reader
                # to the next block boundary -- confirm against its
                # definition.
                self.__sync()
            elif record_type == _RECORD_TYPE_FULL:
                if data is not None:
                    logging.warning(
                        "Ordering corruption: Got FULL record while already "
                        "in a chunk at offset %d", last_offset)
                # Partial data collected so far (if any) is dropped.
                return chunk
            elif record_type == _RECORD_TYPE_FIRST:
                if data is not None:
                    logging.warning(
                        "Ordering corruption: Got FIRST record while already "
                        "in a chunk at offset %d", last_offset)
                # Start (or restart) assembling from this chunk.
                data = chunk
            elif record_type == _RECORD_TYPE_MIDDLE:
                if data is None:
                    # Orphaned chunk: warn and skip it.
                    logging.warning(
                        "Ordering corruption: Got MIDDLE record before FIRST "
                        "record at offset %d", last_offset)
                else:
                    data += chunk
            elif record_type == _RECORD_TYPE_LAST:
                if data is None:
                    # Orphaned chunk: warn and skip it.
                    logging.warning(
                        "Ordering corruption: Got LAST record but no chunk is in "
                        "progress at offset %d", last_offset)
                else:
                    result = data + chunk
                    data = None
                    return result
            else:
                # Raised inside the try on purpose, so that unknown types
                # take the same recovery path as other invalid records.
                raise errors.InvalidRecordError(
                    "Unsupported record type: %s" % record_type)

        # "as" form is valid on Python 2.6+ and Python 3; the comma form is
        # a SyntaxError on Python 3.
        except errors.InvalidRecordError as e:
            logging.warning(
                "Invalid record encountered at %s (%s). Syncing to "
                "the next block", last_offset, e)
            data = None
            self.__sync()