def nextRawKey(self):
    """Read the next raw key from the underlying stream.

    In record mode (``self._block_compressed`` false) the record length and
    key length are read, the key bytes are wrapped in a ``DataInputBuffer``
    and the remaining ``record_length - key_length`` bytes are loaded into
    ``self._record``.

    In block-compressed mode keys are served from the block currently held
    in ``self._record`` (a 5-tuple: record count, key lengths, keys, value
    lengths, values) until ``self._block_index`` reaches the record count;
    then the next block is read from the stream and decompressed.

    Returns:
        A ``DataInputBuffer`` over the key bytes, or ``None`` when the
        record length is negative (record mode) or the stream position has
        reached ``self._end`` (block mode).

    Raises:
        IOError: if the bytes read after the block marker differ from
            ``self._sync``.
    """
    if self._block_compressed:
        # Keys left over in the block that is already decoded?
        has_block = hasattr(self, '_block_index')
        if has_block and self._block_index < self._record[0]:
            self._sync_seen = False
            _count, key_sizes, key_bytes, _value_sizes, _value_bytes = \
                self._record
            next_key_size = readVInt(key_sizes)
            self._block_index += 1
            return DataInputBuffer(key_bytes.read(next_key_size))

        if self._stream.getPos() >= self._end:
            return None

        # Skip the int that precedes the sync hash (annotated as -1 by the
        # original author); its value is not checked here.
        self._stream.readInt()
        marker = self._stream.read(SYNC_HASH_SIZE)
        if marker != self._sync:
            raise IOError("File is corrupt")
        self._sync_seen = True

        # The sync marker may be the last thing before the end position.
        if self._stream.getPos() >= self._end:
            return None

        def inflate_next_section():
            # VInt length prefix, then that many bytes handed to the codec.
            section_size = readVInt(self._stream)
            payload = self._stream.read(section_size)
            return self._codec.decompressInputStream(payload)

        record_count = readVInt(self._stream)
        # Sections are stored in this fixed order; the comprehension reads
        # them sequentially, exactly as four consecutive calls would.
        key_sizes, key_bytes, value_sizes, value_bytes = [
            inflate_next_section() for _unused in range(4)
        ]

        self._record = (record_count, key_sizes, key_bytes,
                        value_sizes, value_bytes)
        self._block_index = 1

        first_key_size = readVInt(key_sizes)
        return DataInputBuffer(key_bytes.read(first_key_size))

    # Record mode: one length-prefixed record at a time.
    total_size = self._readRecordLength()
    if total_size < 0:
        return None

    key_size = self._stream.readInt()
    raw_key = DataInputBuffer(self._stream.read(key_size))
    # Whatever follows the key inside the record is kept for nextRawValue.
    self._record.reset(self._stream.read(total_size - key_size))
    return raw_key
def nextRawValue(self):
    """Return the raw value currently staged in ``self._record``.

    * Block-compressed mode: ``self._record`` is unpacked as a 5-tuple; the
      next length is read from its value-lengths element and that many
      bytes are read from its values element and wrapped in a
      ``DataInputBuffer``.
    * Record mode with ``self._decompress`` set: the full contents of
      ``self._record`` are read and passed to
      ``self._codec.decompressInputStream``.
    * Record mode otherwise: ``self._record`` itself is returned.
    """
    if self._block_compressed:
        _count, _key_sizes, _key_bytes, value_sizes, value_bytes = \
            self._record
        next_value_size = readVInt(value_sizes)
        return DataInputBuffer(value_bytes.read(next_value_size))

    if not self._decompress:
        return self._record

    # Drain the whole record buffer and hand it to the codec.
    compressed = self._record.read(self._record.size())
    return self._codec.decompressInputStream(compressed)
def _readBuffer():
    """Read one length-prefixed section from the stream and decompress it.

    The length comes from ``readVInt(self._stream)``; that many bytes are
    then read from the same stream and the result of
    ``self._codec.decompressInputStream`` on those bytes is returned.

    NOTE(review): ``self`` is not a parameter, so it must come from an
    enclosing scope -- presumably this is a closure inside a reader method;
    confirm against the surrounding code.
    """
    section_size = readVInt(self._stream)
    payload = self._stream.read(section_size)
    return self._codec.decompressInputStream(payload)
def readFields(self, data_input):
    """Load this object's value from ``data_input``.

    Stores whatever ``readVInt`` decodes from ``data_input`` in
    ``self._value``.
    """
    decoded = readVInt(data_input)
    self._value = decoded
def readString(data_input):
    """Read a length-prefixed string from ``data_input``.

    The length is decoded with ``readVInt``, that many bytes are read from
    ``data_input``, and the result of ``Text.decode`` on them is returned.
    """
    length = readVInt(data_input)
    # Named ``raw`` rather than ``bytes`` so the builtin type is not shadowed.
    raw = data_input.read(length)
    return Text.decode(raw)
def readFields(self, data_input):
    """Load this object's length and payload from ``data_input``.

    ``readVInt`` supplies the length, which is stored in ``self._length``;
    that many bytes are then read from ``data_input`` into ``self._bytes``.
    """
    payload_size = readVInt(data_input)
    # Record the length before reading, so it is set even if the read fails.
    self._length = payload_size
    self._bytes = data_input.read(payload_size)