def __init__(self, host, port, version, effective_user=None, use_sasl=False, hdfs_namenode_principal=None,
             sock_connect_timeout=10000, sock_request_timeout=10000):
    '''Set up a SocketRpcChannel for a socket server on a user defined port.

    The protocol version and the effective user for the communication can
    be supplied by the caller. No connection is opened here: ``self.sock``
    starts out as ``None``.

    :param host: host of the socket server
    :param port: port of the socket server
    :param version: protocol version stored on the channel
    :param effective_user: user to act as; when falsy it is derived from
        the Kerberos user principal (SASL) or from the current username
    :param use_sasl: whether SASL is used; requires the Kerberos libraries
    :param hdfs_namenode_principal: stored as-is on the channel
    :param sock_connect_timeout: stored as-is on the channel
    :param sock_request_timeout: stored as-is on the channel
    :raises FatalException: if ``use_sasl`` is set but the Kerberos
        libraries are not available
    '''
    self.host, self.port = host, port
    self.sock = None
    # The first call (the one sending the connection context) must use
    # call_id -3; afterwards ids start at 0 and increment.
    self.call_id = -3
    self.version = version
    self.client_id = str(uuid.uuid4())
    self.use_sasl = use_sasl
    self.hdfs_namenode_principal = hdfs_namenode_principal

    if not self.use_sasl:
        resolved_user = effective_user or get_current_username()
    elif _kerberos_available:
        # The Kerberos helper is created even when effective_user is given.
        krb = Kerberos()
        resolved_user = effective_user or krb.user_principal().name
    else:
        raise FatalException(
            "Kerberos libs not found. Please install snakebite using 'pip install snakebite[kerberos]'")
    self.effective_user = resolved_user

    self.sock_connect_timeout = sock_connect_timeout
    self.sock_request_timeout = sock_request_timeout
def validate_request(self, request):
    '''Reject a client request that is not fully initialized.

    :param request: message object exposing ``IsInitialized()``
    :raises FatalException: if ``request.IsInitialized()`` is falsy, i.e.
        the request is missing mandatory fields
    '''
    if request.IsInitialized():
        return
    request_type = type(request)
    raise FatalException("Client request (%s) is missing mandatory fields" % request_type)
def readBlock(self, length, pool_id, block_id, generation_stamp, offset, block_token, check_crc):
    '''Send a read request for the given block and yield its payload.

    This is a generator. If the response status is 0 (success), packets are
    read until ``length - offset`` bytes have been received, and the payload
    is yielded in "loads" (groups of whole chunks, at most LOAD_SIZE bytes).
    Nothing is yielded when the response status is not 0.

    Send read request:
    +---------------------------------------------------------------------+
    |  Data Transfer Protocol Version, 2 bytes                            |
    +---------------------------------------------------------------------+
    |  Op code, 1 byte (READ_BLOCK = 81)                                  |
    +---------------------------------------------------------------------+
    |  Delimited serialized OpReadBlockProto (varint len + request)       |
    +---------------------------------------------------------------------+

    Receive response:
    +---------------------------------------------------------------------+
    |  Delimited BlockOpResponseProto (varint len + response)             |
    +---------------------------------------------------------------------+

    Start reading packets. Each packet has the following structure:
    +---------------------------------------------------------------------+
    |  Packet length (4 bytes/32 bit int)                                 |
    +---------------------------------------------------------------------+
    |  Serialized size of header, 2 bytes                                 |
    +---------------------------------------------------------------------+
    |  Packet Header Proto                                                |
    +---------------------------------------------------------------------+
    |  x checksums, 4 bytes each                                          |
    +---------------------------------------------------------------------+
    |  x chunks of payload data                                           |
    +---------------------------------------------------------------------+

    :param length: end position of the read; ``length - offset`` bytes are
        requested
    :param pool_id: block pool id placed in the ExtendedBlockProto
    :param block_id: block id placed in the ExtendedBlockProto
    :param generation_stamp: generation stamp placed in the ExtendedBlockProto
    :param offset: offset within the block at which reading starts
    :param block_token: object whose ``identifier``, ``password``, ``kind``
        and ``service`` are copied into the request token
    :param check_crc: when truthy (and the checksum type is not NULL) every
        chunk is verified against the checksum sent in its packet
    :raises FatalException: if the response announces a checksum type other
        than NULL, CRC32 or CRC32C
    :raises TransientException: if a chunk does not match its checksum
    '''
    log.debug("%s sending readBlock request", self)

    # Send version and opcode
    self.sock.send(struct.pack('>h', 28))
    self.sock.send(struct.pack('b', self.READ_BLOCK))
    # From here on `length` is the number of bytes still to be read.
    length = length - offset

    # Create and send OpReadBlockProto message
    request = OpReadBlockProto()
    request.offset = offset
    request.len = length
    header = request.header
    header.clientName = "snakebite"
    base_header = header.baseHeader
    # TokenProto
    token = base_header.token
    token.identifier = block_token.identifier
    token.password = block_token.password
    token.kind = block_token.kind
    token.service = block_token.service
    # ExtendedBlockProto
    block = base_header.block
    block.poolId = pool_id
    block.blockId = block_id
    block.generationStamp = generation_stamp
    s_request = request.SerializeToString()
    log_protobuf_message("OpReadBlockProto:", request)
    self.write_delimited(s_request)

    byte_stream = RpcBufferedReader(self.sock)
    block_op_response_bytes = get_delimited_message_bytes(byte_stream)[1]

    block_op_response = BlockOpResponseProto()
    block_op_response.ParseFromString(block_op_response_bytes)
    log_protobuf_message("BlockOpResponseProto", block_op_response)

    checksum_type = block_op_response.readOpChecksumInfo.checksum.type
    bytes_per_chunk = block_op_response.readOpChecksumInfo.checksum.bytesPerChecksum
    log.debug("Checksum type: %s, bytesPerChecksum: %s", checksum_type, bytes_per_chunk)
    if checksum_type == self.CHECKSUM_NULL:
        checksum_len = 0
    elif checksum_type in (self.CHECKSUM_CRC32C, self.CHECKSUM_CRC32):
        checksum_len = 4
    else:
        raise FatalException("Checksum type %s not implemented" % checksum_type)

    # Checksums are only collected and verified when the caller asked for it
    # and the stream actually carries them.
    verify_crc = check_crc and checksum_type != self.CHECKSUM_NULL

    total_read = 0
    if block_op_response.status == 0:  # datatransfer_proto.Status.Value('SUCCESS')
        while total_read < length:
            log.debug("== Reading next packet")

            packet_len = struct.unpack("!I", byte_stream.read(4))[0]
            log.debug("Packet length: %s", packet_len)

            serialized_size = struct.unpack("!H", byte_stream.read(2))[0]
            log.debug("Serialized size: %s", serialized_size)

            packet_header_bytes = byte_stream.read(serialized_size)
            packet_header = PacketHeaderProto()
            packet_header.ParseFromString(packet_header_bytes)
            log_protobuf_message("PacketHeaderProto", packet_header)

            data_len = packet_header.dataLen

            # Integer ceiling division: exact, and independent of whether
            # "/" means true or floor division in this module.
            chunks_per_packet = (data_len + bytes_per_chunk - 1) // bytes_per_chunk
            log.debug("Nr of chunks: %d", chunks_per_packet)

            # Payload size derived from the packet length: everything left
            # after the 4 length bytes and the checksums.
            data_len = packet_len - 4 - chunks_per_packet * checksum_len
            log.debug("Payload len: %d", data_len)

            byte_stream.reset()

            # Collect checksums
            if verify_crc:
                checksums = [struct.unpack("!I", self._read_bytes(checksum_len))[0]
                             for _ in range(chunks_per_packet)]
            else:
                # Checksums are not needed: consume them in a single read.
                self._read_bytes(checksum_len * chunks_per_packet)

            # We use a fixed size buffer (a "load") to read only a couple of chunks at once.
            # A load is LOAD_SIZE rounded down to a whole number of chunks.
            bytes_per_load = self.LOAD_SIZE - (self.LOAD_SIZE % bytes_per_chunk)
            chunks_per_load = bytes_per_load // bytes_per_chunk
            loads_per_packet = (bytes_per_chunk * chunks_per_packet + bytes_per_load - 1) // bytes_per_load

            read_on_packet = 0
            for i in range(loads_per_packet):
                # Chunks are gathered in a list and joined once per load.
                # assumes _read_bytes returns a byte string — TODO confirm
                chunks = []
                for j in range(chunks_per_load):
                    log.debug("Reading chunk %s in load %s:", j, i)
                    # The last chunk of a packet may be shorter than bytes_per_chunk.
                    bytes_to_read = min(bytes_per_chunk, data_len - read_on_packet)
                    chunk = self._read_bytes(bytes_to_read)
                    if verify_crc:
                        checksum_index = i * chunks_per_load + j
                        if checksum_index < len(checksums) and crc(chunk) != checksums[checksum_index]:
                            # it makes sense to retry, so TransientError
                            raise TransientException("Checksum doesn't match")
                    chunks.append(chunk)
                    total_read += len(chunk)
                    read_on_packet += len(chunk)
                yield b''.join(chunks)

    # Send ClientReadStatusProto message confirming successful read
    # NOTE(review): this acknowledgement is sent and the socket closed
    # regardless of the response status — confirm that is the intended nesting.
    request = ClientReadStatusProto()
    request.status = 0  # SUCCESS
    log_protobuf_message("ClientReadStatusProto:", request)
    s_request = request.SerializeToString()
    self.write_delimited(s_request)
    self._close_socket()