示例#1
0
    def get_connection(self, host, port):
        """Open a socket connection to a given host and port and writes the Hadoop header
        The Hadoop RPC protocol looks like this when creating a connection:

        +---------------------------------------------------------------------+
        |  Header, 4 bytes ("hrpc")                                           |
        +---------------------------------------------------------------------+
        |  Version, 1 byte (default verion 9)                                 |
        +---------------------------------------------------------------------+
        |  RPC service class, 1 byte (0x00)                                   |
        +---------------------------------------------------------------------+
        |  Auth protocol, 1 byte (Auth method None = 0)                       |
        +---------------------------------------------------------------------+
        |  Length of the RpcRequestHeaderProto  + length of the               |
        |  of the IpcConnectionContextProto (4 bytes/32 bit int)              |
        +---------------------------------------------------------------------+
        |  Serialized delimited RpcRequestHeaderProto                         |
        +---------------------------------------------------------------------+
        |  Serialized delimited IpcConnectionContextProto                     |
        +---------------------------------------------------------------------+
        """

        log.debug("############## CONNECTING ##############")

        auth = self.AUTH_PROTOCOL_NONE if self.token is None else self.AUTH_PROTOCOL_SASL

        # Open socket
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
        self.sock.settimeout(self.timeout)
        # Connect socket to server - defined by host and port arguments
        self.sock.connect((host, port))

        # Send RPC headers
        self.write(self.RPC_HEADER)  # header
        self.write(struct.pack("B", self.version))  # version
        self.write(struct.pack("B", self.RPC_SERVICE_CLASS))  # RPC service class
        self.write(struct.pack("B", auth))  # serialization type (default none)

        if auth == SocketRpcChannel.AUTH_PROTOCOL_SASL:
            self.negotiate_sasl(self.token)
            self.call_id = -3

        rpc_header = self.create_rpc_request_header()
        context = (
            self.create_connection_context()
            if auth is self.AUTH_PROTOCOL_NONE
            else self.create_connection_context_auth()
        )

        header_length = (
            len(rpc_header) + encoder._VarintSize(len(rpc_header)) + len(context) + encoder._VarintSize(len(context))
        )

        if log.getEffectiveLevel() == logging.DEBUG:
            log.debug("Header length: %s (%s)" % (header_length, format_bytes(struct.pack("!I", header_length))))

        self.write(struct.pack("!I", header_length))
        self.write_delimited(rpc_header)
        self.write_delimited(context)
示例#2
0
    def get_connection(self, host, port):
        '''Open a socket connection to a given host and port and writes the Hadoop header
        The Hadoop RPC protocol looks like this when creating a connection:

        +---------------------------------------------------------------------+
        |  Header, 4 bytes ("hrpc")                                           |
        +---------------------------------------------------------------------+
        |  Version, 1 byte (default verion 9)                                 |
        +---------------------------------------------------------------------+
        |  RPC service class, 1 byte (0x00)                                   |
        +---------------------------------------------------------------------+
        |  Auth protocol, 1 byte (Auth method None = 0)                       |
        +---------------------------------------------------------------------+
        |  Length of the RpcRequestHeaderProto  + length of the               |
        |  of the IpcConnectionContextProto (4 bytes/32 bit int)              |
        +---------------------------------------------------------------------+
        |  Serialized delimited RpcRequestHeaderProto                         |
        +---------------------------------------------------------------------+
        |  Serialized delimited IpcConnectionContextProto                     |
        +---------------------------------------------------------------------+
        '''

        log.debug("############## CONNECTING ##############")
        # Open socket
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
        self.sock.settimeout(self.sock_connect_timeout / 1000)
        # Connect socket to server - defined by host and port arguments
        self.sock.connect((host, port))
        self.sock.settimeout(self.sock_request_timeout / 1000)

        # Send RPC headers
        self.write(self.RPC_HEADER)                             # header
        self.write(struct.pack('B', self.version))              # version
        self.write(struct.pack('B', self.RPC_SERVICE_CLASS))    # RPC service class
        if self.use_sasl:
            self.write(struct.pack('B', self.AUTH_PROTOCOL_SASL))   # serialization type (protobuf = 0xDF)
        else:
            self.write(struct.pack('B', self.AUTH_PROTOCOL_NONE))   # serialization type (protobuf = 0)

        if self.use_sasl:
            sasl = SaslRpcClient(self, hdfs_namenode_principal=self.hdfs_namenode_principal)
            sasl_connected = sasl.connect()
            if not sasl_connected:
                raise TransientException("SASL is configured, but cannot get connected")

        rpc_header = self.create_rpc_request_header()
        context = self.create_connection_context()

        header_length = len(rpc_header) + encoder._VarintSize(len(rpc_header)) +len(context) + encoder._VarintSize(len(context))

        if log.getEffectiveLevel() == logging.DEBUG:
            log.debug("Header length: %s (%s)" % (header_length, format_bytes(struct.pack('!I', header_length))))

        self.write(struct.pack('!I', header_length))

        self.write_delimited(rpc_header)
        self.write_delimited(context)
示例#3
0
    def get_connection(self, host, port):
        '''Open a socket connection to a given host and port and writes the Hadoop header
        The Hadoop RPC protocol looks like this when creating a connection:

        +---------------------------------------------------------------------+
        |  Header, 4 bytes ("hrpc")                                           |
        +---------------------------------------------------------------------+
        |  Version, 1 byte (default verion 9)                                 |
        +---------------------------------------------------------------------+
        |  RPC service class, 1 byte (0x00)                                   |
        +---------------------------------------------------------------------+
        |  Auth protocol, 1 byte (Auth method None = 0)                       |
        +---------------------------------------------------------------------+
        |  Length of the RpcRequestHeaderProto  + length of the               |
        |  of the IpcConnectionContextProto (4 bytes/32 bit int)              |
        +---------------------------------------------------------------------+
        |  Serialized delimited RpcRequestHeaderProto                         |
        +---------------------------------------------------------------------+
        |  Serialized delimited IpcConnectionContextProto                     |
        +---------------------------------------------------------------------+
        '''

        log.debug("############## CONNECTING ##############")
        # Open socket
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
        self.sock.settimeout(self.sock_connect_timeout / 1000)
        # Connect socket to server - defined by host and port arguments
        self.sock.connect((host, port))
        self.sock.settimeout(self.sock_request_timeout / 1000)

        # Send RPC headers
        self.write(self.RPC_HEADER)                             # header
        self.write(struct.pack('B', self.version))              # version
        self.write(struct.pack('B', self.RPC_SERVICE_CLASS))    # RPC service class
        if self.use_sasl:
            self.write(struct.pack('B', self.AUTH_PROTOCOL_SASL))   # serialization type (protobuf = 0xDF)
        else:
            self.write(struct.pack('B', self.AUTH_PROTOCOL_NONE))   # serialization type (protobuf = 0)

        if self.use_sasl:
            sasl = SaslRpcClient(self, hdfs_namenode_principal=self.hdfs_namenode_principal)
            sasl_connected = sasl.connect()
            if not sasl_connected:
                raise TransientException("SASL is configured, but cannot get connected")

        rpc_header = self.create_rpc_request_header()
        context = self.create_connection_context()

        header_length = len(rpc_header) + encoder._VarintSize(len(rpc_header)) +len(context) + encoder._VarintSize(len(context))

        if log.getEffectiveLevel() == logging.DEBUG:
            log.debug("Header length: %s (%s)" % (header_length, format_bytes(struct.pack('!I', header_length))))

        self.write(struct.pack('!I', header_length))

        self.write_delimited(rpc_header)
        self.write_delimited(context)
示例#4
0
    def send_rpc_message(self, method, request):
        '''Sends a Hadoop RPC request to the NameNode.

        The IpcConnectionContextProto, RpcPayloadHeaderProto and HadoopRpcRequestProto
        should already be serialized in the right way (delimited or not) before
        they are passed in this method.

        The Hadoop RPC protocol looks like this for sending requests:

        When sending requests
        +---------------------------------------------------------------------+
        |  Length of the next three parts (4 bytes/32 bit int)                |
        +---------------------------------------------------------------------+
        |  Delimited serialized RpcRequestHeaderProto (varint len + header)   |
        +---------------------------------------------------------------------+
        |  Delimited serialized RequestHeaderProto (varint len + header)      |
        +---------------------------------------------------------------------+
        |  Delimited serialized Request (varint len + request)                |
        +---------------------------------------------------------------------+
        '''
        log.debug("############## SENDING ##############")

        #0. RpcRequestHeaderProto
        rpc_request_header = self.create_rpc_request_header()
        #1. RequestHeaderProto
        request_header = self.create_request_header(method)
        #2. Param
        param = request.SerializeToString()
        if log.getEffectiveLevel() == logging.DEBUG:
            log_protobuf_message("Request", request)

        rpc_message_length = len(rpc_request_header) + encoder._VarintSize(len(rpc_request_header)) + \
                             len(request_header) + encoder._VarintSize(len(request_header)) + \
                             len(param) + encoder._VarintSize(len(param))

        if log.getEffectiveLevel() == logging.DEBUG:
            log.debug("RPC message length: %s (%s)" %
                      (rpc_message_length,
                       format_bytes(struct.pack('!I', rpc_message_length))))

        if self.use_sasl and self.sasl.use_wrap():
            log.debug("SASL QOP requested, wrapping RPC message.")
            self.sasl.wrap(
                struct.pack('!I', rpc_message_length) +
                encoder._VarintBytes(len(rpc_request_header)) +
                rpc_request_header +
                encoder._VarintBytes(len(request_header)) + request_header +
                encoder._VarintBytes(len(param)) + param)
        else:
            self.write(struct.pack('!I', rpc_message_length))
            self.write_delimited(rpc_request_header)
            self.write_delimited(request_header)
            self.write_delimited(param)
示例#5
0
    def send_rpc_message(self, method, request):
        """Sends a Hadoop RPC request to the NameNode.

        The IpcConnectionContextProto, RpcPayloadHeaderProto and HadoopRpcRequestProto
        should already be serialized in the right way (delimited or not) before
        they are passed in this method.

        The Hadoop RPC protocol looks like this for sending requests:

        When sending requests
        +---------------------------------------------------------------------+
        |  Length of the next three parts (4 bytes/32 bit int)                |
        +---------------------------------------------------------------------+
        |  Delimited serialized RpcRequestHeaderProto (varint len + header)   |
        +---------------------------------------------------------------------+
        |  Delimited serialized RequestHeaderProto (varint len + header)      |
        +---------------------------------------------------------------------+
        |  Delimited serialized Request (varint len + request)                |
        +---------------------------------------------------------------------+
        """
        log.debug("############## SENDING ##############")

        # 0. RpcRequestHeaderProto
        rpc_request_header = self.create_rpc_request_header()
        # 1. RequestHeaderProto
        request_header = self.create_request_header(method)
        # 2. Param
        param = request.SerializeToString()
        if log.getEffectiveLevel() == logging.DEBUG:
            log_protobuf_message("Request", request)

        rpc_message_length = (
            len(rpc_request_header)
            + encoder._VarintSize(len(rpc_request_header))
            + len(request_header)
            + encoder._VarintSize(len(request_header))
            + len(param)
            + encoder._VarintSize(len(param))
        )

        if log.getEffectiveLevel() == logging.DEBUG:
            log.debug(
                "RPC message length: %s (%s)"
                % (rpc_message_length, format_bytes(struct.pack("!I", rpc_message_length)))
            )
        self.write(struct.pack("!I", rpc_message_length))

        self.write_delimited(rpc_request_header)
        self.write_delimited(request_header)
        self.write_delimited(param)
示例#6
0
    def create_rpc_request(self, method, request):
        '''Wraps the user's request in an HadoopRpcRequestProto message and serializes it delimited.'''
        s_request = request.SerializeToString()
        log_protobuf_message("Protobuf message", request)
        log.debug("Protobuf message bytes (%d): %s" % (len(s_request), format_bytes(s_request)))
        rpcRequest = hadoop_rpc.HadoopRpcRequestProto()
        rpcRequest.methodName = method.name
        rpcRequest.request = s_request
        rpcRequest.declaringClassProtocolName = "org.apache.hadoop.hdfs.protocol.ClientProtocol"
        rpcRequest.clientProtocolVersion = 1L

        # Serialize delimited
        s_rpcRequest = rpcRequest.SerializeToString()
        log_protobuf_message("RpcRequest (len: %d)" % len(s_rpcRequest), rpcRequest)
        return encoder._VarintBytes(len(s_rpcRequest)) + s_rpcRequest
示例#7
0
def get_delimited_message_bytes(byte_stream):
    ''' Parse a delimited protobuf message. This is done by first getting a protobuf varint from
    the stream that represents the length of the message, then reading that amount of
    from the message and then parse it.
    Since the int can be represented as max 4 bytes, first get 4 bytes and try to decode.
    The decoder returns the value and the position where the value was found, so we need
    to rewind the buffer to the position, because the remaining bytes belong to the message
    after.
    '''

    (length, pos) = decoder._DecodeVarint32(byte_stream.read(4), 0)
    log.debug("Delimited message length (pos %d): %d" % (pos, length))

    byte_stream.rewind(4 - pos)
    message_bytes = byte_stream.read(length)
    log.debug("Delimited message bytes (%d): %s" % (len(message_bytes), format_bytes(message_bytes)))
    return message_bytes
示例#8
0
    def create_rpc_request(self, method, request):
        '''Wraps the user's request in an HadoopRpcRequestProto message and serializes it delimited.'''
        s_request = request.SerializeToString()
        log_protobuf_message("Protobuf message", request)
        log.debug("Protobuf message bytes (%d): %s" %
                  (len(s_request), format_bytes(s_request)))
        rpcRequest = hadoop_rpc.HadoopRpcRequestProto()
        rpcRequest.methodName = method.name
        rpcRequest.request = s_request
        rpcRequest.declaringClassProtocolName = "org.apache.hadoop.hdfs.protocol.ClientProtocol"
        rpcRequest.clientProtocolVersion = 1L

        # Serialize delimited
        s_rpcRequest = rpcRequest.SerializeToString()
        log_protobuf_message("RpcRequest (len: %d)" % len(s_rpcRequest),
                             rpcRequest)
        return encoder._VarintBytes(len(s_rpcRequest)) + s_rpcRequest
示例#9
0
def get_delimited_message_bytes(byte_stream):
    ''' Parse a delimited protobuf message. This is done by first getting a protobuf varint from
    the stream that represents the length of the message, then reading that amount of
    from the message and then parse it.
    Since the int can be represented as max 4 bytes, first get 4 bytes and try to decode.
    The decoder returns the value and the position where the value was found, so we need
    to rewind the buffer to the position, because the remaining bytes belong to the message
    after.
    '''

    (length, pos) = decoder._DecodeVarint32(byte_stream.read(4), 0)
    log.debug("Delimited message length (pos %d): %d" % (pos, length))

    byte_stream.rewind(4 - pos)
    message_bytes = byte_stream.read(length)
    log.debug("Delimited message bytes (%d): %s" %
              (len(message_bytes), format_bytes(message_bytes)))
    return message_bytes
示例#10
0
def get_delimited_message_bytes(byte_stream, nr=4):
    ''' Parse a delimited protobuf message. This is done by first getting a protobuf varint from
    the stream that represents the length of the message, then reading that amount of
    from the message and then parse it.
    Since the int can be represented as max 4 bytes, first get 4 bytes and try to decode.
    The decoder returns the value and the position where the value was found, so we need
    to rewind the buffer to the position, because the remaining bytes belong to the message
    after.
    '''

    (length, pos) = decoder._DecodeVarint32(byte_stream.read(nr), 0)
    if log.getEffectiveLevel() == logging.DEBUG:
        log.debug("Delimited message length (pos %d): %d" % (pos, length))

    delimiter_bytes = nr - pos

    byte_stream.rewind(delimiter_bytes)
    message_bytes = byte_stream.read(length)
    if log.getEffectiveLevel() == logging.DEBUG:
        log.debug("Delimited message bytes (%d): %s" % (len(message_bytes), format_bytes(message_bytes)))

    total_len = length + pos
    return (total_len, message_bytes)
示例#11
0
    def parse_response(self, byte_stream, response_class):
        '''Parses a Hadoop RPC response.

        The RpcResponseHeaderProto contains a status field that marks SUCCESS or ERROR.
        The Hadoop RPC protocol looks like the diagram below for receiving SUCCESS requests.
        +-----------------------------------------------------------+
        |  Delimited serialized RpcResponseHeaderProto              |
        +-----------------------------------------------------------+
        |  Length of the RPC resonse (4 bytes/32 bit int)           |
        +-----------------------------------------------------------+
        |  Serialized RPC response                                  |
        +-----------------------------------------------------------+

        The Hadoop RPC protocol looks like the diagram below for receiving ERROR requests.
        +-----------------------------------------------------------+
        |  Delimited serialized RpcResponseHeaderProto              |
        +-----------------------------------------------------------+
        |  Length of the RPC resonse (4 bytes/32 bit int)           |
        +-----------------------------------------------------------+
        |  Length of the Exeption class name (4 bytes/32 bit int)   |
        +-----------------------------------------------------------+
        |  Exception class name string                              |
        +-----------------------------------------------------------+
        |  Length of the stack trace (4 bytes/32 bit int)           |
        +-----------------------------------------------------------+
        |  Stack trace string                                       |
        +-----------------------------------------------------------+

        If the length of the strings is -1, the strings are null
        '''

        log.debug("############## PARSING ##############")
        log.debug("Payload class: %s" % response_class)

        # Let's see if we deal with an error on protocol level
        check = struct.unpack("!Q", byte_stream.read(8))[0]
        if check == self.ERROR_BYTES:
            self.handle_error(byte_stream)

        byte_stream.rewind(8)
        log.debug("---- Parsing header ----")
        header_bytes = get_delimited_message_bytes(byte_stream)
        header = rpcheaderproto.RpcResponseHeaderProto()
        header.ParseFromString(header_bytes)
        log_protobuf_message("Response header", header)

        if header.status == 0:  # rpcheaderproto.RpcStatusProto.Value('SUCCESS')
            log.debug("---- Parsing response ----")
            response = response_class()
            response_length = self.get_length(byte_stream)

            if response_length == 0:
                return

            response_bytes = byte_stream.read(response_length)
            log.debug("Response bytes (%d): %s" % (len(response_bytes), format_bytes(response_bytes)))

            response.ParseFromString(response_bytes)
            log_protobuf_message("Response", response)
            return response

        elif header.status == 1:  # rpcheaderproto.RpcStatusProto.Value('ERROR')
            self.handle_error(byte_stream)
示例#12
0
    def parse_response(self, byte_stream, response_class):
        '''Parses a Hadoop RPC response.

        The RpcResponseHeaderProto contains a status field that marks SUCCESS or ERROR.
        The Hadoop RPC protocol looks like the diagram below for receiving SUCCESS requests.
        +-----------------------------------------------------------+
        |  Delimited serialized RpcResponseHeaderProto              |
        +-----------------------------------------------------------+
        |  Length of the RPC resonse (4 bytes/32 bit int)           |
        +-----------------------------------------------------------+
        |  Serialized RPC response                                  |
        +-----------------------------------------------------------+

        The Hadoop RPC protocol looks like the diagram below for receiving ERROR requests.
        +-----------------------------------------------------------+
        |  Delimited serialized RpcResponseHeaderProto              |
        +-----------------------------------------------------------+
        |  Length of the RPC resonse (4 bytes/32 bit int)           |
        +-----------------------------------------------------------+
        |  Length of the Exeption class name (4 bytes/32 bit int)   |
        +-----------------------------------------------------------+
        |  Exception class name string                              |
        +-----------------------------------------------------------+
        |  Length of the stack trace (4 bytes/32 bit int)           |
        +-----------------------------------------------------------+
        |  Stack trace string                                       |
        +-----------------------------------------------------------+

        If the length of the strings is -1, the strings are null
        '''

        log.debug("############## PARSING ##############")
        log.debug("Payload class: %s" % response_class)

        # Let's see if we deal with an error on protocol level
        check = struct.unpack("!Q", byte_stream.read(8))[0]
        if check == self.ERROR_BYTES:
            self.handle_error(byte_stream)

        byte_stream.rewind(8)
        log.debug("---- Parsing header ----")
        header_bytes = get_delimited_message_bytes(byte_stream)
        header = rpcheaderproto.RpcResponseHeaderProto()
        header.ParseFromString(header_bytes)
        log_protobuf_message("Response header", header)

        if header.status == 0:  # rpcheaderproto.RpcStatusProto.Value('SUCCESS')
            log.debug("---- Parsing response ----")
            response = response_class()
            response_length = self.get_length(byte_stream)

            if response_length == 0:
                return

            response_bytes = byte_stream.read(response_length)
            log.debug("Response bytes (%d): %s" %
                      (len(response_bytes), format_bytes(response_bytes)))

            response.ParseFromString(response_bytes)
            log_protobuf_message("Response", response)
            return response

        elif header.status == 1:  # rpcheaderproto.RpcStatusProto.Value('ERROR')
            self.handle_error(byte_stream)
示例#13
0
 def write(self, data):
     if log.getEffectiveLevel() == logging.DEBUG:
         log.debug("Sending: %s", format_bytes(data))
     self.sock.send(data)
示例#14
0
    def parse_response(self, byte_stream, response_class):
        '''Parses a Hadoop RPC response.

        The RpcResponseHeaderProto contains a status field that marks SUCCESS or ERROR.
        The Hadoop RPC protocol looks like the diagram below for receiving SUCCESS requests.
        +-----------------------------------------------------------+
        |  Delimited serialized RpcResponseHeaderProto              |
        +-----------------------------------------------------------+
        |  Length of the RPC resonse (4 bytes/32 bit int)           |
        +-----------------------------------------------------------+
        |  Serialized RPC response                                  |
        +-----------------------------------------------------------+

        The Hadoop RPC protocol looks like the diagram below for receiving ERROR requests.
        +-----------------------------------------------------------+
        |  Delimited serialized RpcResponseHeaderProto              |
        +-----------------------------------------------------------+
        |  Length of the RPC resonse (4 bytes/32 bit int)           |
        +-----------------------------------------------------------+
        |  Length of the Exeption class name (4 bytes/32 bit int)   |
        +-----------------------------------------------------------+
        |  Exception class name string                              |
        +-----------------------------------------------------------+
        |  Length of the stack trace (4 bytes/32 bit int)           |
        +-----------------------------------------------------------+
        |  Stack trace string                                       |
        +-----------------------------------------------------------+

        If the lenght of the strings is -1, the strings are null
        '''

        log.debug("############## PARSING ##############")
        log.debug("Payload class: %s" % response_class)

        log.debug("---- Parsing header ----")
        header_bytes = self.get_delimited_nessage_bytes(byte_stream)
        header = rpcheaderproto.RpcResponseHeaderProto()
        header.ParseFromString(header_bytes)
        self.log_protobuf_message("Response header", header)

        if header.status == 0:  # rpcheaderproto.RpcStatusProto.Value('SUCCESS')
            log.debug("---- Parsing response ----")
            response = response_class()
            response_length = self.get_length(byte_stream)

            if response_length == 0:
                return

            response_bytes = byte_stream.read(response_length)
            log.debug("Response bytes (%d): %s" % (len(response_bytes), format_bytes(response_bytes)))

            response.ParseFromString(response_bytes)
            self.log_protobuf_message("Response", response)
            return response

        elif header.status == 1:  # rpcheaderproto.RpcStatusProto.Value('ERROR')
            length = self.get_length(byte_stream)
            log.debug("Class name length: %d" % (length))
            if length == -1:
                class_name = None
            else:
                class_name = byte_stream.read(length)
                log.debug("Class name (%d): %s" % (len(class_name), class_name))

            length = self.get_length(byte_stream)
            log.debug("Stack trace length: %d" % (length))
            if length == -1:
                stack_trace = None
            else:
                stack_trace = byte_stream.read(length)
                log.debug("Stack trace (%d): %s" % (len(stack_trace), stack_trace))

            stack_trace_msg = stack_trace.split("\n")[0]
            log.debug(stack_trace_msg)

            raise RequestError(stack_trace_msg)