示例#1
0
    def get_connection(self, host, port):
        '''Open a socket connection to the given host and port and write the Hadoop RPC preamble.

        The Hadoop RPC protocol looks like this when creating a connection:

        +---------------------------------------------------------------------+
        |  Header, 4 bytes ("hrpc")                                           |
        +---------------------------------------------------------------------+
        |  Version, 1 byte (default version 9)                                |
        +---------------------------------------------------------------------+
        |  RPC service class, 1 byte (0x00)                                   |
        +---------------------------------------------------------------------+
        |  Auth protocol, 1 byte (Auth method None = 0)                       |
        +---------------------------------------------------------------------+
        |  Length of the RpcRequestHeaderProto + length of the                |
        |  IpcConnectionContextProto (4 bytes/32 bit int)                     |
        +---------------------------------------------------------------------+
        |  Serialized delimited RpcRequestHeaderProto                         |
        +---------------------------------------------------------------------+
        |  Serialized delimited IpcConnectionContextProto                     |
        +---------------------------------------------------------------------+

        The 4-byte length counts both serialized messages plus the size of the
        varint length prefix of each of them.

        The new socket is stored in ``self.sock``. ``self.sock_connect_timeout``
        applies while connecting and ``self.sock_request_timeout`` afterwards;
        both are divided by 1000 before being handed to ``settimeout()``, i.e.
        they are expressed in milliseconds.

        When ``self.use_sasl`` is set, the SASL auth protocol byte is written
        and a SASL negotiation is performed before the connection context is
        sent.

        Args:
            host: Host name or address of the server to connect to.
            port: TCP port of the server.

        Raises:
            TransientException: If SASL is enabled but the SASL client reports
                that it could not connect.
            socket.error: Propagated unchanged from the underlying socket calls
                (including timeouts).

        NOTE(review): on failure the socket is left in ``self.sock``; presumably
        the caller is responsible for closing it - confirm.
        '''

        log.debug("############## CONNECTING ##############")
        # Open socket
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
        # settimeout() expects seconds. Divide by a float so that sub-second
        # values are not truncated to 0 by integer division (a timeout of 0
        # would switch the socket to non-blocking mode).
        self.sock.settimeout(self.sock_connect_timeout / 1000.0)
        # Connect socket to server - defined by host and port arguments
        self.sock.connect((host, port))
        self.sock.settimeout(self.sock_request_timeout / 1000.0)

        # Send RPC headers
        self.write(self.RPC_HEADER)                             # header
        self.write(struct.pack('B', self.version))              # version
        self.write(struct.pack('B', self.RPC_SERVICE_CLASS))    # RPC service class

        if self.use_sasl:
            self.write(struct.pack('B', self.AUTH_PROTOCOL_SASL))   # auth protocol: SASL
            # The SASL negotiation has to complete before the connection
            # context is written below.
            sasl = SaslRpcClient(self, hdfs_namenode_principal=self.hdfs_namenode_principal)
            if not sasl.connect():
                raise TransientException("SASL is configured, but cannot get connected")
        else:
            self.write(struct.pack('B', self.AUTH_PROTOCOL_NONE))   # auth protocol: none

        rpc_header = self.create_rpc_request_header()
        context = self.create_connection_context()

        # Total payload size: each message plus the size of its varint length
        # prefix (presumably what write_delimited() emits in front of it).
        header_length = (
            len(rpc_header) + encoder._VarintSize(len(rpc_header))
            + len(context) + encoder._VarintSize(len(context))
        )
        packed_length = struct.pack('!I', header_length)

        # Only pay for format_bytes() when the message will actually be emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Header length: %s (%s)", header_length, format_bytes(packed_length))

        self.write(packed_length)

        self.write_delimited(rpc_header)
        self.write_delimited(context)