Пример #1
0
    def __init__(self, host, port=10000, username=None, database='default', configuration=None):
        """Connect to HiveServer2 over a SASL ``PLAIN`` transport and open a session.

        :param host: Host handed to the Thrift socket
        :param port: Port handed to the Thrift socket
        :param username: SASL user name; ``getpass.getuser()`` is used when this is falsy
        :param database: Database selected with a ``USE`` statement once the session is open
        :param configuration: Session configuration sent with the open-session request;
            an empty dict is used when this is falsy

        If opening the transport, opening the session or selecting the database
        fails, the transport is closed and the error is re-raised.
        """
        sock = thrift.transport.TSocket.TSocket(host, port)
        if not username:
            username = getpass.getuser()
        if not configuration:
            configuration = {}

        def make_sasl_client():
            # Factory handed to the SASL transport below.
            client = sasl.Client()
            client.setAttr(b'username', username.encode('latin-1'))
            # The password is irrelevant in PLAIN mode but must not be empty.
            client.setAttr(b'password', b'x')
            client.init()
            return client

        # hive.server2.authentication=NONE in hive-site.xml corresponds to SASL PLAIN.
        self._transport = thrift_sasl.TSaslClientTransport(make_sasl_client, b'PLAIN', sock)
        self._client = TCLIService.Client(
            thrift.protocol.TBinaryProtocol.TBinaryProtocol(self._transport))

        try:
            self._transport.open()
            v1 = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1
            request = ttypes.TOpenSessionReq(client_protocol=v1, configuration=configuration)
            reply = self._client.OpenSession(request)
            _check_status(reply)
            assert reply.sessionHandle is not None, "Expected a session from OpenSession"
            self._sessionHandle = reply.sessionHandle
            assert reply.serverProtocolVersion == v1, \
                "Unable to handle protocol version {}".format(reply.serverProtocolVersion)
            with contextlib.closing(self.cursor()) as use_cursor:
                use_cursor.execute('USE `{}`'.format(database))
        except:
            # Bare on purpose: whatever went wrong, close the transport and re-raise.
            self._transport.close()
            raise
 def setup(self):
   """Open a buffered Thrift connection to the impalad HS2 endpoint.

   The endpoint is read from ``IMPALAD_HS2_HOST_PORT`` (split on ":"). The
   socket, transport, protocol and client are kept on ``self``.
   """
   hs2_host, hs2_port = IMPALAD_HS2_HOST_PORT.split(":")
   sock = TSocket(hs2_host, hs2_port)
   self.socket = sock
   buffered = TBufferedTransport(sock)
   self.transport = buffered
   buffered.open()
   binary = TBinaryProtocol.TBinaryProtocol(buffered)
   self.protocol = binary
   self.hs2_client = TCLIService.Client(binary)
Пример #3
0
 def __init__(self,
              host=None,
              port=10000,
              authMechanism=None,
              user=None,
              password=None,
              database=None,
              cursorclass=Cursor):
     """Connect to HiveServer2 and open a session.

     :param host: Host handed to the Thrift socket
     :param port: Port handed to the Thrift socket
     :param authMechanism: ``'NOSASL'``, ``'PLAIN'`` or ``'LDAP'``
     :param user: User name set on the SASL client
     :param password: Password set on the SASL client; a placeholder is
         substituted in ``PLAIN`` mode when it is empty
     :param database: If not ``None``, selected with ``USE`` after the
         session is open
     :param cursorclass: Stored on ``self.cursorclass``
     :raises NotImplementedError: for ``'KERBEROS'`` or any unknown mechanism

     If opening the transport, opening the session or selecting the
     database fails, the transport is closed before the error is re-raised,
     so a failed constructor does not leave a connection behind.
     """
     # KERBEROS is a known mechanism but is not implemented here.
     if authMechanism not in ('NOSASL', 'PLAIN', 'LDAP'):
         raise NotImplementedError(
             'authMechanism is either not supported or not implemented')
     # Must set a password for thrift, even if it doesn't need one
     # (open issue with python-sasl).
     if authMechanism == 'PLAIN' and not password:
         password = '******'
     socket = TSocket(host, port)
     self.cursorclass = cursorclass
     if authMechanism == 'NOSASL':
         transport = TBufferedTransport(socket)
     else:
         saslc = sasl.Client()
         saslc.setAttr("username", user)
         saslc.setAttr("password", password)
         saslc.init()
         transport = TSaslClientTransport(saslc, "PLAIN", socket)
     self.client = TCLIService.Client(TBinaryProtocol(transport))
     try:
         transport.open()
         res = self.client.OpenSession(TOpenSessionReq())
         self.session = res.sessionHandle
         if database is not None:
             with self.cursor() as cur:
                 cur.execute("USE {0}".format(database))
     except BaseException:
         # Nothing else holds the transport yet; close it so the socket
         # is not leaked, then let the caller see the original error.
         transport.close()
         raise
Пример #4
0
    def __init__(self,
                 host=None,
                 port=10000,
                 authMechanism=None,
                 user=None,
                 password=None,
                 database=None,
                 configuration=None):
        """Connect to HiveServer2 and open a session.

        :param host: Host handed to the Thrift socket (and to the SASL helper)
        :param port: Port handed to the Thrift socket
        :param authMechanism: Passed to the base-class initialiser;
            ``'NOSASL'`` selects a plain buffered transport, anything else
            goes through ``self._get_sasl_client``
        :param user: User name forwarded to ``self._get_sasl_client``
        :param password: Password, first passed through ``self._check_password``
        :param database: If not ``None``, selected with ``USE`` after the
            session is open
        :param configuration: Sent with the open-session request and
            forwarded to ``self._get_sasl_client``

        If opening the transport, opening the session or selecting the
        database fails, the transport is closed before the error is
        re-raised, so a failed constructor does not leave a connection behind.
        """
        super(Connection, self).__init__(authMechanism)
        # Must set a password for thrift, even if it doesn't need one
        # (open issue with python-sasl).
        password = self._check_password(authMechanism, password)
        socket = TSocket(host, port)
        if authMechanism == 'NOSASL':
            transport = TBufferedTransport(socket)
        else:
            saslc, sasl_mech = self._get_sasl_client(host, authMechanism, user,
                                                     password, configuration)
            transport = TSaslClientTransport(saslc, sasl_mech, socket)

        self.client = TCLIService.Client(TBinaryProtocol(transport))
        try:
            transport.open()
            res = self.client.OpenSession(
                TOpenSessionReq(configuration=configuration))
            self.session = res.sessionHandle
            if database is not None:
                with self.cursor() as cur:
                    cur.execute("USE {0}".format(database))
        except BaseException:
            # Nothing else holds the transport yet; close it so the socket
            # is not leaked, then let the caller see the original error.
            transport.close()
            raise
Пример #5
0
 def connect(self, host='10.3.181.235', port=10000):
     """Open a buffered Thrift connection and keep a TCLIService client.

     :param host: Host to connect to. Defaults to the address that used to
         be hard-coded, so existing ``connect()`` calls behave as before.
     :param port: Port to connect to. Defaults to 10000.

     The client is stored on ``self._client``.
     """
     transport = TTransport.TBufferedTransport(TSocket.TSocket(host, port))
     client = TCLIService.Client(TBinaryProtocol.TBinaryProtocol(transport))
     transport.open()
     self._client = client
Пример #6
0
 def connect(self):
     """Open a buffered Thrift transport to ``self.host``/``self.port``.

     A TCLIService client bound to that transport is stored on
     ``self._client`` once the transport is open.
     """
     buffered = TTransport.TBufferedTransport(
         TSocket.TSocket(self.host, self.port))
     hs2 = TCLIService.Client(TBinaryProtocol.TBinaryProtocol(buffered))
     buffered.open()
     self._client = hs2
Пример #7
0
 def setup(self):
     """Connect to the first impalad's HS2 port and build a TCLIService client.

     The socket, transport, protocol and client are kept on ``self`` as
     ``socket``, ``transport``, ``protocol`` and ``hs2_client``.
     """
     hs2_host = self.cluster.impalads[0].service.hostname
     hs2_port = self.cluster.impalads[0].service.hs2_port
     sock = TSocket(hs2_host, hs2_port)
     self.socket = sock
     buffered = TBufferedTransport(sock)
     self.transport = buffered
     buffered.open()
     binary = TBinaryProtocol.TBinaryProtocol(buffered)
     self.protocol = binary
     self.hs2_client = TCLIService.Client(binary)
Пример #8
0
 def create_hs2_client(self):
     """Open a new HS2 connection to the impalad and return its client.

     The transport targets ``self.hostname``/``self.hs2_port`` and is
     opened before the TCLIService client is built.
     """
     buffered = TBufferedTransport(TSocket(self.hostname, self.hs2_port))
     buffered.open()
     return TCLIService.Client(TBinaryProtocol.TBinaryProtocol(buffered))
Пример #9
0
    def __init__(self,
                 unix_socket=None,
                 host=None,
                 port=10000,
                 authMechanism=None,
                 user=None,
                 password=None,
                 database=None,
                 configuration=None,
                 timeout=None):
        """Connect to HiveServer2 and open a session.

        :param unix_socket: If not ``None``, connect through this unix socket
            instead of ``host``/``port``
        :param host: Host handed to the Thrift socket
        :param port: Port handed to the Thrift socket
        :param authMechanism: ``'NOSASL'``, ``'PLAIN'``, ``'KERBEROS'`` or ``'LDAP'``
        :param user: User name set on the SASL client and sent with the
            open-session request
        :param password: Password set on the SASL client and sent with the
            open-session request; a placeholder is substituted in ``PLAIN``
            mode when it is empty
        :param database: If not ``None``, selected with ``USE`` after the
            session is open
        :param configuration: Sent with the open-session request; also passed
            to ``self._get_krb_settings`` in ``KERBEROS`` mode
        :param timeout: Passed to the socket's ``setTimeout``
        :raises NotImplementedError: for an unknown ``authMechanism``

        If opening the transport, opening the session or selecting the
        database fails, the transport is closed before the error is
        re-raised, so a failed constructor does not leave a connection behind.
        """
        if authMechanism not in ('NOSASL', 'PLAIN', 'KERBEROS', 'LDAP'):
            raise NotImplementedError(
                'authMechanism is either not supported or not implemented')
        # Must set a password for thrift, even if it doesn't need one
        # (open issue with python-sasl).
        if authMechanism == 'PLAIN' and not password:
            password = '******'
        if unix_socket is not None:
            socket = TSocket(unix_socket=unix_socket)
        else:
            socket = TSocket(host, port)
        socket.setTimeout(timeout)
        if authMechanism == 'NOSASL':
            transport = TBufferedTransport(socket)
        else:
            sasl_mech = 'PLAIN'
            saslc = sasl.Client()
            saslc.setAttr("username", user)
            saslc.setAttr("password", password)
            if authMechanism == 'KERBEROS':
                # Kerberos is spoken as GSSAPI at the SASL layer and needs
                # the service host and name in addition to the credentials.
                krb_host, krb_service = self._get_krb_settings(
                    host, configuration)
                sasl_mech = 'GSSAPI'
                saslc.setAttr("host", krb_host)
                saslc.setAttr("service", krb_service)

            saslc.init()
            transport = TSaslClientTransport(saslc, sasl_mech, socket)

        self.client = TCLIService.Client(TBinaryProtocol(transport))
        try:
            transport.open()
            res = self.client.OpenSession(
                TOpenSessionReq(username=user,
                                password=password,
                                configuration=configuration))
            self.session = res.sessionHandle
            if database is not None:
                with self.cursor() as cur:
                    cur.execute("USE {0}".format(database))
        except BaseException:
            # Nothing else holds the transport yet; close it so the socket
            # is not leaked, then let the caller see the original error.
            transport.close()
            raise
Пример #10
0
    def __init__(self, host, port=10000, username=None, database='default', auth='NONE',
                 configuration=None):
        """Open a HiveServer2 session over Thrift.

        :param host: Host handed to the Thrift socket
        :param port: Port handed to the Thrift socket
        :param username: SASL user name; ``getpass.getuser()`` is used when falsy
        :param database: Database selected with ``USE`` once the session is open
        :param auth: The value of hive.server2.authentication used by HiveServer2;
            only ``'NONE'`` and ``'NOSASL'`` are accepted
        :param configuration: Session configuration; an empty dict is used when falsy
        :raises NotImplementedError: for any other ``auth`` value
        """
        sock = thrift.transport.TSocket.TSocket(host, port)
        if not username:
            username = getpass.getuser()
        if not configuration:
            configuration = {}

        if auth not in ('NOSASL', 'NONE'):
            raise NotImplementedError(
                "Only NONE & NOSASL authentication are supported, got {}".format(auth))

        if auth == 'NOSASL':
            # hive.server2.authentication=NOSASL in hive-site.xml: plain buffered transport.
            self._transport = thrift.transport.TTransport.TBufferedTransport(sock)
        else:
            def make_sasl_client():
                # Factory handed to the SASL transport below.
                client = sasl.Client()
                client.setAttr(b'username', username.encode('latin-1'))
                # The password is irrelevant in NONE mode but must not be empty.
                client.setAttr(b'password', b'x')
                client.init()
                return client

            # hive.server2.authentication=NONE in hive-site.xml corresponds to SASL PLAIN.
            self._transport = thrift_sasl.TSaslClientTransport(make_sasl_client, b'PLAIN', sock)

        self._client = TCLIService.Client(
            thrift.protocol.TBinaryProtocol.TBinaryProtocol(self._transport))
        # Oldest protocol version that still has the features relied on here:
        # "V6 uses binary type for binary payload (was string) and uses columnar result set"
        wanted_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

        try:
            self._transport.open()
            request = ttypes.TOpenSessionReq(
                client_protocol=wanted_version,
                configuration=configuration,
            )
            reply = self._client.OpenSession(request)
            _check_status(reply)
            assert reply.sessionHandle is not None, "Expected a session from OpenSession"
            self._sessionHandle = reply.sessionHandle
            assert reply.serverProtocolVersion == wanted_version, \
                "Unable to handle protocol version {}".format(reply.serverProtocolVersion)
            with contextlib.closing(self.cursor()) as use_cursor:
                use_cursor.execute('USE `{}`'.format(database))
        except:
            # Bare on purpose: whatever went wrong, close the transport and re-raise.
            self._transport.close()
            raise
Пример #11
0
    def __init__(self,
                 host=None,
                 port=None,
                 username=None,
                 database='default',
                 auth=None,
                 configuration=None,
                 kerberos_service_name=None,
                 password=None,
                 thrift_transport=None):
        """Open a HiveServer2 session over Thrift.

        :param host: Host HiveServer2 runs on
        :param port: Port HiveServer2 runs on; 10000 when left as ``None``
        :param username: User name; ``getpass.getuser()`` is used when falsy
        :param database: Database selected with ``USE`` once the session is open
        :param auth: The value of hive.server2.authentication used by HiveServer2;
            ``NONE`` when left as ``None``
        :param configuration: A dictionary of Hive settings (functionally same as
            the `set` command); an empty dict is used when falsy
        :param kerberos_service_name: Required with auth='KERBEROS', rejected otherwise
        :param password: Required with auth='LDAP' or auth='CUSTOM', rejected otherwise
        :param thrift_transport: A ready-made ``TTransportBase`` for custom advanced
            usage. Cannot be combined with host, port, auth, kerberos_service_name
            or password.
        :raises ValueError: when the arguments above are combined inconsistently
        :raises NotImplementedError: for an unsupported ``auth`` value

        The way to support LDAP and GSSAPI is originated from cloudera/Impyla:
        https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62
        /impala/_thrift_api.py#L152-L160
        """
        if not username:
            username = getpass.getuser()
        if not configuration:
            configuration = {}

        password_given = password is not None
        if password_given != (auth in ('LDAP', 'CUSTOM')):
            raise ValueError(
                "Password should be set if and only if in LDAP or CUSTOM mode; "
                "Remove password or use one of those modes")
        service_given = kerberos_service_name is not None
        if service_given != (auth == 'KERBEROS'):
            raise ValueError(
                "kerberos_service_name should be set if and only if in KERBEROS mode"
            )

        if thrift_transport is not None:
            # A caller-supplied transport replaces everything we would build here.
            conflicting = (host, port, auth, kerberos_service_name, password)
            if any(arg is not None for arg in conflicting):
                raise ValueError(
                    "thrift_transport cannot be used with "
                    "host/port/auth/kerberos_service_name/password")
            self._transport = thrift_transport
        else:
            port = 10000 if port is None else port
            auth = 'NONE' if auth is None else auth
            sock = thrift.transport.TSocket.TSocket(host, port)
            if auth == 'NOSASL':
                # hive.server2.authentication=NOSASL in hive-site.xml: plain buffered transport.
                self._transport = thrift.transport.TTransport.TBufferedTransport(
                    sock)
            elif auth in ('LDAP', 'KERBEROS', 'NONE', 'CUSTOM'):
                # Imported here so the SASL packages stay an optional dependency.
                import sasl
                import thrift_sasl

                # hive.server2.authentication=KERBEROS is GSSAPI in the sasl library.
                mechanism = 'GSSAPI' if auth == 'KERBEROS' else 'PLAIN'
                if mechanism == 'PLAIN' and password is None:
                    # The password is irrelevant in NONE mode but must not be empty.
                    password = '******'

                def make_sasl_client():
                    # Factory handed to the SASL transport below.
                    client = sasl.Client()
                    client.setAttr('host', host)
                    if mechanism == 'GSSAPI':
                        client.setAttr('service', kerberos_service_name)
                    elif mechanism == 'PLAIN':
                        client.setAttr('username', username)
                        client.setAttr('password', password)
                    else:
                        raise AssertionError
                    client.init()
                    return client

                self._transport = thrift_sasl.TSaslClientTransport(
                    make_sasl_client, mechanism, sock)
            else:
                # All HS2 config options:
                # https://cwiki.apache.org/confluence/display/Hive/Setting+Up+HiveServer2#SettingUpHiveServer2-Configuration
                # PAM is left to the end user via the thrift_transport option.
                raise NotImplementedError(
                    "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
                    "authentication are supported, got {}".format(auth))

        self._client = TCLIService.Client(
            thrift.protocol.TBinaryProtocol.TBinaryProtocol(self._transport))
        # Oldest protocol version that still has the features relied on here:
        # "V6 uses binary type for binary payload (was string) and uses columnar result set"
        wanted_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

        try:
            self._transport.open()
            request = ttypes.TOpenSessionReq(
                client_protocol=wanted_version,
                configuration=configuration,
                username=username,
            )
            reply = self._client.OpenSession(request)
            _check_status(reply)
            assert reply.sessionHandle is not None, "Expected a session from OpenSession"
            self._sessionHandle = reply.sessionHandle
            assert reply.serverProtocolVersion == wanted_version, \
                "Unable to handle protocol version {}".format(reply.serverProtocolVersion)
            with contextlib.closing(self.cursor()) as use_cursor:
                use_cursor.execute('USE `{}`'.format(database))
        except:
            # Bare on purpose: whatever went wrong, close the transport and re-raise.
            self._transport.close()
            raise
Пример #12
0
    def __init__(self,
                 host,
                 port=10000,
                 username=None,
                 database='default',
                 auth='NONE',
                 configuration=None,
                 kerberos_service_name=None,
                 password=None):
        """Open a HiveServer2 session over Thrift.

        :param host: Host handed to the Thrift socket and to the SASL client
        :param port: Port handed to the Thrift socket
        :param username: User name; ``getpass.getuser()`` is used when falsy
        :param database: Database selected with ``USE`` once the session is open
        :param auth: The value of hive.server2.authentication used by HiveServer2
        :param configuration: A dictionary of Hive settings (functionally same as
            the `set` command); an empty dict is used when falsy
        :param kerberos_service_name: Required with auth='KERBEROS', rejected otherwise
        :param password: Required with auth='LDAP', rejected otherwise
        :raises ValueError: when password or kerberos_service_name does not match ``auth``
        :raises NotImplementedError: for an unsupported ``auth`` value

        The way to support LDAP and GSSAPI is originated from cloudera/Impyla:
        https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62
        /impala/_thrift_api.py#L152-L160
        """
        sock = thrift.transport.TSocket.TSocket(host, port)
        if not username:
            username = getpass.getuser()
        if not configuration:
            configuration = {}

        password_given = password is not None
        if password_given != (auth == 'LDAP'):
            raise ValueError(
                "password should be set if and only if in LDAP mode")
        service_given = kerberos_service_name is not None
        if service_given != (auth == 'KERBEROS'):
            raise ValueError(
                "kerberos_service_name should be set if and only if in KERBEROS mode"
            )

        if auth not in ('NOSASL', 'LDAP', 'KERBEROS', 'NONE'):
            raise NotImplementedError(
                "Only NONE, NOSASL, LDAP, KERBEROS "
                "authentication are supported, got {}".format(auth))

        if auth == 'NOSASL':
            # hive.server2.authentication=NOSASL in hive-site.xml: plain buffered transport.
            self._transport = thrift.transport.TTransport.TBufferedTransport(
                sock)
        else:
            # hive.server2.authentication=KERBEROS is GSSAPI in the sasl library.
            mechanism = 'GSSAPI' if auth == 'KERBEROS' else 'PLAIN'
            if mechanism == 'PLAIN' and password is None:
                # The password is irrelevant in NONE mode but must not be empty.
                password = b'x'

            def make_sasl_client():
                # Factory handed to the SASL transport below.
                client = sasl.Client()
                client.setAttr(b'host', host)
                if mechanism == 'GSSAPI':
                    client.setAttr(b'service', kerberos_service_name)
                elif mechanism == 'PLAIN':
                    client.setAttr(b'username',
                                   username.encode('latin-1'))
                    client.setAttr(b'password', password)
                else:
                    raise AssertionError
                client.init()
                return client

            self._transport = thrift_sasl.TSaslClientTransport(
                make_sasl_client, mechanism, sock)

        self._client = TCLIService.Client(
            thrift.protocol.TBinaryProtocol.TBinaryProtocol(self._transport))
        # Oldest protocol version that still has the features relied on here:
        # "V6 uses binary type for binary payload (was string) and uses columnar result set"
        wanted_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

        try:
            self._transport.open()
            request = ttypes.TOpenSessionReq(
                client_protocol=wanted_version,
                configuration=configuration,
            )
            reply = self._client.OpenSession(request)
            _check_status(reply)
            assert reply.sessionHandle is not None, "Expected a session from OpenSession"
            self._sessionHandle = reply.sessionHandle
            assert reply.serverProtocolVersion == wanted_version, \
                "Unable to handle protocol version {}".format(reply.serverProtocolVersion)
            with contextlib.closing(self.cursor()) as use_cursor:
                use_cursor.execute('USE `{}`'.format(database))
        except:
            # Bare on purpose: whatever went wrong, close the transport and re-raise.
            self._transport.close()
            raise
Пример #13
0
    def __init__(self,
                 host=None,
                 port=None,
                 username=None,
                 database='default',
                 auth=None,
                 configuration=None,
                 kerberos_service_name=None,
                 password=None,
                 thrift_transport=None,
                 thrift_transport_protocol='binary',
                 http_path=None):
        """Open a HiveServer2 session over Thrift.

        :param host: Host HiveServer2 runs on
        :param port: Port HiveServer2 runs on
        :param username: User name; ``getpass.getuser()`` is used when falsy
        :param database: Database selected with ``USE`` once the session is open
        :param auth: The value of hive.server2.authentication used by HiveServer2
        :param configuration: A dictionary of Hive settings (functionally same as
            the `set` command); an empty dict is used when falsy
        :param kerberos_service_name: Required with auth='KERBEROS', rejected otherwise
        :param password: Forwarded to the transport factory
        :param thrift_transport: A ready-made ``TTransportBase`` for custom advanced
            usage. Cannot be combined with host, port, auth, kerberos_service_name
            or password.
        :param thrift_transport_protocol: ``'binary'`` or ``'http'``; selects
            ``Connection.create_binary_transport`` or
            ``Connection.create_http_transport`` when no ``thrift_transport`` is given
        :param http_path: Forwarded to ``Connection.create_http_transport``
        :raises ValueError: for inconsistent arguments or an unknown
            ``thrift_transport_protocol``

        The way to support LDAP and GSSAPI is originated from cloudera/Impyla:
        https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62
        /impala/_thrift_api.py#L152-L160
        """
        if not username:
            username = getpass.getuser()
        if not configuration:
            configuration = {}

        service_given = kerberos_service_name is not None
        if service_given != (auth == 'KERBEROS'):
            raise ValueError(
                "kerberos_service_name should be set if and only if in KERBEROS mode"
            )

        if thrift_transport is not None:
            # A caller-supplied transport replaces everything we would build here.
            conflicting = (host, port, auth, kerberos_service_name, password)
            if any(arg is not None for arg in conflicting):
                raise ValueError(
                    "thrift_transport cannot be used with "
                    "host/port/auth/kerberos_service_name/password")
            self._transport = thrift_transport
        else:
            # Arguments both transport factories take.
            shared = dict(host=host,
                          port=port,
                          username=username,
                          password=password,
                          kerberos_service_name=kerberos_service_name,
                          auth=auth)
            if thrift_transport_protocol == 'binary':
                self._transport = Connection.create_binary_transport(**shared)
            elif thrift_transport_protocol == 'http':
                self._transport = Connection.create_http_transport(
                    http_path=http_path, **shared)
            else:
                raise ValueError("Invalid thrift_transport_protocol: {}".format(
                    thrift_transport_protocol))

        self._client = TCLIService.Client(
            thrift.protocol.TBinaryProtocol.TBinaryProtocol(self._transport))
        # Oldest protocol version that still has the features relied on here:
        # "V6 uses binary type for binary payload (was string) and uses columnar result set"
        wanted_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

        try:
            self._transport.open()
            request = ttypes.TOpenSessionReq(
                client_protocol=wanted_version,
                configuration=configuration,
                username=username,
            )
            reply = self._client.OpenSession(request)
            _check_status(reply)
            assert reply.sessionHandle is not None, "Expected a session from OpenSession"
            self._sessionHandle = reply.sessionHandle
            assert reply.serverProtocolVersion == wanted_version, \
                "Unable to handle protocol version {}".format(reply.serverProtocolVersion)
            with contextlib.closing(self.cursor()) as use_cursor:
                use_cursor.execute('USE `{}`'.format(database))
        except:
            # Bare on purpose: whatever went wrong, close the transport and re-raise.
            self._transport.close()
            raise
Пример #14
0
options, args = parser.parse_args()

hs2_host, hs2_port = options.hs2_hostport.split(':')

if options.transport == "plain_sasl":
    # Here we supply a bogus username of "foo" and a bogus password of "bar".
    # We just have to supply *something*, else HS2 will block waiting for user
    # input.  Any bogus username and password are accepted.
    hs2_transport = create_transport(hs2_host, hs2_port, "hive",
                                     options.transport, "foo", "bar")
else:
    hs2_transport = create_transport(hs2_host, hs2_port, "hive",
                                     options.transport)

protocol = TBinaryProtocol.TBinaryProtocol(hs2_transport)
hs2_client = TCLIService.Client(protocol)

# Try to connect to the HiveServer2 service and create a session
now = time.time()
TIMEOUT_SECONDS = 300.0
while time.time() - now < TIMEOUT_SECONDS:
    try:
        hs2_transport.open()
        open_session_req = TCLIService.TOpenSessionReq()
        open_session_req.username = getpass.getuser()
        resp = hs2_client.OpenSession(open_session_req)
        if resp.status.statusCode == TCLIService.TStatusCode.SUCCESS_STATUS:
            close_session_req = TCLIService.TCloseSessionReq()
            close_session_req.sessionHandle = resp.sessionHandle
            hs2_client.CloseSession(close_session_req)
            print "HiveServer2 service is up at %s." % options.hs2_hostport
Пример #15
0
    saslc = sasl.Client()
    saslc.setAttr("username", username)
    saslc.setAttr("password", password)
    saslc.init()
    return saslc


try:

    print "1) Preparing the connection..."
    sock = TSocket(host, port)
    if auth == 'NOSASL':
        transport = TBufferedTransport(sock)
    else:
        transport = TSaslClientTransport(sasl_factory, "PLAIN", sock)
    client = TCLIService.Client(TBinaryProtocol(transport))
    transport.open()

    print "\n2) Opening Session..."
    res = client.OpenSession(
        TOpenSessionReq(username=username, password=password))
    session = res.sessionHandle
    print('Session opened. ( %s )' % session.sessionId)

    ## 3) Show tables
    print "\n3) Try fetching table list..."
    query = TExecuteStatementReq(session,
                                 statement="show tables",
                                 confOverlay={})
    response = client.ExecuteStatement(query)
    opHandle = response.operationHandle
Пример #16
0
    def __init__(self,
                 host=None,
                 port=None,
                 username=None,
                 database='default',
                 auth=None,
                 configuration=None,
                 kerberos_service_name=None,
                 password=None,
                 thrift_transport=None,
                 service_mode='binary',
                 http_path=None,
                 is_zookeeper=False,
                 zookeeper_name_space='hiveserver2',
                 keytab_file=None,
                 krb_conf=None):
        """Connect to HiveServer2

        :param host: What host HiveServer2 runs on
        :param port: What port HiveServer2 runs on. Defaults to 10000.
        :param auth: The value of hive.server2.authentication used by HiveServer2.
            Defaults to ``NONE``.
        :param configuration: A dictionary of Hive settings (functionally same as the `set` command)
        :param kerberos_service_name: Use with auth='KERBEROS' only
        :param password: Use with auth='LDAP' or auth='CUSTOM' only
        :param thrift_transport: A ``TTransportBase`` for custom advanced usage.
            Incompatible with host, port, auth, kerberos_service_name, and password.
        :param service_mode: Set thrift transport mode ('http' or 'binary')
        :param http_path: Use with service_mode='http' only
        :param is_zookeeper: Set whether or not zookeeper method
        :param zookeeper_name_space: Use with service_mode='http' and is_zookeeper='true' only
        :param keytab_file: Use with service_mode='http' and auth='KERBEROS' only
        :param krb_conf: pycquery_krb.common.conf.KerberosConf instance. Use with service_mode='http' and auth='KERBEROS' only

        The way to support LDAP and GSSAPI is originated from cloudera/Impyla:
        https://github.com/cloudera/impyla/blob/255b07ed973d47a3395214ed92d35ec0615ebf62
        /impala/_thrift_api.py#L152-L160
        """
        self._opened = False
        self.auth = auth
        self.kerberos_service_name = kerberos_service_name
        self.username = username or getpass.getuser()
        self.password = password
        self.service_mode = service_mode
        self.keytab_file = keytab_file
        self.auth_lock = threading.Lock()
        self.realm = None
        self.kdc = None
        self.kerb_client = None
        self.krb_conf = krb_conf
        self.expired_time = 0
        configuration = configuration or {}
        last_exception = None

        # if (password is not None) != (auth in ('LDAP', 'CUSTOM')):
        #     raise ValueError("Password should be set if and only if in LDAP or CUSTOM mode; "
        #                      "Remove password or use one of those modes")
        if auth == 'KERBEROS':
            if kerberos_service_name is None:
                raise ValueError(
                    "kerberos_service_name must be set in KERBEROS mode")

            if krb_conf is None:
                raise ValueError("krb_conf must be set in KERBEROS mode")

            p = self.username.split('@')
            self.username = p[0]
            if len(p) > 1:
                self.realm = p[1]
            else:
                self.realm = krb_conf.lib_defaults.default_realm

            if self.realm is None:
                raise ValueError(
                    "Kerberos realm must be specified at username or krb5.conf in KERBEROS mode"
                )

            conf_realm = krb_conf.find_realm(self.realm)
            if conf_realm is None:
                raise ValueError("No matching realm in krb5.conf")
            if len(conf_realm.kdc) == 0:
                raise ValueError(
                    "No kdc information in {} realm of krb5.conf".format(
                        self.realm))

            # use the first kdc in the list for implementation simpleness
            p = conf_realm.kdc[0].split(':')
            self.kdc = {"host": p[0], "port": p[1]}

        if thrift_transport is not None:
            has_incompatible_arg = (host is not None or port is not None
                                    or auth is not None
                                    or kerberos_service_name is not None
                                    or password is not None)
            if has_incompatible_arg:
                raise ValueError(
                    "thrift_transport cannot be used with "
                    "host/port/auth/kerberos_service_name/password")

        if is_zookeeper:
            # Look up the node information stored in ZooKeeper and shuffle it randomly.
            remaining_nodes = self._get_hiveserver2_info_with_zookeeper(
                host, port, zookeeper_name_space)
            random.shuffle(remaining_nodes)
        else:
            # Direct access to host and port if not zookeeper.
            remaining_nodes = [{'host': host, 'port': port}]

        # Try the nodes one at a time; if one fails, move on to the next.
        while len(remaining_nodes) > 0:
            node = remaining_nodes.pop()
            self.host = node['host']
            self.port = node['port']
            if thrift_transport is not None:
                self._transport = thrift_transport
            elif service_mode == 'binary':
                if self.port is None:
                    self.port = 10000
                if self.auth is None:
                    self.auth = 'NONE'
                socket = thrift.transport.TSocket.TSocket(self.host, self.port)
                if auth == 'NOSASL':
                    # NOSASL corresponds to hive.server2.authentication=NOSASL in hive-site.xml
                    self._transport = thrift.transport.TTransport.TBufferedTransport(
                        socket)
                elif self.auth in ('LDAP', 'KERBEROS', 'NONE', 'CUSTOM',
                                   'NOSASL'):
                    # Defer import so package dependency is optional
                    import sasl
                    import thrift_sasl

                    if self.auth == 'KERBEROS':
                        # KERBEROS mode in hive.server2.authentication is GSSAPI in sasl library
                        sasl_auth = 'GSSAPI'
                    else:
                        sasl_auth = 'PLAIN'
                        if self.password is None:
                            # Password doesn't matter in NONE mode, just needs to be nonempty.
                            self.password = '******'

                    def sasl_factory():
                        sasl_client = sasl.Client()
                        sasl_client.setAttr('host', self.host)
                        if sasl_auth == 'GSSAPI':
                            sasl_client.setAttr('service',
                                                kerberos_service_name)
                        elif sasl_auth == 'PLAIN':
                            sasl_client.setAttr('username', username)
                            sasl_client.setAttr('password', password)
                        else:
                            raise AssertionError
                        sasl_client.init()
                        return sasl_client

                    self._transport = thrift_sasl.TSaslClientTransport(
                        sasl_factory, sasl_auth, socket)

                else:
                    # All HS2 config options:
                    # https://cwiki.apache.org/confluence/display/Hive/Setting+Up+HiveServer2#SettingUpHiveServer2-Configuration
                    # PAM currently left to end user via thrift_transport option.
                    raise NotImplementedError(
                        "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
                        "authentication are supported with binary mode, got {}"
                        .format(auth))

            elif service_mode == 'http':
                if self.auth is None:
                    self.auth = 'NONE'

                if self.auth in ('NONE', 'LDAP', 'KERBEROS', 'CUSTOM'):
                    if self.password is None:
                        self.password = '******'
                    self._transport = thrift.transport.THttpClient.THttpClient(
                        "http://{}:{}/{}".format(self.host, self.port,
                                                 http_path))
                    if auth == 'KERBEROS':
                        self.set_auth_setting()
                    else:
                        auth_header = HTTPBasicAuth(username, self.password)
                        self._transport.setCustomHeaders(
                            auth_header(Request()).headers)
                else:
                    raise NotImplementedError(
                        "Only NONE, NOSASL, LDAP, KERBEROS, CUSTOM "
                        "authentication is supported with http mode, got {}".
                        format(auth))
            else:
                raise NotImplementedError(
                    "Only binary, http are supported for the transport mode, "
                    "got {}".format(service_mode))

            protocol = thrift.protocol.TBinaryProtocol.TBinaryProtocol(
                self._transport)
            self._client = TCLIService.Client(protocol)
            # oldest version that still contains features we care about
            # "V6 uses binary type for binary payload (was string) and uses columnar result set"
            protocol_version = ttypes.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6

            try:
                self._transport.open()
                open_session_req = ttypes.TOpenSessionReq(
                    client_protocol=protocol_version,
                    configuration=configuration,
                    username=username,
                )
                response = self._client.OpenSession(open_session_req)
                _check_status(response)
                assert response.sessionHandle is not None, "Expected a session from OpenSession"
                self._sessionHandle = response.sessionHandle
                assert response.serverProtocolVersion == protocol_version, \
                    "Unable to handle protocol version {}".format(response.serverProtocolVersion)
                self._opened = True
                with contextlib.closing(self.cursor()) as cursor:
                    cursor.execute('USE `{}`'.format(database))
                atexit.register(self.close)
            except Exception as ex:
                import traceback
                # If connecting to this node fails, log it and try the remaining nodes.
                _logger.warning('Failed to connect to %s:%s. (message = %s)' %
                                (self.host, self.port, 'Error opening session'
                                 if isinstance(ex, EOFError) else ex))
                last_exception = ex
                self.close()
            else:
                # Connected successfully to this node; stop trying the remaining ones.
                _logger.info('Connected to %s:%s' % (self.host, self.port))
                return

        # No node could be connected to: raise the last exception encountered.
        raise last_exception