Exemplo n.º 1
0
  def __init__(self, query_server, user):
    """
    Store the connection settings and build the Thrift client for `query_server`.

    @param query_server mapping describing the server; this method reads
                        'server_name', 'server_host' and 'server_port', plus
                        the optional 'transport_mode' and 'http_url' keys
    @param user object whose `username` is used when get_security() returns
                no auth username
    """
    self.query_server = query_server
    self.user = user

    use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled, auth_username, auth_password = self.get_security()
    # Values are passed as logging arguments so the message is only formatted
    # when the record is actually emitted. auth_password is not part of the message.
    LOG.info(
        '%s: use_sasl=%s, mechanism=%s, kerberos_principal_short_name=%s, impersonation_enabled=%s, auth_username=%s',
        self.query_server['server_name'], use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled, auth_username
    )

    self.use_sasl = use_sasl
    self.kerberos_principal_short_name = kerberos_principal_short_name
    self.impersonation_enabled = impersonation_enabled

    # A single branch picks both the configuration source and the Thrift
    # service class; the Impala modules are imported only when needed.
    if self.query_server['server_name'] == 'impala':
      from impala import conf as impala_conf

      ssl_enabled = impala_conf.SSL.ENABLED.get()
      ca_certs = impala_conf.SSL.CACERTS.get()
      keyfile = impala_conf.SSL.KEY.get()
      certfile = impala_conf.SSL.CERT.get()
      validate = impala_conf.SSL.VALIDATE.get()
      timeout = impala_conf.SERVER_CONN_TIMEOUT.get()

      from ImpalaService import ImpalaHiveServer2Service
      thrift_class = ImpalaHiveServer2Service
    else:
      ssl_enabled = hiveserver2_use_ssl()
      ca_certs = beeswax_conf.SSL.CACERTS.get()
      keyfile = beeswax_conf.SSL.KEY.get()
      certfile = beeswax_conf.SSL.CERT.get()
      validate = beeswax_conf.SSL.VALIDATE.get()
      timeout = beeswax_conf.SERVER_CONN_TIMEOUT.get()
      thrift_class = TCLIService

    # Credentials returned by get_security() take precedence over the
    # requesting user's name; without them no password is sent.
    if auth_username:
      username = auth_username
      password = auth_password
    else:
      username = user.username
      password = None

    self._client = thrift_util.get_client(thrift_class.Client,
                                          query_server['server_host'],
                                          query_server['server_port'],
                                          service_name=query_server['server_name'],
                                          kerberos_principal=kerberos_principal_short_name,
                                          use_sasl=use_sasl,
                                          mechanism=mechanism,
                                          username=username,
                                          password=password,
                                          timeout_seconds=timeout,
                                          use_ssl=ssl_enabled,
                                          ca_certs=ca_certs,
                                          keyfile=keyfile,
                                          certfile=certfile,
                                          validate=validate,
                                          transport_mode=query_server.get('transport_mode', 'socket'),
                                          http_url=query_server.get('http_url', ''))
Exemplo n.º 2
0
    def __init__(self, query_server, user):
        """
        Store the query server and user, then build the TCLIService Thrift client.

        @param query_server mapping providing 'server_host', 'server_port'
                            and 'server_name'
        @param user object whose `username` is passed to the Thrift client
        """
        self.query_server = query_server
        self.user = user

        use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled = self.get_security(
        )
        # Values are passed as logging arguments so the message is only
        # formatted when the record is actually emitted.
        LOG.info(
            'use_sasl=%s, mechanism=%s, kerberos_principal_short_name=%s, impersonation_enabled=%s',
            use_sasl, mechanism, kerberos_principal_short_name,
            impersonation_enabled)

        self.use_sasl = use_sasl
        self.kerberos_principal_short_name = kerberos_principal_short_name
        self.impersonation_enabled = impersonation_enabled
        # Timeout and SSL settings all come from the module-level `conf`.
        self._client = thrift_util.get_client(
            TCLIService.Client,
            query_server['server_host'],
            query_server['server_port'],
            service_name=query_server['server_name'],
            kerberos_principal=kerberos_principal_short_name,
            use_sasl=use_sasl,
            mechanism=mechanism,
            username=user.username,
            timeout_seconds=conf.SERVER_CONN_TIMEOUT.get(),
            use_ssl=conf.SSL.ENABLED.get(),
            ca_certs=conf.SSL.CACERTS.get(),
            keyfile=conf.SSL.KEY.get(),
            certfile=conf.SSL.CERT.get(),
            validate=conf.SSL.VALIDATE.get())
Exemplo n.º 3
0
    def __init__(self, query_server, user):
        """
        Store the query server and user, then build the TCLIService Thrift client.

        @param query_server mapping providing "server_host", "server_port"
                            and "server_name"
        @param user object whose `username` is passed to the Thrift client
        """
        self.query_server = query_server
        self.user = user

        use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled = self.get_security()
        # Values are passed as logging arguments so the message is only
        # formatted when the record is actually emitted.
        LOG.info(
            "use_sasl=%s, mechanism=%s, kerberos_principal_short_name=%s, impersonation_enabled=%s",
            use_sasl,
            mechanism,
            kerberos_principal_short_name,
            impersonation_enabled,
        )

        self.use_sasl = use_sasl
        self.kerberos_principal_short_name = kerberos_principal_short_name
        self.impersonation_enabled = impersonation_enabled
        # Timeout and SSL settings all come from the module-level `conf`.
        self._client = thrift_util.get_client(
            TCLIService.Client,
            query_server["server_host"],
            query_server["server_port"],
            service_name=query_server["server_name"],
            kerberos_principal=kerberos_principal_short_name,
            use_sasl=use_sasl,
            mechanism=mechanism,
            username=user.username,
            timeout_seconds=conf.SERVER_CONN_TIMEOUT.get(),
            use_ssl=conf.SSL.ENABLED.get(),
            ca_certs=conf.SSL.CACERTS.get(),
            keyfile=conf.SSL.KEY.get(),
            certfile=conf.SSL.CERT.get(),
            validate=conf.SSL.VALIDATE.get(),
        )
Exemplo n.º 4
0
  def __init__(self, query_server, user):
    """
    Store the connection settings and build the Thrift client for `query_server`.

    @param query_server mapping describing the server; this method reads
                        'server_name', 'server_host' and 'server_port', plus
                        the optional 'transport_mode' and 'http_url' keys
    @param user object whose `username` is used when get_security() returns
                no auth username
    """
    self.query_server = query_server
    self.user = user

    use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled, auth_username, auth_password = self.get_security()
    # Values are passed as logging arguments so the message is only formatted
    # when the record is actually emitted. auth_password is not part of the message.
    LOG.info(
        '%s: use_sasl=%s, mechanism=%s, kerberos_principal_short_name=%s, impersonation_enabled=%s, auth_username=%s',
        self.query_server['server_name'], use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled, auth_username
    )

    self.use_sasl = use_sasl
    self.kerberos_principal_short_name = kerberos_principal_short_name
    self.impersonation_enabled = impersonation_enabled

    # A single branch picks both the configuration source and the Thrift
    # service class; the Impala modules are imported only when needed.
    if self.query_server['server_name'] == 'impala':
      from impala import conf as impala_conf

      ssl_enabled = impala_conf.SSL.ENABLED.get()
      ca_certs = impala_conf.SSL.CACERTS.get()
      keyfile = impala_conf.SSL.KEY.get()
      certfile = impala_conf.SSL.CERT.get()
      validate = impala_conf.SSL.VALIDATE.get()
      timeout = impala_conf.SERVER_CONN_TIMEOUT.get()

      from ImpalaService import ImpalaHiveServer2Service
      thrift_class = ImpalaHiveServer2Service
    else:
      ssl_enabled = hiveserver2_use_ssl()
      ca_certs = beeswax_conf.SSL.CACERTS.get()
      keyfile = beeswax_conf.SSL.KEY.get()
      certfile = beeswax_conf.SSL.CERT.get()
      validate = beeswax_conf.SSL.VALIDATE.get()
      timeout = beeswax_conf.SERVER_CONN_TIMEOUT.get()
      thrift_class = TCLIService

    # Credentials returned by get_security() take precedence over the
    # requesting user's name; without them no password is sent.
    if auth_username:
      username = auth_username
      password = auth_password
    else:
      username = user.username
      password = None

    self._client = thrift_util.get_client(thrift_class.Client,
                                          query_server['server_host'],
                                          query_server['server_port'],
                                          service_name=query_server['server_name'],
                                          kerberos_principal=kerberos_principal_short_name,
                                          use_sasl=use_sasl,
                                          mechanism=mechanism,
                                          username=username,
                                          password=password,
                                          timeout_seconds=timeout,
                                          use_ssl=ssl_enabled,
                                          ca_certs=ca_certs,
                                          keyfile=keyfile,
                                          certfile=certfile,
                                          validate=validate,
                                          transport_mode=query_server.get('transport_mode', 'socket'),
                                          http_url=query_server.get('http_url', ''))
Exemplo n.º 5
0
  def __init__(self, host, thrift_port, hdfs_port=8020, hadoop_bin_path="hadoop"):
    """
    Record the NameNode connection settings and create the Thrift client.

    @param host hostname or IP of the namenode
    @param thrift_port port on which the Thrift plugin is listening
    @param hdfs_port port on which NameNode IPC is listening
    @param hadoop_bin_path path to find the hadoop wrapper script on the
                           installed system - default is fine if it is in
                           the user's PATH env
    """
    self.host = host
    self.thrift_port = thrift_port
    self.hdfs_port = hdfs_port
    self.hadoop_bin_path = hadoop_bin_path
    self._resolve_hadoop_path()

    client_options = {
      'service_name': "HDFS Namenode HUE Plugin",
      'timeout_seconds': NN_THRIFT_TIMEOUT,
    }
    self.nn_client = thrift_util.get_client(Namenode.Client, host, thrift_port, **client_options)

    # File system instances are cached globally, so the per-user
    # information is kept in a thread-local to stay safe across threads.
    self.thread_local = threading.local()
    self.setuser(DEFAULT_USER, DEFAULT_GROUPS)
    LOG.debug("Initialized HadoopFS: %s:%d (%s)", host, thrift_port, hadoop_bin_path)
Exemplo n.º 6
0
  def __init__(self, query_server, user):
    """
    Store the query server and user, then build the TCLIService Thrift client.

    @param query_server mapping providing 'server_name', 'server_host'
                        and 'server_port'
    @param user object whose `username` is passed to the Thrift client
    """
    self.query_server = query_server
    self.user = user

    use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled = self.get_security()
    # Values are passed as logging arguments so the message is only
    # formatted when the record is actually emitted.
    LOG.info('use_sasl=%s, mechanism=%s, kerberos_principal_short_name=%s, impersonation_enabled=%s',
             use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled)

    self.use_sasl = use_sasl
    self.kerberos_principal_short_name = kerberos_principal_short_name
    self.impersonation_enabled = impersonation_enabled

    # SSL is always off for Impala here; only the timeout is read from impala_conf.
    if self.query_server['server_name'] == 'impala':
      ssl_enabled = False
      timeout = impala_conf.SERVER_CONN_TIMEOUT.get()
    else:
      ssl_enabled = beeswax_conf.SSL.ENABLED.get()
      timeout = beeswax_conf.SERVER_CONN_TIMEOUT.get()

    # The certificate settings are read from beeswax_conf for both server types.
    self._client = thrift_util.get_client(TCLIService.Client,
                                          query_server['server_host'],
                                          query_server['server_port'],
                                          service_name=query_server['server_name'],
                                          kerberos_principal=kerberos_principal_short_name,
                                          use_sasl=use_sasl,
                                          mechanism=mechanism,
                                          username=user.username,
                                          timeout_seconds=timeout,
                                          use_ssl=ssl_enabled,
                                          ca_certs=beeswax_conf.SSL.CACERTS.get(),
                                          keyfile=beeswax_conf.SSL.KEY.get(),
                                          certfile=beeswax_conf.SSL.CERT.get(),
                                          validate=beeswax_conf.SSL.VALIDATE.get())
Exemplo n.º 7
0
    def __init__(self, query_server, user):
        """
        Store the connection settings and build the TCLIService Thrift client.

        @param query_server mapping describing the server; this method reads
                            "server_name", "server_host" and "server_port",
                            plus the optional "transport_mode" and "http_url"
        @param user object whose `username` is used when get_security()
                    returns no auth username
        """
        self.query_server = query_server
        self.user = user

        use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled, auth_username, auth_password = (
            self.get_security()
        )
        # Values are passed as logging arguments so the message is only
        # formatted when the record is actually emitted. auth_password is
        # not part of the message.
        LOG.info(
            "use_sasl=%s, mechanism=%s, kerberos_principal_short_name=%s, impersonation_enabled=%s, auth_username=%s",
            use_sasl,
            mechanism,
            kerberos_principal_short_name,
            impersonation_enabled,
            auth_username,
        )

        self.use_sasl = use_sasl
        self.kerberos_principal_short_name = kerberos_principal_short_name
        self.impersonation_enabled = impersonation_enabled

        # SSL and timeout settings come from the Impala or Beeswax
        # configuration depending on the server name.
        if self.query_server["server_name"] == "impala":
            from impala import conf as impala_conf

            ssl_enabled = impala_conf.SSL.ENABLED.get()
            ca_certs = impala_conf.SSL.CACERTS.get()
            keyfile = impala_conf.SSL.KEY.get()
            certfile = impala_conf.SSL.CERT.get()
            validate = impala_conf.SSL.VALIDATE.get()
            timeout = impala_conf.SERVER_CONN_TIMEOUT.get()
        else:
            ssl_enabled = hiveserver2_use_ssl()
            ca_certs = beeswax_conf.SSL.CACERTS.get()
            keyfile = beeswax_conf.SSL.KEY.get()
            certfile = beeswax_conf.SSL.CERT.get()
            validate = beeswax_conf.SSL.VALIDATE.get()
            timeout = beeswax_conf.SERVER_CONN_TIMEOUT.get()

        # Credentials returned by get_security() take precedence over the
        # requesting user's name; without them no password is sent.
        if auth_username:
            username = auth_username
            password = auth_password
        else:
            username = user.username
            password = None

        self._client = thrift_util.get_client(
            TCLIService.Client,
            query_server["server_host"],
            query_server["server_port"],
            service_name=query_server["server_name"],
            kerberos_principal=kerberos_principal_short_name,
            use_sasl=use_sasl,
            mechanism=mechanism,
            username=username,
            password=password,
            timeout_seconds=timeout,
            use_ssl=ssl_enabled,
            ca_certs=ca_certs,
            keyfile=keyfile,
            certfile=certfile,
            validate=validate,
            transport_mode=query_server.get("transport_mode", "socket"),
            http_url=query_server.get("http_url", ""),
        )
Exemplo n.º 8
0
 def __init__(self, host, thrift_port):
     """
     Create the JobTracker Thrift client and set the default user.

     @param host host passed to the Thrift client
     @param thrift_port port passed to the Thrift client
     """
     client_options = {
         "service_name": "Hadoop MR JobTracker",
         "timeout_seconds": JT_THRIFT_TIMEOUT,
     }
     self.client = thrift_util.get_client(Jobtracker.Client, host, thrift_port, **client_options)
     self.host, self.thrift_port = host, thrift_port
     # The request context is created before setuser() is called.
     self.request_context = RequestContext()
     self.setuser(DEFAULT_USER, DEFAULT_GROUPS)
Exemplo n.º 9
0
 def setup_class(cls):
     """Start a SimpleThriftServer process and create a client for localhost on its port."""
     server = SimpleThriftServer()
     cls.server = server
     server.start_server_process()
     # The service name is passed positionally, exactly as before.
     cls.client = thrift_util.get_client(
         TestService.Client, 'localhost', server.port, 'Hue Unit Test Client', timeout_seconds=1
     )
Exemplo n.º 10
0
def db_client():
  """Build the Thrift client for the beeswax server and return it wrapped for Unicode handling."""

  class UnicodeBeeswaxClient(object):
    """Thin proxy over the thrift client that takes and returns Unicode."""

    def __init__(self, client):
      self._client = client

    def __getattr__(self, attr):
      # Anything not found on the proxy itself is forwarded to the wrapped client.
      own_attrs = self.__dict__
      if attr in own_attrs:
        return own_attrs[attr]
      return getattr(self._client, attr)

    def query(self, query):
      _encode_struct_attr(query, 'query')
      return self._client.query(query)

    def explain(self, query):
      _encode_struct_attr(query, 'query')
      return _decode_struct_attr(self._client.explain(query), 'textual')

    def fetch(self, *args, **kwargs):
      result = self._client.fetch(*args, **kwargs)
      if not result.ready:
        # Nothing to convert until the result is marked ready.
        return result
      result.columns = [force_unicode(name, errors='replace') for name in result.columns]
      result.data = [force_unicode(line, errors='replace') for line in result.data]
      return result

    def dump_config(self):
      return force_unicode(self._client.dump_config(), errors='replace')

    def echo(self, msg):
      encoded = smart_str(msg)
      return self._client.echo(encoded)

    def get_log(self, *args, **kwargs):
      return force_unicode(self._client.get_log(*args, **kwargs), errors='replace')

    def get_default_configuration(self, *args, **kwargs):
      entries = self._client.get_default_configuration(*args, **kwargs)
      for entry in entries:
        for field in ('key', 'value', 'desc'):
          _decode_struct_attr(entry, field)
      return entries

    def get_results_metadata(self, *args, **kwargs):
      metadata = self._client.get_results_metadata(*args, **kwargs)
      return _decode_struct_attr(metadata, 'table_dir')

  raw_client = thrift_util.get_client(
      BeeswaxService.Client,
      conf.BEESWAX_SERVER_HOST.get(),
      conf.BEESWAX_SERVER_PORT.get(),
      service_name="Beeswax (Hive UI) Server",
      timeout_seconds=conf.BEESWAX_SERVER_CONN_TIMEOUT.get())
  return UnicodeBeeswaxClient(raw_client)
Exemplo n.º 11
0
 def connectCluster(self, name):
   """Look up the cluster called `name` and return a Thrift client for its host and port."""
   cluster = self.getCluster(name)
   client_class = get_client_type()
   # Kerberos and SASL are disabled and no timeout is set for this client.
   options = {
     'service_name': "Hue HBase Thrift Client for %s" % name,
     'kerberos_principal': None,
     'use_sasl': False,
     'timeout_seconds': None,
   }
   return thrift_util.get_client(client_class, cluster['host'], cluster['port'], **options)
Exemplo n.º 12
0
  def __init__(self, query_server, user):
    """
    Store the query server and user, then create the TCLIService Thrift client.

    @param query_server mapping providing 'server_host' and 'server_port'
    @param user stored on the instance; not otherwise used here
    """
    self.query_server, self.user = query_server, user

    host = query_server['server_host']
    port = query_server['server_port']
    self._client = thrift_util.get_client(
      TCLIService.Client, host, port,
      service_name='Hive Server 2',
      timeout_seconds=conf.BEESWAX_SERVER_CONN_TIMEOUT.get(),
    )
Exemplo n.º 13
0
 def connectCluster(self, name):
   """Look up the cluster called `name` and return a Thrift client for its host and port."""
   cluster = self.getCluster(name)
   client_class = get_client_type()
   service_name = "Hue HBase Thrift Client for %s" % name
   # Kerberos and SASL are disabled and no timeout is set for this client.
   return thrift_util.get_client(
     client_class, cluster['host'], cluster['port'],
     service_name=service_name, kerberos_principal=None, use_sasl=False, timeout_seconds=None,
   )
Exemplo n.º 14
0
    def __init__(self, query_server, user):
        """
        Store the query server and user, then create the TCLIService Thrift client.

        @param query_server mapping providing 'server_host' and 'server_port'
        @param user stored on the instance; not otherwise used here
        """
        self.query_server, self.user = query_server, user

        host = query_server['server_host']
        port = query_server['server_port']
        client_options = {
            'service_name': 'Hive Server 2',
            'timeout_seconds': conf.BEESWAX_SERVER_CONN_TIMEOUT.get(),
        }
        self._client = thrift_util.get_client(TCLIService.Client, host, port, **client_options)
Exemplo n.º 15
0
def get_client():
  """
  Return the module-wide stub used to talk to the server.

  The stub is created on the first call and stored in CACHED_CLIENT;
  later calls return the stored object.
  """
  global CACHED_CLIENT
  if CACHED_CLIENT is not None:
    return CACHED_CLIENT

  host = conf.JOBSUBD_HOST.get()
  port = conf.JOBSUBD_PORT.get()
  CACHED_CLIENT = thrift_util.get_client(
    JobSubmissionService.Client, host, port,
    service_name="JobSubmission Daemon",
    timeout_seconds=JOBSUB_THRIFT_TIMEOUT_SECS)
  return CACHED_CLIENT
Exemplo n.º 16
0
    def __init__(self, query_server, user):
        """
        Store the connection settings and build the TCLIService Thrift client.

        @param query_server mapping providing 'server_name', 'server_host'
                            and 'server_port'
        @param user object whose `username` is used when get_security()
                    returns no LDAP username
        """
        self.query_server = query_server
        self.user = user

        use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled, ldap_username, ldap_password = self.get_security(
        )
        # Values are passed as logging arguments so the message is only
        # formatted when the record is actually emitted.
        LOG.info(
            'use_sasl=%s, mechanism=%s, kerberos_principal_short_name=%s, impersonation_enabled=%s',
            use_sasl, mechanism, kerberos_principal_short_name,
            impersonation_enabled)

        self.use_sasl = use_sasl
        self.kerberos_principal_short_name = kerberos_principal_short_name
        self.impersonation_enabled = impersonation_enabled

        # SSL and timeout settings come from the Impala or Beeswax
        # configuration depending on the server name.
        if self.query_server['server_name'] == 'impala':
            from impala import conf as impala_conf

            ssl_enabled = impala_conf.SSL.ENABLED.get()
            ca_certs = impala_conf.SSL.CACERTS.get()
            keyfile = impala_conf.SSL.KEY.get()
            certfile = impala_conf.SSL.CERT.get()
            validate = impala_conf.SSL.VALIDATE.get()
            timeout = impala_conf.SERVER_CONN_TIMEOUT.get()
        else:
            ssl_enabled = beeswax_conf.SSL.ENABLED.get()
            ca_certs = beeswax_conf.SSL.CACERTS.get()
            keyfile = beeswax_conf.SSL.KEY.get()
            certfile = beeswax_conf.SSL.CERT.get()
            validate = beeswax_conf.SSL.VALIDATE.get()
            timeout = beeswax_conf.SERVER_CONN_TIMEOUT.get()

        # LDAP credentials returned by get_security() take precedence over
        # the requesting user's name; without them no password is sent.
        if ldap_username:
            username = ldap_username
            password = ldap_password
        else:
            username = user.username
            password = None

        self._client = thrift_util.get_client(
            TCLIService.Client,
            query_server['server_host'],
            query_server['server_port'],
            service_name=query_server['server_name'],
            kerberos_principal=kerberos_principal_short_name,
            use_sasl=use_sasl,
            mechanism=mechanism,
            username=username,
            password=password,
            timeout_seconds=timeout,
            use_ssl=ssl_enabled,
            ca_certs=ca_certs,
            keyfile=keyfile,
            certfile=certfile,
            validate=validate)
Exemplo n.º 17
0
Arquivo: api.py Projeto: liujunlin/hue
 def connectCluster(self, name):
   """Return a Thrift client for the cluster called `name`, using the settings from _get_security()."""
   security = self._get_security()
   cluster = self.getCluster(name)
   client_class = get_client_type()
   options = {
     'service_name': "Hue HBase Thrift Client for %s" % name,
     'kerberos_principal': security['kerberos_principal_short_name'],
     'use_sasl': security['use_sasl'],
     'timeout_seconds': None,
     'transport': conf.THRIFT_TRANSPORT.get(),
   }
   return thrift_util.get_client(client_class, cluster['host'], cluster['port'], **options)
Exemplo n.º 18
0
    def __init__(self, host, port, username):
        """
        Remember `username` and create the SentryPolicyService Thrift client.

        @param host host passed to the Thrift client
        @param port port passed to the Thrift client
        @param username stored on the instance and passed to the Thrift client
        """
        self.username = username

        # TODO: kerberos
        client_options = {
            'service_name': "SentryPolicyService",
            'username': self.username,
            'timeout_seconds': 30,
            'multiple': True,
        }
        self.client = thrift_util.get_client(SentryPolicyService.Client, host, port, **client_options)
Exemplo n.º 19
0
 def __init__(self, host, thrift_port):
   """
   Create the JobTracker plugin Thrift client and set the default user.

   @param host host passed to the Thrift client
   @param thrift_port port passed to the Thrift client
   """
   client_options = {
     'service_name': "Hadoop MR JobTracker HUE Plugin",
     'timeout_seconds': JT_THRIFT_TIMEOUT,
   }
   self.client = thrift_util.get_client(Jobtracker.Client, host, thrift_port, **client_options)
   self.host, self.thrift_port = host, thrift_port
   # One LiveJobTracker may be shared by several threads, so the
   # stateful parts are confined to a thread-local.
   self.thread_local = threading.local()
   self.setuser(DEFAULT_USER, DEFAULT_GROUPS)
Exemplo n.º 20
0
  def __init__(self, host, port, username):
    """
    Remember `username` and create the SentryPolicyService Thrift client.

    @param host host passed to the Thrift client
    @param port port passed to the Thrift client
    @param username stored on the instance and passed to the Thrift client
    """
    self.username = username

    # TODO: kerberos
    client_options = dict(
        service_name="SentryPolicyService",
        username=self.username,
        timeout_seconds=30,
        multiple=True,
    )
    self.client = thrift_util.get_client(SentryPolicyService.Client, host, port, **client_options)
Exemplo n.º 21
0
 def connectCluster(self, name):
     """Return a Thrift client for the cluster called `name`, using the settings from _get_security()."""
     security = self._get_security()
     cluster = self.getCluster(name)
     client_class = get_client_type()
     service_name = "Hue HBase Thrift Client for %s" % name
     principal = security['kerberos_principal_short_name']
     sasl_enabled = security['use_sasl']
     return thrift_util.get_client(
         client_class, cluster['host'], cluster['port'],
         service_name=service_name,
         kerberos_principal=principal,
         use_sasl=sasl_enabled,
         timeout_seconds=None,
         transport=conf.THRIFT_TRANSPORT.get(),
     )
Exemplo n.º 22
0
  def __init__(self, query_server, user):
    """
    Store the query server and user, then create the TCLIService Thrift client.

    @param query_server mapping providing 'server_host', 'server_port' and
                        'server_name'; also passed to BeeswaxClient.get_security()
    @param user stored on the instance; not otherwise used here
    """
    self.query_server, self.user = query_server, user

    sasl_enabled, principal = BeeswaxClient.get_security(query_server)

    host = query_server['server_host']
    port = query_server['server_port']
    client_options = {
      'service_name': query_server['server_name'],
      'kerberos_principal': principal,
      'use_sasl': sasl_enabled,
      'timeout_seconds': conf.BEESWAX_SERVER_CONN_TIMEOUT.get(),
    }
    self._client = thrift_util.get_client(TCLIService.Client, host, port, **client_options)
Exemplo n.º 23
0
def get_client():
    """
    Return the module-wide stub used to talk to the server.

    The stub is created on the first call and stored in CACHED_CLIENT;
    later calls return the stored object.
    """
    global CACHED_CLIENT
    if CACHED_CLIENT is not None:
        return CACHED_CLIENT

    host = conf.JOBSUBD_HOST.get()
    port = conf.JOBSUBD_PORT.get()
    CACHED_CLIENT = thrift_util.get_client(
        JobSubmissionService.Client, host, port,
        service_name="JobSubmission Daemon",
        timeout_seconds=JOBSUB_THRIFT_TIMEOUT_SECS)
    return CACHED_CLIENT
Exemplo n.º 24
0
  def __init__(self, client_class, query_server, user):
    """
    Store the query server and user, then create a Thrift client from `client_class`.

    @param client_class object whose `Client` attribute is handed to the Thrift utility
    @param query_server mapping providing 'server_host' and 'server_port';
                        also passed to BeeswaxClient.get_security()
    @param user stored on the instance; not otherwise used here
    """
    self.query_server, self.user = query_server, user

    sasl_enabled, principal = BeeswaxClient.get_security(query_server)

    thrift_client_cls = client_class.Client
    host = query_server['server_host']
    port = query_server['server_port']
    self._client = thrift_util.get_client(
      thrift_client_cls, host, port,
      service_name='Impala',
      kerberos_principal=principal,
      use_sasl=sasl_enabled,
      timeout_seconds=conf.BEESWAX_SERVER_CONN_TIMEOUT.get(),
    )
Exemplo n.º 25
0
    def __init__(self, host, port, username):
        """
        Remember `username`, load the security settings and create the Sentry Thrift client.

        @param host host passed to the Thrift client
        @param port port passed to the Thrift client
        @param username stored on the instance and passed to the Thrift client
        """
        self.username = username
        self.security = self._get_security()

        security = self.security
        client_options = {
            'service_name': "SentryPolicyService",
            'username': self.username,
            'timeout_seconds': 30,
            'multiple': True,
            'kerberos_principal': security['kerberos_principal_short_name'],
            'use_sasl': security['use_sasl'],
            'mechanism': security['mechanism'],
        }
        self.client = thrift_util.get_client(SentryPolicyService.Client, host, port, **client_options)
Exemplo n.º 26
0
  def __init__(self, query_server, user):
    """
    Store the query server and user, then create the TCLIService Thrift client.

    @param query_server mapping providing 'server_host', 'server_port' and
                        'server_name'; also passed to HiveServerClient.get_security()
    @param user object whose `username` is passed to the Thrift client
    """
    self.query_server, self.user = query_server, user

    sasl_enabled, sasl_mechanism, principal = HiveServerClient.get_security(query_server)

    host = query_server['server_host']
    port = query_server['server_port']
    client_options = {
      'service_name': query_server['server_name'],
      'kerberos_principal': principal,
      'use_sasl': sasl_enabled,
      'mechanism': sasl_mechanism,
      'username': user.username,
      'timeout_seconds': conf.BEESWAX_SERVER_CONN_TIMEOUT.get(),
    }
    self._client = thrift_util.get_client(TCLIService.Client, host, port, **client_options)
Exemplo n.º 27
0
Arquivo: api.py Projeto: QLGu/hue
  def connectCluster(self, name):
    """Return a Thrift client for the cluster called `name`, over HTTP or a socket."""
    security = self._get_security()
    cluster = self.getCluster(name)

    client_class = get_client_type()
    host = cluster['host']
    port = cluster['port']
    service_name = "Hue HBase Thrift Client for %s" % name
    principal = security['kerberos_principal_short_name']
    sasl_enabled = security['use_sasl']
    transport = conf.THRIFT_TRANSPORT.get()

    if is_using_thrift_http():
      mode = 'http'
    else:
      mode = 'socket'

    # The URL is always built, whichever transport mode was selected.
    if is_using_thrift_ssl():
      scheme = 'https://'
    else:
      scheme = 'http://'
    url = scheme + cluster['host'] + ':' + str(cluster['port'])

    return thrift_util.get_client(
        client_class, host, port,
        service_name=service_name,
        kerberos_principal=principal,
        use_sasl=sasl_enabled,
        timeout_seconds=30,
        transport=transport,
        transport_mode=mode,
        http_url=url)
Exemplo n.º 28
0
 def __init__(self, host, port, username):
   """
   Remember `username`, load the security settings and create the Sentry Thrift client.

   @param host host passed to the Thrift client
   @param port port passed to the Thrift client
   @param username stored on the instance and passed to the Thrift client
   """
   self.username = username
   self.security = self._get_security()

   security = self.security
   client_options = dict(
       service_name="SentryPolicyService",
       username=self.username,
       timeout_seconds=30,
       multiple=True,
       kerberos_principal=security['kerberos_principal_short_name'],
       use_sasl=security['use_sasl'],
       mechanism=security['mechanism'],
   )
   self.client = thrift_util.get_client(SentryPolicyService.Client, host, port, **client_options)
Exemplo n.º 29
0
 def __init__(self, host, thrift_port, security_enabled=False, kerberos_principal="mapred"):
   """
   Create the JobTracker plugin Thrift client and set the default user.

   @param host host passed to the Thrift client
   @param thrift_port port passed to the Thrift client
   @param security_enabled passed to the Thrift client as `use_sasl`
   @param kerberos_principal Kerberos principal passed to the Thrift client
   """
   client_options = {
     'service_name': "Hadoop MR JobTracker HUE Plugin",
     'use_sasl': security_enabled,
     'kerberos_principal': kerberos_principal,
     'timeout_seconds': JT_THRIFT_TIMEOUT,
   }
   self.client = thrift_util.get_client(Jobtracker.Client, host, thrift_port, **client_options)
   self.host, self.thrift_port = host, thrift_port
   self.security_enabled = security_enabled
   # One LiveJobTracker may be shared by several threads, so the
   # stateful parts are confined to a thread-local.
   self.thread_local = threading.local()
   self.setuser(DEFAULT_USER)
Exemplo n.º 30
0
Arquivo: api.py Projeto: dulems/hue-1
    def connectCluster(self, name):
        """Return a Thrift client for the cluster called `name`, over HTTP or a socket."""
        security = self._get_security()
        cluster = self.getCluster(name)

        client_class = get_client_type()
        host = cluster['host']
        port = cluster['port']
        service_name = "Hue HBase Thrift Client for %s" % name
        principal = security['kerberos_principal_short_name']
        sasl_enabled = security['use_sasl']
        transport = conf.THRIFT_TRANSPORT.get()

        if is_using_thrift_http():
            mode = 'http'
        else:
            mode = 'socket'

        # The URL is always built, whichever transport mode was selected.
        if is_using_thrift_ssl():
            scheme = 'https://'
        else:
            scheme = 'http://'
        url = scheme + cluster['host'] + ':' + str(cluster['port'])

        return thrift_util.get_client(
            client_class,
            host,
            port,
            service_name=service_name,
            kerberos_principal=principal,
            use_sasl=sasl_enabled,
            timeout_seconds=30,
            transport=transport,
            transport_mode=mode,
            http_url=url)
Exemplo n.º 31
0
    def __init__(self,
                 host,
                 thrift_port,
                 hdfs_port=8020,
                 nn_kerberos_principal="hdfs",
                 dn_kerberos_principal="hdfs",
                 security_enabled=False,
                 hadoop_bin_path="hadoop",
                 temp_dir='/tmp'):
        """
        Record the NameNode connection settings and create the Thrift client.

        @param host hostname or IP of the namenode
        @param thrift_port port on which the Thrift plugin is listening
        @param hdfs_port port on which NameNode IPC is listening
        @param nn_kerberos_principal Kerberos principal passed to the
                                     NameNode Thrift client
        @param dn_kerberos_principal stored on the instance; not used by
                                     this constructor
        @param security_enabled passed to the Thrift client as `use_sasl`
        @param hadoop_bin_path path to find the hadoop wrapper script on the
                               installed system - default is fine if it is in
                               the user's PATH env
        @param temp_dir Temporary directory, for mktemp()
        """
        self.host = host
        self.thrift_port = thrift_port
        self.hdfs_port = hdfs_port
        # Set once, before _resolve_hadoop_path() runs; the original code
        # repeated this assignment a second time after that call.
        self.security_enabled = security_enabled
        self.nn_kerberos_principal = nn_kerberos_principal
        self.dn_kerberos_principal = dn_kerberos_principal
        self.hadoop_bin_path = hadoop_bin_path
        self._resolve_hadoop_path()
        self._temp_dir = temp_dir

        self.nn_client = thrift_util.get_client(
            Namenode.Client,
            host,
            thrift_port,
            service_name="HDFS Namenode HUE Plugin",
            use_sasl=security_enabled,
            kerberos_principal=nn_kerberos_principal,
            timeout_seconds=NN_THRIFT_TIMEOUT)

        # The file systems are cached globally.  We store
        # user information in a thread-local variable so that
        # safety can be preserved there.
        self.thread_local = threading.local()
        self.setuser(DEFAULT_USER)
        LOG.debug("Initialized HadoopFS: %s:%d (%s)", host, thrift_port,
                  hadoop_bin_path)
Exemplo n.º 32
0
    def __init__(self, query_server, user):
        """
        Store the connection settings and build the TCLIService Thrift client.

        @param query_server mapping providing 'server_host', 'server_port'
                            and 'server_name', plus the optional
                            'transport_mode' (default 'binary') and
                            'http_url' (default '') keys
        @param user object whose `username` is used when get_security()
                    returns no LDAP username
        """
        self.query_server = query_server
        self.user = user

        use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled, ldap_username, ldap_password = self.get_security(
        )
        transport_mode = query_server.get('transport_mode', 'binary')
        http_url = query_server.get('http_url', '')
        # Values are passed as logging arguments so the message is only
        # formatted when the record is actually emitted.
        LOG.info(
            'use_sasl=%s, mechanism=%s, kerberos_principal_short_name=%s, impersonation_enabled=%s, transport_mode=%s, http_url=%s',
            use_sasl, mechanism, kerberos_principal_short_name,
            impersonation_enabled, transport_mode, http_url)

        self.use_sasl = use_sasl
        self.kerberos_principal_short_name = kerberos_principal_short_name
        self.impersonation_enabled = impersonation_enabled

        ssl_enabled = beeswax_conf.SSL.ENABLED.get()
        timeout = beeswax_conf.SERVER_CONN_TIMEOUT.get()

        # LDAP credentials returned by get_security() take precedence over
        # the requesting user's name; without them no password is sent.
        if ldap_username:
            username = ldap_username
            password = ldap_password
        else:
            username = user.username
            password = None

        self._client = thrift_util.get_client(
            TCLIService.Client,
            query_server['server_host'],
            query_server['server_port'],
            service_name=query_server['server_name'],
            kerberos_principal=kerberos_principal_short_name,
            use_sasl=use_sasl,
            mechanism=mechanism,
            username=username,
            password=password,
            timeout_seconds=timeout,
            use_ssl=ssl_enabled,
            ca_certs=beeswax_conf.SSL.CACERTS.get(),
            keyfile=beeswax_conf.SSL.KEY.get(),
            certfile=beeswax_conf.SSL.CERT.get(),
            validate=beeswax_conf.SSL.VALIDATE.get(),
            transport_mode=transport_mode,
            http_url=http_url)
Exemplo n.º 33
0
  def __init__(self, query_server, user):
    """
    Create the TCLIService Thrift client for ``query_server``.

    Security settings are resolved by ``HiveServerClient.get_security`` and
    the connection is opened under ``user.username``.  ``query_server`` must
    provide the 'server_host', 'server_port' and 'server_name' keys.
    """
    self.query_server = query_server
    self.user = user

    security_settings = HiveServerClient.get_security(query_server)
    use_sasl, mechanism, kerberos_principal_short_name, hiveserver2_impersonation_enabled = security_settings

    message = 'use_sasl=%s, mechanism=%s, kerberos_principal_short_name=%s, hiveserver2_impersonation_enabled=%s' % (
        use_sasl, mechanism, kerberos_principal_short_name, hiveserver2_impersonation_enabled)
    LOG.info(message)

    self.hiveserver2_impersonation_enabled = hiveserver2_impersonation_enabled
    self._client = thrift_util.get_client(
        TCLIService.Client,
        query_server['server_host'],
        query_server['server_port'],
        service_name=query_server['server_name'],
        kerberos_principal=kerberos_principal_short_name,
        use_sasl=use_sasl,
        mechanism=mechanism,
        username=user.username,
        timeout_seconds=conf.BEESWAX_SERVER_CONN_TIMEOUT.get())
Exemplo n.º 34
0
  def __init__(self, query_server, user):
    """
    Create the TCLIService Thrift client for ``query_server``.

    The SASL flag, mechanism, Kerberos principal short name and impersonation
    flag are obtained from ``HiveServerClient.get_security``; the connection
    is opened under ``user.username`` with the timeout from
    ``conf.SERVER_CONN_TIMEOUT``.
    """
    self.query_server = query_server
    self.user = user

    security_settings = HiveServerClient.get_security(query_server)
    use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled = security_settings

    message = 'use_sasl=%s, mechanism=%s, kerberos_principal_short_name=%s, impersonation_enabled=%s' % (
        use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled)
    LOG.info(message)

    self.impersonation_enabled = impersonation_enabled
    self._client = thrift_util.get_client(
        TCLIService.Client,
        query_server['server_host'],
        query_server['server_port'],
        service_name=query_server['server_name'],
        kerberos_principal=kerberos_principal_short_name,
        use_sasl=use_sasl,
        mechanism=mechanism,
        username=user.username,
        timeout_seconds=conf.SERVER_CONN_TIMEOUT.get())
Exemplo n.º 35
0
  def __init__(self, query_server, user):
    """
    Open the Thrift connection to the server described by ``query_server``.

    ``query_server`` supplies 'server_host', 'server_port' and 'server_name',
    plus the optional 'transport_mode' (default 'binary') and 'http_url'
    (default '') entries.  Security parameters come from
    ``self.get_security()``; SSL material and the connection timeout are read
    from ``beeswax_conf``.
    """
    self.query_server = query_server
    self.user = user

    (use_sasl, mechanism, kerberos_principal_short_name,
     impersonation_enabled, ldap_username, ldap_password) = self.get_security()
    transport_mode = query_server.get('transport_mode', 'binary')
    http_url = query_server.get('http_url', '')

    security_summary = 'use_sasl=%s, mechanism=%s, kerberos_principal_short_name=%s, impersonation_enabled=%s, transport_mode=%s, http_url=%s' % (
        use_sasl, mechanism, kerberos_principal_short_name, impersonation_enabled, transport_mode, http_url)
    LOG.info(security_summary)

    self.use_sasl = use_sasl
    self.kerberos_principal_short_name = kerberos_principal_short_name
    self.impersonation_enabled = impersonation_enabled

    ssl_conf = beeswax_conf.SSL
    ssl_enabled = ssl_conf.ENABLED.get()
    timeout = beeswax_conf.SERVER_CONN_TIMEOUT.get()

    # LDAP credentials returned by get_security() take precedence over the
    # given user's name; without them no password is sent.
    if ldap_username:
      username, password = ldap_username, ldap_password
    else:
      username, password = user.username, None

    self._client = thrift_util.get_client(
        TCLIService.Client,
        query_server['server_host'],
        query_server['server_port'],
        service_name=query_server['server_name'],
        kerberos_principal=kerberos_principal_short_name,
        use_sasl=use_sasl,
        mechanism=mechanism,
        username=username,
        password=password,
        timeout_seconds=timeout,
        use_ssl=ssl_enabled,
        ca_certs=ssl_conf.CACERTS.get(),
        keyfile=ssl_conf.KEY.get(),
        certfile=ssl_conf.CERT.get(),
        validate=ssl_conf.VALIDATE.get(),
        transport_mode=transport_mode,
        http_url=http_url)
Exemplo n.º 36
0
  def __init__(self, host, thrift_port, hdfs_port=8020, hadoop_bin_path="hadoop"):
    """
    Connect to the NameNode Thrift plugin and install the default user.

    @param host hostname or IP of the namenode
    @param thrift_port port on which the Thrift plugin is listening
    @param hdfs_port port on which NameNode IPC is listening
    @param hadoop_bin_path path to find the hadoop wrapper script on the
                           installed system - default is fine if it is in
                           the user's PATH env
    """
    self.host = host
    self.thrift_port = thrift_port
    self.hdfs_port = hdfs_port

    # hadoop_bin_path must be stored before _resolve_hadoop_path() is called.
    self.hadoop_bin_path = hadoop_bin_path
    self._resolve_hadoop_path()

    self.nn_client = thrift_util.get_client(
        Namenode.Client,
        host,
        thrift_port,
        service_name="HDFS Namenode",
        timeout_seconds=NN_THRIFT_TIMEOUT)

    # Start from a fresh request context, then apply the default identity.
    self.request_context = RequestContext()
    self.setuser(DEFAULT_USER, DEFAULT_GROUPS)

    LOG.debug("Initialized HadoopFS: %s:%d (%s)",
              host, thrift_port, hadoop_bin_path)
Exemplo n.º 37
0
 def __init__(self, host, thrift_port, security_enabled=False,
              kerberos_principal="mapred"):
     """
     Connect to the JobTracker Thrift plugin at ``host``:``thrift_port``.

     ``security_enabled`` is forwarded as the client's ``use_sasl`` flag and
     ``kerberos_principal`` as its Kerberos principal.
     """
     self.host = host
     self.thrift_port = thrift_port
     self.security_enabled = security_enabled

     self.client = thrift_util.get_client(
         Jobtracker.Client, host, thrift_port,
         service_name="Hadoop MR JobTracker HUE Plugin",
         use_sasl=security_enabled,
         kerberos_principal=kerberos_principal,
         timeout_seconds=JT_THRIFT_TIMEOUT)

     # A single LiveJobTracker may be shared by several threads, so the
     # stateful parts are kept in a thread-local.
     self.thread_local = threading.local()
     self.setuser(DEFAULT_USER)
Exemplo n.º 38
0
  def __init__(self, host, thrift_port, hdfs_port=8020,
               nn_kerberos_principal="hdfs",
               dn_kerberos_principal="hdfs",
               security_enabled=False,
               hadoop_bin_path="hadoop",
               temp_dir='/tmp'):
    """
    Connect to the NameNode Thrift plugin and install the default user.

    @param host hostname or IP of the namenode
    @param thrift_port port on which the Thrift plugin is listening
    @param hdfs_port port on which NameNode IPC is listening
    @param nn_kerberos_principal Kerberos principal passed to the NameNode
                                 Thrift client
    @param dn_kerberos_principal stored on the instance; not used here
    @param security_enabled forwarded as the Thrift client's use_sasl flag
    @param hadoop_bin_path path to find the hadoop wrapper script on the
                           installed system - default is fine if it is in
                           the user's PATH env
    @param temp_dir Temporary directory, for mktemp()
    """
    self.host = host
    self.thrift_port = thrift_port
    self.hdfs_port = hdfs_port

    self.security_enabled = security_enabled
    self.nn_kerberos_principal = nn_kerberos_principal
    self.dn_kerberos_principal = dn_kerberos_principal

    # hadoop_bin_path must be stored before _resolve_hadoop_path() is called.
    self.hadoop_bin_path = hadoop_bin_path
    self._resolve_hadoop_path()

    # NOTE(review): security_enabled was already stored above; this second
    # assignment looks redundant -- confirm before removing.
    self.security_enabled = security_enabled
    self._temp_dir = temp_dir

    self.nn_client = thrift_util.get_client(
        Namenode.Client,
        host,
        thrift_port,
        service_name="HDFS Namenode HUE Plugin",
        use_sasl=security_enabled,
        kerberos_principal=nn_kerberos_principal,
        timeout_seconds=NN_THRIFT_TIMEOUT)

    # File system objects are cached globally, so the per-user state lives in
    # a thread-local to keep it safe across threads.
    self.thread_local = threading.local()
    self.setuser(DEFAULT_USER)

    LOG.debug("Initialized HadoopFS: %s:%d (%s)",
              host, thrift_port, hadoop_bin_path)
Exemplo n.º 39
0
def db_client(query_server):
  """
  Build the Thrift client used to talk to the Beeswax server.

  ``query_server`` must provide the 'server_host', 'server_port' and
  'server_name' keys.  SASL is requested when a job-submission cluster
  configuration exists and its SECURITY_ENABLED setting is truthy.  The raw
  client is returned wrapped in a proxy that converts text to and from
  Unicode.
  """

  class UnicodeBeeswaxClient(object):
    """Proxy around the Thrift client that takes and returns Unicode."""

    def __init__(self, client):
      self._client = client

    def __getattr__(self, attr):
      # Anything not overridden on this proxy is forwarded to the raw client.
      own_attrs = self.__dict__
      if attr in own_attrs:
        return own_attrs[attr]
      return getattr(self._client, attr)

    def query(self, query):
      """Encode the query text in place, then submit the query."""
      _encode_struct_attr(query, 'query')
      return self._client.query(query)

    def explain(self, query):
      """Encode the query text, run explain and decode its 'textual' field."""
      _encode_struct_attr(query, 'query')
      explanation = self._client.explain(query)
      return _decode_struct_attr(explanation, 'textual')

    def fetch(self, *args, **kwargs):
      """
      Fetch results and, once they are ready, convert columns and rows to
      Unicode.

      When the server configuration reports support_start_over == 'false',
      the second positional argument (presumably the start_over flag --
      confirm against callers) is forced to False before delegating.
      """
      start_over_unsupported = any(
          setting.key == 'support_start_over' and setting.value == 'false'
          for setting in self.get_default_configuration(False))
      if start_over_unsupported:
        args = list(args)
        args[1] = False

      results = self._client.fetch(*args, **kwargs)
      if results.ready:
        results.columns = [force_unicode(name, errors='replace') for name in results.columns]
        results.data = [force_unicode(line, errors='replace') for line in results.data]
      return results

    def dump_config(self):
      """Return the server's configuration dump as Unicode."""
      return force_unicode(self._client.dump_config(), errors='replace')

    def echo(self, msg):
      """Send ``msg`` to the server's echo call as a byte string."""
      return self._client.echo(smart_str(msg))

    def get_log(self, *args, **kwargs):
      """Return the requested log as Unicode."""
      log_text = self._client.get_log(*args, **kwargs)
      return force_unicode(log_text, errors='replace')

    def get_default_configuration(self, *args, **kwargs):
      """Return the default configuration with key/value/desc decoded."""
      settings = self._client.get_default_configuration(*args, **kwargs)
      for setting in settings:
        for field in ('key', 'value', 'desc'):
          _decode_struct_attr(setting, field)
      return settings

    def get_results_metadata(self, *args, **kwargs):
      """Return the results metadata with its 'table_dir' field decoded."""
      metadata = self._client.get_results_metadata(*args, **kwargs)
      return _decode_struct_attr(metadata, 'table_dir')

  cluster_conf = hadoop.cluster.get_cluster_conf_for_job_submission()
  use_sasl = False
  if cluster_conf is not None:
    use_sasl = cluster_conf.SECURITY_ENABLED.get()

  raw_client = thrift_util.get_client(
      BeeswaxService.Client,
      query_server['server_host'],
      query_server['server_port'],
      service_name=query_server['server_name'],
      kerberos_principal="hue",
      use_sasl=use_sasl,
      timeout_seconds=conf.BEESWAX_SERVER_CONN_TIMEOUT.get())
  return UnicodeBeeswaxClient(raw_client)
Exemplo n.º 40
0
def meta_client():
  """
  Build the Thrift client used to talk to the Hive metastore.

  Host and port come from ``hive_site.get_metastore()``.  The raw client is
  returned wrapped in a proxy that converts text fields to and from Unicode.
  """

  class UnicodeMetastoreClient(object):
    """Proxy around the Thrift client that takes and returns Unicode."""

    def __init__(self, client):
      self._client = client

    def __getattr__(self, attr):
      # Anything not overridden on this proxy is forwarded to the raw client.
      own_attrs = self.__dict__
      if attr in own_attrs:
        return own_attrs[attr]
      return getattr(self._client, attr)

    # -- in-place conversion helpers ---------------------------------------

    def _encode_map(self, mapp):
      """Replace every value of ``mapp`` with its smart_str() form."""
      for name, raw in mapp.iteritems():
        mapp[name] = smart_str(raw, strings_only=True)

    def _decode_map(self, mapp):
      """Replace every value of ``mapp`` with its force_unicode() form."""
      for name, raw in mapp.iteritems():
        mapp[name] = force_unicode(raw, strings_only=True, errors='replace')

    def _encode_storage_descriptor(self, sd):
      """Encode the location, column comments and parameters of ``sd``."""
      _encode_struct_attr(sd, 'location')
      for column in sd.cols:
        _encode_struct_attr(column, 'comment')
      self._encode_map(sd.parameters)

    def _decode_storage_descriptor(self, sd):
      """Decode the location, column comments and parameters of ``sd``."""
      _decode_struct_attr(sd, 'location')
      for column in sd.cols:
        _decode_struct_attr(column, 'comment')
      self._decode_map(sd.parameters)

    def _encode_partition(self, part):
      """Encode the storage descriptor and parameters of ``part``; return it."""
      self._encode_storage_descriptor(part.sd)
      self._encode_map(part.parameters)
      return part

    def _decode_partition(self, part):
      """Decode the storage descriptor and parameters of ``part``; return it."""
      self._decode_storage_descriptor(part.sd)
      self._decode_map(part.parameters)
      return part

    # -- wrapped metastore calls -------------------------------------------

    def create_database(self, name, description):
      """Create a database, sending the description as a byte string."""
      return self._client.create_database(name, smart_str(description))

    def get_database(self, *args, **kwargs):
      """Fetch a database and decode its 'description' field."""
      database = self._client.get_database(*args, **kwargs)
      return _decode_struct_attr(database, 'description')

    def get_fields(self, *args, **kwargs):
      """Fetch field schemas and decode each one's 'comment' field."""
      field_schemas = self._client.get_fields(*args, **kwargs)
      for field_schema in field_schemas:
        _decode_struct_attr(field_schema, 'comment')
      return field_schemas

    def get_table(self, *args, **kwargs):
      """Fetch a table and decode its storage descriptor and parameters."""
      table = self._client.get_table(*args, **kwargs)
      self._decode_storage_descriptor(table.sd)
      self._decode_map(table.parameters)
      return table

    def alter_table(self, dbname, tbl_name, new_tbl):
      """Encode ``new_tbl`` in place, then send the alteration."""
      self._encode_storage_descriptor(new_tbl.sd)
      self._encode_map(new_tbl.parameters)
      return self._client.alter_table(dbname, tbl_name, new_tbl)

    def add_partition(self, new_part):
      """Encode ``new_part``, add it, and return the decoded result."""
      self._encode_partition(new_part)
      created = self._client.add_partition(new_part)
      return self._decode_partition(created)

    def get_partition(self, *args, **kwargs):
      """Fetch one partition and return it decoded."""
      return self._decode_partition(self._client.get_partition(*args, **kwargs))

    def get_partitions(self, *args, **kwargs):
      """Fetch a list of partitions, decoding each one in place."""
      partitions = self._client.get_partitions(*args, **kwargs)
      for partition in partitions:
        self._decode_partition(partition)
      return partitions

    def alter_partition(self, db_name, tbl_name, new_part):
      """Encode ``new_part`` in place, then send the alteration."""
      self._encode_partition(new_part)
      return self._client.alter_partition(db_name, tbl_name, new_part)

  _, host, port = hive_site.get_metastore()
  raw_client = thrift_util.get_client(
      ThriftHiveMetastore.Client,
      host,
      port,
      service_name="Hive Metastore Server",
      timeout_seconds=conf.METASTORE_CONN_TIMEOUT.get())
  return UnicodeMetastoreClient(raw_client)
Exemplo n.º 41
0
def db_client():
  """Return a Thrift client for the Beeswax server named in ``conf``."""
  client_class = BeeswaxService.Client
  host = conf.BEESWAX_SERVER_HOST.get()
  port = conf.BEESWAX_SERVER_PORT.get()
  return thrift_util.get_client(
      client_class, host, port,
      service_name="Beeswax (Hive UI) Server",
      timeout_seconds=BEESWAX_SERVER_THRIFT_TIMEOUT)
Exemplo n.º 42
0
def meta_client():
  """Return a Thrift client for the metastore server named in ``conf``."""
  client_class = ThriftHiveMetastore.Client
  host = conf.BEESWAX_META_SERVER_HOST.get()
  port = conf.BEESWAX_META_SERVER_PORT.get()
  return thrift_util.get_client(
      client_class, host, port,
      service_name="Hive Metadata (Hive UI) Server",
      timeout_seconds=METASTORE_THRIFT_TIMEOUT)
Exemplo n.º 43
0
  def meta_client(self):
    """
    Build the Thrift client used to talk to the Hive metastore.

    Host, port and Kerberos principal come from ``hive_site.get_metastore()``;
    the SASL flag comes from ``BeeswaxClient.get_security()``.  The raw client
    is returned wrapped in a proxy that converts text fields to and from
    Unicode.
    """

    class UnicodeMetastoreClient(object):
      """Proxy around the Thrift client that takes and returns Unicode."""

      def __init__(self, client):
        self._client = client

      def __getattr__(self, attr):
        # Anything not overridden on this proxy is forwarded to the raw client.
        own_attrs = self.__dict__
        if attr in own_attrs:
          return own_attrs[attr]
        return getattr(self._client, attr)

      # -- in-place conversion helpers -------------------------------------

      def _encode_map(self, mapp):
        """Replace every value of ``mapp`` with its smart_str() form."""
        for name, raw in mapp.iteritems():
          mapp[name] = smart_str(raw, strings_only=True)

      def _decode_map(self, mapp):
        """Replace every value of ``mapp`` with its force_unicode() form."""
        for name, raw in mapp.iteritems():
          mapp[name] = force_unicode(raw, strings_only=True, errors='replace')

      def _encode_storage_descriptor(self, sd):
        """Encode the location, column comments and parameters of ``sd``."""
        _encode_struct_attr(sd, 'location')
        for column in sd.cols:
          _encode_struct_attr(column, 'comment')
        self._encode_map(sd.parameters)

      def _decode_storage_descriptor(self, sd):
        """Decode the location, column comments and parameters of ``sd``."""
        _decode_struct_attr(sd, 'location')
        for column in sd.cols:
          _decode_struct_attr(column, 'comment')
        self._decode_map(sd.parameters)

      def _encode_partition(self, part):
        """Encode the descriptor and parameters of ``part``; return it."""
        self._encode_storage_descriptor(part.sd)
        self._encode_map(part.parameters)
        return part

      def _decode_partition(self, part):
        """Decode the descriptor and parameters of ``part``; return it."""
        self._decode_storage_descriptor(part.sd)
        self._decode_map(part.parameters)
        return part

      # -- wrapped metastore calls -----------------------------------------

      def create_database(self, name, description):
        """Create a database, sending the description as a byte string."""
        return self._client.create_database(name, smart_str(description))

      def get_database(self, *args, **kwargs):
        """Fetch a database and decode its 'description' field."""
        database = self._client.get_database(*args, **kwargs)
        return _decode_struct_attr(database, 'description')

      def get_fields(self, *args, **kwargs):
        """Fetch field schemas and decode each one's 'comment' field."""
        field_schemas = self._client.get_fields(*args, **kwargs)
        for field_schema in field_schemas:
          _decode_struct_attr(field_schema, 'comment')
        return field_schemas

      def get_table(self, *args, **kwargs):
        """Fetch a table and decode its storage descriptor and parameters."""
        table = self._client.get_table(*args, **kwargs)
        self._decode_storage_descriptor(table.sd)
        self._decode_map(table.parameters)
        return table

      def alter_table(self, dbname, tbl_name, new_tbl):
        """Encode ``new_tbl`` in place, then send the alteration."""
        self._encode_storage_descriptor(new_tbl.sd)
        self._encode_map(new_tbl.parameters)
        return self._client.alter_table(dbname, tbl_name, new_tbl)

      def add_partition(self, new_part):
        """Encode ``new_part``, add it, and return the decoded result."""
        self._encode_partition(new_part)
        created = self._client.add_partition(new_part)
        return self._decode_partition(created)

      def get_partition(self, *args, **kwargs):
        """Fetch one partition and return it decoded."""
        return self._decode_partition(self._client.get_partition(*args, **kwargs))

      def get_partitions(self, *args, **kwargs):
        """Fetch a list of partitions, decoding each one in place."""
        partitions = self._client.get_partitions(*args, **kwargs)
        for partition in partitions:
          self._decode_partition(partition)
        return partitions

      def alter_partition(self, db_name, tbl_name, new_part):
        """Encode ``new_part`` in place, then send the alteration."""
        self._encode_partition(new_part)
        return self._client.alter_partition(db_name, tbl_name, new_part)

    # Use service name from kerberos principal set in hive-site.xml
    _, host, port, metastore_kerberos_principal = hive_site.get_metastore()

    # Only the SASL flag is taken from get_security(); the principal it
    # returns is superseded by the metastore principal below.
    use_sasl, _ignored_principal = BeeswaxClient.get_security()

    # Short name is the part before the first '/'; an unset principal or an
    # empty short name both yield None.
    kerberos_principal_short_name = None
    if metastore_kerberos_principal:
      kerberos_principal_short_name = metastore_kerberos_principal.split('/', 1)[0] or None

    raw_client = thrift_util.get_client(
        ThriftHiveMetastore.Client,
        host,
        port,
        service_name="Hive Metastore Server",
        kerberos_principal=kerberos_principal_short_name,
        use_sasl=use_sasl,
        timeout_seconds=conf.METASTORE_CONN_TIMEOUT.get())
    return UnicodeMetastoreClient(raw_client)
Exemplo n.º 44
0
class BeeswaxClient:
  """
  Client facade over the Beeswax query server and the metastore.

  Query-related calls go through ``self.db_client`` (a Unicode-aware wrapper
  around the BeeswaxService Thrift client); metadata calls go through
  ``self.meta_client``.
  """

  # Statements that begin with one of these keywords are reported as having
  # no result set (see query()).
  NO_RESULT_SET_RE = re.compile('DROP|CREATE|ALTER|LOAD|USE', re.IGNORECASE)

  def __init__(self, query_server, user):
    """
    @param query_server dict describing the server; this class reads its
                        'server_host', 'server_port', 'server_name' and
                        'principal' keys
    @param user user whose ``username`` is attached to submitted queries
    """
    self.user = user
    self.query_server = query_server
    # The instance attributes below shadow the factory methods of the same
    # name, so each factory is only reachable once per instance.
    self.db_client = self.db_client(query_server)
    # NOTE(review): meta_client() is not defined in this class body; it is
    # presumably provided elsewhere -- confirm.
    self.meta_client = self.meta_client()

  def make_query(self, hql_query, statement=0):
    """
    Build the BeeswaxService.Query for statement number ``statement`` of
    ``hql_query`` and tag it with the current user's name.
    """
    # HUE-535 without having to modify Beeswaxd, add 'use database' as first option
    if self.query_server['server_name'] == 'impala':
      # Impala gets all settings as one comma-separated key=value string.
      configuration = [','.join(['%(key)s=%(value)s' % setting for setting in hql_query.settings])]
    else:
      configuration = ['use ' + hql_query.query.get('database', 'default')]
      configuration.extend(hql_query.get_configuration())

    query_statement = hql_query.get_query_statement(statement)
    thrift_query = BeeswaxService.Query(query=query_statement, configuration=configuration)
    thrift_query.hadoop_user = self.user.username
    return thrift_query


  def get_databases(self, *args, **kwargs):
    """Return the database names; for Impala this is always ['default']."""
    if self.query_server['server_name'] == 'impala':
      return ['default']
    else:
      return self.meta_client.get_all_databases()


  def get_tables(self, *args, **kwargs):
    """Forward to the metastore client's get_tables()."""
    return self.meta_client.get_tables(*args, **kwargs)


  def get_table(self, *args, **kwargs):
    """Fetch a table from the metastore and wrap it in a BeeswaxTable."""
    table = self.meta_client.get_table(*args, **kwargs)
    return BeeswaxTable(table)


  def query(self, query, statement=0):
    """Submit one statement of ``query`` and return a BeeswaxQueryHandle."""
    thrift_query = self.make_query(query, statement)
    handle = self.db_client.query(thrift_query)
    # Fake has_result_set: the statement has one unless it starts with a
    # keyword from NO_RESULT_SET_RE.
    has_result_set = BeeswaxClient.NO_RESULT_SET_RE.match(thrift_query.query) is None
    return BeeswaxQueryHandle(secret=handle.id, has_result_set=has_result_set, log_context=handle.log_context)


  def fetch(self, handle, start_over=True, rows=-1):
    """
    Fetch results for ``handle``.

    Returns a BeeswaxDataTable once the results are ready, and None while
    they are not.  ``rows=None`` is treated as -1.
    """
    if rows is None:
      rows = -1

    rpc_handle = handle.get_rpc_handle()
    results = self.db_client.fetch(rpc_handle, start_over, rows)

    if not results.ready:
      return None

    # Impala does not return the name of the columns, need to fetch separately
    if self.query_server['server_name'] == 'impala':
      results.columns = [column.name for column in self.get_results_metadata(handle).schema.fieldSchemas]
    return BeeswaxDataTable(results)


  def get_log(self, handle):
    """Return the log identified by the handle's log context."""
    return self.db_client.get_log(handle.log_context)


  def get_state(self, handle):
    """Return the client-side state of the query behind ``handle``."""
    return self.db_client.get_state(handle)


  def get_results_metadata(self, handle):
    """Return the results metadata for ``handle``."""
    handle = handle.get_rpc_handle()
    return self.db_client.get_results_metadata(handle)


  def close(self, handle):
    """Close the query behind ``handle`` and clean its log context."""
    handle = handle.get_rpc_handle()

    self.db_client.close(handle)
    self.db_client.clean(handle.log_context)


  def get_partitions(self, db_name, tbl_name, max_parts):
    """Return partitions of a table; ``max_parts=None`` is sent as -1."""
    if max_parts is None:
      max_parts = -1
    return self.meta_client.get_partitions(db_name, tbl_name, max_parts)


  def explain(self, statement):
    """Return the server's explanation of ``statement``."""
    thrift_query = self.make_query(statement)
    return self.db_client.explain(thrift_query)


  def echo(self, text):
    """Forward ``text`` to the server's echo call."""
    return self.db_client.echo(text)


  def getStatus(self):
    """Forward to the metastore client's getStatus()."""
    return self.meta_client.getStatus()


  def get_default_configuration(self, *args, **kwargs):
    """Forward to the query client's get_default_configuration()."""
    return self.db_client.get_default_configuration(*args, **kwargs)

  @classmethod
  def get_security(cls, query_server=None):
    """
    Return ``(use_sasl, kerberos_principal_short_name)``.

    ``use_sasl`` is truthy when a job-submission cluster configuration exists
    and its SECURITY_ENABLED setting is truthy.  The principal is taken from
    ``query_server['principal']`` when a query server is given, otherwise
    from ``KERBEROS.HUE_PRINCIPAL``; its short name is the part before the
    first '/'.
    """
    cluster_conf = hadoop.cluster.get_cluster_conf_for_job_submission()
    use_sasl = cluster_conf is not None and cluster_conf.SECURITY_ENABLED.get()

    if query_server is not None:
      principal = query_server['principal']
    else:
      principal = KERBEROS.HUE_PRINCIPAL.get()

    # We should integrate hive_site.get_metastore() here in the future
    kerberos_principal_short_name = principal.split('/', 1)[0]

    return use_sasl, kerberos_principal_short_name

  def db_client(self, query_server):
    """Get the Thrift client to talk to beeswax server"""

    class UnicodeBeeswaxClient(object):
      """Wrap the thrift client to take and return Unicode"""
      def __init__(self, client):
        self._client = client

      def __getattr__(self, attr):
        # Anything not overridden on this proxy is forwarded to the raw client.
        if attr in self.__dict__:
          return self.__dict__[attr]
        return getattr(self._client, attr)

      def query(self, query):
        """Encode the query text in place, then submit the query."""
        _encode_struct_attr(query, 'query')
        return self._client.query(query)

      def explain(self, query):
        """Encode the query text, run explain and decode its 'textual' field."""
        _encode_struct_attr(query, 'query')
        res = self._client.explain(query)
        return _decode_struct_attr(res, 'textual')

      def fetch(self, *args, **kwargs):
        """Fetch results; once ready, convert columns and rows to Unicode."""
        res = self._client.fetch(*args, **kwargs)
        if res.ready:
          res.columns = [ force_unicode(col, errors='replace') for col in res.columns ]
          res.data = [ force_unicode(row, errors='replace') for row in res.data ]
        return res

      def get_state(self, handle):
        """
        Translate the *server* state of the query behind ``handle`` into the
        *client* state.

        Returns the mapped state, ``models.QueryHistory.STATE.expired`` when
        the server no longer knows the query, or None when the transport
        fails.
        """
        rpc_handle = handle.get_rpc_handle()

        try:
          rpc_state = self._client.get_state(rpc_handle)
          return models.BeeswaxQueryHistory.STATE_MAP[rpc_state]
        except QueryNotFoundException:
          LOG.debug("Query id %s has expired" % (handle.secret,))
          return models.QueryHistory.STATE.expired
        except thrift.transport.TTransport.TTransportException as ex:
          LOG.error("Failed to retrieve server state of submitted query id %s: %s" % (handle.secret, ex)) # queryhistory.id
          return None


      def dump_config(self):
        """Return the server's configuration dump as Unicode."""
        res = self._client.dump_config()
        return force_unicode(res, errors='replace')

      def echo(self, msg):
        """Send ``msg`` to the server's echo call as a byte string."""
        return self._client.echo(smart_str(msg))

      def get_log(self, *args, **kwargs):
        """Return the requested log as Unicode."""
        res = self._client.get_log(*args, **kwargs)
        return force_unicode(res, errors='replace')

      def get_default_configuration(self, *args, **kwargs):
        """Return the default configuration with key/value/desc decoded."""
        config_list = self._client.get_default_configuration(*args, **kwargs)
        for config in config_list:
          _decode_struct_attr(config, 'key')
          _decode_struct_attr(config, 'value')
          _decode_struct_attr(config, 'desc')
        return config_list

      def get_results_metadata(self, *args, **kwargs):
        """Return the results metadata with its 'table_dir' field decoded."""
        res = self._client.get_results_metadata(*args, **kwargs)
        return _decode_struct_attr(res, 'table_dir')

    use_sasl, kerberos_principal_short_name = BeeswaxClient.get_security(query_server)

    client = thrift_util.get_client(BeeswaxService.Client,
                                    query_server['server_host'],
                                    query_server['server_port'],
                                    service_name=query_server['server_name'],
                                    kerberos_principal=kerberos_principal_short_name,
                                    use_sasl=use_sasl,
                                    timeout_seconds=conf.BEESWAX_SERVER_CONN_TIMEOUT.get())
    return UnicodeBeeswaxClient(client)
Exemplo n.º 45
0
class BeeswaxClient:
    def __init__(self, query_server, user):
        """Store the user and query server, then create both clients.

        Note that the ``db_client`` and ``meta_client`` attributes are rebound
        on the instance: after construction they hold the objects returned by
        the methods of the same name, which shadow those methods.
        """
        self.user = user
        self.query_server = query_server
        # Replaces the bound method with the client object it returns.
        self.db_client = self.db_client(query_server)
        # Same rebinding for ``meta_client``.
        self.meta_client = self.meta_client()

    def make_query(self, hql_query):
        """Build a ``BeeswaxService.Query`` from ``hql_query``.

        The query text comes from ``hql_query.query['query']``, the
        configuration from ``hql_query.get_configuration()``, and
        ``hadoop_user`` is set to ``self.user.username``.
        """
        statement = hql_query.query['query']
        settings = hql_query.get_configuration()
        request = BeeswaxService.Query(query=statement, configuration=settings)
        request.hadoop_user = self.user.username
        return request

    def get_tables(self, *args, **kwargs):
        """Forward the call unchanged to ``self.meta_client.get_tables``."""
        backend = self.meta_client
        return backend.get_tables(*args, **kwargs)

    def get_table(self, *args, **kwargs):
        """Fetch a table via ``self.meta_client.get_table`` and wrap it.

        Arguments are forwarded unchanged; the result is returned wrapped in
        a ``BeeswaxTable``.
        """
        raw_table = self.meta_client.get_table(*args, **kwargs)
        wrapped = BeeswaxTable(raw_table)
        return wrapped

    def query(self, query):
        """Submit ``query`` and return a ``BeeswaxQueryHandle`` for it.

        The query is converted with ``make_query`` and sent through
        ``self.db_client.query``. The returned handle carries the server
        handle's ``id`` as ``secret`` and its ``log_context``;
        ``has_result_set`` is always ``True``.
        """
        request = self.make_query(query)
        server_handle = self.db_client.query(request)
        handle_fields = dict(secret=server_handle.id, has_result_set=True)
        handle_fields['log_context'] = server_handle.log_context
        return BeeswaxQueryHandle(**handle_fields)

    def fetch(self, handle, start_over=True, rows=-1):
        """Fetch results for ``handle`` through ``self.db_client.fetch``.

        ``rows=None`` is passed on as ``-1``. Returns a ``BeeswaxDataTable``
        wrapping the results when they are flagged ``ready``; otherwise
        returns ``None``.
        """
        row_limit = -1 if rows is None else rows
        rpc_handle = handle.get_rpc_handle()
        results = self.db_client.fetch(rpc_handle, start_over, row_limit)

        if not results.ready:
            return None
        return BeeswaxDataTable(results)

    def get_log(self, handle):
        """Return the log that ``self.db_client`` holds for ``handle.log_context``."""
        context = handle.log_context
        return self.db_client.get_log(context)

    def get_state(self, handle):
        """Return whatever ``self.db_client.get_state`` reports for ``handle``.

        The handle is passed through as-is (it is not converted here).
        """
        client = self.db_client
        return client.get_state(handle)

    def get_results_metadata(self, handle):
        """Return the results metadata for the RPC handle behind ``handle``."""
        rpc_handle = handle.get_rpc_handle()
        client = self.db_client
        return client.get_results_metadata(rpc_handle)

    def close(self, handle):
        """Close the query behind ``handle`` and clean its log context.

        Calls ``close`` with the RPC handle, then ``clean`` with that
        handle's ``log_context``, both on ``self.db_client``.
        """
        rpc_handle = handle.get_rpc_handle()

        self.db_client.close(rpc_handle)
        log_context = rpc_handle.log_context
        self.db_client.clean(log_context)

    def get_partitions(self, db_name, tbl_name, max_parts):
        """Return partitions of ``db_name.tbl_name`` from ``self.meta_client``.

        ``max_parts=None`` is passed on as ``-1``.
        """
        limit = -1 if max_parts is None else max_parts
        return self.meta_client.get_partitions(db_name, tbl_name, limit)

    def explain(self, statement):
        """Convert ``statement`` with ``make_query`` and return its explanation.

        The explanation is whatever ``self.db_client.explain`` returns.
        """
        prepared = self.make_query(statement)
        executor = self.db_client
        return executor.explain(prepared)

    def echo(self, text):
        """Pass ``text`` to ``self.db_client.echo`` and return its reply."""
        client = self.db_client
        return client.echo(text)

    def getStatus(self):
        """Return the result of ``self.meta_client.getStatus()``."""
        backend = self.meta_client
        return backend.getStatus()

    def get_default_configuration(self, *args, **kwargs):
        """Forward the call unchanged to ``self.db_client.get_default_configuration``."""
        client = self.db_client
        return client.get_default_configuration(*args, **kwargs)

    def db_client(self, query_server):
        """Get the Thrift client to talk to beeswax server.

        ``query_server`` must provide the ``server_host``, ``server_port`` and
        ``server_name`` keys. SASL is enabled when the cluster configuration
        used for job submission exists and reports security as enabled.

        Returns a ``UnicodeBeeswaxClient`` wrapping the raw Thrift client.
        """
        class UnicodeBeeswaxClient(object):
            """Wrap the thrift client to take and return Unicode"""
            def __init__(self, client):
                self._client = client

            def __getattr__(self, attr):
                # __getattr__ only runs after normal lookup has failed, so
                # there is nothing to find in the instance dict here. Refuse to
                # delegate '_client' itself: if it is not set yet, looking it
                # up below would re-enter this method without bound.
                if attr == '_client':
                    raise AttributeError(attr)
                return getattr(self._client, attr)

            def query(self, query):
                """Encode the ``query`` attribute of the struct, then submit it."""
                _encode_struct_attr(query, 'query')
                return self._client.query(query)

            def explain(self, query):
                """Encode the query, explain it, and decode the ``textual`` result."""
                _encode_struct_attr(query, 'query')
                res = self._client.explain(query)
                return _decode_struct_attr(res, 'textual')

            def fetch(self, *args, **kwargs):
                """Fetch results; convert columns and rows to unicode when ready."""
                res = self._client.fetch(*args, **kwargs)
                if res.ready:
                    res.columns = [
                        force_unicode(col, errors='replace')
                        for col in res.columns
                    ]
                    res.data = [
                        force_unicode(row, errors='replace')
                        for row in res.data
                    ]
                return res

            def get_state(self, handle):
                """
                get_state(handle) --> state enum

                Find out the *server* state of the query behind ``handle`` and
                translate it to the *client* state through
                ``models.BeeswaxQueryHistory.STATE_MAP``.

                Returns ``models.QueryHistory.STATE.expired`` when the server
                raises ``QueryNotFoundException``, and None on a Thrift
                transport error (which is logged).
                """
                rpc_handle = handle.get_rpc_handle()

                try:
                    rpc_state = self._client.get_state(rpc_handle)
                    return models.BeeswaxQueryHistory.STATE_MAP[rpc_state]
                except QueryNotFoundException:
                    LOG.debug("Query id %s has expired" % (handle.secret, ))
                    return models.QueryHistory.STATE.expired
                # "except X as ex" is accepted by Python 2.6+ and Python 3,
                # unlike the comma form.
                except thrift.transport.TTransport.TTransportException as ex:
                    LOG.error(
                        "Failed to retrieve server state of submitted query id %s: %s"
                        % (handle.secret, ex))  # queryhistory.id
                    return None

            def dump_config(self):
                """Return the server configuration dump as unicode."""
                res = self._client.dump_config()
                return force_unicode(res, errors='replace')

            def echo(self, msg):
                """Echo ``msg`` after converting it with ``smart_str``."""
                return self._client.echo(smart_str(msg))

            def get_log(self, *args, **kwargs):
                """Return the log from the wrapped client as unicode."""
                res = self._client.get_log(*args, **kwargs)
                return force_unicode(res, errors='replace')

            def get_default_configuration(self, *args, **kwargs):
                """Return the default configuration with key/value/desc decoded."""
                config_list = self._client.get_default_configuration(
                    *args, **kwargs)
                for config in config_list:
                    _decode_struct_attr(config, 'key')
                    _decode_struct_attr(config, 'value')
                    _decode_struct_attr(config, 'desc')
                return config_list

            def get_results_metadata(self, *args, **kwargs):
                """Return the results metadata with ``table_dir`` decoded."""
                res = self._client.get_results_metadata(*args, **kwargs)
                return _decode_struct_attr(res, 'table_dir')

        cluster_conf = hadoop.cluster.get_cluster_conf_for_job_submission()
        # Short-circuits to False when no cluster configuration is available.
        use_sasl = (cluster_conf is not None
                    and cluster_conf.SECURITY_ENABLED.get())

        client = thrift_util.get_client(
            BeeswaxService.Client,
            query_server['server_host'],
            query_server['server_port'],
            service_name=query_server['server_name'],
            kerberos_principal="hue",
            use_sasl=use_sasl,
            timeout_seconds=conf.BEESWAX_SERVER_CONN_TIMEOUT.get())
        return UnicodeBeeswaxClient(client)