def establish_beeswax_connection(query_config):
    """Establish a connection to the user specified impalad.

    Args:
      query_config (QueryExecConfig): supplies the impalad address, the
        use_kerberos / user / password / use_ssl settings handed to the client,
        and the exec_options applied once connected. If the address carries no
        port, query_config.impalad is rewritten in place to host:port.

    Returns:
      ImpalaBeeswaxClient if the connection succeeds, None otherwise.
    """
    use_kerberos = query_config.use_kerberos
    user = query_config.user
    password = query_config.password
    use_ssl = query_config.use_ssl
    # If the impalad is of the form host, convert it to host:port that the Impala
    # beeswax client accepts.
    if len(query_config.impalad.split(":")) == 1:
        query_config.impalad = "{0}:{1}".format(query_config.impalad,
                                                DEFAULT_BEESWAX_PORT)
    client = None
    try:
        client = ImpalaBeeswaxClient(query_config.impalad, use_kerberos=use_kerberos,
                                     user=user, password=password, use_ssl=use_ssl)
        # Try connect
        client.connect()
        # Set the exec options.
        client.set_query_options(query_config.exec_options)
        LOG.info("Connected to %s" % query_config.impalad)
    except Exception as e:
        # Best-effort by design: the failure is logged and reported to the caller
        # as None rather than raised.
        LOG.error("Error connecting: {0}".format(str(e)))
        # The client object may already exist if connect() or set_query_options()
        # failed; reset it so a half-initialised client is never handed back.
        client = None
    return client
def __init__(self, host_port, use_kerberos=False, user=None, password=None,
             use_ssl=False):
    """Build the wrapped ImpalaBeeswaxClient for host_port.

    Only the client object is constructed here; connect() is not called by this
    method. The client's query_states attribute is re-exported as QUERY_STATES.
    """
    client = ImpalaBeeswaxClient(host_port, use_kerberos, user=user,
                                 password=password, use_ssl=use_ssl)
    self.__beeswax_client = client
    self.__host_port = host_port
    self.QUERY_STATES = client.query_states
def establish_beeswax_connection(query, query_config):
    """Connect to the impalad named in query_config and apply its exec options.

    Args:
      query: not used by this function.
      query_config (QueryExecConfig): provides impalad, use_kerberos and
        exec_options.

    Returns:
      (boolean, ImpalaBeeswaxClient): always (True, client) when this function
      returns. Errors raised while connecting are not caught here and propagate
      to the caller.
    """
    # TODO: Make this generic, for hive etc.
    impalad = query_config.impalad
    client = ImpalaBeeswaxClient(impalad, use_kerberos=query_config.use_kerberos)
    # Try connect
    client.connect()
    # Set the exec options.
    client.set_query_options(query_config.exec_options)
    LOG.info("Connected to %s" % query_config.impalad)
    return (True, client)
def establish_beeswax_connection(query, query_config):
    """Connect to the impalad named in query_config and apply its exec options.

    Args:
      query: not used by this function.
      query_config (QueryExecConfig): provides impalad, use_kerberos and
        exec_options. When impalad contains no ":" it is rewritten in place to
        host:DEFAULT_BEESWAX_PORT.

    Returns:
      (boolean, ImpalaBeeswaxClient): always (True, client) when this function
      returns. Errors raised while connecting are not caught here and propagate
      to the caller.
    """
    kerberized = query_config.use_kerberos
    # The Impala beeswax client expects host:port, so append the default port
    # when only a host was supplied.
    if ":" not in query_config.impalad:
        query_config.impalad = "{0}:{1}".format(query_config.impalad,
                                                DEFAULT_BEESWAX_PORT)
    client = ImpalaBeeswaxClient(query_config.impalad, use_kerberos=kerberized)
    # Try connect
    client.connect()
    # Set the exec options.
    client.set_query_options(query_config.exec_options)
    LOG.info("Connected to %s" % query_config.impalad)
    return (True, client)
def establish_beeswax_connection(query_config):
    """Establish a connection to the user specified impalad.

    Args:
      query_config (QueryExecConfig): supplies the impalad address, the
        use_kerberos setting handed to the client, and the exec_options applied
        once connected. If the address carries no port, query_config.impalad is
        rewritten in place to host:port.

    Returns:
      ImpalaBeeswaxClient if the connection succeeds, None otherwise.
    """
    use_kerberos = query_config.use_kerberos
    # If the impalad is of the form host, convert it to host:port that the Impala
    # beeswax client accepts.
    if len(query_config.impalad.split(":")) == 1:
        query_config.impalad = "{0}:{1}".format(query_config.impalad,
                                                DEFAULT_BEESWAX_PORT)
    client = None
    try:
        client = ImpalaBeeswaxClient(query_config.impalad, use_kerberos=use_kerberos)
        # Try connect
        client.connect()
        # Set the exec options.
        client.set_query_options(query_config.exec_options)
        LOG.info("Connected to %s" % query_config.impalad)
    except Exception as e:
        # Best-effort by design: the failure is logged and reported to the caller
        # as None rather than raised.
        LOG.error("Error connecting: {0}".format(str(e)))
        # The client object may already exist if connect() or set_query_options()
        # failed; reset it so a half-initialised client is never handed back.
        client = None
    return client
def client_factory():
    """Yield a connected ImpalaBeeswaxClient and close it when the consumer is done.

    The client is built from the module-level options (impalad, use_kerberos,
    use_ssl) and connected before being yielded.

    Yields:
      The connected ImpalaBeeswaxClient.
    """
    impala_client = ImpalaBeeswaxClient(options.impalad,
                                        use_kerberos=options.use_kerberos,
                                        use_ssl=options.use_ssl)
    impala_client.connect()
    try:
        yield impala_client
    finally:
        # Runs on normal resumption, when an exception is thrown into the
        # generator at the yield, and when the generator is closed early, so the
        # connection is not leaked on any of those paths.
        impala_client.close_connection()
class BeeswaxConnection(ImpalaConnection):
    """ImpalaConnection implementation backed by an ImpalaBeeswaxClient.

    Each method logs what it is about to do and then forwards the call to the
    wrapped client, unwrapping OperationHandle objects via get_handle() where the
    client needs the underlying handle.
    """

    def __init__(self, host_port, use_kerberos=False, user=None, password=None,
                 use_ssl=False):
        """Create the wrapped client for host_port; connect() is a separate step."""
        client = ImpalaBeeswaxClient(host_port, use_kerberos, user=user,
                                     password=password, use_ssl=use_ssl)
        self.__beeswax_client = client
        self.__host_port = host_port
        self.QUERY_STATES = client.query_states

    def set_configuration_option(self, name, value):
        """Set query option name to value unless it already has that value."""
        client = self.__beeswax_client
        current = client.get_query_option(name)
        # Skip the write (and the log line) when nothing would change.
        if current != value:
            LOG.info('SET %s=%s;' % (name, value))
            client.set_query_option(name, value)

    def get_default_configuration(self):
        """Return the client's default configuration as a {key: value} dict."""
        defaults = self.__beeswax_client.get_default_configuration()
        return dict((item.key, item.value) for item in defaults)

    def clear_configuration(self):
        """Clear all query options, then restore client_identifier if available."""
        self.__beeswax_client.clear_query_options()
        # A hook in conftest sets tests.common.current_node.
        if not hasattr(tests.common, "current_node"):
            return
        self.set_configuration_option("client_identifier", tests.common.current_node)

    def connect(self):
        """Open the connection of the wrapped client."""
        LOG.info("-- connecting to: %s" % self.__host_port)
        self.__beeswax_client.connect()

    # TODO: rename to close_connection
    def close(self):
        """Close the connection of the wrapped client."""
        LOG.info("-- closing connection to: %s" % self.__host_port)
        self.__beeswax_client.close_connection()

    def close_query(self, operation_handle):
        """Close the query identified by operation_handle."""
        LOG.info("-- closing query for operation handle: %s" % operation_handle)
        handle = operation_handle.get_handle()
        self.__beeswax_client.close_query(handle)

    def execute(self, sql_stmt, user=None):
        """Run sql_stmt through the client's execute() and return its result."""
        message = "-- executing against %s\n%s;\n" % (self.__host_port, sql_stmt)
        LOG.info(message)
        return self.__beeswax_client.execute(sql_stmt, user=user)

    def execute_async(self, sql_stmt, user=None):
        """Start sql_stmt asynchronously and return an OperationHandle for it."""
        message = "-- executing async: %s\n%s;\n" % (self.__host_port, sql_stmt)
        LOG.info(message)
        raw_handle = self.__beeswax_client.execute_query_async(sql_stmt, user=user)
        return OperationHandle(raw_handle, sql_stmt)

    def cancel(self, operation_handle):
        """Cancel the query identified by operation_handle."""
        LOG.info("-- canceling operation: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.cancel_query(handle)

    def get_state(self, operation_handle):
        """Return the client-reported state of the operation."""
        LOG.info("-- getting state for operation: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.get_state(handle)

    def get_exec_summary(self, operation_handle):
        """Return the client's exec summary for the operation."""
        LOG.info("-- getting exec summary operation: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.get_exec_summary(handle)

    def get_runtime_profile(self, operation_handle):
        """Return the client's runtime profile for the operation."""
        LOG.info("-- getting runtime profile operation: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.get_runtime_profile(handle)

    def wait_for_finished_timeout(self, operation_handle, timeout):
        """Forward to the client's wait_for_finished_timeout() with timeout."""
        LOG.info("-- waiting for query to reach FINISHED state: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.wait_for_finished_timeout(handle, timeout)

    def wait_for_admission_control(self, operation_handle):
        """Forward to the client's wait_for_admission_control()."""
        LOG.info("-- waiting for completion of the admission control processing of the "
                 "query: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.wait_for_admission_control(handle)

    def get_admission_result(self, operation_handle):
        """Return the client's admission result for the operation."""
        LOG.info("-- getting the admission result: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.get_admission_result(handle)

    def get_log(self, operation_handle):
        """Return the client's log for the operation."""
        LOG.info("-- getting log for operation: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.get_log(handle)

    def fetch(self, sql_stmt, operation_handle, max_rows=-1):
        """Fetch results for the operation via the client's fetch_results()."""
        LOG.info("-- fetching results from: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.fetch_results(sql_stmt, handle, max_rows)
class BeeswaxConnection(ImpalaConnection):
    """ImpalaConnection implementation backed by an ImpalaBeeswaxClient.

    Each method logs what it is about to do and then forwards the call to the
    wrapped client, unwrapping OperationHandle objects via get_handle() where the
    client needs the underlying handle.
    """

    def __init__(self, host_port, use_kerberos=False, user=None, password=None,
                 use_ssl=False):
        """Create the wrapped client for host_port; connect() is a separate step."""
        client = ImpalaBeeswaxClient(host_port, use_kerberos, user=user,
                                     password=password, use_ssl=use_ssl)
        self.__beeswax_client = client
        self.__host_port = host_port
        self.QUERY_STATES = client.query_states

    def set_configuration_option(self, name, value):
        """Set query option name to value unless it already has that value."""
        client = self.__beeswax_client
        current = client.get_query_option(name)
        # Skip the write (and the log line) when nothing would change.
        if current != value:
            LOG.info('SET %s=%s;' % (name, value))
            client.set_query_option(name, value)

    def get_default_configuration(self):
        """Return the client's default configuration as a {key: value} dict."""
        defaults = self.__beeswax_client.get_default_configuration()
        return dict((item.key, item.value) for item in defaults)

    def clear_configuration(self):
        """Clear all query options, then restore client_identifier if available."""
        self.__beeswax_client.clear_query_options()
        # A hook in conftest sets tests.common.current_node.
        if not hasattr(tests.common, "current_node"):
            return
        self.set_configuration_option("client_identifier", tests.common.current_node)

    def connect(self):
        """Open the connection of the wrapped client."""
        LOG.info("-- connecting to: %s" % self.__host_port)
        self.__beeswax_client.connect()

    # TODO: rename to close_connection
    def close(self):
        """Close the connection of the wrapped client."""
        LOG.info("-- closing connection to: %s" % self.__host_port)
        self.__beeswax_client.close_connection()

    def close_query(self, operation_handle):
        """Close the query identified by operation_handle."""
        LOG.info("-- closing query for operation handle: %s" % operation_handle)
        handle = operation_handle.get_handle()
        self.__beeswax_client.close_query(handle)

    def close_dml(self, operation_handle):
        """Close the DML query identified by operation_handle."""
        LOG.info("-- closing DML query for operation handle: %s" % operation_handle)
        handle = operation_handle.get_handle()
        self.__beeswax_client.close_dml(handle)

    def execute(self, sql_stmt, user=None):
        """Log and run sql_stmt through the client's execute()."""
        LOG.info("-- executing against %s\n" % (self.__host_port))
        log_sql_stmt(sql_stmt)
        return self.__beeswax_client.execute(sql_stmt, user=user)

    def execute_async(self, sql_stmt, user=None):
        """Log and start sql_stmt asynchronously; return an OperationHandle."""
        LOG.info("-- executing async: %s\n" % (self.__host_port))
        log_sql_stmt(sql_stmt)
        raw_handle = self.__beeswax_client.execute_query_async(sql_stmt, user=user)
        return OperationHandle(raw_handle, sql_stmt)

    def cancel(self, operation_handle):
        """Cancel the query identified by operation_handle."""
        LOG.info("-- canceling operation: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.cancel_query(handle)

    def get_state(self, operation_handle):
        """Return the client-reported state of the operation."""
        LOG.info("-- getting state for operation: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.get_state(handle)

    def state_is_finished(self, operation_handle):
        """Return whether get_state() equals QUERY_STATES["FINISHED"]."""
        message = "-- checking finished state for operation: {0}".format(
            operation_handle)
        LOG.info(message)
        state = self.get_state(operation_handle)
        return state == self.QUERY_STATES["FINISHED"]

    def get_exec_summary(self, operation_handle):
        """Return the client's exec summary for the operation."""
        LOG.info("-- getting exec summary operation: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.get_exec_summary(handle)

    def get_runtime_profile(self, operation_handle):
        """Return the client's runtime profile for the operation."""
        LOG.info("-- getting runtime profile operation: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.get_runtime_profile(handle)

    def wait_for_finished_timeout(self, operation_handle, timeout):
        """Forward to the client's wait_for_finished_timeout() with timeout."""
        LOG.info("-- waiting for query to reach FINISHED state: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.wait_for_finished_timeout(handle, timeout)

    def wait_for_admission_control(self, operation_handle):
        """Forward to the client's wait_for_admission_control()."""
        LOG.info("-- waiting for completion of the admission control processing of the "
                 "query: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.wait_for_admission_control(handle)

    def get_admission_result(self, operation_handle):
        """Return the client's admission result for the operation."""
        LOG.info("-- getting the admission result: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.get_admission_result(handle)

    def get_log(self, operation_handle):
        """Return the client's log, looked up by the handle's log_context."""
        LOG.info("-- getting log for operation: %s" % operation_handle)
        log_context = operation_handle.get_handle().log_context
        return self.__beeswax_client.get_log(log_context)

    def fetch(self, sql_stmt, operation_handle, max_rows=-1):
        """Fetch results for the operation via the client's fetch_results()."""
        LOG.info("-- fetching results from: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.fetch_results(sql_stmt, handle, max_rows)
class BeeswaxConnection(ImpalaConnection):
    """ImpalaConnection implementation backed by an ImpalaBeeswaxClient.

    Each method logs what it is about to do and then forwards the call to the
    wrapped client, unwrapping OperationHandle objects via get_handle() where the
    client needs the underlying handle.
    """

    def __init__(self, host_port, use_kerberos=False, user=None, password=None,
                 use_ssl=False):
        """Create the wrapped client for host_port; connect() is a separate step."""
        client = ImpalaBeeswaxClient(host_port, use_kerberos, user=user,
                                     password=password, use_ssl=use_ssl)
        self.__beeswax_client = client
        self.__host_port = host_port
        self.QUERY_STATES = client.query_states

    def set_configuration_option(self, name, value):
        """Set query option name to value unless it already has that value."""
        client = self.__beeswax_client
        current = client.get_query_option(name)
        # Skip the write (and the log line) when nothing would change.
        if current != value:
            LOG.info('SET %s=%s;' % (name, value))
            client.set_query_option(name, value)

    def get_configuration(self):
        """Return the client's get_query_options attribute.

        NOTE(review): the attribute is returned without being called. If
        get_query_options is a method on ImpalaBeeswaxClient, callers receive the
        bound method rather than the options themselves -- confirm the intent.
        """
        client = self.__beeswax_client
        return client.get_query_options

    def set_configuration(self, config_option_dict):
        """Replace the current query options with those in config_option_dict."""
        assert config_option_dict is not None, "config_option_dict cannot be None"
        self.clear_configuration()
        for option_name, option_value in config_option_dict.iteritems():
            self.set_configuration_option(option_name, option_value)

    def clear_configuration(self):
        """Clear all query options on the wrapped client."""
        self.__beeswax_client.clear_query_options()

    def connect(self):
        """Open the connection of the wrapped client."""
        LOG.info("-- connecting to: %s" % self.__host_port)
        self.__beeswax_client.connect()

    # TODO: rename to close_connection
    def close(self):
        """Close the connection of the wrapped client."""
        LOG.info("-- closing connection to: %s" % self.__host_port)
        self.__beeswax_client.close_connection()

    def close_query(self, operation_handle):
        """Close the query identified by operation_handle."""
        LOG.info("-- closing query for operation handle: %s" % operation_handle)
        handle = operation_handle.get_handle()
        self.__beeswax_client.close_query(handle)

    def execute(self, sql_stmt):
        """Run sql_stmt through the client's execute() and return its result."""
        message = "-- executing against %s\n%s;\n" % (self.__host_port, sql_stmt)
        LOG.info(message)
        return self.__beeswax_client.execute(sql_stmt)

    def execute_async(self, sql_stmt):
        """Start sql_stmt asynchronously and return an OperationHandle for it."""
        message = "-- executing async: %s\n%s;\n" % (self.__host_port, sql_stmt)
        LOG.info(message)
        raw_handle = self.__beeswax_client.execute_query_async(sql_stmt)
        return OperationHandle(raw_handle)

    def cancel(self, operation_handle):
        """Cancel the query identified by operation_handle."""
        LOG.info("-- canceling operation: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.cancel_query(handle)

    def get_state(self, operation_handle):
        """Return the client-reported state of the operation."""
        LOG.info("-- getting state for operation: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.get_state(handle)

    def get_runtime_profile(self, operation_handle):
        """Return the client's runtime profile for the operation."""
        LOG.info("-- getting runtime profile operation: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.get_runtime_profile(handle)

    def get_log(self, operation_handle):
        """Return the client's log for the operation."""
        LOG.info("-- getting log for operation: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.get_log(handle)

    def refresh(self):
        """Invalidate the Impalad catalog"""
        statement = "invalidate metadata"
        return self.execute(statement)

    def invalidate_table(self, table_name):
        """Invalidate a specific table from the catalog"""
        statement = "invalidate metadata %s" % (table_name)
        return self.execute(statement)

    def refresh_table(self, db_name, table_name):
        """Refresh a specific table from the catalog"""
        statement = "refresh %s.%s" % (db_name, table_name)
        return self.execute(statement)

    def fetch(self, sql_stmt, operation_handle, max_rows=-1):
        """Fetch results for the operation via the client's fetch_results()."""
        LOG.info("-- fetching results from: %s" % operation_handle)
        handle = operation_handle.get_handle()
        return self.__beeswax_client.fetch_results(sql_stmt, handle, max_rows)
class BeeswaxConnection(ImpalaConnection):
    """ImpalaConnection implementation backed by an ImpalaBeeswaxClient.

    Each method logs what it is about to do and then forwards the call to the
    wrapped client, unwrapping OperationHandle objects via get_handle() where the
    client needs the underlying handle.
    """

    def __init__(self, host_port, use_kerberos=False, user=None, password=None,
                 use_ssl=False):
        """Create the wrapped client for host_port; connect() is a separate step."""
        beeswax = ImpalaBeeswaxClient(host_port, use_kerberos, user=user,
                                      password=password, use_ssl=use_ssl)
        self.__beeswax_client = beeswax
        self.__host_port = host_port
        self.QUERY_STATES = beeswax.query_states

    def set_configuration_option(self, name, value):
        """Set query option name to value unless it already has that value."""
        beeswax = self.__beeswax_client
        existing = beeswax.get_query_option(name)
        # Skip the write (and the log line) when nothing would change.
        if existing != value:
            LOG.info('SET %s=%s;' % (name, value))
            beeswax.set_query_option(name, value)

    def get_configuration(self):
        """Return the client's get_query_options attribute.

        NOTE(review): the attribute is returned without being called. If
        get_query_options is a method on ImpalaBeeswaxClient, callers receive the
        bound method rather than the options themselves -- confirm the intent.
        """
        beeswax = self.__beeswax_client
        return beeswax.get_query_options

    def set_configuration(self, config_option_dict):
        """Replace the current query options with those in config_option_dict."""
        assert config_option_dict is not None, "config_option_dict cannot be None"
        self.clear_configuration()
        for key, setting in config_option_dict.iteritems():
            self.set_configuration_option(key, setting)

    def clear_configuration(self):
        """Clear all query options on the wrapped client."""
        self.__beeswax_client.clear_query_options()

    def connect(self):
        """Open the connection of the wrapped client."""
        LOG.info("-- connecting to: %s" % self.__host_port)
        self.__beeswax_client.connect()

    # TODO: rename to close_connection
    def close(self):
        """Close the connection of the wrapped client."""
        LOG.info("-- closing connection to: %s" % self.__host_port)
        self.__beeswax_client.close_connection()

    def close_query(self, operation_handle):
        """Close the query identified by operation_handle."""
        LOG.info("-- closing query for operation handle: %s" % operation_handle)
        raw = operation_handle.get_handle()
        self.__beeswax_client.close_query(raw)

    def execute(self, sql_stmt):
        """Run sql_stmt through the client's execute() and return its result."""
        banner = "-- executing against %s\n%s;\n" % (self.__host_port, sql_stmt)
        LOG.info(banner)
        return self.__beeswax_client.execute(sql_stmt)

    def execute_async(self, sql_stmt):
        """Start sql_stmt asynchronously and return an OperationHandle for it."""
        banner = "-- executing async: %s\n%s;\n" % (self.__host_port, sql_stmt)
        LOG.info(banner)
        raw = self.__beeswax_client.execute_query_async(sql_stmt)
        return OperationHandle(raw)

    def cancel(self, operation_handle):
        """Cancel the query identified by operation_handle."""
        LOG.info("-- canceling operation: %s" % operation_handle)
        raw = operation_handle.get_handle()
        return self.__beeswax_client.cancel_query(raw)

    def get_state(self, operation_handle):
        """Return the client-reported state of the operation."""
        LOG.info("-- getting state for operation: %s" % operation_handle)
        raw = operation_handle.get_handle()
        return self.__beeswax_client.get_state(raw)

    def get_runtime_profile(self, operation_handle):
        """Return the client's runtime profile for the operation."""
        LOG.info("-- getting runtime profile operation: %s" % operation_handle)
        raw = operation_handle.get_handle()
        return self.__beeswax_client.get_runtime_profile(raw)

    def get_log(self, operation_handle):
        """Return the client's log for the operation."""
        LOG.info("-- getting log for operation: %s" % operation_handle)
        raw = operation_handle.get_handle()
        return self.__beeswax_client.get_log(raw)

    def refresh(self):
        """Invalidate the Impalad catalog"""
        sql = "invalidate metadata"
        return self.execute(sql)

    def invalidate_table(self, table_name):
        """Invalidate a specific table from the catalog"""
        sql = "invalidate metadata %s" % (table_name)
        return self.execute(sql)

    def refresh_table(self, db_name, table_name):
        """Refresh a specific table from the catalog"""
        sql = "refresh %s.%s" % (db_name, table_name)
        return self.execute(sql)

    def fetch(self, sql_stmt, operation_handle, max_rows=-1):
        """Fetch results for the operation via the client's fetch_results()."""
        LOG.info("-- fetching results from: %s" % operation_handle)
        raw = operation_handle.get_handle()
        return self.__beeswax_client.fetch_results(sql_stmt, raw, max_rows)
def __init__(self, host_port, use_kerberos=False):
    """Build the wrapped ImpalaBeeswaxClient for host_port.

    Only the client object is constructed here; connect() is not called by this
    method. The client's query_states attribute is re-exported as QUERY_STATES.
    """
    client = ImpalaBeeswaxClient(host_port, use_kerberos)
    self.__beeswax_client = client
    self.__host_port = host_port
    self.QUERY_STATES = client.query_states
# Remaining command-line flags: cluster security settings and optional filters
# restricting which databases / tables get stats computed.
parser.add_option("--use_kerberos", action="store_true", default=False,
                  help="Compute stats on a kerberized cluster.")
parser.add_option("--use_ssl", action="store_true", default=False,
                  help="Compute stats on a cluster with SSL enabled.")
parser.add_option("--db_names", dest="db_names", default=None,
                  help=("Comma-separated list of database names for which to compute "
                        "stats. Can be used in conjunction with the --table_names flag. "
                        "If not specified, compute stats will run on tables from all "
                        "databases."))
parser.add_option("--table_names", dest="table_names", default=None,
                  help=("Comma-separated list of table names to compute stats over. A"
                        " substring comparison is done. If no tables are specified stats "
                        "are computed across all tables."))
options, args = parser.parse_args()

# Normalise the comma-separated filters to lower-cased, stripped names; None
# means "no filter".
table_names = (
    [name.lower().strip() for name in options.table_names.split(',')]
    if options.table_names is not None else None)
db_names = (
    [name.lower().strip() for name in options.db_names.split(',')]
    if options.db_names is not None else None)

impala_client = ImpalaBeeswaxClient(options.impalad,
                                    use_kerberos=options.use_kerberos,
                                    use_ssl=options.use_ssl)
impala_client.connect()
# Close the connection whether or not computing stats succeeds.
try:
    compute_stats(impala_client, db_names=db_names, table_names=table_names,
                  continue_on_error=options.continue_on_error)
finally:
    impala_client.close_connection()