class ImpaladProcess(BaseImpalaProcess):
    """An impalad process paired with an ImpaladService client for it.

    The service is built in the constructor from this process' hostname and
    the port values resolved by the helper methods below.
    """

    def __init__(self, cmd):
        """Initialise the base process with `cmd` and the local hostname, then
        create `self.service`.

        The numeric literals are the fallbacks handed to the port lookups.
        """
        super(ImpaladProcess, self).__init__(cmd, socket.gethostname())
        host = self.hostname
        webserver_port = self._get_webserver_port(default=25000)
        beeswax_port = self.__get_beeswax_port(default=21000)
        be_port = self.__get_be_port(default=22000)
        hs2_port = self.__get_hs2_port(default=21050)
        self.service = ImpaladService(
            host, webserver_port, beeswax_port, be_port, hs2_port)

    def __get_beeswax_port(self, default=None):
        """Return the 'beeswax_port' argument value (or `default`) as an int."""
        raw_value = self._get_arg_value('beeswax_port', default)
        return int(raw_value)

    def __get_be_port(self, default=None):
        """Return the 'be_port' argument value (or `default`) as an int."""
        raw_value = self._get_arg_value('be_port', default)
        return int(raw_value)

    def __get_hs2_port(self, default=None):
        """Return the 'hs2_port' argument value (or `default`) as an int."""
        raw_value = self._get_arg_value('hs2_port', default)
        return int(raw_value)

    def start(self, wait_until_ready=True):
        """Starts the impalad and, unless `wait_until_ready` is false, waits until
        the 'impala-server.ready' metric reports 1 (30 second timeout)."""
        super(ImpaladProcess, self).start()
        if not wait_until_ready:
            return
        self.service.wait_for_metric_value(
            'impala-server.ready', expected_value=1, timeout=30)
class ImpaladProcess(BaseImpalaProcess):
    """An impalad process paired with an ImpaladService client for it.

    Unlike a plain base-class start, `start()` here re-launches the daemon by
    running IMPALAD_PATH with the recorded arguments through the shell.
    """

    def __init__(self, cmd):
        """Initialise the base process with `cmd` and the local hostname, then
        create `self.service` from the resolved ports."""
        super(ImpaladProcess, self).__init__(cmd, socket.gethostname())
        host = self.hostname
        webserver_port = self._get_webserver_port(default=25000)
        beeswax_port = self.__get_beeswax_port(default=21000)
        be_port = self.__get_be_port(default=22000)
        hs2_port = self.__get_hs2_port(default=21050)
        self.service = ImpaladService(
            host, webserver_port, beeswax_port, be_port, hs2_port)

    def __get_beeswax_port(self, default=None):
        """Return the 'beeswax_port' argument value (or `default`) as an int."""
        port_value = self._get_arg_value('beeswax_port', default)
        return int(port_value)

    def __get_be_port(self, default=None):
        """Return the 'be_port' argument value (or `default`) as an int."""
        port_value = self._get_arg_value('be_port', default)
        return int(port_value)

    def __get_hs2_port(self, default=None):
        """Return the 'hs2_port' argument value (or `default`) as an int."""
        port_value = self._get_arg_value('hs2_port', default)
        return int(port_value)

    def start(self, wait_until_ready=True):
        """Starts the impalad and waits until the service is ready to accept
        connections (skipped when `wait_until_ready` is false)."""
        # Replace the recorded executable (cmd[0]) with IMPALAD_PATH and append
        # '&' so the shell runs the daemon in the background.
        command_line = ' '.join([IMPALAD_PATH] + self.cmd[1:] + ['&'])
        LOG.info("Starting Impalad process: %s" % command_line)
        os.system(command_line)
        if not wait_until_ready:
            return
        self.service.wait_for_metric_value(
            'impala-server.ready', expected_value=1, timeout=30)
def test_auto_reconnect_after_impalad_died(self):
    """Test reconnect after restarting the remote impalad without using connect;

    Flow: run a USE statement in an interactive shell, kill the first impalad,
    check the shell shows the disconnected prompt, start the impalad again and
    check that the next statement succeeds with the previous database in the
    prompt.
    """
    # Use pexpect instead of ImpalaShell() since after using get_result() in ImpalaShell()
    # to check Disconnect, send_cmd() will no longer have any effect so we can not check
    # reconnect.
    impalad = ImpaladService(socket.getfqdn())

    proc = pexpect.spawn(' '.join([SHELL_CMD, "-i localhost:21000"]))
    proc.expect("21000] default>")
    # Take the baseline only once the prompt is up, so that any query issued
    # while the shell starts cannot be mistaken for the USE statement below.
    start_num_queries = impalad.get_metric_value(NUM_QUERIES)
    proc.sendline("use tpch;")

    # wait for the USE command to finish
    impalad.wait_for_metric_value(NUM_QUERIES, start_num_queries + 1)
    # Fail right here if the query never leaves the in-flight list rather than
    # continuing with a shell in an unknown state.
    assert impalad.wait_for_num_in_flight_queries(0)

    # Disconnect
    self.cluster.impalads[0].kill()
    proc.sendline("show tables;")
    # Search from [1:] since the square brackets "[]" are special characters in regex
    proc.expect(ImpalaShellClass.DISCONNECTED_PROMPT[1:])

    # Restarting Impalad
    self.cluster.impalads[0].start()

    # Check reconnect
    proc.sendline("show tables;")
    proc.expect("nation")
    proc.expect("21000] tpch>")
def test_auto_reconnect_after_impalad_died(self):
    """Test reconnect after restarting the remote impalad without using connect;

    Runs the same kill / restart / re-query sequence once per vector produced
    from create_beeswax_dimension().
    """
    # Use pexpect instead of ImpalaShell() since after using get_result() in ImpalaShell()
    # to check Disconnect, send_cmd() will no longer have any effect so we can not check
    # reconnect.
    impalad = ImpaladService(socket.getfqdn())

    # Iterate over test vector within test function to avoid restarting cluster.
    vectors = [ImpalaTestVector([value]) for value in create_beeswax_dimension()]
    for vector in vectors:
        shell_argv = get_shell_cmd(vector)
        shell_proc = pexpect.spawn(shell_argv[0], shell_argv[1:])
        default_prompt = "{0}] default>".format(get_impalad_port(vector))
        shell_proc.expect(default_prompt)

        # ImpalaShell startup may issue query to get server info - get num queries after
        # starting shell.
        baseline_queries = impalad.get_metric_value(NUM_QUERIES)
        shell_proc.sendline("use tpch;")

        # wait for the USE command to finish
        impalad.wait_for_metric_value(NUM_QUERIES, baseline_queries + 1)
        in_flight_drained = impalad.wait_for_num_in_flight_queries(0)
        assert in_flight_drained

        # Disconnect
        first_impalad = self.cluster.impalads[0]
        first_impalad.kill()
        shell_proc.sendline("show tables;")
        # Search from [1:] since the square brackets "[]" are special characters in regex
        disconnected_pattern = ImpalaShellClass.DISCONNECTED_PROMPT[1:]
        shell_proc.expect(disconnected_pattern)

        # Restarting Impalad
        self.cluster.impalads[0].start()

        # Check reconnect
        shell_proc.sendline("show tables;")
        shell_proc.expect("nation")
        tpch_prompt = "{0}] tpch>".format(get_impalad_port(vector))
        shell_proc.expect(tpch_prompt)
class ImpaladProcess(BaseImpalaProcess):
    """An impalad process (optionally containerised, judging by the constructor
    arguments) paired with an ImpaladService client for it."""

    def __init__(self, cmd, container_id=None, port_map=None):
        """Forward all arguments to the base class, then create `self.service`
        from the hostname, resolved ports and webserver certificate file."""
        super(ImpaladProcess, self).__init__(cmd, container_id, port_map)
        host = self.hostname
        webserver_port = self.get_webserver_port()
        beeswax_port = self.__get_beeswax_port()
        be_port = self.__get_be_port()
        hs2_port = self.__get_hs2_port()
        cert_file = self._get_webserver_certificate_file()
        self.service = ImpaladService(
            host, webserver_port, beeswax_port, be_port, hs2_port, cert_file)

    def _get_default_webserver_port(self):
        """Return the default webserver port for an impalad."""
        return DEFAULT_IMPALAD_WEBSERVER_PORT

    def __get_beeswax_port(self):
        """Return the 'beeswax_port' from `_get_port` as an int."""
        port = self._get_port('beeswax_port', DEFAULT_BEESWAX_PORT)
        return int(port)

    def __get_be_port(self):
        """Return the 'be_port' from `_get_port` as an int."""
        port = self._get_port('be_port', DEFAULT_BE_PORT)
        return int(port)

    def __get_hs2_port(self):
        """Return the 'hs2_port' from `_get_port` as an int."""
        port = self._get_port('hs2_port', DEFAULT_HS2_PORT)
        return int(port)

    def start(self, wait_until_ready=True):
        """Starts the impalad and waits until the service is ready to accept
        connections (skipped when `wait_until_ready` is false)."""
        # cmd[0] is dropped; only the remaining entries are passed on as args.
        daemon_args = self.cmd[1:]
        LOG.info(
            "Starting Impalad process with args: {0}".format(daemon_args))
        run_daemon("impalad", daemon_args)
        if not wait_until_ready:
            return
        self.service.wait_for_metric_value(
            'impala-server.ready', expected_value=1, timeout=30)

    def wait_for_catalog(self):
        """Waits for a catalog copy to be received by the impalad. When its
        received, additionally waits for client ports to be opened.

        Polls every 0.5 seconds for up to CLUSTER_WAIT_TIMEOUT_IN_SECONDS and
        raises RuntimeError if either client port is still closed afterwards.
        """
        not_ready_template = (
            "Client services not ready. Waiting for catalog cache: "
            "({num_dbs} DBs / {num_tbls} tables). Trying again ...")
        began_at = time.time()
        beeswax_ready = False
        hs2_ready = False
        db_count = 0
        tbl_count = 0
        while True:
            if not (time.time() - began_at < CLUSTER_WAIT_TIMEOUT_IN_SECONDS):
                break
            if beeswax_ready and hs2_ready:
                break
            try:
                db_count, tbl_count = self.service.get_metric_values(
                    ["catalog.num-databases", "catalog.num-tables"])
                beeswax_ready = self.service.beeswax_port_is_open()
                hs2_ready = self.service.hs2_port_is_open()
            except Exception:
                # Best effort: log with the last counts seen and poll again.
                LOG.exception(not_ready_template.format(
                    num_dbs=db_count, num_tbls=tbl_count))
            sleep(0.5)

        if hs2_ready and beeswax_ready:
            return
        raise RuntimeError(
            "Unable to open client ports within {num_seconds} seconds.".format(
                num_seconds=CLUSTER_WAIT_TIMEOUT_IN_SECONDS))
class ImpaladProcess(BaseImpalaProcess):
    """Wraps one impalad process and owns the ImpaladService used to query it."""

    def __init__(self, cmd, container_id=None, port_map=None):
        """Pass the arguments through to the base class and build `self.service`."""
        super(ImpaladProcess, self).__init__(cmd, container_id, port_map)
        service_args = (
            self.hostname,
            self.get_webserver_port(),
            self.__get_beeswax_port(),
            self.__get_be_port(),
            self.__get_hs2_port(),
            self._get_webserver_certificate_file(),
        )
        self.service = ImpaladService(*service_args)

    def _get_default_webserver_port(self):
        """Return the default webserver port for an impalad."""
        return DEFAULT_IMPALAD_WEBSERVER_PORT

    def __get_beeswax_port(self):
        """Return the 'beeswax_port' from `_get_port` as an int."""
        resolved = self._get_port('beeswax_port', DEFAULT_BEESWAX_PORT)
        return int(resolved)

    def __get_be_port(self):
        """Return the 'be_port' from `_get_port` as an int."""
        resolved = self._get_port('be_port', DEFAULT_BE_PORT)
        return int(resolved)

    def __get_hs2_port(self):
        """Return the 'hs2_port' from `_get_port` as an int."""
        resolved = self._get_port('hs2_port', DEFAULT_HS2_PORT)
        return int(resolved)

    def start(self, wait_until_ready=True):
        """Starts the impalad and waits until the service is ready to accept
        connections (skipped when `wait_until_ready` is false)."""
        # Everything after cmd[0] is handed to run_daemon as the argument list.
        args_for_daemon = self.cmd[1:]
        start_message = "Starting Impalad process with args: {0}".format(
            args_for_daemon)
        LOG.info(start_message)
        run_daemon("impalad", args_for_daemon)
        if wait_until_ready:
            self.service.wait_for_metric_value(
                'impala-server.ready', expected_value=1, timeout=30)

    def wait_for_catalog(self):
        """Waits for a catalog copy to be received by the impalad. When its
        received, additionally waits for client ports to be opened.

        Gives up with RuntimeError once CLUSTER_WAIT_TIMEOUT_IN_SECONDS have
        elapsed without both the beeswax and hs2 ports being open.
        """
        t0 = time.time()
        beeswax_open = False
        hs2_open = False
        dbs_seen = 0
        tbls_seen = 0
        still_in_time = time.time() - t0 < CLUSTER_WAIT_TIMEOUT_IN_SECONDS
        while still_in_time and not (beeswax_open and hs2_open):
            try:
                metric_names = ["catalog.num-databases", "catalog.num-tables"]
                dbs_seen, tbls_seen = self.service.get_metric_values(metric_names)
                beeswax_open = self.service.beeswax_port_is_open()
                hs2_open = self.service.hs2_port_is_open()
            except Exception:
                # Not ready yet: record the failure with the last known counts.
                message = ("Client services not ready. Waiting for catalog cache: "
                           "({num_dbs} DBs / {num_tbls} tables). Trying again ..."
                           ).format(num_dbs=dbs_seen, num_tbls=tbls_seen)
                LOG.exception(message)
            sleep(0.5)
            still_in_time = time.time() - t0 < CLUSTER_WAIT_TIMEOUT_IN_SECONDS

        ports_ready = hs2_open and beeswax_open
        if not ports_ready:
            raise RuntimeError(
                "Unable to open client ports within {num_seconds} seconds.".format(
                    num_seconds=CLUSTER_WAIT_TIMEOUT_IN_SECONDS))
def test_auto_reconnect(self):
    """Check that an open shell can still run a statement after
    `_start_impala_cluster([])` has been called underneath it.

    A USE statement is run first; after the cluster start call, SHOW TABLES
    must list a table from the `functional` database.
    """
    impalad = ImpaladService(socket.getfqdn())

    p = ImpalaShell()
    # Read the baseline after the shell exists so that a query issued while
    # the shell starts is not counted as the USE statement below.
    start_num_queries = impalad.get_metric_value(NUM_QUERIES)
    p.send_cmd("USE functional")

    # wait for the USE command to finish
    impalad.wait_for_metric_value(NUM_QUERIES, start_num_queries + 1)
    # Stop here if the query is still in flight instead of carrying on.
    assert impalad.wait_for_num_in_flight_queries(0)

    self._start_impala_cluster([])

    p.send_cmd("SHOW TABLES")
    result = p.get_result()
    # Include the shell output in the failure message to ease debugging.
    assert "alltypesaggmultifilesnopart" in result.stdout, result.stdout
def test_ssl(self, vector):
    """Run the negative and positive SSL connection cases, then verify that a
    query started from an `--ssl` shell can be cancelled with SIGINT.

    The cancellation part blocks a query with a DEBUG_ACTION, signals the
    shell process until the query is no longer in flight (at most 30 tries,
    one second apart) and checks the profile reports it as cancelled.
    """
    self._verify_negative_cases(vector)
    # TODO: This is really two different tests, but the custom cluster takes too long to
    # start. Make it so that custom clusters can be specified across test suites.
    self._validate_positive_cases(vector, "%s/server-cert.pem" % self.CERT_DIR)
    # No certificate checking: will accept any cert.
    self._validate_positive_cases(vector)

    # Test cancelling a query
    impalad = ImpaladService(socket.getfqdn())
    assert impalad.wait_for_num_in_flight_queries(0)
    impalad.wait_for_metric_value(
        'impala-server.backend-num-queries-executing', 0)
    p = ImpalaShell(vector, args=["--ssl"])
    p.send_cmd("SET DEBUG_ACTION=0:OPEN:WAIT")
    p.send_cmd("select count(*) from functional.alltypes")
    # Wait until the query has been planned and started executing, at which point it
    # should be cancellable.
    impalad.wait_for_metric_value(
        'impala-server.backend-num-queries-executing', 1, timeout=60)

    LOG = logging.getLogger('test_client_ssl')
    LOG.info("Cancelling query")
    num_tries = 0
    # In practice, sending SIGINT to the shell process doesn't always seem to get caught
    # (and a search shows up some bugs in Python where SIGINT might be ignored). So retry
    # for 30s until one signal takes.
    while impalad.get_num_in_flight_queries() == 1:
        time.sleep(1)
        LOG.info("Sending signal...")
        os.kill(p.pid(), signal.SIGINT)
        num_tries += 1
        assert num_tries < 30, (
            "SIGINT was not caught by shell within 30s. Queries: " +
            json.dumps(impalad.get_queries_json(), indent=2))

    p.send_cmd("profile")
    result = p.get_result()

    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(result.stderr)
    assert "Query Status: Cancelled" in result.stdout
    assert impalad.wait_for_num_in_flight_queries(0)
def test_auto_reconnect(self):
    """For each client-protocol vector, check that an open shell can still run
    SHOW TABLES after `_start_impala_cluster([])` has been called."""
    impalad = ImpaladService(socket.getfqdn())

    # Iterate over test vector within test function to avoid restarting cluster.
    protocol_vectors = [
        ImpalaTestVector([value]) for value in create_client_protocol_dimension()]
    for vector in protocol_vectors:
        shell = ImpalaShell(vector)
        # ImpalaShell startup may issue query to get server info - get num queries after
        # starting shell.
        baseline_queries = impalad.get_metric_value(NUM_QUERIES)
        shell.send_cmd("USE functional")

        # wait for the USE command to finish
        impalad.wait_for_metric_value(NUM_QUERIES, baseline_queries + 1)
        in_flight_drained = impalad.wait_for_num_in_flight_queries(0)
        assert in_flight_drained

        self._start_impala_cluster([])

        shell.send_cmd("SHOW TABLES")
        shell_result = shell.get_result()
        assert "alltypesaggmultifilesnopart" in shell_result.stdout, shell_result.stdout
def test_ddl_queries_are_closed(self):
    """Regression test for IMPALA-1317

    The shell does not call close() for alter, use and drop queries, leaving them in
    flight. This test issues those queries in interactive mode, and checks the debug
    webpage to confirm that they've been closed.
    TODO: Add every statement type.
    """

    TMP_DB = 'inflight_test_db'
    TMP_TBL = 'tmp_tbl'
    MSG = '%s query should be closed'
    NUM_QUERIES = 'impala-server.num-queries'

    impalad = ImpaladService(socket.getfqdn())
    p = ImpalaShell()
    try:
        start_num_queries = impalad.get_metric_value(NUM_QUERIES)
        p.send_cmd('create database if not exists %s' % TMP_DB)
        p.send_cmd('use %s' % TMP_DB)
        # Two statements so far: create database + use.
        impalad.wait_for_metric_value(NUM_QUERIES, start_num_queries + 2)
        assert impalad.wait_for_num_in_flight_queries(0), MSG % 'use'
        p.send_cmd('create table %s(i int)' % TMP_TBL)
        p.send_cmd('alter table %s add columns (j int)' % TMP_TBL)
        # Four in total after create table + alter table.
        impalad.wait_for_metric_value(NUM_QUERIES, start_num_queries + 4)
        assert impalad.wait_for_num_in_flight_queries(0), MSG % 'alter'
        p.send_cmd('drop table %s' % TMP_TBL)
        impalad.wait_for_metric_value(NUM_QUERIES, start_num_queries + 5)
        assert impalad.wait_for_num_in_flight_queries(0), MSG % 'drop'
    finally:
        # get_result() must be called to exit the shell; without it the
        # interactive shell started above is left running.
        p.get_result()
        run_impala_shell_interactive("drop table if exists %s.%s;" % (TMP_DB, TMP_TBL))
        # Drop the database this test created (previously an unrelated name
        # was dropped here, leaving TMP_DB behind).
        run_impala_shell_interactive("drop database if exists %s;" % TMP_DB)
class ImpaladProcess(BaseImpalaProcess):
    """An impalad process paired with an ImpaladService client for it."""

    def __init__(self, cmd):
        """Initialise the base process with `cmd` and the local hostname, then
        create `self.service` from the resolved ports.

        The numeric literals are the fallbacks handed to the port lookups.
        """
        super(ImpaladProcess, self).__init__(cmd, socket.gethostname())
        self.service = ImpaladService(self.hostname,
                                      self._get_webserver_port(default=25000),
                                      self.__get_beeswax_port(default=21000),
                                      self.__get_be_port(default=22000),
                                      self.__get_hs2_port(default=21050))

    def __get_beeswax_port(self, default=None):
        """Return the 'beeswax_port' argument value (or `default`) as an int."""
        return int(self._get_arg_value('beeswax_port', default))

    def __get_be_port(self, default=None):
        """Return the 'be_port' argument value (or `default`) as an int."""
        return int(self._get_arg_value('be_port', default))

    def __get_hs2_port(self, default=None):
        """Return the 'hs2_port' argument value (or `default`) as an int."""
        return int(self._get_arg_value('hs2_port', default))

    def start(self, wait_until_ready=True):
        """Starts the impalad and waits until the service is ready to accept
        connections.

        When `wait_until_ready` is false the method returns as soon as the
        base-class start has been issued, without polling the ready metric.
        """
        super(ImpaladProcess, self).start()
        # Honour the flag: callers that pass False must not be blocked for up
        # to 30 seconds on the readiness metric.
        if wait_until_ready:
            self.service.wait_for_metric_value('impala-server.ready',
                                               expected_value=1, timeout=30)
def test_ddl_queries_are_closed(self, vector):
    """Regression test for IMPALA-1317

    The shell does not call close() for alter, use and drop queries, leaving them in
    flight. This test issues those queries in interactive mode, and checks the debug
    webpage to confirm that they've been closed.
    TODO: Add every statement type.
    """
    # Disconnect existing clients so there are no open sessions.
    self.close_impala_clients()

    TMP_DB = 'inflight_test_db'
    TMP_TBL = 'tmp_tbl'
    MSG = '%s query should be closed'
    NUM_QUERIES = 'impala-server.num-queries'

    impalad = ImpaladService(socket.getfqdn())
    self._wait_for_num_open_sessions(
        vector, impalad, 0, "Open sessions found after closing all clients.")
    p = ImpalaShell(vector)
    try:
        start_num_queries = impalad.get_metric_value(NUM_QUERIES)
        p.send_cmd('create database if not exists %s' % TMP_DB)
        p.send_cmd('use %s' % TMP_DB)
        # Two statements so far: create database + use.
        impalad.wait_for_metric_value(NUM_QUERIES, start_num_queries + 2)
        assert impalad.wait_for_num_in_flight_queries(0), MSG % 'use'
        p.send_cmd('create table %s(i int)' % TMP_TBL)
        p.send_cmd('alter table %s add columns (j int)' % TMP_TBL)
        # Four in total after create table + alter table.
        impalad.wait_for_metric_value(NUM_QUERIES, start_num_queries + 4)
        assert impalad.wait_for_num_in_flight_queries(0), MSG % 'alter'
        p.send_cmd('drop table %s' % TMP_TBL)
        impalad.wait_for_metric_value(NUM_QUERIES, start_num_queries + 5)
        assert impalad.wait_for_num_in_flight_queries(0), MSG % 'drop'
    finally:
        # get_result() must be called to exit the shell.
        p.get_result()
        self._wait_for_num_open_sessions(vector, impalad, 0,
                                         "shell should close sessions.")
        run_impala_shell_interactive(
            vector, "drop table if exists %s.%s;" % (TMP_DB, TMP_TBL))
        # Drop the database this test created (previously an unrelated name
        # was dropped here, leaving TMP_DB behind).
        run_impala_shell_interactive(vector, "drop database if exists %s;" % TMP_DB)
        self.create_impala_clients()