Пример #1
0
    def test_spilling(self, vector):
        """Tests that query results which don't fully fit into memory are spilled to disk.

        The test first runs the query without result spooling to capture the expected
        rows. It then runs the same query asynchronously with result spooling enabled
        and polls the runtime profile until the PeakUnpinnedBytes counter in the
        PLAN_ROOT_SINK section reaches a non-zero value. Finally it fetches all the
        results and validates them against the rows captured earlier.

        Raises Timeout if the counter does not become non-zero within the timeout. The
        query handle is always closed, even if the test fails.
        """
        query = "select * from functional.alltypes order by id limit 1500"
        exec_options = vector.get_value('exec_option')

        # Set lower values for spill-to-disk configs to force the above query to spill
        # spooled results.
        exec_options['min_spillable_buffer_size'] = 8 * 1024
        exec_options['default_spillable_buffer_size'] = 8 * 1024
        exec_options['max_result_spooling_mem'] = 32 * 1024
        exec_options['max_row_size'] = 16 * 1024

        # Execute the query without result spooling and save the results for later
        # validation.
        base_result = self.execute_query(query, exec_options)
        assert base_result.success, \
            "Failed to run {0} when result spooling is disabled".format(query)

        exec_options['spool_query_results'] = 'true'

        # Amount of time (seconds) to wait for the PeakUnpinnedBytes counter in the
        # PLAN_ROOT_SINK section of the profile to reach a non-zero value.
        timeout = 30

        # Regexes to look for in the runtime profiles. They are raw strings so that the
        # backslashes reach the regex engine untouched instead of being treated as
        # (invalid) string escape sequences.
        # PeakUnpinnedBytes can show up in exec nodes as well, so we only look for the
        # PeakUnpinnedBytes metrics in the PLAN_ROOT_SINK section of the profile.
        unpinned_bytes_regex = \
            r"PLAN_ROOT_SINK[\s\S]*?PeakUnpinnedBytes.*\([1-9][0-9]*\)"
        # The PLAN_ROOT_SINK should have 'Spilled' in the 'ExecOption' info string.
        spilled_exec_option_regex = r"ExecOption:.*Spilled"
        # PLAN_ROOT_SINK's reservation limit should be set at
        # MAX_RESULT_SPOOLING_MEM = 32 KB. The dot is escaped so that only the literal
        # value "32.00 KB" matches.
        plan_root_sink_reservation_limit = \
            r"PLAN_ROOT_SINK[\s\S]*?ReservationLimit: 32\.00 KB"

        # Fetch the runtime profile every 0.5 seconds until either the timeout is hit, or
        # PeakUnpinnedBytes shows up in the profile.
        start_time = time.time()
        handle = self.execute_query_async(query, exec_options)
        try:
            while not re.search(unpinned_bytes_regex,
                                self.client.get_runtime_profile(handle)) \
                    and time.time() - start_time < timeout:
                time.sleep(0.5)
            # Re-fetch once so the checks below all run against the same profile text.
            profile = self.client.get_runtime_profile(handle)
            if not re.search(unpinned_bytes_regex, profile):
                raise Timeout(
                    "Query {0} did not spill spooled results within the timeout {1}"
                    .format(query, timeout))
            # At this point PLAN_ROOT_SINK must have spilled, so spilled_exec_option_regex
            # should be in the profile as well.
            assert re.search(spilled_exec_option_regex, profile), \
                "Profile does not match '{0}'".format(spilled_exec_option_regex)
            # Check that PLAN_ROOT_SINK reservation limit is set accordingly.
            assert re.search(plan_root_sink_reservation_limit, profile), \
                "Profile does not match '{0}'".format(plan_root_sink_reservation_limit)
            result = self.client.fetch(query, handle)
            assert result.data == base_result.data
        finally:
            self.client.close_query(handle)
Пример #2
0
 def restart(self):
     """Restart Impala via self._api and block until the restart command ends.

     The command is waited on with a timeout of 15 * 60 (presumably seconds).

     Raises Timeout if the command is still active once the wait returns, and
     Exception (carrying the command's resultMessage) if it did not succeed.
     """
     LOG.info("Restarting Impala")
     wait_limit = 15 * 60
     finished = self._api.restart().wait(timeout=wait_limit)
     if finished.active:
         raise Timeout("Timeout waiting for Impala to restart")
     if finished.success:
         return
     raise Exception("Failed to restart Impala: %s" % finished.resultMessage)
Пример #3
0
 def _request_web_page(self,
                       relative_url,
                       params=None,
                       timeout_secs=DEFAULT_TIMEOUT):
     """Issue an HTTP GET against this host's web UI and return the response.

     'relative_url' is appended verbatim to "http://<host_name>:<web_ui_port>".
     'params' are the query-string parameters (none when omitted) and
     'timeout_secs' is handed to requests as the request timeout.

     Raises Timeout (wrapping the underlying exception) when requests reports a
     timeout. Error HTTP statuses are raised by resp.raise_for_status().
     """
     # Build a fresh dict per call rather than sharing one mutable default object
     # between all invocations.
     if params is None:
         params = {}
     url = "http://%s:%s%s" % (self.host_name, self.web_ui_port,
                               relative_url)
     try:
         resp = requests.get(url, params=params, timeout=timeout_secs)
     except requests.exceptions.Timeout as e:
         raise Timeout(underlying_exception=e)
     resp.raise_for_status()
     return resp
Пример #4
0
 def wait_for_state(self, handle, expected_state, timeout):
   """Waits for the query behind 'handle' to reach the 'expected_state'.

   The state is read through self.client.get_state() and re-polled every 0.5 seconds.
   If the query does not reach the given state within 'timeout' seconds, the method
   raises a Timeout that reports the last state seen.
   """
   start_time = time.time()
   actual_state = self.client.get_state(handle)
   while actual_state != expected_state and time.time() - start_time < timeout:
     # Sleep first: the state was fetched just before this iteration, so polling again
     # right away is wasted work. Polling last also lets the loop exit as soon as the
     # expected state is observed instead of sleeping once more.
     time.sleep(0.5)
     actual_state = self.client.get_state(handle)
   if actual_state != expected_state:
     raise Timeout("query '%s' did not reach expected state '%s', last known state '%s'"
                   % (handle.get_handle().id, expected_state, actual_state))
Пример #5
0
 def wait_for_any_state(self, handle, expected_states, timeout):
   """Waits for the query behind 'handle' to reach one of 'expected_states'.

   The state is read through self.client.get_state() and re-polled every 0.5 seconds.
   If the query does not reach one of the given states within 'timeout' seconds, the
   method raises a Timeout that reports the last state seen. Returns the final state.
   """
   start_time = time.time()
   actual_state = self.client.get_state(handle)
   while actual_state not in expected_states and time.time() - start_time < timeout:
     # Sleep first: the state was fetched just before this iteration, so polling again
     # right away is wasted work. Polling last also lets the loop exit as soon as an
     # expected state is observed instead of sleeping once more.
     time.sleep(0.5)
     actual_state = self.client.get_state(handle)
   if actual_state not in expected_states:
     raise Timeout("query {0} did not reach one of the expected states {1}, "
                   "last known state {2}".format(handle.get_handle().id, expected_states,
                   actual_state))
   return actual_state
Пример #6
0
  def __wait_until_retried(self, handle, timeout=300):
    """Wait until the given query handle has been retried. This is achieved by polling the
    runtime profile of the original query every 0.5 seconds and checking the
    'Retry Status' field. Raises a Timeout if the status has not become "RETRIED" within
    'timeout' seconds."""
    retried_state = "RETRIED"

    def get_retry_status():
      # Returns the text following "Retry Status: " in the profile, or None if the
      # profile has no such field.
      profile = self.__get_original_query_profile(handle.get_handle().id)
      status_match = re.search("Retry Status: (.*)", profile)
      return status_match.group(1) if status_match else None

    start_time = time.time()
    retry_status = get_retry_status()
    while retry_status != retried_state and time.time() - start_time < timeout:
      # Sleep first: the status was fetched just before this iteration, so fetching the
      # profile again right away is wasted work. Polling last also lets the loop exit
      # as soon as the retried state is observed.
      time.sleep(0.5)
      retry_status = get_retry_status()
    if retry_status != retried_state:
      raise Timeout("query {0} was not retried within timeout".format(
          handle.get_handle().id))
Пример #7
0
  def test_multiple_fetch_multiple_batches_timeout(self):
    """Exercise the FETCH_ROWS_TIMEOUT_MS query option with repeated fetches.

    A query is run with a batch size of 1, a DELAY debug action and a 1 ms fetch
    timeout. Fetch requests are then issued in a loop until every row has been returned,
    and the test validates that several requests were needed, i.e. that some of them
    timed out. Similar to test_fetch_multiple_batches_timeout, except that multiple
    fetch requests are expected to time out."""
    num_rows = 100
    statement = "select * from functional.alltypes limit {0}".format(num_rows)
    conf = {'batch_size': '1', 'debug_action': '0:GETNEXT:DELAY',
            'fetch_rows_timeout_ms': '1'}
    exec_resp = self.execute_statement(statement, conf_overlay=conf)
    HS2TestSuite.check_response(exec_resp)

    # Block until the operation reports FINISHED_STATE, i.e. rows can be fetched.
    status_resp = self.wait_for_operation_state(
        exec_resp.operationHandle,
        TCLIService.TOperationState.FINISHED_STATE, timeout=30)
    HS2TestSuite.check_response(status_resp)

    # Upper bound (seconds) on the time spent fetching all rows.
    timeout = 30

    began = time()
    rows_fetched = 0
    fetch_count = 0

    # Keep fetching until all rows have arrived or the time budget is used up.
    while True:
      if rows_fetched == num_rows or time() - began >= timeout:
        break
      sleep(0.5)
      fetch_req = TCLIService.TFetchResultsReq(
          operationHandle=exec_resp.operationHandle, maxRows=num_rows)
      fetch_resp = self.hs2_client.FetchResults(fetch_req)
      HS2TestSuite.check_response(fetch_resp)
      rows_fetched += HS2TestSuite.get_num_rows(fetch_resp.results)
      fetch_count += 1

    if rows_fetched != num_rows:
      raise Timeout("Query {0} did not fetch all results within the timeout {1}"
                    .format(statement, timeout))
    # The query produces 100 RowBatches and each batch is delayed before it reaches the
    # PlanRootSink (100ms per batch according to the original note). Every fetch
    # request asks for all 100 rows, but with such a low fetch timeout a single request
    # cannot return them all, so several requests should have been necessary.
    assert fetch_count >= 5
Пример #8
0
 def _request_web_page(self,
                       relative_url,
                       params=None,
                       timeout_secs=DEFAULT_TIMEOUT):
     """Issue a GET against this host's web UI and return the response.

     The scheme is https when self.cluster.use_ssl is set and http otherwise.
     'relative_url' is appended verbatim after "<host_name>:<web_ui_port>".
     'params' are the query-string parameters (none when omitted) and
     'timeout_secs' is handed to requests as the request timeout.

     Raises Timeout (wrapping the underlying exception) when requests reports a
     timeout. Error HTTP statuses are raised by resp.raise_for_status().
     """
     # Build a fresh dict per call rather than sharing one mutable default object
     # between all invocations.
     if params is None:
         params = {}
     scheme = 'https' if self.cluster.use_ssl else 'http'
     url = '{scheme}://{host}:{port}{url}'.format(scheme=scheme,
                                                  host=self.host_name,
                                                  port=self.web_ui_port,
                                                  url=relative_url)
     # Verify against the cluster's CA cert when one is configured; otherwise
     # certificate verification is turned off.
     verify_ca = self.cluster.ca_cert if self.cluster.ca_cert is not None else False
     try:
         resp = requests.get(url,
                             params=params,
                             timeout=timeout_secs,
                             verify=verify_ca)
     except requests.exceptions.Timeout as e:
         raise Timeout(underlying_exception=e)
     resp.raise_for_status()
     return resp
Пример #9
0
  def fetch_num_rows(hs2_client, op_handle, num_rows, statement):
    """Fetch exactly 'num_rows' rows from 'op_handle' using 'hs2_client'.

    Fetch requests are issued every 0.5 seconds, each asking only for the rows still
    missing, until the expected number of rows has been received. If that does not
    happen within the timeout, a Timeout naming 'statement' is raised."""
    # Upper bound (seconds) on the time spent fetching all rows.
    timeout = 30

    began = time()
    rows_fetched = 0

    # Keep fetching until all rows have arrived or the time budget is used up.
    while True:
      if rows_fetched == num_rows or time() - began >= timeout:
        break
      sleep(0.5)
      still_missing = num_rows - rows_fetched
      fetch_req = TCLIService.TFetchResultsReq(operationHandle=op_handle,
                                               maxRows=still_missing)
      fetch_resp = hs2_client.FetchResults(fetch_req)
      HS2TestSuite.check_response(fetch_resp)
      rows_fetched += HS2TestSuite.get_num_rows(fetch_resp.results)

    if rows_fetched != num_rows:
      raise Timeout("Query {0} did not fetch all results within the timeout {1}"
                    .format(statement, timeout))
    assert rows_fetched == num_rows
Пример #10
0
 def _request_web_page(self,
                       relative_url,
                       params=None,
                       timeout_secs=DEFAULT_TIMEOUT):
     """Issue a GET against this host's web UI and return the response.

     The scheme is https when self.cluster.use_ssl is set and http otherwise.
     'relative_url' is appended verbatim after "<host_name>:<web_ui_port>".
     'params' are the query-string parameters (none when omitted) and
     'timeout_secs' is handed to requests as the request timeout.

     Raises Timeout (wrapping the underlying exception) when requests reports a
     timeout. Error HTTP statuses are raised by resp.raise_for_status().
     """
     # Build a fresh dict per call rather than sharing one mutable default object
     # between all invocations.
     if params is None:
         params = {}
     scheme = 'https' if self.cluster.use_ssl else 'http'
     url = '{scheme}://{host}:{port}{url}'.format(scheme=scheme,
                                                  host=self.host_name,
                                                  port=self.web_ui_port,
                                                  url=relative_url)
     try:
         # verify=False is needed because of self-signed certificates.
         # NOTE: this disables TLS certificate verification for the request.
         # TODO: support a CA bundle that users could point to instead
         resp = requests.get(url,
                             params=params,
                             timeout=timeout_secs,
                             verify=False)
     except requests.exceptions.Timeout as e:
         raise Timeout(underlying_exception=e)
     resp.raise_for_status()
     return resp
Пример #11
0
    def shell(self, cmd, cmd_prepend="set -euo pipefail\n", timeout_secs=None):
        """Executes a command over ssh and returns its output.

        'cmd' is stripped and dedented, then prefixed with 'cmd_prepend' (if non-empty)
        before being executed on a new session channel. If opening the session fails,
        the connection is closed and re-established once before giving up.

        'timeout_secs' bounds how long to wait for the command to exit; None means wait
        indefinitely. The exit status is polled every 0.1 seconds.

        Returns the command's stdout, decoded as UTF-8 and re-encoded as ASCII with
        unencodable characters dropped, when the exit code is zero.

        Raises Timeout if the command does not exit in time, and Exception if the
        session cannot be opened or the command exits with a non-zero code. Both error
        messages include the command and whatever stdout/stderr could be collected.
        """
        cmd = textwrap.dedent(cmd.strip())
        if cmd_prepend:
            cmd = cmd_prepend + cmd
        LOG.debug("Running command via ssh on %s:\n%s" % (self.host_name, cmd))
        transport = self.get_transport()
        for is_first_attempt in (True, False):
            try:
                channel = transport.open_session()
                break
            except Exception as e:
                if is_first_attempt:
                    # The connection may have gone stale; reconnect and retry once.
                    # 'warning' is used because 'warn' is a deprecated logging alias.
                    LOG.warning("Error opening ssh session: %s" % e)
                    self.close()
                    self.connect(self.host_name, **self.connect_kwargs)
                else:
                    raise Exception("Unable to open ssh session to %s: %s" %
                                    (self.host_name, e))
        channel.set_combine_stderr(True)
        channel.exec_command(cmd)
        process = RemoteProcess(channel)

        deadline = None if timeout_secs is None else time.time() + timeout_secs
        while True:
            retcode = process.poll()
            # Compare against None explicitly so that "no deadline" is the only case
            # that disables the timeout check.
            if retcode is not None or (deadline is not None
                                       and time.time() > deadline):
                break
            time.sleep(0.1)

        if retcode == 0:
            return process.stdout.read().decode("utf-8").encode(
                "ascii", errors="ignore")
        if retcode is None:
            # Timed out: the command is still running, so only collect what is already
            # buffered on the channel rather than reading until EOF.
            # NOTE(review): recv(None) / recv_stderr(None) pass no byte count; confirm
            # the channel implementation accepts that.
            if process.channel.recv_ready():
                output = process.channel.recv(None)
            else:
                output = ""
            if process.channel.recv_stderr_ready():
                err = process.channel.recv_stderr(None)
            else:
                err = ""
        else:
            output = process.stdout.read()
            err = process.stderr.read()
        if output:
            output = output.decode("utf-8").encode("ascii", errors="ignore")
        else:
            output = "(No stdout)"
        if err:
            err = err.decode("utf-8").encode("ascii", errors="ignore")
        else:
            err = "(No stderr)"
        if retcode is None:
            raise Timeout(
                "Command timed out after %s seconds\ncmd: %s\nstdout: %s\nstderr: %s"
                % (timeout_secs, cmd, output, err))
        raise Exception(("Command returned non-zero exit code: %s"
                         "\ncmd: %s\nstdout: %s\nstderr: %s") %
                        (retcode, cmd, output, err))
Пример #12
0
def shell(cmd,
          cmd_prepend="set -euo pipefail\n",
          stdout=PIPE,
          stderr=STDOUT,
          timeout_secs=None,
          **popen_kwargs):
    """Run 'cmd' through /bin/bash and return what it wrote to stdout.

    'cmd' is stripped and dedented, then prefixed with 'cmd_prepend' (if non-empty).
    'stdout', 'stderr' and any extra keyword arguments are passed on to Popen.
    'timeout_secs' bounds how long to wait for the command to exit; None means wait
    indefinitely. Piped output is drained while waiting.

    Raises Timeout if the command has not exited by the deadline and Exception if it
    exits with a non-zero return code. Both messages include the command and the
    output collected so far.
    """
    cmd = dedent(cmd.strip())
    if cmd_prepend:
        cmd = cmd_prepend + cmd
    if timeout_secs is None:
        timeout_desc = "no"
    else:
        timeout_desc = "%s second" % timeout_secs
    LOG.debug("Running command with %s timeout: %s" % (timeout_desc, cmd))
    process = Popen(cmd,
                    shell=True,
                    executable="/bin/bash",
                    stdout=stdout,
                    stderr=stderr,
                    **popen_kwargs)

    # File descriptors of the pipes that have not reached EOF yet; stdout (when piped)
    # always precedes stderr (when piped).
    stdout_fileno = process.stdout and process.stdout.fileno()
    stderr_fileno = process.stderr and process.stderr.fileno()
    open_fds = [fd for fd in (stdout_fileno, stderr_fileno) if fd is not None]
    out_chunks = []
    err_chunks = []

    def _read_available_output():
        # Read from the pipes until none of them has data ready, without blocking.
        while True:
            readable, _, _ = select(open_fds, [], [], 0)
            if not readable:
                return
            for fd in readable:
                data = os.read(fd, 4096)
                if fd == stdout_fileno:
                    if data:
                        out_chunks.append(data)
                    else:
                        # EOF on stdout, which is first whenever it is still listed.
                        del open_fds[0]
                elif fd == stderr_fileno:
                    if data:
                        err_chunks.append(data)
                    else:
                        # EOF on stderr, which is last whenever it is still listed.
                        del open_fds[-1]

    deadline = None if timeout_secs is None else time() + timeout_secs
    while True:
        # The subprocess docs indicate that stdout/err need to be drained while waiting
        # if the PIPE option is used.
        _read_available_output()
        retcode = process.poll()
        if retcode is not None:
            break
        if deadline and time() > deadline:
            break
        sleep(0.1)
    # Pick up anything written between the last drain and the exit / deadline.
    _read_available_output()

    output = "".join(out_chunks)
    if retcode == 0:
        return output

    output = output or "(No stdout)"
    err = "".join(err_chunks) if err_chunks else "(No stderr)"
    if retcode is None:
        raise Timeout(
            "Command timed out after %s seconds\ncmd: %s\nstdout: %s\nstderr: %s"
            % (timeout_secs, cmd, output, err))
    raise Exception(
        ("Command returned non-zero exit code: %s"
         "\ncmd: %s\nstdout: %s\nstderr: %s") % (retcode, cmd, output, err))