Example #1
  def _start_impala_cluster(cls, options, log_dir=os.getenv('LOG_DIR', "/tmp/"),
      cluster_size=CLUSTER_SIZE, num_coordinators=NUM_COORDINATORS,
      use_exclusive_coordinators=False, log_level=1, expected_num_executors=CLUSTER_SIZE):
    cls.impala_log_dir = log_dir
    # We ignore TEST_START_CLUSTER_ARGS here. Custom cluster tests specifically test that
    # certain custom startup arguments work and we want to keep them independent of dev
    # environments.
    cmd = [os.path.join(IMPALA_HOME, 'bin/start-impala-cluster.py'),
           '--cluster_size=%d' % cluster_size,
           '--num_coordinators=%d' % num_coordinators,
           '--log_dir=%s' % log_dir,
           '--log_level=%s' % log_level]

    if use_exclusive_coordinators:
      cmd.append("--use_exclusive_coordinators")

    if os.environ.get("ERASURE_CODING") == "true":
      cmd.append("--impalad_args=--default_query_options=allow_erasure_coded_files=true")

    try:
      check_call(cmd + options, close_fds=True)
    finally:
      # Failure tests expect cluster to be initialised even if start-impala-cluster fails.
      cls.cluster = ImpalaCluster()
    statestored = cls.cluster.statestored
    if statestored is None:
      raise Exception("statestored was not found")

    # The number of statestore subscribers is
    # cluster_size (# of impalad) + 1 (for catalogd).
    expected_subscribers = cluster_size + 1

    statestored.service.wait_for_live_subscribers(expected_subscribers, timeout=60)
    for impalad in cls.cluster.impalads:
      impalad.service.wait_for_num_known_live_backends(expected_num_executors, timeout=60)
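A minimal sketch of how a custom-cluster test might invoke this helper (the suite name and flag below are illustrative assumptions, not taken from the source):

class TestCustomStartupFlags(CustomClusterTestSuite):  # hypothetical suite
  @classmethod
  def setup_class(cls):
    # Bring up a three-node cluster, passing an illustrative impalad flag.
    cls._start_impala_cluster(['--impalad_args=--mem_limit=2g'],
                              cluster_size=3, expected_num_executors=3)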
Example #2
    def test_run_invalidate_refresh(self, vector):
        """Verifies that running concurrent invalidate table/catalog and refresh commands
    don't cause failures with other running workloads and ensures catalog versions
    are strictly increasing."""
        target_db = self.execute_scalar('select current_database()',
                                        vector=vector)
        impala_cluster = ImpalaCluster()
        impalad = impala_cluster.impalads[0].service
        catalogd = impala_cluster.catalogd.service

        for i in xrange(NUM_ITERATIONS):
            # Get the catalog versions for the table before running the workload
            before_versions = dict()
            before_versions['catalogd'] =\
                self.get_table_version(catalogd, target_db, 'lineitem')
            before_versions['impalad'] = self.get_table_version(
                impalad, target_db, 'lineitem')

            self.run_test_case('stress-with-invalidate-refresh', vector)

            # Get the catalog versions for the table after running the workload
            after_versions = dict()
            after_versions['catalogd'] = self.get_table_version(
                catalogd, target_db, 'lineitem')
            after_versions['impalad'] = self.get_table_version(
                impalad, target_db, 'lineitem')

            # Catalog versions should be strictly increasing
            assert before_versions['impalad'] < after_versions['impalad']
            assert before_versions['catalogd'] < after_versions['catalogd']
Example #3
 def test_low_mem_limit_no_fragments(self, vector):
   self.low_memory_limit_test(vector, 'tpch-q14', self.MIN_MEM_FOR_TPCH['Q14'])
   self.low_memory_limit_test(vector, 'tpch-q18', self.MIN_MEM_FOR_TPCH['Q18'])
   self.low_memory_limit_test(vector, 'tpch-q20', self.MIN_MEM_FOR_TPCH['Q20'])
   for impalad in ImpalaCluster().impalads:
     verifier = MetricVerifier(impalad.service)
     verifier.wait_for_metric("impala-server.num-fragments-in-flight", 0)
Example #4
    def test_hive_udfs_missing_jar(self, vector):
        """ IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
    on HDFS"""
        # Copy hive-exec.jar to a temporary file
        jar_path = "tmp/" + get_random_id(5) + ".jar"
        self.hdfs_client.copy('test-warehouse/hive-exec.jar', jar_path)
        drop_fn_stmt = "drop function if exists default.pi_missing_jar()"
        create_fn_stmt = "create function default.pi_missing_jar() returns double \
        location '/%s' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'" % jar_path

        cluster = ImpalaCluster()
        impalad = cluster.get_any_impalad()
        client = impalad.service.create_beeswax_client()
        # Create and drop functions with sync_ddl to make sure they are reflected
        # in every impalad.
        exec_option = vector.get_value('exec_option')
        exec_option['sync_ddl'] = 1

        self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
        self.execute_query_expect_success(client, create_fn_stmt, exec_option)
        # Delete the udf jar
        self.hdfs_client.delete_file_dir(jar_path)

        different_impalad = cluster.get_different_impalad(impalad)
        client = different_impalad.service.create_beeswax_client()
        # Run a query using the udf from an impalad other than the one
        # we used to create the function. This is to bypass loading from
        # the cache
        try:
            self.execute_query_using_client(client,
                                            "select default.pi_missing_jar()",
                                            vector)
            assert False, "Query expected to fail"
        except ImpalaBeeswaxException as e:
            assert "Failed to get file info" in str(e)
Example #5
def run_stress_workload(queries, database, workload, start_delay,
        kill_frequency, concurrency, iterations, num_impalads):
  """Runs the given set of queries against the the given database. 'concurrency' controls
  how many concurrent streams of the queries are run, and 'iterations' controls how many
  times the workload is run. 'num_impalads' controls the number of impalads to launch.
  The 'kill_frequency' and 'start_delay' are used to configure the impalad killer thread.
  'workload' is purely used for debugging purposes."""

  # Create the global QueryRetryLatch.
  global completed_queries_latch
  completed_queries_latch = QueryRetryLatch(concurrency)

  # Start the Impala cluster and set the coordinator.
  start_impala_cluster(num_impalads)
  cluster = ImpalaCluster()
  impala_coordinator = cluster.impalads[0]

  # Start the 'random impalad killer' thread.
  start_random_impalad_killer(kill_frequency, start_delay, cluster)

  # Run the stress test 'iterations' times.
  for i in xrange(iterations):
    LOG.info("Starting iteration {0} of workload {1}".format(i, workload))
    run_concurrent_workloads(concurrency, impala_coordinator, database,
        queries)

  # Print the total number of queries retried.
  global total_queries_retried_lock
  global total_queries_retried
  total_queries_retried_lock.acquire()
  LOG.info("Total queries retried {0}".format(total_queries_retried))
  total_queries_retried_lock.release()
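QueryRetryLatch is not defined in this snippet; a minimal sketch of a countdown latch built on Python's threading primitives might look like the following (a hypothetical stand-in, assuming each of the 'concurrency' workload threads counts down once when it finishes):

import threading

class QueryRetryLatch(object):
  """Hypothetical countdown latch: wait() blocks until count_down() has
  been called 'count' times."""

  def __init__(self, count):
    self._count = count
    self._cond = threading.Condition()

  def count_down(self):
    with self._cond:
      self._count -= 1
      if self._count <= 0:
        self._cond.notify_all()

  def wait(self):
    with self._cond:
      while self._count > 0:
        self._cond.wait()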
Example #6
 def _run_query_all_impalads(self, exec_options, query, expected):
     impala_cluster = ImpalaCluster()
     for impalad in impala_cluster.impalads:
         client = impalad.service.create_beeswax_client()
         result = self.execute_query_expect_success(client, query,
                                                    exec_options)
         assert result.data == expected
Example #7
    def __test_invalid_result_caching(self, sql_stmt):
        """ Tests that invalid requests for query-result caching fail
    using the given sql_stmt."""
        impala_cluster = ImpalaCluster()
        impalad = impala_cluster.impalads[0].service

        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = sql_stmt
        execute_statement_req.confOverlay = dict()

        # Test that a malformed result-cache size returns an error.
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = "bad_number"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(
            execute_statement_resp, TCLIService.TStatusCode.ERROR_STATUS,
            "Invalid value 'bad_number' for 'impala.resultset.cache.size' option"
        )
        self.__verify_num_cached_rows(0)
        assert 0 == impalad.get_num_in_flight_queries()

        # Test that a result-cache size exceeding the per-Impalad maximum returns an error.
        # The default maximum result-cache size is 100000.
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = "100001"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(
            execute_statement_resp, TCLIService.TStatusCode.ERROR_STATUS,
            "Requested result-cache size of 100001 exceeds Impala's maximum of 100000"
        )
        self.__verify_num_cached_rows(0)
        assert 0 == impalad.get_num_in_flight_queries()
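For contrast, a request with a well-formed cache size within the maximum should be accepted; a sketch using the same request object (the value 100 is arbitrary, and this assumes check_response defaults to expecting a success status):

        # A valid result-cache size within the per-Impalad maximum should succeed.
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = "100"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(execute_statement_resp)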
Example #8
def wait_for_cluster(timeout_in_seconds=CLUSTER_WAIT_TIMEOUT_IN_SECONDS):
  """Checks if the cluster is "ready"

  A cluster is deemed "ready" if:
    - All backends are registered with the statestore.
    - Each impalad knows about all other impalads.
    - Each coordinator impalad's catalog cache is ready.
  This information is retrieved by querying the statestore debug webpage
  and each individual impalad's metrics webpage.
  """
  impala_cluster = ImpalaCluster()
  # impalad processes may take a while to come up.
  wait_for_impala_process_count(impala_cluster)

  # TODO: fix this for coordinator-only nodes as well.
  expected_num_backends = options.cluster_size
  if options.catalog_init_delays != "":
    for delay in options.catalog_init_delays.split(","):
      if int(delay.strip()) != 0: expected_num_backends -= 1

  for impalad in impala_cluster.impalads:
    impalad.service.wait_for_num_known_live_backends(expected_num_backends,
        timeout=CLUSTER_WAIT_TIMEOUT_IN_SECONDS, interval=2)
    if impalad._get_arg_value("is_coordinator", default="true") == "true" and \
       impalad._get_arg_value("stress_catalog_init_delay_ms", default=0) == 0:
      wait_for_catalog(impalad)
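wait_for_catalog is referenced above but not shown; a simplified, hypothetical sketch that polls an impalad readiness metric (the 'catalog.ready' metric name is an assumption) could look like this:

import time

def wait_for_catalog(impalad, timeout_in_seconds=CLUSTER_WAIT_TIMEOUT_IN_SECONDS):
  """Hypothetical sketch: poll until the impalad reports its catalog as ready."""
  start_time = time.time()
  while time.time() - start_time < timeout_in_seconds:
    # Assumes the service exposes a boolean 'catalog.ready' metric.
    if impalad.service.get_metric_value("catalog.ready"):
      return
    time.sleep(1)
  raise RuntimeError("catalog not ready within %s seconds" % timeout_in_seconds)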
Example #9
    def test_views_describe(self, vector, unique_database):
        # IMPALA-6896: Tests that altered views can be described by all impalads.
        impala_cluster = ImpalaCluster()
        impalads = impala_cluster.impalads
        view_name = "%s.test_describe_view" % unique_database
        query_opts = vector.get_value('exec_option')
        first_client = impalads[0].service.create_beeswax_client()
        try:
            # Create a view and verify it's visible.
            self.execute_query_expect_success(
                first_client, "create view {0} as "
                "select * from functional.alltypes".format(view_name),
                query_opts)
            self._verify_describe_view(vector, view_name,
                                       "select * from functional.alltypes")

            # Alter the view and verify the alter is visible.
            self.execute_query_expect_success(
                first_client, "alter view {0} as "
                "select * from functional.alltypesagg".format(view_name),
                query_opts)
            self._verify_describe_view(vector, view_name,
                                       "select * from functional.alltypesagg")
        finally:
            first_client.close()
Example #10
    def _verify_describe_view(self, vector, view_name, expected_substr):
        """
    Verify across all impalads that the view 'view_name' has the given substring in its
    expanded SQL.

    If SYNC_DDL is enabled, the verification should complete immediately. Otherwise,
    loops waiting for the expected condition to pass.
    """
        if vector.get_value('exec_option')['sync_ddl']:
            num_attempts = 1
        else:
            num_attempts = 60
        for impalad in ImpalaCluster().impalads:
            client = impalad.service.create_beeswax_client()
            try:
                for attempt in itertools.count(1):
                    assert attempt <= num_attempts, "ran out of attempts"
                    try:
                        result = self.execute_query_expect_success(
                            client, "describe formatted %s" % view_name)
                        exp_line = [
                            l for l in result.data if 'View Expanded' in l
                        ][0]
                    except ImpalaBeeswaxException as e:
                        # In non-SYNC_DDL tests, it's OK to get a "missing view" type error
                        # until the metadata propagates.
                        exp_line = "Exception: %s" % e
                    if expected_substr in exp_line.lower():
                        return
                    time.sleep(1)
            finally:
                client.close()
Example #11
  def test_jvm_mem_tracking(self, vector):
    service = ImpalaCluster().impalads[0].service
    verifier = MemUsageVerifier(service)
    proc_values = verifier.get_mem_usage_values('Process')
    proc_total = proc_values['total']
    proc_limit = proc_values['limit']
    max_heap_size = verifier.get_mem_usage_values('JVM: max heap size')['total']
    non_heap_committed = verifier.get_mem_usage_values('JVM: non-heap committed')['total']
    MB = 1024 * 1024
    LOG.info("proc_total={0}, max_heap_size={1} non_heap_committed={2}".format(
        proc_total, max_heap_size, non_heap_committed))
    # The max heap size will be lower than -Xmx but should be in the same general range.
    assert max_heap_size >= 900 * MB and max_heap_size <= 1024 * MB
    # The non-heap committed value is hard to predict but should be non-zero.
    assert non_heap_committed > 0
    # Process mem consumption should include both of the above values.
    assert proc_total > max_heap_size + non_heap_committed

    # Make sure that the admittable memory is within 100MB of the process limit
    # minus the heap size (there may be some rounding errors).
    backend_json = json.loads(service.read_debug_webpage('backends?json'))
    admit_limit_human_readable = backend_json['backends'][0]['admit_mem_limit']
    admit_limit = parse_mem_value(admit_limit_human_readable)
    LOG.info("proc_limit={0}, admit_limit={1}".format(proc_limit, admit_limit))
    assert abs(admit_limit - (proc_limit - max_heap_size)) <= 100 * MB
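parse_mem_value converts the human-readable sizes from the backends debug page into bytes; a minimal sketch of such a helper (hypothetical implementation, assuming inputs like '512.00 MB' or '1.50 GB'):

import re

def parse_mem_value(value):
  """Hypothetical sketch: convert a human-readable size such as '512.00 MB'
  or '1.50 GB' into a number of bytes."""
  multipliers = {'B': 1, 'KB': 1024, 'MB': 1024 ** 2, 'GB': 1024 ** 3, 'TB': 1024 ** 4}
  match = re.match(r"([\d.]+)\s*([KMGT]?B)$", value.strip())
  if match is None:
    raise ValueError("unparseable memory value: %r" % value)
  return int(float(match.group(1)) * multipliers[match.group(2)])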
Example #12
 def _start_impala_cluster(cls,
                           options,
                           log_dir=os.getenv('LOG_DIR', "/tmp/"),
                           cluster_size=CLUSTER_SIZE,
                           log_level=1):
     cls.impala_log_dir = log_dir
     cmd = [
         os.path.join(IMPALA_HOME, 'bin/start-impala-cluster.py'),
         '--cluster_size=%d' % cluster_size,
         '--log_dir=%s' % log_dir,
         '--log_level=%s' % log_level
     ]
     try:
         check_call(cmd + options, close_fds=True)
     finally:
         # Failure tests expect cluster to be initialised even if start-impala-cluster fails.
         cls.cluster = ImpalaCluster()
     statestored = cls.cluster.statestored
     if statestored is None:
         raise Exception("statestored was not found")
     statestored.service.wait_for_live_subscribers(NUM_SUBSCRIBERS,
                                                   timeout=60)
     for impalad in cls.cluster.impalads:
         impalad.service.wait_for_num_known_live_backends(CLUSTER_SIZE,
                                                          timeout=60)
Example #13
    def test_low_mem_limit_low_selectivity_scan(self, cursor, mem_limit,
                                                vector):
        """Tests that the queries specified in this test suite run under the given
    memory limits."""
        exec_options = dict(
            (k, str(v))
            for k, v in vector.get_value('exec_option').iteritems())
        exec_options['mem_limit'] = "{0}m".format(mem_limit)
        for i, q in enumerate(self.QUERIES):
            try:
                cursor.execute(q, configuration=exec_options)
                cursor.fetchall()
            except Exception as e:
                if (mem_limit > self.QUERY_MEM_LIMITS[i]):
                    raise
                assert "Memory limit exceeded" in str(e)

        # IMPALA-4654: Validate the fix for a bug where LimitReached() wasn't respected in
        # the KuduScanner and the limit query above would result in a fragment running an
        # additional minute. This ensures that the num fragments 'in flight' reaches 0 in
        # less time than IMPALA-4654 was reproducing (~60sec) but yet still enough time that
        # this test won't be flaky.
        verifiers = [
            MetricVerifier(i.service) for i in ImpalaCluster().impalads
        ]
        for v in verifiers:
            v.wait_for_metric("impala-server.num-fragments-in-flight",
                              0,
                              timeout=30)
Example #14
    def test_failure_in_prepare_multi_fragment(self):
        # Test that if one fragment fails that the others are cleaned up during the ensuing
        # cancellation.
        verifiers = [
            MetricVerifier(i.service) for i in ImpalaCluster().impalads
        ]
        # Fail the scan node
        self.client.execute("SET DEBUG_ACTION='-1:0:PREPARE:FAIL'")

        # Force a query plan that will have three fragments or more.
        try:
            self.client.execute(
                "SELECT COUNT(*) FROM functional.alltypes a JOIN [SHUFFLE] \
        functional.alltypes b on a.id = b.id")
            assert "Query should have thrown an error"
        except ImpalaBeeswaxException:
            pass

        for v in verifiers:
            # Long timeout required because fragments may be blocked while sending data. The
            # default value of --datastream_sender_timeout_ms is 120s before they wake up and
            # cancel themselves.
            #
            # TODO: Fix when we have cancellable RPCs.
            v.wait_for_metric(self.IN_FLIGHT_FRAGMENTS, 0, timeout=125)
Example #15
  def test_views_describe(self, vector, unique_database):
    # IMPALA-6896: Tests that altered views can be described by all impalads.
    impala_cluster = ImpalaCluster()
    impalads = impala_cluster.impalads
    first_client = impalads[0].service.create_beeswax_client()
    try:
      self.execute_query_expect_success(first_client,
                                        "create view {0}.test_describe_view as "
                                        "select * from functional.alltypes"
                                        .format(unique_database), {'sync_ddl': 1})
      self.execute_query_expect_success(first_client,
                                        "alter view {0}.test_describe_view as "
                                        "select * from functional.alltypesagg"
                                        .format(unique_database))
    finally:
      first_client.close()

    for impalad in impalads:
      client = impalad.service.create_beeswax_client()
      try:
        while True:
          result = self.execute_query_expect_success(
              client, "describe formatted {0}.test_describe_view"
              .format(unique_database))
          if any("select * from functional.alltypesagg" in s.lower()
                 for s in result.data):
            break
          time.sleep(1)
      finally:
        client.close()
Example #16
    def test_hash_join_timer(self, vector):
        # This test runs serially because it requires the query to come back within
        # some amount of time. Running this with other tests makes it hard to bound
        # that time. It also assumes that it will be able to get a thread token to
        # execute the join build in parallel.
        test_case = vector.get_value('test cases')
        query = test_case[0]
        join_type = test_case[1]

        # Ensure that the cluster is idle before starting the test query.
        for impalad in ImpalaCluster().impalads:
            verifier = MetricVerifier(impalad.service)
            verifier.wait_for_metric("impala-server.num-fragments-in-flight",
                                     0)

        # Execute the query. The query summary and profile are stored in 'result'.
        result = self.execute_query(query, vector.get_value('exec_option'))

        # Parse the query summary; The join node is "id=3".
        # In the ExecSummary, search for the join operator's summary and verify the
        # avg and max times are within acceptable limits.
        exec_summary = result.exec_summary
        check_execsummary_count = 0
        join_node_name = "03:%s" % (join_type)
        for line in exec_summary:
            if line['operator'] == join_node_name:
                avg_time_ms = line['avg_time'] / self.NANOS_PER_MILLI
                self.__verify_join_time(avg_time_ms, "ExecSummary Avg")
                max_time_ms = line['max_time'] / self.NANOS_PER_MILLI
                self.__verify_join_time(max_time_ms, "ExecSummary Max")
                check_execsummary_count += 1
        assert (check_execsummary_count == 1), \
            "Unable to verify ExecSummary: {0}".format(exec_summary)

        # Parse the query profile; The join node is "id=3".
        # In the profiles, search for lines containing "(id=3)" and parse for the avg and
        # non-child times to verify that they are within acceptable limits. Also verify
        # that the build side is built in a different thread by searching for the string:
        # "Join Build-Side Prepared Asynchronously"
        profile = result.runtime_profile
        check_fragment_count = 0
        async_build = False
        for line in profile.split("\n"):
            if "(id=3)" in line:
                # Sample line:
                # HASH_JOIN_NODE (id=3):(Total: 3s580ms, non-child: 11.89ms, % non-child: 0.31%)
                strip1 = re.split("non-child: ", line)[1]
                non_child_time = re.split(", ", strip1)[0]
                non_child_time_ms = parse_duration_string_ms(non_child_time)
                self.__verify_join_time(non_child_time_ms,
                                        "Fragment non-child")
                check_fragment_count += 1
            # Search for "Join Build-Side Prepared Asynchronously"
            if ("Join Build-Side Prepared Asynchronously" in line):
                asyn_build = True
        assert async_build, "Join is not prepared asynchronously: {0}".format(profile)
        assert (check_fragment_count > 1), \
            "Unable to verify Fragment or Average Fragment: {0}".format(profile)
Example #17
 def test_query_profile_encoded_unknown_query_id(self):
   """Test that /query_profile_encoded error message starts with the expected line in
   case of missing query and does not contain any leading whitespace.
   """
   cluster = ImpalaCluster()
   impalad = cluster.get_any_impalad()
   result = impalad.service.read_debug_webpage("query_profile_encoded?query_id=123")
   assert result.startswith("Could not obtain runtime profile: Query id")
Example #18
  def test_execqueryfinstances_timeout(self, vector):
    for i in range(3):
      ex = self.execute_query_expect_failure(self.client, self.TEST_QUERY)
      assert "RPC recv timed out" in str(ex)
    verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]

    for v in verifiers:
      v.wait_for_metric("impala-server.num-fragments-in-flight", 0)
      v.verify_num_unused_buffers()
Example #19
  def test_create_drop_data_src(self, vector):
    """This will create, run, and drop the same data source repeatedly, exercising
    the lib cache mechanism.
    """
    create_ds_stmt = ("CREATE DATA SOURCE test_data_src "
        "LOCATION '%s/data-sources/test-data-source.jar' "
        "CLASS 'com.cloudera.impala.extdatasource.AllTypesDataSource' "
        "API_VERSION 'V1'" % WAREHOUSE)
    create_tbl_stmt = """CREATE TABLE data_src_tbl (x int)
        PRODUCED BY DATA SOURCE test_data_src('dummy_init_string')"""
    drop_ds_stmt = "drop data source %s test_data_src"
    drop_tbl_stmt = "drop table %s data_src_tbl"
    select_stmt = "select * from data_src_tbl limit 1"
    class_cache_hits_metric = "external-data-source.class-cache.hits"
    class_cache_misses_metric = "external-data-source.class-cache.misses"

    create_stmts = [create_ds_stmt, create_tbl_stmt]
    drop_stmts = [drop_tbl_stmt, drop_ds_stmt]

    # Get the impalad to capture metrics
    impala_cluster = ImpalaCluster()
    impalad = impala_cluster.get_first_impalad()

    # Initial metric values
    class_cache_hits = impalad.service.get_metric_value(class_cache_hits_metric)
    class_cache_misses = impalad.service.get_metric_value(class_cache_misses_metric)
    # Test with 1 node so we can check the metrics on only the coordinator
    vector.get_value('exec_option')['num_nodes'] = 1
    num_iterations = 2
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, num_iterations)

    # Check class cache metrics. Shouldn't have any new cache hits, there should be
    # 2 cache misses for every iteration (jar is loaded by both the FE and BE).
    expected_cache_misses = class_cache_misses + (num_iterations * 2)
    impalad.service.wait_for_metric_value(class_cache_hits_metric, class_cache_hits)
    impalad.service.wait_for_metric_value(class_cache_misses_metric,
        expected_cache_misses)

    # Test with a table that caches the class
    create_tbl_stmt = """CREATE TABLE data_src_tbl (x int)
        PRODUCED BY DATA SOURCE test_data_src('CACHE_CLASS::dummy_init_string')"""
    create_stmts = [create_ds_stmt, create_tbl_stmt]
    # Run once before capturing metrics because the class already may be cached from
    # a previous test run.
    # TODO: Provide a way to clear the cache
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, 1)

    # Capture metric values and run again, should hit the cache.
    class_cache_hits = impalad.service.get_metric_value(class_cache_hits_metric)
    class_cache_misses = impalad.service.get_metric_value(class_cache_misses_metric)
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, 1)
    impalad.service.wait_for_metric_value(class_cache_hits_metric, class_cache_hits + 2)
    impalad.service.wait_for_metric_value(class_cache_misses_metric, class_cache_misses)
Example #20
  def _start_impala_cluster(cls, options, impala_log_dir=os.getenv('LOG_DIR', "/tmp/"),
      cluster_size=DEFAULT_CLUSTER_SIZE, num_coordinators=NUM_COORDINATORS,
      use_exclusive_coordinators=False, log_level=1,
      expected_num_executors=DEFAULT_CLUSTER_SIZE, default_query_options=None):
    cls.impala_log_dir = impala_log_dir
    # We ignore TEST_START_CLUSTER_ARGS here. Custom cluster tests specifically test that
    # certain custom startup arguments work and we want to keep them independent of dev
    # environments.
    cmd = [os.path.join(IMPALA_HOME, 'bin/start-impala-cluster.py'),
           '--cluster_size=%d' % cluster_size,
           '--num_coordinators=%d' % num_coordinators,
           '--log_dir=%s' % impala_log_dir,
           '--log_level=%s' % log_level]

    if use_exclusive_coordinators:
      cmd.append("--use_exclusive_coordinators")

    if pytest.config.option.use_local_catalog:
      cmd.append("--impalad_args=--use_local_catalog=1")
      cmd.append("--catalogd_args=--catalog_topic_mode=minimal")

    if pytest.config.option.pull_incremental_statistics:
      cmd.append("--impalad_args=%s --catalogd_args=%s" %
                 ("--pull_incremental_statistics", "--pull_incremental_statistics"))

    default_query_option_kvs = []
    # Put any defaults first, then any arguments after that so they can override defaults.
    if os.environ.get("ERASURE_CODING") == "true":
      default_query_option_kvs.append(("allow_erasure_coded_files", "true"))
    if default_query_options is not None:
      default_query_option_kvs.extend(default_query_options)
    # Add the default query options after any arguments. This will override any default
    # options set in --impalad_args by design to force tests to pass default_query_options
    # into this function directly.
    options.append("--impalad_args=--default_query_options={0}".format(
        ','.join(["{0}={1}".format(k, v) for k, v in default_query_option_kvs])))

    logging.info("Starting cluster with command: %s" %
                 " ".join(pipes.quote(arg) for arg in cmd + options))
    try:
      check_call(cmd + options, close_fds=True)
    finally:
      # Failure tests expect cluster to be initialised even if start-impala-cluster fails.
      cls.cluster = ImpalaCluster()
    statestored = cls.cluster.statestored
    if statestored is None:
      raise Exception("statestored was not found")

    # The number of statestore subscribers is
    # cluster_size (# of impalad) + 1 (for catalogd).
    expected_subscribers = cluster_size + 1

    statestored.service.wait_for_live_subscribers(expected_subscribers, timeout=60)
    for impalad in cls.cluster.impalads:
      impalad.service.wait_for_num_known_live_backends(expected_num_executors, timeout=60)
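As a hypothetical illustration of the option merging above, with ERASURE_CODING=true in the environment:

cls._start_impala_cluster([], default_query_options=[('mem_limit', '2g')])
# appends: --impalad_args=--default_query_options=allow_erasure_coded_files=true,mem_limit=2g

Caller-supplied options are placed last in the joined list so they take precedence over the environment-derived defaults.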
Example #21
  def execute_query_verify_metrics(self, query, query_options=None, repeat=1):
    for i in range(repeat):
      try:
        self.execute_query(query, query_options)
      except ImpalaBeeswaxException:
        pass
    verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]

    for v in verifiers:
      v.wait_for_metric("impala-server.num-fragments-in-flight", 0)
      v.verify_num_unused_buffers()
Example #22
 def verify_mem_usage(self, non_zero_peak_metrics):
   """Verifies that the memory used by KRPC is returned to the memtrackers and that
   metrics in 'non_zero_peak_metrics' have a peak value > 0.
   """
   verifiers = [ MemUsageVerifier(i.service) for i in ImpalaCluster().impalads ]
   for verifier in verifiers:
     for metric_name in ALL_METRICS:
       usage = verifier.get_mem_usage_values(metric_name)
       assert usage["total"] == 0
       if metric_name in non_zero_peak_metrics:
         assert usage["peak"] > 0, metric_name
Example #23
  def test_failure_in_prepare(self):
    # Fail the scan node
    self.client.execute("SET DEBUG_ACTION='-1:0:PREPARE:FAIL'");
    try:
      self.client.execute("SELECT COUNT(*) FROM functional.alltypes")
      assert "Query should have thrown an error"
    except ImpalaBeeswaxException:
      pass
    verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]

    for v in verifiers:
      v.wait_for_metric("impala-server.num-fragments-in-flight", 0)
Example #24
 def test_insert_mem_limit(self, vector):
   if (vector.get_value('table_format').file_format == 'parquet'):
     vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
         vector.get_value('compression_codec')
   self.run_test_case('QueryTest/insert-mem-limit', vector,
       multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1)
   # IMPALA-7023: These queries can linger and use up memory, causing subsequent
   # tests to hit memory limits. Wait for some time to allow the query to
   # be reclaimed.
   verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]
   for v in verifiers:
     v.wait_for_metric("impala-server.num-fragments-in-flight", 0, timeout=60)
Example #25
    def test_pull_stats_profile(self, vector, unique_database):
        """Checks that the frontend profile includes metrics when computing
       incremental statistics.
    """
        try:
            client = ImpalaCluster().impalads[0].service.create_beeswax_client(
            )
            create = "create table test like functional.alltypes"
            load = "insert into test partition(year, month) select * from functional.alltypes"
            insert = """insert into test partition(year=2009, month=1) values
                  (29349999, true, 4, 4, 4, 40,4.400000095367432,40.4,
                  "10/21/09","4","2009-10-21 03:24:09.600000000")"""
            stats_all = "compute incremental stats test"
            stats_part = "compute incremental stats test partition (year=2009,month=1)"

            # Checks that profile does not have metrics for incremental stats when
            # the operation is not 'compute incremental stats'.
            self.execute_query_expect_success(client,
                                              "use %s" % unique_database)
            profile = self.execute_query_expect_success(client,
                                                        create).runtime_profile
            assert profile.count("StatsFetch") == 0
            # Checks that incremental stats metrics are present when 'compute incremental
            # stats' is run. Since the table has no stats, expect that no bytes are fetched.
            self.execute_query_expect_success(client, load)
            profile = self.execute_query_expect_success(
                client, stats_all).runtime_profile
            assert profile.count("StatsFetch") > 1
            assert profile.count("StatsFetch.CompressedBytes: 0") == 1
            # Checks that bytes fetched is non-zero since incremental stats are present now
            # and should have been fetched.
            self.execute_query_expect_success(client, insert)
            profile = self.execute_query_expect_success(
                client, stats_part).runtime_profile
            assert profile.count("StatsFetch") > 1
            assert profile.count("StatsFetch.CompressedBytes") == 1
            assert profile.count("StatsFetch.CompressedBytes: 0") == 0
            # Adds a partition, computes stats, and checks that the metrics in the profile
            # reflect the operation.
            alter = "alter table test add partition(year=2011, month=1)"
            insert_new_partition = """
          insert into test partition(year=2011, month=1) values
          (29349999, true, 4, 4, 4, 40,4.400000095367432,40.4,
          "10/21/09","4","2009-10-21 03:24:09.600000000")
          """
            self.execute_query_expect_success(client, alter)
            self.execute_query_expect_success(client, insert_new_partition)
            profile = self.execute_query_expect_success(
                client, stats_all).runtime_profile
            assert profile.count("StatsFetch.TotalPartitions: 25") == 1
            assert profile.count("StatsFetch.NumPartitionsWithStats: 24") == 1
        finally:
            client.close()
Example #26
  def setup_method(self, method):
     cluster_args = ""
     for arg in [IMPALAD_ARGS, STATESTORED_ARGS, CATALOGD_ARGS]:
         if arg in method.func_dict:
             cluster_args += "--%s=\"%s\" " % (arg, method.func_dict[arg])
     # Start a clean new cluster before each test
     self.__start_impala_cluster(cluster_args)
     self.cluster = ImpalaCluster()
     statestored = self.cluster.statestored
     statestored.service.wait_for_live_subscribers(NUM_SUBSCRIBERS,
                                                   timeout=60)
     for impalad in self.cluster.impalads:
         impalad.service.wait_for_num_known_live_backends(CLUSTER_SIZE,
                                                          timeout=60)
Example #27
    def test_failure_in_prepare(self):
        # Fail the scan node
        verifiers = [
            MetricVerifier(i.service) for i in ImpalaCluster().impalads
        ]
        self.client.execute("SET DEBUG_ACTION='-1:0:PREPARE:FAIL'")
        try:
            self.client.execute("SELECT COUNT(*) FROM functional.alltypes")
            assert "Query should have thrown an error"
        except ImpalaBeeswaxException:
            pass

        for v in verifiers:
            v.wait_for_metric(self.IN_FLIGHT_FRAGMENTS, 0)
Example #28
def print_metrics(substring):
    """Prints metrics with the given substring in the name"""
    for impalad in ImpalaCluster().impalads:
        print ">" * 80
        port = impalad._get_webserver_port()
        print "connections metrics for impalad at port {0}:".format(port)
        debug_info = json.loads(
            ImpaladService(
                impalad.hostname,
                webserver_port=port).open_debug_webpage('metrics?json').read())
        for metric in debug_info['metric_group']['metrics']:
            if substring in metric['name']:
                print(json.dumps(metric, indent=1))
        print("<" * 80)
Example #29
  def test_queries_closed(self):
    """Regression test for IMPALA-897"""
    args = '-f %s/test_close_queries.sql --quiet -B' % QUERY_FILE_PATH
    cmd = "%s %s" % (SHELL_CMD, args)
    # Execute the shell command async
    p = Popen(shlex.split(cmd), shell=False, stdout=PIPE, stderr=PIPE)

    impala_cluster = ImpalaCluster()
    impalad = impala_cluster.impalads[0].service
    # The last query in the test SQL script will sleep for 10 seconds, so sleep
    # here for 5 seconds and verify the number of in-flight queries is 1.
    sleep(5)
    assert 1 == impalad.get_num_in_flight_queries()
    assert get_shell_cmd_result(p).rc == 0
    assert 0 == impalad.get_num_in_flight_queries()
Example #30
  def execute_query_then_cancel(self, query, vector, repeat=1):
    for _ in range(repeat):
      handle = self.execute_query_async(query, vector.get_value('exec_option'))
      self.client.fetch(query, handle)
      try:
        self.client.cancel(handle)
      except ImpalaBeeswaxException:
        pass
      finally:
        self.client.close_query(handle)
    verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]

    for v in verifiers:
      v.wait_for_metric("impala-server.num-fragments-in-flight", 0)
      v.verify_num_unused_buffers()