Пример #1
0
  def test_create_drop_data_src(self, vector):
    """This will create, run, and drop the same data source repeatedly, exercising
    the lib cache mechanism.
    """
    create_ds_stmt = ("CREATE DATA SOURCE test_data_src "
        "LOCATION '%s/data-sources/test-data-source.jar' "
        "CLASS 'com.cloudera.impala.extdatasource.AllTypesDataSource' "
        "API_VERSION 'V1'" % WAREHOUSE)
    create_tbl_stmt = """CREATE TABLE data_src_tbl (x int)
        PRODUCED BY DATA SOURCE test_data_src('dummy_init_string')"""
    drop_ds_stmt = "drop data source %s test_data_src"
    drop_tbl_stmt = "drop table %s data_src_tbl"
    select_stmt = "select * from data_src_tbl limit 1"
    class_cache_hits_metric = "external-data-source.class-cache.hits"
    class_cache_misses_metric = "external-data-source.class-cache.misses"

    create_stmts = [create_ds_stmt, create_tbl_stmt]
    drop_stmts = [drop_tbl_stmt, drop_ds_stmt]

    # Get the impalad to capture metrics
    impala_cluster = ImpalaCluster()
    impalad = impala_cluster.get_first_impalad()

    # Initial metric values
    class_cache_hits = impalad.service.get_metric_value(class_cache_hits_metric)
    class_cache_misses = impalad.service.get_metric_value(class_cache_misses_metric)
    # Test with 1 node so we can check the metrics on only the coordinator
    vector.get_value('exec_option')['num_nodes'] = 1
    num_iterations = 2
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, num_iterations)

    # Check class cache metrics. Shouldn't have any new cache hits, there should be
    # 2 cache misses for every iteration (jar is loaded by both the FE and BE).
    expected_cache_misses = class_cache_misses + (num_iterations * 2)
    impalad.service.wait_for_metric_value(class_cache_hits_metric, class_cache_hits)
    impalad.service.wait_for_metric_value(class_cache_misses_metric,
        expected_cache_misses)

    # Test with a table that caches the class
    create_tbl_stmt = """CREATE TABLE data_src_tbl (x int)
        PRODUCED BY DATA SOURCE test_data_src('CACHE_CLASS::dummy_init_string')"""
    create_stmts = [create_ds_stmt, create_tbl_stmt]
    # Run once before capturing metrics because the class already may be cached from
    # a previous test run.
    # TODO: Provide a way to clear the cache
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, 1)

    # Capture metric values and run again, should hit the cache.
    class_cache_hits = impalad.service.get_metric_value(class_cache_hits_metric)
    class_cache_misses = impalad.service.get_metric_value(class_cache_misses_metric)
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, 1)
    impalad.service.wait_for_metric_value(class_cache_hits_metric, class_cache_hits + 2)
    impalad.service.wait_for_metric_value(class_cache_misses_metric, class_cache_misses)
Пример #2
0
  def test_create_drop_data_src(self, vector):
    """This will create, run, and drop the same data source repeatedly, exercising
    the lib cache mechanism.
    """
    create_ds_stmt = ("CREATE DATA SOURCE test_data_src "
        "LOCATION '%s/data-sources/test-data-source.jar' "
        "CLASS 'com.cloudera.impala.extdatasource.AllTypesDataSource' "
        "API_VERSION 'V1'" % WAREHOUSE)
    create_tbl_stmt = """CREATE TABLE data_src_tbl (x int)
        PRODUCED BY DATA SOURCE test_data_src('dummy_init_string')"""
    drop_ds_stmt = "drop data source %s test_data_src"
    drop_tbl_stmt = "drop table %s data_src_tbl"
    select_stmt = "select * from data_src_tbl limit 1"
    class_cache_hits_metric = "external-data-source.class-cache.hits"
    class_cache_misses_metric = "external-data-source.class-cache.misses"

    create_stmts = [create_ds_stmt, create_tbl_stmt]
    drop_stmts = [drop_tbl_stmt, drop_ds_stmt]

    # Get the impalad to capture metrics
    impala_cluster = ImpalaCluster()
    impalad = impala_cluster.get_first_impalad()

    # Initial metric values
    class_cache_hits = impalad.service.get_metric_value(class_cache_hits_metric)
    class_cache_misses = impalad.service.get_metric_value(class_cache_misses_metric)
    # Test with 1 node so we can check the metrics on only the coordinator
    vector.get_value('exec_option')['num_nodes'] = 1
    num_iterations = 2
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, num_iterations)

    # Check class cache metrics. Shouldn't have any new cache hits, there should be
    # 2 cache misses for every iteration (jar is loaded by both the FE and BE).
    expected_cache_misses = class_cache_misses + (num_iterations * 2)
    impalad.service.wait_for_metric_value(class_cache_hits_metric, class_cache_hits)
    impalad.service.wait_for_metric_value(class_cache_misses_metric,
        expected_cache_misses)

    # Test with a table that caches the class
    create_tbl_stmt = """CREATE TABLE data_src_tbl (x int)
        PRODUCED BY DATA SOURCE test_data_src('CACHE_CLASS::dummy_init_string')"""
    create_stmts = [create_ds_stmt, create_tbl_stmt]
    # Run once before capturing metrics because the class already may be cached from
    # a previous test run.
    # TODO: Provide a way to clear the cache
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, 1)

    # Capture metric values and run again, should hit the cache.
    class_cache_hits = impalad.service.get_metric_value(class_cache_hits_metric)
    class_cache_misses = impalad.service.get_metric_value(class_cache_misses_metric)
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, 1)
    impalad.service.wait_for_metric_value(class_cache_hits_metric, class_cache_hits + 2)
    impalad.service.wait_for_metric_value(class_cache_misses_metric, class_cache_misses)