Example #1
def test_kll(self, vector, unique_database):
  create_table_from_parquet(self.client, unique_database, 'kll_sketches_from_hive')
  create_table_from_parquet(self.client, unique_database, 'kll_sketches_from_impala')
  self.run_test_case('QueryTest/datasketches-kll', vector, unique_database)
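
Every example on this page stages a pre-built Parquet data file as a table in the test database through create_table_from_parquet() before running the matching workload file. The snippet below is only a minimal sketch of what such a helper could look like; the testdata/data path, the HDFS staging location, and the client.execute() interface are assumptions made for illustration, not Impala's actual test utilities.

# Minimal sketch only: the paths, the staging location and the client
# interface are assumptions, not Impala's real helper.
import os
import subprocess


def create_table_from_parquet_sketch(client, unique_database, table_name):
  """Stage <table_name>.parquet in HDFS, derive the schema from its footer
  and load the file into a new table in unique_database."""
  local_file = os.path.join('testdata', 'data', '%s.parquet' % table_name)
  staging_path = '/tmp/%s_%s.parquet' % (unique_database, table_name)

  # Copy the pre-built Parquet file somewhere the Impala daemons can read it.
  subprocess.check_call(['hdfs', 'dfs', '-put', '-f', local_file, staging_path])

  # CREATE TABLE ... LIKE PARQUET infers the column definitions from the
  # Parquet footer; LOAD DATA then moves the staged file into the table.
  client.execute("create table %s.%s like parquet '%s' stored as parquet"
                 % (unique_database, table_name, staging_path))
  client.execute("load data inpath '%s' into table %s.%s"
                 % (staging_path, unique_database, table_name))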
Example #2
def test_page_index(self, vector, unique_database):
  """Test that using the Parquet page index works correctly. The various test files
  contain queries that exercise the page selection and value-skipping logic against
  columns with different types and encodings."""
  create_table_from_parquet(self.client, unique_database, 'decimals_1_10')
  create_table_from_parquet(self.client, unique_database, 'nested_decimals')
  create_table_from_parquet(self.client, unique_database, 'double_nested_decimals')
  create_table_from_parquet(self.client, unique_database, 'alltypes_tiny_pages')
  create_table_from_parquet(self.client, unique_database, 'alltypes_tiny_pages_plain')

  for batch_size in [0, 1]:
    vector.get_value('exec_option')['batch_size'] = batch_size
    self.run_test_case('QueryTest/parquet-page-index', vector, unique_database)
    self.run_test_case('QueryTest/nested-types-parquet-page-index', vector,
                       unique_database)
    self.run_test_case('QueryTest/parquet-page-index-alltypes-tiny-pages', vector,
                       unique_database)
    self.run_test_case('QueryTest/parquet-page-index-alltypes-tiny-pages-plain', vector,
                       unique_database)

  for batch_size in [0, 32]:
    vector.get_value('exec_option')['batch_size'] = batch_size
    self.run_test_case('QueryTest/parquet-page-index-large', vector, unique_database)
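
The batch_size loops re-run the same workload files under different scanner batch sizes: a value of 0 leaves the query option at its default, while 1 forces single-row batches, presumably to stress the page-index value-skipping logic at page boundaries. The stand-in below is purely illustrative and only mirrors the dictionary access the test performs; the real ImpalaTestVector interface is richer.

# Illustrative stand-in for the test vector; not Impala's ImpalaTestVector.
class FakeTestVector(object):
  def __init__(self):
    self._values = {'exec_option': {'batch_size': 0, 'abort_on_error': 1}}

  def get_value(self, name):
    return self._values[name]


vector = FakeTestVector()
for batch_size in [0, 1]:
  # Mutating the shared exec_option dict changes the options that every
  # subsequent query in this test run is executed with.
  vector.get_value('exec_option')['batch_size'] = batch_size
  print(vector.get_value('exec_option'))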
Example #3
def test_invalid_stats(self, vector, unique_database):
  """IMPALA-6538: Test that reading Parquet files whose statistics contain invalid
  'min_value'/'max_value' fields works correctly. 'min_value' and 'max_value' are both
  NaNs, therefore we need to ignore them."""
  create_table_from_parquet(self.client, unique_database, 'min_max_is_nan')
  self.run_test_case('QueryTest/parquet-invalid-minmax-stats', vector, unique_database)
Example #4
def _test_conversion_with_validation(self, vector, unique_database):
  """Test that timestamp validation also works as expected when converting timestamps.
  Runs as part of test_conversion() to avoid restarting the cluster."""
  create_table_from_parquet(self.client, unique_database,
                            "out_of_range_timestamp_hive_211")
  create_table_from_parquet(self.client, unique_database,
                            "out_of_range_timestamp2_hive_211")
  # Allow the test file to override abort_on_error.
  del vector.get_value('exec_option')['abort_on_error']
  self.run_test_case('QueryTest/out-of-range-timestamp-local-tz-conversion',
                     vector, unique_database)
Example #5
def test_invalid_stats(self, vector, unique_database):
  """IMPALA-6538: Test that reading Parquet files whose statistics contain invalid
  'min_value'/'max_value' fields works correctly. 'min_value' and 'max_value' are both
  NaNs, therefore we need to ignore them."""
  create_table_from_parquet(self.client, unique_database, 'min_max_is_nan')
  self.run_test_case('QueryTest/parquet-invalid-minmax-stats', vector, unique_database)
Example #6
from copy import deepcopy  # needed for the deepcopy() call below

def test_page_index(self, vector, unique_database):
  """Test that using the Parquet page index works correctly. The various test files
  contain queries that exercise the page selection and value-skipping logic against
  columns with different types and encodings."""
  new_vector = deepcopy(vector)
  del new_vector.get_value('exec_option')['abort_on_error']
  create_table_from_parquet(self.client, unique_database, 'decimals_1_10')
  create_table_from_parquet(self.client, unique_database, 'nested_decimals')
  create_table_from_parquet(self.client, unique_database, 'double_nested_decimals')
  create_table_from_parquet(self.client, unique_database, 'alltypes_tiny_pages')
  create_table_from_parquet(self.client, unique_database, 'alltypes_tiny_pages_plain')
  create_table_from_parquet(self.client, unique_database, 'alltypes_empty_pages')
  create_table_from_parquet(self.client, unique_database, 'alltypes_invalid_pages')
  create_table_from_parquet(self.client, unique_database,
                            'customer_multiblock_page_index')

  for batch_size in [0, 1]:
    new_vector.get_value('exec_option')['batch_size'] = batch_size
    self.run_test_case('QueryTest/parquet-page-index', new_vector, unique_database)
    self.run_test_case('QueryTest/nested-types-parquet-page-index', new_vector,
                       unique_database)
    self.run_test_case('QueryTest/parquet-page-index-alltypes-tiny-pages', new_vector,
                       unique_database)
    self.run_test_case('QueryTest/parquet-page-index-alltypes-tiny-pages-plain',
                       new_vector, unique_database)

  for batch_size in [0, 1, 2, 3, 4, 8, 16, 32, 64, 128, 256, 512]:
    new_vector.get_value('exec_option')['batch_size'] = batch_size
    self.run_test_case('QueryTest/parquet-page-index-large', new_vector,
                       unique_database)
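
Unlike Example #2, this version first deep-copies the test vector and only then deletes abort_on_error and overrides batch_size, presumably so the changed options stay local to this test instead of leaking into other tests that share the same vector instance.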