Пример #1
0
 def create_test_database(self, unique_database):
     """Create the Parquet bloom-filter test table and load its data file.

     Builds a table in `unique_database` covering the primitive column types
     exercised by the bloom-filter test data, then copies the pre-built
     parquet file into it via the shared test helper.
     """
     # Assemble the DDL from the same padded fragments; ''.join of adjacent
     # literals yields a byte-identical statement string.
     ddl_fragments = (
         'create table {db}.{tbl} (        ',
         '  int8_col TINYINT,              ',
         '  int16_col SMALLINT,            ',
         '  int32_col INT,                 ',
         '  int64_col BIGINT,              ',
         '  float_col FLOAT,               ',
         '  double_col DOUBLE,             ',
         '  string_col STRING,             ',
         '  char_col VARCHAR(3)            ',
         ')                                ',
         'stored as parquet                ',
     )
     create_stmt = ''.join(ddl_fragments)
     create_table_and_copy_files(
         self.client, create_stmt, unique_database, 'parquet_bloom_filter',
         ['testdata/data/parquet-bloom-filtering.parquet'])
Пример #2
0
  def test_deprecated_stats(self, vector, unique_database):
    """Test that reading parquet files with statistics with deprecated 'min'/'max' fields
    works correctly. The statistics will be used for known-good types (boolean, integral,
    float) and will be ignored for all other types (string, decimal, timestamp)."""

    # CTAS (rather than "create table like") flattens the source table's
    # partition columns into regular table columns.
    ctas_stmt = ('create table {db}.{tbl} stored as parquet '
                 'as select * from functional.alltypessmall '
                 'limit 0')
    data_files = ['testdata/data/deprecated_statistics.parquet']
    create_table_and_copy_files(self.client, ctas_stmt, unique_database,
                                'deprecated_stats', data_files)
    # The test counts row groups processed and skipped per fragment, so pin
    # execution to a single fragment.
    exec_options = vector.get_value('exec_option')
    exec_options['num_nodes'] = 1
    self.run_test_case('QueryTest/parquet-deprecated-stats', vector, unique_database)
Пример #3
0
  def test_deprecated_stats(self, vector, unique_database):
    """Test that reading parquet files with statistics with deprecated 'min'/'max' fields
    works correctly. The statistics will be used for known-good types (boolean, integral,
    float) and will be ignored for all other types (string, decimal, timestamp)."""

    # Use CTAS instead of "create table like" so the partition columns of the
    # source table become ordinary columns of the new table.
    create_table_and_copy_files(
        self.client,
        'create table {db}.{tbl} stored as parquet '
        'as select * from functional.alltypessmall '
        'limit 0',
        unique_database,
        'deprecated_stats',
        ['testdata/data/deprecated_statistics.parquet'])
    # Run in one fragment: the expected results assume a fixed number of row
    # groups processed/skipped within a single fragment.
    vector.get_value('exec_option')['num_nodes'] = 1
    self.run_test_case('QueryTest/parquet-deprecated-stats', vector, unique_database)
Пример #4
0
 def test_fileformat_support(self, vector, unique_database):
     """ Test that scanning and writing DATE is supported for text tables only."""
     # This test specifies databases and locations explicitly. No need to execute it for
     # anything other than text fileformat on HDFS.
     if vector.get_value('table_format').file_format != 'text':
         pytest.skip()
     # Parquet table with date column
     # NOTE(review): this data-file path starts with '/' — confirm the copy
     # helper expects it this way (it is used consistently below).
     TABLE_NAME = "parquet_date_tbl"
     CREATE_SQL = "CREATE TABLE {0}.{1} (date_col DATE) STORED AS PARQUET".format(
         unique_database, TABLE_NAME)
     create_table_and_copy_files(self.client, CREATE_SQL, unique_database,
                                 TABLE_NAME,
                                 ["/testdata/data/date_tbl.parquet"])
     # Avro table with date column
     TABLE_NAME = "avro_date_tbl"
     CREATE_SQL = "CREATE TABLE {0}.{1} (date_col DATE) STORED AS AVRO".format(
         unique_database, TABLE_NAME)
     create_table_and_copy_files(self.client, CREATE_SQL, unique_database,
                                 TABLE_NAME,
                                 ["/testdata/data/date_tbl.avro"])
     # Partitioned table with parquet and avro partitions
     TABLE_NAME = "date_tbl"
     CREATE_SQL = """CREATE TABLE {0}.{1} (date_col DATE)
     PARTITIONED BY (date_part DATE)""".format(unique_database, TABLE_NAME)
     self.client.execute(CREATE_SQL)
     # Add partitions
     # A single ALTER adds both partitions; their LOCATIONs point at the
     # directories of the two tables created above — presumably so the data
     # files copied there are picked up directly (TODO confirm).
     ADD_PART_SQL = """ALTER TABLE {0}.{1} ADD PARTITION (date_part='1899-12-31')
     LOCATION '/test-warehouse/{0}.db/parquet_date_tbl'
     PARTITION (date_part='1999-12-31')
     LOCATION '/test-warehouse/{0}.db/avro_date_tbl'
     """.format(unique_database, TABLE_NAME)
     self.client.execute(ADD_PART_SQL)
     # Parquet fileformat
     SET_PART_FF_SQL = """ALTER TABLE {0}.{1} PARTITION (date_part='1899-12-31')
     SET FILEFORMAT PARQUET""".format(unique_database, TABLE_NAME)
     self.client.execute(SET_PART_FF_SQL)
     # Avro fileformat
     SET_PART_FF_SQL = """ALTER TABLE {0}.{1} PARTITION (date_part='1999-12-31')
     SET FILEFORMAT AVRO""".format(unique_database, TABLE_NAME)
     self.client.execute(SET_PART_FF_SQL)
     # Test scanning/writing tables with different fileformats.
     self.run_test_case('QueryTest/date-fileformat-support',
                        vector,
                        use_db=unique_database)
Пример #5
0
 def _create_test_table_from_file(self, db_name, filename):
     """Create the bloom-filter test table in `db_name` and load `filename`.

     Thin wrapper around the shared helper; the DDL comes from
     `self.create_stmt` set up elsewhere on this test class.
     """
     data_files = [filename]
     create_table_and_copy_files(self.client, self.create_stmt, db_name,
                                 'parquet_bloom_filter', data_files)