def test_set_column_orders(self, vector, unique_database, tmpdir):
    """Check that written Parquet files carry FileMetaData::column_orders.

    Creates a Parquet table in `unique_database` with the layout of
    functional_parquet.alltypessmall, populates it with an
    `insert ... partition(year, month)` statement, fetches the table
    directory from HDFS into `tmpdir`, and asserts that every fetched file
    reports a column_orders list of 11 type-defined orders.
    """
    src = "functional_parquet.alltypessmall"
    dst = "test_set_column_orders"
    dst_qualified = "{0}.{1}".format(unique_database, dst)
    table_dir = get_fs_path(
        "/test-warehouse/{0}.db/{1}/".format(unique_database, dst))

    # Create the empty target table, then fill it from the source table.
    create_stmt = "create table {0} like {1} stored as parquet".format(
        dst_qualified, src)
    self.execute_query(create_stmt)
    insert_stmt = (
        "insert into {0} partition(year, month) select * from {1}").format(
            dst_qualified, src)
    self.execute_query(insert_stmt)

    # Copy the table directory to local disk; check_call raises if the
    # command exits with a non-zero status.
    check_call(['hdfs', 'dfs', '-get', table_dir, tmpdir.strpath])

    # NOTE(review): 11 is presumably the number of non-partition columns in
    # the source table -- confirm against the schema.
    type_defined = ColumnOrder(TYPE_ORDER=TypeDefinedOrder())
    expected = [type_defined] * 11

    # Inspect every file found anywhere below the download directory.
    for dirpath, _dirnames, filenames in os.walk(tmpdir.strpath):
        for filename in filenames:
            local_file = os.path.join(dirpath, str(filename))
            metadata = get_parquet_metadata(local_file)
            assert metadata.column_orders == expected
def test_set_column_orders(self, vector, unique_database, tmpdir):
    """Check that written Parquet files carry FileMetaData::column_orders.

    Creates a Parquet table in `unique_database` with the layout of
    functional_parquet.alltypessmall, populates it with an
    `insert ... partition(year, month)` statement, obtains the file metadata
    for the table's HDFS directory through
    get_parquet_metadata_from_hdfs_folder (given `tmpdir` as its second
    argument), and asserts that every metadata object reports a
    column_orders list of 11 type-defined orders.
    """
    src = "functional_parquet.alltypessmall"
    dst = "test_set_column_orders"
    dst_qualified = "{0}.{1}".format(unique_database, dst)
    table_dir = get_fs_path(
        "/test-warehouse/{0}.db/{1}/".format(unique_database, dst))

    # Create the empty target table, then fill it from the source table.
    create_stmt = "create table {0} like {1} stored as parquet".format(
        dst_qualified, src)
    self.execute_query(create_stmt)
    insert_stmt = (
        "insert into {0} partition(year, month) select * from {1}").format(
            dst_qualified, src)
    self.execute_query(insert_stmt)

    # Fetch the metadata of the files under the table directory.
    all_metadata = get_parquet_metadata_from_hdfs_folder(
        table_dir, tmpdir.strpath)

    # NOTE(review): 11 is presumably the number of non-partition columns in
    # the source table -- confirm against the schema.
    type_defined = ColumnOrder(TYPE_ORDER=TypeDefinedOrder())
    expected = [type_defined] * 11

    for metadata in all_metadata:
        assert metadata.column_orders == expected