def test_partition_cols_supported(self, pa, df_full):
    # GH #23283
    partition_cols = ["bool", "int"]
    df = df_full
    with tm.ensure_clean_dir() as path:
        df.to_parquet(path, partition_cols=partition_cols, compression=None)
        check_partition_names(path, partition_cols)
        assert read_parquet(path).shape == df.shape

def test_partition_cols_string(self, pa, df_full):
    # GH #27117
    partition_cols = "bool"
    partition_cols_list = [partition_cols]
    df = df_full
    with tm.ensure_clean_dir() as path:
        df.to_parquet(path, partition_cols=partition_cols, compression=None)
        check_partition_names(path, partition_cols_list)
        assert read_parquet(path).shape == df.shape

def test_partition_cols_pathlib(self, pa, df_compat, path_type):
    # GH 35902
    partition_cols = "B"
    partition_cols_list = [partition_cols]
    df = df_compat
    with tm.ensure_clean_dir() as path_str:
        path = path_type(path_str)
        df.to_parquet(path, partition_cols=partition_cols_list)

def test_to_csv_zip_infer_name(self, filename, expected_arcname):
    # GH 39465
    df = DataFrame({"ABC": [1]})
    with tm.ensure_clean_dir() as dir:
        path = Path(dir, filename)
        df.to_csv(path, compression="zip")
        with ZipFile(path) as zp:
            assert len(zp.filelist) == 1
            archived_file = zp.filelist[0].filename
            assert archived_file == expected_arcname

def test_partition_cols_supported(self, pa, df_full):
    # GH #23283
    partition_cols = ["bool", "int"]
    df = df_full
    with tm.ensure_clean_dir() as path:
        df.to_parquet(path, partition_cols=partition_cols, compression=None)
        import pyarrow.parquet as pq

        dataset = pq.ParquetDataset(path, validate_schema=False)
        assert len(dataset.partitions.partition_names) == 2
        assert dataset.partitions.partition_names == set(partition_cols)

def test_partition_cols_string(self, pa, df_full):
    # GH #27117
    partition_cols = "bool"
    partition_cols_list = [partition_cols]
    df = df_full
    with tm.ensure_clean_dir() as path:
        df.to_parquet(path, partition_cols=partition_cols, compression=None)
        import pyarrow.parquet as pq

        dataset = pq.ParquetDataset(path, validate_schema=False)
        assert len(dataset.partitions.partition_names) == 1
        assert dataset.partitions.partition_names == set(partition_cols_list)

def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full):
    # GH #23283
    partition_cols = ["bool", "int"]
    df = df_full
    with pytest.raises(ValueError):
        with tm.ensure_clean_dir() as path:
            df.to_parquet(
                path,
                engine="fastparquet",
                compression=None,
                partition_on=partition_cols,
                partition_cols=partition_cols,
            )

def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full):
    # GH #23283
    partition_cols = ["bool", "int"]
    df = df_full
    msg = (
        "Cannot use both partition_on and partition_cols. Use partition_cols for "
        "partitioning data"
    )
    with pytest.raises(ValueError, match=msg):
        with tm.ensure_clean_dir() as path:
            df.to_parquet(
                path,
                engine="fastparquet",
                compression=None,
                partition_on=partition_cols,
                partition_cols=partition_cols,
            )

def test_partition_on_supported(self, fp, df_full):
    # GH #23283
    partition_cols = ["bool", "int"]
    df = df_full
    with tm.ensure_clean_dir() as path:
        df.to_parquet(
            path,
            engine="fastparquet",
            compression=None,
            partition_on=partition_cols,
        )
        assert os.path.exists(path)
        import fastparquet

        actual_partition_cols = fastparquet.ParquetFile(path, False).cats
        assert len(actual_partition_cols) == 2

def test_partition_cols_string(self, fp, df_full):
    # GH #27117
    partition_cols = "bool"
    df = df_full
    with tm.ensure_clean_dir() as path:
        df.to_parquet(
            path,
            engine="fastparquet",
            partition_cols=partition_cols,
            compression=None,
        )
        assert os.path.exists(path)
        import fastparquet

        actual_partition_cols = fastparquet.ParquetFile(path, False).cats
        assert len(actual_partition_cols) == 1

def test_ambiguous_archive_tar():
    with tm.ensure_clean_dir() as dir:
        csvAPath = os.path.join(dir, "a.csv")
        with open(csvAPath, "w") as a:
            a.write("foo,bar\n")
        csvBPath = os.path.join(dir, "b.csv")
        with open(csvBPath, "w") as b:
            b.write("foo,bar\n")

        tarpath = os.path.join(dir, "archive.tar")
        with tarfile.TarFile(tarpath, "w") as tar:
            tar.add(csvAPath, "a.csv")
            tar.add(csvBPath, "b.csv")

        with pytest.raises(ValueError, match="Multiple files found in TAR archive"):
            pd.read_csv(tarpath)

def test_create_temp_directory():
    with tm.ensure_clean_dir() as path:
        assert os.path.exists(path)
        assert os.path.isdir(path)
    assert not os.path.exists(path)