def test_to_csv_compression(self, df, encoding, compression): with tm.ensure_clean() as filename: df.to_csv(filename, compression=compression, encoding=encoding) # test the round trip - to_csv -> read_csv result = read_csv( filename, compression=compression, index_col=0, encoding=encoding ) tm.assert_frame_equal(df, result) # test the round trip using file handle - to_csv -> read_csv handles = get_handle( filename, "w", compression=compression, encoding=encoding ) df.to_csv(handles.handle, encoding=encoding) assert not handles.handle.closed handles.close() result = pd.read_csv( filename, compression=compression, encoding=encoding, index_col=0, squeeze=True, ) tm.assert_frame_equal(df, result) # explicitly make sure file is compressed with tm.decompress_file(filename, compression) as fh: text = fh.read().decode(encoding or "utf8") for col in df.columns: assert col in text with tm.decompress_file(filename, compression) as fh: tm.assert_frame_equal(df, read_csv(fh, index_col=0, encoding=encoding))
def test_write_infer(self, ext, get_random_path): base = get_random_path path1 = base + ext path2 = base + ".raw" compression = None for c in self._compression_to_extension: if self._compression_to_extension[c] == ext: compression = c break with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: df = tm.makeDataFrame() # write to compressed file by inferred compression method df.to_pickle(p1) # decompress with tm.decompress_file(p1, compression=compression) as f: with open(p2, "wb") as fh: fh.write(f.read()) # read decompressed file df2 = pd.read_pickle(p2, compression=None) tm.assert_frame_equal(df, df2)
def test_to_csv_compression(self, s, encoding, compression): with tm.ensure_clean() as filename: s.to_csv(filename, compression=compression, encoding=encoding, header=True) # test the round trip - to_csv -> read_csv result = pd.read_csv( filename, compression=compression, encoding=encoding, index_col=0, squeeze=True, ) tm.assert_series_equal(s, result) # test the round trip using file handle - to_csv -> read_csv f, _handles = get_handle(filename, "w", compression=compression, encoding=encoding) with f: s.to_csv(f, encoding=encoding, header=True) result = pd.read_csv( filename, compression=compression, encoding=encoding, index_col=0, squeeze=True, ) tm.assert_series_equal(s, result) # explicitly ensure file was compressed with tm.decompress_file(filename, compression) as fh: text = fh.read().decode(encoding or "utf8") assert s.name in text with tm.decompress_file(filename, compression) as fh: tm.assert_series_equal( s, pd.read_csv(fh, index_col=0, squeeze=True, encoding=encoding))
def test_gz_lineend(self): # GH 25311 df = DataFrame({"a": [1, 2]}) expected_rows = ["a", "1", "2"] expected = tm.convert_rows_list_to_csv_str(expected_rows) with tm.ensure_clean("__test_gz_lineend.csv.gz") as path: df.to_csv(path, index=False) with tm.decompress_file(path, compression="gzip") as f: result = f.read().decode("utf-8") assert result == expected
def test_compression_roundtrip(compression): df = pd.DataFrame( [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], index=["A", "B"], columns=["X", "Y", "Z"], ) with tm.ensure_clean() as path: df.to_json(path, compression=compression) tm.assert_frame_equal(df, pd.read_json(path, compression=compression)) # explicitly ensure file was compressed. with tm.decompress_file(path, compression) as fh: result = fh.read().decode("utf8") tm.assert_frame_equal(df, pd.read_json(result))
def test_write_explicit(self, compression, get_random_path): base = get_random_path path1 = base + ".compressed" path2 = base + ".raw" with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: df = tm.makeDataFrame() # write to compressed file df.to_pickle(p1, compression=compression) # decompress with tm.decompress_file(p1, compression=compression) as f: with open(p2, "wb") as fh: fh.write(f.read()) # read decompressed file df2 = pd.read_pickle(p2, compression=None) tm.assert_frame_equal(df, df2)