def test_read_infer(self, ext, get_random_path):
    """Check that ``pd.read_pickle`` reads a compressed pickle without an
    explicit ``compression`` argument.

    A frame is pickled uncompressed, compressed into a file whose name ends
    with ``ext`` via ``self.compress_file``, and read back with the default
    ``compression`` setting.
    """
    if ext == '.xz':
        tm._skip_if_no_lzma()

    raw_path = get_random_path + ".raw"
    compressed_path = get_random_path + ext

    # Reverse lookup: the first compression key whose registered extension
    # equals ``ext``; stays None when no entry matches.
    ext_by_compression = self._compression_to_extension
    compression = next(
        (name for name in ext_by_compression
         if ext_by_compression[name] == ext),
        None)

    with tm.ensure_clean(raw_path) as p1:
        with tm.ensure_clean(compressed_path) as p2:
            expected = tm.makeDataFrame()

            # write to uncompressed file
            expected.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file by inferred compression method
            result = pd.read_pickle(p2)

            tm.assert_frame_equal(expected, result)
def test_lines_with_compression(compression):
    """Round-trip a frame through line-delimited JSON written and read with
    the given ``compression``.
    """
    if compression == 'xz':
        tm._skip_if_no_lzma()

    with tm.ensure_clean() as path:
        expected = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')

        # Writer and reader share the same line/compression settings.
        shared_kwargs = dict(lines=True, compression=compression)
        expected.to_json(path, orient='records', **shared_kwargs)
        result = pd.read_json(path, **shared_kwargs)

        assert_frame_equal(expected, result)
def test_chunksize_with_compression(compression):
    """Round-trip a frame through compressed line-delimited JSON, reading it
    back one record per chunk and concatenating the chunks.
    """
    if compression == 'xz':
        tm._skip_if_no_lzma()

    with tm.ensure_clean() as path:
        expected = pd.read_json(
            '{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
        expected.to_json(path, orient='records', lines=True,
                         compression=compression)

        # chunksize=1 requests the data in one-row pieces; concat stitches
        # them back into a single frame.
        chunks = pd.read_json(path, lines=True, chunksize=1,
                              compression=compression)
        result = pd.concat(chunks)

        assert_frame_equal(expected, result)
def test_chunksize_with_compression(compression):
    """Verify that chunked reading (``chunksize=1``) of compressed,
    line-delimited JSON reproduces the frame that was written.
    """
    if compression == 'xz':
        tm._skip_if_no_lzma()

    with tm.ensure_clean() as path:
        source_json = '{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}'
        original = pd.read_json(source_json)

        original.to_json(path, orient='records', lines=True,
                         compression=compression)

        # Read the file back piecewise and reassemble the pieces.
        reader = pd.read_json(path, lines=True, chunksize=1,
                              compression=compression)
        reassembled = pd.concat(reader)

        assert_frame_equal(original, reassembled)
def test_compression_roundtrip(compression):
    """Write a frame with ``to_json`` using ``compression`` and check it can
    be read back both through ``read_json`` and through ``decompress_file``.
    """
    if compression == 'xz':
        tm._skip_if_no_lzma()

    values = [[0.123456, 0.234567, 0.567567],
              [12.32112, 123123.2, 321321.2]]
    expected = pd.DataFrame(values, index=['A', 'B'],
                            columns=['X', 'Y', 'Z'])

    with tm.ensure_clean() as path:
        expected.to_json(path, compression=compression)

        roundtripped = pd.read_json(path, compression=compression)
        assert_frame_equal(expected, roundtripped)

        # explicitly ensure file was compressed.
        # NOTE(review): decompress_file is defined elsewhere; presumably it
        # returns the decompressed JSON text - confirm against the helper.
        decompressed = decompress_file(path, compression)
        assert_frame_equal(expected, pd.read_json(decompressed))
def test_xz(self):
    """Read xz-compressed CSV data by path, by open file handle, and by
    inferring the compression from a ``.xz`` file name.

    The value returned by ``tm._skip_if_no_lzma()`` is used as the lzma
    module for writing the compressed fixtures.
    """
    lzma = tm._skip_if_no_lzma()

    with open(self.csv1, "rb") as data_file:
        data = data_file.read()
    expected = self.read_csv(self.csv1)

    with tm.ensure_clean() as path:
        # Context manager guarantees the handle is closed even when the
        # write fails, so the temporary file can always be cleaned up.
        with lzma.LZMAFile(path, mode="wb") as tmp:
            tmp.write(data)

        # explicit compression, given a path
        result = self.read_csv(path, compression="xz")
        tm.assert_frame_equal(result, expected)

        # explicit compression, given an already-open binary handle
        with open(path, "rb") as f:
            result = self.read_csv(f, compression="xz")
            tm.assert_frame_equal(result, expected)

    with tm.ensure_clean("test.xz") as path:
        with lzma.LZMAFile(path, mode="wb") as tmp:
            tmp.write(data)

        # compression inferred from the ".xz" extension
        result = self.read_csv(path, compression="infer")
        tm.assert_frame_equal(result, expected)
def test_compression_roundtrip(compression):
    """Check that a frame survives ``to_json``/``read_json`` with the given
    ``compression``, and that the output of ``decompress_file`` on the
    written file parses back to the same frame.
    """
    if compression == 'xz':
        tm._skip_if_no_lzma()

    row_labels = ['A', 'B']
    column_labels = ['X', 'Y', 'Z']
    frame = pd.DataFrame(
        [[0.123456, 0.234567, 0.567567],
         [12.32112, 123123.2, 321321.2]],
        index=row_labels,
        columns=column_labels)

    with tm.ensure_clean() as path:
        frame.to_json(path, compression=compression)

        reloaded = pd.read_json(path, compression=compression)
        assert_frame_equal(frame, reloaded)

        # explicitly ensure file was compressed.
        # NOTE(review): relies on decompress_file (defined elsewhere)
        # yielding something read_json accepts - confirm its return type.
        plain_content = decompress_file(path, compression)
        reparsed = pd.read_json(plain_content)
        assert_frame_equal(frame, reparsed)
def test_xz(self):
    """Exercise ``read_csv`` on xz-compressed input.

    Covers three cases: explicit ``compression='xz'`` with a path, explicit
    ``compression='xz'`` with an open binary handle, and
    ``compression='infer'`` with a file named ``test.xz``. The object
    returned by ``tm._skip_if_no_lzma()`` is used as the lzma module.
    """
    lzma = tm._skip_if_no_lzma()

    with open(self.csv1, 'rb') as data_file:
        data = data_file.read()
    expected = self.read_csv(self.csv1)

    with tm.ensure_clean() as path:
        # Use the file object as a context manager so the handle is
        # released even if write() raises; otherwise the still-open
        # temporary file may block cleanup.
        with lzma.LZMAFile(path, mode='wb') as tmp:
            tmp.write(data)

        result = self.read_csv(path, compression='xz')
        tm.assert_frame_equal(result, expected)

        with open(path, 'rb') as f:
            result = self.read_csv(f, compression='xz')
            tm.assert_frame_equal(result, expected)

    with tm.ensure_clean('test.xz') as path:
        with lzma.LZMAFile(path, mode='wb') as tmp:
            tmp.write(data)

        result = self.read_csv(path, compression='infer')
        tm.assert_frame_equal(result, expected)
def test_with_s3_url(compression):
    """Upload compressed JSON to a mocked S3 bucket and read it back through
    an ``s3://`` URL with ``pd.read_json``.

    Skipped when ``boto3`` or ``s3fs`` cannot be imported.
    """
    boto3 = pytest.importorskip('boto3')
    pytest.importorskip('s3fs')
    if compression == 'xz':
        tm._skip_if_no_lzma()

    expected = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')

    with moto.mock_s3():
        s3 = boto3.resource("s3", region_name="us-east-1")
        bucket = s3.create_bucket(Bucket="pandas-test")

        # Write the compressed JSON locally, then upload its bytes.
        with tm.ensure_clean() as path:
            expected.to_json(path, compression=compression)
            with open(path, 'rb') as payload:
                bucket.put_object(Key='test-1', Body=payload)

        result = pd.read_json('s3://pandas-test/test-1',
                              compression=compression)
        assert_frame_equal(expected, result)
def test_to_csv_compression_xz(self):
    """Check that ``to_csv(compression="xz")`` writes a real xz file.

    The frame is written with the ``compression`` keyword, read back with
    ``read_csv(compression="xz")``, and then read once more through a handle
    opened directly with the lzma module.
    """
    # GH11852
    # use the compression kw in to_csv
    tm._skip_if_no_lzma()
    df = DataFrame([[0.123456, 0.234567, 0.567567],
                    [12.32112, 123123.2, 321321.2]],
                   index=['A', 'B'], columns=['X', 'Y', 'Z'])

    with ensure_clean() as filename:

        df.to_csv(filename, compression="xz")

        # test the round trip - to_csv -> read_csv
        rs = read_csv(filename, compression="xz", index_col=0)
        assert_frame_equal(df, rs)

        # explicitly make sure file is xzipped
        lzma = compat.import_lzma()
        # The context manager closes the handle even when the assertion
        # fails, so the temporary file is never left open during cleanup.
        with lzma.open(filename, 'rb') as f:
            assert_frame_equal(df, read_csv(f, index_col=0))
def test_read_explicit(self, compression, get_random_path):
    """Check that ``pd.read_pickle`` reads a compressed pickle when the
    compression is passed explicitly.

    The compressed file is named with a ``.compressed`` suffix rather than
    a format-specific extension.
    """
    # issue 11666
    if compression == 'xz':
        tm._skip_if_no_lzma()

    raw_path = get_random_path + ".raw"
    compressed_path = get_random_path + ".compressed"

    with tm.ensure_clean(raw_path) as p1:
        with tm.ensure_clean(compressed_path) as p2:
            expected = tm.makeDataFrame()

            # write to uncompressed file
            expected.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file
            result = pd.read_pickle(p2, compression=compression)

            tm.assert_frame_equal(expected, result)
def check_table(self, url, compression, engine):
    """Read ``url`` with ``read_table`` using the given ``compression`` and
    ``engine``, and compare the result with ``self.local_table``.

    URLs ending in ``.xz`` go through the ``tm._skip_if_no_lzma()`` check
    first.
    """
    is_xz_url = url.endswith('.xz')
    if is_xz_url:
        tm._skip_if_no_lzma()

    result = read_table(url, compression=compression, engine=engine)
    tm.assert_frame_equal(result, self.local_table)