示例#1
0
    def test_to_csv_compression(self, df, encoding, compression):
        """Round-trip ``df`` through ``to_csv``/``read_csv`` with compression.

        The frame is written once by path and once through a handle obtained
        from ``_get_handle``; each time it is read back and compared with the
        original. The file is then opened via ``tm.decompress_file`` to check
        that every column label appears in the decoded text and that the
        decompressed stream parses back to ``df``.
        """
        csv_opts = dict(compression=compression, encoding=encoding)

        with ensure_clean() as path:

            # round trip by path: to_csv -> read_csv
            df.to_csv(path, **csv_opts)
            roundtrip = read_csv(path, index_col=0, **csv_opts)
            assert_frame_equal(df, roundtrip)

            # round trip through an already-open handle: to_csv -> read_csv
            handle, _handles = _get_handle(path, 'w', **csv_opts)
            with handle:
                df.to_csv(handle, encoding=encoding)
            roundtrip = pd.read_csv(path, index_col=0, squeeze=True,
                                    **csv_opts)
            assert_frame_equal(df, roundtrip)

            # decompress the file ourselves and look for each column label
            with tm.decompress_file(path, compression) as raw:
                text = raw.read().decode(encoding or 'utf8')
                for label in df.columns:
                    assert label in text

            # the decompressed stream must parse back to the same frame
            with tm.decompress_file(path, compression) as raw:
                parsed = read_csv(raw, index_col=0, encoding=encoding)
                assert_frame_equal(df, parsed)
示例#2
0
    def test_to_csv_compression(self, df, encoding, compression):
        """Check compressed CSV output of ``df`` by path and by file handle.

        Both write paths are read back with ``read_csv`` and compared with
        ``df``. Afterwards the file is opened through ``tm.decompress_file``
        to verify the column labels in the decoded text and to re-parse the
        decompressed stream.
        """
        with ensure_clean() as target:

            # path-based round trip: to_csv -> read_csv
            df.to_csv(target, compression=compression, encoding=encoding)
            from_path = read_csv(
                target, index_col=0, encoding=encoding, compression=compression
            )
            assert_frame_equal(df, from_path)

            # handle-based round trip: to_csv -> read_csv
            handle, _handles = _get_handle(
                target, "w", encoding=encoding, compression=compression
            )
            with handle:
                df.to_csv(handle, encoding=encoding)
            from_handle = pd.read_csv(
                target,
                index_col=0,
                squeeze=True,
                encoding=encoding,
                compression=compression,
            )
            assert_frame_equal(df, from_handle)

            # inspect the decompressed text for every column label
            codec = encoding or "utf8"
            with tm.decompress_file(target, compression) as stream:
                decoded = stream.read().decode(codec)
                for name in df.columns:
                    assert name in decoded

            # parse the decompressed stream directly
            with tm.decompress_file(target, compression) as stream:
                reparsed = read_csv(stream, index_col=0, encoding=encoding)
                assert_frame_equal(df, reparsed)
示例#3
0
    def test_to_csv_compression(self, compression):
        """Round-trip a small named Series through compressed CSV.

        Writes the Series with ``to_csv`` using ``compression``, reads it back
        with ``read_csv`` and compares. The file is then opened through
        ``tm.decompress_file`` to check that the Series name is present in the
        decoded text and that the decompressed stream parses back to the
        original Series.
        """
        s = Series([0.123456, 0.234567, 0.567567],
                   index=['A', 'B', 'C'],
                   name='X')

        with ensure_clean() as filename:

            s.to_csv(filename, compression=compression, header=True)

            # test the round trip - to_csv -> read_csv
            rs = pd.read_csv(filename,
                             compression=compression,
                             index_col=0,
                             squeeze=True)
            assert_series_equal(s, rs)

            # explicitly ensure file was compressed
            # Use context managers so the handle is released even when an
            # assertion fails; a leaked handle can also prevent the temporary
            # file from being removed on exit.
            with tm.decompress_file(filename, compression=compression) as fh:
                text = fh.read().decode('utf8')
                assert s.name in text

            with tm.decompress_file(filename, compression=compression) as fh:
                assert_series_equal(s, pd.read_csv(fh,
                                                   index_col=0,
                                                   squeeze=True))
示例#4
0
    def test_to_csv_compression(self, s, encoding, compression):
        """Round-trip the Series ``s`` through compressed CSV.

        ``s`` is written by path and then through a handle from
        ``_get_handle``; both results are read back with ``pd.read_csv`` and
        compared with ``s``. The file is finally opened with
        ``tm.decompress_file`` to look for ``s.name`` in the decoded text and
        to re-parse the decompressed stream.
        """
        io_opts = dict(compression=compression, encoding=encoding)

        with ensure_clean() as path:

            # round trip by path: to_csv -> read_csv
            s.to_csv(path, header=True, **io_opts)
            roundtrip = pd.read_csv(path, index_col=0, squeeze=True,
                                    **io_opts)
            assert_series_equal(s, roundtrip)

            # round trip through an open handle: to_csv -> read_csv
            handle, _handles = _get_handle(path, 'w', **io_opts)
            with handle:
                s.to_csv(handle, encoding=encoding, header=True)
            roundtrip = pd.read_csv(path, index_col=0, squeeze=True,
                                    **io_opts)
            assert_series_equal(s, roundtrip)

            # decompress by hand and look for the Series name in the text
            with tm.decompress_file(path, compression) as raw:
                text = raw.read().decode(encoding or 'utf8')
                assert s.name in text

            # the decompressed stream must parse back to the same Series
            with tm.decompress_file(path, compression) as raw:
                parsed = pd.read_csv(raw, index_col=0, squeeze=True,
                                     encoding=encoding)
                assert_series_equal(s, parsed)
示例#5
0
    def test_to_csv_compression(self, s, encoding, compression):
        """Check compressed CSV output of the Series ``s``.

        Covers writing by file name and by a handle returned from
        ``_get_handle``, reading each result back with ``pd.read_csv``, and
        inspecting the file through ``tm.decompress_file``.
        """
        with ensure_clean() as target:

            # path-based round trip: to_csv -> read_csv
            s.to_csv(target, header=True, encoding=encoding,
                     compression=compression)
            from_path = pd.read_csv(target, index_col=0, squeeze=True,
                                    encoding=encoding,
                                    compression=compression)
            assert_series_equal(s, from_path)

            # handle-based round trip: to_csv -> read_csv
            handle, _handles = _get_handle(target, 'w', encoding=encoding,
                                           compression=compression)
            with handle:
                s.to_csv(handle, header=True, encoding=encoding)
            from_handle = pd.read_csv(target, index_col=0, squeeze=True,
                                      encoding=encoding,
                                      compression=compression)
            assert_series_equal(s, from_handle)

            # the Series name must appear in the decompressed text
            codec = encoding or 'utf8'
            with tm.decompress_file(target, compression) as stream:
                decoded = stream.read().decode(codec)
                assert s.name in decoded

            # parse the decompressed stream directly
            with tm.decompress_file(target, compression) as stream:
                reparsed = pd.read_csv(stream, index_col=0, squeeze=True,
                                       encoding=encoding)
                assert_series_equal(s, reparsed)
示例#6
0
    def test_to_csv_compression(self, df, encoding, compression):
        """Round-trip ``df`` through compressed CSV by path and ``open`` handle.

        ``df`` is written by file name and read back, then written again
        through a handle from the built-in ``open`` (still passing
        ``compression``) and read back a second time. Both results are
        compared with ``df`` before the file is inspected through
        ``tm.decompress_file``.
        """
        read_opts = dict(compression=compression, index_col=0,
                         encoding=encoding)

        with ensure_clean() as path:

            # write by path, then read back
            df.to_csv(path, compression=compression, encoding=encoding)
            via_path = read_csv(path, **read_opts)

            # write through a plain text handle, then read back
            with open(path, 'w') as handle:
                df.to_csv(handle, compression=compression, encoding=encoding)
            via_handle = read_csv(path, **read_opts)

            assert_frame_equal(df, via_path)
            assert_frame_equal(df, via_handle)

            # decompress by hand and look for each column label
            with tm.decompress_file(path, compression) as raw:
                text = raw.read().decode(encoding or 'utf8')
                for label in df.columns:
                    assert label in text

            # the decompressed stream must parse back to the same frame
            with tm.decompress_file(path, compression) as raw:
                parsed = read_csv(raw, index_col=0, encoding=encoding)
                assert_frame_equal(df, parsed)
示例#7
0
    def test_to_csv_compression(self, df, encoding, compression):
        """Check compressed CSV output of ``df`` by path and by ``open`` handle.

        Reads the file back after each write and compares both results with
        ``df``; then opens the file with ``tm.decompress_file`` to verify the
        column labels and to parse the decompressed stream.
        """
        with ensure_clean() as target:

            # path-based write followed by read_csv
            df.to_csv(target, encoding=encoding, compression=compression)
            from_path = read_csv(target, index_col=0, encoding=encoding,
                                 compression=compression)

            # write again through a handle from the built-in open()
            with open(target, 'w') as out:
                df.to_csv(out, encoding=encoding, compression=compression)
            from_handle = read_csv(target, index_col=0, encoding=encoding,
                                   compression=compression)

            assert_frame_equal(df, from_path)
            assert_frame_equal(df, from_handle)

            # every column label must appear in the decompressed text
            codec = encoding or 'utf8'
            with tm.decompress_file(target, compression) as stream:
                decoded = stream.read().decode(codec)
                for name in df.columns:
                    assert name in decoded

            # parse the decompressed stream directly
            with tm.decompress_file(target, compression) as stream:
                reparsed = read_csv(stream, index_col=0, encoding=encoding)
                assert_frame_equal(df, reparsed)
示例#8
0
    def test_to_csv_compression(self, compression_no_zip):
        """Round-trip a 2x3 float DataFrame through compressed CSV.

        The frame is written with ``to_csv`` using ``compression_no_zip``,
        read back with ``read_csv`` and compared. The file is then opened via
        ``tm.decompress_file`` to check the column labels in the decoded text
        and to parse the decompressed stream.
        """
        method = compression_no_zip
        frame = DataFrame(
            [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
            columns=['X', 'Y', 'Z'],
            index=['A', 'B'])

        with ensure_clean() as path:

            # round trip: to_csv -> read_csv
            frame.to_csv(path, compression=method)
            reloaded = read_csv(path, index_col=0, compression=method)
            assert_frame_equal(frame, reloaded)

            # decompress by hand and look for each column label
            with tm.decompress_file(path, method) as raw:
                text = raw.read().decode('utf8')
                for label in frame.columns:
                    assert label in text

            # the decompressed stream must parse back to the same frame
            with tm.decompress_file(path, method) as raw:
                parsed = read_csv(raw, index_col=0)
                assert_frame_equal(frame, parsed)
示例#9
0
    def test_write_infer(self, ext, get_random_path):
        """Check that ``to_pickle`` picks its compression from the extension.

        The compression name is looked up from ``ext`` in
        ``self._compression_to_extension`` (``None`` when nothing matches).
        A frame is pickled to a path ending in ``ext`` without an explicit
        ``compression`` argument, decompressed by hand into a ``.raw`` file,
        and read back uncompressed for comparison.
        """
        ext_map = self._compression_to_extension
        # first compression whose registered extension equals ``ext``
        compression = next(
            (name for name in ext_map if ext_map[name] == ext), None)

        compressed_path = get_random_path + ext
        raw_path = get_random_path + ".raw"

        with tm.ensure_clean(compressed_path) as p1, \
                tm.ensure_clean(raw_path) as p2:
            expected = tm.makeDataFrame()

            # write to compressed file by inferred compression method
            expected.to_pickle(p1)

            # decompress into the raw file
            with tm.decompress_file(p1, compression=compression) as src, \
                    open(p2, "wb") as dst:
                dst.write(src.read())

            # read decompressed file
            result = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(expected, result)
示例#10
0
    def test_write_infer(self, ext, get_random_path):
        """Pickle a frame to ``<path><ext>`` and verify the inferred compression.

        ``to_pickle`` is called without ``compression``; the file is then
        decompressed using the compression name that
        ``self._compression_to_extension`` associates with ``ext`` (``None``
        if there is no match) and the raw pickle is loaded and compared.
        """
        ext_by_method = self._compression_to_extension
        candidates = [m for m in ext_by_method if ext_by_method[m] == ext]
        # keep the first match, mirroring a search that stops at the first hit
        compression = candidates[0] if candidates else None

        stem = get_random_path
        with tm.ensure_clean(stem + ext) as p1, \
                tm.ensure_clean(stem + ".raw") as p2:
            frame = tm.makeDataFrame()

            # rely on the file extension to select the compression
            frame.to_pickle(p1)

            # decompress the pickle into the raw file
            with tm.decompress_file(p1, compression=compression) as packed:
                with open(p2, "wb") as unpacked:
                    payload = packed.read()
                    unpacked.write(payload)

            # load the raw pickle without any compression
            reloaded = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(frame, reloaded)
示例#11
0
    def test_gz_lineend(self):
        """Compare gzip-compressed ``to_csv`` output with the expected text.

        The expected string is built with ``tm.convert_rows_list_to_csv_str``;
        the file written to a ``.csv.gz`` path is decompressed and decoded
        as UTF-8 before comparison. Presumably guards line-ending handling
        (see GH 25311) - confirm against the issue.
        """
        # GH 25311
        frame = pd.DataFrame({'a': [1, 2]})
        expected = tm.convert_rows_list_to_csv_str(['a', '1', '2'])

        with ensure_clean('__test_gz_lineend.csv.gz') as path:
            frame.to_csv(path, index=False)
            with tm.decompress_file(path, compression='gzip') as fh:
                raw = fh.read()

        result = raw.decode('utf-8')
        assert result == expected
示例#12
0
    def test_gz_lineend(self):
        """Check the decompressed text of a gzip CSV written by ``to_csv``.

        A one-column frame is written without its index to a ``.csv.gz``
        path; the decompressed, UTF-8 decoded content must equal the string
        produced by ``tm.convert_rows_list_to_csv_str`` for the same rows.
        """
        # GH 25311
        rows = ["a", "1", "2"]
        data = pd.DataFrame({"a": [1, 2]})
        expected = tm.convert_rows_list_to_csv_str(rows)

        with ensure_clean("__test_gz_lineend.csv.gz") as gz_path:
            data.to_csv(gz_path, index=False)
            with tm.decompress_file(gz_path, compression="gzip") as stream:
                content = stream.read()
                result = content.decode("utf-8")

        assert result == expected
示例#13
0
    def test_gz_lineend(self):
        """Verify the exact text stored in a gzip-compressed CSV.

        Writes ``{'a': [1, 2]}`` with ``index=False`` to a ``.csv.gz`` file,
        decompresses it with ``tm.decompress_file`` and compares the decoded
        text with ``tm.convert_rows_list_to_csv_str(['a', '1', '2'])``.
        """
        # GH 25311
        source = pd.DataFrame({'a': [1, 2]})
        wanted = tm.convert_rows_list_to_csv_str(['a', '1', '2'])

        with ensure_clean('__test_gz_lineend.csv.gz') as archive:
            source.to_csv(archive, index=False)
            with tm.decompress_file(archive, compression='gzip') as gz:
                got = gz.read().decode('utf-8')

        assert got == wanted
示例#14
0
文件: test_io.py 项目: jess010/pandas
    def test_to_csv_compression(self, compression):
        """Round-trip a small named Series through compressed CSV.

        Writes the Series with ``to_csv`` (with header), reads it back with
        ``pd.read_csv`` and compares; then opens the file through
        ``tm.decompress_file`` to look for the Series name in the decoded
        text and to parse the decompressed stream.
        """
        series = Series([0.123456, 0.234567, 0.567567],
                        name='X', index=['A', 'B', 'C'])

        with ensure_clean() as path:

            # round trip: to_csv -> read_csv
            series.to_csv(path, header=True, compression=compression)
            reloaded = pd.read_csv(path, index_col=0, squeeze=True,
                                   compression=compression)
            assert_series_equal(series, reloaded)

            # decompress by hand and look for the Series name in the text
            with tm.decompress_file(path, compression=compression) as raw:
                decoded = raw.read().decode('utf8')
                assert series.name in decoded

            # the decompressed stream must parse back to the same Series
            with tm.decompress_file(path, compression=compression) as raw:
                parsed = pd.read_csv(raw, index_col=0, squeeze=True)
                assert_series_equal(series, parsed)
示例#15
0
def test_compression_roundtrip(compression):
    """Round-trip a 2x3 float DataFrame through compressed JSON.

    The frame is written with ``to_json`` and read back with ``read_json``
    using the same ``compression``. The file is then decompressed with
    ``tm.decompress_file`` and the decoded text parsed by ``read_json``.
    """
    values = [[0.123456, 0.234567, 0.567567],
              [12.32112, 123123.2, 321321.2]]
    frame = pd.DataFrame(values, index=['A', 'B'], columns=['X', 'Y', 'Z'])

    with tm.ensure_clean() as path:
        frame.to_json(path, compression=compression)
        reloaded = pd.read_json(path, compression=compression)
        assert_frame_equal(frame, reloaded)

        # decompress by hand and parse the resulting JSON text
        with tm.decompress_file(path, compression) as fh:
            payload = fh.read().decode('utf8')
        assert_frame_equal(frame, pd.read_json(payload))
示例#16
0
    def test_to_csv_compression(self, compression):
        """Round-trip a 2x3 float DataFrame through compressed CSV.

        Writes with ``to_csv``, reads back with ``read_csv`` and compares;
        then opens the file via ``tm.decompress_file`` to check the column
        labels in the decoded text and to parse the decompressed stream.
        """
        values = [[0.123456, 0.234567, 0.567567],
                  [12.32112, 123123.2, 321321.2]]
        frame = DataFrame(values, columns=['X', 'Y', 'Z'], index=['A', 'B'])

        with ensure_clean() as path:

            # round trip: to_csv -> read_csv
            frame.to_csv(path, compression=compression)
            reloaded = read_csv(path, index_col=0, compression=compression)
            assert_frame_equal(frame, reloaded)

            # decompress by hand and look for each column label
            with tm.decompress_file(path, compression) as raw:
                decoded = raw.read().decode('utf8')
                for label in frame.columns:
                    assert label in decoded

            # the decompressed stream must parse back to the same frame
            with tm.decompress_file(path, compression) as raw:
                parsed = read_csv(raw, index_col=0)
                assert_frame_equal(frame, parsed)
示例#17
0
def test_compression_roundtrip(compression):
    """Check ``to_json``/``read_json`` with ``compression`` on a small frame.

    After the direct round trip, the file is opened with
    ``tm.decompress_file`` and its UTF-8 decoded content is parsed by
    ``read_json`` and compared with the original frame.
    """
    expected = pd.DataFrame(
        [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
        columns=['X', 'Y', 'Z'],
        index=['A', 'B'])

    with tm.ensure_clean() as target:
        # direct round trip with matching compression on both sides
        expected.to_json(target, compression=compression)
        direct = pd.read_json(target, compression=compression)
        assert_frame_equal(expected, direct)

        # decompress manually, then parse the decoded JSON text
        with tm.decompress_file(target, compression) as stream:
            raw_bytes = stream.read()
            result = raw_bytes.decode('utf8')
        from_text = pd.read_json(result)
        assert_frame_equal(expected, from_text)
示例#18
0
    def test_write_explicit(self, compression, get_random_path):
        """Pickle a frame with an explicit ``compression`` and verify it.

        The frame is written to a ``.compressed`` path, decompressed by hand
        into a ``.raw`` path with ``tm.decompress_file``, and the raw file is
        loaded with ``read_pickle(compression=None)`` for comparison.
        """
        compressed_path = get_random_path + ".compressed"
        raw_path = get_random_path + ".raw"

        with tm.ensure_clean(compressed_path) as p1, \
                tm.ensure_clean(raw_path) as p2:
            expected = tm.makeDataFrame()

            # write to compressed file
            expected.to_pickle(p1, compression=compression)

            # decompress into the raw file
            with tm.decompress_file(p1, compression=compression) as src, \
                    open(p2, "wb") as dst:
                dst.write(src.read())

            # read decompressed file
            result = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(expected, result)
示例#19
0
    def test_write_explicit(self, compression, get_random_path):
        """Check ``to_pickle`` with an explicitly passed ``compression``.

        Writes a frame to ``<path>.compressed``, copies the decompressed
        bytes to ``<path>.raw`` and compares the frame loaded from the raw
        file with the original.
        """
        stem = get_random_path

        with tm.ensure_clean(stem + ".compressed") as p1, \
                tm.ensure_clean(stem + ".raw") as p2:
            frame = tm.makeDataFrame()

            # pickle with the requested compression
            frame.to_pickle(p1, compression=compression)

            # decompress the pickle into the raw file
            with tm.decompress_file(p1, compression=compression) as packed:
                with open(p2, "wb") as unpacked:
                    payload = packed.read()
                    unpacked.write(payload)

            # load the raw pickle without any compression
            reloaded = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(frame, reloaded)