Пример #1
0
    def test_read_infer(self, ext, get_random_path):
        """Read a compressed pickle without naming the compression method.

        A frame is pickled uncompressed, compressed into a file whose name
        ends in ``ext`` via ``self.compress_file``, and read back with
        ``pd.read_pickle`` called without a ``compression`` argument.
        """
        if ext == '.xz':
            tm._skip_if_no_lzma()

        raw_name = get_random_path + ".raw"
        packed_name = get_random_path + ext

        # Reverse lookup: the first compression whose registered extension
        # equals ``ext``; stays None when nothing matches.
        ext_map = self._compression_to_extension
        compression = next(
            (name for name in ext_map if ext_map[name] == ext), None)

        with tm.ensure_clean(raw_name) as raw, \
                tm.ensure_clean(packed_name) as packed:
            expected = tm.makeDataFrame()

            # write to uncompressed file
            expected.to_pickle(raw, compression=None)

            # compress
            self.compress_file(raw, packed, compression=compression)

            # read compressed file by inferred compression method
            result = pd.read_pickle(packed)

            tm.assert_frame_equal(expected, result)
Пример #2
0
    def test_read_infer(self, ext, get_random_path):
        """Check that ``pd.read_pickle`` works with no explicit compression.

        The compressed file is produced by ``self.compress_file`` from an
        uncompressed pickle and carries the extension ``ext``; the read call
        is made without a ``compression`` argument.
        """
        if ext == '.xz':
            tm._skip_if_no_lzma()

        plain_path = get_random_path + ".raw"
        compressed_path = get_random_path + ext

        # Map the extension back to its compression name (None if unknown);
        # the first matching key wins, as with a loop that breaks on a hit.
        mapping = self._compression_to_extension
        compression = next(
            (key for key in mapping if mapping[key] == ext), None)

        with tm.ensure_clean(plain_path) as plain, \
                tm.ensure_clean(compressed_path) as compressed:
            frame = tm.makeDataFrame()

            # write to uncompressed file
            frame.to_pickle(plain, compression=None)

            # compress
            self.compress_file(plain, compressed, compression=compression)

            # read compressed file by inferred compression method
            loaded = pd.read_pickle(compressed)

            tm.assert_frame_equal(frame, loaded)
Пример #3
0
def test_lines_with_compression(compression):
    """Round-trip a frame through a line-delimited JSON file.

    The file is written with ``orient='records', lines=True`` and read back
    with ``lines=True``, both using the given ``compression``.
    """
    if compression == 'xz':
        tm._skip_if_no_lzma()

    # Keyword arguments shared by the write and the read.
    io_kwargs = dict(lines=True, compression=compression)

    with tm.ensure_clean() as path:
        expected = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
        expected.to_json(path, orient='records', **io_kwargs)

        result = pd.read_json(path, **io_kwargs)
        assert_frame_equal(expected, result)
Пример #4
0
def test_chunksize_with_compression(compression):
    """Round-trip a line-delimited JSON file read back with ``chunksize=1``.

    The result of the chunked ``pd.read_json`` call is handed to
    ``pd.concat`` and compared with the frame that was written.
    """
    if compression == 'xz':
        tm._skip_if_no_lzma()

    with tm.ensure_clean() as path:
        expected = pd.read_json(
            '{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
        expected.to_json(path, orient='records', lines=True,
                         compression=compression)

        chunks = pd.read_json(path, lines=True, chunksize=1,
                              compression=compression)
        result = pd.concat(chunks)
        assert_frame_equal(expected, result)
Пример #5
0
def test_lines_with_compression(compression):
    """Write and re-read a records-oriented, line-delimited JSON file.

    Both the ``to_json`` and the ``read_json`` call receive the same
    ``compression`` value; the frames must compare equal.
    """
    if compression == 'xz':
        tm._skip_if_no_lzma()

    with tm.ensure_clean() as path:
        original = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
        original.to_json(
            path, orient='records', lines=True, compression=compression)

        reloaded = pd.read_json(path, lines=True, compression=compression)
        assert_frame_equal(original, reloaded)
Пример #6
0
def test_chunksize_with_compression(compression):
    """Re-read a compressed, line-delimited JSON file one record per chunk.

    ``pd.read_json`` is called with ``chunksize=1`` and its result is passed
    to ``pd.concat`` before comparison with the original frame.
    """
    if compression == 'xz':
        tm._skip_if_no_lzma()

    with tm.ensure_clean() as path:
        original = pd.read_json(
            '{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}')
        original.to_json(
            path, orient='records', lines=True, compression=compression)

        reader = pd.read_json(
            path, lines=True, chunksize=1, compression=compression)
        reloaded = pd.concat(reader)
        assert_frame_equal(original, reloaded)
Пример #7
0
def test_compression_roundtrip(compression):
    """Round-trip a float frame through ``to_json`` / ``read_json``.

    After the compressed round trip, the file is also passed through
    ``decompress_file`` and parsed without a ``compression`` argument.
    """
    if compression == 'xz':
        tm._skip_if_no_lzma()

    values = [[0.123456, 0.234567, 0.567567],
              [12.32112, 123123.2, 321321.2]]
    expected = pd.DataFrame(values, index=['A', 'B'],
                            columns=['X', 'Y', 'Z'])

    with tm.ensure_clean() as path:
        expected.to_json(path, compression=compression)
        roundtripped = pd.read_json(path, compression=compression)
        assert_frame_equal(expected, roundtripped)

        # explicitly ensure file was compressed.
        raw_json = decompress_file(path, compression)
        assert_frame_equal(expected, pd.read_json(raw_json))
Пример #8
0
    def test_xz(self):
        """Check that ``self.read_csv`` parses xz-compressed CSV data.

        Three reads are compared against the uncompressed parse of
        ``self.csv1``: by path with ``compression="xz"``, from an open
        binary handle with ``compression="xz"``, and by a ``test.xz`` path
        with ``compression="infer"``.
        """
        # The return value is used as the lzma module below.
        lzma = tm._skip_if_no_lzma()

        with open(self.csv1, "rb") as data_file:
            data = data_file.read()
        expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            # The context manager closes the archive even if the write
            # raises, so no handle is left open on the temp file.
            with lzma.LZMAFile(path, mode="wb") as tmp:
                tmp.write(data)

            result = self.read_csv(path, compression="xz")
            tm.assert_frame_equal(result, expected)

            with open(path, "rb") as f:
                result = self.read_csv(f, compression="xz")
                tm.assert_frame_equal(result, expected)

        with tm.ensure_clean("test.xz") as path:
            with lzma.LZMAFile(path, mode="wb") as tmp:
                tmp.write(data)

            result = self.read_csv(path, compression="infer")
            tm.assert_frame_equal(result, expected)
Пример #9
0
def test_compression_roundtrip(compression):
    """Write a frame as compressed JSON and verify two ways of reading it.

    First ``pd.read_json`` is given the same ``compression``; then the file
    is run through ``decompress_file`` and that output is parsed directly.
    """
    if compression == 'xz':
        tm._skip_if_no_lzma()

    rows = [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]]
    frame = pd.DataFrame(rows, index=['A', 'B'], columns=['X', 'Y', 'Z'])

    with tm.ensure_clean() as path:
        frame.to_json(path, compression=compression)
        reloaded = pd.read_json(path, compression=compression)
        assert_frame_equal(frame, reloaded)

        # explicitly ensure file was compressed.
        decompressed = decompress_file(path, compression)
        assert_frame_equal(frame, pd.read_json(decompressed))
Пример #10
0
    def test_xz(self):
        """Check that ``self.read_csv`` parses xz-compressed CSV data.

        The raw bytes of ``self.csv1`` are written into xz archives, which
        are then read by path and by open handle with ``compression='xz'``,
        and by a ``test.xz`` path with ``compression='infer'``. Every result
        must equal the uncompressed parse of ``self.csv1``.
        """
        # The return value is used as the lzma module below.
        lzma = tm._skip_if_no_lzma()

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
        expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            # ``with`` guarantees the archive is closed even when the write
            # fails, instead of relying on a manual close() call.
            with lzma.LZMAFile(path, mode='wb') as tmp:
                tmp.write(data)

            result = self.read_csv(path, compression='xz')
            tm.assert_frame_equal(result, expected)

            with open(path, 'rb') as f:
                result = self.read_csv(f, compression='xz')
                tm.assert_frame_equal(result, expected)

        with tm.ensure_clean('test.xz') as path:
            with lzma.LZMAFile(path, mode='wb') as tmp:
                tmp.write(data)

            result = self.read_csv(path, compression='infer')
            tm.assert_frame_equal(result, expected)
Пример #11
0
def test_with_s3_url(compression):
    """Read compressed JSON back from an ``s3://`` URL under ``moto.mock_s3``.

    The frame is written to a temporary file with ``to_json``, uploaded as
    object ``test-1`` into bucket ``pandas-test``, and read with
    ``pd.read_json`` using the same ``compression``.
    """
    boto3 = pytest.importorskip('boto3')
    pytest.importorskip('s3fs')
    if compression == 'xz':
        tm._skip_if_no_lzma()

    expected = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
    with moto.mock_s3():
        s3 = boto3.resource("s3", region_name="us-east-1")
        bucket = s3.create_bucket(Bucket="pandas-test")

        with tm.ensure_clean() as path:
            expected.to_json(path, compression=compression)
            with open(path, 'rb') as payload:
                bucket.put_object(Key='test-1', Body=payload)

        result = pd.read_json('s3://pandas-test/test-1',
                              compression=compression)
        assert_frame_equal(expected, result)
Пример #12
0
def test_with_s3_url(compression):
    """Round-trip compressed JSON through a bucket inside ``moto.mock_s3``.

    A local temporary file produced by ``to_json`` is uploaded with
    ``put_object`` and then fetched via ``pd.read_json`` from the URL
    ``s3://pandas-test/test-1``.
    """
    boto3 = pytest.importorskip('boto3')
    pytest.importorskip('s3fs')
    if compression == 'xz':
        tm._skip_if_no_lzma()

    source_df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}')
    with moto.mock_s3():
        resource = boto3.resource("s3", region_name="us-east-1")
        bucket = resource.create_bucket(Bucket="pandas-test")

        with tm.ensure_clean() as path:
            source_df.to_json(path, compression=compression)
            with open(path, 'rb') as body:
                bucket.put_object(Key='test-1', Body=body)

        fetched_df = pd.read_json(
            's3://pandas-test/test-1', compression=compression)
        assert_frame_equal(source_df, fetched_df)
Пример #13
0
    def test_to_csv_compression_xz(self):
        """Check ``to_csv(..., compression="xz")`` against two readers.

        The written file is read back with ``read_csv(compression="xz")``
        and, separately, through a handle from ``lzma.open`` with no
        ``compression`` argument.
        """
        # GH11852
        # use the compression kw in to_csv
        tm._skip_if_no_lzma()
        df = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                       index=['A', 'B'], columns=['X', 'Y', 'Z'])

        with ensure_clean() as filename:

            df.to_csv(filename, compression="xz")

            # test the round trip - to_csv -> read_csv
            rs = read_csv(filename, compression="xz", index_col=0)
            assert_frame_equal(df, rs)

            # explicitly make sure file is xzipped
            lzma = compat.import_lzma()
            # ``with`` closes the handle even when the assertion fails, so
            # the file is not still open when ensure_clean() exits.
            with lzma.open(filename, 'rb') as f:
                assert_frame_equal(df, read_csv(f, index_col=0))
Пример #14
0
    def test_to_csv_compression_xz(self):
        """Write a frame with ``to_csv(compression="xz")`` and verify it.

        Verification is done twice: via ``read_csv(compression="xz")`` and
        by opening the file with ``lzma.open`` and parsing the handle
        without a ``compression`` argument.
        """
        # GH11852
        # use the compression kw in to_csv
        tm._skip_if_no_lzma()
        df = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                       index=['A', 'B'], columns=['X', 'Y', 'Z'])

        with ensure_clean() as filename:

            df.to_csv(filename, compression="xz")

            # test the round trip - to_csv -> read_csv
            rs = read_csv(filename, compression="xz", index_col=0)
            assert_frame_equal(df, rs)

            # explicitly make sure file is xzipped
            lzma = compat.import_lzma()
            # Use a context manager so a failing comparison cannot leave the
            # lzma handle open on the temporary file.
            with lzma.open(filename, 'rb') as f:
                assert_frame_equal(df, read_csv(f, index_col=0))
Пример #15
0
    def test_read_explicit(self, compression, get_random_path):
        """Read a compressed pickle with the compression method passed in.

        A frame is pickled uncompressed, compressed into a ``.compressed``
        file via ``self.compress_file``, and read back with
        ``pd.read_pickle(..., compression=compression)``.
        """
        # issue 11666
        if compression == 'xz':
            tm._skip_if_no_lzma()

        raw_name = get_random_path + ".raw"
        packed_name = get_random_path + ".compressed"

        with tm.ensure_clean(raw_name) as raw, \
                tm.ensure_clean(packed_name) as packed:
            expected = tm.makeDataFrame()

            # write to uncompressed file
            expected.to_pickle(raw, compression=None)

            # compress
            self.compress_file(raw, packed, compression=compression)

            # read compressed file
            result = pd.read_pickle(packed, compression=compression)

            tm.assert_frame_equal(expected, result)
Пример #16
0
    def test_read_explicit(self, compression, get_random_path):
        """Check ``pd.read_pickle`` with an explicit ``compression`` value.

        The compressed input has the neutral ``.compressed`` suffix and is
        produced by ``self.compress_file`` from an uncompressed pickle.
        """
        # issue 11666
        if compression == 'xz':
            tm._skip_if_no_lzma()

        plain_path = get_random_path + ".raw"
        compressed_path = get_random_path + ".compressed"

        with tm.ensure_clean(plain_path) as plain, \
                tm.ensure_clean(compressed_path) as compressed:
            frame = tm.makeDataFrame()

            # write to uncompressed file
            frame.to_pickle(plain, compression=None)

            # compress
            self.compress_file(plain, compressed, compression=compression)

            # read compressed file
            loaded = pd.read_pickle(compressed, compression=compression)

            tm.assert_frame_equal(frame, loaded)
Пример #17
0
 def check_table(self, url, compression, engine):
     """Read ``url`` with ``read_table`` and compare to ``self.local_table``.

     ``compression`` and ``engine`` are forwarded to ``read_table``
     unchanged. ``tm._skip_if_no_lzma()`` is called first when ``url`` ends
     in ``.xz``.
     """
     needs_lzma = url.endswith('.xz')
     if needs_lzma:
         tm._skip_if_no_lzma()

     fetched = read_table(url, compression=compression, engine=engine)
     tm.assert_frame_equal(fetched, self.local_table)