Exemplo n.º 1
0
    def test_xz(self):
        """Check that xz-compressed CSV data parses to the same frame as the
        uncompressed file: via a path, via an open binary handle, and via
        ``compression='infer'`` on a path ending in '.xz'.
        """
        lzma = compat.import_lzma()

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            # the context manager closes the archive even if the write fails
            with lzma.LZMAFile(path, mode='wb') as tmp:
                tmp.write(data)

            result = self.read_csv(path, compression='xz')
            tm.assert_frame_equal(result, expected)

            with open(path, 'rb') as f:
                result = self.read_csv(f, compression='xz')
                tm.assert_frame_equal(result, expected)

        with tm.ensure_clean('test.xz') as path:
            with lzma.LZMAFile(path, mode='wb') as tmp:
                tmp.write(data)
            result = self.read_csv(path, compression='infer')
            tm.assert_frame_equal(result, expected)
Exemplo n.º 2
0
def _get_handle(path, mode, encoding=None, compression=None, memory_map=False):
    """Gets file handle for given path and mode.

    Parameters
    ----------
    path : str
        Path of the file to open.
    mode : str
        Mode handed to the underlying opener.  It is not forwarded for
        'zip', where the archive and its member are opened with the
        ``zipfile`` defaults.
    encoding : str or None, default None
        Text encoding.  Together with `compression` it is only accepted
        on Python 3.
    compression : {'gzip', 'bz2', 'zip', 'xz', None}, default None
        Compression protocol used to open `path`.
    memory_map : boolean, default False
        If True, try to wrap the handle in a ``MMapWrapper``.  Only takes
        effect when `compression` is None, because the compressed branch
        returns before the wrapping step.

    Returns
    -------
    f : file-like

    Raises
    ------
    ValueError
        If `encoding` and `compression` are both given on Python 2, if a
        ZIP archive does not hold exactly one file, or if `compression`
        is not recognized.
    """
    if compression is not None:
        if encoding is not None and not compat.PY3:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        if compression == 'gzip':
            import gzip
            f = gzip.GzipFile(path, mode)
        elif compression == 'bz2':
            import bz2
            f = bz2.BZ2File(path, mode)
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path)
            zip_names = zip_file.namelist()

            if len(zip_names) != 1:
                # do not leave the archive open when rejecting it
                zip_file.close()
                if not zip_names:
                    raise ValueError(
                        'Zero files found in ZIP file {}'.format(path))
                raise ValueError(
                    'Multiple files found in ZIP file.'
                    ' Only one file per ZIP :{}'.format(zip_names))
            f = zip_file.open(zip_names.pop())
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path, mode)
        else:
            raise ValueError('Unrecognized compression type: %s' % compression)
        if compat.PY3:
            # the compressed openers yield bytes; wrap them for text access
            from io import TextIOWrapper
            f = TextIOWrapper(f, encoding=encoding)
        return f
    else:
        if compat.PY3:
            if encoding:
                f = open(path, mode, encoding=encoding)
            else:
                f = open(path, mode, errors='replace')
        else:
            f = open(path, mode)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f
Exemplo n.º 3
0
    def test_xz(self):
        """Check that xz-compressed CSV data parses to the same frame as the
        uncompressed file: via a path, via an open binary handle, and via
        ``compression='infer'`` on a path ending in '.xz'.
        """
        lzma = compat.import_lzma()

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            # the context manager closes the archive even if the write fails
            with lzma.LZMAFile(path, mode='wb') as tmp:
                tmp.write(data)

            result = self.read_csv(path, compression='xz')
            tm.assert_frame_equal(result, expected)

            with open(path, 'rb') as f:
                result = self.read_csv(f, compression='xz')
                tm.assert_frame_equal(result, expected)

        with tm.ensure_clean('test.xz') as path:
            with lzma.LZMAFile(path, mode='wb') as tmp:
                tmp.write(data)
            result = self.read_csv(path, compression='infer')
            tm.assert_frame_equal(result, expected)
Exemplo n.º 4
0
def _get_handle(path, mode, encoding=None, compression=None, memory_map=False):
    """Gets file handle for given path and mode.

    Parameters
    ----------
    path : str
        Path of the file to open.
    mode : str
        Mode handed to the underlying opener.  It is not forwarded for
        'zip', where the archive and its member are opened with the
        ``zipfile`` defaults.
    encoding : str or None, default None
        Text encoding.  Together with `compression` it is only accepted
        on Python 3.
    compression : {'gzip', 'bz2', 'zip', 'xz', None}, default None
        Compression protocol used to open `path`.
    memory_map : boolean, default False
        If True, try to wrap the handle in a ``MMapWrapper``.  Only takes
        effect when `compression` is None, because the compressed branch
        returns before the wrapping step.

    Returns
    -------
    f : file-like

    Raises
    ------
    ValueError
        If `encoding` and `compression` are both given on Python 2, if a
        ZIP archive does not hold exactly one file, or if `compression`
        is not recognized.
    """
    if compression is not None:
        if encoding is not None and not compat.PY3:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        if compression == 'gzip':
            import gzip
            f = gzip.GzipFile(path, mode)
        elif compression == 'bz2':
            import bz2
            f = bz2.BZ2File(path, mode)
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path)
            zip_names = zip_file.namelist()

            if len(zip_names) != 1:
                # do not leave the archive open when rejecting it
                zip_file.close()
                if not zip_names:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path))
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP :{}'
                                 .format(zip_names))
            f = zip_file.open(zip_names.pop())
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path, mode)
        else:
            raise ValueError('Unrecognized compression type: %s' %
                             compression)
        if compat.PY3:
            # the compressed openers yield bytes; wrap them for text access
            from io import TextIOWrapper
            f = TextIOWrapper(f, encoding=encoding)
        return f
    else:
        if compat.PY3:
            if encoding:
                f = open(path, mode, encoding=encoding)
            else:
                f = open(path, mode, errors='replace')
        else:
            f = open(path, mode)

    if memory_map and hasattr(f, 'fileno'):
        try:
            # the wrapper replaces the plain handle, so release the
            # original instead of leaving it open with no owner
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f
Exemplo n.º 5
0
def lzma_file():
    """
    Look up the `LZMAFile` class on the module that
    `compat.import_lzma` provides.

    Returns
    -------
    klass : type or None
        None when the import raises ImportError or the module has no
        `LZMAFile` attribute.
    """
    try:
        module = compat.import_lzma()
    except ImportError:
        # no lzma support available
        return None

    return getattr(module, "LZMAFile", None)
Exemplo n.º 6
0
def lzma_file():
    """
    Look up the `LZMAFile` class on the module that
    `compat.import_lzma` provides.

    Returns
    -------
    klass : type or None
        None when the import raises ImportError or the module has no
        `LZMAFile` attribute.
    """
    try:
        module = compat.import_lzma()
    except ImportError:
        # no lzma support available
        return None

    return getattr(module, "LZMAFile", None)
Exemplo n.º 7
0
def _get_handle(path, mode, encoding=None, compression=None):
    """Gets file handle for given path and mode.

    Parameters
    ----------
    path : str
        Path of the file to open.
    mode : str
        Mode handed to the underlying opener.  It is not forwarded for
        'zip', where the archive and its member are opened with the
        ``zipfile`` defaults.
    encoding : str or None, default None
        Text encoding.  Together with `compression` it is only accepted
        on Python 3.
    compression : {'gzip', 'bz2', 'zip', 'xz', None}, default None
        Compression protocol used to open `path`.

    Returns
    -------
    f : file-like

    Raises
    ------
    ValueError
        If `encoding` and `compression` are both given on Python 2, if a
        ZIP archive does not hold exactly one file, or if `compression`
        is not recognized.
    """
    if compression is not None:
        if encoding is not None and not compat.PY3:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        if compression == 'gzip':
            import gzip
            f = gzip.GzipFile(path, mode)
        elif compression == 'bz2':
            import bz2
            f = bz2.BZ2File(path, mode)
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path)
            zip_names = zip_file.namelist()

            if len(zip_names) != 1:
                # do not leave the archive open when rejecting it
                zip_file.close()
                if not zip_names:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path))
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP :{}'
                                 .format(zip_names))
            f = zip_file.open(zip_names.pop())
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path, mode)
        else:
            raise ValueError('Unrecognized compression type: %s' %
                             compression)
        if compat.PY3:
            # the compressed openers yield bytes; wrap them for text access
            from io import TextIOWrapper
            f = TextIOWrapper(f, encoding=encoding)
        return f
    else:
        if compat.PY3:
            if encoding:
                f = open(path, mode, encoding=encoding)
            else:
                f = open(path, mode, errors='replace')
        else:
            f = open(path, mode)

    return f
Exemplo n.º 8
0
def _get_handle(path, mode, encoding=None, compression=None):
    """Gets file handle for given path and mode.

    Parameters
    ----------
    path : str
        Path of the file to open.
    mode : str
        Mode handed to the underlying opener.  It is not forwarded for
        'zip', where the archive and its member are opened with the
        ``zipfile`` defaults.
    encoding : str or None, default None
        Text encoding.  Together with `compression` it is only accepted
        on Python 3.
    compression : {'gzip', 'bz2', 'zip', 'xz', None}, default None
        Compression protocol used to open `path`.

    Returns
    -------
    f : file-like

    Raises
    ------
    ValueError
        If `encoding` and `compression` are both given on Python 2, if a
        ZIP archive does not hold exactly one file, or if `compression`
        is not recognized.
    """
    if compression is not None:
        if encoding is not None and not compat.PY3:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        if compression == 'gzip':
            import gzip
            f = gzip.GzipFile(path, mode)
        elif compression == 'bz2':
            import bz2
            f = bz2.BZ2File(path, mode)
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path)
            zip_names = zip_file.namelist()

            if len(zip_names) != 1:
                # do not leave the archive open when rejecting it
                zip_file.close()
                if not zip_names:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path))
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP :{}'
                                 .format(zip_names))
            f = zip_file.open(zip_names.pop())
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path, mode)
        else:
            raise ValueError('Unrecognized compression type: %s' %
                             compression)
        if compat.PY3:
            # the compressed openers yield bytes; wrap them for text access
            from io import TextIOWrapper
            f = TextIOWrapper(f, encoding=encoding)
        return f
    else:
        if compat.PY3:
            if encoding:
                f = open(path, mode, encoding=encoding)
            else:
                f = open(path, mode, errors='replace')
        else:
            f = open(path, mode)

    return f
Exemplo n.º 9
0
def decompress_file(path, compression):
    """Read the file at `path`, decompressing it if requested, and return
    its content decoded as UTF-8.

    Parameters
    ----------
    path : str
        Path of the file to read.
    compression : {'gzip', 'bz2', 'xz', None}
        Compression protocol of the file; None reads it as-is.

    Returns
    -------
    str
        The decoded file content.

    Raises
    ------
    ValueError
        If `compression` is not one of the supported values.
    """
    if compression is None:
        f = open(path, 'rb')
    elif compression == 'gzip':
        import gzip
        f = gzip.GzipFile(path, 'rb')
    elif compression == 'bz2':
        import bz2
        f = bz2.BZ2File(path, 'rb')
    elif compression == 'xz':
        lzma = compat.import_lzma()
        f = lzma.open(path, 'rb')
    else:
        msg = 'Unrecognized compression type: {}'.format(compression)
        raise ValueError(msg)

    # close the handle even when reading or decoding fails
    with f:
        return f.read().decode('utf8')
Exemplo n.º 10
0
def decompress_file(path, compression):
    """Read the file at `path`, decompressing it if requested, and return
    its content decoded as UTF-8.

    Parameters
    ----------
    path : str
        Path of the file to read.
    compression : {'gzip', 'bz2', 'xz', None}
        Compression protocol of the file; None reads it as-is.

    Returns
    -------
    str
        The decoded file content.

    Raises
    ------
    ValueError
        If `compression` is not one of the supported values.
    """
    if compression is None:
        f = open(path, 'rb')
    elif compression == 'gzip':
        import gzip
        f = gzip.GzipFile(path, 'rb')
    elif compression == 'bz2':
        import bz2
        f = bz2.BZ2File(path, 'rb')
    elif compression == 'xz':
        lzma = compat.import_lzma()
        f = lzma.open(path, 'rb')
    else:
        msg = 'Unrecognized compression type: {}'.format(compression)
        raise ValueError(msg)

    # close the handle even when reading or decoding fails
    with f:
        return f.read().decode('utf8')
Exemplo n.º 11
0
    def test_to_csv_compression_xz(self):
        """Round-trip a frame through ``to_csv``/``read_csv`` with
        ``compression="xz"`` and confirm the file on disk is xz data.
        """
        # GH11852
        # use the compression kw in to_csv
        df = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                       index=['A', 'B'], columns=['X', 'Y', 'Z'])

        with ensure_clean() as filename:

            df.to_csv(filename, compression="xz")

            # test the round trip - to_csv -> read_csv
            rs = read_csv(filename, compression="xz", index_col=0)
            assert_frame_equal(df, rs)

            # explicitly make sure file is xzipped
            lzma = compat.import_lzma()
            # the context manager closes the handle even if the
            # comparison below fails
            with lzma.open(filename, 'rb') as f:
                assert_frame_equal(df, read_csv(f, index_col=0))
Exemplo n.º 12
0
    def test_to_csv_compression_xz(self):
        """Round-trip a frame through ``to_csv``/``read_csv`` with
        ``compression="xz"`` and confirm the file on disk is xz data.
        """
        # GH11852
        # use the compression kw in to_csv
        tm._skip_if_no_lzma()
        df = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                       index=['A', 'B'], columns=['X', 'Y', 'Z'])

        with ensure_clean() as filename:

            df.to_csv(filename, compression="xz")

            # test the round trip - to_csv -> read_csv
            rs = read_csv(filename, compression="xz", index_col=0)
            assert_frame_equal(df, rs)

            # explicitly make sure file is xzipped
            lzma = compat.import_lzma()
            # the context manager closes the handle even if the
            # comparison below fails
            with lzma.open(filename, 'rb') as f:
                assert_frame_equal(df, read_csv(f, index_col=0))
Exemplo n.º 13
0
def _skip_if_no_lzma():
    """Report whether lzma is unavailable.

    Returns
    -------
    bool
        True if ``import_lzma()`` raises ImportError, False otherwise.
    """
    try:
        import_lzma()
    except ImportError:
        return True
    # explicit falsy result instead of an implicit None
    return False
Exemplo n.º 14
0
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False, is_text=True):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default None
        If 'infer' and `path_or_buf` is path-like, then detect
        compression from the following extensions: '.gz', '.bz2', '.zip',
        or '.xz' (otherwise no compression).
    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        whether file/buffer is in text format (csv, json, etc.), or in binary
        mode (pickle, etc.)

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like objects that were opened in this function.

    Raises
    ------
    ValueError
        If a buffer is combined with `compression` and `encoding` on
        Python 2, if a ZIP archive opened for reading does not hold
        exactly one file, or if `compression` is not recognized.
    """
    try:
        from s3fs import S3File
        need_text_wrapping = (BytesIO, S3File)
    except ImportError:
        need_text_wrapping = (BytesIO,)

    handles = list()
    f = path_or_buf

    # Convert pathlib.Path/py.path.local or string
    path_or_buf = _stringify_path(path_or_buf)
    is_path = isinstance(path_or_buf, compat.string_types)

    if is_path:
        compression = _infer_compression(path_or_buf, compression)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            zf = BytesZipFile(path_or_buf, mode)
            # Ensure the container is closed as well.
            handles.append(zf)
            if zf.mode == 'w':
                f = zf
            elif zf.mode == 'r':
                zip_names = zf.namelist()
                if len(zip_names) != 1:
                    # `handles` never reaches the caller when we raise,
                    # so release the archive here
                    zf.close()
                    if not zip_names:
                        raise ValueError('Zero files found in ZIP file {}'
                                         .format(path_or_buf))
                    raise ValueError('Multiple files found in ZIP file.'
                                     ' Only one file per ZIP: {}'
                                     .format(zip_names))
                f = zf.open(zip_names.pop())

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            mode = "wb" if mode == "w" else mode
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding, newline="")
        elif is_text:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace', newline="")
        else:
            # Python 3 and binary mode
            f = open(path_or_buf, mode)
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if (compat.PY3 and is_text and
            (compression or isinstance(f, need_text_wrapping))):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding, newline='')
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles
Exemplo n.º 15
0
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False, is_text=True):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : str or None
        Supported compression protocols are gzip, bz2, zip, and xz
    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        whether file/buffer is in text format (csv, json, etc.), or in binary
        mode (pickle, etc.)

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like objects that were opened in this function.

    Raises
    ------
    ValueError
        If a buffer is combined with `compression` and `encoding` on
        Python 2, if a ZIP archive opened for reading does not hold
        exactly one file, or if `compression` is not recognized.
    """
    try:
        from s3fs import S3File
        need_text_wrapping = (BytesIO, S3File)
    except ImportError:
        need_text_wrapping = (BytesIO,)

    handles = list()
    f = path_or_buf

    # Convert pathlib.Path/py.path.local or string
    path_or_buf = _stringify_path(path_or_buf)
    is_path = isinstance(path_or_buf, compat.string_types)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            zf = BytesZipFile(path_or_buf, mode)
            # register the container too, so that closing `handles` also
            # releases the archive and not only the member read from it
            handles.append(zf)
            if zf.mode == 'w':
                f = zf
            elif zf.mode == 'r':
                zip_names = zf.namelist()
                if len(zip_names) != 1:
                    # `handles` never reaches the caller when we raise,
                    # so release the archive here
                    zf.close()
                    if not zip_names:
                        raise ValueError('Zero files found in ZIP file {}'
                                         .format(path_or_buf))
                    raise ValueError('Multiple files found in ZIP file.'
                                     ' Only one file per ZIP: {}'
                                     .format(zip_names))
                f = zf.open(zip_names.pop())

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding)
        elif is_text:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace')
        else:
            # Python 3 and binary mode
            f = open(path_or_buf, mode)
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if (compat.PY3 and is_text and
            (compression or isinstance(f, need_text_wrapping))):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding)
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles
Exemplo n.º 16
0
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    Index,
    Series,
    period_range,
)
import pandas._testing as tm

from pandas.tseries.offsets import (
    Day,
    MonthEnd,
)

lzma = import_lzma()


@pytest.fixture(scope="module")
def current_pickle_data():
    """Module-scoped fixture returning the result of ``create_pickle_data``
    for the pandas version under test.
    """
    # imported inside the fixture rather than at module import time
    from pandas.tests.io import generate_legacy_storage_files as legacy_files

    return legacy_files.create_pickle_data()


# ---------------------
# comparison functions
# ---------------------
def compare_element(result, expected, typ, version=None):
    if isinstance(expected, Index):
Exemplo n.º 17
0
"""
Tests compressed data parsing functionality for all
of the parsers defined in parsers.py
"""

import pytest

import pandas as pd
import pandas.compat as compat
import pandas.util.testing as tm
import pandas.util._test_decorators as td

import gzip
import bz2
try:
    lzma = compat.import_lzma()
except ImportError:
    lzma = None


class CompressionTests(object):
    def test_zip(self):
        import zipfile

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean('test_file.zip') as path:
            tmp = zipfile.ZipFile(path, mode='w')
            tmp.writestr('test_file', data)
Exemplo n.º 18
0
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : str or None
        Supported compression protocols are gzip, bz2, zip, and xz
    memory_map : boolean, default False
        See parsers._parser_params for more information.

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like objects that were opened in this function.

    Raises
    ------
    ValueError
        If a buffer is combined with `compression` and `encoding` on
        Python 2, if a ZIP archive does not hold exactly one file, or if
        `compression` is not recognized.
    """

    handles = list()
    f = path_or_buf
    is_path = isinstance(path_or_buf, compat.string_types)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path_or_buf)
            zip_names = zip_file.namelist()
            if len(zip_names) != 1:
                # do not leave the archive open when rejecting it
                zip_file.close()
                if not zip_names:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path_or_buf))
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP: {}'
                                 .format(zip_names))
            # register the container too, so that closing `handles` also
            # releases the archive and not only the member read from it
            handles.append(zip_file)
            f = zip_file.open(zip_names.pop())

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding)
        else:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace')
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if compat.PY3 and (compression or isinstance(f, need_text_wrapping)):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding)
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles
Exemplo n.º 19
0
def _skip_if_no_lzma():
    """Report whether lzma is unavailable.

    Returns
    -------
    bool
        True if ``import_lzma()`` raises ImportError, False otherwise.
    """
    try:
        import_lzma()
    except ImportError:
        return True
    # explicit falsy result instead of an implicit None
    return False
Exemplo n.º 20
0
def _get_handle(source, mode, encoding=None, compression=None, memory_map=False):
    """Gets file handle for given path and mode.

    Parameters
    ----------
    source :
        A path (str) or a file-like object.  On Python 3 a
        ``compat.BytesIO`` is returned wrapped in a ``TextIOWrapper``
        straight away, before `compression` is looked at.
    mode : str
        Mode handed to the opener when `source` is a path.
    encoding : str or None, default None
        Text encoding.  On Python 2 it cannot be combined with
        `compression` for a non-path `source`.
    compression : {'gzip', 'bz2', 'zip', 'xz', None}, default None
        Compression protocol, matched case-insensitively.
    memory_map : boolean, default False
        If True, try to wrap the handle in a ``MMapWrapper``.  Only takes
        effect when `compression` is None, because the compressed branch
        returns before the wrapping step.

    Returns
    -------
    f : file-like

    Raises
    ------
    ValueError
        If `encoding` and `compression` are combined for a non-path
        `source` on Python 2, if a ZIP archive does not hold exactly one
        file, or if `compression` is not recognized.
    """

    f = source
    is_path = isinstance(source, compat.string_types)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if compat.PY3 and isinstance(source, compat.BytesIO):
        from io import TextIOWrapper

        return TextIOWrapper(source, encoding=encoding)

    elif compression is not None:
        compression = compression.lower()
        if encoding is not None and not compat.PY3 and not is_path:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip

            f = gzip.GzipFile(source, mode) \
                if is_path else gzip.GzipFile(fileobj=source)

        # BZ Compression
        elif compression == 'bz2':
            import bz2

            if is_path:
                f = bz2.BZ2File(source, mode)

            else:
                f = bz2.BZ2File(source) if compat.PY3 else StringIO(
                    bz2.decompress(source.read()))
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually

        # ZIP Compression
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(source)
            zip_names = zip_file.namelist()

            if len(zip_names) != 1:
                # do not leave the archive open when rejecting it
                zip_file.close()
                if not zip_names:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(source))
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP :{}'
                                 .format(zip_names))
            f = zip_file.open(zip_names.pop())

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(source, mode)

        else:
            raise ValueError('Unrecognized compression: %s' % compression)

        if compat.PY3:
            from io import TextIOWrapper

            f = TextIOWrapper(f, encoding=encoding)

        return f

    elif is_path:
        if compat.PY3:
            if encoding:
                f = open(source, mode, encoding=encoding)
            else:
                f = open(source, mode, errors='replace')
        else:
            f = open(source, mode)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f
Exemplo n.º 21
0
of the parsers defined in parsers.py
"""

import bz2
import gzip

import pytest

import pandas.compat as compat
import pandas.util._test_decorators as td

import pandas as pd
import pandas.util.testing as tm

try:
    lzma = compat.import_lzma()
except ImportError:
    lzma = None


class CompressionTests(object):

    def test_zip(self):
        import zipfile

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean('test_file.zip') as path:
            with zipfile.ZipFile(path, mode='w') as tmp: