Example #1
def test_open_fh_compression(decompress, compression):
    buf = io.BytesIO()
    with sh.open(buf, "w", compression=compression) as fh:
        fh.write("hello world")
    assert decompress(buf.getvalue()) == b"hello world"
    buf.seek(0)
    with sh.open(buf, "r", compression=compression) as fh:
        assert fh.read() == "hello world"
Example #2
def test_open_binary(tmpdir, openfunc, ext, compression):
    fname = '%s/test_open%s' % (tmpdir, ext)

    with sh.open(fname, 'wb', compression=compression) as fh:
        fh.write(b"Hello world")
    with openfunc(fname, 'rb') as fh:
        assert fh.read() == b"Hello world"
    with sh.open(fname, 'ab', compression=compression) as fh:
        fh.write(b" and universe")
    with sh.open(fname, 'rb', compression=compression) as fh:
        assert fh.read() == b"Hello world and universe"
Example #3
def test_open_binary(str_or_path, tmpdir, openfunc, ext, compression):
    fname = str_or_path(f"{tmpdir}/test_open{ext}")

    with sh.open(fname, "wb", compression=compression) as fh:
        fh.write(b"Hello world")
    with openfunc(fname, "rb") as fh:
        assert fh.read() == b"Hello world"
    with sh.open(fname, "ab", compression=compression) as fh:
        fh.write(b" and universe")
    with sh.open(fname, "rb", compression=compression) as fh:
        assert fh.read() == b"Hello world and universe"
Example #4
def test_open_context(tmpdir, openfunc, ext, compression):
    os.environ['UNITTEST_BASH'] = str(tmpdir)
    fname = '%s/test_open%s' % (tmpdir, ext)
    fname_env = '$UNITTEST_BASH/test_open%s' % ext

    with sh.open(fname_env, 'w', compression=compression) as fh:
        fh.write("Hello world")
    check_fd_was_closed('test_open')
    with openfunc(fname, 'rt') as fh:
        assert fh.read() == "Hello world"
    with sh.open(fname_env, 'a', compression=compression) as fh:
        fh.write(" and universe")
    check_fd_was_closed('test_open')
    with sh.open(fname_env, 'r', compression=compression) as fh:
        assert fh.read() == "Hello world and universe"
    check_fd_was_closed('test_open')
Example #5
def test_open_context(str_or_path, tmpdir, openfunc, ext, compression):
    os.environ["UNITTEST_BASH"] = str(tmpdir)
    fname = f"{tmpdir}/test_open{ext}"
    fname_env = str_or_path(f"$UNITTEST_BASH/test_open{ext}")

    with sh.open(fname_env, "w", compression=compression) as fh:
        fh.write("Hello world")
    check_fd_was_closed("test_open")
    with openfunc(fname, "rt") as fh:
        assert fh.read() == "Hello world"
    with sh.open(fname_env, "a", compression=compression) as fh:
        fh.write(" and universe")
    check_fd_was_closed("test_open")
    with sh.open(fname_env, "r", compression=compression) as fh:
        assert fh.read() == "Hello world and universe"
    check_fd_was_closed("test_open")
Example #6
def test_open_exclusive_success(str_or_path, tmpdir, openfunc, ext,
                                compression):
    fname = str_or_path(f"{tmpdir}/test_open{ext}")
    with sh.open(fname, "x", compression=compression) as fh:
        fh.write("Hello world")
    with openfunc(fname, "rt") as fh:
        assert fh.read() == "Hello world"
Example #7
def test_open_kwargs(tmpdir, openfunc, ext, compression, newline):
    # **kwargs are passed verbatim to the underlying function
    fname = f"{tmpdir}/test_open{ext}"

    with sh.open(fname, "w", compression=compression, newline=newline) as fh:
        fh.write("Hello\nworld")
    with openfunc(fname, "rb") as fh:
        assert fh.read() == b"Hello" + newline.encode("utf8") + b"world"
Example #8
def test_open_nocontext(tmpdir, openfunc, ext, compression):
    fname = '%s/test_open%s' % (tmpdir, ext)
    fh = sh.open(fname, 'w', compression=compression)
    fh.write("Hello world")
    fh.close()
    check_fd_was_closed('test_open')
    with openfunc(fname, 'rt') as fh:
        assert fh.read() == "Hello world"
Example #9
def test_open_kwargs(tmpdir, openfunc, ext, compression, newline):
    # **kwargs are passed verbatim to the underlying function
    fname = '%s/test_open%s' % (tmpdir, ext)

    with sh.open(fname, 'w', compression=compression, newline=newline) as fh:
        fh.write("Hello\nworld")
    with openfunc(fname, 'rb') as fh:
        assert fh.read() == b"Hello" + newline.encode('utf8') + b"world"
Example #10
def test_open_nocontext(str_or_path, tmpdir, openfunc, ext, compression):
    fname = str_or_path(f"{tmpdir}/test_open{ext}")
    fh = sh.open(fname, "w", compression=compression)
    fh.write("Hello world")
    fh.close()
    check_fd_was_closed("test_open")
    with openfunc(fname, "rt") as fh:
        assert fh.read() == "Hello world"
Example #11
def test_concatenate_t4(str_or_path, tmpdir):
    # Output file does not already exist
    out = str_or_path(f"{tmpdir}/out")
    in1 = str_or_path(f"{tmpdir}/in1")
    in2 = str_or_path(f"{tmpdir}/in2")

    with sh.open(in1, "w") as fh:
        fh.write("2")
    with sh.open(in2, "w") as fh:
        fh.write("3")

    sh.concatenate([in1, in2], out, "a")
    with sh.open(out) as fh:
        assert fh.read() == "2\n3\n"
    sh.concatenate([in1, in2], out)
    with sh.open(out) as fh:
        assert fh.read() == "2\n3\n"
Example #12
def test_concatenate_t4(tmpdir):
    # Output file does not already exist
    out = '%s/out' % tmpdir
    in1 = '%s/in1' % tmpdir
    in2 = '%s/in2' % tmpdir

    with sh.open(in1, 'w') as fh:
        fh.write('2')
    with sh.open(in2, 'w') as fh:
        fh.write('3')

    sh.concatenate([in1, in2], out, 'a')
    with sh.open(out) as fh:
        assert fh.read() == '2\n3\n'
    sh.concatenate([in1, in2], out)
    with sh.open(out) as fh:
        assert fh.read() == '2\n3\n'
Example #13
def test_concatenate_b(str_or_path, tmpdir):
    # Binary mode
    out = str_or_path(f"{tmpdir}/out")
    in1 = str_or_path(f"{tmpdir}/in1")
    in2 = str_or_path(f"{tmpdir}/in2")

    with sh.open(out, "wb") as fh:
        fh.write(b"1")
    with sh.open(in1, "wb") as fh:
        fh.write(b"2")
    with sh.open(in2, "wb") as fh:
        fh.write(b"3")

    sh.concatenate([in1, in2], out, "ab")
    with sh.open(out, "rb") as fh:
        assert fh.read() == b"123"
    sh.concatenate([in1, in2], out, "wb")
    with sh.open(out, "rb") as fh:
        assert fh.read() == b"23"
Example #14
def test_concatenate_t3(tmpdir):
    # Output file already exists and is empty
    out = '%s/out' % tmpdir
    in1 = '%s/in1' % tmpdir
    in2 = '%s/in2' % tmpdir

    with sh.open(out, 'w') as fh:
        pass
    with sh.open(in1, 'w') as fh:
        fh.write('2\n')
    with sh.open(in2, 'w') as fh:
        fh.write('3\n')

    sh.concatenate([in1, in2], out, 'a')
    with sh.open(out) as fh:
        assert fh.read() == '2\n3\n'
    sh.concatenate([in1, in2], out)
    with sh.open(out) as fh:
        assert fh.read() == '2\n3\n'
Example #15
def test_concatenate_b(tmpdir):
    # Binary mode
    out = '%s/out' % tmpdir
    in1 = '%s/in1' % tmpdir
    in2 = '%s/in2' % tmpdir

    with sh.open(out, 'wb') as fh:
        fh.write(b'1')
    with sh.open(in1, 'wb') as fh:
        fh.write(b'2')
    with sh.open(in2, 'wb') as fh:
        fh.write(b'3')

    sh.concatenate([in1, in2], out, 'ab')
    with sh.open(out, 'rb') as fh:
        assert fh.read() == b'123'
    sh.concatenate([in1, in2], out, 'wb')
    with sh.open(out, 'rb') as fh:
        assert fh.read() == b'23'
Example #16
def test_concatenate_t3(str_or_path, tmpdir):
    # Output file already exists and is empty
    out = str_or_path(f"{tmpdir}/out")
    in1 = str_or_path(f"{tmpdir}/in1")
    in2 = str_or_path(f"{tmpdir}/in2")

    with sh.open(out, "w") as fh:
        pass
    with sh.open(in1, "w") as fh:
        fh.write("2\n")
    with sh.open(in2, "w") as fh:
        fh.write("3\n")

    sh.concatenate([in1, in2], out, "a")
    with sh.open(out) as fh:
        assert fh.read() == "2\n3\n"
    sh.concatenate([in1, in2], out)
    with sh.open(out) as fh:
        assert fh.read() == "2\n3\n"
Example #17
def test_concatenate_t2(str_or_path, tmpdir, newline):
    # Output file already exists and is non-empty. Files end with a newline.
    out = str_or_path(f"{tmpdir}/out")
    in1 = str_or_path(f"{tmpdir}/in1")
    in2 = str_or_path(f"{tmpdir}/in2")

    with sh.open(out, "w", newline=newline) as fh:
        fh.write("1\n")
    with sh.open(in1, "w", newline=newline) as fh:
        fh.write("2\n3\n")
    with sh.open(in2, "w", newline=newline) as fh:
        fh.write("4\n")

    n = newline.encode("utf-8")
    sh.concatenate([in1, in2], out, "a", newline=newline)
    with sh.open(out, "rb") as fh:
        assert fh.read() == b"1" + n + b"2" + n + b"3" + n + b"4" + n
    sh.concatenate([in1, in2], out, newline=newline)
    with sh.open(out, "rb") as fh:
        assert fh.read() == b"2" + n + b"3" + n + b"4" + n
Example #18
def test_concatenate_t2(tmpdir, newline):
    # Output file already exists and is non-empty. Files end with a newline.
    out = '%s/out' % tmpdir
    in1 = '%s/in1' % tmpdir
    in2 = '%s/in2' % tmpdir

    with sh.open(out, 'w', newline=newline) as fh:
        fh.write('1\n')
    with sh.open(in1, 'w', newline=newline) as fh:
        fh.write('2\n3\n')
    with sh.open(in2, 'w', newline=newline) as fh:
        fh.write('4\n')

    n = newline.encode('utf-8')
    sh.concatenate([in1, in2], out, 'a', newline=newline)
    with sh.open(out, 'rb') as fh:
        assert fh.read() == b'1' + n + b'2' + n + b'3' + n + b'4' + n
    sh.concatenate([in1, in2], out, newline=newline)
    with sh.open(out, 'rb') as fh:
        assert fh.read() == b'2' + n + b'3' + n + b'4' + n
Example #19
def read_csv(path_or_buf, unstack=True):
    """Parse an NDCSV file into a :class:`xarray.DataArray`.

    This function is conceptually similar to :func:`pandas.read_csv`, except
    that it only works for files that are strictly formatted according to
    :doc:`format` and, by design, does not offer any of the many config
    switches available in :func:`pandas.read_csv`.

    :param path_or_buf:
        One of:

        - .csv file path
        - .csv.gz / .csv.bz2 / .csv.xz file path (the compression algorithm
          is inferred automatically)
        - file-like object open for reading. It must support rewinding through
          ``seek(0)``.

    :param bool unstack:
        Set to True (the default) to automatically unstack any and all stacked
        dimensions in the output xarray, using first-seen order. Note that this
        differs from :meth:`xarray.DataArray.unstack`, which may occasionally
        use alphabetical order instead.
        All indices must be unique for the unstack to succeed. Non-index coords
        can be duplicated.

        Set to False to return the stacked dimensions as they appear in
        the CSV file.
    :returns:
        xarray.DataArray
    """
    if isinstance(path_or_buf, str):
        with sh.open(path_or_buf) as fh:
            return read_csv(fh, unstack=unstack)

    xa = _buf_to_xarray(path_or_buf)
    assert xa.ndim in (0, 1, 2)
    # print("==== _buf_to_array:\n%s" % xa)

    xa = _coords_format_conversion(xa)
    assert xa.ndim in (0, 1, 2)
    # print("==== _coords_format_conversion:\n%s" % xa)

    if xa.ndim == 1:
        xa = _unpack(xa, xa.dims[0], unstack)
        # print("==== _unpack(dim_0):\n%s" % xa)
    elif xa.ndim == 2:
        dims = xa.dims
        xa = _unpack(xa, dims[0], unstack)
        # print("==== _unpack(dim_0):\n%s" % xa)
        xa = _unpack(xa, dims[1], unstack)
        # print("==== _unpack(dim_1):\n%s" % xa)

    return xa
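A minimal round-trip sketch of the usage described in the docstring above, assuming the companion write_csv (Example #22 below) is importable alongside read_csv, e.g. from an ndcsv package; io.StringIO satisfies the seek(0) requirement, and the compressed path in the final comment is hypothetical.

import io

import xarray

# Assumption: both functions are importable from an `ndcsv` package.
from ndcsv import read_csv, write_csv

original = xarray.DataArray([1.0, 2.0, 3.0], dims=["x"], coords={"x": [10, 20, 30]})

# Serialize to NDCSV text, then parse it back from a rewindable buffer
# (io.StringIO supports the seek(0) rewind that read_csv requires).
buf = io.StringIO(write_csv(original))
restored = read_csv(buf)
# `restored` should be a DataArray equivalent to `original`;
# xarray.testing.assert_identical(original, restored) could verify this.

# Reading from a compressed path works the same way; the compression
# algorithm is inferred from the extension (the path is hypothetical):
# restored = read_csv("data/example.csv.gz")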
Example #20
def test_concatenate_t1(str_or_path, tmpdir, newline):
    # Output file already exists and is non-empty. Files end without a newline.
    # Test compression.
    out = str_or_path(f"{tmpdir}/out.gz")
    in1 = str_or_path(f"{tmpdir}/in1")
    in2 = str_or_path(f"{tmpdir}/in2.bz2")

    with sh.open(out, "w") as fh:
        fh.write("1")
    with sh.open(in1, "w") as fh:
        fh.write("2\n3")
    with sh.open(in2, "w") as fh:
        fh.write("4")

    n = newline.encode("utf-8")
    sh.concatenate([in1, in2], out, "a", newline=newline)
    with sh.open(out, "rb") as fh:
        assert fh.read() == b"1" + n + b"2" + n + b"3" + n + b"4" + n
    # Defaults to mode='w'
    sh.concatenate([in1, in2], out, newline=newline)
    with sh.open(out, "rb") as fh:
        assert fh.read() == b"2" + n + b"3" + n + b"4" + n
Example #21
def test_concatenate_t1(tmpdir, newline):
    # Output file already exists and is non-empty. Files end without a newline.
    # Test compression.
    out = '%s/out.gz' % tmpdir
    in1 = '%s/in1' % tmpdir
    in2 = '%s/in2.bz2' % tmpdir

    with sh.open(out, 'w') as fh:
        fh.write('1')
    with sh.open(in1, 'w') as fh:
        fh.write('2\n3')
    with sh.open(in2, 'w') as fh:
        fh.write('4')

    n = newline.encode('utf-8')
    sh.concatenate([in1, in2], out, 'a', newline=newline)
    with sh.open(out, 'rb') as fh:
        assert fh.read() == b'1' + n + b'2' + n + b'3' + n + b'4' + n
    # Defaults to mode='w'
    sh.concatenate([in1, in2], out, newline=newline)
    with sh.open(out, 'rb') as fh:
        assert fh.read() == b'2' + n + b'3' + n + b'4' + n
Example #22
def write_csv(
    array: xarray.DataArray | pandas.Series | pandas.DataFrame,
    path_or_buf: str | IO | None = None,
):
    """Write an n-dimensional array to an NDCSV file.

    Any number of dimensions are supported. If the array has more than two
    dimensions, all dimensions beyond the first are automatically stacked
    together on the columns of the CSV file; if you want to stack dimensions on
    the rows you'll need to manually invoke :meth:`xarray.DataArray.stack`
    beforehand.

    This function is conceptually similar to :meth:`pandas.DataFrame.to_csv`,
    except that none of the many configuration settings is made available to
    the end user, in order to ensure consistency in the output file.

    :param array:
        One of:

        - :class:`xarray.DataArray`
        - :class:`pandas.Series`
        - :class:`pandas.DataFrame`

    :param path_or_buf:
        One of:

        - .csv file path
        - .csv.gz / .csv.bz2 / .csv.xz file path (the compression algorithm
          is inferred automatically)
        - file-like object open for writing
        - None (the result is returned as a string)
    """
    if path_or_buf is None:
        buf = io.StringIO()
        write_csv(array, buf)
        return buf.getvalue()

    if isinstance(path_or_buf, str):
        # Automatically detect .csv or .csv.gz extension
        with sh.open(path_or_buf, "w") as fh:
            write_csv(array, fh)
        return

    if isinstance(array, xarray.DataArray):
        _write_csv_dataarray(array, path_or_buf)
    elif isinstance(array, (pandas.Series, pandas.DataFrame)):
        _write_csv_pandas(array, path_or_buf)
    else:
        raise TypeError(
            "Input data is not a xarray.DataArray, pandas.Series or pandas.DataFrame"
        )
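A short usage sketch of the three output targets listed in the docstring (string return, open buffer, compressed path); the ndcsv import location and the .csv.gz path are assumptions for illustration.

import io

import xarray

# Assumption: write_csv is importable from an `ndcsv` package.
from ndcsv import write_csv

arr = xarray.DataArray(
    [[1, 2], [3, 4]],
    dims=["row", "col"],
    coords={"row": ["a", "b"], "col": [10, 20]},
)

# path_or_buf=None: the NDCSV text is returned as a string.
text = write_csv(arr)

# File-like object open for writing: written in place, nothing is returned.
buf = io.StringIO()
write_csv(arr, buf)
assert buf.getvalue() == text

# Compressed path (hypothetical): gzip is inferred from the .gz extension.
# write_csv(arr, "output/example.csv.gz")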
Example #23
def test_open_encoding(tmpdir, openfunc, ext, compression):
    TEXT = "Crème brûlée"
    TEXT_REPLACED = "Cr�me br�l�e"
    fname_utf8 = '%s/test_utf8%s' % (tmpdir, ext)
    fname_latin1 = '%s/test_latin1%s' % (tmpdir, ext)

    with openfunc(fname_utf8, 'wt', encoding='utf-8') as fh:
        fh.write(TEXT)
    with openfunc(fname_latin1, 'wt', encoding='latin1') as fh:
        fh.write(TEXT)

    # sh.open must always default to utf-8
    with sh.open(fname_utf8, compression=compression) as fh:
        assert fh.read() == TEXT
    with sh.open(fname_latin1, compression=compression,
                 encoding='latin1') as fh:
        assert fh.read() == TEXT
    # sh.open must always default to replacing undecodable characters with U+FFFD
    with sh.open(fname_latin1, compression=compression) as fh:
        assert fh.read() == TEXT_REPLACED
    with pytest.raises(UnicodeDecodeError):
        with sh.open(fname_latin1, errors='strict',
                     compression=compression) as fh:
            fh.read()
Example #24
def test_open_encoding(tmpdir, openfunc, ext, compression):
    TEXT = "Crème brûlée"
    TEXT_REPLACED = "Cr�me br�l�e"
    fname_utf8 = f"{tmpdir}/test_utf8{ext}"
    fname_latin1 = f"{tmpdir}/test_latin1{ext}"

    with openfunc(fname_utf8, "wt", encoding="utf-8") as fh:
        fh.write(TEXT)
    with openfunc(fname_latin1, "wt", encoding="latin1") as fh:
        fh.write(TEXT)

    # sh.open must always default to utf-8
    with sh.open(fname_utf8, compression=compression) as fh:
        assert fh.read() == TEXT
    with sh.open(fname_latin1, compression=compression,
                 encoding="latin1") as fh:
        assert fh.read() == TEXT
    # sh.open must always default to replacing undecodable characters with U+FFFD
    with sh.open(fname_latin1, compression=compression) as fh:
        assert fh.read() == TEXT_REPLACED
    with pytest.raises(UnicodeDecodeError):
        with sh.open(fname_latin1, errors="strict",
                     compression=compression) as fh:
            fh.read()
Example #25
def test_open_exclusive_failure(tmpdir, openfunc, ext, compression):
    fname = f"{tmpdir}/test_open{ext}"
    with open(fname, "w"):
        pass
    with pytest.raises(FileExistsError):
        sh.open(fname, "x", compression=compression)
Example #26
def test_open_fh_no_compression(compression):
    buf = io.BytesIO()
    with pytest.raises(TypeError):
        sh.open(buf, compression=compression)
Example #27
def test_open_exclusive_failure(tmpdir, openfunc, ext, compression):
    fname = '%s/test_open%s' % (tmpdir, ext)
    with open(fname, 'w'):
        pass
    with pytest.raises(FileExistsError):
        sh.open(fname, 'x', compression=compression)
Example #28
def test_open_fd_invalid_compression():
    r, _ = os.pipe()
    with pytest.raises(TypeError):
        sh.open(r, "rb", compression="gzip")
Example #29
def test_open_invalid_compression():
    with pytest.raises(ValueError):
        sh.open("foo", compression="unk")
Example #30
def test_open_fd():
    r, w = os.pipe()
    with sh.open(r, "rb", buffering=0) as fh_r:
        with sh.open(w, "wb", buffering=0) as fh_w:
            fh_w.write(b"hello world\n")
            assert fh_r.readline() == b"hello world\n"