def test_from_bcolz():
    """Round-trip a small bcolz ctable through ``dd.from_bcolz``."""
    bcolz = pytest.importorskip('bcolz')

    table = bcolz.ctable(
        [[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
        names=['x', 'y', 'a'],
    )
    ddf = dd.from_bcolz(table, chunksize=2)

    assert ddf.npartitions == 2
    assert str(ddf.dtypes['a']) == 'category'
    assert list(ddf.x.compute(get=get_sync)) == [1, 2, 3]
    assert list(ddf.a.compute(get=get_sync)) == ['a', 'b', 'a']

    # Column-backed index: ordering across partitions is not guaranteed.
    ddf = dd.from_bcolz(table, chunksize=2, index='x')
    index_values = list(ddf.index.compute(get=get_sync))
    assert index_values in ([1, 2, 3], [1, 3, 2])
def test_from_bcolz_no_lock():
    """``lock=False`` must leave no Lock objects in the generated task graph."""
    bcolz = pytest.importorskip("bcolz")
    lock_cls = type(Lock())

    table = bcolz.ctable(
        [[1, 2, 3], [1.0, 2.0, 3.0], ["a", "b", "a"]],
        names=["x", "y", "a"],
        chunklen=2,
    )
    default = dd.from_bcolz(table, chunksize=2)
    with_lock = dd.from_bcolz(table, chunksize=2, lock=True)
    without_lock = dd.from_bcolz(table, chunksize=2, lock=False)

    assert_eq(default, with_lock)
    assert_eq(default, without_lock)
    # No task argument in the lock-free graph may be a lock instance.
    assert not any(
        isinstance(arg, lock_cls)
        for task in without_lock.dask.values()
        for arg in task
    )
def test_from_bcolz():
    """dd.from_bcolz: dtypes, contents, index handling and graph-key naming."""
    bcolz = pytest.importorskip('bcolz')

    table = bcolz.ctable(
        [[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
        names=['x', 'y', 'a'],
    )
    ddf = dd.from_bcolz(table, chunksize=2)

    assert ddf._known_dtype
    assert ddf.npartitions == 2
    assert str(ddf.dtypes['a']) == 'category'
    assert list(ddf.x.compute(get=get_sync)) == [1, 2, 3]
    assert list(ddf.a.compute(get=get_sync)) == ['a', 'b', 'a']

    # Default index is a plain positional range.
    positional = list(ddf.index.compute(get=get_sync))
    assert positional == [0, 1, 2]

    # Column-backed index: ordering across partitions is not guaranteed.
    ddf = dd.from_bcolz(table, chunksize=2, index='x')
    by_column = list(ddf.index.compute(get=get_sync))
    assert by_column in ([1, 2, 3], [1, 3, 2])

    # Names: identical inputs give identical keys; a different chunksize
    # must produce different keys.
    assert (sorted(dd.from_bcolz(table, chunksize=2).dask) ==
            sorted(dd.from_bcolz(table, chunksize=2).dask))
    assert (sorted(dd.from_bcolz(table, chunksize=2).dask) !=
            sorted(dd.from_bcolz(table, chunksize=3).dask))

    # Mutating the source table must also invalidate the old keys.
    stale_graph = dd.from_bcolz(table, chunksize=3).dask
    table.append((4, 4., 'b'))
    table.flush()
    assert sorted(dd.from_bcolz(table, chunksize=2).dask) != sorted(stale_graph)
def test_from_bcolz():
    """Validate dd.from_bcolz: dtypes, data, index options and graph naming."""
    bcolz = pytest.importorskip("bcolz")

    table = bcolz.ctable(
        [[1, 2, 3], [1.0, 2.0, 3.0], ["a", "b", "a"]],
        names=["x", "y", "a"],
    )
    ddf = dd.from_bcolz(table, chunksize=2)

    assert ddf.npartitions == 2
    assert is_categorical_dtype(ddf.dtypes["a"])
    assert list(ddf.x.compute(scheduler="sync")) == [1, 2, 3]
    assert list(ddf.a.compute(scheduler="sync")) == ["a", "b", "a"]

    # Default index is a simple positional range.
    positional = list(ddf.index.compute(scheduler="sync"))
    assert positional == [0, 1, 2]

    # Column-backed index: ordering across partitions is not guaranteed.
    ddf = dd.from_bcolz(table, chunksize=2, index="x")
    by_column = list(ddf.index.compute(scheduler="sync"))
    assert by_column in ([1, 2, 3], [1, 3, 2])

    # Identical inputs produce identical task names; chunksize changes them.
    assert sorted(dd.from_bcolz(table, chunksize=2).dask) == sorted(
        dd.from_bcolz(table, chunksize=2).dask
    )
    assert sorted(dd.from_bcolz(table, chunksize=2).dask) != sorted(
        dd.from_bcolz(table, chunksize=3).dask
    )

    # Mutating the underlying table must invalidate the old keys.
    stale = dd.from_bcolz(table, chunksize=3).dask
    table.append((4, 4.0, "b"))
    table.flush()
    assert sorted(dd.from_bcolz(table, chunksize=2).dask) != sorted(stale)
def test_from_bcolz():
    """dd.from_bcolz: contents, column-backed index, and key naming rules."""
    bcolz = pytest.importorskip('bcolz')

    table = bcolz.ctable(
        [[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
        names=['x', 'y', 'a'],
    )
    ddf = dd.from_bcolz(table, chunksize=2)

    assert ddf.npartitions == 2
    assert str(ddf.dtypes['a']) == 'category'
    assert list(ddf.x.compute(get=get_sync)) == [1, 2, 3]
    assert list(ddf.a.compute(get=get_sync)) == ['a', 'b', 'a']

    # Column-backed index: ordering across partitions is not guaranteed.
    ddf = dd.from_bcolz(table, chunksize=2, index='x')
    indexed = list(ddf.index.compute(get=get_sync))
    assert indexed in ([1, 2, 3], [1, 3, 2])

    # Names: same inputs -> same keys; different chunksize -> different keys.
    assert (sorted(dd.from_bcolz(table, chunksize=2).dask) ==
            sorted(dd.from_bcolz(table, chunksize=2).dask))
    assert (sorted(dd.from_bcolz(table, chunksize=2).dask) !=
            sorted(dd.from_bcolz(table, chunksize=3).dask))

    # Mutating the source table must also invalidate the old keys.
    old_graph = dd.from_bcolz(table, chunksize=3).dask
    table.append((4, 4., 'b'))
    table.flush()
    assert sorted(dd.from_bcolz(table, chunksize=2).dask) != sorted(old_graph)
def test_from_bcolz():
    """dd.from_bcolz: categorical dtype, data, both index modes, key naming."""
    bcolz = pytest.importorskip('bcolz')

    table = bcolz.ctable(
        [[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
        names=['x', 'y', 'a'],
    )
    ddf = dd.from_bcolz(table, chunksize=2)

    assert ddf.npartitions == 2
    assert is_categorical_dtype(ddf.dtypes['a'])
    assert list(ddf.x.compute(scheduler='sync')) == [1, 2, 3]
    assert list(ddf.a.compute(scheduler='sync')) == ['a', 'b', 'a']

    # Without an explicit index, a positional range is used.
    default_index = list(ddf.index.compute(scheduler='sync'))
    assert default_index == [0, 1, 2]

    # With index='x' the partition ordering is not guaranteed.
    ddf = dd.from_bcolz(table, chunksize=2, index='x')
    x_index = list(ddf.index.compute(scheduler='sync'))
    assert x_index in ([1, 2, 3], [1, 3, 2])

    # Names: deterministic for equal inputs, distinct for different chunksize.
    assert (sorted(dd.from_bcolz(table, chunksize=2).dask) ==
            sorted(dd.from_bcolz(table, chunksize=2).dask))
    assert (sorted(dd.from_bcolz(table, chunksize=2).dask) !=
            sorted(dd.from_bcolz(table, chunksize=3).dask))

    # Appending rows to the table must change the graph keys too.
    stale = dd.from_bcolz(table, chunksize=3).dask
    table.append((4, 4., 'b'))
    table.flush()
    assert sorted(dd.from_bcolz(table, chunksize=2).dask) != sorted(stale)
def test_from_bcolz_no_lock():
    """With ``lock=False`` the task graph must contain no lock objects."""
    bcolz = pytest.importorskip('bcolz')
    lock_cls = type(Lock())

    table = bcolz.ctable(
        [[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
        names=['x', 'y', 'a'],
        chunklen=2,
    )
    default = dd.from_bcolz(table, chunksize=2)
    with_lock = dd.from_bcolz(table, chunksize=2, lock=True)
    without_lock = dd.from_bcolz(table, chunksize=2, lock=False)

    assert_eq(default, with_lock)
    assert_eq(default, without_lock)
    # Every task in the lock-free graph must be free of lock instances.
    assert not any(
        isinstance(arg, lock_cls)
        for task in without_lock.dask.values()
        for arg in task
    )
def test_from_bcolz_no_lock():
    """``lock=False`` must keep Lock instances out of the task graph."""
    bcolz = pytest.importorskip('bcolz')
    lock_cls = type(Lock())

    table = bcolz.ctable(
        [[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
        names=['x', 'y', 'a'],
        chunklen=2,
    )
    default = dd.from_bcolz(table, chunksize=2)
    with_lock = dd.from_bcolz(table, chunksize=2, lock=True)
    without_lock = dd.from_bcolz(table, chunksize=2, lock=False)

    eq(default, with_lock)
    eq(default, without_lock)
    # Scan every task argument for stray lock objects.
    assert not any(
        isinstance(arg, lock_cls)
        for task in without_lock.dask.values()
        for arg in task
    )
def test_from_bcolz_column_order():
    """Columns must come back in the ctable's declared order."""
    bcolz = pytest.importorskip('bcolz')

    table = bcolz.ctable(
        [[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
        names=['x', 'y', 'a'],
    )
    ddf = dd.from_bcolz(table, chunksize=2)
    columns = list(ddf.loc[0].compute().columns)
    assert columns == ['x', 'y', 'a']
def test_from_bcolz_column_order():
    """Column order of the resulting frame matches the source ctable."""
    bcolz = pytest.importorskip("bcolz")

    table = bcolz.ctable(
        [[1, 2, 3], [1.0, 2.0, 3.0], ["a", "b", "a"]],
        names=["x", "y", "a"],
    )
    frame = dd.from_bcolz(table, chunksize=2)
    assert list(frame.loc[0].compute().columns) == ["x", "y", "a"]
def test_from_bcolz():
    """Basic dd.from_bcolz round trip: partitioning, dtypes and contents.

    Fixed: the original guarded the import with ``try/except ImportError:
    return``, which makes a missing bcolz report as a silently *passing*
    test. ``pytest.importorskip`` reports a proper skip instead, matching
    the other tests in this file.
    """
    bcolz = pytest.importorskip('bcolz')

    t = bcolz.ctable([[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
                     names=['x', 'y', 'a'])
    d = dd.from_bcolz(t, chunksize=2)
    assert d.npartitions == 2
    assert str(d.dtypes['a']) == 'category'
    assert list(d.x.compute(get=dask.get)) == [1, 2, 3]
    assert list(d.a.compute(get=dask.get)) == ['a', 'b', 'a']

    # Index built from column 'x'.
    d = dd.from_bcolz(t, chunksize=2, index='x')
    assert list(d.index.compute()) == [1, 2, 3]
def test_from_bcolz_filename():
    """from_bcolz accepts an on-disk rootdir path instead of a ctable."""
    bcolz = pytest.importorskip("bcolz")

    with tmpfile(".bcolz") as path:
        table = bcolz.ctable(
            [[1, 2, 3], [1.0, 2.0, 3.0], ["a", "b", "a"]],
            names=["x", "y", "a"],
            rootdir=path,
        )
        table.flush()

        frame = dd.from_bcolz(path, chunksize=2)
        assert list(frame.x.compute()) == [1, 2, 3]
def test_from_bcolz_filename():
    """A flushed bcolz rootdir path can be handed straight to from_bcolz."""
    bcolz = pytest.importorskip('bcolz')

    with tmpfile('.bcolz') as path:
        table = bcolz.ctable(
            [[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
            names=['x', 'y', 'a'],
            rootdir=path,
        )
        table.flush()

        frame = dd.from_bcolz(path, chunksize=2)
        assert list(frame.x.compute()) == [1, 2, 3]
def test_from_bcolz_filename():
    """from_bcolz accepts an on-disk rootdir path instead of a ctable.

    Fixed: the original guarded the import with ``try/except ImportError:
    return``, which makes a missing bcolz report as a silently *passing*
    test. ``pytest.importorskip`` reports a proper skip instead, matching
    the other tests in this file.
    """
    bcolz = pytest.importorskip('bcolz')

    with tmpfile('.bcolz') as fn:
        t = bcolz.ctable([[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
                         names=['x', 'y', 'a'], rootdir=fn)
        t.flush()

        d = dd.from_bcolz(fn, chunksize=2)
        assert list(d.x.compute()) == [1, 2, 3]
def test_from_bcolz_filename():
    """Loading by rootdir filename yields the same data as the ctable."""
    bcolz = pytest.importorskip("bcolz")

    with tmpfile(".bcolz") as path:
        table = bcolz.ctable(
            [[1, 2, 3], [1.0, 2.0, 3.0], ["a", "b", "a"]],
            names=["x", "y", "a"],
            rootdir=path,
        )
        table.flush()

        frame = dd.from_bcolz(path, chunksize=2)
        assert list(frame.x.compute()) == [1, 2, 3]
def test_from_bcolz():
    """dd.from_bcolz: contents, column index, and graph-key naming."""
    bcolz = pytest.importorskip("bcolz")

    table = bcolz.ctable(
        [[1, 2, 3], [1.0, 2.0, 3.0], ["a", "b", "a"]],
        names=["x", "y", "a"],
    )
    ddf = dd.from_bcolz(table, chunksize=2)

    assert ddf.npartitions == 2
    assert str(ddf.dtypes["a"]) == "category"
    assert list(ddf.x.compute(get=get_sync)) == [1, 2, 3]
    assert list(ddf.a.compute(get=get_sync)) == ["a", "b", "a"]

    # Column-backed index: ordering across partitions is not guaranteed.
    ddf = dd.from_bcolz(table, chunksize=2, index="x")
    indexed = list(ddf.index.compute(get=get_sync))
    assert indexed in ([1, 2, 3], [1, 3, 2])

    # Names: deterministic for equal inputs, distinct per chunksize.
    assert sorted(dd.from_bcolz(table, chunksize=2).dask) == sorted(
        dd.from_bcolz(table, chunksize=2).dask
    )
    assert sorted(dd.from_bcolz(table, chunksize=2).dask) != sorted(
        dd.from_bcolz(table, chunksize=3).dask
    )

    # Appending rows must change the keys as well.
    stale = dd.from_bcolz(table, chunksize=3).dask
    table.append((4, 4.0, "b"))
    table.flush()
    assert sorted(dd.from_bcolz(table, chunksize=2).dask) != sorted(stale)
def check(i):
    """Exercise dd.from_bcolz on a small ctable (``i`` is unused; it exists
    so this helper can be mapped over worker indices)."""
    table = bcolz.ctable(
        [[1, 2, 3], [1.0, 2.0, 3.0], ["a", "b", "a"]],
        names=["x", "y", "a"],
    )
    ddf = dd.from_bcolz(table, chunksize=2)

    assert ddf.npartitions == 2
    assert is_categorical_dtype(ddf.dtypes["a"])
    assert list(ddf.x.compute(scheduler="sync")) == [1, 2, 3]
    assert list(ddf.a.compute(scheduler="sync")) == ["a", "b", "a"]

    # Column-backed index: ordering across partitions is not guaranteed.
    ddf = dd.from_bcolz(table, chunksize=2, index="x")
    indexed = list(ddf.index.compute(scheduler="sync"))
    assert indexed in ([1, 2, 3], [1, 3, 2])

    # Names: equal inputs share keys; a different chunksize does not.
    assert sorted(dd.from_bcolz(table, chunksize=2).dask) == sorted(
        dd.from_bcolz(table, chunksize=2).dask
    )
    assert sorted(dd.from_bcolz(table, chunksize=2).dask) != sorted(
        dd.from_bcolz(table, chunksize=3).dask
    )
def check():
    """Exercise dd.from_bcolz on a small ctable and verify its key naming."""
    table = bcolz.ctable(
        [[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
        names=['x', 'y', 'a'],
    )
    ddf = dd.from_bcolz(table, chunksize=2)

    assert ddf.npartitions == 2
    assert str(ddf.dtypes['a']) == 'category'
    assert list(ddf.x.compute(get=get_sync)) == [1, 2, 3]
    assert list(ddf.a.compute(get=get_sync)) == ['a', 'b', 'a']

    # Column-backed index: ordering across partitions is not guaranteed.
    ddf = dd.from_bcolz(table, chunksize=2, index='x')
    indexed = list(ddf.index.compute(get=get_sync))
    assert indexed in ([1, 2, 3], [1, 3, 2])

    # Names: equal inputs share keys; a different chunksize does not.
    assert (sorted(dd.from_bcolz(table, chunksize=2).dask) ==
            sorted(dd.from_bcolz(table, chunksize=2).dask))
    assert (sorted(dd.from_bcolz(table, chunksize=2).dask) !=
            sorted(dd.from_bcolz(table, chunksize=3).dask))
def check():
    """Smoke-test dd.from_bcolz: contents, column index, key determinism."""
    table = bcolz.ctable(
        [[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
        names=['x', 'y', 'a'],
    )
    ddf = dd.from_bcolz(table, chunksize=2)

    assert ddf.npartitions == 2
    assert str(ddf.dtypes['a']) == 'category'
    assert list(ddf.x.compute(get=get_sync)) == [1, 2, 3]
    assert list(ddf.a.compute(get=get_sync)) == ['a', 'b', 'a']

    # Index from column 'x' — partition order may differ.
    ddf = dd.from_bcolz(table, chunksize=2, index='x')
    index_values = list(ddf.index.compute(get=get_sync))
    assert index_values in ([1, 2, 3], [1, 3, 2])

    # Key naming: stable for identical inputs, new for a new chunksize.
    assert (sorted(dd.from_bcolz(table, chunksize=2).dask) ==
            sorted(dd.from_bcolz(table, chunksize=2).dask))
    assert (sorted(dd.from_bcolz(table, chunksize=2).dask) !=
            sorted(dd.from_bcolz(table, chunksize=3).dask))
def check(i):
    """Smoke-test dd.from_bcolz (``i`` is unused; kept for the caller's
    map-style invocation)."""
    table = bcolz.ctable(
        [[1, 2, 3], [1., 2., 3.], ['a', 'b', 'a']],
        names=['x', 'y', 'a'],
    )
    ddf = dd.from_bcolz(table, chunksize=2)

    assert ddf.npartitions == 2
    assert is_categorical_dtype(ddf.dtypes['a'])
    assert list(ddf.x.compute(scheduler='sync')) == [1, 2, 3]
    assert list(ddf.a.compute(scheduler='sync')) == ['a', 'b', 'a']

    # Index from column 'x' — partition order may differ.
    ddf = dd.from_bcolz(table, chunksize=2, index='x')
    index_values = list(ddf.index.compute(scheduler='sync'))
    assert index_values in ([1, 2, 3], [1, 3, 2])

    # Key naming: stable for identical inputs, new for a new chunksize.
    assert (sorted(dd.from_bcolz(table, chunksize=2).dask) ==
            sorted(dd.from_bcolz(table, chunksize=2).dask))
    assert (sorted(dd.from_bcolz(table, chunksize=2).dask) !=
            sorted(dd.from_bcolz(table, chunksize=3).dask))
# Optional positional overrides: argv[5] selects the y column,
# argv[6:] supplies the category list.
if len(sys.argv) > 5:
    p.y = sys.argv[5]
if len(sys.argv) > 6:
    p.categories = sys.argv[6:]

from dask.cache import Cache
Cache(p.cachesize).register()

# Formats that persist categorical dtypes natively.
filetypes_storing_categories = {'parq', 'castra'}

# Reader dispatch table: read[filetype][library] -> callable(filepath, p).
read = odict([(f, odict())
              for f in ["parq", "bcolz", "feather", "castra", "h5", "csv"]])

read["csv"]["dask"] = lambda filepath, p: dd.read_csv(
    filepath, usecols=p.columns)
read["h5"]["dask"] = lambda filepath, p: dd.read_hdf(
    filepath, p.base, chunksize=p.chunksize, columns=p.columns)
read["castra"]["dask"] = lambda filepath, p: dd.from_castra(filepath)
read["bcolz"]["dask"] = lambda filepath, p: dd.from_bcolz(
    filepath, chunksize=1000000)
read["parq"]["dask"] = lambda filepath, p: dd.io.parquet.read_parquet(
    filepath, index=False, categories=p.categories, columns=p.columns)

read["csv"]["pandas"] = lambda filepath, p: pd.read_csv(
    filepath, usecols=p.columns)
read["h5"]["pandas"] = lambda filepath, p: pd.read_hdf(
    filepath, p.base, columns=p.columns)
read["feather"]["pandas"] = lambda filepath, p: feather.read_dataframe(filepath)
read["parq"]["pandas"] = lambda filepath, p: fp.ParquetFile(filepath).to_pandas()

# Writer dispatch table: write[filetype][library] -> callable(df, filepath, p).
write = odict([(f, odict())
               for f in ["parq", "snappy.parq", "gz.parq", "bcolz",
                         "feather", "castra", "h5", "csv"]])

write["csv"]["dask"] = lambda df, filepath, p: df.to_csv(
    filepath.replace(".csv", "*.csv"))
write["h5"]["dask"] = lambda df, filepath, p: df.to_hdf(filepath, p.base)
write["castra"]["dask"] = lambda df, filepath, p: df.to_castra(
    filepath, categories=p.categories)
write["parq"]["dask"] = lambda df, filepath, p: dd.io.parquet.to_parquet(
    filepath, df)  ## **p.parq_opts
write["snappy.parq"]["dask"] = lambda df, filepath, p: dd.io.parquet.to_parquet(
    filepath, df, compression='SNAPPY')  ## **p.parq_opts