示例#1
0
文件: test_bag.py 项目: serazing/dask
def test_from_castra():
    """Round-trip a dask dataframe through castra and read it back as a bag.

    Exercises ``db.from_castra`` with default arguments, with a single
    column selected, and with ``index=True``, then checks that the three
    bags carry pairwise-distinct names while an identical call reproduces
    the same name.
    """
    pytest.importorskip('castra')
    pd = pytest.importorskip('pandas')
    dd = pytest.importorskip('dask.dataframe')
    blosc = pytest.importorskip('blosc')
    # NOTE(review): blosc 1.3.0 is skipped outright; presumably a known
    # incompatibility -- confirm and consider recording the reason.
    if LooseVersion(blosc.__version__) == '1.3.0':
        pytest.skip()
    df = pd.DataFrame({
        'x': list(range(100)),
        'y': [str(i) for i in range(100)]
    })
    a = dd.from_pandas(df, 10)

    with tmpfile('.castra') as fn:
        c = a.to_castra(fn)
        default = db.from_castra(c)
        with_columns = db.from_castra(c, 'x')
        with_index = db.from_castra(c, 'x', index=True)

        # Materialize once; the default records are accepted either as
        # dicts or as tuples.
        records = list(default)
        assert (records == [{'x': i, 'y': str(i)} for i in range(100)]
                or records == [(i, str(i)) for i in range(100)])
        assert list(with_columns) == list(range(100))
        assert list(with_index) == list(zip(range(100), range(100)))

        # A chained ``a != b != c`` never compares ``a`` with ``c``, so
        # every pair is checked explicitly.
        assert default.name != with_columns.name
        assert with_columns.name != with_index.name
        assert default.name != with_index.name
        # The same arguments must yield the same name.
        assert with_index.name == db.from_castra(c, 'x', index=True).name
示例#2
0
def test_from_castra():
    """Round-trip a dask dataframe through castra and read it back as a bag.

    Exercises ``db.from_castra`` with default arguments, with a single
    column selected, and with ``index=True``.
    """
    # Only the availability of castra matters here; the module object
    # itself is never used, so it is not bound to a local.
    pytest.importorskip('castra')
    pd = pytest.importorskip('pandas')
    dd = pytest.importorskip('dask.dataframe')
    df = pd.DataFrame({'x': list(range(100)),
                       'y': [str(i) for i in range(100)]})
    a = dd.from_pandas(df, 10)

    with tmpfile('.castra') as fn:
        c = a.to_castra(fn)
        default = db.from_castra(c)
        with_columns = db.from_castra(c, 'x')
        with_index = db.from_castra(c, 'x', index=True)

        # Materialize once; the default records are accepted either as
        # dicts or as tuples.
        records = list(default)
        assert (records == [{'x': i, 'y': str(i)} for i in range(100)] or
                records == [(i, str(i)) for i in range(100)])
        assert list(with_columns) == list(range(100))
        assert list(with_index) == list(zip(range(100), range(100)))
示例#3
0
文件: test_bag.py 项目: jrenner/dask
def test_from_castra():
    """Check ``db.from_castra`` against a castra written from a dataframe.

    A 100-row frame with an integer column ``x`` and a string column ``y``
    is stored via ``to_castra`` in a temporary file, then read back with
    default arguments, with column ``'x'`` only, and with ``index=True``.
    """
    # importorskip is called for its skip-if-missing effect; the castra
    # module is not referenced afterwards, so no local is kept.
    pytest.importorskip('castra')
    pd = pytest.importorskip('pandas')
    dd = pytest.importorskip('dask.dataframe')
    df = pd.DataFrame({'x': list(range(100)),
                       'y': [str(i) for i in range(100)]})
    a = dd.from_pandas(df, 10)

    with tmpfile('.castra') as fn:
        c = a.to_castra(fn)
        default = db.from_castra(c)
        with_columns = db.from_castra(c, 'x')
        with_index = db.from_castra(c, 'x', index=True)

        # Evaluate the default bag a single time and accept either the
        # dict-shaped or the tuple-shaped records.
        rows = list(default)
        as_dicts = [{'x': i, 'y': str(i)} for i in range(100)]
        as_tuples = [(i, str(i)) for i in range(100)]
        assert rows == as_dicts or rows == as_tuples
        assert list(with_columns) == list(range(100))
        assert list(with_index) == list(zip(range(100), range(100)))
示例#4
0
文件: test_bag.py 项目: hironow/dask
def test_from_castra():
    """Read a castra back as a bag and verify contents and partitioning.

    Exercises ``db.from_castra`` with default arguments, with a single
    column selected, with ``index=True``, and with ``npartitions=4``. The
    castra is dropped afterwards regardless of the outcome.
    """
    # Only the availability of castra matters; the module is not used.
    pytest.importorskip('castra')
    pd = pytest.importorskip('pandas')
    dd = pytest.importorskip('dask.dataframe')
    df = pd.DataFrame({'x': list(range(100)),
                       'y': [str(i) for i in range(100)]})
    a = dd.from_pandas(df, 10)

    c = a.to_castra()
    # Everything after creation lives inside the try block so that the
    # castra is dropped even when building one of the bags raises.
    try:
        default = db.from_castra(c)
        with_columns = db.from_castra(c, 'x')
        with_index = db.from_castra(c, 'x', index=True)
        with_nparts = db.from_castra(c, 'x', npartitions=4)

        assert list(default) == list(zip(range(100), map(str, range(100))))
        assert list(with_columns) == list(range(100))
        assert list(with_index) == list(zip(range(100), range(100)))
        assert with_nparts.npartitions == 4
        assert list(with_nparts) == list(range(100))
    finally:
        c.drop()
示例#5
0
def test_from_castra():
    """Check records and naming of bags created by ``db.from_castra``.

    A 100-row frame is written to a temporary castra and read back with
    default arguments, with column ``'x'`` only, and with ``index=True``.
    Besides the record contents, the test requires the three bag names to
    be pairwise different and a repeated identical call to give the same
    name.
    """
    pytest.importorskip('castra')
    pd = pytest.importorskip('pandas')
    dd = pytest.importorskip('dask.dataframe')
    blosc = pytest.importorskip('blosc')
    # NOTE(review): the test is skipped for blosc 1.3.0 specifically;
    # the reason is not visible here -- confirm and document it.
    if LooseVersion(blosc.__version__) == '1.3.0':
        pytest.skip()
    df = pd.DataFrame({'x': list(range(100)),
                       'y': [str(i) for i in range(100)]})
    a = dd.from_pandas(df, 10)

    with tmpfile('.castra') as fn:
        c = a.to_castra(fn)
        default = db.from_castra(c)
        with_columns = db.from_castra(c, 'x')
        with_index = db.from_castra(c, 'x', index=True)

        # Evaluate the default bag a single time and accept either the
        # dict-shaped or the tuple-shaped records.
        rows = list(default)
        assert (rows == [{'x': i, 'y': str(i)} for i in range(100)] or
                rows == [(i, str(i)) for i in range(100)])
        assert list(with_columns) == list(range(100))
        assert list(with_index) == list(zip(range(100), range(100)))

        # ``a != b != c`` only tests adjacent operands and would let
        # ``a == c`` slip through, hence one assert per pair.
        assert default.name != with_columns.name
        assert with_columns.name != with_index.name
        assert default.name != with_index.name
        # Identical arguments must reproduce the identical name.
        assert with_index.name == db.from_castra(c, 'x', index=True).name
示例#6
0
def test_from_castra():
    """Verify ``db.from_castra`` output for several argument combinations.

    A 100-row frame with an integer column ``x`` and a string column ``y``
    is written with ``to_castra()`` and read back with default arguments,
    with column ``'x'`` only, with ``index=True``, and with
    ``npartitions=4``. The castra is always dropped at the end.
    """
    # importorskip is called for its skip-if-missing effect; the castra
    # module is not referenced afterwards, so no local is kept.
    pytest.importorskip('castra')
    pd = pytest.importorskip('pandas')
    dd = pytest.importorskip('dask.dataframe')
    df = pd.DataFrame({
        'x': list(range(100)),
        'y': [str(i) for i in range(100)]
    })
    a = dd.from_pandas(df, 10)

    c = a.to_castra()
    # Bag construction is kept inside the try block: if any from_castra
    # call raises, the finally clause still drops the castra.
    try:
        default = db.from_castra(c)
        with_columns = db.from_castra(c, 'x')
        with_index = db.from_castra(c, 'x', index=True)
        with_nparts = db.from_castra(c, 'x', npartitions=4)

        assert list(default) == list(zip(range(100), map(str, range(100))))
        assert list(with_columns) == list(range(100))
        assert list(with_index) == list(zip(range(100), range(100)))
        assert with_nparts.npartitions == 4
        assert list(with_nparts) == list(range(100))
    finally:
        c.drop()