Exemplo n.º 1
0
def test_from_dask_array_compat_numpy_array_1d():
    """Check that from_dask_array and from_array agree for 1-D input.

    The same assertions are applied to the result built from the dask array
    (``d1``) and to the result built from the computed numpy array (``d2``),
    for three ``columns`` values: omitted, a string, and a one-element list.
    """
    x = da.ones(10, chunks=3)
    d1 = from_dask_array(x)       # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == x.compute()).all()
    assert d1.name is None

    d2 = from_array(x.compute())  # numpy
    # Type-check the numpy-derived result itself, not d1 a second time.
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == x.compute()).all()
    assert d2.name is None

    d1 = from_dask_array(x, columns='name')       # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == x.compute()).all()
    assert d1.name == 'name'

    d2 = from_array(x.compute(), columns='name')  # numpy
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == x.compute()).all()
    assert d2.name == 'name'

    # passing list via columns results in DataFrame
    d1 = from_dask_array(x, columns=['name'])       # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['name']))

    d2 = from_array(x.compute(), columns=['name'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['name']))
Exemplo n.º 2
0
def test_from_dask_array_compat_numpy_array_1d():
    """Check that from_dask_array and from_array agree for 1-D input.

    ``d1`` is always built from the dask array and ``d2`` from the computed
    numpy array; each pair is checked with ``columns`` omitted, given as a
    string, and given as a one-element list.
    """
    x = da.ones(10, chunks=3)
    d1 = from_dask_array(x)  # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == x.compute()).all()
    assert d1.name is None

    d2 = from_array(x.compute())  # numpy
    # Verify the type of d2 here; checking d1 again would leave d2 untested.
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == x.compute()).all()
    assert d2.name is None

    d1 = from_dask_array(x, columns='name')  # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == x.compute()).all()
    assert d1.name == 'name'

    d2 = from_array(x.compute(), columns='name')  # numpy
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == x.compute()).all()
    assert d2.name == 'name'

    # passing list via columns results in DataFrame
    d1 = from_dask_array(x, columns=['name'])  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['name']))

    d2 = from_array(x.compute(), columns=['name'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['name']))
Exemplo n.º 3
0
def test_Series_from_dask_array():
    """Build a series from a 1-D dask array, with and without a name."""
    arr = da.ones(10, chunks=4)

    # Named series: check the name, the divisions and the computed values.
    named = from_dask_array(arr, 'a')
    assert named.name == 'a'
    assert list(named.divisions) == [0, 4, 8, 10]
    actual = named.compute(get=get_sync).values
    expected = arr.compute(get=get_sync)
    assert (actual == expected).all()

    # Without a second argument the result has no name.
    unnamed = from_dask_array(arr)
    assert unnamed.name is None
Exemplo n.º 4
0
def test_Series_from_dask_array():
    """Build a series from a 1-D dask array and compare with dd.from_array."""
    arr = da.ones(10, chunks=4)

    # Named series: check the name, the divisions and the computed values.
    named = from_dask_array(arr, 'a')
    assert named.name == 'a'
    assert list(named.divisions) == [0, 4, 8, 9]
    actual = named.compute(get=get_sync).values
    expected = arr.compute(get=get_sync)
    assert (actual == expected).all()

    # Without a second argument the result has no name.
    unnamed = from_dask_array(arr)
    assert unnamed.name is None

    # dd.from_array should re-route to from_dask_array
    rerouted = dd.from_array(arr)
    assert eq(unnamed, rerouted)
Exemplo n.º 5
0
def test_Series_from_dask_array():
    """Build a series from a 1-D dask array and compare with dd.from_array."""
    source = da.ones(10, chunks=4)

    # Named case: name, divisions and values are all checked.
    with_name = from_dask_array(source, "a")
    assert with_name.name == "a"
    assert list(with_name.divisions) == [0, 4, 8, 9]
    got = with_name.compute(get=get_sync).values
    want = source.compute(get=get_sync)
    assert (got == want).all()

    # Unnamed case: the name is None.
    without_name = from_dask_array(source)
    assert without_name.name is None

    # dd.from_array should re-route to from_dask_array
    via_from_array = dd.from_array(source)
    assert eq(without_name, via_from_array)
Exemplo n.º 6
0
def test_from_dask_array_raises():
    """Check the inputs for which from_dask_array raises ValueError.

    Covers a 3-D array, a 2-D array with no columns, and a 2-D array with
    too few columns, then checks the content of the error message.
    """
    x = da.ones((3, 3, 3), chunks=2)
    pytest.raises(ValueError, lambda: from_dask_array(x))

    x = da.ones((10, 3), chunks=(3, 3))
    pytest.raises(ValueError, lambda: from_dask_array(x))  # no columns

    # Not enough columns
    pytest.raises(ValueError, lambda: from_dask_array(x, columns=['a']))

    # Use pytest.raises so the test fails if nothing is raised; a bare
    # try/except would skip the message assertions and pass silently.
    with pytest.raises(ValueError) as excinfo:
        from_dask_array(x, columns=['hello'])
    message = str(excinfo.value)
    assert 'hello' in message
    assert '3' in message
Exemplo n.º 7
0
def test_from_dask_array_raises():
    """Check the inputs for which from_dask_array raises ValueError.

    Covers a 3-D array, a 2-D array with no columns, and a 2-D array with
    too few columns, then checks the content of the error message.
    """
    x = da.ones((3, 3, 3), chunks=2)
    pytest.raises(ValueError, lambda: from_dask_array(x))

    x = da.ones((10, 3), chunks=(3, 3))
    pytest.raises(ValueError, lambda: from_dask_array(x))  # no columns

    # Not enough columns
    pytest.raises(ValueError, lambda: from_dask_array(x, columns=["a"]))

    # The context-manager form fails the test when no exception is raised,
    # so the message checks below always run.
    with pytest.raises(ValueError) as excinfo:
        from_dask_array(x, columns=["hello"])
    message = str(excinfo.value)
    assert "hello" in message
    assert "3" in message
Exemplo n.º 8
0
def test_DataFrame_from_dask_array():
    """Build a frame from a 2-D dask array; check columns, divisions, values."""
    arr = da.ones((10, 3), chunks=(4, 2))
    frame = from_dask_array(arr, ['a', 'b', 'c'])

    assert list(frame.columns) == ['a', 'b', 'c']
    assert list(frame.divisions) == [0, 4, 8, 10]

    actual = frame.compute(get=get_sync).values
    expected = arr.compute(get=get_sync)
    assert (actual == expected).all()
Exemplo n.º 9
0
def test_DataFrame_from_dask_array():
    """Build a frame from a 2-D dask array and compare with dd.from_array.

    Checks columns, divisions and computed values of the from_dask_array
    result, then that dd.from_array yields the same columns and divisions.
    """
    x = da.ones((10, 3), chunks=(4, 2))

    df = from_dask_array(x, ['a', 'b', 'c'])
    assert list(df.columns) == ['a', 'b', 'c']
    assert list(df.divisions) == [0, 4, 8, 9]
    assert (df.compute(get=get_sync).values == x.compute(get=get_sync)).all()

    # dd.from_array should re-route to from_dask_array
    df2 = dd.from_array(x, columns=['a', 'b', 'c'])
    # Compare as lists: `==` between pandas Index objects is elementwise and
    # its result has no single truth value for `assert` to evaluate.
    assert list(df2.columns) == list(df.columns)
    assert df2.divisions == df.divisions
Exemplo n.º 10
0
def test_DataFrame_from_dask_array():
    """Build a frame from a 2-D dask array and compare with dd.from_array.

    Checks columns, divisions and computed values of the from_dask_array
    result, then that dd.from_array yields the same columns and divisions.
    """
    x = da.ones((10, 3), chunks=(4, 2))

    df = from_dask_array(x, ["a", "b", "c"])
    assert list(df.columns) == ["a", "b", "c"]
    assert list(df.divisions) == [0, 4, 8, 9]
    assert (df.compute(get=get_sync).values == x.compute(get=get_sync)).all()

    # dd.from_array should re-route to from_dask_array
    df2 = dd.from_array(x, columns=["a", "b", "c"])
    # Compare as lists so the check also works when columns are a pandas
    # Index, where `==` is elementwise and cannot be used as a bare assert.
    assert list(df2.columns) == list(df.columns)
    assert df2.divisions == df.divisions
Exemplo n.º 11
0
def test_from_dask_array_compat_numpy_array():
    """Check that from_dask_array and from_array agree for 2-D and 3-D input.

    ``d1`` is built from the dask array and ``d2`` from the computed numpy
    array. A 3-D array and a too-short ``columns`` list must raise
    ValueError for both; otherwise both results are checked for type,
    values and column labels.
    """
    x = da.ones((3, 3, 3), chunks=2)

    with pytest.raises(ValueError):
        from_dask_array(x)       # dask

    with pytest.raises(ValueError):
        from_array(x.compute())  # numpy

    x = da.ones((10, 3), chunks=(3, 3))
    d1 = from_dask_array(x)       # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index([0, 1, 2]))

    d2 = from_array(x.compute())  # numpy
    # Type-check the numpy-derived result itself, not d1 a second time.
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index([0, 1, 2]))

    with pytest.raises(ValueError):
        from_dask_array(x, columns=['a'])       # dask

    with pytest.raises(ValueError):
        from_array(x.compute(), columns=['a'])  # numpy

    d1 = from_dask_array(x, columns=['a', 'b', 'c'])       # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['a', 'b', 'c']))

    d2 = from_array(x.compute(), columns=['a', 'b', 'c'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['a', 'b', 'c']))
Exemplo n.º 12
0
def test_DataFrame_from_dask_array():
    """Build a frame from a 2-D dask array and compare with dd.from_array.

    Checks type, columns, divisions and computed values of the
    from_dask_array result, then that dd.from_array yields a DataFrame with
    the same columns and divisions.
    """
    x = da.ones((10, 3), chunks=(4, 2))

    df = from_dask_array(x, ['a', 'b', 'c'])
    assert isinstance(df, dd.DataFrame)
    tm.assert_index_equal(df.columns, pd.Index(['a', 'b', 'c']))
    assert list(df.divisions) == [0, 4, 8, 9]
    assert (df.compute(get=get_sync).values == x.compute(get=get_sync)).all()

    # dd.from_array should re-route to from_dask_array
    df2 = dd.from_array(x, columns=['a', 'b', 'c'])
    # Check the type of df2, the object produced by dd.from_array.
    assert isinstance(df2, dd.DataFrame)
    tm.assert_index_equal(df2.columns, df.columns)
    assert df2.divisions == df.divisions
Exemplo n.º 13
0
def test_DataFrame_from_dask_array():
    """Build a frame from a 2-D dask array and compare with dd.from_array.

    Checks type, columns, divisions and computed values of the
    from_dask_array result, then that dd.from_array yields a DataFrame with
    the same columns and divisions.
    """
    x = da.ones((10, 3), chunks=(4, 2))

    df = from_dask_array(x, ['a', 'b', 'c'])
    assert isinstance(df, dd.DataFrame)
    tm.assert_index_equal(df.columns, pd.Index(['a', 'b', 'c']))
    assert list(df.divisions) == [0, 4, 8, 9]
    assert (df.compute(get=get_sync).values == x.compute(get=get_sync)).all()

    # dd.from_array should re-route to from_dask_array
    df2 = dd.from_array(x, columns=['a', 'b', 'c'])
    # The isinstance check must target df2; df was already verified above.
    assert isinstance(df2, dd.DataFrame)
    tm.assert_index_equal(df2.columns, df.columns)
    assert df2.divisions == df.divisions
Exemplo n.º 14
0
def test_from_dask_array_compat_numpy_array():
    """Check that from_dask_array and from_array agree for 2-D and 3-D input.

    ``d1`` is built from the dask array and ``d2`` from the computed numpy
    array. A 3-D array and a too-short ``columns`` list must raise
    ValueError with the expected message for both; otherwise both results
    are checked for type, values and column labels.
    """
    x = da.ones((3, 3, 3), chunks=2)

    msg = r"from_array does not input more than 2D array, got array with shape \(3, 3, 3\)"
    with tm.assertRaisesRegexp(ValueError, msg):
        from_dask_array(x)  # dask

    with tm.assertRaisesRegexp(ValueError, msg):
        from_array(x.compute())  # numpy

    x = da.ones((10, 3), chunks=(3, 3))
    d1 = from_dask_array(x)  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index([0, 1, 2]))

    d2 = from_array(x.compute())  # numpy
    # Type-check the numpy-derived result itself, not d1 a second time.
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index([0, 1, 2]))

    msg = r"""Length mismatch: Expected axis has 3 elements, new values have 1 elements"""
    with tm.assertRaisesRegexp(ValueError, msg):
        from_dask_array(x, columns=['a'])  # dask

    with tm.assertRaisesRegexp(ValueError, msg):
        from_array(x.compute(), columns=['a'])  # numpy

    d1 = from_dask_array(x, columns=['a', 'b', 'c'])  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['a', 'b', 'c']))

    d2 = from_array(x.compute(), columns=['a', 'b', 'c'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['a', 'b', 'c']))
Exemplo n.º 15
0
def test_from_dask_array_compat_numpy_array():
    """Check that from_dask_array and from_array agree for 2-D and 3-D input.

    ``d1`` is built from the dask array and ``d2`` from the computed numpy
    array. A 3-D array and a too-short ``columns`` list must raise
    ValueError with the expected message for both; otherwise both results
    are checked for type, values and column labels.
    """
    x = da.ones((3, 3, 3), chunks=2)

    msg = r"from_array does not input more than 2D array, got array with shape \(3, 3, 3\)"
    with tm.assertRaisesRegexp(ValueError, msg):
        from_dask_array(x)       # dask

    with tm.assertRaisesRegexp(ValueError, msg):
        from_array(x.compute())  # numpy

    x = da.ones((10, 3), chunks=(3, 3))
    d1 = from_dask_array(x)       # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index([0, 1, 2]))

    d2 = from_array(x.compute())  # numpy
    # Verify the type of d2 here; checking d1 again would leave d2 untested.
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index([0, 1, 2]))

    msg = r"""Length mismatch: Expected axis has 3 elements, new values have 1 elements"""
    with tm.assertRaisesRegexp(ValueError, msg):
        from_dask_array(x, columns=['a'])       # dask

    with tm.assertRaisesRegexp(ValueError, msg):
        from_array(x.compute(), columns=['a'])  # numpy

    d1 = from_dask_array(x, columns=['a', 'b', 'c'])       # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['a', 'b', 'c']))

    d2 = from_array(x.compute(), columns=['a', 'b', 'c'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['a', 'b', 'c']))