Exemplo n.º 1
0
def test_from_dask_array_compat_numpy_array_1d():
    """Check that 1-D dask and numpy inputs convert to matching collections.

    ``from_dask_array`` (dask array input) and ``from_array`` (numpy input)
    are expected to agree:

    * no ``columns`` -> unnamed ``dd.Series``
    * string ``columns`` -> ``dd.Series`` carrying that name
    * list ``columns`` -> single-column ``dd.DataFrame``
    """
    x = da.ones(10, chunks=3)
    d1 = from_dask_array(x)  # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == x.compute()).all()
    assert d1.name is None

    d2 = from_array(x.compute())  # numpy
    # Type-check the numpy-derived result (d2), not d1 a second time.
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == x.compute()).all()
    assert d2.name is None

    d1 = from_dask_array(x, columns='name')  # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == x.compute()).all()
    assert d1.name == 'name'

    d2 = from_array(x.compute(), columns='name')  # numpy
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == x.compute()).all()
    assert d2.name == 'name'

    # passing list via columns results in DataFrame
    d1 = from_dask_array(x, columns=['name'])  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['name']))

    d2 = from_array(x.compute(), columns=['name'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['name']))
Exemplo n.º 2
0
def test_from_dask_array_compat_numpy_array_1d():
    """Verify 1-D conversion parity between dask-array and numpy inputs.

    For each ``columns`` variant the dask path (``from_dask_array``) and the
    numpy path (``from_array``) must yield the same kind of collection with
    the same values: a ``dd.Series`` when ``columns`` is omitted or a string,
    and a ``dd.DataFrame`` when ``columns`` is a list.
    """
    x = da.ones(10, chunks=3)
    # Materialize once; every comparison below is against these values.
    expected = x.compute()

    d1 = from_dask_array(x)  # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == expected).all()
    assert d1.name is None

    d2 = from_array(expected)  # numpy
    # The type assertion must target d2; checking d1 again would leave the
    # numpy code path unverified.
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == expected).all()
    assert d2.name is None

    d1 = from_dask_array(x, columns='name')  # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == expected).all()
    assert d1.name == 'name'

    d2 = from_array(expected, columns='name')  # numpy
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == expected).all()
    assert d2.name == 'name'

    # passing list via columns results in DataFrame
    d1 = from_dask_array(x, columns=['name'])  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == expected).all()
    tm.assert_index_equal(d1.columns, pd.Index(['name']))

    d2 = from_array(expected, columns=['name'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == expected).all()
    tm.assert_index_equal(d2.columns, pd.Index(['name']))
Exemplo n.º 3
0
def test_from_dask_array_compat_numpy_array():
    """Check 2-D conversion parity and error handling for both input kinds.

    Expectations exercised for ``from_dask_array`` (dask) and ``from_array``
    (numpy) alike:

    * a 3-D input raises ``ValueError``
    * a 2-D input yields a ``dd.DataFrame`` with default columns 0, 1, 2
    * a ``columns`` list of the wrong length raises ``ValueError``
    * a ``columns`` list of matching length is applied to the result
    """
    x = da.ones((3, 3, 3), chunks=2)

    with pytest.raises(ValueError):
        from_dask_array(x)  # dask

    with pytest.raises(ValueError):
        from_array(x.compute())  # numpy

    x = da.ones((10, 3), chunks=(3, 3))
    d1 = from_dask_array(x)  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index([0, 1, 2]))

    d2 = from_array(x.compute())  # numpy
    # Type-check the numpy-derived result (d2), not d1 a second time.
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index([0, 1, 2]))

    # One name for three columns must be rejected.
    with pytest.raises(ValueError):
        from_dask_array(x, columns=['a'])  # dask

    with pytest.raises(ValueError):
        from_array(x.compute(), columns=['a'])  # numpy

    d1 = from_dask_array(x, columns=['a', 'b', 'c'])  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['a', 'b', 'c']))

    d2 = from_array(x.compute(), columns=['a', 'b', 'c'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['a', 'b', 'c']))
Exemplo n.º 4
0
def test_meta_from_array():
    """Exercise ``dd.io._meta_from_array`` and the divisions of ``from_array``.

    Covers default integer column labels, explicit column names, rejection
    of a column list of the wrong length, and the number of divisions
    produced when chunking a 201-row array by 50.
    """
    # Integer input, no explicit names: expect columns 0 and 1.
    int_arr = np.array([[1, 2], [3, 4]], dtype=np.int64)
    meta = dd.io._meta_from_array(int_arr)
    assert isinstance(meta, pd.DataFrame)
    for label in (0, 1):
        assert meta[label].dtype == np.int64
    tm.assert_index_equal(meta.columns, pd.Index([0, 1]))

    # Float input with explicit names: dtype and names must carry through.
    float_arr = np.array([[1., 2.], [3., 4.]], dtype=np.float64)
    meta = dd.io._meta_from_array(float_arr, columns=['a', 'b'])
    assert isinstance(meta, pd.DataFrame)
    for label in ('a', 'b'):
        assert meta[label].dtype == np.float64
    tm.assert_index_equal(meta.columns, pd.Index(['a', 'b']))

    # Three names for a two-column array must be rejected.
    with pytest.raises(ValueError):
        dd.io._meta_from_array(float_arr, columns=['a', 'b', 'c'])

    np.random.seed(42)
    samples = np.random.rand(201, 2)
    ddf = from_array(samples, chunksize=50, columns=['a', 'b'])
    # Should be 5 partitions and the end
    assert len(ddf.divisions) == 6
Exemplo n.º 5
0
def test_dummy_from_array():
    """Exercise ``dd.io._dummy_from_array`` and the divisions of ``from_array``.

    Covers default integer column labels, explicit column names, the
    length-mismatch error for a column list of the wrong length, and the
    number of divisions produced when chunking a 201-row array by 50.
    """
    # Integer input, no explicit names: expect columns 0 and 1.
    int_arr = np.array([[1, 2], [3, 4]], dtype=np.int64)
    dummy = dd.io._dummy_from_array(int_arr)
    assert isinstance(dummy, pd.DataFrame)
    for label in (0, 1):
        assert dummy[label].dtype == np.int64
    tm.assert_index_equal(dummy.columns, pd.Index([0, 1]))

    # Float input with explicit names: dtype and names must carry through.
    float_arr = np.array([[1., 2.], [3., 4.]], dtype=np.float64)
    dummy = dd.io._dummy_from_array(float_arr, columns=['a', 'b'])
    assert isinstance(dummy, pd.DataFrame)
    for label in ('a', 'b'):
        assert dummy[label].dtype == np.float64
    tm.assert_index_equal(dummy.columns, pd.Index(['a', 'b']))

    # Three names for a two-column array must raise with this message.
    msg = r"""Length mismatch: Expected axis has 2 elements, new values have 3 elements"""
    with tm.assertRaisesRegexp(ValueError, msg):
        dd.io._dummy_from_array(float_arr, columns=['a', 'b', 'c'])

    np.random.seed(42)
    samples = np.random.rand(201, 2)
    ddf = from_array(samples, chunksize=50, columns=['a', 'b'])
    # Should be 5 partitions and the end
    assert len(ddf.divisions) == 6
Exemplo n.º 6
0
def test_dummy_from_array():
    """Validate the frame built by ``dd.io._dummy_from_array``.

    The helper is called on small int64 and float64 arrays, with and without
    explicit column names, and with a column list of the wrong length. The
    final section checks how many divisions ``from_array`` reports for a
    201-row array split with ``chunksize=50``.
    """
    ints = np.array([[1, 2], [3, 4]], dtype=np.int64)
    frame = dd.io._dummy_from_array(ints)
    assert isinstance(frame, pd.DataFrame)
    first_dtype = frame[0].dtype
    assert first_dtype == np.int64
    second_dtype = frame[1].dtype
    assert second_dtype == np.int64
    default_labels = pd.Index([0, 1])
    tm.assert_index_equal(frame.columns, default_labels)

    floats = np.array([[1., 2.], [3., 4.]], dtype=np.float64)
    names = ['a', 'b']
    frame = dd.io._dummy_from_array(floats, columns=names)
    assert isinstance(frame, pd.DataFrame)
    first_dtype = frame['a'].dtype
    assert first_dtype == np.float64
    second_dtype = frame['b'].dtype
    assert second_dtype == np.float64
    tm.assert_index_equal(frame.columns, pd.Index(['a', 'b']))

    # One name too many for a two-column array.
    msg = r"""Length mismatch: Expected axis has 2 elements, new values have 3 elements"""
    with tm.assertRaisesRegexp(ValueError, msg):
        dd.io._dummy_from_array(floats, columns=['a', 'b', 'c'])

    np.random.seed(42)
    random_rows = np.random.rand(201, 2)
    chunked = from_array(random_rows, chunksize=50, columns=['a', 'b'])
    n_divisions = len(chunked.divisions)
    # Should be 5 partitions and the end
    assert n_divisions == 6
Exemplo n.º 7
0
def test_from_dask_array_compat_numpy_array():
    """Check 2-D conversion parity and error messages for both input kinds.

    Expectations exercised for ``from_dask_array`` (dask) and ``from_array``
    (numpy) alike:

    * a 3-D input raises ``ValueError`` with the "more than 2D" message
    * a 2-D input yields a ``dd.DataFrame`` with default columns 0, 1, 2
    * a ``columns`` list of the wrong length raises ``ValueError`` with the
      "Length mismatch" message
    * a ``columns`` list of matching length is applied to the result
    """
    x = da.ones((3, 3, 3), chunks=2)

    msg = r"from_array does not input more than 2D array, got array with shape \(3, 3, 3\)"
    with tm.assertRaisesRegexp(ValueError, msg):
        from_dask_array(x)  # dask

    with tm.assertRaisesRegexp(ValueError, msg):
        from_array(x.compute())  # numpy

    x = da.ones((10, 3), chunks=(3, 3))
    d1 = from_dask_array(x)  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index([0, 1, 2]))

    d2 = from_array(x.compute())  # numpy
    # Type-check the numpy-derived result (d2), not d1 a second time.
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index([0, 1, 2]))

    # One name for three columns must be rejected.
    msg = r"""Length mismatch: Expected axis has 3 elements, new values have 1 elements"""
    with tm.assertRaisesRegexp(ValueError, msg):
        from_dask_array(x, columns=['a'])  # dask

    with tm.assertRaisesRegexp(ValueError, msg):
        from_array(x.compute(), columns=['a'])  # numpy

    d1 = from_dask_array(x, columns=['a', 'b', 'c'])  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['a', 'b', 'c']))

    d2 = from_array(x.compute(), columns=['a', 'b', 'c'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['a', 'b', 'c']))
Exemplo n.º 8
0
def test_from_dask_array_compat_numpy_array():
    """Verify 2-D conversion parity between dask-array and numpy inputs.

    Both ``from_dask_array`` (dask) and ``from_array`` (numpy) are expected to
    reject a 3-D array and a ``columns`` list of the wrong length with a
    ``ValueError`` matching the given message, and otherwise to return a
    ``dd.DataFrame`` whose values equal the input and whose columns are
    either 0, 1, 2 or the names supplied.
    """
    x = da.ones((3, 3, 3), chunks=2)

    msg = r"from_array does not input more than 2D array, got array with shape \(3, 3, 3\)"
    with tm.assertRaisesRegexp(ValueError, msg):
        from_dask_array(x)  # dask

    with tm.assertRaisesRegexp(ValueError, msg):
        from_array(x.compute())  # numpy

    x = da.ones((10, 3), chunks=(3, 3))
    # Materialize the 2-D input once; it is the reference for every value
    # comparison and the numpy input for every from_array call below.
    expected = x.compute()

    d1 = from_dask_array(x)  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == expected).all()
    tm.assert_index_equal(d1.columns, pd.Index([0, 1, 2]))

    d2 = from_array(expected)  # numpy
    # The type assertion must target d2; checking d1 again would leave the
    # numpy code path unverified.
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == expected).all()
    tm.assert_index_equal(d2.columns, pd.Index([0, 1, 2]))

    msg = r"""Length mismatch: Expected axis has 3 elements, new values have 1 elements"""
    with tm.assertRaisesRegexp(ValueError, msg):
        from_dask_array(x, columns=['a'])  # dask

    with tm.assertRaisesRegexp(ValueError, msg):
        from_array(expected, columns=['a'])  # numpy

    d1 = from_dask_array(x, columns=['a', 'b', 'c'])  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == expected).all()
    tm.assert_index_equal(d1.columns, pd.Index(['a', 'b', 'c']))

    d2 = from_array(expected, columns=['a', 'b', 'c'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == expected).all()
    tm.assert_index_equal(d2.columns, pd.Index(['a', 'b', 'c']))