def test_unchunk(sc):
    """Chunking followed by ``unchunk`` must reproduce the original array."""
    # 3D input, several explicit chunk sizes plus a string size
    data = arange(4*6).reshape(1, 4, 6)
    barr = array(data, sc)
    for size in ((2, 3), (3, 4), '0.1'):
        assert allclose(barr.chunk(size).unchunk().toarray(), barr.toarray())
    # chunk() with no arguments
    assert allclose(barr.chunk().unchunk().toarray(), barr.toarray())

    # 4D input; note (3, 3, 3) is exercised twice
    data = arange(4*5*10).reshape(1, 4, 5, 10)
    barr = array(data, sc)
    for size in ((4, 5, 10), (1, 1, 1), (3, 3, 3), (3, 3, 3)):
        assert allclose(barr.chunk(size).unchunk().toarray(), barr.toarray())

    # 2D input where every axis is passed as the third argument of ``array``
    data = arange(4*6).reshape(4, 6)
    barr = array(data, sc, (0, 1))
    assert allclose(barr.chunk(()).unchunk().toarray(), barr.toarray())

    # chunk size given as a bare int rather than a tuple
    barr = array(data, sc, (0,))
    assert allclose(barr.chunk(2).unchunk().toarray(), barr.toarray())
def test_reshape(sc):
    """``reshape`` agrees with numpy, except when it would mix keys and values."""
    old_shape = (6, 10, 4, 12)
    local = arange(prod(old_shape)).reshape(old_shape)
    barr = array(local, sc, axis=(0, 1))

    supported_shapes = (
        (15, 4, 4, 12),   # keys only
        (6, 10, 24, 2),   # values only
        (15, 4, 24, 2),   # keys and values, independent
    )
    for new_shape in supported_shapes:
        reshaped = barr.reshape(new_shape).toarray()
        assert allclose(reshaped, barr.toarray().reshape(new_shape))

    # keys and values, mixing
    mixing_shape = (6, 4, 10, 12)
    with pytest.raises(NotImplementedError):
        barr.reshape(mixing_shape)
def reduce_suite(arr, b):
    """
    A set of tests for the reduce operator

    Parameters
    ----------
    arr: `ndarray`
        A 3D ndarray used in the construction of `b` (used to check results)
    b: `BoltArray`
        The BoltArray to be used for testing
    """
    from operator import add

    # Reduce over the first axis with an add
    reduced = b.reduce(add, axis=0)
    res = reduced.toarray()
    assert res.shape == (arr.shape[1], arr.shape[2])
    assert allclose(res, arr.sum(axis=0))

    # Reduce over multiple axes with an add. Only the last axis survives, so
    # the reference must collapse axes 0 and 1 of `arr` (summing axis 0 and
    # then axis 1 of the intermediate would leave arr.shape[1] instead).
    reduced = b.reduce(add, axis=(0, 1))
    res = reduced.toarray()
    assert res.shape == (arr.shape[2],)
    assert allclose(res, arr.sum(axis=(0, 1)))
def reduce_suite(arr, b):
    """
    A set of tests for the reduce operator

    Parameters
    ----------
    arr: `ndarray`
        A 3D ndarray used in the construction of `b` (used to check results)
    b: `BoltArray`
        The BoltArray to be used for testing
    """
    from operator import add

    # Reduce over the first axis with an add
    reduced = b.reduce(add, axis=0)
    res = reduced.toarray()
    assert res.shape == (arr.shape[1], arr.shape[2])
    assert allclose(res, arr.sum(axis=0))

    # Reduce over multiple axes with an add. The expected value sums axes
    # 0 and 1 of `arr` so that its shape matches the (arr.shape[2],) result
    # for any 3D input, not only inputs whose last two axes are equal.
    reduced = b.reduce(add, axis=(0, 1))
    res = reduced.toarray()
    assert res.shape == (arr.shape[2],)
    assert allclose(res, arr.sum(axis=(0, 1)))
def test_sum(sc):
    """``sum`` on the array built with ``sc`` agrees with numpy's ``sum``."""
    local = arange(2*3*4).reshape(2, 3, 4)
    barr = array(local, sc, axis=(0,))

    # no axis argument
    assert allclose(barr.sum(), local.sum())

    # a single axis, then a pair of axes
    for ax in (0, (0, 1)):
        assert allclose(barr.sum(axis=ax), local.sum(axis=ax))

    # all axes: compared with == rather than allclose
    every_axis = (0, 1, 2)
    assert barr.sum(axis=every_axis) == local.sum(axis=every_axis)
def test_getitem_slice_ragged(sc):
    """Stepped slices on the first axis index the same elements as in numpy."""
    local = arange(10 * 10 * 3).reshape((10, 10, 3))
    barr = array(local, sc, axis=(0, 1))

    # (stop, step) pairs for the slice on the first axis; start is always 0
    for stop, step in ((5, 2), (5, 3), (9, 3)):
        rows = slice(0, stop, step)
        assert allclose(barr[rows, 0:2].toarray(), local[rows, 0:2])
def test_swapaxes(sc):
    """``swapaxes`` matches numpy for three different axis pairs."""
    local = arange(2*3*4*5).reshape((2, 3, 4, 5))
    barr = array(local, sc, axis=(0, 1))

    for first, second in ((1, 2), (0, 1), (2, 3)):
        swapped = barr.swapaxes(first, second).toarray()
        assert allclose(swapped, barr.toarray().swapaxes(first, second))
def test_min(sc):
    """``min`` on the array built with ``sc`` agrees with numpy's ``min``."""
    local = arange(2*3*4).reshape(2, 3, 4)
    barr = array(local, sc, axis=(0,))

    # no axis argument
    assert allclose(barr.min(), local.min())

    # a single axis, then a pair of axes
    for ax in (0, (0, 1)):
        assert allclose(barr.min(axis=ax), local.min(axis=ax))

    # all axes: compared with == rather than allclose
    every_axis = (0, 1, 2)
    assert barr.min(axis=every_axis) == local.min(axis=every_axis)
def test_getitem_slice_ragged(sc):
    """Slicing with a step on the first axis returns the same data as numpy."""
    expected_src = arange(10*10*3).reshape((10, 10, 3))
    barr = array(expected_src, sc, axis=(0, 1))

    row_slices = (slice(0, 5, 2), slice(0, 5, 3), slice(0, 9, 3))
    cols = slice(0, 2)
    for rows in row_slices:
        picked = barr[rows, cols].toarray()
        assert allclose(picked, expected_src[rows, cols])
def test_swapaxes(sc):
    """Swapping axis pairs gives the same result as numpy's ``swapaxes``."""
    source = arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
    barr = array(source, sc, axis=(0, 1))

    axis_pairs = [(1, 2), (0, 1), (2, 3)]
    for pair in axis_pairs:
        result = barr.swapaxes(*pair).toarray()
        reference = barr.toarray().swapaxes(*pair)
        assert allclose(result, reference)
def test_t(sc):
    """The ``T`` property matches numpy's ``T`` for two ``axis`` settings."""
    source = arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))

    for axis in (0, (0, 1)):
        barr = array(source, sc, axis=axis)
        assert allclose(barr.T.toarray(), barr.toarray().T)
def test_clip(sc):
    """``clip`` with one or two bounds gives the same values as numpy."""
    from numpy import arange

    local = arange(4).reshape(2, 2)
    barr = array(local, sc)

    # positional arguments forwarded to both clip implementations
    for bounds in ((0,), (2,), (1, 2)):
        assert allclose(barr.clip(*bounds).toarray(), local.clip(*bounds))
def test_concatenate(sc):
    """The ``concatenate`` method accepts an ndarray and both kinds of array."""
    from numpy import concatenate

    local = arange(2*3).reshape((2, 3))
    with_sc = array(local, sc)
    without_sc = array(local)

    # every operand holds the same data, so the expected result is identical
    for other in (local, with_sc, without_sc):
        joined = with_sc.concatenate(other).toarray()
        assert allclose(joined, concatenate((local, local)))
def test_t(sc):
    """``T`` must agree with the transpose of the materialised array."""
    local = arange(2*3*4*5).reshape((2, 3, 4, 5))

    # axis passed as a single int
    barr = array(local, sc, axis=0)
    transposed = barr.T.toarray()
    assert allclose(transposed, barr.toarray().T)

    # axis passed as a tuple of two ints
    barr = array(local, sc, axis=(0, 1))
    transposed = barr.T.toarray()
    assert allclose(transposed, barr.toarray().T)
def test_ones(sc):
    """``ones`` built with ``sc`` equals numpy's ``ones`` for the same shape."""
    from numpy import ones as npones

    # a tuple shape and a bare-int shape
    for shape in ((2, 3, 4), 5):
        expected = npones(shape)
        barr = ones(shape, sc)
        assert allclose(expected, barr.toarray())
def test_zeros(sc):
    """``zeros`` built with ``sc`` equals numpy's ``zeros`` for the same shape."""
    from numpy import zeros as npzeros

    # a tuple shape and a bare-int shape
    for shape in ((2, 3, 4), 5):
        expected = npzeros(shape)
        barr = zeros(shape, sc)
        assert allclose(expected, barr.toarray())
def test_ufuncs():
    """``sum`` on an array built without ``sc`` matches numpy."""
    local = arange(2*3*4*5).reshape(2, 3, 4, 5)
    barr = array(local)

    # test a common ufunc (sum) over different dimensions
    for ax in (0, (0, 1), (0, 1, 2)):
        assert allclose(local.sum(axis=ax), barr.sum(axis=ax).toarray())

    # full reduction is compared directly
    assert local.sum() == barr.sum()
def test_keys_to_values(sc):
    """``keys_to_values`` followed by ``unchunk`` recovers the source data."""
    x = arange(4 * 7 * 9 * 6).reshape(4, 7, 9, 6)

    def moved(chunked, axes, **kwargs):
        # apply keys_to_values, then materialise the unchunked result
        return chunked.keys_to_values(axes, **kwargs).unchunk().toarray()

    barr = array(x, sc, (0, 1))
    chunked = barr.chunk((4, 2))
    # the (0,) case is compared after transposing the first two axes back
    assert allclose(x, moved(chunked, (0,)).transpose(1, 0, 2, 3))
    assert allclose(x, moved(chunked, (1,)))
    assert allclose(x, moved(chunked, (1,), size=(3,)))
    assert allclose(x, moved(chunked, (0, 1)))
    assert allclose(x, moved(chunked, (0, 1), size=(2, 3)))
    assert allclose(x, moved(chunked, ()))

    barr = array(x, sc, range(4))
    chunked = barr.chunk(())
    assert allclose(x, moved(chunked, (3,)))
    assert allclose(x, moved(chunked, (0, 1)).transpose(2, 3, 0, 1))

    barr = array(x, sc, (0,))
    chunked = barr.chunk((2, 3, 4))
    assert allclose(x, moved(chunked, (0,)))
def test_squeeze(sc):
    """``squeeze`` matches numpy and yields the expected ``split`` values."""
    from numpy import ones as npones

    shape = (1, 2, 1, 4)
    # (axis argument, expected split after squeeze(), squeeze((0, 2)), squeeze(2))
    cases = (
        (0, (0, 0, 1)),
        ((0, 1), (1, 1, 2)),
    )
    for axis, (split_all, split_pair, split_last) in cases:
        x = npones(shape)
        b = ones(shape, sc, axis=axis)
        assert allclose(b.squeeze().toarray(), x.squeeze())
        for target in ((0, 2), 0, 2):
            assert allclose(b.squeeze(target).toarray(), x.squeeze(target))
        assert b.squeeze().split == split_all
        assert b.squeeze((0, 2)).split == split_pair
        assert b.squeeze(2).split == split_last

    # every dimension has length one
    x = npones((1, 1, 1, 1))
    b = ones((1, 1, 1, 1), sc, axis=(0, 1))
    assert allclose(b.squeeze().toarray(), x.squeeze())
def test_transpose(sc):
    """
    ``transpose`` must agree with numpy for every permutation of four axes,
    and for the argument-less form.
    """
    n = 4
    a = arange(2 * 3 * 4 * 5).reshape((2, 3, 4, 5))
    b = array(a, sc, axis=(0, 1))

    # iterate the permutations lazily; there is no need to build the list
    for p in permutations(range(n), n):
        assert allclose(b.transpose(p).toarray(), b.toarray().transpose(p))

    # transpose() without arguments; materialise explicitly, like every other
    # comparison here, instead of relying on allclose to convert the result
    assert allclose(b.transpose().toarray(), b.toarray().transpose())
def test_getitem_list_array(sc):
    """Indexing with nested index lists on all three axes matches numpy."""
    local = arange(3*3*4).reshape((3, 3, 4))
    rows = [[0, 0], [1, 1]]
    cols = [[0, 2], [0, 2]]
    dept = [[0, 3], [0, 3]]

    for axis in (0, (0, 1)):
        barr = array(local, sc, axis=axis)
        picked = barr[rows, cols, dept].toarray()
        assert allclose(picked, local[rows, cols, dept])
def test_getitem_list_array(sc):
    """List-of-lists indices select the same elements as they do in numpy."""
    source = arange(3 * 3 * 4).reshape((3, 3, 4))
    rows, cols, dept = [[0, 0], [1, 1]], [[0, 2], [0, 2]], [[0, 3], [0, 3]]
    reference = source[rows, cols, dept]

    # axis given as an int
    barr = array(source, sc, axis=0)
    assert allclose(barr[rows, cols, dept].toarray(), reference)

    # axis given as a tuple
    barr = array(source, sc, axis=(0, 1))
    assert allclose(barr[rows, cols, dept].toarray(), reference)
def test_padding(sc):
    """Padded chunks hold the expected values and still round-trip to the input."""
    x = arange(2 * 2 * 5 * 6).reshape(2, 2, 5, 6)
    b = array(x, sc, (0, 1))

    def sorted_chunks(chunked):
        # chunk values collected in key order
        return chunked.tordd().sortByKey().values().collect()

    # same padding on both chunked axes
    chunks = sorted_chunks(b.chunk((2, 2), padding=1))
    expected_by_index = (
        (0, [[0, 1, 2], [6, 7, 8], [12, 13, 14]]),
        (1, [[1, 2, 3, 4], [7, 8, 9, 10], [13, 14, 15, 16]]),
        (4, [[7, 8, 9, 10], [13, 14, 15, 16], [19, 20, 21, 22], [25, 26, 27, 28]]),
        (6, [[18, 19, 20], [24, 25, 26]]),
    )
    for idx, rows in expected_by_index:
        assert allclose(chunks[idx], array(rows))

    # a different padding per axis
    chunks = sorted_chunks(b.chunk((3, 3), padding=(1, 2)))
    first_expected = array([[0, 1, 2, 3, 4],
                            [6, 7, 8, 9, 10],
                            [12, 13, 14, 15, 16],
                            [18, 19, 20, 21, 22]])
    assert allclose(chunks[0], first_expected)

    # padded chunks must still reassemble into the original data
    c = b.chunk((2, 2), padding=1)
    assert allclose(x, c.unchunk().toarray())
    assert allclose(x, c.keys_to_values((1,)).unchunk().toarray())
    assert allclose(x, c.values_to_keys((0,)).unchunk().toarray())
def map_suite(arr, b):
    """
    A set of tests for the map operator

    Parameters
    ----------
    arr: `ndarray`
        A 2D array used in the construction of `b` (used to check results)
    b: `BoltArray`
        The BoltArray to be used for testing
    """
    from numpy import ones
    import random

    random.seed(42)

    # a simple map should be equivalent to an element-wise multiplication (without axis specified)
    func1 = lambda x: x * 2
    mapped = b.map(func1)
    res = mapped.toarray()
    assert allclose(res, arr * 2)

    # a simple map should be equivalent to an element-wise multiplication (with axis specified)
    func1 = lambda x: x * 2
    mapped = b.map(func1, axis=0)
    res = mapped.toarray()
    assert allclose(res, arr * 2)

    # more complicated maps can reshape elements so long as they do so consistently
    func2 = lambda x: ones(10)
    mapped = b.map(func2, axis=0)
    res = mapped.toarray()
    assert res.shape == (arr.shape[0], 10)

    # but the shape of the result will change if mapped over different axes
    mapped = b.map(func2, axis=(0, 1))
    res = mapped.toarray()
    assert res.shape == (arr.shape[0], arr.shape[1], 10)

    # if a map is not applied uniformly, it should produce an error
    with pytest.raises(Exception):
        def nonuniform_map(x):
            # Seed from the element's raw bytes so the draw depends only on
            # the element. tobytes() is used because ndarray.tostring() was
            # removed in NumPy 2.0; the resulting AttributeError would be
            # swallowed by pytest.raises(Exception) and the check would pass
            # without ever testing non-uniform output shapes.
            random.seed(x.tobytes())
            return random.random()
        func3 = lambda x: ones(10) if nonuniform_map(x) < 0.5 else ones(5)
        mapped = b.map(func3)
        mapped.toarray()
def test_array(sc):
    """``array`` with ``sc`` returns a ``BoltArraySpark`` holding the input data."""
    local = arange(2*3*4).reshape((2, 3, 4))

    # no axis, an int axis, and a tuple axis
    for kwargs in ({}, {'axis': 0}, {'axis': (0, 1)}):
        barr = array(local, sc, **kwargs)
        assert isinstance(barr, BoltArraySpark)
        assert allclose(local, barr.toarray())
def test_stack_2D(sc):
    """Stacking with and without ``size``, then unstacking back."""
    barr = _2D_stackable_preamble(sc)

    def first_block_shape(stacked_arr):
        # shape of the value held by the first record of ``_rdd``
        return stacked_arr._rdd.first()[1].shape

    # without stack_size
    stacked = barr.stack()
    assert first_block_shape(stacked) == (5, 10)
    assert stacked.shape == (10, 10)

    # with stack_size
    stacked = barr.stack(size=2)
    assert first_block_shape(stacked) == (2, 10)

    # invalid stack_size
    stacked = barr.stack(size=0)
    assert first_block_shape(stacked) == (5, 10)

    # unstacking
    restored = stacked.unstack().toarray()
    assert restored.shape == (10, 10)
    assert allclose(restored, barr.toarray())
def test_stack_2D(sc):
    """Stacking with and without ``stack_size``, then unstacking back."""
    barr = _2D_stackable_preamble(sc)

    def first_block_shape(stacked_arr):
        # shape of the value held by the first record of ``_rdd``
        return stacked_arr._rdd.first()[1].shape

    # without stack_size
    stacked = barr.stack()
    assert first_block_shape(stacked) == (5, 10)
    assert stacked.shape == (10, 10)

    # with stack_size
    stacked = barr.stack(stack_size=2)
    assert first_block_shape(stacked) == (2, 10)

    # invalid stack_size
    stacked = barr.stack(stack_size=0)
    assert first_block_shape(stacked) == (5, 10)

    # unstacking
    restored = stacked.unstack().toarray()
    assert restored.shape == (10, 10)
    assert allclose(restored, barr.toarray())
def test_chunk(sc):
    """Chunk keys and values match a manual numpy ``split`` of the data."""
    x = arange(4*6).reshape(1, 4, 6)
    b = array(x, sc)

    # both chunk sizes below produce a 2 x 2 grid of chunks under key (0,)
    chunk_ids = ((0, 0), (0, 1), (1, 0), (1, 1))
    expected_keys = tuple(zip(((0,),) * 4, chunk_ids))

    for rows, cols in ((2, 3), (3, 4)):
        keys, values = zip(*b.chunk((rows, cols))._rdd.sortByKey().collect())
        # split along axis 0 first, then split each piece along axis 1
        expected_values = [piece
                           for band in split(x[0], (rows,), axis=0)
                           for piece in split(band, (cols,), axis=1)]
        assert keys == expected_keys
        assert all([allclose(got, want) for (got, want) in zip(values, expected_values)])
def test_transpose_keys(sc):
    """``keys.transpose`` permutes only the key axes."""
    x = arange(2 * 3 * 4).reshape((2, 3, 4))

    # swapping the two key axes
    barr = array(x, sc, axis=(0, 1))
    swapped = barr.keys.transpose((1, 0))
    assert swapped.keys.shape == (3, 2)
    assert allclose(swapped.toarray(), x.transpose((1, 0, 2)))

    # identity permutations leave the data unchanged
    for axis, perm in ((0, (0,)), ((0, 1), (0, 1))):
        barr = array(x, sc, axis=axis)
        same = barr.keys.transpose(perm)
        assert allclose(same.toarray(), x)
def test_concatenate():
    """Module-level ``concatenate`` of two ndarrays matches numpy's result."""
    from numpy import concatenate as npconcatenate

    x = arange(2 * 3 * 4).reshape((2, 3, 4))
    pair = (x, x)
    joined = concatenate(pair)
    assert allclose(npconcatenate(pair), joined.toarray())
def test_transpose_values(sc):
    """``values.transpose`` permutes only the value axes."""
    x = arange(2 * 3 * 4).reshape((2, 3, 4))

    # swapping the two value axes
    barr = array(x, sc, axis=0)
    swapped = barr.values.transpose((1, 0))
    assert swapped.values.shape == (4, 3)
    assert allclose(swapped.toarray(), x.transpose((0, 2, 1)))

    # identity permutation over two value axes
    barr = array(x, sc, axis=0)
    same = barr.values.transpose((0, 1))
    assert allclose(same.toarray(), x)

    # identity permutation over a single value axis
    barr = array(x, sc, axis=(0, 1))
    same = barr.values.transpose((0,))
    assert allclose(same.toarray(), x.reshape((2, 3, 4)))
def test_transpose_keys(sc):
    """Transposing through ``keys`` reorders key axes and leaves the rest alone."""
    local = arange(2*3*4).reshape((2, 3, 4))

    # two key axes, swapped
    two_keys = array(local, sc, axis=(0, 1))
    result = two_keys.keys.transpose((1, 0))
    assert result.keys.shape == (3, 2)
    assert allclose(result.toarray(), local.transpose((1, 0, 2)))

    # one key axis, identity permutation
    one_key = array(local, sc, axis=0)
    result = one_key.keys.transpose((0,))
    assert allclose(result.toarray(), local)

    # two key axes, identity permutation
    two_keys = array(local, sc, axis=(0, 1))
    result = two_keys.keys.transpose((0, 1))
    assert allclose(result.toarray(), local)
def test_transpose_values(sc):
    """Transposing through ``values`` reorders value axes only."""
    local = arange(2*3*4).reshape((2, 3, 4))

    # (axis for ``array``, permutation, expected materialised result)
    identity_cases = (
        (0, (0, 1), local),
        ((0, 1), (0,), local.reshape((2, 3, 4))),
    )

    # the two value axes swapped
    barr = array(local, sc, axis=0)
    result = barr.values.transpose((1, 0))
    assert result.values.shape == (4, 3)
    assert allclose(result.toarray(), local.transpose((0, 2, 1)))

    for axis, perm, expected in identity_cases:
        barr = array(local, sc, axis=axis)
        result = barr.values.transpose(perm)
        assert allclose(result.toarray(), expected)
def test_tospark(sc):
    """``tospark`` returns a ``BoltArraySpark`` with the same shape and data."""
    local = arange(2 * 3 * 4).reshape((2, 3, 4))
    converted = array(local).tospark(sc, axis=0)

    assert isinstance(converted, BoltArraySpark)
    assert converted.shape == (2, 3, 4)
    assert allclose(converted.toarray(), local)
def test_tospark(sc):
    """An array built without ``sc`` converts to ``BoltArraySpark`` via ``tospark``."""
    shape = (2, 3, 4)
    source = arange(2*3*4).reshape(shape)
    without_sc = array(source)

    spark_arr = without_sc.tospark(sc, axis=0)
    assert isinstance(spark_arr, BoltArraySpark)
    assert spark_arr.shape == shape
    assert allclose(spark_arr.toarray(), source)
def test_stacked_shape_inference(sc):
    """Shapes reported after mapping over a stacked array, and error cases."""
    from numpy import ones as npones

    a = ones((100, 2), sc)
    a._rdd = a._rdd.partitionBy(2)
    s = a.stack(5)
    n = s.tordd().count()

    def double(x):
        return x * 2

    def row_sums(x):
        return x.sum(axis=1)

    def widen(x):
        return tile(x, (1, 2))

    def fixed_block(x):
        return npones((2, 2))

    def col_sums(x):
        return x.sum(axis=0)

    # operations that preserve keys
    assert s.map(double).unstack().shape == (100, 2)
    assert s.map(row_sums).unstack().shape == (100,)
    assert s.map(widen).unstack().shape == (100, 4)

    # operations that create new keys
    assert s.map(fixed_block).unstack().shape == (n, 2, 2)
    assert s.map(col_sums).unstack().shape == (n, 2)
    assert s.map(lambda x: asarray([2])).unstack().toarray().shape == (n, 1)
    assert s.map(lambda x: asarray(2)).unstack().toarray().shape == (n,)

    # composing functions works
    assert s.map(double).map(double).unstack().shape == (100, 2)
    assert s.map(double).map(fixed_block).unstack().shape == (n, 2, 2)
    assert s.map(fixed_block).map(double).unstack().shape == (n, 2, 2)

    # check the result
    assert allclose(s.map(row_sums).unstack().toarray(), npones(100) * 2)
    assert allclose(s.map(widen).unstack().toarray(), npones((100, 4)))

    # functions returning a plain int or None are rejected
    with pytest.raises(ValueError):
        s.map(lambda x: 2)

    with pytest.raises(ValueError):
        s.map(lambda x: None)

    # an exception inside the function surfaces as RuntimeError
    with pytest.raises(RuntimeError):
        s.map(lambda x: 1 / 0)
def test_reshape(sc):
    """Reshaping keys and/or values matches numpy; mixing them is unsupported."""
    old_shape = (6, 10, 4, 12)
    source = arange(prod(old_shape)).reshape(old_shape)
    barr = array(source, sc, axis=(0, 1))

    def matches_numpy(shape):
        # compare the reshaped array with numpy's reshape of the same data
        return allclose(barr.reshape(shape).toarray(), barr.toarray().reshape(shape))

    # keys only
    assert matches_numpy((15, 4, 4, 12))

    # values only
    assert matches_numpy((6, 10, 24, 2))

    # keys and values, independent
    assert matches_numpy((15, 4, 24, 2))

    # keys and values, mixing
    with pytest.raises(NotImplementedError):
        barr.reshape((6, 4, 10, 12))
def test_swap(sc):
    """``swap`` is checked against the equivalent numpy ``transpose``."""
    a = arange(2**8).reshape(*(8 * [2]))
    b = array(a, sc, axis=(0, 1, 2, 3))

    # the same swap with a tuple size, a string size, and no size at all
    for kwargs in ({'size': (2, 2)}, {'size': "50"}, {}):
        swapped = b.swap((1, 2), (0, 3), **kwargs)
        expected = a.transpose((0, 3, 4, 7, 1, 2, 5, 6))
        assert allclose(expected, swapped.toarray())

    # no key axes given, all four value axes given: data is unchanged
    swapped = b.swap((), (0, 1, 2, 3))
    assert allclose(a, swapped.toarray())

    # integer arguments
    swapped = b.swap(0, 0)
    expected = a.transpose((1, 2, 3, 4, 0, 5, 6, 7))
    assert allclose(expected, swapped.toarray())

    # empty first argument
    swapped = b.swap([], 0)
    expected = a.transpose((0, 1, 2, 3, 4, 5, 6, 7))
    assert allclose(expected, swapped.toarray())
    assert swapped.split == 5

    # empty second argument
    swapped = b.swap(0, [])
    expected = a.transpose((1, 2, 3, 0, 4, 5, 6, 7))
    assert allclose(expected, swapped.toarray())
    assert swapped.split == 3

    # every axis passed as ``axis``
    b = array(a, sc, axis=range(8))
    swapped = b.swap([0, 1], [])
    expected = a.transpose((2, 3, 4, 5, 6, 7, 0, 1))
    assert allclose(expected, swapped.toarray())
    assert swapped.split == 6

    # 3D input with a single axis passed as ``axis``
    a = arange(2 * 3 * 4).reshape(2, 3, 4)
    b = array(a, sc, axis=(0,))
    swapped = b.swap((0,), (0, 1))
    expected = a.transpose(1, 2, 0)
    assert allclose(expected, swapped.toarray())
def test_swap(sc):
    """Each ``swap`` call must equal a specific numpy ``transpose`` of the input."""
    a = arange(2 ** 8).reshape(*(8 * [2]))
    b = array(a, sc, axis=(0, 1, 2, 3))

    def check(result, perm):
        # compare a swapped array against the transposed source
        assert allclose(a.transpose(perm), result.toarray())

    # identical swap under three different ``size`` settings
    same_perm = (0, 3, 4, 7, 1, 2, 5, 6)
    check(b.swap((1, 2), (0, 3), size=(2, 2)), same_perm)
    check(b.swap((1, 2), (0, 3), size="50"), same_perm)
    check(b.swap((1, 2), (0, 3)), same_perm)

    # empty first argument with all value axes: compared with the input itself
    bs = b.swap((), (0, 1, 2, 3))
    assert allclose(a, bs.toarray())

    check(b.swap(0, 0), (1, 2, 3, 4, 0, 5, 6, 7))

    bs = b.swap([], 0)
    check(bs, (0, 1, 2, 3, 4, 5, 6, 7))
    assert bs.split == 5

    bs = b.swap(0, [])
    check(bs, (1, 2, 3, 0, 4, 5, 6, 7))
    assert bs.split == 3

    # all eight axes passed as ``axis``
    b = array(a, sc, axis=range(8))
    bs = b.swap([0, 1], [])
    check(bs, (2, 3, 4, 5, 6, 7, 0, 1))
    assert bs.split == 6

    # smaller 3D case
    a = arange(2 * 3 * 4).reshape(2, 3, 4)
    b = array(a, sc, axis=(0,))
    bs = b.swap((0,), (0, 1))
    at = a.transpose(1, 2, 0)
    assert allclose(at, bs.toarray())
def test_reshape_keys(sc):
    """``keys.reshape`` reshapes the key axes and keeps the remaining axes."""
    x = arange(2 * 3 * 4).reshape((2, 3, 4))

    # (2, 3) keys reshaped to (3, 2)
    barr = array(x, sc, axis=(0, 1))
    reshaped = barr.keys.reshape((3, 2))
    assert reshaped.keys.shape == (3, 2)
    assert allclose(reshaped.toarray(), x.reshape((3, 2, 4)))

    # (axis for ``array``, new key shape, expected materialised result)
    cases = (
        (0, (2, 1), x.reshape((2, 1, 3, 4))),
        ((0,), (2,), x),
        ((0, 1), (2, 3), x),
    )
    for axis, key_shape, expected in cases:
        barr = array(x, sc, axis=axis)
        reshaped = barr.keys.reshape(key_shape)
        assert allclose(reshaped.toarray(), expected)
def test_array(sc):
    """``array`` with ``sc`` yields ``BoltArraySpark``; ``npartitions`` is honoured."""
    local = arange(2 * 3 * 4).reshape((2, 3, 4))

    def build_and_check(**kwargs):
        # construct, verify type and contents, and hand the array back
        barr = array(local, sc, **kwargs)
        assert isinstance(barr, BoltArraySpark)
        assert allclose(local, barr.toarray())
        return barr

    build_and_check()
    build_and_check(axis=0)
    build_and_check(axis=(0, 1))

    # an explicit partition count is visible on the RDD
    partitioned = build_and_check(axis=(0, 1), npartitions=5)
    assert partitioned.tordd().getNumPartitions() == 5
def test_array(sc):
    """Construction through ``array`` for several ``axis`` / ``npartitions`` values."""
    source = arange(2*3*4).reshape((2, 3, 4))

    # no axis, an int axis, and a tuple axis
    for kwargs in ({}, {'axis': 0}, {'axis': (0, 1)}):
        barr = array(source, sc, **kwargs)
        assert isinstance(barr, BoltArraySpark)
        assert allclose(source, barr.toarray())

    # requesting five partitions
    barr = array(source, sc, axis=(0, 1), npartitions=5)
    assert isinstance(barr, BoltArraySpark)
    assert allclose(source, barr.toarray())
    assert barr.tordd().getNumPartitions() == 5
def test_reshape_keys(sc):
    """Reshaping through ``keys`` changes only the key portion of the shape."""
    local = arange(2*3*4).reshape((2, 3, 4))

    # two key axes reshaped from (2, 3) to (3, 2)
    barr = array(local, sc, axis=(0, 1))
    result = barr.keys.reshape((3, 2))
    assert result.keys.shape == (3, 2)
    assert allclose(result.toarray(), local.reshape((3, 2, 4)))

    # one key axis expanded to (2, 1)
    barr = array(local, sc, axis=0)
    result = barr.keys.reshape((2, 1))
    assert allclose(result.toarray(), local.reshape((2, 1, 3, 4)))

    # reshaping to the existing key shape leaves the data as it was
    barr = array(local, sc, axis=(0,))
    result = barr.keys.reshape((2,))
    assert allclose(result.toarray(), local)

    barr = array(local, sc, axis=(0, 1))
    result = barr.keys.reshape((2, 3))
    assert allclose(result.toarray(), local)
def test_concatenate(sc):
    """Module-level ``concatenate`` along each axis matches numpy."""
    from numpy import concatenate as npconcatenate

    x = arange(2*3*4).reshape((2, 3, 4))
    concat_axes = (0, 1, 2)

    # array built with an int ``axis``, reused for every concatenation axis
    b = array(x, sc, axis=0)
    for ax in concat_axes:
        bb = concatenate((b, b), axis=ax)
        assert allclose(npconcatenate((x, x), axis=ax), bb.toarray())

    # array built with a tuple ``axis``, rebuilt before each concatenation
    for ax in concat_axes:
        b = array(x, sc, axis=(0, 1))
        bb = concatenate((b, b), axis=ax)
        assert allclose(npconcatenate((x, x), axis=ax), bb.toarray())
def test_map(sc):
    """``map`` over chunks, with and without an explicit ``value_shape``."""
    x = arange(4*8*8).reshape(4, 8, 8)
    chunked = array(x, sc).chunk(size=(4, 8))

    # no change of shape
    def double(v):
        return 2*v

    assert allclose(chunked.map(double).unchunk().toarray(), double(x))
    assert allclose(chunked.map(double, value_shape=(4, 8)).unchunk().toarray(), double(x))

    # changing the size of an unchunked axis
    def crop(v):
        return v[:, :4]

    def crop_local(v):
        # the same crop applied to the full array, which has one more leading axis
        return v[:, :, :4]

    assert allclose(chunked.map(crop).unchunk().toarray(), crop_local(x))
    assert allclose(chunked.map(crop, value_shape=(4, 4)).unchunk().toarray(), crop_local(x))
def test_stacked_shape_inference(sc):
    """Shape of ``unstack()`` after mapping over a stacked array, plus error cases."""
    from numpy import ones as npones

    a = ones((100, 2), sc)
    a._rdd = a._rdd.partitionBy(2)
    s = a.stack(5)
    n = s.tordd().count()

    def unstacked_shape(mapped):
        # shape reported by the unstacked array, without materialising it
        return mapped.unstack().shape

    def times_two(x):
        return x * 2

    def block_of_ones(x):
        return npones((2, 2))

    # operations that preserve keys
    assert unstacked_shape(s.map(times_two)) == (100, 2)
    assert unstacked_shape(s.map(lambda x: x.sum(axis=1))) == (100,)
    assert unstacked_shape(s.map(lambda x: tile(x, (1, 2)))) == (100, 4)

    # operations that create new keys
    assert unstacked_shape(s.map(block_of_ones)) == (n, 2, 2)
    assert unstacked_shape(s.map(lambda x: x.sum(axis=0))) == (n, 2)
    assert s.map(lambda x: asarray([2])).unstack().toarray().shape == (n, 1)
    assert s.map(lambda x: asarray(2)).unstack().toarray().shape == (n,)

    # composing functions works
    assert unstacked_shape(s.map(times_two).map(times_two)) == (100, 2)
    assert unstacked_shape(s.map(times_two).map(block_of_ones)) == (n, 2, 2)
    assert unstacked_shape(s.map(block_of_ones).map(times_two)) == (n, 2, 2)

    # check the result
    row_totals = s.map(lambda x: x.sum(axis=1)).unstack().toarray()
    assert allclose(row_totals, npones(100) * 2)
    tiled = s.map(lambda x: tile(x, (1, 2))).unstack().toarray()
    assert allclose(tiled, npones((100, 4)))

    # functions returning a plain int or None are rejected
    with pytest.raises(ValueError):
        s.map(lambda x: 2)

    with pytest.raises(ValueError):
        s.map(lambda x: None)

    # an exception inside the function surfaces as RuntimeError
    with pytest.raises(RuntimeError):
        s.map(lambda x: 1/0)
def test_map(sc):
    """Mapping a function over chunks equals applying it to the whole array."""
    x = arange(4 * 8 * 8).reshape(4, 8, 8)
    b = array(x, sc)
    c = b.chunk(size=(4, 8))

    def unchunked(mapped):
        # materialise a mapped, chunked array
        return mapped.unchunk().toarray()

    # no change of shape
    def scale(v):
        return 2 * v

    assert allclose(unchunked(c.map(scale)), scale(x))
    assert allclose(unchunked(c.map(scale, value_shape=(4, 8))), scale(x))

    # changing the size of an unchunked axis
    def first_four(v):
        return v[:, :4]

    def first_four_local(v):
        return v[:, :, :4]

    assert allclose(unchunked(c.map(first_four)), first_four_local(x))
    assert allclose(unchunked(c.map(first_four, value_shape=(4, 4))), first_four_local(x))
def test_stacked_map(sc):
    """Mapping over a stacked array and unstacking equals a plain ``map``."""
    barr = _2D_stackable_preamble(sc)

    def double(x):
        return x * 2

    for func in (double,):
        stacked = barr.stack()
        stacked_map = stacked.map(func)
        normal_map = barr.map(func)
        unstacked = stacked_map.unstack()

        # shape, split and contents must all agree
        assert normal_map.shape == unstacked.shape
        assert normal_map.split == unstacked.split
        assert allclose(normal_map.toarray(), unstacked.toarray())