def test_empty():
    for shp in [(), (0,), (5,), (0, 0), (1, 0), (0, 1), (6, 7),
                (0, 0, 0), (1, 0, 0), (0, 1, 0), (0, 0, 1),
                (4, 8, 9), (1, 8, 9)]:
        for order in ["C", "F"]:
            for dtype in dtypes_all:
                x = numpy.empty(shp, dtype, order)
                y = gpu_ndarray.empty(shp, dtype, order)
                check_meta(x, y)
    x = gpu_ndarray.empty(())  # no dtype and order param
    y = numpy.empty(())
    check_meta(x, y)
    try:
        gpu_ndarray.empty()
        assert False
    except TypeError:
        pass
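
# `check_meta` is defined elsewhere in this test module. As a rough,
# hedged sketch of the kind of metadata comparison the test above relies
# on, something like the helper below would do; the exact set of checks
# (and whether the GPU array exposes numpy-style `strides`) is an
# assumption, not the actual implementation.
def check_meta_sketch(x, y):
    # Shape, dtype and strides should agree between the numpy array and
    # the GPU array so later tests can compare them element-wise.
    assert x.shape == y.shape
    assert x.dtype == y.dtype
    assert x.strides == y.strides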
def test_elemwise_collapse():
    """Test collapsing under many broadcast and strided patterns."""
    for dtype1 in ["int16", "float32", "int8"]:
        for dtype2 in ["int16", "float32", "int8"]:
            for shape1_, shape2_, expected in [
                # 1d, to test this special case
                ((40,), (40,), 0),
                ((40,), (1,), 1),
                # No broadcastable dimensions
                ((4, 5, 6, 9), (4, 5, 6, 9), 0),
                # All inputs have one (and the same) broadcastable dimension
                ((1, 4, 5, 9), (1, 4, 5, 9), 0),
                ((4, 1, 5, 9), (4, 1, 5, 9), 0),
                ((4, 5, 1, 9), (4, 5, 1, 9), 0),
                ((4, 5, 9, 1), (4, 5, 9, 1), 0),
                # One input has one broadcastable dimension
                ((1, 5, 6, 9), (4, 5, 6, 9), 2),
                ((4, 1, 6, 9), (4, 5, 6, 9), 3),
                ((4, 5, 1, 9), (4, 5, 6, 9), 3),
                ((4, 5, 6, 1), (4, 5, 6, 9), 2),
                # One input has two broadcastable dimensions
                ((1, 1, 6, 9), (4, 5, 6, 9), 2),
                ((1, 5, 1, 9), (4, 5, 6, 9), 4),
                ((1, 5, 6, 1), (4, 5, 6, 9), 3),
                ((4, 1, 1, 9), (4, 5, 6, 9), 3),
                ((4, 1, 6, 1), (4, 5, 6, 9), 4),
                ((4, 5, 1, 1), (4, 5, 6, 9), 2),
                # One input has three broadcastable dimensions
                ((1, 1, 1, 9), (4, 5, 6, 9), 2),
                ((1, 1, 6, 1), (4, 5, 6, 9), 3),
                ((1, 5, 1, 1), (4, 5, 6, 9), 3),
                ((4, 1, 1, 1), (4, 5, 6, 9), 2),
                # One scalar
                ((1, 1, 1, 1), (4, 5, 6, 9), 1),
                # One scalar, the other with one broadcastable dimension
                ((1, 1, 1, 1), (4, 5, 6, 1), 1),
            ]:
                scalar_cpu = rand((1,) * len(shape1_), dtype=dtype1)
                scalar_gpu = gpu_ndarray.GpuNdArrayObject(scalar_cpu)
                scalar_gpu1 = MyGpuNdArray(scalar_gpu)
                for shape1, shape2 in [(shape1_, shape2_),
                                       (shape2_, shape1_)]:
                    a_cpu = rand(shape1, dtype=dtype1)
                    a = gpu_ndarray.GpuNdArrayObject(a_cpu)
                    a1 = MyGpuNdArray(a)

                    b_cpu = rand(shape2, dtype=dtype2)
                    b = gpu_ndarray.GpuNdArrayObject(b_cpu)
                    b1 = MyGpuNdArray(b)

                    assert len(shape1) == len(shape2)
                    o_shape = []
                    for i in range(len(shape1)):
                        o_shape.append(max(shape1[i], shape2[i]))
                    o = gpu_ndarray.empty(o_shape,
                                          dtype=(a_cpu + b_cpu).dtype)

                    # 1.1 Check direct collapse
                    nd_collaps, info = elemwise_collapses([a, b], [o])
                    assert nd_collaps == expected, (shape1, shape2,
                                                    nd_collaps,
                                                    expected, info)

                    # 1.2 Check computations are still valid
                    f = MyGpuNdArray.gen_fct(theano.tensor.add,
                                             [a1, b1], len(shape1))
                    out = f([a1, b1])
                    out2 = f([a1, b1], out=out)
                    assert out is out2
                    assert numpy.allclose(numpy.asarray(f([a1, b1])),
                                          a_cpu + b_cpu)
                    assert numpy.allclose(
                        numpy.asarray(MyGpuNdArray.adds(a1, b1)),
                        a_cpu + b_cpu)
                    assert numpy.allclose(
                        numpy.asarray(MyGpuNdArray.add(a1, b1)),
                        a_cpu + b_cpu)
                    assert MyGpuNdArray.add(a1, b1, out=out2) is out2

                    # 1.3 Check it still works without collapsing
                    f = MyGpuNdArray.gen_fct(theano.tensor.add,
                                             [a1, b1], len(shape1),
                                             collapse=False)
                    out = f([a1, b1])
                    out2 = f([a1, b1], out=out)
                    assert out is out2
                    assert numpy.allclose(numpy.asarray(f([a1, b1])),
                                          a_cpu + b_cpu)
                    assert numpy.allclose(
                        numpy.asarray(MyGpuNdArray.adds(a1, b1)),
                        a_cpu + b_cpu)
                    assert numpy.allclose(
                        numpy.asarray(MyGpuNdArray.add(a1, b1)),
                        a_cpu + b_cpu)
                    assert MyGpuNdArray.add(a1, b1, out=out2) is out2

                    # 2.1 What if we add a scalar?
                    nd_collaps, info = elemwise_collapses(
                        [a, b, scalar_gpu], [o])
                    if expected == 0:
                        expected2 = 1
                    else:
                        expected2 = expected
                    assert nd_collaps == expected2, (shape1, shape2,
                                                     nd_collaps,
                                                     expected, info)

                    # 2.2 Check computation
                    assert numpy.allclose(
                        numpy.asarray(
                            MyGpuNdArray.adds(a1, b1, scalar_gpu1)),
                        a_cpu + b_cpu + scalar_cpu)

                    # 3.1 What if one of the dimensions is strided?
                    broadcast = any(i == 1 for i in a.shape + b.shape)
                    if expected == 0:
                        # A step on one axis breaks the fully contiguous
                        # case, so it now collapses to two dimensions.
                        expected2 = 2
                    else:
                        expected2 = expected

                    if len(shape1_) != 4:
                        continue

                    if a.shape[0] != 1:
                        shape = list(shape1)
                        shape[0] *= 2
                        c_cpu = rand(shape, dtype='float32')
                        c = gpu_ndarray.GpuNdArrayObject(c_cpu)[::2]
                        c1 = MyGpuNdArray(c)
                        nd_collaps, info = elemwise_collapses([c, b], [o])
                        err = ("strided", c.shape, shape2,
                               nd_collaps, expected, info)
                        if broadcast:
                            assert nd_collaps >= expected, err
                        else:
                            assert nd_collaps == expected2, err
                        assert numpy.allclose(
                            numpy.asarray(MyGpuNdArray.adds(c1, b1)),
                            numpy.asarray(c) + b_cpu)

                    if a.shape[1] != 1:
                        shape = list(shape1)
                        shape[1] *= 2
                        c_cpu = rand(shape, dtype='float32')
                        c = gpu_ndarray.GpuNdArrayObject(c_cpu)[::, ::2]
                        c1 = MyGpuNdArray(c)
                        nd_collaps, info = elemwise_collapses([c, b], [o])
                        err = ("strided", c.shape, shape2,
                               nd_collaps, expected, info)
                        if broadcast:
                            assert nd_collaps >= expected, err
                        else:
                            assert nd_collaps == expected2, err
                        assert numpy.allclose(
                            numpy.asarray(MyGpuNdArray.adds(c1, b1)),
                            numpy.asarray(c) + b_cpu)

                    if a.shape[2] != 1:
                        shape = list(shape1)
                        shape[2] *= 2
                        c_cpu = rand(shape, dtype='float32')
                        c = gpu_ndarray.GpuNdArrayObject(c_cpu)[::, ::, ::2]
                        c1 = MyGpuNdArray(c)
                        nd_collaps, info = elemwise_collapses([c, b], [o])
                        err = ("strided", c.shape, shape2,
                               nd_collaps, expected, info)
                        if broadcast:
                            assert nd_collaps >= expected, err
                        else:
                            assert nd_collaps == expected2, err
                        assert numpy.allclose(
                            numpy.asarray(MyGpuNdArray.adds(c1, b1)),
                            numpy.asarray(c) + b_cpu)

                    if a.shape[3] != 1:
                        shape = list(shape1)
                        shape[3] *= 2
                        c_cpu = rand(shape, dtype='float32')
                        c = gpu_ndarray.GpuNdArrayObject(
                            c_cpu)[::, ::, ::, ::2]
                        c1 = MyGpuNdArray(c)
                        nd_collaps, info = elemwise_collapses([c, b], [o])
                        err = ("strided", c.shape, shape2,
                               nd_collaps, expected, info)
                        if broadcast:
                            assert nd_collaps >= expected, err
                        else:
                            # Stepping only the innermost axis scales all
                            # strides consistently, so the whole array
                            # still collapses to one strided dimension.
                            assert nd_collaps == 1, err
                        assert numpy.allclose(
                            numpy.asarray(MyGpuNdArray.adds(c1, b1)),
                            numpy.asarray(c) + b_cpu)
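
# A reference sketch of the dimension collapsing exercised above. This is
# an assumption about the idea behind `elemwise_collapses`, not the library
# routine itself: for C-contiguous inputs of equal ndim, two adjacent axes
# can be merged when every input either broadcasts (size 1) over both axes
# or is full-size on both, leaving the elemwise kernel fewer loops to run.
# Strided inputs (section 3.1 above) additionally require compatible
# strides before merging, and the library reports special values such as 0
# for the fully contiguous case; this sketch reproduces neither refinement.
def collapse_dims_sketch(shapes):
    # Broadcast output shape; the tests guarantee equal ndim for all inputs.
    out = [max(dims) for dims in zip(*shapes)]
    collapsed = [out[0]]
    for i in range(1, len(out)):
        # Axes i-1 and i can merge when each input is broadcast on both
        # or full-size on both.
        mergeable = all((s[i - 1] == 1) == (s[i] == 1) for s in shapes)
        if mergeable:
            collapsed[-1] *= out[i]
        else:
            collapsed.append(out[i])
    return collapsed

# For example, shapes (1, 5, 6, 9) and (4, 5, 6, 9) collapse to [4, 270]:
# the last three axes merge, matching the expected value 2 in the table
# above.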
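
# A small numpy-only illustration (an assumption for exposition, not part
# of the test suite) of why the strided cases in section 3.1 above limit
# collapsing: stepping an outer axis with [::2] breaks the contiguity
# between that axis and its neighbours, while the inner axes remain
# mergeable among themselves.
import numpy

base = numpy.arange(8 * 5 * 6 * 9, dtype='float32').reshape(8, 5, 6, 9)
c = base[::2]  # shape (4, 5, 6, 9), strided on axis 0
assert not c.flags["C_CONTIGUOUS"]
# Each sub-array along the stepped axis is still contiguous, so axes 1-3
# can still collapse into a single loop; only the merge across the strided
# boundary is lost, leaving two loop dimensions overall.
assert c[0].flags["C_CONTIGUOUS"]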