def test_wgan_cost(backend_default):
    """
    Set up a Wasserstein GANCost transform and make sure cost and errors are
    getting computed correctly.

    Arguments:
        backend_default: backend fixture used to allocate the test buffers.
    """
    be = backend_default
    cost = GANCost(func="wasserstein")
    y_data = be.iobuf(5).fill(1.)
    y_noise = be.iobuf(5).fill(2.)
    output = be.iobuf(1)
    expected = be.iobuf(1)
    delta = be.iobuf(5)

    # fprop for discriminator cost
    output[:] = cost(y_data, y_noise)
    expected[:] = be.sum(y_data - y_noise, axis=0)
    # the comparison result has to be asserted, otherwise a mismatch in the
    # forward cost goes unnoticed
    assert tensors_allclose(output, expected)

    # bprop for wasserstein cost
    delta[:] = cost.bprop_data(y_data)
    assert allclose_with_out(delta.get(), 1.)
    delta[:] = cost.bprop_noise(y_noise)
    assert allclose_with_out(delta.get(), -1.)
    delta[:] = cost.bprop_generator(y_noise)
    assert allclose_with_out(delta.get(), 1.)
def test_cpu_randomstate():
    """
    Check that the CPU backend random state is reproducible.

    Two binary masks are generated three times: on a fresh backend, after
    ``rng_reset()`` and on a second backend created with the same seed. All
    three runs have to produce exactly the same masks (zero tolerance).
    """

    def draw_two_masks(backend, buf):
        # every mask uses a keep threshold drawn from backend.rng
        masks = []
        for _ in range(2):
            backend.make_binary_mask(buf, keepthresh=backend.rng.rand())
            masks.append(buf.get())
        return masks

    # run 1
    be = gen_backend(backend='cpu', rng_seed=100)
    a = be.empty((3, 3))
    first_run = draw_two_masks(be, a)

    # run 2, using reset
    be.rng_reset()
    reset_run = draw_two_masks(be, a)
    del be

    # run 3, using a new backend
    be = gen_backend(backend='cpu', rng_seed=100)
    a = be.empty((3, 3))
    fresh_run = draw_two_masks(be, a)

    # check equality
    assert tensors_allclose(first_run, reset_run, rtol=0., atol=0.)
    assert tensors_allclose(first_run, fresh_run, rtol=0., atol=0.)
    del be
def test_gradients(backend_tests, custom_args):
    """
    Compare the backend with numpy for one test function: the function values
    have to agree, and the autodiff gradient has to agree with the numerical
    gradient.

    Arguments:
        backend_tests: fixture that sets up the backend (read back through
                       NervanaObject.be).
        custom_args: tuple of (test_idx, f, flag, dim).
    """
    test_idx, f, flag, dim = custom_args

    # backend_tests fixture will parameterize over cpu, gpu, and mkl
    # backends as well as float16 and float32
    # pull the be and dtype from the actions of the fixture
    be = NervanaObject.be
    dtype = be.default_dtype

    # tensors: index 0 holds the numpy set, index 1 the backend set
    tensors = gen_backend_tensors([np, be], [dim] * 5, [flag] * 5, dtype=dtype)
    np_tensors = tensors[0]
    be_tensors = tensors[1]

    # function values
    numpy_func_val = call_func(f, np, np_tensors)
    backend_func_val = call_func(f, be, be_tensors)

    # gradients
    numerical_gradient = get_numerical_gradient(f, np_tensors)
    ad = get_audiff_gradient(f, be, be_tensors)
    autodiff_gradient = ad.get_grad_asnumpyarray(be_tensors)

    # TODO: stricter test to fix numerical issues
    assert tensors_allclose(numpy_func_val, backend_func_val,
                            rtol=1e-2, atol=1e-2)
    assert tensors_allclose(numerical_gradient, autodiff_gradient,
                            rtol=1e-02, atol=1e-3)

    # cleanup diff tree
    ad.cleanup()
    dtype = None
    be = None
def test_modified_gan_cost(backend_default):
    """
    Set up a modified GANCost transform and make sure cost and errors are
    getting computed correctly.

    Arguments:
        backend_default: backend fixture used to allocate the test buffers.
    """
    be = backend_default
    cost = GANCost(cost_type="dis", func="modified")
    y_data = be.iobuf(5).fill(1.)
    y_noise = be.iobuf(5).fill(2.)
    output = be.iobuf(1)
    expected = be.iobuf(1)
    delta = be.iobuf(5)

    # fprop for discriminator cost
    output[:] = cost(y_data, y_noise)
    expected[:] = -be.sum(be.safelog(y_data) + be.safelog(1 - y_noise), axis=0)
    # the comparison result has to be asserted, otherwise a mismatch in the
    # forward cost goes unnoticed
    assert tensors_allclose(output, expected)

    # bprop for modified cost
    delta[:] = cost.bprop_data(y_data)
    assert allclose_with_out(delta.get(), -1. / 1)
    delta[:] = cost.bprop_noise(y_noise)
    assert allclose_with_out(delta.get(), 1. - 2.)
    delta[:] = cost.bprop_generator(y_noise)
    assert allclose_with_out(delta.get(), -1. / 2.)
def test_cpu_randomstate():
    """
    Check that the CPU backend random state is reproducible.

    Two binary masks are generated three times: on a fresh backend, after
    ``rng_reset()`` and on a second backend created with the same seed. All
    three runs have to produce exactly the same masks (zero tolerance).
    """

    def draw_two_masks(backend, buf):
        # every mask uses a keep threshold drawn from backend.rng
        masks = []
        for _ in range(2):
            backend.make_binary_mask(buf, keepthresh=backend.rng.rand())
            masks.append(buf.get())
        return masks

    # run 1
    be = gen_backend(backend='cpu', rng_seed=100)
    a = be.empty((3, 3))
    first_run = draw_two_masks(be, a)

    # run 2, using reset
    be.rng_reset()
    reset_run = draw_two_masks(be, a)
    del be

    # run 3, using a new backend
    be = gen_backend(backend='cpu', rng_seed=100)
    a = be.empty((3, 3))
    fresh_run = draw_two_masks(be, a)

    # check equality
    assert tensors_allclose(first_run, reset_run, rtol=0., atol=0.)
    assert tensors_allclose(first_run, fresh_run, rtol=0., atol=0.)
    del be
def test_gan_cost(backend_default):
    """
    Set up a GANCost transform and make sure cost and errors are getting
    computed correctly.

    Arguments:
        backend_default: backend fixture used to allocate the test buffers.
    """
    be = backend_default
    cost = GANCost(cost_type="dis", original_cost=False)
    y_data = be.iobuf(5).fill(1.)
    y_noise = be.iobuf(5).fill(2.)
    output = be.iobuf(1)
    expected = be.iobuf(1)
    delta = be.iobuf(5)

    # fprop for discriminator cost
    output[:] = cost(y_data, y_noise)
    expected[:] = -be.sum(be.safelog(y_data) + be.safelog(1 - y_noise), axis=0)
    # the comparison result has to be asserted, otherwise a mismatch in the
    # forward cost goes unnoticed
    assert tensors_allclose(output, expected)

    # bprop for modified cost
    delta[:] = cost.bprop_data(y_data)
    assert np.allclose(delta.get(), -1. / 1)
    delta[:] = cost.bprop_noise(y_noise)
    assert np.allclose(delta.get(), 1. - 2.)
    delta[:] = cost.bprop_generator(y_noise)
    assert np.allclose(delta.get(), -1. / 2.)
def test_edge_cases_mkl(backend_pair_mkl):
    """
    Test several edge cases related to min/max bin, and rounding.

    Also test backend dump_hist_data functionality: the histograms dumped by
    each backend have to match the same reference histograms.
    """
    nm, nc = backend_pair_mkl
    backends = [nm, nc]

    # edge case test
    cases = [
        ("edges", np.array([2 ** -48, 2 ** 15], dtype=np.float32)),
        ("rounding", np.array([2 ** 5, 63.99998856, 2 ** 6, 2 ** -3, 2 ** -4,
                               0.11262291, 92.22483826], dtype=np.float32)),
        ("fp16 rounding", np.array([45.21875], dtype=np.float16)),
    ]

    reference = {}
    for tag, values in cases:
        reference[tag] = ref_hist(values)
        for be in backends:
            hist = be.array(values).hist(tag)
            assert tensors_allclose(reference[tag], hist), tag + str(be)

    # dump_hist_data test
    for be in backends:
        hist_data, hist_map = be.dump_hist_data()
        for tag, _ in cases:
            dumped = hist_data[hist_map[tag]]
            assert tensors_allclose(reference[tag], dumped), tag + str(be)
def test_edge_cases(backend_pair):
    """
    Test several edge cases related to min/max bin, and rounding.

    Also test backend dump_hist_data functionality.

    Arguments:
        backend_pair: fixture yielding the (gpu, cpu) backend pair.

    Raises:
        RuntimeError: if device 0 reports a compute capability below 3.0.
    """
    # truthiness check instead of ``is False``: an identity test would let a
    # falsy non-bool comparison result (e.g. numpy.bool_) slip through
    if not (check_gpu.get_compute_capability(0) >= 3.0):
        raise RuntimeError(
            "Device does not have CUDA compute capability 3.0 or greater")

    ng, nc = backend_pair

    # edge case test
    np_ref = dict()
    inputs = [
        ("edges", np.array([2**-48, 2**15], dtype=np.float32)),
        ("rounding", np.array(
            [2**5, 63.99998856, 2**6, 2**-3, 2**-4, 0.11262291, 92.22483826],
            dtype=np.float32)),
        ("fp16 rounding", np.array([45.21875], dtype=np.float16))
    ]
    for tag, inp in inputs:
        np_ref[tag] = ref_hist(inp)
        for be in [ng, nc]:
            be_inp = be.array(inp)
            be_hist = be_inp.hist(tag)
            assert tensors_allclose(np_ref[tag], be_hist), tag + str(be)

    # dump_hist_data test
    for be in [ng, nc]:
        be_hist_data, be_hist_map = be.dump_hist_data()
        for tag, inp in inputs:
            be_data = be_hist_data[be_hist_map[tag]]
            assert tensors_allclose(np_ref[tag], be_data), tag + str(be)
def test_edge_cases_mkl(backend_pair_mkl):
    """
    Test several edge cases related to min/max bin, and rounding.

    Also test backend dump_hist_data functionality: the histograms dumped by
    each backend have to match the same reference histograms.
    """
    nm, nc = backend_pair_mkl
    backends = [nm, nc]

    # edge case test
    cases = [
        ("edges", np.array([2**-48, 2**15], dtype=np.float32)),
        ("rounding", np.array(
            [2**5, 63.99998856, 2**6, 2**-3, 2**-4, 0.11262291, 92.22483826],
            dtype=np.float32)),
        ("fp16 rounding", np.array([45.21875], dtype=np.float16)),
    ]

    reference = {}
    for tag, values in cases:
        reference[tag] = ref_hist(values)
        for be in backends:
            hist = be.array(values).hist(tag)
            assert tensors_allclose(reference[tag], hist), tag + str(be)

    # dump_hist_data test
    for be in backends:
        hist_data, hist_map = be.dump_hist_data()
        for tag, _ in cases:
            dumped = hist_data[hist_map[tag]]
            assert tensors_allclose(reference[tag], dumped), tag + str(be)
def test_edge_cases(backend_pair):
    """
    Test several edge cases related to min/max bin, and rounding.

    Also test backend dump_hist_data functionality.

    Arguments:
        backend_pair: fixture yielding the (gpu, cpu) backend pair.

    Raises:
        RuntimeError: if device 0 reports a compute capability below 3.0.
    """
    # truthiness check instead of ``is False``: an identity test would let a
    # falsy non-bool comparison result (e.g. numpy.bool_) slip through
    if not (check_gpu.get_compute_capability(0) >= 3.0):
        raise RuntimeError("Device does not have CUDA compute capability 3.0 or greater")

    ng, nc = backend_pair

    # edge case test
    np_ref = dict()
    inputs = [
        ("edges", np.array([2 ** -48, 2 ** 15], dtype=np.float32)),
        ("rounding", np.array([2 ** 5, 63.99998856, 2 ** 6, 2 ** -3, 2 ** -4,
                               0.11262291, 92.22483826], dtype=np.float32)),
        ("fp16 rounding", np.array([45.21875], dtype=np.float16))
    ]
    for tag, inp in inputs:
        np_ref[tag] = ref_hist(inp)
        for be in [ng, nc]:
            be_inp = be.array(inp)
            be_hist = be_inp.hist(tag)
            assert tensors_allclose(np_ref[tag], be_hist), tag + str(be)

    # dump_hist_data test
    for be in [ng, nc]:
        be_hist_data, be_hist_map = be.dump_hist_data()
        for tag, inp in inputs:
            be_data = be_hist_data[be_hist_map[tag]]
            assert tensors_allclose(np_ref[tag], be_data), tag + str(be)
def test_gpu_randomstate(device_id):
    """
    Check that the GPU backend random state is reproducible.

    The same sequence of draws is taken three times: on a fresh backend, after
    ``rng_reset()`` and on a second backend created with the same seed. All
    three runs have to produce exactly the same values (zero tolerance).

    Arguments:
        device_id: device id handed to gen_backend.
    """

    def draw_samples(backend, buf):
        # interleave backend.rand() and backend.rng draws, finish with a mask
        buf[:] = backend.rand()  # gpu rand
        s0 = buf.get()
        s1 = backend.rng.rand(3, 3)  # host rand
        buf[:] = backend.rand()  # gpu rand
        s2 = buf.get()
        backend.make_binary_mask(buf, keepthresh=backend.rng.rand())
        s3 = buf.get()
        return [s0, s1, s2, s3]

    def check_single_live_state(backend):
        # exactly one rand state is registered and it is flagged alive
        assert len(backend.context_rand_state_map) == 1
        assert len(backend.context_rand_state_alive) == 1
        for ctx in backend.context_rand_state_alive:
            assert backend.context_rand_state_alive[ctx] is True

    # run 1
    be = gen_backend(backend='gpu', rng_seed=100, device_id=device_id)
    a = be.empty((3, 3))
    first_run = draw_samples(be, a)
    check_single_live_state(be)

    # run 2, using reset
    be.rng_reset()
    reset_run = draw_samples(be, a)
    check_single_live_state(be)
    del be

    # run 3, using a new backend
    be = gen_backend(backend='gpu', rng_seed=100, device_id=device_id)
    a = be.empty((3, 3))
    fresh_run = draw_samples(be, a)

    # check equality
    assert tensors_allclose(first_run, reset_run, rtol=0., atol=0.)
    assert tensors_allclose(first_run, fresh_run, rtol=0., atol=0.)
    del be
def test_slicing(fargs_tests, backend_pair_dtype):
    """
    Check that slicing a tensor gives the same values on both backends of the
    pair, and that assigning to a slice keeps the two tensors in agreement.

    Arguments:
        fargs_tests: test parameters; element 0 holds the tensor dims.
        backend_pair_dtype: fixture yielding the (gpu, cpu) backend pair.
    """
    dims = fargs_tests[0]
    gpu, cpu = backend_pair_dtype
    dtype = gpu.default_dtype

    array_np = np.random.uniform(-1, 1, dims).astype(dtype)
    array_ng = gpu.array(array_np, dtype=dtype)
    array_nc = cpu.array(array_np, dtype=dtype)

    # (gpu index, cpu index) pairs; the last pair intentionally keeps the
    # differing index expressions of the two sides
    index_pairs = [
        (np.s_[0], np.s_[0]),
        (np.s_[-1], np.s_[-1]),
        (np.s_[0, :], np.s_[0, :]),
        (np.s_[0:], np.s_[0:]),
        (np.s_[:-1], np.s_[:-1]),
        (np.s_[:, 0], np.s_[:, 0]),
        (np.s_[:, 0:1], np.s_[:, 0:1]),
        (np.s_[-1, 0:], np.s_[-1:, 0:]),
    ]
    for gpu_idx, cpu_idx in index_pairs:
        assert tensors_allclose(array_ng[gpu_idx], array_nc[cpu_idx], rtol=0, atol=1e-3)

    # slice assignment
    array_ng[0] = 0
    array_nc[0] = 0
    assert tensors_allclose(array_ng, array_nc, rtol=0, atol=1e-3)
def test_slicing_mkl(fargs_tests, backend_pair_dtype_mkl_32):
    """
    Check that slicing a tensor gives the same values on both backends of the
    pair, and that assigning to a slice keeps the two tensors in agreement.

    Arguments:
        fargs_tests: test parameters; element 0 holds the tensor dims.
        backend_pair_dtype_mkl_32: fixture yielding the (mkl, cpu) backend pair.
    """
    dims = fargs_tests[0]
    mkl, cpu = backend_pair_dtype_mkl_32
    dtype = mkl.default_dtype

    array_np = np.random.uniform(-1, 1, dims).astype(dtype)
    array_nc = cpu.array(array_np, dtype=dtype)
    array_nm = mkl.array(array_np, dtype=dtype)

    # (mkl index, cpu index) pairs; the last pair intentionally keeps the
    # differing index expressions of the two sides
    index_pairs = [
        (np.s_[0], np.s_[0]),
        (np.s_[-1], np.s_[-1]),
        (np.s_[0, :], np.s_[0, :]),
        (np.s_[0:], np.s_[0:]),
        (np.s_[:-1], np.s_[:-1]),
        (np.s_[:, 0], np.s_[:, 0]),
        (np.s_[:, 0:1], np.s_[:, 0:1]),
        (np.s_[-1, 0:], np.s_[-1:, 0:]),
    ]
    for mkl_idx, cpu_idx in index_pairs:
        assert tensors_allclose(array_nm[mkl_idx], array_nc[cpu_idx], rtol=0, atol=1e-3)

    # slice assignment
    array_nc[0] = 0
    array_nm[0] = 0
    assert tensors_allclose(array_nm, array_nc, rtol=0, atol=1e-3)
def test_gpu_randomstate(device_id):
    """
    Check that the GPU backend random state is reproducible.

    The same sequence of draws is taken three times: on a fresh backend, after
    ``rng_reset()`` and on a second backend created with the same seed. All
    three runs have to produce exactly the same values (zero tolerance).

    Arguments:
        device_id: device id handed to gen_backend.
    """

    def draw_samples(backend, buf):
        # interleave backend.rand() and backend.rng draws, finish with a mask
        buf[:] = backend.rand()  # gpu rand
        s0 = buf.get()
        s1 = backend.rng.rand(3, 3)  # host rand
        buf[:] = backend.rand()  # gpu rand
        s2 = buf.get()
        backend.make_binary_mask(buf, keepthresh=backend.rng.rand())
        s3 = buf.get()
        return [s0, s1, s2, s3]

    def check_single_live_state(backend):
        # exactly one rand state is registered and it is flagged alive
        assert len(backend.context_rand_state_map) == 1
        assert len(backend.context_rand_state_alive) == 1
        for ctx in backend.context_rand_state_alive:
            assert backend.context_rand_state_alive[ctx] is True

    # run 1
    be = gen_backend(backend='gpu', rng_seed=100, device_id=device_id)
    a = be.empty((3, 3))
    first_run = draw_samples(be, a)
    check_single_live_state(be)

    # run 2, using reset
    be.rng_reset()
    reset_run = draw_samples(be, a)
    check_single_live_state(be)
    del be

    # run 3, using a new backend
    be = gen_backend(backend='gpu', rng_seed=100, device_id=device_id)
    a = be.empty((3, 3))
    fresh_run = draw_samples(be, a)

    # check equality
    assert tensors_allclose(first_run, reset_run, rtol=0., atol=0.)
    assert tensors_allclose(first_run, fresh_run, rtol=0., atol=0.)
    del be
def test_hist(nbin_offset_dim_dtype_inp, backend_pair):
    """
    Compare the nervanagpu and nervanacpu hist implementation to the reference
    implementation (ref_hist).

    Parameterized test case, uses pytest_generate_test to enumerate
    dim_dtype_inp tuples that drive the test.

    Arguments:
        nbin_offset_dim_dtype_inp: tuple of
            ((nbins, offset), dim, dtype, (name, inp_gen)).
        backend_pair: fixture yielding the (gpu, cpu) backend pair.

    Raises:
        RuntimeError: if device 0 reports a compute capability below 3.0.
    """
    (nbins, offset), dim, dtype, (name, inp_gen) = nbin_offset_dim_dtype_inp

    # truthiness check instead of ``is False``: an identity test would let a
    # falsy non-bool comparison result (e.g. numpy.bool_) slip through
    if not (check_gpu.get_compute_capability(0) >= 3.0):
        raise RuntimeError(
            "Device does not have CUDA compute capability 3.0 or greater")

    ng, nc = backend_pair
    ng.set_hist_buffers(nbins, offset)
    nc.set_hist_buffers(nbins, offset)

    np_inp = inp_gen(dim).astype(dtype)
    np_hist = ref_hist(np_inp, nbins=nbins, offset=offset)
    for be in [ng, nc]:
        be_inp = be.array(np_inp, dtype=dtype)
        be_hist = be_inp.hist(name)
        assert tensors_allclose(np_hist, be_hist)
def test_hist(nbin_offset_dim_dtype_inp, backend_pair):
    """
    Compare the nervanagpu and nervanacpu hist implementation to the reference
    implementation (ref_hist).

    Parameterized test case, uses pytest_generate_test to enumerate
    dim_dtype_inp tuples that drive the test.

    Arguments:
        nbin_offset_dim_dtype_inp: tuple of
            ((nbins, offset), dim, dtype, (name, inp_gen)).
        backend_pair: fixture yielding the (gpu, cpu) backend pair.

    Raises:
        RuntimeError: if device 0 reports a compute capability below 3.0.
    """
    (nbins, offset), dim, dtype, (name, inp_gen) = nbin_offset_dim_dtype_inp

    # truthiness check instead of ``is False``: an identity test would let a
    # falsy non-bool comparison result (e.g. numpy.bool_) slip through
    if not (check_gpu.get_compute_capability(0) >= 3.0):
        raise RuntimeError("Device does not have CUDA compute capability 3.0 or greater")

    ng, nc = backend_pair
    ng.set_hist_buffers(nbins, offset)
    nc.set_hist_buffers(nbins, offset)

    np_inp = inp_gen(dim).astype(dtype)
    np_hist = ref_hist(np_inp, nbins=nbins, offset=offset)
    for be in [ng, nc]:
        be_inp = be.array(np_inp, dtype=dtype)
        be_hist = be_inp.hist(name)
        assert tensors_allclose(np_hist, be_hist)
def test_vs_numpy(backend_tests, custom_args):
    """
    Compare the value of a test function evaluated on the backend with the
    value computed by numpy.

    A mismatch is turned into an expected failure (issue #854) only when the
    GPU backend runs on a TITAN Xp platform; every other mismatch fails the
    test with the original AssertionError.

    Arguments:
        backend_tests: fixture that sets up the backend (read back through
                       NervanaObject.be).
        custom_args: tuple of (test_idx, f, flag, dim).
    """
    test_idx, f, flag, dim = custom_args

    # backend
    be = NervanaObject.be
    dtype = be.default_dtype

    # tensors
    tensors = gen_backend_tensors([np, be], [dim] * 5, [flag] * 5, dtype=dtype)

    # compare function values
    numpy_func_val = call_func(f, np, tensors[0])
    backend_func_val = call_func(f, be, tensors[1])

    try:
        assert tensors_allclose(numpy_func_val, backend_func_val, rtol=1e-2, atol=1e-2)
    except AssertionError:
        # only the gpu backend is eligible for the xfail; a mismatch on any
        # other backend is a real failure and must not be swallowed
        if not isinstance(NervanaObject.be, NervanaGPU):
            raise

        # the PLATFORM environment variable wins over probing the device
        platform = os.getenv("PLATFORM")
        if not platform:
            # default keeps ``platform`` bound when no TITAN Xp is detected
            platform = "unknown"
            if os.path.exists("/usr/bin/nvidia-smi"):
                cmd = ('/usr/bin/nvidia-smi -q | grep "Product Name" | tail -1 | '
                       "cut -f 2 -d ':' | cut -f 2,3 -d ' '")
                gpu_info = subp.check_output(cmd, shell=True)
                # check_output returns bytes on Python 3; decode before
                # comparing with a text literal
                if isinstance(gpu_info, bytes):
                    gpu_info = gpu_info.decode("utf-8", "replace")
                if gpu_info.strip() == "TITAN Xp":
                    platform = "TITANXP"

        # xfail for gpu backend on TITAN XP platforms
        if platform == 'TITANXP':
            pytest.xfail(reason="xfail issue #854 with {} PLATFORM".format(platform))
        raise
def hist_helper(nbin_offset_dim_dtype_inp, be):
    """
    Compare the hist implementation of one backend with the reference
    implementation (ref_hist).

    Arguments:
        nbin_offset_dim_dtype_inp: tuple of
            ((nbins, offset), dim, dtype, (name, inp_gen)).
        be: backend under test.
    """
    bin_cfg, dim, dtype, inp_cfg = nbin_offset_dim_dtype_inp
    nbins, offset = bin_cfg
    name, inp_gen = inp_cfg

    be.set_hist_buffers(nbins, offset)

    # reference histogram computed on the host input
    host_inp = inp_gen(dim).astype(dtype)
    expected = ref_hist(host_inp, nbins=nbins, offset=offset)

    # histogram computed by the backend on the same input
    actual = be.array(host_inp, dtype=dtype).hist(name)
    assert tensors_allclose(expected, actual)
def edge_cases_helper(be):
    """
    Run the histogram edge cases (min/max bin and rounding) on one backend and
    check the backend's dump_hist_data output against the same references.

    Arguments:
        be: backend under test.
    """
    cases = [
        ("edges", np.array([2**-48, 2**15], dtype=np.float32)),
        ("rounding", np.array(
            [2**5, 63.99998856, 2**6, 2**-3, 2**-4, 0.11262291, 92.22483826],
            dtype=np.float32)),
        ("fp16 rounding", np.array([45.21875], dtype=np.float16)),
    ]

    reference = {}
    for tag, values in cases:
        reference[tag] = ref_hist(values)
        hist = be.array(values).hist(tag)
        assert tensors_allclose(reference[tag], hist), tag + str(be)

    # dump_hist_data test
    hist_data, hist_map = be.dump_hist_data()
    for tag, _ in cases:
        dumped = hist_data[hist_map[tag]]
        assert tensors_allclose(reference[tag], dumped), tag + str(be)
def test_batched_dot_mkl(backend_pair_bench_mkl):
    """
    Run the batched dot problem on the mkl and cpu backends and compare all
    three results of each run with the numpy results.

    Arguments:
        backend_pair_bench_mkl: fixture yielding the (mkl, cpu) backend pair.
    """
    formatters = {'int': lambda x: "%2d" % x,
                  'float': lambda x: "%2.0f" % x}
    np.set_printoptions(threshold=8192 * 4, linewidth=600, formatter=formatters)

    nm, nc = backend_pair_bench_mkl

    dtype = np.float32  # np.float16 or np.float32

    X = 100   # Batch Size
    N = 32    # Minibatch Size
    C = 1536  # Input Features
    K = 768   # Output Features

    cpuI, cpuE, cpuW = setup_test_data(X, N, C, K, dtype)

    cpu_results = run_batched_dot(nc, cpuI, cpuE, cpuW, X, dtype)
    ncO, ncB, ncU = cpu_results
    np_results = run_batched_dot(np, cpuI, cpuE, cpuW, X, dtype)
    npO, npB, npU = np_results
    mkl_results = run_batched_dot(nm, cpuI, cpuE, cpuW, X, dtype)
    nmO, nmB, nmU = mkl_results

    # mkl against numpy first, then cpu against numpy
    for expected, actual in zip((npO, npB, npU), (nmO, nmB, nmU)):
        assert tensors_allclose(expected, actual, rtol=0, atol=1e-3)
    for expected, actual in zip((npO, npB, npU), (ncO, ncB, ncU)):
        assert tensors_allclose(expected, actual, rtol=0, atol=1e-3)
def batched_dot_helper(be):
    """
    Run the batched dot problem on ``be`` and compare all three results with
    the numpy results.

    A backend whose class name contains "GPU" is only exercised when its
    compute capability is greater than (5, 0); otherwise nothing is compared.

    Arguments:
        be: backend under test.
    """
    formatters = {
        'int': lambda x: "%2d" % x,
        'float': lambda x: "%2.0f" % x
    }
    np.set_printoptions(threshold=8192 * 4, linewidth=600, formatter=formatters)

    dtype = np.float32  # np.float16 or np.float32

    X = 100   # Batch Size
    N = 32    # Minibatch Size
    C = 1536  # Input Features
    K = 768   # Output Features

    cpuI, cpuE, cpuW = setup_test_data(X, N, C, K, dtype)
    npO, npB, npU = run_batched_dot(np, cpuI, cpuE, cpuW, X, dtype)

    # compute_capability is only read for GPU backends
    is_gpu = "GPU" in str(be.__class__)
    if is_gpu and not be.compute_capability > (5, 0):
        return

    beO, beB, beU = run_batched_dot(be, cpuI, cpuE, cpuW, X, dtype)
    for expected, actual in zip((npO, npB, npU), (beO, beB, beU)):
        assert tensors_allclose(expected, actual, rtol=0, atol=1e-3)
def test_batched_dot_mkl(backend_pair_bench_mkl):
    """
    Run the batched dot problem on the mkl and cpu backends and compare all
    three results of each run with the numpy results.

    Arguments:
        backend_pair_bench_mkl: fixture yielding the (mkl, cpu) backend pair.
    """
    formatters = {
        'int': lambda x: "%2d" % x,
        'float': lambda x: "%2.0f" % x
    }
    np.set_printoptions(threshold=8192 * 4, linewidth=600, formatter=formatters)

    nm, nc = backend_pair_bench_mkl

    dtype = np.float32  # np.float16 or np.float32

    X = 100   # Batch Size
    N = 32    # Minibatch Size
    C = 1536  # Input Features
    K = 768   # Output Features

    cpuI, cpuE, cpuW = setup_test_data(X, N, C, K, dtype)

    cpu_results = run_batched_dot(nc, cpuI, cpuE, cpuW, X, dtype)
    ncO, ncB, ncU = cpu_results
    np_results = run_batched_dot(np, cpuI, cpuE, cpuW, X, dtype)
    npO, npB, npU = np_results
    mkl_results = run_batched_dot(nm, cpuI, cpuE, cpuW, X, dtype)
    nmO, nmB, nmU = mkl_results

    # mkl against numpy first, then cpu against numpy
    for expected, actual in zip((npO, npB, npU), (nmO, nmB, nmU)):
        assert tensors_allclose(expected, actual, rtol=0, atol=1e-3)
    for expected, actual in zip((npO, npB, npU), (ncO, ncB, ncU)):
        assert tensors_allclose(expected, actual, rtol=0, atol=1e-3)
def test_batched_dot(backend_pair_bench):
    """
    Run the batched dot problem on the gpu and cpu backends and compare all
    three results of each run with the numpy results.

    The gpu backend is only exercised when its compute capability is greater
    than (5, 0); the cpu comparison always runs.

    Arguments:
        backend_pair_bench: fixture yielding the (gpu, cpu) backend pair.
    """
    formatters = {"int": lambda x: "%2d" % x,
                  "float": lambda x: "%2.0f" % x}
    np.set_printoptions(threshold=8192 * 4, linewidth=600, formatter=formatters)

    ng, nc = backend_pair_bench

    dtype = np.float32  # np.float16 or np.float32

    X = 100   # Batch Size
    N = 32    # Minibatch Size
    C = 1536  # Input Features
    K = 768   # Output Features

    cpuI, cpuE, cpuW = setup_test_data(X, N, C, K, dtype)

    cpu_results = run_batched_dot(nc, cpuI, cpuE, cpuW, X, dtype)
    ncO, ncB, ncU = cpu_results
    np_results = run_batched_dot(np, cpuI, cpuE, cpuW, X, dtype)
    npO, npB, npU = np_results

    # gpu against numpy
    if ng.compute_capability > (5, 0):
        ngO, ngB, ngU = run_batched_dot(ng, cpuI, cpuE, cpuW, X, dtype)
        for expected, actual in zip((npO, npB, npU), (ngO, ngB, ngU)):
            assert tensors_allclose(expected, actual, rtol=0, atol=1e-3)

    # cpu against numpy
    for expected, actual in zip((npO, npB, npU), (ncO, ncB, ncU)):
        assert tensors_allclose(expected, actual, rtol=0, atol=1e-3)
def test_vs_numpy(backend_tests, custom_args):
    """
    Compare the value of a test function evaluated on the backend with the
    value computed by numpy.

    A mismatch is turned into an expected failure (issue #854) only when the
    GPU backend runs on a TITAN Xp platform; every other mismatch fails the
    test with the original AssertionError.

    Arguments:
        backend_tests: fixture that sets up the backend (read back through
                       NervanaObject.be).
        custom_args: tuple of (test_idx, f, flag, dim).
    """
    test_idx, f, flag, dim = custom_args

    # backend
    be = NervanaObject.be
    dtype = be.default_dtype

    # tensors
    tensors = gen_backend_tensors([np, be], [dim] * 5, [flag] * 5, dtype=dtype)

    # compare function values
    numpy_func_val = call_func(f, np, tensors[0])
    backend_func_val = call_func(f, be, tensors[1])

    try:
        assert tensors_allclose(numpy_func_val, backend_func_val, rtol=1e-2, atol=1e-2)
    except AssertionError:
        # only the gpu backend is eligible for the xfail; a mismatch on any
        # other backend is a real failure and must not be swallowed
        if not isinstance(NervanaObject.be, NervanaGPU):
            raise

        # the PLATFORM environment variable wins over probing the device
        platform = os.getenv("PLATFORM")
        if not platform:
            # default keeps ``platform`` bound when no TITAN Xp is detected
            platform = "unknown"
            if os.path.exists("/usr/bin/nvidia-smi"):
                cmd = ('/usr/bin/nvidia-smi -q | grep "Product Name" | tail -1 | '
                       "cut -f 2 -d ':' | cut -f 2,3 -d ' '")
                gpu_info = subp.check_output(cmd, shell=True)
                # check_output returns bytes on Python 3; decode before
                # comparing with a text literal
                if isinstance(gpu_info, bytes):
                    gpu_info = gpu_info.decode("utf-8", "replace")
                if gpu_info.strip() == "TITAN Xp":
                    platform = "TITANXP"

        # xfail for gpu backend on TITAN XP platforms
        if platform == 'TITANXP':
            pytest.xfail(reason="xfail issue #854 with {} PLATFORM".format(platform))
        raise
def test_reshape_separate_mkl(fargs_tests, backend_pair_dtype_mkl_32):
    """
    Check that splitting the first dimension in two by a reshape gives the
    same tensor on both backends of the pair.

    The comparison only runs when the first dimension is even.

    Arguments:
        fargs_tests: test parameters; element 0 holds the tensor dims.
        backend_pair_dtype_mkl_32: fixture yielding the (mkl, cpu) backend pair.
    """
    dims = fargs_tests[0]
    mkl, cpu = backend_pair_dtype_mkl_32
    dtype = mkl.default_dtype

    array_np = np.random.uniform(-1, 1, dims).astype(dtype)
    array_nc = cpu.array(array_np, dtype=dtype)
    array_nm = mkl.array(array_np, dtype=dtype)

    # an odd first dimension cannot be split in two
    if dims[0] % 2 != 0:
        return

    target_shape = (2, dims[0] // 2, dims[1])
    reshaped_nc = array_nc.reshape(target_shape)
    reshaped_nm = array_nm.reshape(target_shape)
    assert tensors_allclose(reshaped_nm, reshaped_nc, rtol=0, atol=1e-6)
def test_vs_numpy(backend_tests, custom_args):
    """
    Compare the value of a test function evaluated on the backend with the
    value computed by numpy.

    Arguments:
        backend_tests: fixture that sets up the backend (read back through
                       NervanaObject.be).
        custom_args: tuple of (test_idx, f, flag, dim).
    """
    test_idx, f, flag, dim = custom_args

    # backend
    be = NervanaObject.be
    dtype = be.default_dtype

    # tensors: index 0 holds the numpy set, index 1 the backend set
    n_tensors = 4
    tensors = gen_backend_tensors([np, be], [dim] * n_tensors, [flag] * n_tensors,
                                  dtype=dtype)

    # compare function values
    expected = call_func(f, np, tensors[0])
    actual = call_func(f, be, tensors[1])
    assert tensors_allclose(expected, actual, rtol=1e-2, atol=1e-2)
def test_reshape_separate(fargs_tests, backend_pair_dtype):
    """
    Check that splitting the first dimension in two by a reshape gives the
    same tensor on both backends of the pair.

    The reshape comparison only runs when the first dimension is even.

    Arguments:
        fargs_tests: test parameters; element 0 holds the tensor dims.
        backend_pair_dtype: fixture yielding the (gpu, cpu) backend pair.
    """
    dims = fargs_tests[0]
    gpu, cpu = backend_pair_dtype
    dtype = gpu.default_dtype

    array_np = np.random.uniform(-1, 1, dims).astype(dtype)
    array_ng = gpu.array(array_np, dtype=dtype)
    array_nc = cpu.array(array_np, dtype=dtype)
    assert array_ng.is_contiguous

    # an odd first dimension cannot be split in two
    if dims[0] % 2 != 0:
        return

    target_shape = (2, dims[0] // 2, dims[1])
    reshaped_ng = array_ng.reshape(target_shape)
    reshaped_nc = array_nc.reshape(target_shape)
    assert tensors_allclose(reshaped_ng, reshaped_nc, rtol=0, atol=1e-6)
def test_copy_transpose(shape_inp, backend_pair_dtype):
    """
    Compare copy_transpose of both backends with np.transpose.

    Parameterized test case, uses pytest_generate_test to enumerate dim_inp
    tuples that drive the test. Every axis permutation except the identity is
    tried, plus ``None``.
    """
    shape, (name, inp_gen) = shape_inp
    ng, nc = backend_pair_dtype
    np_inp = inp_gen(shape).astype(nc.default_dtype)
    ndims = len(shape)

    # None first, then all permutations but the identity
    identity = tuple(range(ndims))
    axes = [None]
    axes.extend(perm for perm in itt.permutations(range(ndims), ndims)
                if perm != identity)

    for be in [ng, nc]:
        for ax in axes:
            be_inp = be.array(np_inp)
            expected = np.transpose(np_inp, axes=ax)
            actual = be.zeros(expected.shape)
            be.copy_transpose(be_inp, actual, axes=ax)
            assert tensors_allclose(expected, actual)
def test_hist_mkl(nbin_offset_dim_dtype_inp, backend_pair_mkl):
    """
    Compare the nervanamkl and nervanacpu hist implementation to the reference
    implementation (ref_hist).

    Parameterized test case, uses pytest_generate_test to enumerate
    dim_dtype_inp tuples that drive the test.
    """
    bin_cfg, dim, dtype, inp_cfg = nbin_offset_dim_dtype_inp
    nbins, offset = bin_cfg
    name, inp_gen = inp_cfg

    nm, nc = backend_pair_mkl
    nm.set_hist_buffers(nbins, offset)
    nc.set_hist_buffers(nbins, offset)

    # reference histogram computed on the host input
    host_inp = inp_gen(dim).astype(dtype)
    expected = ref_hist(host_inp, nbins=nbins, offset=offset)

    for be in [nm, nc]:
        actual = be.array(host_inp, dtype=dtype).hist(name)
        assert tensors_allclose(expected, actual)