def test_inclusive_scan():
    """Compare the thrust inclusive scan on CUDA against ``np.cumsum``.

    Prints a message and returns early when ``is_thrust_available()`` is
    false. Otherwise, for one 1-D and one 2-D int32 input, it builds
    ``scan_thrust`` with ``exclusive=False`` and an int64 output, runs the
    compiled function on GPU 0, and checks the result against a cumulative
    sum taken along the last axis.

    NOTE(review): another ``test_inclusive_scan`` is defined later in this
    file; if both remain in one module the later definition replaces this
    one at import time -- confirm which is intended.
    """
    if not is_thrust_available():
        print("skip because thrust is not enabled...")
        return

    out_dtype = "int64"
    shapes = ((10,), (10, 10))

    for shape in shapes:
        placeholder = te.placeholder(shape, name="values", dtype="int32")

        with tvm.target.Target("cuda"):
            scanned = scan_thrust(placeholder, out_dtype, exclusive=False)
            sched = tvm.te.create_schedule([scanned.op])
            device = tvm.gpu(0)
            func = tvm.build(sched, [placeholder, scanned], "cuda")

            # Host-side input and the NumPy reference for it.
            host_in = np.random.randint(0, 10, size=shape).astype(np.int32)
            expected = np.cumsum(host_in, axis=-1, dtype=out_dtype)

            # Device buffers: the input copy and a zero-filled output.
            dev_in = tvm.nd.array(host_in, device)
            dev_out = tvm.nd.array(np.zeros(host_in.shape, out_dtype), device)
            func(dev_in, dev_out)

            tvm.testing.assert_allclose(dev_out.asnumpy(), expected, rtol=1e-5)
def test_inclusive_scan():
    """Compare the thrust inclusive scan with ``np.cumsum`` on each GPU target.

    For every target in ``["cuda", "rocm"]`` the test:

    * skips the target when ``tvm.testing.device_enabled(target)`` is false;
    * skips the target when the per-target entry of ``thrust_check_func``
      returns a false value for ``"tvm.contrib.thrust.sum_scan"``
      (presumably meaning the thrust scan is not available for that
      target -- confirm against the helper's definition);
    * otherwise builds ``scan_thrust`` with ``exclusive=False`` and an int64
      output for a 1-D and a 2-D int32 input, runs it on device 0, and
      checks the result against a cumulative sum along the last axis.
    """
    out_dtype = "int64"

    for target in ["cuda", "rocm"]:
        if not tvm.testing.device_enabled(target):
            print("Skip because %s is not enabled" % target)
            continue

        with tvm.target.Target(target + " -libs=thrust") as tgt:
            if not thrust_check_func[target](tgt, "tvm.contrib.thrust.sum_scan"):
                print("skip because thrust is not enabled...")
                # Skip only this target. Returning here would also drop any
                # remaining targets that might have thrust available.
                continue

            for ishape in [(10,), (10, 10)]:
                values = te.placeholder(ishape, name="values", dtype="int32")
                scan = scan_thrust(values, out_dtype, exclusive=False)
                s = tvm.te.create_schedule([scan.op])
                dev = tvm.device(target, 0)
                f = tvm.build(s, [values, scan], target)

                # Random int32 input on the host plus a zeroed output buffer.
                values_np = np.random.randint(0, 10, size=ishape).astype(np.int32)
                values_np_out = np.zeros(values_np.shape, out_dtype)
                values_in = tvm.nd.array(values_np, dev)
                values_out = tvm.nd.array(values_np_out, dev)
                f(values_in, values_out)

                # An inclusive scan along the last axis is a cumulative sum.
                ref_values_out = np.cumsum(values_np, axis=-1, dtype=out_dtype)
                tvm.testing.assert_allclose(
                    values_out.asnumpy(), ref_values_out, rtol=1e-5
                )