Пример #1
0
def test_inclusive_scan():
    """Compare the thrust inclusive scan with NumPy's cumulative sum on CUDA.

    For a 1-D and a 2-D int32 input, the scan is built for the "cuda"
    target, run on GPU 0, and its int64 output is checked against
    ``np.cumsum`` taken along the last axis. When
    ``is_thrust_available()`` is false a message is printed and the
    function returns without checking anything.
    """
    if not is_thrust_available():
        print("skip because thrust is not enabled...")
        return

    result_dtype = "int64"

    for shape in ((10, ), (10, 10)):
        data = te.placeholder(shape, name="values", dtype="int32")

        # The scan op and its schedule are created under the CUDA target.
        with tvm.target.Target("cuda"):
            scanned = scan_thrust(data, result_dtype, exclusive=False)
            sched = tvm.te.create_schedule([scanned.op])

        device = tvm.gpu(0)
        kernel = tvm.build(sched, [data, scanned], "cuda")

        # Random digits in [0, 10) as input; the output buffer starts zeroed.
        host_in = np.random.randint(0, 10, size=shape).astype(np.int32)
        dev_in = tvm.nd.array(host_in, device)
        dev_out = tvm.nd.array(np.zeros(host_in.shape, result_dtype), device)
        kernel(dev_in, dev_out)

        expected = np.cumsum(host_in, axis=-1, dtype=result_dtype)
        tvm.testing.assert_allclose(dev_out.asnumpy(), expected, rtol=1e-5)
Пример #2
0
def test_inclusive_scan():
    """Check the thrust inclusive scan against ``np.cumsum`` on CUDA and ROCm.

    Each target is handled independently. A target is skipped (with a
    printed message) when ``tvm.testing.device_enabled`` is false for it,
    or when ``thrust_check_func[target]`` returns a falsy value for
    "tvm.contrib.thrust.sum_scan". For every remaining target, a 1-D and a
    2-D int32 input are scanned into an int64 output on device 0 and the
    result is compared with ``np.cumsum`` taken along the last axis.
    """
    out_dtype = "int64"

    for target in ["cuda", "rocm"]:
        if not tvm.testing.device_enabled(target):
            print("Skip because %s is not enabled" % target)
            continue

        with tvm.target.Target(target + " -libs=thrust") as tgt:
            if not thrust_check_func[target](tgt,
                                             "tvm.contrib.thrust.sum_scan"):
                print("skip because thrust is not enabled...")
                # Skip only this target: returning here would also drop
                # every target that has not been visited yet.
                continue

            for ishape in [(10, ), (10, 10)]:
                values = te.placeholder(ishape, name="values", dtype="int32")

                scan = scan_thrust(values, out_dtype, exclusive=False)
                s = tvm.te.create_schedule([scan.op])

                dev = tvm.device(target, 0)
                f = tvm.build(s, [values, scan], target)

                # Random digits in [0, 10) as input; output starts zeroed.
                values_np = np.random.randint(0, 10,
                                              size=ishape).astype(np.int32)
                values_np_out = np.zeros(values_np.shape, out_dtype)
                values_in = tvm.nd.array(values_np, dev)
                values_out = tvm.nd.array(values_np_out, dev)
                f(values_in, values_out)

                ref_values_out = np.cumsum(values_np, axis=-1, dtype=out_dtype)
                tvm.testing.assert_allclose(values_out.asnumpy(),
                                            ref_values_out,
                                            rtol=1e-5)