def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum"):
    # Build the logic and compile the function
    dat_dtype = "float32"
    A = tvm.placeholder(shape=in_shape, name="A", dtype=dat_dtype)
    A1 = topi.sqrt(topi.exp(A))
    out_dtype = "float32"
    if type == "sum":
        B = topi.sum(A1, axis=axis, keepdims=keepdims)
    elif type == "max":
        B = topi.max(A1, axis=axis, keepdims=keepdims)
    elif type == "min":
        B = topi.min(A1, axis=axis, keepdims=keepdims)
    elif type == "argmax":
        B = topi.argmax(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    elif type == "argmin":
        B = topi.argmin(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    else:
        raise NotImplementedError

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        with tvm.target.create(device):
            s = topi.generic.schedule_reduce(B)
        ctx = tvm.context(device, 0)
        foo = tvm.build(s, [A, B], device, name="sum")
        # Test
        in_npy = np.random.uniform(size=in_shape).astype(np.float32)
        in_npy_map = np.sqrt(np.exp(in_npy)).astype(np.float32)
        if type == "sum":
            out_npy = in_npy_map.sum(axis=axis, keepdims=keepdims)
        elif type == "max":
            out_npy = in_npy_map.max(axis=axis, keepdims=keepdims)
        elif type == "min":
            out_npy = in_npy_map.min(axis=axis, keepdims=keepdims)
        elif type == "argmax":
            out_npy = _my_npy_argmax(in_npy_map, axis=axis, keepdims=keepdims)
        elif type == "argmin":
            out_npy = _my_npy_argmin(in_npy_map, axis=axis, keepdims=keepdims)
        else:
            raise NotImplementedError
        data_tvm = tvm.nd.array(in_npy, ctx=ctx)
        out_tvm = tvm.nd.empty(shape=out_npy.shape, ctx=ctx, dtype=out_dtype)
        for _ in range(1):
            foo(data_tvm, out_tvm)
        np.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3)

    check_device("opencl")
    check_device("cuda")
    check_device("metal")
    check_device("rocm")
def test_reduce_map(in_shape, axis, keepdims, type="sum", test_id=0):
    global TASK
    # Build the logic and compile the function
    A = te.placeholder(shape=in_shape, name="A")
    if type == "sum":
        TASK = "sum_map_id%d" % test_id
        B = topi.sum(A, axis=axis, keepdims=keepdims)
    elif type == "max":
        TASK = "max_map_id%d" % test_id
        B = topi.max(A, axis=axis, keepdims=keepdims)
    elif type == "min":
        TASK = "min_map_id%d" % test_id
        B = topi.min(A, axis=axis, keepdims=keepdims)
    else:
        raise NotImplementedError
    s = topi.cuda.schedule_reduce(B)
    with tvm.target.build_config(auto_unroll_max_step=16,
                                 auto_unroll_min_depth=0):
        fcuda = tvm.build(s, [A, B], "cuda", name="sum")

    # Test
    in_npy = np.random.normal(size=in_shape).astype(np.float32)
    if type == "sum":
        out_npy = in_npy.sum(axis=axis, keepdims=keepdims)
    elif type == "max":
        out_npy = in_npy.max(axis=axis, keepdims=keepdims)
    elif type == "min":
        out_npy = in_npy.min(axis=axis, keepdims=keepdims)
    else:
        raise NotImplementedError

    data_tvm = tvm.nd.array(in_npy, ctx=tvm.gpu())
    out_tvm = tvm.nd.empty(shape=out_npy.shape, ctx=tvm.gpu())

    for _ in range(2):
        fcuda(data_tvm, out_tvm)
    tvm.testing.assert_allclose(out_tvm.asnumpy(),
                                out_npy,
                                rtol=4e-4,
                                atol=4e-4)
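
A plausible driver for test_reduce_map above. The shapes are illustrative, and the module-level TASK string is assumed to be consumed elsewhere, e.g. by a callback that dumps the generated CUDA kernel under that name:

TASK = ""  # set by test_reduce_map; assumed to be read by a logging callback

if __name__ == "__main__":
    test_reduce_map(in_shape=(128, 24, 128, 24),
                    axis=(1, 2, 3), keepdims=True, type="sum", test_id=0)
    test_reduce_map(in_shape=(128, 24 * 128 * 24),
                    axis=(1,), keepdims=False, type="max", test_id=1)
    test_reduce_map(in_shape=(32, 128, 24),
                    axis=None, keepdims=True, type="sum", test_id=2)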
Example #3
def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum"):
    # Build the logic and compile the function
    A = tvm.placeholder(shape=in_shape, name="A")
    if type == "sum":
        B = topi.sum(A, axis=axis, keepdims=keepdims)
    elif type == "max":
        B = topi.max(A, axis=axis, keepdims=keepdims)
    elif type == "min":
        B = topi.min(A, axis=axis, keepdims=keepdims)
    else:
        raise NotImplementedError
    s = topi.cuda.schedule_reduce(B)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        # tvm.context maps the device string to the right context;
        # the original cuda/opencl-only branch broke for "metal".
        ctx = tvm.context(device, 0)
        foo = tvm.build(s, [A, B], device, name="sum")

        # Test
        in_npy = np.random.normal(size=in_shape).astype(np.float32)
        if type == "sum":
            out_npy = in_npy.sum(axis=axis, keepdims=keepdims)
        elif type == "max":
            out_npy = in_npy.max(axis=axis, keepdims=keepdims)
        elif type == "min":
            out_npy = in_npy.min(axis=axis, keepdims=keepdims)
        else:
            raise NotImplementedError

        data_tvm = tvm.nd.array(in_npy, ctx=ctx)
        out_tvm = tvm.nd.empty(shape=out_npy.shape, ctx=ctx)
        for _ in range(1):
            foo(data_tvm, out_tvm)
        np.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3)

    check_device("opencl")
    check_device("cuda")
    check_device("metal")
Example #4
def make_matrix_softmax_cross_entropy(shape,
                                      tgt,
                                      tgt_host,
                                      func_name,
                                      dtype="float32"):
    """TODO: Your code here"""
    """Hint: output shape should be (1,)"""

    # softmax
    y = tvm.te.placeholder(shape, dtype=dtype, name="y")  # input y
    matrix_row_max = topi.max(y, axis=1, keepdims=False)
    Ex = tvm.te.compute(shape,
                        lambda i, j: tvm.te.exp(y[i][j] - matrix_row_max[i]),
                        name="exp_element")
    Ex_sum = topi.sum(Ex, axis=1, keepdims=False)

    soft_max = tvm.te.compute(shape,
                              lambda i, j: Ex[i][j] / Ex_sum[i],
                              name="soft_max")

    # cross_entropy
    y_real = tvm.te.placeholder(shape, dtype=dtype, name="y_real")
    j = tvm.te.reduce_axis((0, shape[1]), name="j")
    loss = tvm.te.compute(
        (shape[0], ),
        lambda i: tvm.te.sum(y_real[i][j] * tvm.te.log(soft_max[i][j]), j),
        name="loss")

    sum_loss = topi.sum(loss, axis=0, keepdims=True)
    mean_loss = tvm.te.compute((1, ), lambda *i: -1 * sum_loss(*i) / shape[0],
                               "mean_loss")

    s = tvm.te.create_schedule(mean_loss.op)
    f = tvm.build(s, [y, y_real, mean_loss],
                  tgt,
                  target_host=tgt_host,
                  name=func_name)
    # print(tvm.lower(s, [y, y_real, mean_loss],name=func_name, simple_mode=True))
    return f
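
A quick NumPy cross-check for the kernel above, assuming an LLVM CPU target and the same TVM version as the snippet; the shape, class count, and tolerances are illustrative:

import numpy as np
import tvm

shape = (64, 10)
f = make_matrix_softmax_cross_entropy(shape, "llvm", "llvm", "softmax_xent")

x = np.random.uniform(-5, 5, shape).astype("float32")
y_onehot = np.zeros(shape, dtype="float32")
y_onehot[np.arange(shape[0]), np.random.randint(0, shape[1], shape[0])] = 1.0

ctx = tvm.cpu(0)
out = tvm.nd.array(np.zeros((1,), dtype="float32"), ctx)
f(tvm.nd.array(x, ctx), tvm.nd.array(y_onehot, ctx), out)

# Reference: mean over rows of -sum(y_real * log(softmax(x)))
e = np.exp(x - x.max(axis=1, keepdims=True))
sm = e / e.sum(axis=1, keepdims=True)
ref = -np.mean(np.sum(y_onehot * np.log(sm), axis=1))
np.testing.assert_allclose(out.asnumpy(), [ref], rtol=1e-4, atol=1e-4)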
Example #5
def test_reduce_map(in_shape, axis, keepdims, type="sum", test_id=0):
    global TASK
    # Build the logic and compile the function
    A = tvm.placeholder(shape=in_shape, name="A")
    if type == "sum":
        TASK = "sum_map_id%d" %test_id
        B = topi.sum(A, axis=axis, keepdims=keepdims)
    elif type == "max":
        TASK = "max_map_id%d" %test_id
        B = topi.max(A, axis=axis, keepdims=keepdims)
    elif type == "min":
        TASK = "min_map_id%d" %test_id
        B = topi.min(A, axis=axis, keepdims=keepdims)
    else:
        raise NotImplementedError
    s = topi.cuda.schedule_reduce(B)
    with tvm.build_config(auto_unroll_max_step=16,
                          auto_unroll_min_depth=0):
        fcuda = tvm.build(s, [A, B], "cuda", name="sum")

    # Test
    in_npy = np.random.normal(size=in_shape).astype(np.float32)
    if type == "sum":
        out_npy = in_npy.sum(axis=axis, keepdims=keepdims)
    elif type == "max":
        out_npy = in_npy.max(axis=axis, keepdims=keepdims)
    elif type == "min":
        out_npy = in_npy.min(axis=axis, keepdims=keepdims)
    else:
        raise NotImplementedError

    data_tvm = tvm.nd.array(in_npy, ctx=tvm.gpu())
    out_tvm = tvm.nd.empty(shape=out_npy.shape, ctx=tvm.gpu())

    for _ in range(2):
        fcuda(data_tvm, out_tvm)
    tvm.testing.assert_allclose(out_tvm.asnumpy(), out_npy, rtol=4e-4, atol=4e-4)
Example #6
def verify_reduce_map_ele(in_shape,
                          axis,
                          keepdims,
                          type="sum",
                          dtype="float32"):
    # Build the logic and compile the function
    A = tvm.placeholder(shape=in_shape, name="A", dtype=dtype)
    A1 = topi.sqrt(topi.exp(A))
    out_dtype = dtype
    if type == "sum":
        B = topi.sum(A1, axis=axis, keepdims=keepdims)
    elif type == "all":
        B = topi.all(A, axis=axis, keepdims=keepdims)
    elif type == "any":
        B = topi.any(A, axis=axis, keepdims=keepdims)
    elif type == "max":
        B = topi.max(A1, axis=axis, keepdims=keepdims)
    elif type == "min":
        B = topi.min(A1, axis=axis, keepdims=keepdims)
    elif type == "argmax":
        B = topi.argmax(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    elif type == "argmin":
        B = topi.argmin(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    else:
        raise NotImplementedError

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_reduce(B)

        foo = tvm.build(s, [A, B], device, name=type)
        # Test
        if dtype == 'bool':
            in_npy_map = in_npy = np.random.choice([True, False],
                                                   size=in_shape)
        else:
            in_npy = np.random.uniform(-1, 1, size=in_shape).astype(dtype)
            in_npy_map = np.sqrt(np.exp(in_npy)).astype(dtype)

        if type == "sum":
            out_npy = in_npy_map.sum(axis=axis, keepdims=keepdims)
        elif type == "all" and dtype == 'bool':
            out_npy = in_npy_map.all(axis=axis, keepdims=keepdims)
        elif type == "any" and dtype == "bool":
            out_npy = in_npy_map.any(axis=axis, keepdims=keepdims)
        elif type == "max":
            out_npy = in_npy_map.max(axis=axis, keepdims=keepdims)
        elif type == "min":
            out_npy = in_npy_map.min(axis=axis, keepdims=keepdims)
        elif type == "argmax":
            out_npy = _my_npy_argmax(in_npy_map, axis=axis, keepdims=keepdims)
        elif type == "argmin":
            out_npy = _my_npy_argmin(in_npy_map, axis=axis, keepdims=keepdims)
        else:
            raise NotImplementedError
        data_tvm = tvm.nd.array(in_npy, ctx=ctx)
        out_tvm = tvm.nd.empty(shape=out_npy.shape, ctx=ctx, dtype=out_dtype)
        for _ in range(1):
            foo(data_tvm, out_tvm)
        if type == "argmax" or type == "argmin":
            out_tvm_indices = out_tvm.asnumpy()
            if keepdims:
                out_tvm_indices = np.take(out_tvm_indices,
                                          indices=0,
                                          axis=axis)
            if axis is None:
                out_tvm_val = in_npy_map.ravel()[out_tvm_indices]
            else:
                other_indices = tuple(
                    np.indices(in_shape[0:axis] + in_shape[(axis + 1):]))
                sel_indices = other_indices[0:axis] + (
                    out_tvm_indices, ) + other_indices[axis:]
                out_tvm_val = in_npy_map[sel_indices]
            if type == "argmax":
                tvm.testing.assert_allclose(out_tvm_val,
                                            in_npy_map.max(axis=axis), 1E-3,
                                            1E-3)
            elif type == "argmin":
                tvm.testing.assert_allclose(out_tvm_val,
                                            in_npy_map.min(axis=axis), 1E-3,
                                            1E-3)
        else:
            tvm.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3)

    for device in get_all_backend():
        check_device(device)
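
get_all_backend() is not shown in this excerpt; in TVM's topi test suite it is a small helper along these lines (the exact target list here is an assumption):

def get_all_backend():
    # Targets to try in turn; the ctx.exist check in check_device
    # skips any that are not enabled in this build.
    return ["llvm", "cuda", "opencl", "metal", "rocm", "vulkan", "nvptx"]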
Example #7
def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum"):
    # Build the logic and compile the function
    dat_dtype = "float32"
    A = tvm.placeholder(shape=in_shape, name="A", dtype=dat_dtype)
    A1 = topi.sqrt(topi.exp(A))
    out_dtype = "float32"
    if type == "sum":
        B = topi.sum(A1, axis=axis, keepdims=keepdims)
    elif type == "max":
        B = topi.max(A1, axis=axis, keepdims=keepdims)
    elif type == "min":
        B = topi.min(A1, axis=axis, keepdims=keepdims)
    elif type == "argmax":
        B = topi.argmax(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    elif type == "argmin":
        B = topi.argmin(A1, axis=axis, keepdims=keepdims)
        out_dtype = "int32"
    else:
        raise NotImplementedError

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_reduce(B)

        foo = tvm.build(s, [A, B], device, name=type)
        # Test
        in_npy = np.random.uniform(size=in_shape).astype(np.float32)
        in_npy_map = np.sqrt(np.exp(in_npy)).astype(np.float32)
        if type == "sum":
            out_npy = in_npy_map.sum(axis=axis, keepdims=keepdims)
        elif type == "max":
            out_npy = in_npy_map.max(axis=axis, keepdims=keepdims)
        elif type == "min":
            out_npy = in_npy_map.min(axis=axis, keepdims=keepdims)
        elif type == "argmax":
            out_npy = _my_npy_argmax(in_npy_map, axis=axis, keepdims=keepdims)
        elif type == "argmin":
            out_npy = _my_npy_argmin(in_npy_map, axis=axis, keepdims=keepdims)
        else:
            raise NotImplementedError
        data_tvm = tvm.nd.array(in_npy, ctx=ctx)
        out_tvm = tvm.nd.empty(shape=out_npy.shape, ctx=ctx, dtype=out_dtype)
        for _ in range(1):
            foo(data_tvm, out_tvm)
        if type == "argmax" or type == "argmin":
            out_tvm_indices = out_tvm.asnumpy()
            if keepdims:
                out_tvm_indices = np.take(out_tvm_indices, indices=0, axis=axis)
            if axis is None:
                out_tvm_val = in_npy_map.ravel()[out_tvm_indices]
            else:
                other_indices = tuple(np.indices(in_shape[0:axis] + in_shape[(axis+1):]))
                sel_indices = other_indices[0:axis] + (out_tvm_indices,) + other_indices[axis:]
                out_tvm_val = in_npy_map[sel_indices]
            if type == "argmax":
                np.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1E-3, 1E-3)
            elif type == "argmin":
                np.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1E-3, 1E-3)
        else:
            np.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3)
    for device in ["cuda", "opencl", "metal", "llvm", "rocm", "vulkan"]:
        check_device(device)
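
Typical invocations for verify_reduce_map_ele, as a sketch; the shapes and cases are illustrative, not the exhaustive list from the original test file:

def test_reduce_map():
    verify_reduce_map_ele(in_shape=(128, 24, 128, 24),
                          axis=(1, 2, 3), keepdims=True, type="sum")
    verify_reduce_map_ele(in_shape=(128, 24 * 128 * 24),
                          axis=(1,), keepdims=False, type="max")
    verify_reduce_map_ele(in_shape=(32, 128, 24),
                          axis=None, keepdims=True, type="sum")
    verify_reduce_map_ele(in_shape=(32, 24, 32, 24),
                          axis=2, keepdims=False, type="argmin")

if __name__ == "__main__":
    test_reduce_map()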