示例#1
0
    q = op.args[2]
    s = op.args[3]

    # Don't use this intrinsic if we don't have a int32x4 vector
    # or if we are not multiplying q31 numbers
    if x.dtype != "int32x4" or q.value != 31:
        return op

    # Case 1, shift is negative
    sqrdmulh = tvm.tir.call_llvm_intrin(op.dtype, "llvm.aarch64.neon.sqrdmulh",
                                        tvm.tir.const(2, "uint32"), x, y)

    fixup = (sqrdmulh & (-s)) >> 31
    fixed_up_x = sqrdmulh + fixup
    out_1 = tvm.tir.call_llvm_intrin(op.dtype, "llvm.aarch64.neon.srshl",
                                     tvm.tir.const(2, "uint32"), sqrdmulh, s)

    # Case 2, shift is positive
    x = x * (1 << (s))
    out_2 = tvm.tir.call_llvm_intrin(op.dtype, "llvm.aarch64.neon.sqrdmulh",
                                     tvm.tir.const(2, "uint32"), x, y)

    # Select depending on the shift
    return tvm.tir.Select(s < 0, out_1, out_2)


register_intrin_lowering("tir.q_multiply_shift",
                         target="llvm.aarch64",
                         f=_q_multiply_shift_arm,
                         level=99)
示例#2
0
    assert name.startswith("tir.")
    dispatch_name = name[4:]
    if op.dtype == "float32":
        # call float function
        return tvm.tir.call_pure_extern("float32", "%sf" % dispatch_name,
                                        op.args[0])
    elif op.dtype == "float64":
        # call double function
        return tvm.tir.call_pure_extern("float32", dispatch_name, op.args[0])
    else:
        # cannot do translation, return self.
        return op


register_intrin_lowering("tir.exp",
                         target="cuda",
                         f=my_cuda_math_rule,
                         level=99)
######################################################################
# Register the rule to TVM with override option to override existing rule.
# Notice the difference between the printed code from previous one:
# our new rule uses math function :code:`expf` instead of
# fast math version :code:`__expf`.
#
fcuda = tvm.build(s, [A, B], "cuda", name="myexp")
print(fcuda.imported_modules[0].get_source())

######################################################################
# Add Your Own Intrinsic
# ----------------------
# If there is an intrinsic that is not provided by TVM.
# User can easily add new intrinsic by using the intrinsic rule system.
示例#3
0
                                        op.args[1])
    if op.dtype == "int32":
        return tvm.tir.call_pure_extern("int32", "atomicAdd", op.args[0],
                                        op.args[1])
    raise RuntimeError("only support int32, float32 and float64")


def opencl_atomic_add_rule(op):
    if op.dtype == "int32":
        return tvm.tir.call_pure_extern("int32", "atomic_add", op.args[0],
                                        op.args[1])
    raise RuntimeError("only support int32")


register_intrin_lowering("tir.atomic_add",
                         target="cuda",
                         f=cuda_atomic_add_rule,
                         level=99)

register_intrin_lowering("tir.atomic_add",
                         target="opencl",
                         f=opencl_atomic_add_rule,
                         level=99)


def atomic_add(x, y):
    return tvm.tir.call_intrin(y.dtype, "tir.atomic_add", x, y)


def get_valid_boxes_ir(data, valid_boxes, score_threshold, id_index,
                       score_index):
    """Low level IR to identify bounding boxes given a score threshold.
示例#4
0
    Parameters
    ----------
    op : PrimExpr
        The call expression of original intrinsic.

    Returns
    -------
    ret : PrimExpr
        The translated intrinsic rule.
        Return same op if no translation is possible.

    See Also
    --------
    register_intrin_lowering : The registration function for intrinsic lowering rule.
    """
    if str(op.dtype).startswith("float"):
        return call_pure_extern(op.dtype, op.op.name[4:], *op.args)
    return None


# opencl pattern for exp
register_intrin_lowering("tir.exp",
                         target="opencl",
                         f=_rule_float_direct,
                         level=99)
# default pattern for exp
register_intrin_lowering("tir.exp",
                         target="default",
                         f=_rule_float_suffix,
                         level=99)