Пример #1
0
def shfl_down_i32(mask, val, offset):
    """Wrap the ``cuda_shfl_down_sync_i32`` internal call in an ``expr.Expr``.

    Args:
        mask: forwarded as the first argument of the internal call.
        val: forwarded as the second argument of the internal call.
        offset: forwarded as the third argument of the internal call.

    Returns:
        The ``expr.Expr`` built around the inserted internal function call.
    """
    # The fourth argument is deliberately 31 rather than the warp size 32:
    # passing 32 does not work (reason unknown), while 31 gives the desired
    # behavior.
    last_operand = 31
    call_args = expr.make_expr_group(mask, val, offset, last_operand)
    internal_call = _ti_core.insert_internal_func_call(
        "cuda_shfl_down_sync_i32", call_args, False)
    return expr.Expr(internal_call)
Пример #2
0
def shfl_up_f32(mask, val, offset):
    """Wrap the ``cuda_shfl_up_sync_f32`` internal call in an ``expr.Expr``.

    Args:
        mask: forwarded as the first argument of the internal call.
        val: forwarded as the second argument of the internal call.
        offset: forwarded as the third argument of the internal call.

    Returns:
        The ``expr.Expr`` built around the inserted internal function call.
    """
    # Lane offset is 0 for warp size 32.
    lane_offset = 0
    call_args = expr.make_expr_group(mask, val, offset, lane_offset)
    internal_call = _ti_core.insert_internal_func_call(
        "cuda_shfl_up_sync_f32", call_args, False)
    return expr.Expr(internal_call)
Пример #3
0
def shfl_sync_i32(mask, val, offset):
    """Wrap the ``cuda_shfl_sync_i32`` internal call in an ``expr.Expr``.

    Args:
        mask: forwarded as the first argument of the internal call.
        val: forwarded as the second argument of the internal call.
        offset: forwarded as the third argument of the internal call.

    Returns:
        The ``expr.Expr`` built around the inserted internal function call.
    """
    # Lane offset is 31 for warp size 32.
    lane_offset = 31
    call_args = expr.make_expr_group(mask, val, offset, lane_offset)
    internal_call = _ti_core.insert_internal_func_call(
        "cuda_shfl_sync_i32", call_args, False)
    return expr.Expr(internal_call)
Пример #4
0
def call_internal(name, *args):
    """Insert an internal function call by name and pass it to ``expr_init``.

    Args:
        name: name handed to ``_ti_core.insert_internal_func_call``.
        *args: positional arguments; they are passed to ``make_expr_group``
            as a single tuple (not unpacked).

    Returns:
        Whatever ``expr_init`` returns for the inserted call.
    """
    arg_group = make_expr_group(args)
    internal_call = _ti_core.insert_internal_func_call(name, arg_group)
    return expr_init(internal_call)
Пример #5
0
def shfl_up_f32(mask, val, offset):
    """Wrap the ``cuda_shfl_up_sync_f32`` internal call in an ``expr.Expr``.

    Args:
        mask: forwarded as the first argument of the internal call.
        val: forwarded as the second argument of the internal call.
        offset: forwarded as the third argument of the internal call.

    Returns:
        The ``expr.Expr`` built around the inserted internal function call.
    """
    # The trailing operand is 0, not the warp size 32.
    # NOTE(review): this assumes the value is forwarded as the packed
    # clamp/segment-mask operand of PTX ``shfl.sync.up``, where a full
    # 32-lane warp uses 0 and 32 falls outside the 5-bit clamp field --
    # confirm against the backend implementation.
    lane_offset = 0
    call_args = expr.make_expr_group(mask, val, offset, lane_offset)
    internal_call = _ti_core.insert_internal_func_call(
        "cuda_shfl_up_sync_f32", call_args, False)
    return expr.Expr(internal_call)
Пример #6
0
def ballot(predicate):
    """Wrap the ``cuda_ballot_i32`` internal call in an ``expr.Expr``.

    Args:
        predicate: the only argument forwarded to the internal call.

    Returns:
        The ``expr.Expr`` built around the inserted internal function call.
    """
    call_args = expr.make_expr_group(predicate)
    internal_call = _ti_core.insert_internal_func_call(
        "cuda_ballot_i32", call_args, False)
    return expr.Expr(internal_call)
Пример #7
0
def reduce_xor(value):
    """Wrap the ``subgroupXor`` internal call in an ``expr.Expr``.

    Args:
        value: the only argument forwarded to the internal call; it must
            expose ``ptr.get_ret_type()``.

    Returns:
        An ``expr.Expr`` around the inserted call, constructed with
        ``dtype`` set to ``value.ptr.get_ret_type()``.
    """
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupXor", expr.make_expr_group(value), False)
    # The result expression takes its dtype from the input's return type.
    result_type = value.ptr.get_ret_type()
    return expr.Expr(internal_call, dtype=result_type)
Пример #8
0
def invocation_id():
    """Wrap the ``subgroupInvocationId`` internal call in an ``expr.Expr``.

    The internal call receives an empty argument group.

    Returns:
        An ``expr.Expr`` around the inserted call, constructed with
        ``dtype=i32``.
    """
    no_args = expr.make_expr_group()
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupInvocationId", no_args, False)
    return expr.Expr(internal_call, dtype=i32)
Пример #9
0
def group_size():
    """Wrap the ``subgroupSize`` internal call in an ``expr.Expr``.

    The internal call receives an empty argument group.

    Returns:
        An ``expr.Expr`` around the inserted call, constructed with
        ``dtype=i32``.
    """
    no_args = expr.make_expr_group()
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupSize", no_args, False)
    return expr.Expr(internal_call, dtype=i32)
Пример #10
0
def barrier():
    """Wrap the ``subgroupBarrier`` internal call in an ``expr.Expr``.

    The internal call receives an empty argument group.

    Returns:
        The ``expr.Expr`` built around the inserted internal function call.
    """
    no_args = expr.make_expr_group()
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupBarrier", no_args, False)
    return expr.Expr(internal_call)
Пример #11
0
def broadcast(value, index: i32):
    """Wrap the ``subgroupBroadcast`` internal call in an ``expr.Expr``.

    Args:
        value: forwarded as the first argument of the internal call.
        index: forwarded as the second argument of the internal call.

    Returns:
        The ``expr.Expr`` built around the inserted internal function call.
    """
    call_args = expr.make_expr_group(value, index)
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupBroadcast", call_args, False)
    return expr.Expr(internal_call)
Пример #12
0
def elect():
    """Wrap the ``subgroupElect`` internal call in an ``expr.Expr``.

    The internal call receives an empty argument group.

    Returns:
        The ``expr.Expr`` built around the inserted internal function call.
    """
    no_args = expr.make_expr_group()
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupElect", no_args, False)
    return expr.Expr(internal_call)
Пример #13
0
def inclusive_or(value):
    """Wrap the ``subgroupInclusiveOr`` internal call in an ``expr.Expr``.

    Args:
        value: the only argument forwarded to the internal call; it must
            expose ``ptr.get_ret_type()``.

    Returns:
        An ``expr.Expr`` around the inserted call, constructed with
        ``dtype`` set to ``value.ptr.get_ret_type()``.
    """
    internal_call = _ti_core.insert_internal_func_call(
        "subgroupInclusiveOr", expr.make_expr_group(value), False)
    # The result expression takes its dtype from the input's return type.
    result_type = value.ptr.get_ret_type()
    return expr.Expr(internal_call, dtype=result_type)
Пример #14
0
def call_internal(name, *args, with_runtime_context=True):
    """Insert an internal function call by name and pass it to ``expr_init``.

    Args:
        name: name handed to ``_ti_core.insert_internal_func_call``.
        *args: positional arguments; they are passed to ``make_expr_group``
            as a single tuple (not unpacked).
        with_runtime_context: keyword-only flag forwarded unchanged as the
            third argument of ``insert_internal_func_call``. Defaults to
            ``True``.

    Returns:
        Whatever ``expr_init`` returns for the inserted call.
    """
    arg_group = make_expr_group(args)
    internal_call = _ti_core.insert_internal_func_call(
        name, arg_group, with_runtime_context)
    return expr_init(internal_call)