示例#1
0
文件: topi.py 项目: mindspore-ai/akg
def gather(inputs, attrs):
    """Build an extern op that gathers slices of ``data`` along one axis.

    Args:
        inputs: Sequence of exactly two tensors, ``(data, indices)``.
        attrs: Mapping of op attributes. ``attrs["axis"][0]`` selects the
            gathered axis (0 when "axis" is absent); a negative axis counts
            from the last dimension of ``data``.

    Returns:
        The ``tvm.extern`` result, shaped
        ``data.shape[:axis] + indices.shape + data.shape[axis + 1:]``.
        Positions whose index falls outside ``[0, data.shape[axis])`` are
        written as zero.

    Raises:
        ValueError: If ``inputs`` does not contain exactly two items.
    """
    attrs = dict(attrs.items())
    axis = int(attrs["axis"][0]) if "axis" in attrs else 0
    if len(inputs) != 2:
        raise ValueError(f"2 inputs expected, but got {len(inputs)}")
    data, indices = inputs
    data_shape = list(data.shape)
    indices_shape = list(indices.shape)
    if axis < 0:
        # A negative axis would otherwise produce wrong slices below.
        axis += len(data_shape)
    output_shape = data_shape[:axis] + indices_shape + data_shape[axis + 1:]

    def gen_ir(data, indices, out):
        ib = tvm.ir_builder.create()
        with ib.for_range_n(data_shape[:axis], "i") as i:
            with ib.for_range_n(indices_shape, "j") as j:
                load_idx = ib.load(indices, j)
                inbound = tvm.all(load_idx >= 0, load_idx < data_shape[axis])
                read_idx = i + [load_idx]
                with ib.for_range_n(data_shape[axis + 1:], "k") as k:
                    with ib.if_scope(inbound):
                        ib.store(out, i + j + k, ib.load(data, read_idx + k))
                    with ib.else_scope():
                        # Out-of-range indices yield zeros instead of reads.
                        ib.store(out, i + j + k, tvm.const(0, data.dtype))
        return ib.get()

    output_name = "T_gather_" + data.op.name + "_" + indices.op.name + "_" + str(
        axis)
    out_buf = tvm.decl_buffer(output_shape, data.dtype, output_name)
    # The declared output shape must match the output buffer, not the input.
    return tvm.extern([output_shape], [data, indices],
                      lambda ins, outs: gen_ir(ins[0], ins[1], outs[0]),
                      dtype=data.dtype,
                      out_buffers=[out_buf],
                      name=output_name)
示例#2
0
文件: topi.py 项目: mindspore-ai/akg
def coo2csr(inputs, attrs):
    """Build an extern op turning COO row indices into a CSR row pointer.

    Args:
        inputs: Sequence whose first item is the row-index tensor.
        attrs: Mapping providing ``"height"`` (converted with ``int``).

    Returns:
        The ``tvm.extern`` result holding ``height + 1`` elements; element
        ``i`` counts the entries of the row-index tensor that are smaller
        than ``i``.
    """
    rows = inputs[0]
    ptr_len = int(attrs['height']) + 1
    entry_count = rows.shape[0]

    def build_ir(src, dst):
        builder = tvm.ir_builder.create()
        with builder.for_range(0, ptr_len, name='i') as i:
            # Start each pointer slot at zero, then count smaller row ids.
            builder.store(dst, i, tvm.const(0, src.dtype))
            with builder.for_range(0, entry_count, name='j') as j:
                with builder.if_scope(i > builder.load(src, j)):
                    builder.store(dst, i, builder.load(dst, i) + 1)
        return builder.get()

    result_buf = tvm.decl_buffer(ptr_len, rows.dtype, "output_data")
    return tvm.extern(
        [ptr_len],
        [rows],
        lambda ins, outs: build_ir(ins[0], outs[0]),
        dtype=rows.dtype,
        out_buffers=[result_buf],
        name="T_coo2csr_" + rows.op.name,
    )
示例#3
0
文件: topi.py 项目: mindspore-ai/akg
def csr_gather(inputs, attrs):
    """Build an extern op that reads dense values at CSR coordinates.

    Args:
        inputs: Exactly three tensors ``(row_idx, col_idx, dense)``.
        attrs: Incoming attributes; not read by this function.

    Returns:
        The ``tvm.extern`` result, shaped ``col_idx.shape`` followed by
        ``dense.shape[2:]``; entry ``pos`` is ``dense[i, col_idx[pos]]`` for
        the row ``i`` whose pointer range contains ``pos``.
    """
    row_idx, col_idx, dense = inputs

    num_rows = row_idx.shape[0] - 1
    feature_shape = get_shape(dense.shape[2:])

    def build_ir(dense_buf, col_buf, ptr_buf, out):
        builder = tvm.ir_builder.create()
        avg_row = int(col_buf.shape[0]) // max(int(num_rows), 1)
        builder.scope_attr("INFO", "csr_avg_row", avg_row)
        with builder.for_range(0, num_rows, name='i') as i:
            row_begin = builder.load(ptr_buf, i)
            row_end = builder.load(ptr_buf, i + 1)
            with builder.for_range(0, row_end - row_begin, name='j') as j:
                pos = row_begin + j
                with builder.for_range_n(feature_shape, 'k') as k:
                    with builder.if_scope(pos < row_end):
                        src_index = [i, builder.load(col_buf, pos)] + k
                        builder.store(out, [pos] + k,
                                      builder.load(dense_buf, src_index))
        return builder.get()

    output_shape = get_shape(col_idx.shape) + feature_shape
    result_buf = tvm.decl_buffer(output_shape, dense.dtype, "output_data")
    return tvm.extern(
        [output_shape],
        [dense, col_idx, row_idx],
        lambda ins, outs: build_ir(ins[0], ins[1], ins[2], outs[0]),
        dtype=dense.dtype,
        out_buffers=[result_buf],
        name="T_csr_gather_" + dense.op.name,
        attrs={"remove_self_dependence": True, "csr_op": True},
    )
示例#4
0
def gather(data, indices, axis, flag):
    """Build an extern gather op; only axis=0 is supported.

    Args:
        data: Tensor to read from.
        indices: Tensor of positions along the first dimension of ``data``.
        axis: Gather axis; negative values are offset by the rank of
            ``data``. The generated loops index as if it were 0.
        flag: Suffix appended to the op name ``"fused_gather"``.

    Returns:
        The ``tvm.extern`` result, shaped
        ``data.shape[:axis] + indices.shape + data.shape[axis + 1:]``.
    """
    rank = len(data.shape)
    if axis < 0:
        axis += rank
    assert axis >= 0
    assert axis < rank

    src_dims = list(data.shape)
    out_dims = src_dims[:axis] + list(indices.shape) + src_dims[axis + 1:]
    lead_dims, tail_dims = out_dims[:1], out_dims[1:]

    def build_ir(src, idx, dst):
        builder = tvm.ir_builder.create()
        with builder.for_range_n(lead_dims, 'i') as i:
            with builder.for_range_n(tail_dims, 'j') as j:
                # The leading output coordinate picks the source row.
                picked = builder.load(src, [builder.load(idx, i)] + j)
                builder.store(dst, i + j, picked)
        return builder.get()

    result_buf = tvm.decl_buffer(out_dims, data.dtype, "out_buf")
    return tvm.extern(
        [out_dims],
        [data, indices],
        lambda ins, outs: build_ir(ins[0], ins[1], outs[0]),
        dtype=data.dtype,
        out_buffers=[result_buf],
        name="fused_gather" + flag,
    )
示例#5
0
文件: topi.py 项目: mindspore-ai/akg
def csr2coo(inputs, attrs):
    """Build an extern op expanding a CSR row pointer into COO row indices.

    Args:
        inputs: Sequence whose first item is the row-pointer tensor.
        attrs: Mapping providing ``"nnz"`` (converted with ``int``), the
            length of the output.

    Returns:
        The ``tvm.extern`` result with ``nnz`` elements; every position in
        ``[indptr[i], indptr[i + 1])`` receives ``i`` cast to the pointer
        dtype.
    """
    indptr = inputs[0]
    num_rows = indptr.shape[0] - 1
    nnz = int(attrs["nnz"])

    def build_ir(ptr_buf, dst):
        builder = tvm.ir_builder.create()
        builder.scope_attr("INFO", "csr_avg_row",
                           nnz // max(int(num_rows), 1))
        with builder.for_range(0, num_rows, name='i') as i:
            row_begin = builder.load(ptr_buf, i)
            row_end = builder.load(ptr_buf, i + 1)
            with builder.for_range(0, row_end - row_begin, name='j') as j:
                pos = row_begin + j
                with builder.if_scope(pos < row_end):
                    builder.store(dst, pos,
                                  tvm.expr.Cast(ptr_buf.dtype, i))
        return builder.get()

    result_buf = tvm.decl_buffer(nnz, indptr.dtype, "output_data")
    return tvm.extern(
        [nnz],
        [indptr],
        lambda ins, outs: build_ir(ins[0], outs[0]),
        dtype=indptr.dtype,
        out_buffers=[result_buf],
        name="T_csr2coo_" + indptr.op.name,
        attrs={"csr_op": True},
    )
示例#6
0
文件: topi.py 项目: mindspore-ai/akg
def csr_div(inputs, attrs):
    """Build an extern op dividing CSR values by a (broadcast) dense tensor.

    Args:
        inputs: Exactly four tensors
            ``(row_idx, col_idx, sparse_data, dense)``.
        attrs: Mapping providing ``"dense_shape"``, the shape the sparse
            operand is broadcast against.

    Returns:
        The ``tvm.extern`` result with the shape and dtype of
        ``sparse_data``. Which element of ``dense`` divides each value
        depends on how ``dense`` compares with the broadcast shape.

    Raises:
        AssertionError: If ``dense`` and ``sparse_data`` differ in dtype.
    """
    row_idx, col_idx, sparse_data, dense = inputs
    shape = tuple(attrs["dense_shape"])
    feature_shape = get_shape(sparse_data.shape)[1:]
    assert dense.dtype == sparse_data.dtype, "data and weight must have the same dtype"

    num_rows = row_idx.shape[0] - 1
    dense_shape = get_shape(dense.shape)
    broadcast_shape = get_broadcast_shape(dense_shape, get_shape(shape))
    same_rank = len(dense_shape) == len(broadcast_shape)
    need_expand = tvm.const(len(dense_shape) < len(broadcast_shape))
    need_broadcast_first_dim = tvm.const(
        same_rank and dense_shape[0] < broadcast_shape[0])
    need_broadcast_last_dim = tvm.const(
        same_rank and dense_shape[1] < broadcast_shape[1])

    def build_ir(dense_buf, value_buf, col_buf, ptr_buf, out):
        builder = tvm.ir_builder.create()
        builder.scope_attr("INFO", "csr_avg_row",
                           int(value_buf.shape[0]) // max(int(num_rows), 1))
        with builder.for_range(0, num_rows, name='i') as i:
            row_begin = builder.load(ptr_buf, i)
            row_end = builder.load(ptr_buf, i + 1)
            with builder.for_range(0, row_end - row_begin, name='j') as j:
                pos = row_begin + j
                with builder.for_range_n(feature_shape, 'k') as k:
                    with builder.if_scope(pos < row_end):
                        col = builder.load(col_buf, pos)
                        target = [pos] + k
                        numerator = builder.load(value_buf, target)

                        def emit(dense_prefix):
                            # Store value / dense[dense_prefix + feature idx].
                            divisor = builder.load(dense_buf,
                                                   dense_prefix + k)
                            builder.store(out, target, numerator / divisor)

                        with builder.if_scope(need_expand):
                            emit([col])
                        with builder.else_scope():
                            with builder.if_scope(need_broadcast_first_dim):
                                emit([0, col])
                            with builder.else_scope():
                                with builder.if_scope(
                                        need_broadcast_last_dim):
                                    emit([i, 0])
                                with builder.else_scope():
                                    emit([i, col])
        return builder.get()

    output_name = "T_csr_div_" + dense.op.name + "_" + sparse_data.op.name
    result_buf = tvm.decl_buffer(sparse_data.shape, sparse_data.dtype,
                                 output_name)
    return tvm.extern(
        [sparse_data.shape],
        [dense, sparse_data, col_idx, row_idx],
        lambda ins, outs: build_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
        dtype=sparse_data.dtype,
        out_buffers=[result_buf],
        name=output_name,
        attrs={"remove_self_dependence": True, "csr_op": True},
    )
示例#7
0
def scatter_add(data, indices, updates):
    """Build an extern op that adds rows of ``updates`` into the output.

    Args:
        data: Tensor ``[x, y, z]``; supplies the output shape and dtype.
        indices: Tensor ``[n]`` of destination rows.
        updates: Tensor ``[n, y, z]`` of values to add.

    Returns:
        The ``tvm.extern`` result shaped like ``data``. The generated code
        reads the current output contents and never reads ``data`` itself.
    """
    row_dims = list(updates.shape[:1])
    inner_dims = list(updates.shape[1:])

    def build_ir(_unused_data, idx_buf, upd_buf, out):
        builder = tvm.ir_builder.create()
        with builder.for_range_n(row_dims, "i") as i:
            with builder.for_range_n(inner_dims, "j") as j:
                dst_index = [builder.load(idx_buf, i)] + j
                total = builder.load(upd_buf, i + j) + builder.load(
                    out, dst_index)
                builder.store(out, dst_index, total)
        return builder.get()

    result_buf = tvm.decl_buffer(data.shape, data.dtype, "out_buf")
    return tvm.extern(
        [data.shape],
        [data, indices, updates],
        lambda ins, outs: build_ir(ins[0], ins[1], ins[2], outs[0]),
        dtype=data.dtype,
        out_buffers=[result_buf],
        name="fused_scatter_add",
    )
示例#8
0
文件: topi.py 项目: mindspore-ai/akg
def tensor_scatter_add(inputs, attrs):
    """Build an extern op that adds ``updates`` at multi-dim index tuples.

    Args:
        inputs: Exactly three tensors ``(data, indices, updates)``. A 1-D
            ``indices`` is treated as a column of single-element tuples.
        attrs: Incoming attributes; not read by this function.

    Returns:
        The ``tvm.extern`` result shaped like ``data``. Index tuples with
        any coordinate outside its dimension of ``data`` are skipped.

    Raises:
        ValueError: If ``inputs`` does not contain exactly three items.
    """
    if len(inputs) != 3:
        raise ValueError(f"3 inputs expected, but got {len(inputs)}")
    data, indices, updates = inputs
    data_shape = list(data.shape)
    indices_shape = list(indices.shape)
    is_1d_indices = len(indices_shape) == 1
    if is_1d_indices:
        indices_shape.append(1)
    tuple_len = int(indices_shape[-1])
    left_shape = indices_shape[:-1]
    right_shape = data_shape[tuple_len:]

    def build_ir(_unused_data, idx_buf, upd_buf, out):
        builder = tvm.ir_builder.create()
        with builder.for_range_n(left_shape, "i") as i:
            with builder.for_range_n(right_shape, "j") as j:
                if is_1d_indices:
                    coords = [builder.load(idx_buf, i)]
                else:
                    coords = [
                        builder.load(idx_buf, i + [k])
                        for k in range(0, tuple_len)
                    ]
                # Chain one bounds check per coordinate of the tuple.
                inbound = True
                for k, coord in enumerate(coords):
                    low_ok = (coord >= 0)
                    high_ok = (coord < data_shape[k])
                    if k == 0:
                        inbound = tvm.all(low_ok, high_ok)
                    else:
                        inbound = tvm.all(inbound, low_ok, high_ok)
                dst_index = coords + j
                with builder.if_scope(inbound):
                    total = builder.load(upd_buf, i + j) + builder.load(
                        out, dst_index)
                    builder.store(out, dst_index, total)
        return builder.get()

    output_name = "T_tsa_" + data.op.name + "_" + indices.op.name + "_" + updates.op.name
    result_buf = tvm.decl_buffer(data.shape, data.dtype, output_name)
    return tvm.extern(
        [data.shape],
        [data, indices, updates],
        lambda ins, outs: build_ir(ins[0], ins[1], ins[2], outs[0]),
        dtype=data.dtype,
        out_buffers=[result_buf],
        name=output_name,
        attrs={"disable_inline_inject": True},
    )
示例#9
0
文件: topi.py 项目: mindspore-ai/akg
def elem_all(inputs, attrs):
    """Build an extern op that clears a one-element flag on any zero input.

    Args:
        inputs: Sequence whose first item is the tensor to scan.
        attrs: Unused.

    Returns:
        The ``tvm.extern`` result of shape ``(1,)``; the generated code
        writes zero to element 0 whenever an input element equals zero and
        performs no other write.
    """
    del attrs
    source = inputs[0]

    def build_ir(dst, src):
        builder = tvm.ir_builder.create()
        zero = tvm.const(0, src.dtype)
        with builder.for_range_n(src.shape, "ax") as i:
            with builder.if_scope(builder.load(src, i) == zero):
                builder.store(dst, 0, zero)
        return builder.get()

    return tvm.extern(
        (1, ),
        [source],
        lambda ins, outs: build_ir(outs[0], ins[0]),
        name="elemall",
        dtype=source.dtype,
    )
示例#10
0
文件: topi.py 项目: mindspore-ai/akg
def complex_number(inputs, attrs):
    """Build an extern op interleaving real and imaginary parts.

    Args:
        inputs: Sequence whose first two items are the real and imaginary
            tensors.
        attrs: Unused.

    Returns:
        The ``tvm.extern`` result shaped ``real.shape + [2]``; the trailing
        dimension holds the real part at 0 and the imaginary part at 1.
    """
    del attrs
    real, imag = inputs[0], inputs[1]

    def build_ir(dst, re_buf, im_buf):
        builder = tvm.ir_builder.create()
        with builder.for_range_n(re_buf.shape, "i") as i:
            for slot, part in enumerate((re_buf, im_buf)):
                builder.store(dst, i + [slot], builder.load(part, i))
        return builder.get()

    out_shape = list(real.shape) + [2]
    return tvm.extern(
        out_shape,
        [real, imag],
        lambda ins, outs: build_ir(outs[0], ins[0], ins[1]),
        name="complex",
        dtype=real.dtype,
    )
示例#11
0
文件: topi.py 项目: mindspore-ai/akg
def tensor_unsorted_segment_sum(inputs, attrs):
    """Build an extern op that sums rows of ``data`` into segments.

    Args:
        inputs: Exactly two tensors ``(data, indices)``; ``indices`` must
            be 1-D and match the leading dimension of ``data``.
        attrs: Mapping providing ``"num_segments"``, the size of the first
            output dimension.

    Returns:
        The ``tvm.extern`` result shaped ``[num_segments] + data.shape[1:]``.
        Rows whose segment id lies outside ``[0, num_segments)`` are
        skipped. The generated code accumulates onto the current output
        contents.

    Raises:
        ValueError: On a wrong input count, when ``indices`` has a higher
            rank than ``data``, when their leading dimensions differ, or
            when ``indices`` is not 1-D.
    """
    attrs = dict(attrs.items())
    num = attrs['num_segments']
    if len(inputs) != 2:
        raise ValueError(f"2 inputs expected, but got {len(inputs)}")
    data, indices = inputs
    data_shape = list(data.shape)
    indices_shape = list(indices.shape)
    segment_len = len(data_shape) - len(indices_shape)
    if segment_len < 0:
        raise ValueError('input rank should not be less than segment_id rank')
    for i, v in enumerate(indices_shape):
        if int(v) != int(data_shape[i]):
            raise ValueError(
                f'input shape at dim {i} is not equal to segment_id shape at dim {i}'
            )
    output_shape = [num]
    if segment_len > 0:
        output_shape += data_shape[len(indices_shape):]
    if len(indices_shape) > 1:
        raise ValueError('only 1-D segment currently supported')

    def gen_ir(data, indices, out):
        ib = tvm.ir_builder.create()
        with ib.for_range_n(indices_shape, "i") as i:
            read_idx = ib.load(indices, i)
            # 1-D segment
            with ib.for_range_n(data_shape[1:], 'j') as j:
                inbound = tvm.all((read_idx >= 0), (read_idx < num))
                with ib.if_scope(inbound):
                    val = ib.load(data, i + j) + ib.load(out, [read_idx] + j)
                    ib.store(out, [read_idx] + j, val)
        return ib.get()

    output_name = "T_uss_" + data.op.name + "_" + indices.op.name
    out_buf = tvm.decl_buffer(output_shape, data.dtype, output_name)
    attrs = {"disable_inline_inject": True}
    # The declared output shape must match the segment buffer, not the input.
    return tvm.extern([output_shape], [data, indices],
                      lambda ins, outs: gen_ir(ins[0], ins[1], outs[0]),
                      dtype=data.dtype,
                      out_buffers=[out_buf],
                      name=output_name,
                      attrs=attrs)
示例#12
0
文件: topi.py 项目: mindspore-ai/akg
def cumprod(inputs, attrs):
    """Build an extern op computing a cumulative product along one axis.

    Args:
        inputs: Sequence holding exactly one tensor.
        attrs: Mapping of op attributes. ``attrs["axis"][0]`` is the scan
            axis (default 0); ``attrs["exclusive"].value`` and
            ``attrs["reverse"].value`` default to False when absent.

    Returns:
        The ``tvm.extern`` result with the shape and dtype of the input.
        With ``exclusive`` the first scanned element is 1 and each later
        one multiplies in the previous input element; with ``reverse`` the
        scan starts from the last position of the axis.

    Raises:
        ValueError: If ``inputs`` does not hold exactly one item.
    """
    if len(inputs) != 1:
        raise ValueError("length of inputs shoule be 1, but got %d." %
                         len(inputs))
    source = inputs[0]
    dims = source.shape
    attrs = dict(attrs.items())
    axis = int(attrs["axis"][0]) if "axis" in attrs else 0
    exclusive = attrs["exclusive"].value if "exclusive" in attrs else False
    reverse = attrs["reverse"].value if "reverse" in attrs else False
    output_name = "T_cumprod_" + source.op.name

    def build_ir(data, dst):
        builder = tvm.ir_builder.create()
        # axes before the scan axis
        with builder.for_range_n(dims[:axis], "i0") as i0:
            # axes after the scan axis
            with builder.for_range_n(dims[axis + 1:], "i1") as i1:

                def at(pos):
                    # Full index with `pos` placed on the scan axis.
                    return i0 + [pos] + i1

                first = at(dims[axis] - 1) if reverse else at(0)
                if exclusive:
                    seed = tvm.const(1, data.dtype)
                else:
                    seed = builder.load(data, first)
                builder.store(dst, first, seed)
                # walk the scan axis, starting from the second element
                with builder.for_range(1, dims[axis], name="cum_idx") as m:
                    if reverse:
                        prev = at(dims[axis] - m)
                        cur = at(dims[axis] - 1 - m)
                    else:
                        prev = at(m - 1)
                        cur = at(m)
                    factor = builder.load(data, prev if exclusive else cur)
                    builder.store(dst, cur, builder.load(dst, prev) * factor)
        return builder.get()

    return tvm.extern(
        dims,
        [source],
        lambda ins, outs: build_ir(ins[0], outs[0]),
        name=output_name,
        dtype=source.dtype,
    )
示例#13
0
文件: topi.py 项目: mindspore-ai/akg
def csr_reduce_sum(inputs, attrs):
    """Build an extern op summing CSR values within each row.

    Args:
        inputs: Exactly three items ``(row_idx, <ignored>, data)``.
        attrs: Mapping providing ``"axis"`` (only its first element is
            read, as an int) and ``"dense_shape"``.

    Returns:
        The ``tvm.extern`` result shaped
        ``(dense_shape[0], 1) + dense_shape[2:]``.

    Raises:
        AssertionError: If the (normalised) axis is not 1.
    """
    row_idx, _, data = inputs
    # Only an integer axis is supported for now.
    axis = int(attrs['axis'][0])
    shape = tuple(attrs['dense_shape'])
    num_rows = row_idx.shape[0] - 1
    if axis < 0:
        axis = axis + len(shape)
    assert axis == 1, "only supports reduction of CSR axis 1"
    feature_shape = get_shape(data.shape)[1:]
    output_shape = (shape[0], 1) + shape[2:]

    def build_ir(value_buf, ptr_buf, out):
        builder = tvm.ir_builder.create()
        builder.scope_attr("INFO", "csr_avg_row",
                           int(value_buf.shape[0]) // max(int(num_rows), 1))
        with builder.for_range(0, num_rows, name="i") as i:
            row_begin = builder.load(ptr_buf, i)
            row_end = builder.load(ptr_buf, i + 1)
            with builder.for_range_n(feature_shape, "k") as k:
                acc_index = [i, 0] + k
                zero = tvm.const(0, value_buf.dtype)
                builder.store(out, acc_index, zero)
                with builder.for_range(0, row_end - row_begin,
                                       name="j") as j:
                    reduce_axis = tvm.api.iter_var_api((0, shape[1]), "j", 2)
                    builder.scope_attr([reduce_axis], "reduce_update", "")
                    pos = row_begin + j
                    # Positions at or beyond the row end contribute zero.
                    term = tvm.expr.Select(
                        pos < row_end,
                        builder.load(value_buf, [pos] + k),
                        tvm.const(0, value_buf.dtype))
                    builder.store(out, acc_index,
                                  term + builder.load(out, acc_index))
        return builder.get()

    output_name = "T_csr_reduce_sum_" + data.op.name + "_" + str(axis)
    result_buf = tvm.decl_buffer(output_shape, data.dtype, output_name)
    return tvm.extern(
        [output_shape],
        [data, row_idx],
        lambda ins, outs: build_ir(ins[0], ins[1], outs[0]),
        dtype=data.dtype,
        out_buffers=[result_buf],
        name=output_name,
        attrs={"csr_op": True, "fuse_axis_extern": True},
    )
示例#14
0
文件: topi.py 项目: mindspore-ai/akg
def csrmv(inputs, _):
    """Build an extern op multiplying a CSR matrix by a dense column.

    Args:
        inputs: Exactly four tensors ``(indptr, indices, data, weight)``;
            ``data`` must be 1-D and ``weight`` 2-D with the same dtype.
        _: Unused attributes.

    Returns:
        The ``tvm.extern`` result shaped ``[num_rows, 1]``, where row
        ``r`` accumulates ``data[e] * weight[indices[e], 0]`` over the
        entries ``e`` of that row.

    Raises:
        AssertionError: On a rank or dtype violation.
    """
    indptr, indices, data, weight = inputs
    assert len(data.shape) == 1 and len(
        weight.shape) == 2, "only supports 2-dim sparse tensor"
    assert data.dtype == weight.dtype, "data and weight must have same dtype."

    num_rows = indptr.shape[0] - 1

    def build_ir(value_buf, col_buf, ptr_buf, weight_buf, out):
        builder = tvm.ir_builder.create()
        builder.scope_attr("INFO", "csr_avg_row",
                           int(value_buf.shape[0]) // max(int(num_rows), 1))
        with builder.for_range(0, num_rows, name="row") as row:
            acc_index = [row, 0]
            builder.store(out, acc_index, tvm.const(0, value_buf.dtype))
            row_begin = builder.load(ptr_buf, row)
            row_end = builder.load(ptr_buf, row + 1)
            with builder.for_range(0, row_end - row_begin,
                                   name="idx") as idx:
                elem = row_begin + idx
                product = builder.load(value_buf, elem) * builder.load(
                    weight_buf, [builder.load(col_buf, elem), 0])
                # Positions at or beyond the row end contribute zero.
                term = tvm.expr.Select(elem < row_end, product,
                                       tvm.const(0, value_buf.dtype))
                reduce_axis = tvm.api.iter_var_api(
                    (0, weight_buf.shape[0]), "idx", 2)
                builder.scope_attr([reduce_axis], "reduce_update", "")
                builder.store(out, acc_index,
                              term + builder.load(out, acc_index))
        return builder.get()

    output_shape = [num_rows, 1]
    output_name = "T_csrmv_" + weight.op.name + "_" + data.op.name
    result_buf = tvm.decl_buffer(output_shape, data.dtype, output_name)
    return tvm.extern(
        [output_shape],
        [data, indices, indptr, weight],
        lambda ins, outs: build_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
        dtype=data.dtype,
        out_buffers=[result_buf],
        name=output_name,
        attrs={"csr_op": True},
    )
示例#15
0
文件: topi.py 项目: mindspore-ai/akg
def csr_mm(inputs, _):
    """Build an extern op multiplying a CSR matrix by a dense matrix.

    Args:
        inputs: Exactly four tensors ``(indptr, indices, data, dense)``;
            the first three must be 1-D, ``dense`` 2-D, and ``data`` and
            ``dense`` must share a dtype.
        _: Unused attributes.

    Returns:
        The ``tvm.extern`` result shaped ``[num_rows, dense.shape[1]]``.

    Raises:
        AssertionError: On a rank or dtype violation.
    """
    indptr, indices, data, dense = inputs
    assert len(indptr.shape) == 1, "CSRTensor.indptr should be 1-dim."
    assert len(indices.shape) == 1, "CSRTensor.indices should be 1-dim."
    assert len(data.shape) == 1, "CSRTensor.values should be 1-dim."
    assert len(dense.shape) == 2, "Dense Tensor should be 2-dim."
    assert data.dtype == dense.dtype, "values and dense should have the same dtype."
    num_rows = indptr.shape[0] - 1
    num_cols = dense.shape[1]

    def build_ir(ptr_buf, col_buf, value_buf, dense_buf, out):
        builder = tvm.ir_builder.create()
        with builder.for_range(0, num_rows, name="row") as row:
            row_begin = builder.load(ptr_buf, row)
            row_end = builder.load(ptr_buf, row + 1)
            with builder.for_range(0, num_cols, name="col") as col:
                acc_index = [row, col]
                builder.store(out, acc_index, tvm.const(0, value_buf.dtype))
                with builder.for_range(0, row_end - row_begin,
                                       name="strides") as strides:
                    pos = row_begin + strides
                    product = builder.load(value_buf, pos) * builder.load(
                        dense_buf, [builder.load(col_buf, pos), col])
                    # Positions at or beyond the row end contribute zero.
                    term = tvm.expr.Select(pos < row_end, product,
                                           tvm.const(0, value_buf.dtype))
                    reduce_axis = tvm.api._IterVar((0, dense_buf.shape[0]),
                                                   "strides", 2)
                    builder.scope_attr([reduce_axis], "reduce_update", "")
                    builder.store(out, acc_index,
                                  term + builder.load(out, acc_index))
        return builder.get()

    output_shape = [num_rows, num_cols]
    output_name = "T_csr_mm_" + dense.op.name + "_" + data.op.name
    result_buf = tvm.decl_buffer(output_shape, data.dtype, output_name)
    return tvm.extern(
        [output_shape],
        [indptr, indices, data, dense],
        lambda ins, outs: build_ir(ins[0], ins[1], ins[2], ins[3], outs[0]),
        dtype=data.dtype,
        out_buffers=[result_buf],
        name=output_name,
    )
示例#16
0
文件: topi.py 项目: mindspore-ai/akg
def gather_nd(inputs, attrs):
    """Build an extern op gathering slices of ``data`` by index tuples.

    Args:
        inputs: Exactly two tensors ``(data, indices)``; the last
            dimension of ``indices`` is the length of each index tuple.
        attrs: Unused.

    Returns:
        The ``tvm.extern`` result shaped
        ``indices.shape[:-1] + data.shape[indices.shape[-1]:]``. Tuples
        with any coordinate outside its dimension of ``data`` yield zeros.

    Raises:
        ValueError: If ``inputs`` does not contain exactly two items.
    """
    del attrs
    if len(inputs) != 2:
        raise ValueError(f"2 inputs expected, but got {len(inputs)}")
    data, indices = inputs

    data_shape = list(data.shape)
    indices_shape = list(indices.shape)
    left_shape = indices_shape[:-1]
    right_shape = data_shape[int(indices_shape[-1]):]

    def build_ir(src, idx_buf, out):
        builder = tvm.ir_builder.create()
        with builder.for_range_n(left_shape, 'i') as i:
            with builder.for_range_n(right_shape, 'j') as j:
                coords = [
                    builder.load(idx_buf, i + [k])
                    for k in range(0, int(indices_shape[-1]))
                ]
                # Chain one bounds check per coordinate of the tuple.
                inbound = True
                for k, coord in enumerate(coords):
                    low_ok = (coord >= 0)
                    high_ok = (coord < data_shape[k])
                    if k == 0:
                        inbound = tvm.all(low_ok, high_ok)
                    else:
                        inbound = tvm.all(inbound, low_ok, high_ok)
                dst_index = i + j
                with builder.if_scope(inbound):
                    builder.store(out, dst_index,
                                  builder.load(src, coords + j))
                with builder.else_scope():
                    builder.store(out, dst_index, tvm.const(0, src.dtype))
        return builder.get()

    output_name = "T_gathernd_" + data.op.name + "_" + indices.op.name
    output_shape = left_shape + right_shape
    result_buf = tvm.decl_buffer(output_shape, data.dtype, output_name)
    return tvm.extern(
        [output_shape],
        [data, indices],
        lambda ins, outs: build_ir(ins[0], ins[1], outs[0]),
        dtype=data.dtype,
        out_buffers=[result_buf],
        name=output_name,
    )
示例#17
0
文件: topi.py 项目: mindspore-ai/akg
def elem_any(inputs, attrs):
    """Build an extern op that sets a one-element flag on a positive input.

    Args:
        inputs: Sequence whose first item is the tensor to scan.
        attrs: Mapping of op attributes. When ``attrs["dst_type"]`` exists
            and has a ``value`` attribute, that value is the output dtype;
            otherwise the input dtype is used.

    Returns:
        The ``tvm.extern`` result of shape ``(1,)``; the generated code
        writes one to element 0 whenever an input element is greater than
        zero and performs no other write.
    """
    source = inputs[0]
    out_dtype = source.dtype
    if "dst_type" in attrs and hasattr(attrs["dst_type"], "value"):
        out_dtype = attrs["dst_type"].value

    def build_ir(dst, src):
        builder = tvm.ir_builder.create()
        with builder.for_range_n(src.shape, "ax") as i:
            zero = tvm.const(0, src.dtype)
            one = tvm.const(1, out_dtype)
            with builder.if_scope(builder.load(src, i) > zero):
                builder.store(dst, 0, one)
        return builder.get()

    return tvm.extern(
        (1, ),
        [source],
        lambda ins, outs: build_ir(outs[0], ins[0]),
        name="elemany",
        dtype=out_dtype,
        attrs={"disable_inline_inject": 1},
    )
示例#18
0
文件: topi.py 项目: mindspore-ai/akg
def standard_normal(inputs, attrs):
    """Build an extern op filling a tensor via the "StandardNormal" call.

    Args:
        inputs: Unused.
        attrs: Mapping providing ``"seed"`` (passed to the extern call) and
            ``"shape"`` (the output shape).

    Returns:
        The ``tvm.extern`` result of dtype ``float32`` named "randnorm";
        each element is assigned the result of the "StandardNormal"
        extern call.
    """
    del inputs
    options = dict(attrs.items())
    seed = options["seed"]
    shape = options["shape"]
    dtype = "float32"

    def build_ir(out):
        builder = tvm.ir_builder.create()
        with builder.for_range_n(shape, "i") as i:
            sample = builder.extern_call(seed,
                                         op_name="StandardNormal",
                                         dtype=dtype)
            builder.store(out, i, sample)
        return builder.get()

    result_buf = tvm.decl_buffer(shape, dtype, "res")
    return tvm.extern(
        [shape],
        [],
        lambda ins, outs: build_ir(outs[0]),
        dtype=dtype,
        out_buffers=[result_buf],
        name="randnorm",
    )
示例#19
0
文件: topi.py 项目: mindspore-ai/akg
    def _zn2default(data, original_shape):
        """Build an extern op copying a 4-D blocked layout back to 2-D.

        The last four input dimensions are read as ``(n1, m1, m0, n0)`` and
        element ``[n1, m1, m0, n0]`` is written to output position
        ``[m1 * cs + m0, n1 * cs + n0]`` when that position lies inside the
        last two dimensions of ``original_shape``. ``cs`` and
        ``output_name`` come from the enclosing scope.

        Args:
            data: Input tensor with at least four dimensions.
            original_shape: Output shape with at least two dimensions.

        Returns:
            The ``tvm.extern`` result shaped ``original_shape``.

        Raises:
            ValueError: If either rank requirement is violated.
        """
        in_rank = len(data.shape)
        if in_rank < 4:
            raise ValueError(
                "length of shape of input_data should be greater than or equal to 4, but got %d"
                % in_rank)
        out_rank = len(original_shape)
        if out_rank < 2:
            raise ValueError(
                "length of original_shape(output_shape) should be greater than or equal to 2, but got %d"
                % out_rank)

        def build_ir(src, dst):
            builder = tvm.ir_builder.create()
            src_dims = [get_const(d) for d in src.shape]
            n1, m1, m0, n0 = src_dims[-4:]
            m, n = [get_const(d) for d in original_shape][-2:]
            batch_dims = src_dims[:-4]

            with builder.for_range_n(batch_dims, "bs") as i:
                with builder.for_range(0, n1) as i_n1:
                    with builder.for_range(0, m1) as i_m1:
                        with builder.for_range(0, m0) as i_m0:
                            with builder.for_range(0, n0) as i_n0:
                                row = i_m1 * cs + i_m0
                                col = i_n1 * cs + i_n0
                                # Skip positions outside the target extent.
                                with builder.if_scope(
                                        tvm.all(row < m, col < n)):
                                    src_index = i + [i_n1, i_m1, i_m0, i_n0]
                                    builder.store(
                                        dst, i + [row, col],
                                        builder.load(src, src_index))
            return builder.get()

        # If it is implemented with tvm.compute,
        # the generated stmt is difficult to process for poly in the fusion scene
        return tvm.extern(
            original_shape,
            [data],
            lambda ins, outs: build_ir(ins[0], outs[0]),
            name=output_name,
            dtype=data.dtype,
        )
示例#20
0
文件: topi.py 项目: mindspore-ai/akg
def unpad(inputs, attrs):
    """Build an extern op dropping trailing elements of each dimension.

    Args:
        inputs: Sequence holding exactly one tensor of rank 2 or more.
        attrs: Mapping providing ``"tail"``, one trim amount per input
            dimension.

    Returns:
        The ``tvm.extern`` result whose shape is the input shape minus
        ``tail`` element-wise. The generated loops trim only the last two
        dimensions and iterate the full leading dimensions of the input.

    Raises:
        ValueError: On a wrong input count, an input rank below 2, or a
            ``tail`` length different from the input rank.
    """
    if len(inputs) != 1:
        raise ValueError("Num of inputs should be 1, but got %d." %
                         len(inputs))

    source = inputs[0]
    options = dict(attrs.items())
    rank = len(source.shape)
    unpad_after = options["tail"]
    if rank < 2:
        raise ValueError(
            "dimensions of input should greater than 1, but got %d." % rank)
    if len(unpad_after) != rank:
        raise ValueError(
            "Input dimensions and unpad dimensions dismatch: %d vs %d" %
            (rank, len(unpad_after)))

    def build_ir(dst, src):
        builder = tvm.ir_builder.create()
        rows, cols = [get_const(d) for d in src.shape][-2:]
        cut_rows, cut_cols = [get_const(t) for t in unpad_after][-2:]

        with builder.for_range_n(src.shape[:-2], "bs") as i:
            with builder.for_range(0, rows - cut_rows) as i_m1:
                with builder.for_range(0, cols - cut_cols) as i_n1:
                    # Source and destination use the same coordinates.
                    index = i + [i_m1, i_n1]
                    builder.store(dst, index, builder.load(src, index))
        return builder.get()

    output_shape = [
        dim - cut for dim, cut in zip(source.shape, unpad_after)
    ]
    return tvm.extern(
        output_shape,
        [source],
        lambda ins, outs: build_ir(outs[0], ins[0]),
        name="T_unpad_" + source.op.name,
        dtype=[source.dtype],
    )