Example #1
def reduce_max_ad_optimized(head, data, axis, keepdims, target="cce"):
    def get_shape(pld):
        return [d.value for d in pld.shape]

    def custom_reduce_max_fdiff(out, inputs, grad, ad_attrs, new_pld_array):
        data = inputs[0]
        shape = get_shape(data)
        max_ = akg.lang.ascend.reduce_max(data, axis=axis, keepdims=keepdims)
        max_broadcast = akg.lang.ascend.broadcast(max_, shape)
        return [
            akg.tvm.compute(shape,
                            lambda *indices: akg.tvm.expr.Select(
                                data(*indices) == max_broadcast(*indices),
                                grad(*get_reduced_indices(
                                    *indices, axis=axis, keepdims=keepdims)),
                                akg.tvm.const(0, dtype=data.dtype)),
                            name="reduce_max_ad2")
        ]

    l = reduce_max(data, axis, keepdims, target=target)

    [dl_ddata] = akg.differentiate(l, [data],
                                   head,
                                   None,
                                   None,
                                   override={l: ([data], custom_reduce_max_fdiff)})
    return dl_ddata
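A minimal usage sketch for the override-based AD example above, assuming an AKG/Ascend environment where akg, reduce_max and get_reduced_indices are importable; the shapes, dtype and axis are illustrative placeholders.

import akg

# Hypothetical call: differentiate reduce_max over axis 1 with keepdims=True.
data = akg.tvm.placeholder((8, 16), "float16", name="data")
head = akg.tvm.placeholder((8, 1), "float16", name="head")  # adjoint of the reduced output
dl_ddata = reduce_max_ad_optimized(head, data, axis=1, keepdims=True)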
Example #2
def segment_max(data, segment_ids, num_segments):
    """
    Computes the max value along segment_ids of a akg.tvm.tensor

    Args:
        data: akg.tvm.Tensor of type "float16", "float32" 
        segment_ids: akg.tvm.Tensor of type int32, sorted

    Returns:
        akg.tvm.Tensor of same shape and type as data

    """

    d_dtype = data.dtype
    utils.ops_dtype_check(d_dtype, utils.DtypeForDavinci.ALL_FLOAT)
    d_shape = [x.value for x in data.shape]
    utils.check_shape(d_shape)

    s_shape = segment_ids.shape
    utils.check_shape(s_shape)

    new_segment_ids, idx = gen_ids(segment_ids)

    output_shape = (1, ) + tuple(d_shape[len(s_shape):])
    zero_data = akg.tvm.compute(output_shape, lambda *i: akg.tvm.const(0.0, d_dtype), name="zero")

    data_list = Split(data, new_segment_ids)
    out_n = num_segments

    out = []
    j = 0
    for i in range(0, out_n):

        if i in idx:
            tmp = reduce_max(data_list[j], 0, True, target=utils.CCE)
            out.append(tmp)
            j = j + 1
        else:
            out.append(zero_data)

    res = Concat(out, 0)

    return res
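A hedged usage sketch for segment_max; it assumes gen_ids accepts a sorted NumPy int32 array for segment_ids (the exact accepted type depends on the surrounding AKG utilities), and the shapes are illustrative.

import numpy as np

data = akg.tvm.placeholder((6, 4), "float16", name="data")
segment_ids = np.array([0, 0, 1, 1, 2, 2], dtype="int32")  # one sorted label per row of data
res = segment_max(data, segment_ids, num_segments=3)        # per-segment maxima, leading dim 3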
Example #3
def tensor_max(x, axis=None, keep_dims=False, target=utils.CUDA):
    """Max"""
    return math.reduce_max(x, axis=axis, keepdims=keep_dims, target=target)
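An illustrative call of the thin wrapper above, assuming a CUDA-capable AKG build; shape and dtype are placeholders.

x = akg.tvm.placeholder((32, 128), "float32", name="x")
y = tensor_max(x, axis=1, keep_dims=False)  # reduces axis 1, yielding shape (32,)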
Example #4
def reduce_max_ad_optimized_manual_schedule(input_shape,
                                            dtype,
                                            axis,
                                            keepdims,
                                            polyhedral=True,
                                            attrs=None):
    def custom_reduce_max_fdiff(out, inputs, head_, ad_attrs, new_pld_array):
        data_ = inputs[0]
        shape = data_.shape
        # reduce to the maximum value along the given axis (keepdims=True so the result can be broadcast back)
        max_ = akg.lang.ascend.reduce_max(data_, axis=axis, keepdims=True)
        # copies reduced values to get the original shape
        max_broadcast = akg.lang.ascend.broadcast(max_, shape)
        # broadcasting the head is needed to generate correct CCE code for the selection operation
        head_broadcast = akg.tvm.compute(
            shape, lambda *indices: head_(*get_reduced_indices(
                *indices, axis=axis, keepdims=keepdims)))
        # zero every position that is not a max value; the remaining positions carry the adjoint of the output
        max_values_and_zeros = akg.tvm.compute(
            shape,
            lambda *indices: akg.tvm.expr.Select(
                data_(*indices) == max_broadcast(*indices),
                head_broadcast(*indices), akg.tvm.const(0, dtype='float16')),
            name="reduce_max_ad2")
        # cast data back to the original dtype
        if dtype != 'float16':
            return [Cast(max_values_and_zeros, dtype, target=utils.CCE)]
        else:
            return [max_values_and_zeros]

    # tensor for the input data
    data = akg.tvm.placeholder(input_shape, dtype, name="input_data")

    # computation of reduce_max
    # not scheduled directly; it only serves as the op being differentiated
    l = reduce_max(data, axis, keepdims, target=utils.CCE)

    # adjoint tensor for the differentiation
    head = akg.tvm.placeholder(l.shape, name="head", dtype=l.dtype)

    # cast input data
    if dtype != 'float16':
        data_cast = Cast(data, "float16", target=utils.CCE)
        head_cast = Cast(head, "float16", target=utils.CCE)
    else:
        data_cast = data
        head_cast = head

    # override differentiation computation with custom function
    [dl_ddata] = akg.differentiate(
        l, [data_cast],
        head_cast,
        None,
        None,
        override={l: ([data_cast], custom_reduce_max_fdiff)})

    # get tensors from custom function
    if dtype != 'float16':
        max_values_and_zeros = dl_ddata.op.input_tensors[0]
        max_broadcast = max_values_and_zeros.op.input_tensors[1]
        max_ = max_broadcast.op.input_tensors[0]
        head_broadcast = max_values_and_zeros.op.input_tensors[2]
    else:
        max_broadcast = dl_ddata.op.input_tensors[1]
        max_ = max_broadcast.op.input_tensors[0]
        head_broadcast = dl_ddata.op.input_tensors[2]

    # schedule for the differentiation operation
    # inputs: data and head
    s = akg.tvm.create_schedule([dl_ddata.op])

    # cache reads of inputs
    if dtype != 'float16':
        head_ub = s.cache_read(head, "local.UB", [head_cast])
        data_ub = s.cache_read(data, "local.UB", [data_cast])
    else:
        # no cast operation
        head_ub = s.cache_read(head_cast, "local.UB", [head_broadcast])
        data_ub = s.cache_read(data_cast, "local.UB", [max_, dl_ddata])

    # cache write for the output
    dl_ddata_ub = s.cache_write(dl_ddata, "local.UB")

    # get tiling attributes
    if attrs is None:
        raise Exception('attrs is None')
    tiling_factors = attrs['tile']
    split_iterators = []
    assert len(tiling_factors) == len(dl_ddata.shape)
    # split the final compute and save the iterators
    for index, factor in enumerate(tiling_factors):
        split_iterators.append(s[dl_ddata].split(dl_ddata.op.axis[index],
                                                 factor))

    # get iterators
    iterator1 = split_iterators[0][0]

    # move the cast computations into the tiled loop when a cast is present
    if dtype != "float16":
        s[data_cast].compute_at(s[dl_ddata], iterator1)
        s[data_cast].set_scope("local.UB")
        s[head_cast].compute_at(s[dl_ddata], iterator1)
        s[head_cast].set_scope("local.UB")
        s[max_values_and_zeros].compute_at(s[dl_ddata], iterator1)
        s[max_values_and_zeros].set_scope("local.UB")

    # move cache reads and writes
    s[data_ub].compute_at(s[dl_ddata], iterator1)
    s[head_ub].compute_at(s[dl_ddata], iterator1)
    s[dl_ddata_ub].compute_at(s[dl_ddata], iterator1)

    # move the differentiation computations into the tiled loop
    s[max_].compute_at(s[dl_ddata], iterator1)
    s[max_].set_scope("local.UB")
    s[max_broadcast].compute_at(s[dl_ddata], iterator1)
    s[max_broadcast].set_scope("local.UB")
    s[head_broadcast].compute_at(s[dl_ddata], iterator1)
    s[head_broadcast].set_scope("local.UB")

    with akg.build_config(add_lower_pass=debug_mode(0), dump_pass_ir=True):
        mod = akg.build(s, [head, data, dl_ddata],
                        "cce",
                        name="reduce_max_ad_manual_schedule",
                        attrs=attrs,
                        polyhedral=polyhedral)
        source_code = mod.imported_modules[0].get_source()
        kernel_name = "reduce_max_ad_manual_schedule"
        create_code(kernel_name, './', source_code)
    return mod
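A hypothetical invocation of the manually scheduled kernel above; the 'tile' entry must supply one factor per axis of the differentiated output, and the shape, dtype and factors here are only for illustration.

attrs = {"tile": [4, 16]}  # one tiling factor per output axis
mod = reduce_max_ad_optimized_manual_schedule((16, 64), "float16", axis=1,
                                              keepdims=True, attrs=attrs)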
Example #5
def reduce_max_ad(head, data, axis, keepdims):
    b = reduce_max(data, axis, keepdims, target=utils.CCE)
    _jacs = akg.differentiate(b, [data], head)
    return _jacs[0]
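This is the plain AD entry point without the custom override used in Example #1; a minimal sketch with illustrative placeholder shapes, assuming the same AKG environment as above.

data = akg.tvm.placeholder((4, 10), "float32", name="data")
head = akg.tvm.placeholder((4, 1), "float32", name="head")  # adjoint of the reduced output, keepdims=True
grad = reduce_max_ad(head, data, axis=1, keepdims=True)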
Example #6
def focal_loss(prediction, tar, gamma):
    """
    Calculate loss by focalloss.
    
    See Source: <a href="https://arxiv.org/abs/1708.02002">Focal Loss for Dense Object Detection;
                Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He, Piotr Dollár</a>
    
    This op fuses activation function (`softmax`) and loss function (`focalloss`) together.
    
    .. math::
        p = softmax(x) \\
        FL(p) = -(1-p)^{\\gamma}\\log(p)
    
    Args:
        prediction (tvm.tensor.Tensor): The predicted logits for each class,
            type is float32 or float16 and shape is `(batch_size, num_anchors, num_classes)`.
        tar (tvm.tensor.Tensor): The one-hot encoded classification targets,
            type is float32, float16 or int32 and shape is `(batch_size, num_anchors, num_classes)`.
        gamma (float): A positive float number.
    
    Returns:
        tvm.tensor.Tensor, has the same type as inputs with shape `(batch_size, num_anchors)`.
    """

    utils.check_shape(prediction, length=3, tensor_name="prediction")
    utils.check_shape(tar, length=3, tensor_name="target")
    utils.ops_dtype_check(prediction.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    utils.ops_dtype_check(
        tar.dtype,
        [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])
    utils.check_greater("gamma", "zero", gamma, 0)

    dim_info, _ = focal_loss_set_dim_func(prediction, tar)
    attrs = {"dim": dim_info}

    dtype = prediction.dtype

    if product_is_mini() and dtype == 'float32':
        prediction = akg.topi.cast(prediction, "float16")
        tar = akg.topi.cast(tar, "float16")

    axis = -1
    shape = get_shape(prediction)

    maxv = reduce_max(prediction, axis=axis, keepdims=True, target=utils.CCE)

    k1 = akg.tvm.reduce_axis((0, shape[-1]), name="k1")
    expsum = akg.tvm.compute(
        shape[:-1],
        lambda *i: akg.tvm.sum(akg.tvm.exp(prediction(*i, k1) - maxv(*i, 0)),
                               axis=k1),
        name="expsum")

    gamma = akg.tvm.const(gamma, prediction.dtype)
    one = akg.tvm.const(1, prediction.dtype)

    def cal_focalloss(*i):
        x = prediction(*i) - maxv(*i[:-1], 0)
        pred = akg.tvm.exp(x - akg.tvm.log(expsum(*i[:-1])))  # softmax(x)
        log_p = x - akg.tvm.log(expsum(*i[:-1]))  # logsoftmax(x)
        neg_pred_pow = akg.tvm.exp(akg.tvm.log(one - pred) *
                                   gamma)  # (1-pred)^gamma
        loss = akg.tvm.const(-1,
                             prediction.dtype) * tar(*i) * neg_pred_pow * log_p
        return loss

    loss = akg.tvm.compute(shape, cal_focalloss, name="loss")

    loss = akg.topi.sum(loss, axis=axis)

    if product_is_mini() and dtype == 'float32':
        loss = akg.topi.cast(loss, "float32")

    return loss, attrs
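To make the fused math above concrete, here is a small NumPy reference sketch of the same computation: a numerically stable log-softmax followed by FL(p) = -(1-p)^gamma * log(p), masked by the one-hot targets and summed over the class axis. It is for value checking only and is not part of the AKG op.

import numpy as np

def focal_loss_ref(logits, one_hot_targets, gamma):
    x = logits - logits.max(axis=-1, keepdims=True)             # shift for numerical stability
    log_p = x - np.log(np.exp(x).sum(axis=-1, keepdims=True))   # log-softmax
    p = np.exp(log_p)                                           # softmax probabilities
    loss = -one_hot_targets * (1.0 - p) ** gamma * log_p        # per-class focal loss
    return loss.sum(axis=-1)                                    # shape (batch_size, num_anchors)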