def _schedule(cfg, s, data_vec, weight_vec, output):
        s[data_vec].parallel(s[data_vec].op.axis[0])
        s[weight_vec].parallel(s[weight_vec].op.axis[0])

        y, x = s[output].op.axis
        wb, db, k = s[output].op.reduce_axis

        yo, yi = cfg["tile_y"].apply(s, output, y)
        xo, xi = cfg["tile_x"].apply(s, output, x)
        ko, ki = cfg["tile_k"].apply(s, output, k)

        cfg["reorder_0"].apply(s, output, [yo, xo, ko, yi, wb, db, ki, xi])
        cfg["ann_reduce"].apply(s,
                                output, [db, wb],
                                axis_lens=[
                                    get_const_int(db.dom.extent),
                                    get_const_int(wb.dom.extent)
                                ],
                                max_unroll=8,
                                cfg=cfg)
        cfg["ann_spatial"].apply(
            s,
            output, [yi, xi],
            axis_lens=[cfg['tile_y'].size[-1], cfg['tile_x'].size[-1]],
            max_unroll=8,
            cfg=cfg)
        s[output].vectorize(xi)
        s[output].parallel(yo)
        return s
예제 #2
0
def split_shape_func(attrs, inputs, _):
    """
    Shape function for split op.
    """
    if isinstance(attrs.indices_or_sections, (int, tvm.tir.IntImm)):
        indices_or_sections = get_const_int(attrs.indices_or_sections)
        assert indices_or_sections > 0, "Slice count must be > 0"
    else:
        indices_or_sections = list(get_const_tuple(attrs.indices_or_sections))
        assert sorted(
            indices_or_sections)[0] > 0 and indices_or_sections == sorted(
                indices_or_sections), "split_indices must be sorted"

    axis = get_const_int(attrs.axis)

    if axis < 0:
        axis += get_const_int(inputs[0].shape[0])

    num_out = (indices_or_sections if isinstance(indices_or_sections, int) else
               len(indices_or_sections) + 1)
    if isinstance(indices_or_sections, int):
        indices_or_sections = [indices_or_sections]
    return [
        _split_shape_func(inputs[0], convert(i), convert(indices_or_sections),
                          convert(axis)) for i in range(num_out)
    ]
예제 #3
0
def _create_axis_record(attrs, inputs):
    axes = attrs.axis if attrs.axis is None else list(
        get_const_tuple(attrs.axis))
    exclude = get_const_int(attrs.exclude) > 0
    keepdims = get_const_int(attrs.keepdims) > 0
    data_shape = inputs[0]
    shape_size = data_shape.shape[0].value
    axis_record = [-1] * shape_size
    if axes is None:
        axes = list(range(shape_size))

    for i, axis in enumerate(axes):
        if axis < 0:
            axes[i] = shape_size + axis

    if exclude:
        ex_axes = []
        for i in range(shape_size):
            if i not in axes:
                ex_axes.append(i)
        axes = ex_axes

    for i in range(shape_size):
        if i not in axes:
            axis_record[i] = i

    if not keepdims:
        tmp = []
        for i in axis_record:
            if i >= 0:
                tmp.append(i)
        axis_record = tmp

    return axis_record
예제 #4
0
파일: generic.py 프로젝트: vegaluisjose/tvm
 def _compute_argsort(attrs, inputs, _):
     axis = get_const_int(attrs.axis)
     is_ascend = bool(get_const_int(attrs.is_ascend))
     dtype = attrs.dtype
     return [
         topi_compute(inputs[0],
                      axis=axis,
                      is_ascend=is_ascend,
                      dtype=dtype)
     ]
예제 #5
0
def expand_dim_shape_func(attrs, inputs, _):
    """
    Shape function for expand_dim op.
    """
    axis = get_const_int(attrs.axis)
    num_newaxis = get_const_int(attrs.num_newaxis)
    if axis < 0:
        axis = inputs[0].shape[0] + axis + 1
    ndim = inputs[0].shape[0] if inputs[0].shape else 0
    return [_expand_dim_shape_func(inputs[0], convert(ndim), convert(axis), convert(num_newaxis))]
예제 #6
0
 def _compute_topk(attrs, inputs, out_type):
     if attrs.k is not None:
         k = attrs.k
     else:
         k = inputs[1]
     axis = get_const_int(attrs.axis)
     ret_type = attrs.ret_type
     is_ascend = bool(get_const_int(attrs.is_ascend))
     dtype = attrs.dtype
     out = topi_compute(inputs[0], k, axis, ret_type, is_ascend, dtype)
     out = out if isinstance(out, list) else [out]
     return out
예제 #7
0
    def _callback(op):
        if 'conv2d_nhwc' not in op.tag:
            return

        ### extract tensors ###
        output = op.output(0)
        conv = op
        data_vec = conv.input_tensors[0]
        kernel = conv.input_tensors[1]  # pylint: disable=unused-variable
        last = outs[0]  # pylint: disable=unused-variable

        # tile reduction axes
        n, oh, ow, co = sched[conv].op.axis
        kh, kw, ci = sched[conv].op.reduce_axis
        # NOTE we can't inline data padding in the SIMD path, because it
        # introduces conditionals in the inner loop.
        data_pad = data_vec.op
        sched[data_pad].compute_inline()

        co, vc = cfg['tile_co'].apply(sched, conv, co)
        oh, vh = cfg['tile_oh'].apply(sched, conv, oh)
        ow, vw = cfg['tile_ow'].apply(sched, conv, ow)
        cfg['reorder_0'].apply(sched, conv,
                               [n, co, oh, ow, ci, kh, kw, vh, vw, vc])
        cfg['ann_reduce'].apply(sched,
                                conv, [kh, kw],
                                axis_lens=[
                                    get_const_int(kh.dom.extent),
                                    get_const_int(kw.dom.extent)
                                ],
                                max_unroll=8,
                                cfg=cfg)
        cfg['ann_spatial'].apply(sched,
                                 conv, [vh, vw, vc],
                                 axis_lens=[
                                     cfg['tile_oh'].size[-1],
                                     cfg['tile_ow'].size[-1],
                                     cfg['tile_co'].size[-1]
                                 ],
                                 max_unroll=8,
                                 cfg=cfg)

        kernel_scope = n  # this is the scope to attach global config inside this kernel

        # tune unroll
        sched[output].pragma(kernel_scope, 'auto_unroll_max_step',
                             cfg['auto_unroll_max_step'].val)
        sched[output].pragma(kernel_scope, 'unroll_explicit',
                             cfg['unroll_explicit'].val)
예제 #8
0
 def _compute_multibox_transform_loc(attrs, inputs, _):
     """Compute definition of multibox_detection"""
     clip = bool(get_const_int(attrs.clip))
     threshold = get_const_float(attrs.threshold)
     variances = get_float_tuple(attrs.variances)
     return topi_compute(
         inputs[0], inputs[1], inputs[2], clip, threshold, variances)
예제 #9
0
def repeat_shape_func(attrs, inputs, _):
    """
    Shape func for repeat.
    """
    axis = get_const_int(attrs.axis)
    if axis < 0:
        axis = inputs[0].shape[0] + axis
    return [_repeat_shape_func(inputs[0], attrs.repeats, convert(axis))]
예제 #10
0
def stack_shape_func(attrs, inputs, _):
    """
    Shape func for stack.
    """
    axis = get_const_int(attrs.axis)
    if axis < 0:
        axis += inputs[0].shape[0] + 1
    return [_stack_shape_func(inputs[0], convert(axis), convert(len(inputs)))]
예제 #11
0
 def _compute_multibox_prior(attrs, inputs, _):
     """Compute definition of multibox_prior"""
     sizes = get_float_tuple(attrs.sizes)
     ratios = get_float_tuple(attrs.ratios)
     steps = get_float_tuple(attrs.steps)
     offsets = get_float_tuple(attrs.offsets)
     clip = bool(get_const_int(attrs.clip))
     return [topi_compute(inputs[0], sizes, ratios, steps, offsets, clip)]
예제 #12
0
def split_shape_func(attrs, inputs, _):
    """
    Shape function for split op.
    """
    if isinstance(attrs.indices_or_sections, (int, tvm.tir.IntImm)):
        indices_or_sections = get_const_int(attrs.indices_or_sections)
    else:
        indices_or_sections = get_const_tuple(attrs.indices_or_sections)

    axis = get_const_int(attrs.axis)

    num_out = indices_or_sections if isinstance(indices_or_sections, int) \
        else len(indices_or_sections) + 1
    if isinstance(indices_or_sections, int):
        indices_or_sections = [indices_or_sections]
    return [
        _split_shape_func(inputs[0], convert(i), convert(indices_or_sections),
                          convert(axis)) for i in range(num_out)
    ]
예제 #13
0
def take_shape_func(attrs, inputs, out_ndims):
    """
    Shape function for take op.
    """
    if attrs.axis is None:
        return [_take_no_axis_shape_func(inputs[1], out_ndims[0])]
    axis = get_const_int(attrs.axis)
    data_ndim = int(inputs[0].shape[0])
    if axis < 0:
        axis += data_ndim
    assert 0 <= axis < data_ndim
    return [_take_with_axis_shape_func(*inputs, convert(axis), out_ndims[0])]
예제 #14
0
 def _compute_proposal(attrs, inputs, out_type):
     scales = get_float_tuple(attrs.scales)
     ratios = get_float_tuple(attrs.ratios)
     feature_stride = attrs.feature_stride
     threshold = attrs.threshold
     rpn_pre_nms_top_n = attrs.rpn_pre_nms_top_n
     rpn_post_nms_top_n = attrs.rpn_post_nms_top_n
     rpn_min_size = attrs.rpn_min_size
     iou_loss = bool(get_const_int(attrs.iou_loss))
     return [topi_compute(inputs[0], inputs[1], inputs[2], scales, ratios,
                          feature_stride, threshold, rpn_pre_nms_top_n,
                          rpn_post_nms_top_n, rpn_min_size, iou_loss)]
예제 #15
0
def bitpack(data, bits, pack_type="int8", name="bitpack"):
    """Packs lowest dimension into format needed by VTA

    Parameters
    ----------
    pack_axis : int
        index of the axis to pack in data
    bit_axis : int
        index of axis to place bit axis in resulting packed data

    Returns
    -------
    packed : Tensor
        The packed tensor.
    """
    shape_vec = list(data.shape)
    if pack_type == 'int8':
        data_width = 8
    elif pack_type == 'int16':
        data_width = 16
    elif pack_type == 'int32':
        data_width = 32
    else:
        raise RuntimeError("Unknown pack type %s" % pack_type)
    assert data_width % bits == 0
    lanes = data_width // bits

    # Data must be in multiples of the data_width
    assert util.get_const_int(
        shape_vec[-1]) % lanes == 0, "Not a multiple of word size"
    shape_vec[-1] = shape_vec[-1] // lanes
    oshape = tuple(shape_vec)

    def _bitpack(*indices):
        ret = None
        mask = tvm.tir.const((1 << bits) - 1, pack_type)
        for k in range(lanes):
            idx = list(indices)
            idx[-1] = idx[-1] * lanes + k
            elem = data(*idx).astype(pack_type)
            if k == 0:
                ret = elem & mask
            else:
                val = (elem & mask) << tvm.tir.const(k * bits, pack_type)
                ret = ret | val
        return ret

    return te.compute(oshape, _bitpack, name=name, tag='bitpack')
예제 #16
0
 def _compute_nms(attrs, inputs, out_type):
     max_output_size = inputs[3]
     if attrs.max_output_size is not None:
         max_output_size = attrs.max_output_size
     return_indices = bool(get_const_int(attrs.return_indices))
     iou_threshold = get_const_float(attrs.iou_threshold)
     force_suppress = bool(get_const_int(attrs.force_suppress))
     top_k = get_const_int(attrs.top_k)
     coord_start = get_const_int(attrs.coord_start)
     score_index = get_const_int(attrs.score_index)
     id_index = get_const_int(attrs.id_index)
     invalid_to_bottom = bool(get_const_int(attrs.invalid_to_bottom))
     if return_indices:
         return topi_compute(inputs[0], inputs[1], inputs[2], max_output_size, iou_threshold,
                             force_suppress, top_k, coord_start, score_index, id_index,
                             return_indices, invalid_to_bottom)
     return [topi_compute(inputs[0], inputs[1], inputs[2], max_output_size, iou_threshold,
                          force_suppress, top_k, coord_start, score_index, id_index,
                          return_indices, invalid_to_bottom)]
예제 #17
0
def concatenate_shape_func(attrs, inputs, _):
    axis = get_const_int(attrs.axis)
    if axis < 0:
        axis += inputs[0].shape[0]
    return [_concatenate_shape_func(inputs, convert(axis))]
예제 #18
0
 def _compute_get_valid_counts(attrs, inputs, out_type):
     score_threshold = get_const_float(attrs.score_threshold)
     id_index = get_const_int(attrs.id_index)
     score_index = get_const_int(attrs.score_index)
     return topi_compute(inputs[0], score_threshold, id_index, score_index)
예제 #19
0
def test_util():
    x = tvm.tir.const(100, "int32")
    assert util.get_const_int(x) == 100
    assert util.get_const_tuple((x, x)) == (100, 100)