Example #1
    def traverse(op):
        """Traverse operators from computation graph"""
        # inline all one-to-one-mapping operators except the last stage (output)
        if tag.is_broadcast(op.tag):
            if op not in s.outputs:
                s[op].compute_inline()
            for tensor in op.input_tensors:
                if tensor.op.input_tensors:
                    traverse(tensor.op)

        if 'conv2d_nchw' in op.tag:
            # print('Run in x86-rasp schedule')
            output = op.output(0)
            conv_out = op.input_tensors[0]
            kernel_vec = conv_out.op.input_tensors[1]
            kernel = kernel_vec.op.input_tensors[0]
            data_vec = conv_out.op.input_tensors[0]
            data = data_vec.op.input_tensors[0]
            data_pad = None
            if isinstance(data.op,
                          tvm.tensor.ComputeOp) and "pad" in data.op.tag:
                data_pad = data
                data = data_pad.op.input_tensors[0]

            padding = infer_pad(data, data_pad)
            if data_pad is None:
                stride = infer_stride(data, kernel, output)
            else:
                stride = infer_stride(data_pad, kernel, output)

            wkl = _get_workload(data, kernel, stride, padding, output.dtype)
            sch = _get_schedule(wkl)
            return _SCH_TO_SCH_FUNC[type(sch)](s, data, data_pad, data_vec,
                                               kernel, kernel_vec, conv_out,
                                               output, outs[0])
Example #2
    def traverse(op):
        """Traverse operators from computation graph"""
        # inline all one-to-one-mapping operators except the last stage (output)
        if tag.is_broadcast(op.tag):
            if op not in s.outputs:
                s[op].compute_inline()
            for tensor in op.input_tensors:
                if tensor.op.input_tensors:
                    traverse(tensor.op)

        if 'conv2d_nChwc' in op.tag:
            print('Got conv2d_nChwc tag: ' + str(op.tag))
            output = op.output(0)
            # conv_out = op.input_tensors[0]
            conv_out = output
            kernel = conv_out.op.input_tensors[1]
            # kernel = kernel_vec.op.input_tensors[0]
            data_vec = conv_out.op.input_tensors[0]
            data = (data_vec.op.input_tensors[0]
                    if isinstance(data_vec.op, tvm.tensor.ComputeOp)
                    and data_vec.op.input_tensors
                    and "pad" not in data_vec.op.tag
                    else data_vec)
            data_pad = None
            if isinstance(data.op,
                          tvm.tensor.ComputeOp) and "pad" in data.op.tag:
                data_pad = data
                data = data_pad.op.input_tensors[0]

            n, ic_chunk, h, w, ic_block = [x.value for x in data.shape]
            ic = ic_chunk * ic_block
            original_data = tvm.placeholder((n, ic, h, w), dtype=output.dtype)

            if data_pad is not None:
                n, _, pad_h, pad_w, _ = [x.value for x in data_pad.shape]
                original_data_pad = tvm.placeholder((n, ic, pad_h, pad_w),
                                                    dtype=output.dtype)
                padding = infer_pad(original_data, original_data_pad)
            else:
                padding = (0, 0)

            oc, kh, kw = kernel_size
            original_kernel = tvm.placeholder((oc, ic, kh, kw),
                                              dtype=output.dtype)

            n, oc_chunk, oh, ow, oc_block = [x.value for x in output.shape]
            original_output = tvm.placeholder((n, oc_chunk * oc_block, oh, ow),
                                              dtype=output.dtype)

            if data_pad is None:
                stride = infer_stride(original_data, original_kernel,
                                      original_output)
            else:
                stride = infer_stride(original_data_pad, original_kernel,
                                      original_output)

            wkl = _get_workload(original_data, original_kernel, stride,
                                padding, output.dtype)
            sch = _get_schedule(wkl)
            _SCH_TO_SCH_FUNC[type(sch)](s, data, data_pad, data_vec, kernel,
                                        conv_out, output, outs[0])
Example #3
def traverse(s, op):
    """Traverse operators from computation graph"""
    # inline all one-to-one-mapping operators except the last stage (output)
    if tag.is_broadcast(op.tag):
        if op not in s.outputs:
            s[op].compute_inline()
        for tensor in op.input_tensors:
            if tensor.op.input_tensors:
                traverse(s, tensor.op)
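
Example #3 shows only the inlining walk. In every snippet here, traverse is a nested closure over the schedule object s (or sch) and the output list outs, defined inside an enclosing generic schedule function that starts the walk from the output op. Below is a minimal sketch of that wrapper, assuming the pre-0.7 tvm.create_schedule / topi.tag API used throughout these examples; the function name generic_schedule and its placement are illustrative, not taken from any one snippet.

import tvm
from topi import tag

def generic_schedule(outs):
    """Sketch: create a schedule and drive a traverse() helper like the ones above."""
    outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
    s = tvm.create_schedule([x.op for x in outs])

    def traverse(op):
        # inline all one-to-one-mapping operators except the last stage (output)
        if tag.is_broadcast(op.tag):
            if op not in s.outputs:
                s[op].compute_inline()
            for tensor in op.input_tensors:
                if tensor.op.input_tensors:
                    traverse(tensor.op)
        # operator-specific scheduling (conv2d_nchw, depthwise_conv2d_nhwc, ...)
        # would be dispatched on op.tag here, as in the other examples

    traverse(outs[0].op)
    return s

The remaining examples differ only in what follows the inlining step: they match op.tag (conv2d_nchw, conv2d_nChwc, depthwise_conv2d_nhwc, spatial/im2col variants) and pass the recovered data, padding, kernel and output tensors to a target-specific _schedule helper.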
Example #4
def traverse(OP):
    """Traverse operators from computation graph"""
    # inline all one-to-one-mapping operators except the last stage (output)
    if tag.is_broadcast(OP.tag):
        if OP not in s.outputs:
            s[OP].compute_inline()
        for tensor in OP.input_tensors:
            if tensor.op.input_tensors:
                traverse(tensor.op)
    # schedule depthwise_conv2d
    if OP.tag == 'depthwise_conv2d_nhwc':
        PaddedInput = OP.input_tensors[0]
        Filter = OP.input_tensors[1]
        DepthwiseConv2d = OP.output(0)
        _schedule(PaddedInput, Filter, DepthwiseConv2d)
Example #5
def traverse(operator):
    """Traverse operators from computation graph"""
    if tag.is_broadcast(operator.tag):
        if operator not in sch.outputs:
            sch[operator].compute_inline()
        for tensor in operator.input_tensors:
            if tensor.op.input_tensors:
                traverse(tensor.op)
    elif operator.tag == 'conv2d_nhwc':
        Apad = operator.input_tensors[0]
        W = operator.input_tensors[1]
        B = operator.output(0)
        schedule(Apad, W, B)
    else:
        raise RuntimeError("Unsupported operator: %s" % operator.tag)
Example #6
    def traverse(op):
        """Traverse operators from computation graph"""
        # inline all one-to-one-mapping operators except the last stage (output)
        if tag.is_broadcast(op.tag):
            if op not in s.outputs:
                s[op].compute_inline()
            for tensor in op.input_tensors:
                if tensor.op.input_tensors:
                    traverse(tensor.op)

        if 'spatial_conv_output' in op.tag:
            # print('Run in x86-rasp schedule')
            output = op.output(0)
            conv_out = op.input_tensors[0]
            kernel_vec = conv_out.op.input_tensors[1]
            kernel = kernel_vec.op.input_tensors[0]
            data_vec = conv_out.op.input_tensors[0]
            data = data_vec.op.input_tensors[0]
            data_pad = None
            if isinstance(data.op,
                          tvm.tensor.ComputeOp) and "pad" in data.op.tag:
                data_pad = data
                data = data_pad.op.input_tensors[0]

            _schedule_spatial_conv2d(s, data, data_pad, data_vec, kernel,
                                     kernel_vec, conv_out, output, outs[0])

        if 'im2col_conv_output' in op.tag:
            output = op.output(0)
            conv_out = op.input_tensors[0]
            kernel_vec = conv_out.op.input_tensors[1]
            kernel = kernel_vec.op.input_tensors[0]
            data_vec = conv_out.op.input_tensors[0]
            data_col = data_vec.op.input_tensors[0]
            data = data_col.op.input_tensors[0]
            data_pad = None
            if isinstance(data.op,
                          tvm.tensor.ComputeOp) and "pad" in data.op.tag:
                data_pad = data
                data = data_pad.op.input_tensors[0]
            _schedule_im2col_conv2d(s, data, data_pad, data_col, data_vec,
                                    kernel, kernel_vec, conv_out, output,
                                    outs[0])
Example #7
    def traverse(op):
        """Traverse operators from computation graph"""
        # inline all one-to-one-mapping operators except the last stage (output)
        if tag.is_broadcast(op.tag):
            if op not in s.outputs:
                s[op].compute_inline()
            for tensor in op.input_tensors:
                if tensor.op.input_tensors:
                    traverse(tensor.op)

        if 'conv2d_nChwc' in op.tag:
            output = op.output(0)
            # conv_out = op.input_tensors[0]
            conv_out = op.input_tensors[0] if 'conv2d_nChwc_unpack' in op.tag else output
            kernel = conv_out.op.input_tensors[1]
            # kernel = kernel_vec.op.input_tensors[0]
            data_vec = conv_out.op.input_tensors[0]
            data = (data_vec.op.input_tensors[0]
                    if isinstance(data_vec.op, tvm.tensor.ComputeOp)
                    and data_vec.op.input_tensors
                    and "pad" not in data_vec.op.tag
                    else data_vec)
            data_pad = None

            if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
                data_pad = data
                data = data_pad.op.input_tensors[0]

            ndim_input = len(data.shape)
            if ndim_input == 5:
                n, ic_chunk, h, w, ic_block = [x.value for x in data.shape]
                ic = ic_chunk * ic_block
            else:
                n, ic, h, w = [x.value for x in data.shape]
            original_data = tvm.placeholder((n, ic, h, w), dtype=output.dtype)

            oc = num_filter
            kh, kw = kernel_size
            original_kernel = tvm.placeholder((oc, ic, kh, kw), dtype=output.dtype)

            wkl = _get_workload(original_data, original_kernel, stride, padding, output.dtype)
            sch = _get_schedule(wkl)
            _SCH_TO_SCH_FUNC[type(sch)](s, wkl, data, data_pad, data_vec,
                                        kernel, conv_out, output, outs[0])
Example #8
    def traverse(op):
        """Traverse operators from computation graph"""
        # inline all one-to-one-mapping operators except the last stage (output)
        if tag.is_broadcast(op.tag):
            if op not in s.outputs:
                s[op].compute_inline()
            for tensor in op.input_tensors:
                if tensor.op.input_tensors:
                    traverse(tensor.op)

        if 'conv2d_nchw' in op.tag:
            output = op.output(0)
            conv_out = op.input_tensors[0]
            kernel = conv_out.op.input_tensors[1]
            # kernel = kernel_vec.op.input_tensors[0]
            data_vec = conv_out.op.input_tensors[0]
            data = data_vec.op.input_tensors[0]
            data_pad = None
            if isinstance(data.op,
                          tvm.tensor.ComputeOp) and "pad" in data.op.tag:
                data_pad = data
                data = data_pad.op.input_tensors[0]
            padding = infer_pad(data, data_pad)

            _, ic, _, _ = [x.value for x in data.shape]
            oc = num_filter
            kh, kw = kernel_size
            original_kernel = tvm.placeholder((oc, ic, kh, kw))

            if data_pad is None:
                stride = infer_stride(data, original_kernel, output)
            else:
                stride = infer_stride(data_pad, original_kernel, output)

            wkl = _get_workload(data, original_kernel, stride, padding,
                                output.dtype)
            sch = _get_schedule(wkl)
            _SCH_TO_SCH_FUNC[type(sch)](s, data, data_pad, data_vec, kernel,
                                        conv_out, output, outs[0])
Example #9
    def traverse(op):
        """Traverse operators from computation graph"""
        # inline all one-to-one-mapping operators except the last stage (output)
        if tag.is_broadcast(op.tag):
            if op not in s.outputs:
                s[op].compute_inline()
            for tensor in op.input_tensors:
                if tensor.op.input_tensors:
                    traverse(tensor.op)

        if 'conv2d_nchw' in op.tag:
            conv = op.output(0)
            kernel = op.input_tensors[1]
            data = op.input_tensors[0]
            data_pad = None
            if isinstance(data.op,
                          tvm.tensor.ComputeOp) and "pad" in data.op.tag:
                data_pad = data
                data = data_pad.op.input_tensors[0]

            C = conv
            print(C.op.axis)
            print(C.op.reduce_axis)
            if data_pad is not None:
                print(data_pad.op.axis)

            n, c, h, w = C.op.axis
            rc, ry, rx = C.op.reduce_axis

            s[C].reorder(n, c, rc, h, w, ry, rx)
            r = s[C].fuse(ry, rx)
            s[C].unroll(r)

            w = s[C].fuse(h, w)
            xo, xi = s[C].split(w, factor=8)
            xoo, xoi = s[C].split(xo, factor=7)
            s[C].parallel(c)
            s[C].vectorize(xi)
            s[C].pragma(n, "parallel_launch_point")
Example #10
    def traverse(OP):
        print("***********")
        print(OP.tag)

        # inline all one-to-one-mapping operators except the last stage (output)
        if tag.is_broadcast(OP.tag):
            # print("is broadcast")
            if OP not in s.outputs:
                s[OP].compute_inline()
            for tensor in OP.input_tensors:
                if tensor.op.input_tensors:
                    traverse(tensor.op)
        elif tag.is_injective(OP.tag):
            # print("is injective")
            for tensor in OP.input_tensors:
                if tensor.op.input_tensors:
                    traverse(tensor.op)
        # schedule depthwise_conv2d
        elif OP.tag == 'depthwise_1by1_fused_nchw':
            PaddedInput = OP.input_tensors[0]
            Filter_d = OP.input_tensors[1]
            Filter_1 = OP.input_tensors[2]
            Depthwise1by1Fused = OP.output(0)
            _schedule(PaddedInput, Filter_d, Filter_1, Depthwise1by1Fused)
Example #11
    def traverse(op):
        """Traverse operators from computation graph"""
        # inline all one-to-one-mapping operators except the last stage (output)
        if tag.is_broadcast(op.tag):
            if op not in s.outputs:
                s[op].compute_inline()
            for tensor in op.input_tensors:
                if tensor.op.input_tensors:
                    traverse(tensor.op)

        if 'conv2d_nchw' in op.tag:
            conv = op.output(0)
            kernel = op.input_tensors[1]
            data = op.input_tensors[0]
            data_pad = None
            if isinstance(data.op, tvm.tensor.ComputeOp) and "pad" in data.op.tag:
                data_pad = data
                data = data_pad.op.input_tensors[0]

            C = conv
            print(C.op.axis)
            print(C.op.reduce_axis)
            if data_pad is not None:
                print(data_pad.op.axis)

            # WW = s.cache_read(data_pad, 'global', [C])

            n, c, h, w = C.op.axis
            rc, ry, rx = C.op.reduce_axis

            c_chunk, c_block = s[C].split(c, factor=simd_w)
            s[C].reorder(n, c_chunk, h, c_block, w, rc, ry, rx)

            work_amount = s[C].fuse(c_chunk, h)

            ur_w, wi = s[C].split(w, nparts=n_reg)
            s[C].reorder(n, work_amount, rx, c_block, wi, ry, ur_w, rc)
            # s[C].reorder(n, work_amount, c_block, wi, rx, ry, ur_w, rc)
            # s[C].fuse(rx, ry)

            nthread, nthread_inner = s[C].split(work_amount, nparts=4)

            # s[WW].compute_at(s[C], ur_w)
            # print(WW.op.axis)
            # step = s[C].fuse(rx, c_block)

            # r = s[C].fuse(ur_w, rc)

            # s[C].reorder(n, c, rc, h, w, ry, rx)
            # r = s[C].fuse(ry, rx)
            # s[C].unroll(rc)
            # s[C].unroll(ry)
            # s[C].unroll(rx)

            # xo, xi = s[C].split(w, factor=8)
            # s[C].unroll(ry)
            # s[C].unroll(rc)
            # s[C].parallel(work_amount)
            s[C].parallel(nthread)
            s[C].vectorize(c_block)
            s[C].pragma(n, "parallel_launch_point")
            s[C].pragma(nthread, "parallel_stride_pattern")
            s[C].pragma(n, "parallel_barrier_when_finish")