Example #1
 def convnet():
     """Alternating layout of simple convnet (from image super-resolution).
     """
     bias1 = relay.var('bias1', shape=(64,))
     bias2 = relay.var('bias2', shape=(64,))
     bias3 = relay.var('bias3', shape=(64,))
     bias4 = relay.var('bias4', shape=(64,))
     weight1 = relay.var('weight1', shape=(64, 1, 5, 5))
     weight2 = relay.var('weight2', shape=(64, 64, 3, 3))
     weight3 = relay.var('weight3', shape=(64, 64, 3, 3))
     weight4 = relay.var('weight4', shape=(64, 64, 3, 3))
     data = relay.var("x", shape=(1, 1, 224, 224))
     n00 = relay.nn.conv2d(data, weight1, padding=[2, 2], kernel_size=[5, 5])
     n01 = relay.expand_dims(bias1, axis=1, num_newaxis=2)
     n02 = relay.add(n00, n01)
     n03 = relay.nn.relu(n02)
     n04 = relay.nn.conv2d(n03, weight2, padding=[1, 1], kernel_size=[3, 3])
     n05 = relay.expand_dims(bias2, axis=1, num_newaxis=2)
     n06 = relay.add(n04, n05)
     n07 = relay.nn.relu(n06)
     n08 = relay.nn.conv2d(n07, weight3, padding=[1, 1], kernel_size=[3, 3])
     n09 = relay.expand_dims(bias3, axis=1, num_newaxis=2)
     n10 = relay.add(n08, n09)
     n11 = relay.nn.relu(n10)
     n12 = relay.nn.conv2d(n11, weight4, padding=[1, 1], kernel_size=[3, 3])
     n13 = relay.expand_dims(bias4, axis=1, num_newaxis=2)
     n14 = relay.add(n12, n13)
     n15 = relay.reshape(n14, newshape=[1, 1, 3, 3, 224, 224])
     n16 = relay.transpose(n15, axes=[0, 1, 4, 2, 5, 3])
     net = relay.reshape(n16, newshape=[1, 1, 672, 672])
     args = relay.ir_pass.free_vars(net)
     return relay.Function(args, net)
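The reshape → transpose → reshape tail of `convnet()` is the depth-to-space ("pixel shuffle") step used in image super-resolution: `newshape=[1, 1, 3, 3, 224, 224]` splits the channel axis into a 3x3 sub-pixel block per location, and the transpose interleaves those blocks into a 672x672 image (note this assumes the final feature map carries 3*3 = 9 channels). A minimal NumPy sketch of the same index shuffle, with illustrative names:

import numpy as np

def pixel_shuffle_nchw(feat, upscale=3):
    # feat: (N, C, H, W) with C divisible by upscale**2
    n, c, h, w = feat.shape
    out_c = c // (upscale * upscale)
    x = feat.reshape(n, out_c, upscale, upscale, h, w)
    x = x.transpose(0, 1, 4, 2, 5, 3)          # (N, C', H, r, W, r)
    return x.reshape(n, out_c, h * upscale, w * upscale)

feat = np.random.rand(1, 9, 224, 224).astype("float32")
assert pixel_shuffle_nchw(feat).shape == (1, 1, 672, 672)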
Example #2
 def before(x, conv_weight, out_bias, out_scale, channels):
     args = [x, conv_weight, out_bias, out_scale]
     out_scale = relay.expand_dims(out_scale, axis=1, num_newaxis=2)
     out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2)
     y = relay.nn.conv2d(x, conv_weight,
                         channels=channels,
                         kernel_size=(3, 3),
                         padding=(1, 1))
     y = relay.add(y, out_bias)
     y = relay.nn.relu(y)
     y = relay.multiply(y, out_scale)
     return relay.Function(args, y)
Example #3
 def before(x, conv_weight, in_bias, in_scale, channels):
     args = [x, conv_weight, in_bias, in_scale]
     in_scale = relay.expand_dims(in_scale, axis=1, num_newaxis=2)
     in_bias = relay.expand_dims(in_bias, axis=1, num_newaxis=2)
     x = relay.multiply(x, in_scale)
     x = relay.nn.relu(x)
     x = relay.add(x, in_bias)
     y = relay.nn.conv2d(x, conv_weight,
                         channels=channels,
                         kernel_size=(3, 3),
                         padding=(1, 1))
     return relay.Function(args, y)
Example #4
 def simple_bn(x, gamma, beta, moving_mean, moving_var,
               axis=1, epsilon=1e-5, shape=None):
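     # note: `rly` is presumably an alias for the tvm.relay module in the source test file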
     # expect = (x - moving_mean) / sqrt(moving_var + eps) * gamma + beta
     scale = rly.multiply(rly.const(1, 'float32') /
             rly.sqrt(moving_var + rly.const(epsilon, 'float32')), gamma)
     shift = rly.add(
         rly.multiply(rly.negative(moving_mean), scale), beta)
     num_newaxis = len(shape) - (axis + 1)
     if num_newaxis:
         scale = rly.expand_dims(scale, axis=1, num_newaxis=num_newaxis)
         shift = rly.expand_dims(shift, axis=1, num_newaxis=num_newaxis)
     return x * scale + shift
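`simple_bn` rewrites batch norm into one per-channel scale and shift. A quick NumPy check (illustrative values only) that the decomposition matches the formula in the comment:

import numpy as np

eps = 1e-5
x = np.random.rand(2, 4, 8, 8).astype("float32")
gamma, beta = np.random.rand(4), np.random.rand(4)
mean, var = np.random.rand(4), np.random.rand(4) + 0.5

scale = gamma / np.sqrt(var + eps)                  # 1/sqrt(var+eps) * gamma
shift = -mean * scale + beta
# broadcast the per-channel values over NCHW, as the expand_dims calls do
bn = (x - mean[None, :, None, None]) / np.sqrt(var + eps)[None, :, None, None] \
     * gamma[None, :, None, None] + beta[None, :, None, None]
folded = x * scale[None, :, None, None] + shift[None, :, None, None]
np.testing.assert_allclose(folded, bn, rtol=1e-5)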
Example #5
 def fail2(x, conv_weight, out_bias, out_scale, channels):
     args = [x, conv_weight, out_bias, out_scale]
     out_scale = relay.expand_dims(out_scale, axis=1, num_newaxis=2)
     out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2)
     y1 = relay.nn.conv2d(x, conv_weight,
                          channels=channels,
                          kernel_size=(3, 3),
                          padding=(1, 1))
     y2 = relay.nn.relu(y1)
     # fold will fail because y1 is also referred to by y2
     y1 = relay.multiply(y1, out_scale)
     y = relay.add(y1, y2)
     return relay.Function(args, y)
Example #6
 def expected(x, conv_weight, in_bias, in_scale, channels):
     # use a fixed order of args so alpha equal check can pass
     args = [x, conv_weight, in_bias]
     in_bias = relay.expand_dims(in_bias, axis=1, num_newaxis=2)
     squeezed_scale = relay.squeeze(in_scale, axis=[1,2])
     x = relay.nn.relu(x)
     in_bias = relay.divide(in_bias, relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2))
     x = relay.add(x, in_bias)
     conv_weight = relay.multiply(
         conv_weight, relay.expand_dims(squeezed_scale, axis=1, num_newaxis=2))
     y = relay.nn.conv2d(x, conv_weight,
                         channels=channels,
                         kernel_size=(3, 3),
                         padding=(1, 1))
     return relay.Function(args, y)
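This is the forward-folded form of Example #3: since relu(x * s) == relu(x) * s for a positive per-channel scale, the scale can be pushed past the relu, divided out of the bias, and multiplied into the conv weight along its input-channel axis (expand_dims(..., axis=1, num_newaxis=2) makes the scale (I, 1, 1), which broadcasts over the OIHW weight's second axis). A NumPy sketch of the input-channel identity, written with a 1x1 convolution as a matmul (illustrative only):

import numpy as np

x = np.random.rand(5, 16)            # (pixels, in_channels)
w = np.random.rand(8, 16)            # (out_channels, in_channels)
s = np.random.rand(16) + 0.1         # positive per-input-channel scale

lhs = (x * s) @ w.T                  # scale the input first
rhs = x @ (w * s[None, :]).T         # fold the scale into the weight
np.testing.assert_allclose(lhs, rhs, rtol=1e-10)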
Example #7
def test_expand_dims_infer_type():
    n, t, d = tvm.var("n"), tvm.var("t"), 100
    x = relay.var("x", shape=(n, t, d))
    y = relay.expand_dims(x, axis=2)
    assert "axis=2" in y.astext()
    checked = relay.ir_pass.infer_type(y)
    assert checked.checked_type == relay.TensorType((n, t, 1, 100))
Example #8
 def verify_expand_dims(dshape, dtype, oshape, axis, num_newaxis):
     x = relay.Var("x", relay.TensorType(dshape, dtype))
     func = relay.Function([x], relay.expand_dims(x, axis, num_newaxis))
     for target, ctx in ctx_list():
         data = np.random.uniform(size=dshape).astype(dtype)
         ref_res = data.reshape(oshape)
         intrp = relay.create_executor("graph", ctx=ctx, target=target)
         op_res = intrp.evaluate(func)(data)
         np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=0.01)
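For reference, a call such as the following (shapes picked here purely for illustration) exercises the helper: expanding a (3, 10) tensor at axis=2 with two new axes should match a reshape to (3, 10, 1, 1).

verify_expand_dims((3, 10), "float32", (3, 10, 1, 1), 2, 2)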
Example #9
def test_call_attrs():
    x = relay.var("x")
    # non default args
    z = relay.nn.softmax(x, axis=2)
    assert "axis=2" in z.astext()
    # default args
    z = relay.nn.softmax(x)
    assert "softmax(%x)" in z.astext()
    # non default args
    z = relay.expand_dims(x, axis=2, num_newaxis=2)
    assert "num_newaxis=2" in z.astext()
Example #10
 def fail1(x, conv_weight, out_bias, out_scale, channels):
     args = [x, conv_weight, out_bias, out_scale]
     out_scale = relay.expand_dims(out_scale, axis=1, num_newaxis=2)
     out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2)
     y1 = relay.nn.conv2d(x, conv_weight,
                          channels=channels,
                          kernel_size=(3, 3),
                          padding=(1, 1))
     y1 = relay.nn.relu(y1)
     y2 = relay.nn.conv2d(x, conv_weight,
                          channels=channels,
                          kernel_size=(3, 3),
                          padding=(1, 1),
                          out_layout="CNHW")
     # fold will fail because the axes on the two paths
     # differ from each other.
     y2 = relay.nn.relu(y2)
     y = relay.add(y1, y2)
     y = relay.multiply(y, out_scale)
     return relay.Function(args, y)
Example #11
 def expected(x, conv_weight, out_scale, channels):
     # use a fixed order of args so alpha equal check can pass
     args = [x, conv_weight]
     squeezed_scale = relay.squeeze(out_scale, axis=[1,2])
     conv_weight = relay.multiply(
         conv_weight, relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3))
     y = relay.nn.conv2d(x, conv_weight,
                         channels=channels,
                         kernel_size=(3, 3),
                         padding=(1, 1))
     return relay.Function(args, y)
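The rewrite above relies on the identity that multiplying a conv output by a per-output-channel scale equals scaling the weight along its output-channel axis; `expand_dims(squeezed_scale, axis=1, num_newaxis=3)` gives the scale shape (O, 1, 1, 1) so it broadcasts over the OIHW weight. A NumPy sketch of the identity, written with a 1x1 convolution as a matmul (illustrative only):

import numpy as np

x = np.random.rand(5, 16)        # (pixels, in_channels)
w = np.random.rand(64, 16)       # (out_channels, in_channels)
scale = np.random.rand(64)       # per-output-channel scale

scaled_output = (x @ w.T) * scale        # multiply after the "conv"
folded_weight = w * scale[:, None]       # fold the scale into the weight
np.testing.assert_allclose(scaled_output, x @ folded_weight.T, rtol=1e-10)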
Example #12
 def expected(x, conv_weight, out_bias, out_scale, channels):
     # use a fixed order of args so alpha equal check can pass
     args = [x, conv_weight, out_bias, out_scale]
     out_scale = relay.expand_dims(out_scale, axis=1, num_newaxis=2)
     out_bias = relay.expand_dims(out_bias, axis=1, num_newaxis=2)
     squeezed_scale = relay.squeeze(out_scale, axis=[1,2])
     def fold_conv_weight():
         return relay.multiply(
             conv_weight,
             relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3))
     y1 = relay.nn.conv2d(x, fold_conv_weight(),
                         channels=channels,
                         kernel_size=(3, 3),
                         padding=(1, 1))
     y1 = relay.nn.relu(y1)
     y2 = relay.nn.conv2d(x, fold_conv_weight(),
                         channels=channels,
                         kernel_size=(3, 3),
                         padding=(1, 1))
     y2 = relay.nn.relu(y2)
     y = relay.add(y1, y2)
     return relay.Function(args, y)
Example #13
 def expected():
     x = relay.var("x", shape=(1, 64, 56, 56))
     bias = relay.var("bias", shape=(64,))
     scale = relay.var("scale", shape=(64, 1, 1))
     weight = relay.var("weight")
     x = relay.layout_transform(x, "NCHW", "NCHW16c")
     bias = relay.expand_dims(bias, 1, 2)
     bias = relay.layout_transform(bias, "CHW", "CHW16c")
     scale = relay.layout_transform(scale, "CHW", "CHW16c")
     y = relay.nn.conv2d(x, weight, channels=64, kernel_size=(3, 3), padding=(1, 1),
                         data_layout="NCHW16c")
     y = relay.add(y, bias)          # test broadcasting to lhs
     y = relay.multiply(scale, y)      # test broadcasting to rhs
     y = relay.layout_transform(y, "NCHW16c", "NCHW")
     y = relay.Function(free_vars(y), y)
     return y
Example #14
    def expected():
        x = relay.var("x", shape=(1, 64, 56, 56))
        bias = relay.var("bias", shape=(64,))
        weight = relay.var("weight", shape=(64, 64, 3, 3))

        y = relay.layout_transform(x, "NCHW", "NCHW16c")
        w = relay.layout_transform(weight, "OIHW", "OIHW16i")
        y = relay.nn.conv2d(y, w,
                            channels=64,
                            kernel_size=(3, 3),
                            padding=(1, 1),
                            kernel_layout="OIHW16i",
                            data_layout="NCHW16c")
        b = relay.expand_dims(bias, axis=1, num_newaxis=2)
        b = relay.layout_transform(b, "CHW", "CHW16c")
        y = relay.add(y, b)

        y = relay.nn.relu(y)
        y = relay.nn.max_pool2d(y, pool_size=(2, 2), layout="NCHW16c")
        y = relay.cast(y, 'int32')
        y = relay.layout_transform(y, "NCHW16c", "NCHW")
        y = relay.nn.batch_flatten(y)
        y = relay.Function(free_vars(y), y)
        return y
Example #15
 def get_graph(x_shape=(1, 3), axis=1, num_newaxis=1):
     x = relay.var("x", shape=(x_shape), dtype="float32")
     out = relay.expand_dims(x, axis, num_newaxis)
     f = relay.Function([x], out)
     return f, {"x": x_shape}, []
 def fold_conv_weight():
     return relay.multiply(
         conv_weight,
         relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3))
Example #17
def _conv2d_legalize(attrs, inputs, arg_types):
    """Legalizes Conv2D op.

    Parameters
    ----------
    attrs : tvm.attrs.Attrs
        Attributes of current convolution
    inputs : list of tvm.relay.Expr
        The args of the Relay expr to be legalized
    arg_types : list of types
        List of input and output types

    Returns
    -------
    result : tvm.relay.Expr
        The legalized expr
    """

    # Dilation not supported yet. Return None if dilation is not (1, 1)
    dilation = attrs.get_int_tuple("dilation")
    if not (dilation[0] == 1 and dilation[1] == 1):
        return None

    # No legalization for depthwise convolutions yet.
    groups = attrs.get_int("groups")
    if groups != 1:
        return None

    # Collect the input tensors.
    data_tensor, kernel_tensor = arg_types[0], arg_types[1]
    data_dtype = data_tensor.dtype
    kernel_dtype = kernel_tensor.dtype

    # Collect the output tensor.
    output_tensor = arg_types[2]

    # Collect the input exprs.
    data, kernel = inputs

    # Get the conv attrs
    new_attrs = {k: attrs[k] for k in attrs.keys()}

    is_int8_inputs = False
    # If both the inputs are int8, we can add 128 to make the input dtype uint8, and then adjust the
    # output. This will help picking up Intel VNNI instructions.
    # Original --> C = A (conv) B
    # A and B are int8
    #   C = (A + 128 - 128) (conv) B
    #   C = (A' conv B) - 128 (conv) B
    # where A' = A + 128
    # and 128 (conv) B is basically a reduce on CRS axis for weights.
    if data_tensor.dtype == 'int8' and kernel_tensor.dtype == 'int8':
        is_int8_inputs = True
        padding = attrs.get_int_tuple("padding")

        if attrs['data_layout'] == 'NHWC' and attrs['kernel_layout'] == 'HWIO':
            adjust_shift = relay.sum(relay.cast(kernel, dtype='int32'),
                                     axis=(0, 1, 2))
            pad_width = ((0, 0), (padding[0], padding[0]),
                         (padding[1], padding[1]), (0, 0))
        elif attrs['data_layout'] == 'NCHW' and attrs[
                'kernel_layout'] == 'OIHW':
            pad_width = ((0, 0), (0, 0), (padding[0], padding[0]),
                         (padding[1], padding[1]))
            adjust_shift = relay.sum(relay.cast(kernel, dtype='int32'),
                                     axis=(1, 2, 3))
            adjust_shift = relay.expand_dims(adjust_shift,
                                             axis=1,
                                             num_newaxis=2)
        else:
            return None

        data = relay.cast(data, 'int32')
        data = relay.add(data, relay.const(128, 'int32'))
        data = relay.cast(data, 'uint8')

        # Do external padding as pad value has to be 128.
        if not (padding[0] == 0 and padding[1] == 0):
            data = relay.nn.pad(data, pad_width=pad_width, pad_value=128)
        new_attrs['padding'] = (0, 0)

        # The data type is now shifted to uint8
        data_dtype = 'uint8'

        # Multiply 128 to adjust shift.
        adjust_shift = relay.multiply(adjust_shift, relay.const(128, 'int32'))

    # Legalize if the datatypes are suitable for fast Int8 instructions.  Int8 instructions require
    # input channel to be a multiple of 4 and output channels to be a multiple of 16. For input
    # channels, we pad both the inputs and weights input channels. For output channels, we pad the
    # weight and stride_slice the output.
    if _is_int8_hw_support(data_dtype, kernel_dtype):
        # Flags to remember if the expr is modified
        ic_modified = False
        oc_modified = False

        # Find the value of input and output channel.
        in_channel = -1
        out_channel = -1
        if attrs['data_layout'] == 'NHWC' and attrs['kernel_layout'] == 'HWIO':
            in_channel = data_tensor.shape[3].value
            out_channel = kernel_tensor.shape[3].value
        elif attrs['data_layout'] == 'NCHW' and attrs[
                'kernel_layout'] == 'OIHW':
            in_channel = data_tensor.shape[1].value
            out_channel = kernel_tensor.shape[0].value
        else:
            return None

        if in_channel % 4 != 0:
            new_in_channel = ((in_channel + 4) // 4) * 4
            diff = new_in_channel - in_channel
            if attrs['data_layout'] == 'NHWC' and attrs[
                    'kernel_layout'] == 'HWIO':
                data = relay.nn.pad(data,
                                    pad_width=((0, 0), (0, 0), (0, 0), (0,
                                                                        diff)))
                kernel = relay.nn.pad(kernel,
                                      pad_width=((0, 0), (0, 0), (0, diff),
                                                 (0, 0)))
                ic_modified = True
            elif attrs['data_layout'] == 'NCHW' and attrs[
                    'kernel_layout'] == 'OIHW':
                pad_width = ((0, 0), (0, diff), (0, 0), (0, 0))
                data = relay.nn.pad(data, pad_width=pad_width)
                kernel = relay.nn.pad(kernel, pad_width=pad_width)
                ic_modified = True
            else:
                return None

        new_out_channel = out_channel
        if out_channel % 16 != 0:
            new_out_channel = ((out_channel + 16) // 16) * 16
            diff = new_out_channel - out_channel
            if attrs['data_layout'] == 'NHWC' and attrs[
                    'kernel_layout'] == 'HWIO':
                kernel = relay.nn.pad(kernel,
                                      pad_width=((0, 0), (0, 0), (0, 0),
                                                 (0, diff)))
                oc_modified = True
            elif attrs['data_layout'] == 'NCHW' and attrs[
                    'kernel_layout'] == 'OIHW':
                kernel = relay.nn.pad(kernel,
                                      pad_width=((0, diff), (0, 0), (0, 0),
                                                 (0, 0)))
                oc_modified = True
            else:
                return None

        if oc_modified:
            new_attrs['channels'] = new_out_channel
            out = tvm.relay.nn.conv2d(data, kernel, **new_attrs)
            original_out_shape = [x.value for x in output_tensor.shape]
            out = relay.strided_slice(out,
                                      begin=(0, 0, 0, 0),
                                      end=original_out_shape)
        else:
            out = relay.nn.conv2d(data, kernel, **new_attrs)

        if is_int8_inputs:
            out = relay.subtract(out, adjust_shift)

        return out
    return None
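The +128 shift in `_conv2d_legalize` uses the linearity of convolution: conv(A + 128, B) = conv(A, B) + 128 * sum(B) over the in-channel and spatial axes, which is why `adjust_shift` is subtracted afterwards and why any padding has to be applied externally with pad_value=128. A small NumPy check with an unpadded, stride-1 reference convolution (illustrative only):

import numpy as np

def conv2d_nchw(data, kernel):
    # minimal NCHW/OIHW correlation, stride 1, no padding
    n, ci, h, w = data.shape
    co, _, kh, kw = kernel.shape
    out = np.zeros((n, co, h - kh + 1, w - kw + 1), dtype=np.int64)
    for y in range(out.shape[2]):
        for x in range(out.shape[3]):
            patch = data[:, :, y:y + kh, x:x + kw]
            out[:, :, y, x] = np.tensordot(patch, kernel, axes=([1, 2, 3], [1, 2, 3]))
    return out

a = np.random.randint(-128, 128, size=(1, 3, 8, 8)).astype(np.int64)
b = np.random.randint(-128, 128, size=(4, 3, 3, 3)).astype(np.int64)
adjust = 128 * b.sum(axis=(1, 2, 3))                     # the adjust_shift term
lhs = conv2d_nchw(a + 128, b) - adjust[None, :, None, None]
np.testing.assert_array_equal(lhs, conv2d_nchw(a, b))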
Example #18
 def fold_conv_weight():
     return relay.multiply(
         conv_weight,
         relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3))
Example #19
 def fold_conv_weight():
     squeezed_scale = relay.squeeze(out_scale, axis=[1, 2])
     return relay.multiply(
         conv_weight,
         relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3))
Example #20
 def verify_expand_dims(dshape, axis, num_newaxis, dtype="float32"):
     x = relay.var("x", relay.ty.TensorType(dshape, dtype))
     y = relay.expand_dims(x, axis, num_newaxis)
     func = relay.Function([x], y)
     x_data = np.random.uniform(size=dshape).astype(dtype)
     verify_results(func, [x_data], "test_expand_dims", rtol=1e-5, atol=1e-5)
Example #21
def conv2d_alter_int8_common(
    data,
    data_tensor,
    kernel,
    kernel_tensor,
    output_tensor,
    attrs,
    data_dtype: str,
    in_channel_vector_length: int,
    out_channel_vector_length: int,
):
    """
    Convert TE inputs/outputs so that they are suitable for fast Int8 instructions.

    Int8 instructions require input channels and output channels to be a
    multiple of the vector length. For input channels, we pad both the inputs
    and weights channels. For output channels, we pad the weight and
    stride_slice the output.

    Arguments
    ---------
    data: Expr
        Data Expr
    data_tensor: Tensor
        Data tensor
    kernel: Expr
        Kernel Expr
    kernel_tensor: Tensor
        Kernel tensor
    output_tensor: Tensor
        Output tensor
    attrs: Conv2dAttrs
        Attributes of the computation
    data_dtype: "int8" or "uint8"
        Desired dtype of data. Data will be converted to this dtype before the main computation.
    in_channel_vector_length: int
        Length of vector units on target hardware. Input channels are padded to this length.
    out_channel_vector_length: int
        Output size of vector instruction. Output channels are padded to this length.

    Returns
    -------
    out : Tensor
        Conv2d computation with inputs in the correct order for tensorization.
    """
    # Dilation not supported yet. Return None if dilation is not (1, 1)
    dilation = attrs.get_int_tuple("dilation")
    if not (dilation[0] == 1 and dilation[1] == 1):
        return None

    # No legalization for depthwise convolutions yet.
    groups = attrs.get_int("groups")
    if groups != 1:
        return None

    # Get the conv attrs
    new_attrs = {k: attrs[k] for k in attrs.keys()}

    padding = attrs.get_int_tuple("padding")
    kh, kw = attrs.get_int_tuple("kernel_size")
    pt, pl, pb, pr = get_pad_tuple(padding, (kh, kw))

    if data_tensor.dtype != data_dtype:
        # How to convert data to int8
        # Original --> C = A (conv) B
        # A and B are int8
        #   C = (A + 128 - 128) (conv) B
        #   C = (A' conv B) - 128 (conv) B
        # where A' = A + 128
        # and 128 (conv) B is basically a reduce on CRS axis for weights.
        #
        # How to convert data to uint8
        #   C = (A - 128 + 128) (conv) B
        #   C = (A' conv B) + 128 (conv) B
        # where A' = A - 128
        if data_dtype == "int8":
            # shift data to int8
            before_shift = relay.add
            after_shift = relay.subtract
        else:
            # shift data to uint8
            before_shift = relay.subtract
            after_shift = relay.add

        if attrs["data_layout"] == "NHWC" and attrs["kernel_layout"] == "HWIO":
            adjust_shift = relay.sum(relay.cast(kernel, dtype="int32"),
                                     axis=(0, 1, 2))
            pad_width = ((0, 0), (pt, pb), (pl, pr), (0, 0))
        elif attrs["data_layout"] == "NCHW" and attrs[
                "kernel_layout"] == "OIHW":
            pad_width = ((0, 0), (0, 0), (pt, pb), (pl, pr))
            adjust_shift = relay.sum(relay.cast(kernel, dtype="int32"),
                                     axis=(1, 2, 3))
            adjust_shift = relay.expand_dims(adjust_shift,
                                             axis=1,
                                             num_newaxis=2)
        else:
            return None

        data = relay.cast(data, "int32")
        data = before_shift(data, relay.const(128, "int32"))
        data = relay.cast(data, data_dtype)

        # Do external padding as pad value has to be 128.
        if any(padding):
            data = relay.nn.pad(data, pad_width=pad_width, pad_value=128)
        new_attrs["padding"] = (0, 0)

        # Multiply 128 to adjust shift.
        adjust_shift = relay.multiply(adjust_shift, relay.const(128, "int32"))

    # Flags to remember if the expr is modified
    ic_modified = False
    oc_modified = False

    # Find the value of input and output channel.
    in_channel = -1
    out_channel = -1
    if attrs["data_layout"] == "NHWC" and attrs["kernel_layout"] == "HWIO":
        in_channel = data_tensor.shape[3].value
        out_channel = kernel_tensor.shape[3].value
    elif attrs["data_layout"] == "NCHW" and attrs["kernel_layout"] == "OIHW":
        in_channel = data_tensor.shape[1].value
        out_channel = kernel_tensor.shape[0].value
    else:
        return None

    if in_channel % in_channel_vector_length != 0:
        new_in_channel = ((in_channel + in_channel_vector_length) //
                          in_channel_vector_length) * in_channel_vector_length
        diff = new_in_channel - in_channel
        if attrs["data_layout"] == "NHWC" and attrs["kernel_layout"] == "HWIO":
            data = relay.nn.pad(data,
                                pad_width=((0, 0), (0, 0), (0, 0), (0, diff)))
            kernel = relay.nn.pad(kernel,
                                  pad_width=((0, 0), (0, 0), (0, diff), (0,
                                                                         0)))
            ic_modified = True
        elif attrs["data_layout"] == "NCHW" and attrs[
                "kernel_layout"] == "OIHW":
            pad_width = ((0, 0), (0, diff), (0, 0), (0, 0))
            data = relay.nn.pad(data, pad_width=pad_width)
            kernel = relay.nn.pad(kernel, pad_width=pad_width)
            ic_modified = True
        else:
            return None

    new_out_channel = out_channel
    if out_channel % out_channel_vector_length != 0:
        new_out_channel = (
            (out_channel + out_channel_vector_length) //
            out_channel_vector_length) * out_channel_vector_length
        diff = new_out_channel - out_channel
        if attrs["data_layout"] == "NHWC" and attrs["kernel_layout"] == "HWIO":
            kernel = relay.nn.pad(kernel,
                                  pad_width=((0, 0), (0, 0), (0, 0), (0,
                                                                      diff)))
            oc_modified = True
        elif attrs["data_layout"] == "NCHW" and attrs[
                "kernel_layout"] == "OIHW":
            kernel = relay.nn.pad(kernel,
                                  pad_width=((0, diff), (0, 0), (0, 0), (0,
                                                                         0)))
            oc_modified = True
        else:
            return None

    if oc_modified:
        new_attrs["channels"] = new_out_channel
        out = relay.nn.conv2d(data, kernel, **new_attrs)
        original_out_shape = [x.value for x in output_tensor.shape]
        out = relay.strided_slice(out,
                                  begin=[0, 0, 0, 0],
                                  end=original_out_shape)
    else:
        out = relay.nn.conv2d(data, kernel, **new_attrs)

    if data_tensor.dtype != data_dtype:
        out = after_shift(out, adjust_shift)

    return out
Example #22
 def fold_conv_weight():
     squeezed_scale = relay.squeeze(out_scale, axis=[1,2])
     return relay.multiply(
         conv_weight,
         relay.expand_dims(squeezed_scale, axis=1, num_newaxis=3))
Example #23
def manual_tir_common(do_tune=False):
    M, N, K = 1024, 1024, 1024  # pylint: disable=invalid-name
    data_shape = (M, K)
    weight_shape = (N, K)

    data_dtype = "uint8"
    data = relay.var("data", shape=data_shape, dtype=data_dtype)
    weight = relay.var("weight", shape=weight_shape, dtype="int8")
    bias = relay.var("bias", shape=(weight_shape[0], ), dtype="int32")

    # dense is tuned by the TIR schedule above, bmm is scheduled by TE (topi/x86/batch_matmul.py)
    dense = relay.nn.dense(data, weight, out_dtype="int32")
    bias_add = relay.nn.bias_add(dense, bias) + relay.const(1, dtype="int32")
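    # expand_dims(bias_add, 0) below adds a leading batch axis so the 2-D
    # dense/bias_add result can feed nn.batch_matmul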
    out = relay.nn.batch_matmul(
        relay.cast(relay.expand_dims(bias_add, 0), "uint8"),
        relay.cast(relay.expand_dims(bias_add, 0), "int8"),
        out_dtype="int32",
    )

    relay_mod = tvm.IRModule.from_expr(out)

    target = "llvm -mcpu=cascadelake -num-cores 4"
    dev = tvm.device(target, 0)

    data = np.random.uniform(1, 10, size=(M, K)).astype("uint8")
    weight_np = np.random.uniform(1, 10, size=weight_shape).astype("int8")
    bias_np = np.random.uniform(1, 10,
                                size=(weight_shape[0], )).astype("int32")

    ref = (relay.create_executor(
        "vm", mod=relay_mod, device=dev,
        target=target).evaluate()(*[data, weight_np, bias_np]).numpy())

    params = {"weight": weight_np, "bias": bias_np}

    if do_tune:
        extracted_tasks = ms.extract_task_from_relay(relay_mod, target, params)
        # Filter out tasks that we don't intend to schedule / tune with TIR.
        tune_tasks = list(
            filter(
                lambda task: "dense" in task.task_name,
                extracted_tasks,
            ))
        config = ms.TuneConfig(
            strategy="replay_trace",
            num_trials_per_iter=64,
            max_trials_per_task=20000,
            max_trials_global=20000,
        )

        with tempfile.TemporaryDirectory() as work_dir:
            # postprocs=lambda: [] is important to prevent default post processors from
            # tampering with the manual schedule.
            database = ms.tune_extracted_tasks(
                tune_tasks,
                config,
                work_dir=work_dir,
                postprocs=lambda: [],
            )
    else:

        def schedule_fn(task, sch):
            if "dense" not in task.task_name:
                return False

            block = sch.get_block("compute")

            # Looks up schedule_rule annotation.
            # See the comment in test_tune_relay_manual_tir_vnni().
            schedule_rule = sch.get(block).annotations["schedule_rule"]

            assert "dense_vnni" in schedule_rule

            schedule_dense(block, M, False, sch)

            return True

        database = apply_fixed_schedules(relay_mod, target, params,
                                         schedule_fn)

    with ms.ApplyHistoryBest(database):
        with tvm.transform.PassContext(
                opt_level=3,
                config={"relay.backend.use_meta_schedule": True},
        ):
            # pylint: disable=W0105
            """
            The log should say
            Warning: Cannot find workload: tvmgen_default_fused_expand_dims
            Warning: Cannot find workload: tvmgen_default_fused_cast
            Warning: Cannot find workload: tvmgen_default_fused_cast_1
            Warning: Cannot find workload: tvmgen_default_fused_nn_batch_matmul

            This means batch matmul and others are scheduled by TE, and dense (the one not warned)
            is found in the meta schedule tuning database during ApplyHistoryBest
            """
            # pylint: enable=W0105
            lib = relay.build(relay_mod, target=target, params=params)

    runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))

    runtime.set_input("data", data)
    runtime.run()

    out = runtime.get_output(0).numpy()

    np.testing.assert_equal(out, ref)
Example #24
    def expected(N, CI, H, W, CO, KH, KW, OH, OW, src_layout, dst_layout):
        layout_map = {"src": {}, "dst": {}}
        if src_layout == "NCHW":
            nchw = layout_map["src"]
            nhwc = layout_map["dst"]
        else:
            nchw = layout_map["dst"]
            nhwc = layout_map["src"]

        nchw["data_layout"] = "NCHW"
        nchw["data_shape"] = (N, CI, H, W)
        nchw["offset_shape"] = (N, KH * KW * 2, OH, OW)
        nchw["weight_shape"] = (CO, CI, KH, KW)
        nchw["kernel_layout"] = "OIHW"

        nhwc["data_layout"] = "NHWC"
        nhwc["data_shape"] = (N, H, W, CI)
        nhwc["offset_shape"] = (N, OH, OW, KH * KW * 2)
        nhwc["weight_shape"] = (KH, KW, CI, CO)
        nhwc["kernel_layout"] = "HWIO"

        bias_shape = (CO,)

        data = relay.var("data", shape=layout_map["src"]["data_shape"], dtype="float32")
        offset = relay.var("offset", shape=layout_map["src"]["offset_shape"], dtype="float32")
        weight = relay.var("weight", shape=layout_map["src"]["weight_shape"], dtype="float32")
        bias = relay.var("bias", shape=bias_shape, dtype="float32")

        data = relay.layout_transform(
            data, layout_map["src"]["data_layout"], layout_map["dst"]["data_layout"]
        )
        offset = relay.layout_transform(
            offset, layout_map["src"]["data_layout"], layout_map["dst"]["data_layout"]
        )
        weight = relay.layout_transform(
            weight, layout_map["src"]["kernel_layout"], layout_map["dst"]["kernel_layout"]
        )
        y = relay.nn.deformable_conv2d(
            data,
            offset,
            weight,
            kernel_size=(KH, KW),
            channels=CO,
            data_layout=layout_map["dst"]["data_layout"],
            kernel_layout=layout_map["dst"]["kernel_layout"],
        )
        if layout_map["src"]["data_layout"] == "NHWC":
            bias = relay.expand_dims(bias, axis=0, num_newaxis=3)
        else:
            bias = relay.expand_dims(bias, axis=1, num_newaxis=2)
            bias = relay.expand_dims(bias, axis=0)
        bias = relay.layout_transform(
            bias, layout_map["src"]["data_layout"], layout_map["dst"]["data_layout"]
        )
        y = relay.add(y, bias)
        y = relay.nn.relu(y)
        y = relay.nn.max_pool2d(y, pool_size=(2, 2), layout=layout_map["dst"]["data_layout"])
        y = relay.cast(y, "int32")
        y = relay.layout_transform(
            y, layout_map["dst"]["data_layout"], layout_map["src"]["data_layout"]
        )
        y = relay.nn.batch_flatten(y)
        y = relay.Function(analysis.free_vars(y), y)
        return y
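The two expand_dims branches above only prepare the (CO,) bias so that it broadcasts against the source layout before the layout_transform. A quick NumPy shape sketch (illustrative):

import numpy as np

co = 8
bias = np.zeros(co)
# NHWC source: expand_dims(axis=0, num_newaxis=3) -> (1, 1, 1, CO),
# which broadcasts against an (N, H, W, CO) tensor.
assert (np.zeros((2, 5, 5, co)) + bias.reshape(1, 1, 1, co)).shape == (2, 5, 5, co)
# NCHW source: expand_dims(axis=1, num_newaxis=2) then expand_dims(axis=0)
# -> (1, CO, 1, 1), which broadcasts against an (N, CO, H, W) tensor.
assert (np.zeros((2, co, 5, 5)) + bias.reshape(1, co, 1, 1)).shape == (2, co, 5, 5)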
Example #25
 def test_expand_dims(x_shape=(1, 3), axis=1, num_newaxis=1):
     x = relay.var('x', shape=(x_shape), dtype='float32')
     out = relay.expand_dims(x, axis, num_newaxis)
     f = relay.Function([x], out)
     return f, {'x': x_shape}