Example #1: quantized linear converter (PyTorch frontend)
    def _impl(inputs, _):
        weight = inputs[1][0]
        weight_scale = inputs[1][1]
        weight_zero_point = inputs[1][2]
        output_scale = _expr.const(inputs[2])
        output_zero_point = _expr.const(inputs[3])
        assert len(inputs) == 6, "Input quant params not found in op inputs"
        # Manually added by add_input_quant_params_to_op_inputs above
        input_scale = _expr.const(inputs[4])
        input_zero_point = _expr.const(inputs[5])

        weight_shape = infer_shape(weight)
        dense = relay.qnn.op.dense(
            inputs[0],
            weight,
            input_zero_point,
            weight_zero_point,
            input_scale,
            weight_scale,
            units=weight_shape[0],
        )
        bias_var = inputs[1][3]

        return _do_bias_and_requantize(dense, bias_var, input_scale,
                                       weight_scale, output_scale,
                                       output_zero_point, with_relu)
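
For context, the qnn.dense call above can be exercised on its own. The snippet below is a minimal standalone sketch; the shapes and quantization parameters are illustrative assumptions, not values taken from the converter.

import tvm
from tvm import relay

data = relay.var("data", shape=(1, 8), dtype="uint8")     # quantized activations
weight = relay.var("weight", shape=(4, 8), dtype="int8")   # quantized weights, (units, in_features)
dense = relay.qnn.op.dense(
    data,
    weight,
    relay.const(0, "int32"),       # input zero point
    relay.const(0, "int32"),       # weight zero point
    relay.const(0.1, "float32"),   # input scale
    relay.const(0.05, "float32"),  # weight scale
    units=4,
)
mod = tvm.IRModule.from_expr(relay.Function([data, weight], dense))
print(mod)  # qnn.dense yields an int32 accumulator that is requantized downstream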
Example #2: temporal shift module (TSM) converter (Keras frontend)
def _convert_tsm(inexpr, keras_layer, etab):

    nt, c, h, w = infer_shape(inexpr)
    n = int(nt / 10)  # the converter assumes 10 time segments per clip
    fold_div = 3
    x = _op.transform.reshape(inexpr, [n, 10, c, h, w])
    fold = c // fold_div
    # last_fold = c - (fold_div - 1) * fold
    split_out = _op.split(x, (fold, fold * 2), axis=2)

    out1, out2, out3 = split_out[0], split_out[1], split_out[2]

    # Shift left
    padding_1 = _op.zeros((n, 1, fold, h, w), dtype="float32")
    out1 = _op.split(out1, (1, ), axis=1)[1]
    out1 = _op.concatenate([out1, padding_1], axis=1)

    # Shift right
    padding_2 = _op.zeros((n, 1, fold, h, w), dtype="float32")
    out2 = _op.split(out2, (10 - 1, ), axis=1)[0]
    out2 = _op.concatenate([padding_2, out2], axis=1)

    out = _op.concatenate([out1, out2, out3], axis=2)
    out = _op.reshape(out, (-1, c, h, w))

    return out
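
The shift pattern above is easier to see in plain NumPy. The following is a minimal sketch of the same idea, assuming the hardcoded 10 time segments and fold_div = 3; it is an illustration, not the converter itself.

import numpy as np

def temporal_shift_numpy(x, n_segment=10, fold_div=3):
    # x: (N*T, C, H, W); shift one channel fold left in time, one right, keep the rest
    nt, c, h, w = x.shape
    n = nt // n_segment
    x = x.reshape(n, n_segment, c, h, w)
    fold = c // fold_div
    out = np.zeros_like(x)
    out[:, :-1, :fold] = x[:, 1:, :fold]                  # shift left
    out[:, 1:, fold:2 * fold] = x[:, :-1, fold:2 * fold]  # shift right
    out[:, :, 2 * fold:] = x[:, :, 2 * fold:]             # no shift
    return out.reshape(nt, c, h, w)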
Example #3: dynamically quantized linear converter (PyTorch frontend)
    def _impl(inputs, _):
        weight = inputs[1][0]
        weight_scale = inputs[1][1]
        weight_zero_point = inputs[1][2]

        inp = inputs[0]

        input_scale, input_zero_point = _calculate_qparam(inp)
        qinp = relay.qnn.op.quantize(inp,
                                     input_scale,
                                     input_zero_point,
                                     out_dtype="uint8")

        data_shape = infer_shape(inp)

        if len(data_shape) > 2:
            # Collapse the leading dims so dense sees a 2-D input; the
            # original shape is restored after dequantization below.
            qinp = _op.reverse_reshape(qinp, [-1, 0])

        weight_shape = infer_shape(weight)
        units = weight_shape[0]
        dense = relay.qnn.op.dense(
            qinp,
            weight,
            input_zero_point,
            weight_zero_point,
            input_scale,
            weight_scale,
            units=units,
        )
        bias_var = inputs[1][3]

        dequant_scale = input_scale * weight_scale
        dense_out = relay.qnn.op.dequantize(dense,
                                            dequant_scale,
                                            input_zero_point=relay.const(
                                                0, "int32"),
                                            axis=1)

        if len(data_shape) > 2:
            new_shape = list(data_shape[:-1])
            new_shape.append(units)
            dense_out = _op.reshape(dense_out, new_shape)

        if bias_var is not None:
            return dense_out + bias_var

        return dense_out
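
Here, _calculate_qparam derives the input scale and zero point from the input expression at conversion time. As a rough illustration only, the usual asymmetric uint8 derivation from a tensor's min/max looks like the hypothetical NumPy helper below (an assumption for exposition, not the frontend's exact code).

import numpy as np

def calculate_qparam_numpy(x, qmin=0, qmax=255):
    # The range must include zero so that 0.0 is exactly representable
    xmin, xmax = min(float(x.min()), 0.0), max(float(x.max()), 0.0)
    scale = (xmax - xmin) / (qmax - qmin)
    if scale == 0.0:
        scale = 1.0
    zero_point = int(np.clip(round(qmin - xmin / scale), qmin, qmax))
    return scale, zero_point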
Example #4: quantize op converter (PyTorch frontend)
    def _impl(inputs, _):
        # Quantize along the channel axis (dim 1) for multi-dimensional
        # inputs, otherwise along dim 0.
        dim = len(infer_shape(inputs[0]))
        if dim > 1:
            axis = 1
        else:
            axis = 0

        return relay.qnn.op.quantize(
            inputs[0], _expr.const(inputs[1]), _expr.const(inputs[2]), out_dtype="uint8", axis=axis
        )
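
As a standalone illustration of the qnn.quantize call (the scale and zero-point values below are arbitrary assumptions):

import tvm
from tvm import relay

data = relay.var("data", shape=(1, 3, 8, 8), dtype="float32")
q = relay.qnn.op.quantize(
    data,
    relay.const(0.05, "float32"),  # output scale
    relay.const(128, "int32"),     # output zero point
    axis=1,                        # channel axis, as chosen above for >1-D inputs
    out_dtype="uint8",
)
mod = tvm.IRModule.from_expr(relay.Function([data], q))
print(mod)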
Example #5: quantized transposed convolution converter (PyTorch frontend)
    def _impl(inputs, _):
        # Refer to aten/src/ATen/native/quantized/cpu/qconv.cpp
        # Supported in Torch 1.7 or newer
        conv_params = inputs[1]
        weight = conv_params[0]
        weight_scale = conv_params[1]
        weight_zero_point = conv_params[2]
        bias = conv_params[3]

        strides = conv_params[4]
        padding = conv_params[5]
        dilation = conv_params[6]
        groups = conv_params[7]
        output_padding = conv_params[8]

        output_scale = _expr.const(inputs[2])
        output_zero_point = _expr.const(inputs[3])

        assert len(inputs) == 6, "Input quant params not found in op inputs"

        # These are manually added by add_input_quant_params_to_op_inputs above
        # In torch, they are retrieved from QTensor data structure at runtime
        input_scale = _expr.const(inputs[4])
        input_zero_point = _expr.const(inputs[5])

        weight_shape = list(infer_shape(weight))

        # Swap I and O dims to match shape relay expects for OIHW
        weight_shape[0], weight_shape[1] = weight_shape[1], weight_shape[0]

        kernel_size = (weight_shape[2], weight_shape[3])
        out_channels = weight_shape[0]

        conv_out = relay.qnn.op.conv2d_transpose(
            inputs[0],
            weight,
            input_zero_point,
            weight_zero_point,
            input_scale,
            weight_scale,
            kernel_size=kernel_size,
            dilation=dilation,
            strides=strides,
            padding=padding,
            groups=groups,
            channels=out_channels,
            output_padding=output_padding,
            out_dtype="int32",
            kernel_layout="OIHW",
        )

        return _do_bias_and_requantize(conv_out, bias, input_scale,
                                       weight_scale, output_scale,
                                       output_zero_point, with_relu)
Example #6: quantized mul-by-scalar converter (PyTorch frontend)
    def _impl(inputs, _):
        # Refer to aten/src/ATen/native/quantized/cpu/qmul.cpp
        # The math for calculating the output scale and zero point has already
        # been done in _add_output_quant_params_to_scalar_op above
        assert len(inputs) == 6, "Input quant params not found in op inputs"
        other_val = inputs[1]  # scalar

        if other_val > 0.0:
            # only scale change
            return inputs[0]
        if other_val == 0.0:
            shape = infer_shape(inputs[0])
            return _op.full(_expr.const(0), shape, dtype="uint8")

        # negative scale case
        q_min = 0
        q_max = 255
        bias = _expr.const(q_max + q_min, dtype="int8")
        int8 = bias - _op.cast(inputs[0], "int8")
        return _op.cast(int8, "uint8")
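
The negative-scale branch works because, for an affine uint8 encoding, a negative multiplier can be folded into the scale as long as the stored integers are mirrored across the [q_min, q_max] range. A small NumPy illustration of that mirroring (not the converter's code):

import numpy as np

q = np.array([0, 10, 200, 255], dtype=np.uint8)           # quantized inputs
q_mirrored = (255 - q.astype(np.int32)).astype(np.uint8)  # q' = q_max + q_min - q
print(q_mirrored)                                         # [255 245  55   0]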
Example #7: quantized conv2d converter (PyTorch frontend)
    def _impl(inputs, _):
        # Refer to aten/src/ATen/native/quantized/cpu/qconv.cpp
        # inputs[0]: input tensor
        # inputs[1]: (weight, scale, zero_point, bias)
        # inputs[2-5]: stride, padding, dilation, groups
        # inputs[6]: output_scale
        # inputs[7]: output_zero_point
        # inputs[8]: input_scale (added manually by frontend)
        # inputs[9]: input_zero_point (added manually by frontend)
        weight = inputs[1][0]
        weight_scale = inputs[1][1]
        weight_zero_point = inputs[1][2]

        output_scale = _expr.const(inputs[6])
        output_zero_point = _expr.const(inputs[7])

        assert len(inputs) == 10, "Input quant params not found in op inputs"
        # These are manually added by add_input_quant_params_to_op_inputs above
        # In torch, they are retrieved from QTensor data structure at runtime
        input_scale = _expr.const(inputs[8])
        input_zero_point = _expr.const(inputs[9])

        strides = infer_shape(inputs[2])
        padding = infer_shape(inputs[3])
        dilation = infer_shape(inputs[4])
        groups = inputs[5]

        weight_shape = infer_shape(weight)
        kernel_size = (weight_shape[2], weight_shape[3])
        out_channels = weight_shape[0]

        if padding[0] != 0 or padding[1] != 0:
            pad_val = _get_scalar(input_zero_point)
            inp = _op.nn.pad(inputs[0],
                             pad_width=((0, 0), (0, 0), (padding[0],
                                                         padding[0]),
                                        (padding[1], padding[1])),
                             pad_value=float(pad_val))
        else:
            inp = inputs[0]

        # padding is (0, 0) because we did explicit pad op with
        # pad value being zero point above
        conv_out = relay.qnn.op.conv2d(inp,
                                       weight,
                                       input_zero_point,
                                       weight_zero_point,
                                       input_scale,
                                       weight_scale,
                                       kernel_size=kernel_size,
                                       dilation=dilation,
                                       strides=strides,
                                       padding=(0, 0),
                                       groups=groups,
                                       channels=out_channels)
        bias_var = inputs[1][3]

        return _do_bias_and_requantize(conv_out, bias_var, input_scale,
                                       weight_scale, output_scale,
                                       output_zero_point, with_relu)
Example #8: attention mask converter (Keras frontend)
def _convert_attention_mask(inexpr, keras_layer, etab):
    # Normalize each element by the spatial sum, then rescale by 0.5 * H * W
    xsum = _op.reduce.sum(_op.reduce.sum(inexpr, axis=2, keepdims=True), axis=3, keepdims=True)
    xshape = infer_shape(inexpr)
    out = inexpr / xsum * tvm.relay.expr.const(xshape[2], dtype='float32') \
          * tvm.relay.expr.const(xshape[3], dtype='float32') * tvm.relay.expr.const(0.5, dtype='float32')
    return out
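
A rough NumPy equivalent of the normalization above, assuming NCHW input (illustrative only):

import numpy as np

def attention_mask_numpy(x):
    # x: (N, C, H, W); divide by the spatial sum, then rescale by 0.5 * H * W
    _, _, h, w = x.shape
    xsum = x.sum(axis=(2, 3), keepdims=True)
    return x / xsum * (h * w * 0.5)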