Пример #1
0
    def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr,
                 node_map: tvm.ir.container.Map) -> tvm.relay.Expr:
        """Replace a matched binary elementwise call with an NPU operator.

        Parameters
        ----------
        pre : tvm.relay.Expr
            Expression before rewriting (not read here).
        post : tvm.relay.Expr
            Call whose ``op.body`` is parsed by ``self.params_class`` and
            whose first two ``args`` supply the input tensors.
        node_map : tvm.ir.container.Map
            Pattern node map (not read here).

        Returns
        -------
        tvm.relay.Expr
            Result of ``self.reshape_output`` applied to the new
            ``ethosu_binary_elementwise`` call.

        Raises
        ------
        UnsupportedLayout
            If the output layout is anything other than "NHWC".
        KeyError
            If an activation is present whose op name is not "clip".
        """
        params = self.params_class(post.op.body)

        # With reversed operands the two call arguments swap roles.
        if params.reversed_operands:
            params.ifm.tensor = post.args[1]
            params.ifm2.tensor = post.args[0]
        else:
            params.ifm.tensor = post.args[0]
            params.ifm2.tensor = post.args[1]

        # Layout name -> index of the channel axis; only NHWC is handled.
        supported_layouts = {"NHWC": 3}
        ofm_layout = str(params.ofm.layout)
        if ofm_layout not in supported_layouts:
            raise UnsupportedLayout(ofm_layout)

        # Start from "no activation" and override when one was matched.
        activation, clip_min, clip_max = "NONE", 0, 0
        if params.activation:
            relay_to_npu_activation = {"clip": "CLIP"}
            activation = relay_to_npu_activation[params.activation.op.name]
            clip_min = int(params.activation.attrs.a_min)
            clip_max = int(params.activation.attrs.a_max)

        # We don't yet support activation functions that need to get legalized to LUTs.
        empty_lut = relay.const([], dtype="int8")

        reshaped = self.reshape_input([params.ifm, params.ifm2])

        npu_op = ethosu_ops.ethosu_binary_elementwise(
            ifm=reshaped[0],
            ifm2=reshaped[1],
            lut=empty_lut,
            operator_type=params.operator_type,
            ifm_scale=float(params.ifm.q_params.scale_f32),
            ifm_zero_point=int(params.ifm.q_params.zero_point),
            ifm2_scale=float(params.ifm2.q_params.scale_f32),
            ifm2_zero_point=int(params.ifm2.q_params.zero_point),
            ofm_scale=float(params.ofm.q_params.scale_f32),
            ofm_zero_point=int(params.ofm.q_params.zero_point),
            ifm_channels=params.ifm.shape[-1],
            ifm2_channels=params.ifm2.shape[-1],
            reversed_operands=params.reversed_operands,
            ofm_dtype=params.ofm.dtype,
            activation=activation,
            clip_min=clip_min,
            clip_max=clip_max,
            ifm_layout=str(params.ifm.layout),
            ifm2_layout=str(params.ifm2.layout),
            ofm_layout=ofm_layout,
        )
        return self.reshape_output(npu_op, params.ifm.shape)
Пример #2
0
def make_ethosu_binary_elementwise(
    ifm,
    ifm2,
    ifm_channels,
    ifm2_channels,
    operator_type,
    ofm_dtype,
    reversed_operands=False,
    activation="NONE",
    ifm_layout="NHWC",
    ifm2_layout="NHWC",
    ofm_layout="NHWC",
    rounding_mode="TFL",
):
    """Build an ``ethosu_binary_elementwise`` call with fixed quantization.

    All scales are 1 and all zero points are 0, and the LUT is an empty
    int8 constant. When ``activation`` equals "CLIP" the clip range is
    10..100; for any other activation both clip bounds are 0. The remaining
    arguments are forwarded unchanged.

    Returns
    -------
    The value returned by ``ethosu_ops.ethosu_binary_elementwise``.
    """
    clip_min, clip_max = (10, 100) if activation == "CLIP" else (0, 0)

    # Identity quantization shared by both inputs and the output.
    unit_quantization = {
        "ifm_scale": 1,
        "ifm_zero_point": 0,
        "ifm2_scale": 1,
        "ifm2_zero_point": 0,
        "ofm_scale": 1,
        "ofm_zero_point": 0,
    }

    return ethosu_ops.ethosu_binary_elementwise(
        ifm=ifm,
        ifm2=ifm2,
        lut=relay.const([], dtype="int8"),
        operator_type=operator_type,
        ifm_channels=ifm_channels,
        ifm2_channels=ifm2_channels,
        reversed_operands=reversed_operands,
        activation=activation,
        ofm_dtype=ofm_dtype,
        clip_min=clip_min,
        clip_max=clip_max,
        rounding_mode=rounding_mode,
        ifm_layout=ifm_layout,
        ifm2_layout=ifm2_layout,
        ofm_layout=ofm_layout,
        **unit_quantization,
    )
Пример #3
0
    def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr,
                 node_map: tvm.ir.container.Map) -> tvm.relay.Expr:
        """Replace a matched mean call with NPU operators.

        The input is first brought to four dimensions, then one of three
        lowerings is chosen:

        * axis ``[1, 2]`` with ``keepdims``: a depthwise convolution with
          all-ones weights (int16 output) followed by an elementwise MUL
          with a scalar whose scale is ``1 / (n - eps)``;
        * otherwise, equal input/output scale and zero point: an "AVG"
          pooling;
        * otherwise: a depthwise convolution with all-ones weights, weight
          scale ``1 / (filter_height * filter_width)`` and a bias derived
          from the input zero point.

        Parameters
        ----------
        pre : tvm.relay.Expr
            Expression before rewriting (not read here).
        post : tvm.relay.Expr
            Call whose ``op.body`` is parsed by ``MeanParams`` and whose
            first argument supplies the input tensor.
        node_map : tvm.ir.container.Map
            Pattern node map (not read here).

        Returns
        -------
        tvm.relay.Expr
            The lowered expression, reshaped back to the original output
            shape when that shape has fewer than four dimensions.
        """
        params = ethosu_patterns.MeanParams(post.op.body)
        params.ifm.tensor = post.args[0]

        ifm_shape = params.ifm.shape
        ofm_shape = params.ofm.shape
        lut = relay.const([], "int8")
        axis = params.axis
        reduced_op = params.ifm.tensor

        # Enforce 4d input
        if len(ifm_shape) < 4:
            # A leading dimension of 1 is prepended, so every axis shifts by one.
            axis = [x + 1 for x in axis]
            if len(ifm_shape) == 3:
                ifm_shape = [1, params.height, params.width, ifm_shape[2]]
            else:
                ifm_shape = [1, params.height, params.width, 1]
            reduced_op = relay.reshape(reduced_op, ifm_shape)

        # The kernel spans a spatial dimension only when that axis is reduced.
        filter_height = ifm_shape[1] if 1 in axis else 1
        filter_width = ifm_shape[2] if 2 in axis else 1
        in_channels = out_channels = ifm_shape[-1]

        # If the height is greater than max kernel height, reshape the input
        # from [filter_height, filter_width] to [1, (filter_height*filter_width)]
        # only in the case the axis is [1, 2].
        if axis == [1, 2] and filter_height > 64:
            ifm_shape = (ifm_shape[0], 1, filter_height * filter_width,
                         in_channels)
            filter_width = filter_height * filter_width
            filter_height = 1
            reduced_op = relay.reshape(reduced_op, ifm_shape)

        if axis == [1, 2] and params.keepdims:
            weight_scale = 1
            weight_values = np.ones(
                [out_channels, filter_height, filter_width, in_channels])
            # np.float64 replaces the np.float alias, which was removed in
            # NumPy 1.24 and resolved to the same 64-bit float type.
            scale_bias = vela_api.pack_biases(
                biases=np.zeros(ifm_shape[-1]),
                ifm_scale=params.ifm.q_params.scale_f32,
                ifm_dtype=np.dtype(params.ifm.dtype),
                weight_scales=np.array([weight_scale], dtype=np.float64),
                ofm_scale=params.ofm.q_params.scale_f32,
                is_activation_tanh_or_sigmoid=False,
            )

            reduced_op = ethosu_ops.ethosu_depthwise_conv2d(
                ifm=reduced_op,
                weight=relay.const(weight_values, params.ifm.dtype),
                scale_bias=relay.const(scale_bias, "uint8"),
                lut=lut,
                ifm_scale=float(params.ifm.q_params.scale_f32),
                ifm_zero_point=int(params.ifm.q_params.zero_point),
                weight_zero_point=0,
                ofm_scale=float(params.ofm.q_params.scale_f32),
                ofm_zero_point=int(params.ofm.q_params.zero_point),
                kernel_shape=(filter_height, filter_width),
                ofm_channels=out_channels,
                ofm_dtype="int16",
            )

            # n is the number of elements covered by the kernel.
            # NOTE(review): eps is non-zero only for even n; presumably a
            # rounding correction for the 1/n scaling below - confirm.
            n = int(filter_height * filter_width)
            eps = 1 / (256 * (n + 1)) if n % 2 == 0 else 0

            scalar_tensor = relay.const(np.ones([1, 1, 1, 1], dtype="int16"),
                                        dtype="int16")

            # Multiply by a constant 1 whose scale is 1 / (n - eps).
            reduced_op = ethosu_ops.ethosu_binary_elementwise(
                ifm=reduced_op,
                ifm2=scalar_tensor,
                lut=lut,
                operator_type="MUL",
                ifm_scale=float(params.ofm.q_params.scale_f32),
                ifm_zero_point=int(params.ofm.q_params.zero_point),
                ifm2_scale=1 / (n - eps),
                ifm2_zero_point=0,
                ofm_scale=float(params.ofm.q_params.scale_f32),
                ofm_zero_point=int(params.ofm.q_params.zero_point),
                ifm_channels=out_channels,
                ifm2_channels=out_channels,
                reversed_operands=False,
                ofm_dtype="int8",
                rounding_mode="NATURAL",
            )
        elif (params.ifm.q_params.scale_f32 == params.ofm.q_params.scale_f32
              and params.ifm.q_params.zero_point
              == params.ofm.q_params.zero_point):
            # Input and output quantization match, so use average pooling.
            reduced_op = ethosu_ops.ethosu_pooling(
                ifm=reduced_op,
                lut=lut,
                pooling_type="AVG",
                ifm_scale=float(params.ifm.q_params.scale_f32),
                ifm_zero_point=0,
                ofm_scale=float(params.ofm.q_params.scale_f32),
                ofm_zero_point=0,
                pool_shape=(filter_height, filter_width),
                ofm_channels=out_channels,
                rounding_mode="TRUNCATE",
            )
        else:
            # All-ones weights with scale 1 / (kernel area); the bias is
            # minus the input zero point times the kernel area, and the
            # convolution itself is given an input zero point of 0.
            weight_scale = 1 / (filter_height * filter_width)
            weight_values = np.ones(
                [out_channels, filter_height, filter_width, in_channels])
            bias = -1 * int(
                params.ifm.q_params.zero_point) * filter_height * filter_width

            scale_bias = vela_api.pack_biases(
                biases=np.ones([ifm_shape[-1]]) * bias,
                ifm_scale=params.ifm.q_params.scale_f32,
                ifm_dtype=np.dtype(params.ifm.dtype),
                weight_scales=np.array([weight_scale], dtype=np.float64),
                ofm_scale=params.ofm.q_params.scale_f32,
                is_activation_tanh_or_sigmoid=False,
            )
            reduced_op = ethosu_ops.ethosu_depthwise_conv2d(
                ifm=reduced_op,
                weight=relay.const(weight_values, params.ifm.dtype),
                scale_bias=relay.const(scale_bias, "uint8"),
                lut=lut,
                ifm_scale=float(params.ifm.q_params.scale_f32),
                ifm_zero_point=0,
                weight_zero_point=0,
                ofm_scale=float(params.ofm.q_params.scale_f32),
                ofm_zero_point=int(params.ofm.q_params.zero_point),
                kernel_shape=(filter_height, filter_width),
                ofm_channels=out_channels,
                rounding_mode="NATURAL",
            )

        # Reshape to original ofm shape
        if len(ofm_shape) < 4:
            reduced_op = relay.reshape(reduced_op, ofm_shape)

        return reduced_op