def callback(
    self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
) -> tvm.relay.Expr:
    """Replace a matched QNN depthwise conv2d composite with ``ethosu_depthwise_conv2d``.

    Parameters
    ----------
    pre : tvm.relay.Expr
        Not used by this callback.
    post : tvm.relay.Expr
        Call whose ``op.body`` is parsed into ``QnnDepthwiseConv2DParams`` and
        whose first argument is used as the input feature map (IFM).
    node_map : tvm.ir.container.Map
        Not used by this callback.

    Returns
    -------
    tvm.relay.Expr
        The expression produced by ``ethosu_ops.ethosu_depthwise_conv2d``.

    Raises
    ------
    KeyError
        If the weight layout is not "HWOI", the OFM layout is not "NHWC", or
        the activation op name is absent from the params' ``activation_map``.
    """
    params = ethosu_patterns.QnnDepthwiseConv2DParams(post.op.body)
    ifm = post.args[0]
    params.ifm.tensor = ifm

    # Only these layouts are handled; anything else fails the lookup below.
    channel_axis_by_layout = {"NHWC": 3}
    kernel_hw_by_layout = {"HWOI": params.weights.shape[0:2]}

    # Source axes (0, 1, 2, 3) land at positions (1, 2, 0, 3): HWOI -> OHWI.
    ohwi_weights = np.moveaxis(params.weights.values, [0, 1, 2, 3], [1, 2, 0, 3])

    # Activations requiring a LUT are currently not supported, so the LUT is
    # passed as an empty constant.
    empty_lut = relay.const([], "int8")

    activation, clip_min, clip_max = "NONE", 0, 0
    activation_call = params.activation
    if activation_call:
        activation = ethosu_patterns.QnnDepthwiseConv2DParams.activation_map[
            activation_call.op.name
        ]
        # Clip bounds are only read for the CLIP activation.
        if activation == "CLIP":
            clip_min = int(activation_call.attrs.a_min)
            clip_max = int(activation_call.attrs.a_max)

    ifm_q = params.ifm.q_params
    weights_q = params.weights.q_params
    ofm_q = params.ofm.q_params

    packed_scale_bias = vela_api.pack_biases(
        biases=params.biases.tensor.data.asnumpy(),
        ifm_scale=ifm_q.scale_f32,
        ifm_dtype=np.dtype(params.ifm.dtype),
        weight_scales=weights_q.scale_f32,
        ofm_scale=ofm_q.scale_f32,
        is_activation_tanh_or_sigmoid=activation in ("TANH", "SIGMOID"),
    )

    kernel_shape = kernel_hw_by_layout[str(params.weights.layout)]
    ofm_channels = params.ofm.shape[channel_axis_by_layout[str(params.ofm.layout)]]

    return ethosu_ops.ethosu_depthwise_conv2d(
        ifm,  # IFM
        relay.const(ohwi_weights, params.weights.values.dtype),
        relay.const(packed_scale_bias, "uint8"),
        empty_lut,
        float(ifm_q.scale_f32),
        int(ifm_q.zero_point),
        int(weights_q.zero_point),
        float(ofm_q.scale_f32),
        int(ofm_q.zero_point),
        kernel_shape,
        ofm_channels,
        strides=params.strides,
        padding=params.padding,
        dilation=params.dilation,
        activation=activation,
        clip_min=clip_min,
        clip_max=clip_max,
        upscale="NONE",
        ifm_layout=str(params.ifm.layout),
        ofm_layout=str(params.ofm.layout),
        ofm_dtype=str(params.ofm.dtype),
    )
def callback(
    self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
) -> tvm.relay.Expr:
    """Replace a matched QNN conv2d composite with ``ethosu_conv2d``.

    Parameters
    ----------
    pre : tvm.relay.Expr
        Not used by this callback.
    post : tvm.relay.Expr
        Call whose ``op.body`` is parsed into ``QnnConv2DParams`` and whose
        first argument is used as the input feature map (IFM).
    node_map : tvm.ir.container.Map
        Not used by this callback.

    Returns
    -------
    tvm.relay.Expr
        The expression produced by ``ethosu_ops.ethosu_conv2d``.

    Raises
    ------
    KeyError
        If the weight layout has no entry in the transpose table (only
        "HWIO" is present), the OFM layout is not "NHWC", or the activation
        op name is not "clip".
    """
    params = ethosu_patterns.QnnConv2DParams(post.op.body)
    ifm = post.args[0]
    params.ifm.tensor = ifm

    channel_axis_by_layout = {"NHWC": 3}
    kernel_hw_by_layout = {
        "HWIO": params.weights.shape[0:2],
        "OHWI": params.weights.shape[1:3],
        "HWOI": params.weights.shape[0:2],
    }
    activation_names = {"clip": "CLIP"}
    # Axis permutations that bring a given weight layout into OHWI order.
    ohwi_permutation_by_layout = {"HWIO": [3, 0, 1, 2]}

    weights_layout = str(params.weights.layout)
    ohwi_weights = np.transpose(
        params.weights.values, ohwi_permutation_by_layout[weights_layout]
    )

    activation, clip_min, clip_max = "NONE", 0, 0
    activation_call = params.activation
    if activation_call:
        activation = activation_names[activation_call.op.name]
        clip_min = int(activation_call.attrs.a_min)
        clip_max = int(activation_call.attrs.a_max)

    ifm_q = params.ifm.q_params
    weights_q = params.weights.q_params
    ofm_q = params.ofm.q_params

    packed_scale_bias = vela_api.pack_biases(
        biases=params.biases.tensor.data.asnumpy(),
        ifm_scale=ifm_q.scale_f32,
        ifm_dtype=np.dtype(params.ifm.dtype),
        weight_scales=weights_q.scale_f32,
        ofm_scale=ofm_q.scale_f32,
        is_activation_tanh_or_sigmoid=activation in ("TANH", "SIGMOID"),
    )

    return ethosu_ops.ethosu_conv2d(
        ifm=ifm,
        weight=relay.const(ohwi_weights, params.weights.values.dtype),
        scale_bias=relay.const(packed_scale_bias, "uint8"),
        lut=relay.const([], dtype="int8"),
        ifm_scale=float(ifm_q.scale_f32),
        ifm_zero_point=int(ifm_q.zero_point),
        weight_zero_point=int(weights_q.zero_point),
        ofm_scale=float(ofm_q.scale_f32),
        ofm_zero_point=int(ofm_q.zero_point),
        kernel_shape=kernel_hw_by_layout[str(params.weights.layout)],
        ofm_channels=params.ofm.shape[channel_axis_by_layout[str(params.ofm.layout)]],
        strides=params.strides,
        padding=params.padding,
        dilation=params.dilation,
        activation=activation,
        clip_min=clip_min,
        clip_max=clip_max,
        upscale="NONE",
        ifm_layout=str(params.ifm.layout),
        ofm_layout=str(params.ofm.layout),
    )
def callback(self, pre, post, node_map):
    """Replace a matched fully-connected composite with a 1x1 ``ethosu_conv2d``.

    The IFM is reshaped to ``(1, 1, 1, C)`` when it is not already a 4-D
    tensor with unit height and width, and the result is reshaped back to the
    original OFM shape under the same condition.

    Parameters
    ----------
    pre
        Not used by this callback.
    post
        Call whose ``op.body`` is parsed into ``FullyConnectedParams`` and
        whose first argument is used as the input feature map (IFM).
    node_map
        Not used by this callback.

    Returns
    -------
    The ``ethosu_conv2d`` expression, wrapped in a ``relay.reshape`` when the
    OFM shape is not of the form ``(N, 1, 1, C)``.
    """

    def _is_n11c(shape):
        # True only for 4-D shapes whose two middle dimensions are both 1.
        return len(shape) == 4 and shape[1] == shape[2] == 1

    params = ethosu_patterns.FullyConnectedParams(post.op.body)
    params.ifm.tensor = post.args[0]

    # IFM reshapes
    ifm = post.args[0]
    if not _is_n11c(params.ifm.shape):
        ifm = relay.reshape(ifm, (1, 1, 1, params.ifm.shape[-1]))

    # Weight transformations: insert unit axes at positions 1 and 2.
    ohwi_weights = np.expand_dims(params.weights.values, axis=(1, 2))

    activation, clip_min, clip_max = "NONE", 0, 0
    activation_call = params.activation
    if activation_call:
        activation = "CLIP"
        clip_min = int(activation_call.attrs.a_min)
        clip_max = int(activation_call.attrs.a_max)

    # Fall back to an all-zero bias, one entry per element of the OFM's last axis.
    if params.biases:
        bias_values = params.biases.tensor.data.asnumpy()
    else:
        bias_values = np.zeros((params.ofm.shape[-1]))

    ifm_q = params.ifm.q_params
    weights_q = params.weights.q_params
    ofm_q = params.ofm.q_params

    packed_scale_bias = vela_api.pack_biases(
        biases=bias_values,
        ifm_scale=ifm_q.scale_f32,
        ifm_dtype=np.dtype(params.ifm.dtype),
        weight_scales=weights_q.scale_f32,
        ofm_scale=ofm_q.scale_f32,
        is_activation_tanh_or_sigmoid=False,
    )

    result = ethosu_ops.ethosu_conv2d(
        ifm=ifm,
        weight=relay.const(ohwi_weights, params.weights.values.dtype),
        scale_bias=relay.const(packed_scale_bias, "uint8"),
        lut=relay.const([], dtype="int8"),
        ifm_scale=float(ifm_q.scale_f32),
        ifm_zero_point=int(ifm_q.zero_point),
        weight_zero_point=int(weights_q.zero_point),
        ofm_scale=float(ofm_q.scale_f32),
        ofm_zero_point=int(ofm_q.zero_point),
        kernel_shape=[1, 1],
        ofm_channels=params.weights.shape[0],
        strides=(1, 1),
        padding=(0, 0, 0, 0),
        dilation=(1, 1),
        activation=activation,
        clip_min=clip_min,
        clip_max=clip_max,
        upscale="NONE",
        ifm_layout="NHWC",
        ofm_layout="NHWC",
    )

    # Restore the original OFM shape when it is not already (N, 1, 1, C).
    if not _is_n11c(params.ofm.shape):
        result = relay.reshape(result, params.ofm.shape)
    return result
def callback(
    self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
) -> tvm.relay.Expr:
    """Replace a matched QNN conv2d-transpose composite with ``ethosu_conv2d``.

    The weights are permuted and flipped along axes 1 and 2, the convolution
    is emitted with ``upscale="ZEROS"`` and unit strides, and the result is
    cropped to the expected OFM shape with ``relay.strided_slice``.

    Parameters
    ----------
    pre : tvm.relay.Expr
        Not used by this callback.
    post : tvm.relay.Expr
        Call whose ``op.body`` is parsed into ``QnnConv2DTransposeParams`` and
        whose first argument is used as the input feature map (IFM).
    node_map : tvm.ir.container.Map
        Not used by this callback.

    Returns
    -------
    tvm.relay.Expr
        A ``strided_slice`` over the generated ``ethosu_conv2d`` call.

    Raises
    ------
    KeyError
        If the weight layout is not "IOHW".
    """
    params = ethosu_patterns.QnnConv2DTransposeParams(post.op.body)
    ifm = post.args[0]
    params.ifm.tensor = ifm

    expected_ofm_shape = params.ofm.shape
    padding = params.legalize_padding

    # Axis permutations that bring a given weight layout into OHWI order.
    ohwi_permutation_by_layout = {"IOHW": [1, 2, 3, 0]}
    permutation = ohwi_permutation_by_layout[str(params.weights.layout)]
    # Permute to OHWI, then reverse the kernel along axes 1 and 2.
    flipped_ohwi_weights = np.flip(np.transpose(params.weights.values, permutation), (1, 2))
    weight_const = relay.const(flipped_ohwi_weights, dtype=params.weights.values.dtype)

    # NOTE(review): the fallback bias vector is sized from the IFM's last
    # dimension; confirm this is intended when IFM and OFM channel counts differ.
    if params.biases:
        bias_values = params.biases.tensor.data.asnumpy()
    else:
        bias_values = np.zeros((params.ifm.shape[-1]))

    ifm_q = params.ifm.q_params
    weights_q = params.weights.q_params
    ofm_q = params.ofm.q_params

    packed_scale_bias = vela_api.pack_biases(
        biases=bias_values,
        ifm_scale=ifm_q.scale_f32,
        ifm_dtype=np.dtype(params.ifm.dtype),
        weight_scales=weights_q.scale_f32,
        ofm_scale=ofm_q.scale_f32,
        is_activation_tanh_or_sigmoid=False,
    )

    upscaled_conv = ethosu_ops.ethosu_conv2d(
        ifm=ifm,
        weight=weight_const,
        scale_bias=relay.const(packed_scale_bias, "uint8"),
        lut=relay.const([], dtype="int8"),
        ifm_scale=float(ifm_q.scale_f32),
        ifm_zero_point=int(ifm_q.zero_point),
        weight_zero_point=int(weights_q.zero_point),
        ofm_scale=float(ofm_q.scale_f32),
        ofm_zero_point=int(ofm_q.zero_point),
        kernel_shape=params.kernel_shape,
        ofm_channels=int(expected_ofm_shape[-1]),
        strides=(1, 1),
        padding=padding,
        dilation=params.dilation,
        ifm_layout=str(params.ifm.layout),
        ofm_layout=str(params.ofm.layout),
        upscale="ZEROS",
    )

    # Remove additional padding by 'cropping' back to expected size
    return relay.strided_slice(upscaled_conv, (0, 0, 0, 0), expected_ofm_shape)
def create_mock(test_vec):
    """Run ``vela_api.pack_biases`` with ``npu_encode_bias`` patched out.

    Random int32 biases are drawn from the value range of the test vector's
    IFM dtype and stored back into ``test_vec["bias_values"]`` so the caller
    can inspect what was passed to the mock.

    Parameters
    ----------
    test_vec : dict
        Must provide "ifm_dtype", "bias_length", "ifm_scale", "weight_scales",
        "ofm_scale" and "is_activation_tanh_or_sigmoid". It is mutated: the
        key "bias_values" is set to the generated biases.

    Returns
    -------
    tuple
        ``(mock_npu_encode_bias, packed_biases)``: the mock that replaced
        ``ethosu.vela.api.npu_encode_bias`` and the value returned by
        ``vela_api.pack_biases``.
    """
    with patch("ethosu.vela.api.npu_encode_bias") as mock_npu_encode_bias:
        mock_npu_encode_bias.return_value = bytearray(10)
        ifm_dtype = test_vec["ifm_dtype"]
        # Named bounds rather than locals called ``max``/``min``, which would
        # shadow the builtins.
        upper_bound = np.iinfo(ifm_dtype).max
        lower_bound = np.iinfo(ifm_dtype).min
        # tvm will always create biases in int32
        biases = np.random.randint(
            lower_bound, upper_bound, test_vec["bias_length"], np.int32
        )
        packed_biases = vela_api.pack_biases(
            biases=biases,
            ifm_scale=test_vec["ifm_scale"],
            ifm_dtype=ifm_dtype,
            weight_scales=test_vec["weight_scales"],
            ofm_scale=test_vec["ofm_scale"],
            is_activation_tanh_or_sigmoid=test_vec["is_activation_tanh_or_sigmoid"],
        )
        test_vec["bias_values"] = biases
        return mock_npu_encode_bias, packed_biases
def callback(
    self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
) -> tvm.relay.Expr:
    """Replace a matched mean composite with Ethos-U operators.

    One of three lowerings is chosen:

    * axis ``[1, 2]`` with ``keepdims``: a depthwise conv2d of ones producing
      int16, followed by an elementwise MUL by ``1 / (n - eps)`` back to int8;
    * IFM and OFM share scale and zero point: a single AVG pooling;
    * otherwise: a depthwise conv2d of ones with weight scale ``1 / n`` and a
      bias that cancels the IFM zero point.

    Inputs with fewer than four dimensions are reshaped to 4-D first, and the
    result is reshaped back to the original OFM shape when that shape has
    fewer than four dimensions.

    Parameters
    ----------
    pre : tvm.relay.Expr
        Not used by this callback.
    post : tvm.relay.Expr
        Call whose ``op.body`` is parsed into ``MeanParams`` and whose first
        argument is used as the input feature map (IFM).
    node_map : tvm.ir.container.Map
        Not used by this callback.

    Returns
    -------
    tvm.relay.Expr
        The legalized expression.
    """
    params = ethosu_patterns.MeanParams(post.op.body)
    params.ifm.tensor = post.args[0]

    ifm_shape = params.ifm.shape
    ofm_shape = params.ofm.shape
    lut = relay.const([], "int8")
    axis = params.axis
    reduced_op = params.ifm.tensor

    # Enforce 4d input
    if len(ifm_shape) < 4:
        # A leading batch axis is prepended, so every reduction axis shifts by one.
        axis = [x + 1 for x in axis]
        if len(ifm_shape) == 3:
            ifm_shape = [1, params.height, params.width, ifm_shape[2]]
        else:
            ifm_shape = [1, params.height, params.width, 1]
        reduced_op = relay.reshape(reduced_op, ifm_shape)

    filter_height = ifm_shape[1] if 1 in axis else 1
    filter_width = ifm_shape[2] if 2 in axis else 1
    in_channels = out_channels = ifm_shape[-1]

    # If the height is greater than max kernel height, reshape the input
    # from [filter_height, filter_width] to [1, (filter_height*filter_width)]
    # only in the case the axis is [1, 2].
    if axis == [1, 2] and filter_height > 64:
        ifm_shape = (ifm_shape[0], 1, filter_height * filter_width, in_channels)
        filter_width = filter_height * filter_width
        filter_height = 1
        reduced_op = relay.reshape(reduced_op, ifm_shape)

    if axis == [1, 2] and params.keepdims:
        weight_scale = 1
        weight_values = np.ones([out_channels, filter_height, filter_width, in_channels])
        scale_bias = vela_api.pack_biases(
            biases=np.zeros(ifm_shape[-1]),
            ifm_scale=params.ifm.q_params.scale_f32,
            ifm_dtype=np.dtype(params.ifm.dtype),
            # np.float64 replaces the removed ``np.float`` alias (same dtype).
            weight_scales=np.array([weight_scale], dtype=np.float64),
            ofm_scale=params.ofm.q_params.scale_f32,
            is_activation_tanh_or_sigmoid=False,
        )

        # Sum over the window with all-ones weights, producing int16.
        reduced_op = ethosu_ops.ethosu_depthwise_conv2d(
            ifm=reduced_op,
            weight=relay.const(weight_values, params.ifm.dtype),
            scale_bias=relay.const(scale_bias, "uint8"),
            lut=lut,
            ifm_scale=float(params.ifm.q_params.scale_f32),
            ifm_zero_point=int(params.ifm.q_params.zero_point),
            weight_zero_point=0,
            ofm_scale=float(params.ofm.q_params.scale_f32),
            ofm_zero_point=int(params.ofm.q_params.zero_point),
            kernel_shape=(filter_height, filter_width),
            ofm_channels=out_channels,
            ofm_dtype="int16",
        )

        n = int(filter_height * filter_width)
        # A small correction is applied to the divisor only when n is even.
        eps = 1 / (256 * (n + 1)) if n % 2 == 0 else 0

        scalar_tensor = relay.const(np.ones([1, 1, 1, 1], dtype="int16"), dtype="int16")

        # Multiply by a constant 1 whose scale is 1 / (n - eps) to divide the sum.
        reduced_op = ethosu_ops.ethosu_binary_elementwise(
            ifm=reduced_op,
            ifm2=scalar_tensor,
            lut=lut,
            operator_type="MUL",
            ifm_scale=float(params.ofm.q_params.scale_f32),
            ifm_zero_point=int(params.ofm.q_params.zero_point),
            ifm2_scale=1 / (n - eps),
            ifm2_zero_point=0,
            ofm_scale=float(params.ofm.q_params.scale_f32),
            ofm_zero_point=int(params.ofm.q_params.zero_point),
            ifm_channels=out_channels,
            ifm2_channels=out_channels,
            reversed_operands=False,
            ofm_dtype="int8",
            rounding_mode="NATURAL",
        )
    elif (
        params.ifm.q_params.scale_f32 == params.ofm.q_params.scale_f32
        and params.ifm.q_params.zero_point == params.ofm.q_params.zero_point
    ):
        # Identical quantization on both sides: a plain average pool is used.
        reduced_op = ethosu_ops.ethosu_pooling(
            ifm=reduced_op,
            lut=lut,
            pooling_type="AVG",
            ifm_scale=float(params.ifm.q_params.scale_f32),
            ifm_zero_point=0,
            ofm_scale=float(params.ofm.q_params.scale_f32),
            ofm_zero_point=0,
            pool_shape=(filter_height, filter_width),
            ofm_channels=out_channels,
            rounding_mode="TRUNCATE",
        )
    else:
        weight_scale = 1 / (filter_height * filter_width)
        weight_values = np.ones([out_channels, filter_height, filter_width, in_channels])
        # The IFM zero point is passed as 0 below; this bias subtracts it once
        # per window element instead.
        bias = -1 * int(params.ifm.q_params.zero_point) * filter_height * filter_width

        scale_bias = vela_api.pack_biases(
            biases=np.ones([ifm_shape[-1]]) * bias,
            ifm_scale=params.ifm.q_params.scale_f32,
            ifm_dtype=np.dtype(params.ifm.dtype),
            # np.float64 replaces the removed ``np.float`` alias (same dtype).
            weight_scales=np.array([weight_scale], dtype=np.float64),
            ofm_scale=params.ofm.q_params.scale_f32,
            is_activation_tanh_or_sigmoid=False,
        )
        reduced_op = ethosu_ops.ethosu_depthwise_conv2d(
            ifm=reduced_op,
            weight=relay.const(weight_values, params.ifm.dtype),
            scale_bias=relay.const(scale_bias, "uint8"),
            lut=lut,
            ifm_scale=float(params.ifm.q_params.scale_f32),
            ifm_zero_point=0,
            weight_zero_point=0,
            ofm_scale=float(params.ofm.q_params.scale_f32),
            ofm_zero_point=int(params.ofm.q_params.zero_point),
            kernel_shape=(filter_height, filter_width),
            ofm_channels=out_channels,
            rounding_mode="NATURAL",
        )

    # Reshape to original ofm shape
    if len(ofm_shape) < 4:
        reduced_op = relay.reshape(reduced_op, ofm_shape)

    return reduced_op