Example #1
    def quantize(self):
        node = self.node
        assert (node.op_type == "MaxPool")

        if self.quantizer.opset_version < 12:
            super().quantize()
            return

        # When mode is QLinearOps, the output quantization parameters are
        # calculated from the outputs of activation nodes, so those nodes can
        # be removed from the graph when they follow a quantized op.
        # If the input to this node is not quantized, keep the node as-is.
        if node.input[0] not in self.quantizer.quantized_value_map:
            self.quantizer.new_nodes += [node]
            return

        # Create an entry for output quantized value
        quantized_input_value = self.quantizer.quantized_value_map[
            node.input[0]]
        quantized_output_value = QuantizedValue(
            node.output[0], node.output[0] + "_quantized",
            quantized_input_value.scale_name, quantized_input_value.zp_name,
            QuantizedValueType.Input)
        self.quantizer.quantized_value_map[
            node.output[0]] = quantized_output_value

        node.input[0] = quantized_input_value.q_name
        node.output[0] = quantized_output_value.q_name
        self.quantizer.new_nodes += [node]
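
This pass works because max is order-preserving and so is affine quantization with a positive scale, so MaxPool can run directly on the quantized data and reuse the input's scale/zero point. A minimal numpy sketch with hypothetical values:

import numpy as np

scale, zero_point = 0.02, 128

def quantize(x):
    return np.clip(np.round(x / scale) + zero_point, 0, 255).astype(np.uint8)

x = np.array([-0.5, 0.3, 0.7, -0.1], dtype=np.float32)
# max-then-quantize equals quantize-then-max
assert quantize(x).max() == quantize(x.max())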
Example #2
    def _get_quantized_weight(self, initializer, qType):
        '''
            :param initializer: TensorProto initializer
            :param qType: type to quantize to
            :return: Weight class with quantization information
        '''
        weights_data = self.tensor_proto_to_array(initializer)
        rmin, rmax, zero_point, scale, quantized_weights_data = quantize_data(
            weights_data.flatten().tolist(),
            _get_qrange_for_qType(qType, self.reduce_range), qType)
        weight = QuantizedInitializer(initializer.name,
                                      initializer, [rmin], [rmax],
                                      [zero_point], [scale],
                                      weights_data,
                                      quantized_weights_data,
                                      axis=None,
                                      qType=qType)

        # Log entry for this quantized weight
        assert (weight.name not in self.quantized_value_map)
        quantized_value = QuantizedValue(weight.name,
                                         weight.name + "_quantized",
                                         weight.name + "_scale",
                                         weight.name + "_zero_point",
                                         QuantizedValueType.Initializer, None,
                                         qType)
        self.quantized_value_map[weight.name] = quantized_value

        return weight
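
For reference, a rough numpy sketch of the affine quantization arithmetic that quantize_data performs for uint8 (qrange == 255); this is a simplified reading, not the exact onnxruntime implementation:

import numpy as np

def quantize_data_sketch(data, qrange=255.0):
    data = np.asarray(data, dtype=np.float32)
    rmin = min(data.min(), 0.0)   # range must include 0.0 so that
    rmax = max(data.max(), 0.0)   # 0.0 maps exactly onto the zero point
    scale = (rmax - rmin) / qrange if rmax > rmin else 1.0
    zero_point = round(-rmin / scale)
    quantized = np.clip(np.round(data / scale) + zero_point, 0, qrange).astype(np.uint8)
    return rmin, rmax, zero_point, scale, quantized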
Example #3
    def quantize(self):
        node = self.node
        quantized_input_names, zero_point_names, scale_names, nodes = \
            self.quantizer.quantize_inputs(node, [0])
        quantized_node_name = ""
        if node.name != "":
            quantized_node_name = node.name + "_quant"
        kwargs = {}
        for attribute in node.attribute:
            kwargs.update(attribute_to_kwarg(attribute))

        # Outputs derive their scale/zero point directly from the first input
        quantized_output_names = []
        for output_name in node.output:
            quantized_output_name = output_name + "_quantized"
            quantized_output_names.append(quantized_output_name)
            q_output = QuantizedValue(output_name, quantized_output_name,
                                      scale_names[0], zero_point_names[0],
                                      QuantizedValueType.Input)
            self.quantizer.quantized_value_map[output_name] = q_output

        if len(node.input) > 1:
            # pass any remaining inputs through unquantized; list.extend
            # mutates in place and returns None, so its result is not assigned
            quantized_input_names.extend(node.input[1:])
        quantized_node = onnx.helper.make_node(node.op_type,
                                               quantized_input_names,
                                               quantized_output_names,
                                               quantized_node_name, **kwargs)

        nodes.append(quantized_node)
        self.quantizer.new_nodes += nodes
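
A minimal sketch of the rewiring this pass performs, using a hypothetical Transpose node: the original op_type and attributes are kept, and only the edges move onto the quantized tensors.

import onnx

quantized_node = onnx.helper.make_node(
    "Transpose",             # op_type is unchanged
    ["input_quantized"],     # quantized activation produced upstream
    ["output_quantized"],    # consumers resolve this via quantized_value_map
    "transpose_quant",
    perm=[1, 0],             # original attributes carried over as kwargs
)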
Example #4
    def quantize_weight_per_channel(self, weight_name, weight_qType, channel_axis):
        # Find if this input is already quantized
        if weight_name in self.quantized_value_map:
            quantized_value = self.quantized_value_map[weight_name]
            return (quantized_value.q_name, quantized_value.zp_name, quantized_value.scale_name)
        
        initializer = find_by_name(weight_name, self.model.initializer())
        if initializer is None:
            raise ValueError("{} is not an initializer".format(weight_name))

        weights = self.tensor_proto_to_array(initializer)
        channel_count = weights.shape[channel_axis]
        rmin_list = []
        rmax_list = []
        zero_point_list = []
        scale_list = []
        quantized_per_channel_data_list = []
        for i in range(channel_count):
            per_channel_data = weights.take(i, channel_axis)
            rmin, rmax, zero_point, scale, quantized_per_channel_data = quantize_data(
                per_channel_data.flatten().tolist(),
                _get_qrange_for_qType(weight_qType, self.reduce_range),
                weight_qType)
            rmin_list.append(rmin)
            rmax_list.append(rmax)
            zero_point_list.append(zero_point)
            scale_list.append(scale)
            quantized_per_channel_data_list.append(quantized_per_channel_data)

        # combine per_channel_data into one
        reshape_dims = list(weights.shape)  # copy of the shape; original dims stay untouched
        reshape_dims[channel_axis] = 1  # only one per channel for reshape
        quantized_weights = np.asarray(quantized_per_channel_data_list[0]).reshape(reshape_dims)
        for i in range(1, len(quantized_per_channel_data_list)):
            channel_weights = np.asarray(quantized_per_channel_data_list[i]).reshape(reshape_dims)
            quantized_weights = np.concatenate((quantized_weights, channel_weights), channel_axis)

        weight = QuantizedInitializer(initializer.name, initializer, rmin_list, rmax_list, 
                                      zero_point_list, scale_list,
                                      weights,
                                      quantized_weights.flatten().tolist(), 
                                      channel_axis, weight_qType)

        # Make entry for this quantized weight
        assert (weight.name not in self.quantized_value_map)
        quantized_value = QuantizedValue(weight.name, weight.name + "_quantized", 
                                         weight.name + "_scale",
                                         weight.name + "_zero_point", 
                                         QuantizedValueType.Initializer, 
                                         None, weight_qType)
        self.quantized_value_map[weight.name] = quantized_value

        self._update_weight(weight)
        return (weight.name + "_quantized", weight.name + "_zero_point", weight.name + "_scale")
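
A condensed numpy sketch of the per-channel loop above: each slice along channel_axis gets its own scale/zero point, and the quantized slices are concatenated back into the original shape (the 4x3 weight and all values are hypothetical).

import numpy as np

weights = np.random.randn(4, 3).astype(np.float32)
channel_axis = 0
quantized_channels = []
for i in range(weights.shape[channel_axis]):
    per_channel = weights.take(i, channel_axis)
    rmin, rmax = min(per_channel.min(), 0.0), max(per_channel.max(), 0.0)
    scale = (rmax - rmin) / 255.0
    zero_point = round(-rmin / scale)
    q = np.clip(np.round(per_channel / scale) + zero_point, 0, 255).astype(np.uint8)
    quantized_channels.append(np.expand_dims(q, channel_axis))
quantized_weights = np.concatenate(quantized_channels, channel_axis)
assert quantized_weights.shape == weights.shape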
Example #5
    def quantize(self):
        node = self.node

        data_found, output_scale_name, output_zp_name, _, _ = \
            self.quantizer._get_quantization_params(node.output[0])
        if not data_found:  # only quantize when quantization parameters were provided for the output
            return super().quantize()

        (quantized_input_names, zero_point_names, scale_names, nodes) = \
            self.quantizer.quantize_inputs(node, [0, 1], initializer_use_weight_qType=False)

        qlinear_binary_math_output = node.output[0] + "_quantized"
        qlinear_binary_math_name = node.name + "_quant" if node.name != "" else ""

        kwargs = {}
        for attribute in node.attribute:
            kwargs.update(attribute_to_kwarg(attribute))
        kwargs["domain"] = ms_domain

        qlinear_binary_math_inputs = []
        # Input 0
        qlinear_binary_math_inputs.append(quantized_input_names[0])
        qlinear_binary_math_inputs.append(scale_names[0])
        qlinear_binary_math_inputs.append(zero_point_names[0])
        # Input 1
        qlinear_binary_math_inputs.append(quantized_input_names[1])
        qlinear_binary_math_inputs.append(scale_names[1])
        qlinear_binary_math_inputs.append(zero_point_names[1])

        # Output
        qlinear_binary_math_inputs.append(output_scale_name)
        qlinear_binary_math_inputs.append(output_zp_name)

        qlinear_binary_math_node = onnx.helper.make_node("QLinear" + node.op_type, 
                                                         qlinear_binary_math_inputs,
                                                         [qlinear_binary_math_output], 
                                                         qlinear_binary_math_name,
                                                         **kwargs)
        nodes.append(qlinear_binary_math_node)

        # Create an entry for this quantized value
        q_output = QuantizedValue(node.output[0], qlinear_binary_math_output, 
                                  output_scale_name, output_zp_name,
                                  QuantizedValueType.Input)
        self.quantizer.quantized_value_map[node.output[0]] = q_output

        self.quantizer.new_nodes += nodes
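
The inputs packed above follow the QLinear* binary contract: two (quantized tensor, scale, zero point) triples plus the output scale/zero point. As a reference for what the node computes at runtime, a numpy sketch with hypothetical uint8 values (dequantize both sides, apply the float op, requantize):

import numpy as np

def qlinear_add_reference(a_q, a_scale, a_zp, b_q, b_scale, b_zp, y_scale, y_zp):
    a = (a_q.astype(np.float32) - a_zp) * a_scale    # dequantize input 0
    b = (b_q.astype(np.float32) - b_zp) * b_scale    # dequantize input 1
    y = a + b                                        # float-domain Add
    return np.clip(np.round(y / y_scale) + y_zp, 0, 255).astype(np.uint8)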
Example #6
    def quantize(self):
        node = self.node
        assert (node.op_type == "MatMul")

        (quantized_input_names, zero_point_names, scale_names, nodes) = \
            self.quantizer.quantize_inputs(node, [0, 1])

        data_found, output_scale_name, output_zp_name, _, _ = \
            self.quantizer._get_quantization_params(node.output[0])

        if not data_found:
            raise ValueError(
                "Quantization parameters for output:\"{}\" of node:\"{}\" "
                "not specified".format(node.output[0], node.name))

        qlinear_matmul_output = node.output[0] + "_quantized"
        qlinear_matmul_name = node.name + "_quant" if node.name != "" else ""

        qlinear_matmul_inputs = []
        # Input 0
        qlinear_matmul_inputs.append(quantized_input_names[0])
        qlinear_matmul_inputs.append(scale_names[0])
        qlinear_matmul_inputs.append(zero_point_names[0])
        # Input 1
        qlinear_matmul_inputs.append(quantized_input_names[1])
        qlinear_matmul_inputs.append(scale_names[1])
        qlinear_matmul_inputs.append(zero_point_names[1])
        # Output quantization parameter
        qlinear_matmul_inputs.append(output_scale_name)
        qlinear_matmul_inputs.append(output_zp_name)

        qlinear_matmul_node = onnx.helper.make_node("QLinearMatMul",
                                                    qlinear_matmul_inputs,
                                                    [qlinear_matmul_output],
                                                    qlinear_matmul_name)
        nodes.append(qlinear_matmul_node)

        # Create an entry for this quantized value
        q_output = QuantizedValue(node.output[0], qlinear_matmul_output,
                                  output_scale_name, output_zp_name,
                                  QuantizedValueType.Input)
        self.quantizer.quantized_value_map[node.output[0]] = q_output

        self.quantizer.new_nodes += nodes
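
The same dequantize/compute/requantize pattern, sketched in numpy for QLinearMatMul (hypothetical values; the actual kernel accumulates in int32 and rescales once at the end, which is equivalent up to rounding):

import numpy as np

def qlinear_matmul_reference(a_q, a_scale, a_zp, b_q, b_scale, b_zp, y_scale, y_zp):
    acc = (a_q.astype(np.int32) - a_zp) @ (b_q.astype(np.int32) - b_zp)
    y = acc.astype(np.float32) * (a_scale * b_scale / y_scale)
    return np.clip(np.round(y) + y_zp, 0, 255).astype(np.uint8)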
Example #7
    def quantize(self):
        node = self.node
        assert (node.op_type == "Gather")
        if not self.quantizer.is_valid_quantize_weight(node.input[0]):
            super().quantize()
            return

        (quantized_input_names, zero_point_names, scale_names, nodes) = \
            self.quantizer.quantize_inputs(node, [0])

        gather_new_output = node.output[0] + "_quantized"

        # Create an entry for this quantized value
        q_output = QuantizedValue(node.output[0], gather_new_output, scale_names[0], 
                                  zero_point_names[0],
                                  QuantizedValueType.Input)
        self.quantizer.quantized_value_map[node.output[0]] = q_output

        node.output[0] = gather_new_output
        node.input[0] = quantized_input_names[0]
        nodes.append(node)

        self.quantizer.new_nodes += nodes
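
Gather only selects elements, so it commutes with elementwise quantization; that is why the output can reuse the input's scale and zero point unchanged. A small numpy sketch with hypothetical values:

import numpy as np

scale, zp = 0.1, 128
table = np.array([-1.0, 0.0, 0.5, 1.2], dtype=np.float32)
indices = np.array([2, 0, 2])

def quantize(x):
    return np.clip(np.round(x / scale) + zp, 0, 255).astype(np.uint8)

# quantize-then-gather equals gather-then-quantize
assert np.array_equal(quantize(table)[indices], quantize(table[indices]))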
Example #8
    def quantize(self):
        node = self.node
        assert (node.op_type in ["Conv", "FusedConv"])

        if self.quantizer.is_input_a_weight(
                node.input[1]) and self.per_channel:
            (quantized_input_names, zero_point_names, scale_names, nodes) = \
                self.quantizer.quantize_inputs(node, [0])
            quant_weight_tuple = self.quantizer.quantize_weight_per_channel(
                node.input[1], self.weight_dtype, 0)
            quantized_input_names.append(quant_weight_tuple[0])
            zero_point_names.append(quant_weight_tuple[1])
            scale_names.append(quant_weight_tuple[2])
        else:
            (quantized_input_names, zero_point_names, scale_names, nodes) = \
                self.quantizer.quantize_inputs(node, [0, 1])

        quantized_bias_name = ""
        bias_present = False
        if len(node.input) == 3:
            quantized_bias_name = self.quantizer.quantize_bias(node, nodes)
            bias_present = True
        data_found, output_scale_name, output_zp_name, _, _ = \
            self.quantizer._get_quantization_params(node.output[0])

        if not data_found:
            raise ValueError(
                "Quantization parameters for output:\"{}\" of node:\"{}\" "
                "not specified".format(node.output[0], node.name))

        qlinear_conv_output = node.output[0] + "_quantized"
        qlinear_conv_name = node.name + "_quant" if node.name != "" else ""

        kwargs = {}
        for attribute in node.attribute:
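            # Drop fused Relu/Clip activation attributes: assuming the output
            # scale/zero point were calibrated on the activated output, the
            # QLinearConv requantization already applies the clamp.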
            if attribute.name == 'activation' and attribute.s in [
                    b'Relu', b'Clip'
            ]:
                continue
            if attribute.name == 'activation_params':
                continue
            kwargs.update(attribute_to_kwarg(attribute))
        qlinear_conv_inputs = []
        # Input 0
        qlinear_conv_inputs.append(quantized_input_names[0])
        qlinear_conv_inputs.append(scale_names[0])
        qlinear_conv_inputs.append(zero_point_names[0])
        # Input 1
        qlinear_conv_inputs.append(quantized_input_names[1])
        qlinear_conv_inputs.append(scale_names[1])
        qlinear_conv_inputs.append(zero_point_names[1])

        # Output
        qlinear_conv_inputs.append(output_scale_name)
        qlinear_conv_inputs.append(output_zp_name)

        if bias_present:
            qlinear_conv_inputs.append(quantized_bias_name)

        qlinear_conv_node = onnx.helper.make_node("QLinearConv",
                                                  qlinear_conv_inputs,
                                                  [qlinear_conv_output],
                                                  qlinear_conv_name, **kwargs)
        nodes.append(qlinear_conv_node)

        # Create an entry for this quantized value
        q_output = QuantizedValue(node.output[0], qlinear_conv_output,
                                  output_scale_name, output_zp_name,
                                  QuantizedValueType.Input)
        self.quantizer.quantized_value_map[node.output[0]] = q_output

        self.quantizer.new_nodes += nodes
Example #9
    def quantize_bias(self, node, new_node_list):
        '''
        Quantize the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        '''

        # get scale for weight
        weight_scale_name = self.quantized_value_map[node.input[1]].scale_name
        weight_initializer = find_by_name(weight_scale_name,
                                          self.model.initializer())
        weight_scale = self.tensor_proto_to_array(weight_initializer)

        # get bias
        bias_name = node.input[2]
        bias_initializer = find_by_name(bias_name, self.model.initializer())
        bias_data = self.tensor_proto_to_array(bias_initializer)
        quantized_bias_name = bias_name + "_quantized"

        # The input scale is not provided when this input is dynamically
        # quantized, so it is not pre-computed at this point; in that case,
        # resort to dynamic quantization for the bias as well.
        if self.quantization_params is None or \
           (node.input[0] not in self.quantization_params and
                node.input[0] not in self.quantized_value_map):
            self._dynamic_quantize_bias(node.input[0], weight_scale_name,
                                        bias_name, quantized_bias_name,
                                        new_node_list)
        else:
            # get scale for input
            if node.input[0] in self.quantized_value_map:
                input_scale_name = self.quantized_value_map[
                    node.input[0]].scale_name
            elif node.input[0] in self.quantization_params:
                _, input_scale_name, _, _, _ = self._get_quantization_params(
                    node.input[0])
            else:
                raise ValueError("Expected {} to be in quantized value map \
                                  for static quantization".format(
                    node.input[0]))

            inputscale_initializer = find_by_name(input_scale_name,
                                                  self.model.initializer())
            input_scale = self.tensor_proto_to_array(inputscale_initializer)

            # calculate scale for bias
            bias_scale = input_scale * weight_scale

            # quantize bias
            quantized_data = (np.asarray(bias_data) /
                              bias_scale).round().astype(np.int32)

            # update bias initializer
            bias_np_data = np.asarray(quantized_data, dtype=np.int32).reshape(
                bias_initializer.dims)
            packed_bias_initializer = onnx.numpy_helper.from_array(
                bias_np_data, quantized_bias_name)
            self.model.initializer().extend([packed_bias_initializer])

            # log entries for this quantized bias value
            quantized_bias_entry = QuantizedInitializer(
                bias_name,
                bias_initializer, [0], [0], [0], [bias_scale],
                bias_data,
                quantized_data,
                qType=onnx_proto.TensorProto.INT32)
            self._quantized_weights.append(quantized_bias_entry)

            assert (bias_name not in self.quantized_value_map)
            quantized_value = QuantizedValue(bias_name, quantized_bias_name,
                                             "", "",
                                             QuantizedValueType.Initializer,
                                             None,
                                             onnx_proto.TensorProto.INT32)
            self.quantized_value_map[bias_name] = quantized_value

        return quantized_bias_name
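
A worked numpy sketch of the static branch above, with hypothetical values: the bias scale is the product input_scale * weight_scale, the zero point is 0, and the result is stored as int32.

import numpy as np

input_scale = np.float32(0.02)
weight_scale = np.float32(0.005)
bias_data = np.array([0.05, -0.01, 0.2], dtype=np.float32)

bias_scale = input_scale * weight_scale                        # 0.0001
quantized_bias = (bias_data / bias_scale).round().astype(np.int32)
# -> [500, -100, 2000]; multiplying back by bias_scale recovers the floats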
Example #10
    def quantize(self):
        node = self.node
        assert (node.op_type == "Pad")

        # The optional 'constant_value' input only exists from opset 11 onwards.
        # If input[0] is not quantized, do not quantize this node.
        if self.quantizer.opset_version < 11 or \
                node.input[0] not in self.quantizer.quantized_value_map:
            super().quantize()
            return
        quantized_input_value = self.quantizer.quantized_value_map[
            node.input[0]]

        kwargs = {}
        for attribute in node.attribute:
            kv = attribute_to_kwarg(attribute)
            kwargs.update(kv)

        if 'mode' not in kwargs or kwargs['mode'] == b'constant':
            if len(node.input) > 2:  # the optional third input 'constant_value' is present
                zp_tensor = self.quantizer.model.get_initializer(
                    quantized_input_value.zp_name)
                scale_tensor = self.quantizer.model.get_initializer(
                    quantized_input_value.scale_name)

                padding_constant_initializer = self.quantizer.model.get_initializer(
                    node.input[2])
                if padding_constant_initializer is not None:
                    zp_array = onnx.numpy_helper.to_array(zp_tensor)
                    zp_value = zp_array.item() if zp_array.ndim == 0 else zp_array[0]
                    scale_array = onnx.numpy_helper.to_array(scale_tensor)
                    scale_value = scale_array.item() if scale_array.ndim == 0 else scale_array[0]
                    padding_constant_array = onnx.numpy_helper.to_array(
                        padding_constant_initializer)
                    quantized_padding_constant_array = quantize_nparray(
                        self.activation_dtype, padding_constant_array,
                        scale_value, zp_value)
                    quantized_padding_constant_name = node.input[2] + "_quantized"
                    quantized_padding_constant_initializer = onnx.numpy_helper.from_array(
                        quantized_padding_constant_array,
                        quantized_padding_constant_name)
                    # Assume this padding constant initializer is only used by this node
                    self.quantizer.model.remove_initializer(
                        padding_constant_initializer)
                    self.quantizer.model.add_initializer(
                        quantized_padding_constant_initializer)
                    node.input[2] = quantized_padding_constant_name
                else:
                    pad_value_qnodes = self.quantizer._get_quantize_input_nodes(
                        node, 2, self.activation_dtype)
                    self.quantizer.new_nodes += pad_value_qnodes
                    node.input[2] = pad_value_qnodes[0].output[0]
            else:
                # pad with the zero point, i.e. the quantized value of 0.0
                node.input.extend([quantized_input_value.zp_name])

        # Create an entry for output quantized value
        quantized_output_value = QuantizedValue(
            node.output[0], node.output[0] + "_quantized",
            quantized_input_value.scale_name, quantized_input_value.zp_name,
            QuantizedValueType.Input)
        self.quantizer.quantized_value_map[
            node.output[0]] = quantized_output_value

        node.input[0] = quantized_input_value.q_name
        node.output[0] = quantized_output_value.q_name
        self.quantizer.new_nodes += [node]
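
A small sketch of the two constant_value cases handled above, with hypothetical values: an explicit pad value is mapped into the input's quantized space, while the implicit 0.0 pad simply becomes the zero point itself.

import numpy as np

scale, zero_point = 0.05, 128

pad_value = -1.0
quantized_pad_value = int(np.clip(np.round(pad_value / scale) + zero_point, 0, 255))
# -> 108, the uint8 code for -1.0 under this scale/zero point

# padding with "0.0" in float space is just padding with the zero point
assert np.round(0.0 / scale) + zero_point == zero_point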