Example #1
    def _add_pool_function(self, original_node, quantized_op_node):
        helper.set_attr_dtype(quantized_op_node, "T", dtypes.quint8)
        helper.copy_attr(quantized_op_node, "ksize",
                         original_node.attr["ksize"])
        helper.copy_attr(quantized_op_node, "strides",
                         original_node.attr["strides"])
        helper.copy_attr(quantized_op_node, "padding",
                         original_node.attr["padding"])
Example #2
    def apply_matmul_biasadd_fusion(self, match_node_name):
        skip_node_name = match_node_name[1:]
        matched_node = self.node_name_mapping[match_node_name[0]]
        control_inputs, normal_inputs = self._get_node_input(
            matched_node.node.name)
        weight_name = normal_inputs[1]

        self._intel_cpu_quantize_weight_eightbit(
            matched_node.node.op, self.node_name_mapping[weight_name].node,
            self.per_channel)

        skip_node_name.append(weight_name)

        for _, node in enumerate(self.input_graph.node):
            if node.name in skip_node_name:
                pass
            elif node.name == match_node_name[0]:
                logging.debug("matched node {} with input {}".format(
                    node.name, node.input))

                logging.debug("apply_conv_biasadd_fusion")

                quantized_node_name = node.name + "_eightbit_quantized_mat_mul"
                bias_node_name = self.node_name_mapping[
                    match_node_name[1]].node.input[1]
                all_input_names = self._add_eightbit_prologue_nodes(
                    matched_node.node.name)
                quantized_node_input_names = all_input_names[:2] + [
                    bias_node_name
                ] + all_input_names[2:] + control_inputs

                quantized_matmul_node = helper.create_node(
                    "QuantizedMatMulWithBias", quantized_node_name,
                    quantized_node_input_names)

                helper.copy_attr(quantized_matmul_node, "transpose_a",
                                 node.attr["transpose_a"])
                helper.copy_attr(quantized_matmul_node, "transpose_b",
                                 node.attr["transpose_b"])
                helper.set_attr_dtype(quantized_matmul_node, "T1",
                                      dtypes.quint8)
                helper.set_attr_dtype(quantized_matmul_node, "T2",
                                      dtypes.qint8)
                helper.set_attr_dtype(quantized_matmul_node, "Toutput",
                                      dtypes.qint32)
                helper.set_attr_dtype(quantized_matmul_node, "Tbias",
                                      dtypes.float32)

                self.add_output_graph_node(quantized_matmul_node)
                requantize_type = dtypes.qint8

                quantize_down_name = self._add_quantize_down_nodes(
                    node, quantized_node_name, requantize_type, False)
                self._intel_cpu_add_dequantize_result_node(
                    quantize_down_name, match_node_name[1], requantize_type)
            else:
                new_node = node_def_pb2.NodeDef()
                new_node.CopyFrom(node)
                self.add_output_graph_node(new_node)
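A note on the input splice above: assuming _add_eightbit_prologue_nodes returns the two quantized operands followed by their min/max tensors, the slicing places the float bias in the third operand slot that QuantizedMatMulWithBias expects. With placeholder names:

all_input_names = ["a_q", "b_q", "a_min", "a_max", "b_min", "b_max"]
bias_node_name = "bias"
quantized_node_input_names = (all_input_names[:2] + [bias_node_name] +
                              all_input_names[2:])
# -> ["a_q", "b_q", "bias", "a_min", "a_max", "b_min", "b_max"]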
Example #3
    def add_dequantize_result_node(self,
                                   quantized_output_name,
                                   original_node_name,
                                   min_tensor_index=1):
        min_max_inputs = [
            "%s:%s" % (quantized_output_name, min_tensor_index),
            "%s:%s" % (quantized_output_name, (min_tensor_index + 1))
        ]
        dequantize_name = original_node_name

        dequantize_node = helper.create_node(
            "Dequantize", dequantize_name,
            [quantized_output_name, min_max_inputs[0], min_max_inputs[1]])
        helper.set_attr_dtype(dequantize_node, "T", dtypes.quint8)
        helper.set_attr_string(
            dequantize_node, "mode",
            b"SCALED" if self.intel_cpu_eightbitize else b"MIN_FIRST")
        self.add_output_graph_node(dequantize_node)
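The "%s:%s" strings use GraphDef's convention that name:i addresses the i-th output of a node (":0" can be omitted). Quantized ops emit (tensor, min, max), so with the default min_tensor_index=1 the Dequantize node reads output slots 1 and 2:

quantized_output_name = "some_op_eightbit_requantize"  # placeholder name
min_tensor_index = 1
min_max_inputs = [
    "%s:%s" % (quantized_output_name, min_tensor_index),
    "%s:%s" % (quantized_output_name, min_tensor_index + 1),
]
# -> ['some_op_eightbit_requantize:1', 'some_op_eightbit_requantize:2']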
Example #4
    def _add_quantize_down_nodes(self,
                                 original_node,
                                 quantized_output_name,
                                 requantize_type=dtypes.quint8,
                                 is_relu6=False):
        quantized_outputs = [
            quantized_output_name, quantized_output_name + ":1",
            quantized_output_name + ":2"
        ]
        # Add a RequantizationRange node for finding the min and max values.
        requant_range_node = helper.create_node(
            "RequantizationRangePerChannel"
            if self.per_channel else "RequantizationRange",
            original_node.name + "_eightbit_requant_range", quantized_outputs)

        if self.per_channel:
            helper.set_attr_dtype(requant_range_node, "T", dtypes.qint32)
            if is_relu6:
                helper.set_attr_float(requant_range_node, "clip_value_max",
                                      6.0)
            else:
                helper.set_attr_float(requant_range_node, "clip_value_max",
                                      1e30)
        else:
            helper.set_attr_dtype(requant_range_node, "Tinput", dtypes.qint32)

        self.add_output_graph_node(requant_range_node)
        min_max_inputs = [
            requant_range_node.name + ":0", requant_range_node.name + ":1"
        ]
        requantize_node = helper.create_node(
            "RequantizePerChannel" if self.per_channel else "Requantize",
            original_node.name + "_eightbit_requantize",
            quantized_outputs + min_max_inputs)
        if self.per_channel:
            helper.set_attr_dtype(requantize_node, "T", dtypes.qint32)
        else:
            helper.set_attr_dtype(requantize_node, "Tinput", dtypes.qint32)

        helper.set_attr_dtype(requantize_node, "out_type", requantize_type)
        self.add_output_graph_node(requantize_node)
        return requantize_node.name
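Putting the two nodes together, the wiring emitted by this method looks as follows (a sketch with placeholder names, not additional graph code):

quantized_outputs = ["conv_q", "conv_q:1", "conv_q:2"]  # qint32 tensor, min, max
requant_range_name = "conv_eightbit_requant_range"
requantize_inputs = quantized_outputs + [requant_range_name + ":0",
                                         requant_range_name + ":1"]
# Requantize consumes the accumulator plus the observed range and rescales
# the 32-bit values into the caller's 8-bit out_type.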
Example #5
    def _apply_concatv2_transform(self, original_node):
        namespace_prefix = original_node.name + "_eightbit"
        quantized_concat_name = namespace_prefix + "_quantized_concatv2"
        reshape_dims_name, reduction_dims_name = self._add_common_quantization_nodes(
            namespace_prefix,
            helper.node_name_from_input(original_node.input[-1]))
        num_input = len(original_node.input)
        shape_input_name = original_node.input[num_input - 1]
        original_inputs = original_node.input[0:num_input - 1]
        input_names = []
        min_names = []
        max_names = []
        for original_input_name in original_inputs:
            quantize_input_name, min_input_name, max_input_name = (
                self._eightbitize_input_to_node(namespace_prefix,
                                                original_input_name,
                                                reshape_dims_name,
                                                reduction_dims_name,
                                                dtype=dtypes.quint8))
            input_names.append(quantize_input_name)
            min_names.append(min_input_name)
            max_names.append(max_input_name)
        all_input_names = input_names
        all_input_names.append(shape_input_name)
        all_input_names.extend(min_names)
        all_input_names.extend(max_names)
        quantized_concat_node = helper.create_node("QuantizedConcatV2",
                                                   quantized_concat_name,
                                                   all_input_names)
        helper.set_attr_int(quantized_concat_node, "N", len(original_inputs))
        helper.set_attr_dtype(quantized_concat_node, "T", dtypes.quint8)
        self.add_output_graph_node(quantized_concat_node)
        if self.intel_cpu_eightbitize:
            self._intel_cpu_add_dequantize_result_node(quantized_concat_name,
                                                       original_node.name)
        else:
            self._add_dequantize_result_node(quantized_concat_name,
                                             original_node.name)
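For reference, with two inputs the list assembled above matches the operand layout QuantizedConcatV2 expects: N quantized values, the concat axis, then the N mins and N maxes (placeholder names):

input_names = ["x0_q", "x1_q"]
min_names = ["x0_q:1", "x1_q:1"]
max_names = ["x0_q:2", "x1_q:2"]
all_input_names = input_names + ["axis"] + min_names + max_names
# -> ["x0_q", "x1_q", "axis", "x0_q:1", "x1_q:1", "x0_q:2", "x1_q:2"]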
Example #6
    def apply_conv_single_fusion(self, match_node_name):
        skip_node_name = match_node_name[1:]
        matched_node = self.node_name_mapping[match_node_name[0]]
        _, normal_inputs = self._get_node_input(matched_node.node.name)
        weight_name = normal_inputs[1]
        # TODO: this is a workaround, as TF 2.1 doesn't support the depthwise s8 feature.
        if self.enable_s8 and matched_node.node.op == "DepthwiseConv2dNative" and not self._find_relu_node(
                matched_node.node):
            self.output_graph = self.input_graph
            return

        self._intel_cpu_quantize_weight_eightbit(
            matched_node.node.op, self.node_name_mapping[weight_name].node,
            self.per_channel)

        all_input_names = self._add_eightbit_prologue_nodes(
            matched_node.node.name)
        skip_node_name.append(weight_name)

        for _, node in enumerate(self.input_graph.node):
            if node.name in skip_node_name:
                logging.debug("skip node {}".format(node.name))
            elif node.name == match_node_name[0]:
                postfix = "_eightbit_quantized_conv" if node.op == "Conv2D" else "_eightbit_quantized_depthwise_conv"
                quantized_node_name = node.name + postfix
                if node.op == "Conv2D":
                    quantized_conv_node = helper.create_node(
                        "QuantizedConv2DPerChannel"
                        if self.per_channel else "QuantizedConv2D",
                        quantized_node_name, all_input_names)

                elif node.op == "DepthwiseConv2dNative":
                    quantized_conv_node = helper.create_node(
                        "QuantizedDepthwiseConv2D", quantized_node_name,
                        all_input_names)

                helper.copy_attr(quantized_conv_node, "strides",
                                 node.attr["strides"])
                helper.copy_attr(quantized_conv_node, "padding",
                                 node.attr["padding"])
                if node.op != 'DepthwiseConv2dNative' and "padding_list" in node.attr:
                    helper.copy_attr(quantized_conv_node, "padding_list",
                                     node.attr["padding_list"])
                helper.copy_attr(quantized_conv_node, "dilations",
                                 node.attr["dilations"])
                input_data_type = dtypes.quint8 if self._find_relu_node(
                    node) else dtypes.qint8
                helper.set_attr_dtype(quantized_conv_node, "Tinput",
                                      input_data_type)
                helper.set_attr_dtype(quantized_conv_node, "Tfilter",
                                      dtypes.qint8)
                helper.set_attr_dtype(quantized_conv_node, "out_type",
                                      dtypes.qint32)
                self.add_output_graph_node(quantized_conv_node)
                quantize_down_name = self._add_quantize_down_nodes(
                    node, quantized_node_name, dtypes.qint8)
                self._intel_cpu_add_dequantize_result_node(
                    quantize_down_name, node.name, dtypes.qint8)
            else:
                new_node = node_def_pb2.NodeDef()
                new_node.CopyFrom(node)
                self.add_output_graph_node(new_node)
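The Tinput choice above encodes a range assumption: activations flowing out of a Relu are non-negative and fit the unsigned quint8 range, while anything else needs signed qint8. Restated as a hypothetical helper for illustration:

from tensorflow.python.framework import dtypes

def pick_input_dtype(preceded_by_relu):
    # Relu output >= 0 fits quint8 [0, 255]; otherwise keep the sign.
    return dtypes.quint8 if preceded_by_relu else dtypes.qint8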
Example #7
    def apply_conv_biasadd_addn_relu_fusion(self, match_node_name):
        skip_node_name = match_node_name[1:]
        matched_node = self.node_name_mapping[match_node_name[0]]
        control_inputs, normal_inputs = self._get_node_input(
            matched_node.node.name)
        weight_name = normal_inputs[1]
        self._intel_cpu_quantize_weight_eightbit(
            matched_node.node.op, self.node_name_mapping[weight_name].node,
            self.per_channel)
        all_input_names = self._add_eightbit_prologue_nodes(
            matched_node.node.name)
        skip_node_name.append(weight_name)
        for _, node in enumerate(self.input_graph.node):
            if node.name in skip_node_name:
                logging.debug("skip node {}".format(node.name))
            elif node.name == match_node_name[0]:
                logging.debug("matched node {} with input {}".format(
                    node.name, node.input))

                logging.debug("apply_conv_biasadd_addn_relu_fusion")

                quantized_node_name = node.name + "_eightbit_quantized_conv"
                bias_node_name = self.node_name_mapping[
                    match_node_name[1]].node.input[1]
                relu_node_name = match_node_name[3]
                is_relu6 = self.node_name_mapping[
                    relu_node_name].node.op == "Relu6"

                sum_index = 1 if match_node_name[1] == self.node_name_mapping[
                    match_node_name[2]].node.input[0] else 0
                quantized_node_input_names = all_input_names[:2] + [
                    bias_node_name
                ] + all_input_names[2:] + [
                    self.node_name_mapping[
                        match_node_name[2]].node.input[sum_index]
                ] + control_inputs

                quantized_conv_node = helper.create_node(
                    "QuantizedConv2DWithBiasSumAndRelu", quantized_node_name,
                    quantized_node_input_names)
                helper.copy_attr(quantized_conv_node, "strides",
                                 node.attr["strides"])
                helper.copy_attr(quantized_conv_node, "padding",
                                 node.attr["padding"])
                if "padding_list" in node.attr:
                    helper.copy_attr(quantized_conv_node, "padding_list",
                                     node.attr["padding_list"])
                helper.copy_attr(quantized_conv_node, "dilations",
                                 node.attr["dilations"])
                input_data_type = dtypes.quint8 if self._find_relu_node(
                    node) else dtypes.qint8
                helper.set_attr_dtype(quantized_conv_node, "Tinput",
                                      input_data_type)
                helper.set_attr_dtype(quantized_conv_node, "Tfilter",
                                      dtypes.qint8)
                helper.set_attr_dtype(quantized_conv_node, "out_type",
                                      dtypes.qint32)
                self.add_output_graph_node(quantized_conv_node)

                quantize_down_name = self._add_quantize_down_nodes(
                    node, quantized_node_name, dtypes.quint8, is_relu6)

                self._intel_cpu_add_dequantize_result_node(
                    quantize_down_name, relu_node_name)

            else:
                new_node = node_def_pb2.NodeDef()
                new_node.CopyFrom(node)
                self.add_output_graph_node(new_node)
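The sum_index logic above selects the AddN operand that is not the BiasAdd being fused, so the residual (shortcut) tensor becomes the extra summand input of QuantizedConv2DWithBiasSumAndRelu. Illustratively, with placeholder names:

addn_inputs = ["conv_biasadd", "shortcut"]   # inputs of the matched AddN node
fused_biasadd = "conv_biasadd"               # match_node_name[1]
sum_index = 1 if fused_biasadd == addn_inputs[0] else 0
assert addn_inputs[sum_index] == "shortcut"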
Example #8
    def apply_conv_biasadd_relu_fusion(self, match_node_name):
        """Fuse the conv/biasadd/relu pattern.

        Arguments:
            match_node_name {[type]} -- [description]
        """
        skip_node_name = match_node_name[1:]
        matched_node = self.node_name_mapping[match_node_name[0]]
        control_inputs, normal_inputs = self._get_node_input(
            matched_node.node.name)
        weight_name = normal_inputs[1]

        self._intel_cpu_quantize_weight_eightbit(
            matched_node.node.op, self.node_name_mapping[weight_name].node,
            self.per_channel)

        all_input_names = self._add_eightbit_prologue_nodes(
            matched_node.node.name)
        skip_node_name.append(weight_name)

        for _, node in enumerate(self.input_graph.node):
            if node.name in skip_node_name:
                logging.debug("skip node {}".format(node.name))
            elif node.name == match_node_name[0]:

                logging.debug("apply_conv_biasadd_relu_fusion")
                postfix = "_eightbit_quantized_conv" if node.op == "Conv2D" else "_eightbit_quantized_depthwise_conv"
                quantized_node_name = node.name + postfix
                bias_node_name = self.node_name_mapping[
                    match_node_name[1]].node.input[1]
                relu_node_name = match_node_name[2]
                is_relu6 = self.node_name_mapping[
                    relu_node_name].node.op == "Relu6"
                quantized_node_input_names = all_input_names[:2] + [
                    bias_node_name
                ] + all_input_names[2:] + control_inputs
                quantized_conv_node = helper.create_node(
                    "QuantizedConv2DWithBiasAndRelu" if node.op == "Conv2D"
                    else "QuantizedDepthwiseConv2DWithBiasAndRelu",
                    quantized_node_name, quantized_node_input_names)
                helper.copy_attr(quantized_conv_node, "strides",
                                 node.attr["strides"])
                helper.copy_attr(quantized_conv_node, "padding",
                                 node.attr["padding"])
                if node.op != 'DepthwiseConv2dNative' and "padding_list" in node.attr:
                    helper.copy_attr(quantized_conv_node, "padding_list",
                                     node.attr["padding_list"])
                helper.copy_attr(quantized_conv_node, "dilations",
                                 node.attr["dilations"])
                input_data_type = dtypes.quint8 if self._find_relu_node(
                    node) else dtypes.qint8
                helper.set_attr_dtype(quantized_conv_node, "Tinput",
                                      input_data_type)
                helper.set_attr_dtype(quantized_conv_node, "Tfilter",
                                      dtypes.qint8)
                helper.set_attr_dtype(quantized_conv_node, "out_type",
                                      dtypes.qint32)
                self.add_output_graph_node(quantized_conv_node)
                quantize_down_name = self._add_quantize_down_nodes(
                    node, quantized_node_name, dtypes.quint8, is_relu6)
                self._intel_cpu_add_dequantize_result_node(
                    quantize_down_name, relu_node_name)
            else:
                new_node = node_def_pb2.NodeDef()
                new_node.CopyFrom(node)
                self.add_output_graph_node(new_node)
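Taken together, one matched Conv2D/BiasAdd/Relu triple is replaced by a four-node chain; a summary of the calls above with illustrative names:

emitted = [
    ("conv_eightbit_quantized_conv", "QuantizedConv2DWithBiasAndRelu"),  # qint32 out
    ("conv_eightbit_requant_range", "RequantizationRange"),  # observes min/max
    ("conv_eightbit_requantize", "Requantize"),              # qint32 -> quint8
    ("relu_node_name", "Dequantize"),                        # quint8 -> float32
]
# The Dequantize node reuses the Relu's name so downstream consumers
# reconnect without rewiring.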
Example #9
    def _intel_cpu_quantize_weight_eightbit(self,
                                            parent,
                                            input_node,
                                            per_channel,
                                            quantization_mode=b"SCALED"):
        base_name = input_node.name + "_"
        qint8_const_name = base_name + "qint8_const"
        min_name = base_name + "min"
        max_name = base_name + "max"
        float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor)
        epsilon = 1e-4  # Needs to be set empirically if accuracy is not satisfactory
        if parent in ("Conv2D", "MatMul"):
            if per_channel:
                ranges = np.abs(float_tensor).max(axis=(0, 1, 2))
                min_value = -ranges
                max_value = ranges
                # Nudge the min/max values outside an epsilon radius around zero.
                ranges[ranges < epsilon] = epsilon
                min_value[np.abs(min_value) < epsilon] = -epsilon
                max_value[np.abs(max_value) < epsilon] = epsilon
                qint8_tensor = (float_tensor * 127.0 / ranges).astype(np.int8)
            else:
                min_value = np.min(float_tensor.flatten())
                max_value = np.max(float_tensor.flatten())
                # Same processing of min-max as in quantize_weight_eightbit
                # function.
                if min_value > 0.0:
                    min_value = 0.0
                if min_value == max_value:
                    if abs(min_value) < 0.000001:
                        max_value = min_value + 1.0
                    elif min_value > 0:
                        max_value = 2 * min_value
                    else:
                        max_value = min_value / 2.0

                sess = session.Session()
                with sess.as_default():
                    quantize_op = array_ops.quantize_v2(
                        float_tensor,
                        min_value,
                        max_value,
                        dtypes.qint8,
                        mode=quantization_mode,
                        round_mode="HALF_TO_EVEN")
                    qint8_tensor = quantize_op[0].eval()
                    # Updated min-max values should be passed to the next feeding node.
                    min_value = quantize_op[1].eval()
                    max_value = quantize_op[2].eval()
        elif parent == "DepthwiseConv2dNative":
            # Get the max values over dims 0 and 1 for depthwise conv,
            # since the output channel count equals dim 2 * dim 3.
            ranges = np.abs(float_tensor).max(axis=(0, 1))
            ranges = ranges.flatten()
            min_value = -ranges
            max_value = ranges
            # Nudge the min/max values outside an epsilon radius around zero.
            ranges[ranges < epsilon] = epsilon
            min_value[np.abs(min_value) < epsilon] = -epsilon
            max_value[np.abs(max_value) < epsilon] = epsilon
            # The output channels collapse into a single dim (dim 2 * dim 3),
            # so reshape the tensor to 3 dims before dividing by ranges,
            # making the 3rd dim match the length of ranges.
            a, b, c, d = float_tensor.shape
            qint8_tensor = (float_tensor.reshape(a, b, c * d) * 127.0 /
                            ranges).astype(np.int8)
            # Restore the original 4-dim shape.
            qint8_tensor = qint8_tensor.reshape(a, b, c, d)
        shape = tensor_util.TensorShapeProtoToList(
            input_node.attr["value"].tensor.tensor_shape)
        qint8_const_node = helper.create_constant_node(qint8_const_name,
                                                       qint8_tensor,
                                                       dtypes.qint8,
                                                       shape=shape)

        min_node = helper.create_constant_node(min_name, min_value,
                                               dtypes.float32)

        max_node = helper.create_constant_node(max_name, max_value,
                                               dtypes.float32)

        dequantize_node = helper.create_node(
            "Dequantize", input_node.name,
            [qint8_const_name, min_name, max_name])

        helper.set_attr_dtype(dequantize_node, "T", dtypes.qint8)
        helper.set_attr_string(dequantize_node, "mode", b"SCALED")
        self.add_output_graph_node(qint8_const_node)
        self.add_output_graph_node(min_node)
        self.add_output_graph_node(max_node)
        self.add_output_graph_node(dequantize_node)
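As a standalone illustration of the per-channel SCALED path above, here is a NumPy sketch that mirrors the arithmetic for a Conv2D filter in HWIO layout (illustrative values, not the project's code):

import numpy as np

weights = np.random.randn(3, 3, 16, 32).astype(np.float32)  # HWIO filter
epsilon = 1e-4
ranges = np.abs(weights).max(axis=(0, 1, 2))  # one range per output channel
ranges[ranges < epsilon] = epsilon            # avoid near-zero scales
qint8 = (weights * 127.0 / ranges).astype(np.int8)
# The emitted Dequantize node later recovers the approximation:
recovered = qint8.astype(np.float32) * ranges / 127.0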
Example #10
    def _eightbitize_input_to_node(self,
                                   namespace_prefix,
                                   original_input_name,
                                   reshape_dims_name,
                                   reduction_dims_name,
                                   dtype=dtypes.quint8):
        """Takes one float input to an op, and converts it to quantized form."""
        unique_input_name = helper.unique_node_name_from_input(
            original_input_name)
        if unique_input_name in self.quantized_node_dict:
            quantized_tuple = self.quantized_node_dict[unique_input_name]
            return quantized_tuple[0], quantized_tuple[1], quantized_tuple[2]

        reshape_input_name = namespace_prefix + "_reshape_" + unique_input_name
        min_input_name = namespace_prefix + "_min_" + unique_input_name
        max_input_name = namespace_prefix + "_max_" + unique_input_name
        quantize_input_name = namespace_prefix + "_quantize_" + unique_input_name
        reshape_input_node = helper.create_node(
            "Reshape", reshape_input_name,
            [original_input_name, reshape_dims_name])
        helper.set_attr_dtype(reshape_input_node, "T", dtypes.float32)
        self.add_output_graph_node(reshape_input_node)
        min_input_node = helper.create_node(
            "Min", min_input_name, [reshape_input_name, reduction_dims_name])
        helper.set_attr_dtype(min_input_node, "T", dtypes.float32)
        helper.set_attr_dtype(min_input_node, "Tidx", dtypes.int32)
        helper.set_attr_bool(min_input_node, "keep_dims", False)
        self.add_output_graph_node(min_input_node)
        max_input_node = helper.create_node(
            "Max", max_input_name, [reshape_input_name, reduction_dims_name])
        helper.set_attr_dtype(max_input_node, "T", dtypes.float32)
        helper.set_attr_dtype(max_input_node, "Tidx", dtypes.int32)
        helper.set_attr_bool(max_input_node, "keep_dims", False)
        self.add_output_graph_node(max_input_node)
        quantize_input_node = helper.create_node(
            "QuantizeV2", quantize_input_name,
            [original_input_name, min_input_name, max_input_name])

        helper.set_attr_dtype(quantize_input_node, "T", dtype)

        helper.set_attr_string(quantize_input_node, "mode", b"SCALED")
        helper.set_attr_string(quantize_input_node, "round_mode",
                               b"HALF_TO_EVEN")
        # if FLAGS.model_name in ["wide_deep_large_ds"]:
        #    set_attr_string(quantize_input_node, "mode", b"MIN_FIRST")
        # else:
        #    set_attr_string(quantize_input_node, "mode",
        #                    b"SCALED" if self.intel_cpu_eightbitize else b"MIN_FIRST")
        #    set_attr_string(quantize_input_node, "round_mode",
        #                    b"HALF_TO_EVEN" if self.intel_cpu_eightbitize
        #                    else b"HALF_AWAY_FROM_ZERO")
        self.add_output_graph_node(quantize_input_node)
        min_output_name = quantize_input_name + ":1"
        max_output_name = quantize_input_name + ":2"
        self.quantized_node_dict[unique_input_name] = (quantize_input_name,
                                                       min_output_name,
                                                       max_output_name)
        return quantize_input_name, min_output_name, max_output_name
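The Reshape/Min/Max/QuantizeV2 subgraph computes a dynamic range per input. A simplified NumPy sketch of the same idea for a non-negative (post-Relu) activation, assuming a zero-preserving SCALED-style mapping rather than TensorFlow's exact kernel:

import numpy as np

x = np.abs(np.random.randn(1, 56, 56, 64)).astype(np.float32)  # e.g. Relu output
flat = x.reshape(-1)                   # Reshape with reshape_dims = [-1]
min_v, max_v = flat.min(), flat.max()  # the Min / Max nodes; min_v rides along
q = np.round(flat * 255.0 / max_v).astype(np.uint8)  # quint8; zero maps to zero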