def apply_matmul_biasadd_fusion(self, match_node_name):
    """Fuse a matched MatMul + BiasAdd pair into one QuantizedMatMulWithBias node.

    Arguments:
        match_node_name: matched node names; element 0 is the MatMul,
            element 1 the BiasAdd. All other graph nodes are copied through
            unchanged.
    """
    skip_node_name = match_node_name[1:]
    matched_node = self.node_name_mapping[match_node_name[0]]
    control_inputs, normal_inputs = self._get_node_input(
        matched_node.node.name)
    weight_name = normal_inputs[1]
    # Quantize the weight tensor to eight bits before rewriting the graph.
    self._intel_cpu_quantize_weight_eightbit(
        matched_node.node.op, self.node_name_mapping[weight_name].node,
        self.per_channel)
    skip_node_name.append(weight_name)
    for _, node in enumerate(self.input_graph.node):
        if node.name in skip_node_name:
            # Consistency fix: siblings log skipped nodes instead of `pass`.
            logging.debug("skip node {}".format(node.name))
        elif node.name == match_node_name[0]:
            logging.debug("matched node {} with input {}".format(
                node.name, node.input))
            # Fix: this previously logged "apply_conv_biasadd_fusion",
            # which belongs to the conv fusion pass, not this one.
            logging.debug("apply_matmul_biasadd_fusion")
            quantized_node_name = node.name + "_eightbit_quantized_mat_mul"
            bias_node_name = self.node_name_mapping[
                match_node_name[1]].node.input[1]
            all_input_names = self._add_eightbit_prologue_nodes(
                matched_node.node.name)
            # Splice the bias in after the two data inputs, ahead of the
            # min/max range inputs produced by the prologue.
            quantized_node_input_names = all_input_names[:2] + [
                bias_node_name
            ] + all_input_names[2:] + control_inputs
            quantized_matmul_node = helper.create_node(
                "QuantizedMatMulWithBias", quantized_node_name,
                quantized_node_input_names)
            helper.copy_attr(quantized_matmul_node, "transpose_a",
                             node.attr["transpose_a"])
            helper.copy_attr(quantized_matmul_node, "transpose_b",
                             node.attr["transpose_b"])
            helper.set_attr_dtype(quantized_matmul_node, "T1", dtypes.quint8)
            helper.set_attr_dtype(quantized_matmul_node, "T2", dtypes.qint8)
            helper.set_attr_dtype(quantized_matmul_node, "Toutput",
                                  dtypes.qint32)
            helper.set_attr_dtype(quantized_matmul_node, "Tbias",
                                  dtypes.float32)
            self.add_output_graph_node(quantized_matmul_node)
            requantize_type = dtypes.qint8
            quantize_down_name = self._add_quantize_down_nodes(
                node, quantized_node_name, requantize_type, False)
            self._intel_cpu_add_dequantize_result_node(
                quantize_down_name, match_node_name[1], requantize_type)
        else:
            # Unrelated node: copy through verbatim.
            new_node = node_def_pb2.NodeDef()
            new_node.CopyFrom(node)
            self.add_output_graph_node(new_node)
def _add_pool_function(self, original_node, quantized_op_node):
    """Populate a quantized pooling node's attributes from the original op."""
    # Quantized pooling consumes quint8 activations.
    helper.set_attr_dtype(quantized_op_node, "T", dtypes.quint8)
    # Forward the pooling geometry unchanged.
    for attr_name in ("ksize", "strides", "padding"):
        helper.copy_attr(quantized_op_node, attr_name,
                         original_node.attr[attr_name])
def apply_conv_single_fusion(self, match_node_name):
    """Quantize a standalone Conv2D / DepthwiseConv2dNative node (no bias or
    relu fused with it).

    Arguments:
        match_node_name: matched node names; element 0 is the conv node.
    """
    skip_node_name = match_node_name[1:]
    matched_node = self.node_name_mapping[match_node_name[0]]
    _, normal_inputs = self._get_node_input(matched_node.node.name)
    weight_name = normal_inputs[1]
    # TODO this is workaround as the tf 2.1 doesn't support depthwise s8 feature.
    # Bail out and keep the graph unchanged for depthwise convs that are not
    # followed by a relu when s8 is enabled.
    if self.enable_s8 and matched_node.node.op == "DepthwiseConv2dNative" and not self._find_relu_node(
            matched_node.node):
        self.output_graph = self.input_graph
        return
    self._intel_cpu_quantize_weight_eightbit(
        matched_node.node.op, self.node_name_mapping[weight_name].node,
        self.per_channel)
    all_input_names = self._add_eightbit_prologue_nodes(
        matched_node.node.name)
    skip_node_name.append(weight_name)
    for _, node in enumerate(self.input_graph.node):
        if node.name in skip_node_name:
            logging.debug("skip node {}".format(node.name))
        elif node.name == match_node_name[0]:
            # Name the quantized node after the original conv op type.
            postfix = "_eightbit_quantized_conv" if node.op == "Conv2D" else "_eightbit_quantized_depthwise_conv"
            quantized_node_name = node.name + postfix
            if node.op == "Conv2D":
                quantized_conv_node = helper.create_node(
                    "QuantizedConv2DPerChannel"
                    if self.per_channel else "QuantizedConv2D",
                    quantized_node_name, all_input_names)
            elif node.op == "DepthwiseConv2dNative":
                quantized_conv_node = helper.create_node(
                    "QuantizedDepthwiseConv2D", quantized_node_name,
                    all_input_names)
            helper.copy_attr(quantized_conv_node, "strides",
                             node.attr["strides"])
            helper.copy_attr(quantized_conv_node, "padding",
                             node.attr["padding"])
            # padding_list is only forwarded for the non-depthwise conv.
            if node.op != 'DepthwiseConv2dNative' and "padding_list" in node.attr:
                helper.copy_attr(quantized_conv_node, "padding_list",
                                 node.attr["padding_list"])
            helper.copy_attr(quantized_conv_node, "dilations",
                             node.attr["dilations"])
            # A downstream relu implies non-negative activations (quint8);
            # otherwise use signed qint8 input.
            input_data_type = dtypes.quint8 if self._find_relu_node(
                node) else dtypes.qint8
            helper.set_attr_dtype(quantized_conv_node, "Tinput",
                                  input_data_type)
            helper.set_attr_dtype(quantized_conv_node, "Tfilter",
                                  dtypes.qint8)
            helper.set_attr_dtype(quantized_conv_node, "out_type",
                                  dtypes.qint32)
            self.add_output_graph_node(quantized_conv_node)
            quantize_down_name = self._add_quantize_down_nodes(
                node, quantized_node_name, dtypes.qint8)
            self._intel_cpu_add_dequantize_result_node(
                quantize_down_name, node.name, dtypes.qint8)
        else:
            # Unrelated node: copy through verbatim.
            new_node = node_def_pb2.NodeDef()
            new_node.CopyFrom(node)
            self.add_output_graph_node(new_node)
def apply_conv_biasadd_addn_relu_fusion(self, match_node_name):
    """Fuse a matched conv + biasadd + sum + relu chain into one
    QuantizedConv2DWithBiasSumAndRelu node.

    Arguments:
        match_node_name: matched node names; element 0 is the conv,
            element 1 the BiasAdd, element 2 the sum (presumably Add/AddN
            — TODO confirm against the matcher), element 3 the Relu/Relu6.
    """
    skip_node_name = match_node_name[1:]
    matched_node = self.node_name_mapping[match_node_name[0]]
    control_inputs, normal_inputs = self._get_node_input(
        matched_node.node.name)
    weight_name = normal_inputs[1]
    self._intel_cpu_quantize_weight_eightbit(
        matched_node.node.op, self.node_name_mapping[weight_name].node,
        self.per_channel)
    all_input_names = self._add_eightbit_prologue_nodes(
        matched_node.node.name)
    skip_node_name.append(weight_name)
    for _, node in enumerate(self.input_graph.node):
        if node.name in skip_node_name:
            logging.debug("skip node {}".format(node.name))
        elif node.name == match_node_name[0]:
            logging.debug("matched node {} with input {}".format(
                node.name, node.input))
            logging.debug("apply_conv_biasadd_addn_relu_fusion")
            quantized_node_name = node.name + "_eightbit_quantized_conv"
            bias_node_name = self.node_name_mapping[
                match_node_name[1]].node.input[1]
            relu_node_name = match_node_name[3]
            # Relu6 changes the requantization range below.
            is_relu6 = self.node_name_mapping[
                relu_node_name].node.op == "Relu6"
            # Choose the sum node's input that is NOT the BiasAdd output —
            # that tensor is the summand fused into the conv.
            sum_index = 1 if match_node_name[1] == self.node_name_mapping[
                match_node_name[2]].node.input[0] else 0
            # Input order: data inputs, bias, min/max ranges, summand,
            # then any control inputs.
            quantized_node_input_names = all_input_names[:2] + [
                bias_node_name
            ] + all_input_names[2:] + [
                self.node_name_mapping[
                    match_node_name[2]].node.input[sum_index]
            ] + control_inputs
            quantized_conv_node = helper.create_node(
                "QuantizedConv2DWithBiasSumAndRelu", quantized_node_name,
                quantized_node_input_names)
            helper.copy_attr(quantized_conv_node, "strides",
                             node.attr["strides"])
            helper.copy_attr(quantized_conv_node, "padding",
                             node.attr["padding"])
            if "padding_list" in node.attr:
                helper.copy_attr(quantized_conv_node, "padding_list",
                                 node.attr["padding_list"])
            helper.copy_attr(quantized_conv_node, "dilations",
                             node.attr["dilations"])
            # A downstream relu implies non-negative activations (quint8);
            # otherwise use signed qint8 input.
            input_data_type = dtypes.quint8 if self._find_relu_node(
                node) else dtypes.qint8
            helper.set_attr_dtype(quantized_conv_node, "Tinput",
                                  input_data_type)
            helper.set_attr_dtype(quantized_conv_node, "Tfilter",
                                  dtypes.qint8)
            helper.set_attr_dtype(quantized_conv_node, "out_type",
                                  dtypes.qint32)
            self.add_output_graph_node(quantized_conv_node)
            quantize_down_name = self._add_quantize_down_nodes(
                node, quantized_node_name, dtypes.quint8, is_relu6)
            self._intel_cpu_add_dequantize_result_node(
                quantize_down_name, relu_node_name)
        else:
            # Unrelated node: copy through verbatim.
            new_node = node_def_pb2.NodeDef()
            new_node.CopyFrom(node)
            self.add_output_graph_node(new_node)
def apply_conv_biasadd_relu_fusion(self, match_node_name):
    """Fuse the conv/biasadd/relu pattern.

    Rewrites a matched conv -> BiasAdd -> Relu(6) chain into a single
    quantized fused conv op; all other graph nodes are copied through
    unchanged.

    Arguments:
        match_node_name {[type]} -- [description]
    """
    conv_name = match_node_name[0]
    matched_node = self.node_name_mapping[conv_name]
    control_inputs, normal_inputs = self._get_node_input(
        matched_node.node.name)
    weight_name = normal_inputs[1]

    # Quantize the filter weights before rewriting the graph.
    self._intel_cpu_quantize_weight_eightbit(
        matched_node.node.op, self.node_name_mapping[weight_name].node,
        self.per_channel)
    all_input_names = self._add_eightbit_prologue_nodes(
        matched_node.node.name)

    nodes_to_skip = match_node_name[1:]
    nodes_to_skip.append(weight_name)

    for node in self.input_graph.node:
        if node.name in nodes_to_skip:
            logging.debug("skip node {}".format(node.name))
            continue
        if node.name != conv_name:
            # Unrelated node: copy through verbatim.
            passthrough = node_def_pb2.NodeDef()
            passthrough.CopyFrom(node)
            self.add_output_graph_node(passthrough)
            continue

        logging.debug("apply_conv_biasadd_relu_fusion")
        is_regular_conv = node.op == "Conv2D"
        suffix = ("_eightbit_quantized_conv"
                  if is_regular_conv else "_eightbit_quantized_depthwise_conv")
        quantized_node_name = node.name + suffix
        bias_node_name = self.node_name_mapping[
            match_node_name[1]].node.input[1]
        relu_node_name = match_node_name[2]
        # Relu6 changes the requantization range below.
        is_relu6 = self.node_name_mapping[relu_node_name].node.op == "Relu6"
        # Bias goes between the data inputs and the min/max range inputs.
        fused_input_names = (all_input_names[:2] + [bias_node_name] +
                             all_input_names[2:] + control_inputs)
        fused_op_name = ("QuantizedConv2DWithBiasAndRelu" if is_regular_conv
                         else "QuantizedDepthwiseConv2DWithBiasAndRelu")
        quantized_conv_node = helper.create_node(
            fused_op_name, quantized_node_name, fused_input_names)

        for attr_name in ("strides", "padding"):
            helper.copy_attr(quantized_conv_node, attr_name,
                             node.attr[attr_name])
        # padding_list is only forwarded for the non-depthwise conv.
        if node.op != 'DepthwiseConv2dNative' and "padding_list" in node.attr:
            helper.copy_attr(quantized_conv_node, "padding_list",
                             node.attr["padding_list"])
        helper.copy_attr(quantized_conv_node, "dilations",
                         node.attr["dilations"])

        input_data_type = (dtypes.quint8
                           if self._find_relu_node(node) else dtypes.qint8)
        helper.set_attr_dtype(quantized_conv_node, "Tinput", input_data_type)
        helper.set_attr_dtype(quantized_conv_node, "Tfilter", dtypes.qint8)
        helper.set_attr_dtype(quantized_conv_node, "out_type", dtypes.qint32)

        self.add_output_graph_node(quantized_conv_node)
        quantize_down_name = self._add_quantize_down_nodes(
            node, quantized_node_name, dtypes.quint8, is_relu6)
        self._intel_cpu_add_dequantize_result_node(
            quantize_down_name, relu_node_name)