def inspect_tensor(self, original_op_list, iteration_list, work_dir, inspect_type):
    """Dump the weights and output activations of the specified ops.

    The fp32 graph is sorted, logging nodes are inserted after the requested
    ops, inference is run with its output captured into ``<work_dir>/kl.log``
    and the captured lines are parsed back into arrays.

    Args:
        original_op_list (list): ops to inspect. Each entry is either an op
            name (str), in which case the op type defaults to ``"conv2d"``,
            or an ``(op_name, op_type)`` tuple. Entries whose type is not
            ``"conv2d"`` are skipped.
        iteration_list (list of int): 1-based iteration indices whose dumped
            activations should be returned.
        work_dir (str): directory in which the ``kl.log`` dump is written.
        inspect_type (str): weights are collected only when this is
            ``'weight'`` or ``'all'``; activations are dumped regardless of
            the value.

    Returns:
        dict: ``{'weight': {node_name: {tensor_name: ndarray}},
        'activation': [{(op_name, op_type): {op_name: ndarray}}, ...]}``
        with one ``'activation'`` entry per item of ``iteration_list``.
    """
    assert iteration_list is not None, "The parameter iterations list could not be empty."

    quantized_node_name_postfix = '_eightbit_requantize'
    graph_node_name_mapping = {}
    graph_q_node_name = []
    q_node_name = []
    q_node_scale = {}
    fp32_node_name_mapping = {}
    op_name_type_dict = {}
    weights_tensor = {}

    sorted_graph = QuantizeGraphHelper().get_sorted_graph(
        self._fp32_model.graph_def,
        self._fp32_model.input_node_names,
        self._fp32_model.output_node_names)

    g = GraphAnalyzer()
    g.graph = sorted_graph
    graph_info = g.parse_graph()

    # Index every node by name; quantized nodes are indexed by their name
    # with the requantize postfix stripped so they can be matched against
    # the op names supplied by the caller.
    for node in sorted_graph.node:
        node_name = node.name
        if "Quantized" in node.op:
            node_name = node.name.split(quantized_node_name_postfix)[0]
            graph_q_node_name.append(node_name)
        graph_node_name_mapping[node_name] = node

    if inspect_type in ('weight', 'all'):
        for node in sorted_graph.node:
            if "Conv" not in node.op:
                continue

            if "Quantized" not in node.op:
                node_name = node.name
                # NOTE(review): the (3, 2, 0, 1) transpose presumably converts
                # HWIO filters to OIHW - confirm against the consumer.
                weights_tensor[node_name] = {
                    node.input[1]: tensor_util.MakeNdarray(
                        graph_node_name_mapping[
                            node.input[1]].attr['value'].tensor).transpose(3, 2, 0, 1)
                }
                # Only the first consumer of the conv is checked for a bias.
                outputs = graph_info[node.name].outputs
                bias_node = graph_info[outputs[0]].node if outputs else None
                if bias_node and bias_node.op == 'BiasAdd':
                    weights_tensor[node_name][bias_node.name] = tensor_util.MakeNdarray(
                        graph_node_name_mapping[
                            bias_node.input[1]].attr['value'].tensor)
            else:
                node_name = node.name.split(quantized_node_name_postfix)[0]
                # NOTE(review): inputs 5 and 6 are assumed to be the filter
                # min/max constants and input 2 the bias - confirm against
                # the quantized conv op definition.
                min_filter_node = graph_info[node.input[5]].node
                max_filter_node = graph_info[node.input[6]].node
                if min_filter_node.attr['value'].tensor.float_val:
                    min_filter_tensor = min_filter_node.attr['value'].tensor.float_val
                    max_filter_tensor = max_filter_node.attr['value'].tensor.float_val
                else:
                    min_filter_tensor = tensor_util.MakeNdarray(
                        min_filter_node.attr['value'].tensor)
                    max_filter_tensor = tensor_util.MakeNdarray(
                        max_filter_node.attr['value'].tensor)

                weight_tensor = tensor_util.MakeNdarray(
                    graph_node_name_mapping[node.input[1]].attr['value'].tensor)
                weight_tensor = weight_tensor.astype('float')

                # The return value is ignored, so DequantizeWeight is
                # expected to update weight_tensor in place.
                DequantizeWeight(weight_tensor, min_filter_tensor, max_filter_tensor)
                weights_tensor[node_name] = {
                    node.input[1]: weight_tensor.transpose(3, 2, 0, 1)
                }
                weights_tensor[node_name][node.input[2]] = tensor_util.MakeNdarray(
                    graph_node_name_mapping[node.input[2]].attr['value'].tensor)

    for op_info in original_op_list:
        if isinstance(op_info, tuple):
            # Unpack both fields from the tuple itself. Rebinding the name
            # first and then indexing it would pick the second character of
            # the op name as the type.
            op_name, op_type = op_info[0], op_info[1]
        else:
            # TODO op_type set to conv2d for fast_bias_correction and weight correction.
            op_name = op_info
            op_type = "conv2d"

        if op_type not in ("conv2d",):
            continue

        op_name_type_dict[op_name] = op_type
        if op_name in graph_q_node_name:
            q_name = op_name + quantized_node_name_postfix
            q_node_name.append(q_name)
            q_node = graph_node_name_mapping[op_name]
            # The last two inputs of the quantized node hold the output range.
            q_out_min = graph_node_name_mapping[
                q_node.input[-2]].attr["value"].tensor.float_val[0]
            q_out_max = graph_node_name_mapping[
                q_node.input[-1]].attr["value"].tensor.float_val[0]
            q_node_scale[q_name] = (q_node.op, q_out_min, q_out_max)
        else:
            node_op = graph_node_name_mapping[op_name].op
            if node_op in ("Conv2D", "DepthwiseConv2dNative"):
                # Log at the last node of the longest fusable pattern that
                # starts at this conv, and report it under the conv's name.
                _, matched_nodes = FuseNodeStartWithConv2d(
                    input_graph=sorted_graph,
                    patterns=self.int8_sequences[node_op],
                    remove_redundant_quant_flag=True,
                    op_wise_cfg=(False, "minmax", False, 7.0),
                    start_node_name=op_name,
                    device=self.device).get_longest_fuse()

                if matched_nodes:
                    fp32_node_name_mapping[matched_nodes[-1]] = op_name
            else:
                fp32_node_name_mapping[op_name] = op_name

    # The result of the fp32 transformation is deliberately not re-assigned,
    # exactly as before; only the quantized pass rebinds sorted_graph.
    InsertLogging(sorted_graph,
                  node_name_list=fp32_node_name_mapping.keys(),
                  message="__KL:",
                  summarize=-1,
                  dump_fp32=True).do_transformation()

    if q_node_name:
        sorted_graph = InsertLogging(sorted_graph,
                                     node_name_list=q_node_name,
                                     message="__KL:",
                                     summarize=-1).do_transformation()

    tmp_dump_file = os.path.join(work_dir, 'kl.log')

    model = TensorflowModel(sorted_graph, self._tmp_model.framework_specific_info)
    with CaptureOutputToFile(tmp_dump_file):
        self._inference(model)

    # Dump lines have the form ';<node_name>__print__;__KL:<tensor content>'.
    dump_tensor_content = {}
    with open(tmp_dump_file) as f:
        for line in f:
            if not line.startswith(';'):
                continue
            contents = line.split('__print__;__KL:')
            node_name = contents[0][1:]
            dump_tensor_content.setdefault(node_name, []).append(str2array(contents[1]))

    activation_content = []
    for iter_idx in iteration_list:
        result_disk = {}
        for k, v in dump_tensor_content.items():
            if k in fp32_node_name_mapping:
                key = fp32_node_name_mapping[k]
                result_disk[(key, op_name_type_dict[key])] = \
                    {key: v[iter_idx - 1].transpose(0, 3, 1, 2)}
            else:
                result_key = k.split(quantized_node_name_postfix)[0]
                # NOTE(review): quantized outputs always use the first
                # captured dump (v[0]) regardless of iter_idx - confirm
                # whether this is intended.
                result_disk[(result_key, op_name_type_dict[result_key])] = \
                    {result_key: Dequantize(v[0], q_node_scale[k]).transpose(0, 3, 1, 2)}
        activation_content.append(result_disk)

    return {
        'weight': weights_tensor,
        'activation': activation_content
    }
def inspect_tensor(self, original_op_list, iteration_list, work_dir):
    """Dump the output tensor content of the specified ops.

    The fp32 graph is sorted, logging nodes are inserted after the requested
    ops, inference is run with its output captured into ``<work_dir>/kl.log``
    and the captured lines are parsed back into arrays.

    Args:
        original_op_list (list): ``(op_name, op_type)`` entries. Only entries
            whose type is ``"conv2d"`` are processed.
        iteration_list (list of int): 1-based iteration indices. Only the
            first entry is used; an empty or ``None`` list selects the first
            captured iteration.
        work_dir (str): directory in which the ``kl.log`` dump is written.

    Returns:
        dict: maps ``(op_name, op_type)`` to the dumped tensor content. For
        quantized ops the value is the result of ``self._dequantize``.
    """
    quantized_node_name_postfix = '_eightbit_requantize'
    graph_node_name_mapping = {}
    graph_q_node_name = []
    q_node_name = []
    q_node_scale = {}
    fp32_node_name_mapping = {}
    op_name_type_dict = {}

    sorted_graph = QuantizeGraphHelper().get_sorted_graph(
        self._fp32_origin_graph, self.input_node_names,
        self.output_node_names)

    # Index every node by name; quantized nodes are indexed by their name
    # with the requantize postfix stripped so they can be matched against
    # the op names supplied by the caller.
    for node in sorted_graph.node:
        node_name = node.name
        if "Quantized" in node.op:
            node_name = node.name.split(quantized_node_name_postfix)[0]
            graph_q_node_name.append(node_name)
        graph_node_name_mapping[node_name] = node

    for op_info in original_op_list:
        op_name = op_info[0]
        op_type = op_info[1]

        if op_type not in ("conv2d",):
            continue

        op_name_type_dict[op_name] = op_type
        if op_name in graph_q_node_name:
            q_name = op_name + quantized_node_name_postfix
            q_node_name.append(q_name)
            q_node = graph_node_name_mapping[op_name]
            # The last two inputs of the quantized node hold the output range.
            q_out_min = graph_node_name_mapping[
                q_node.input[-2]].attr["value"].tensor.float_val[0]
            q_out_max = graph_node_name_mapping[
                q_node.input[-1]].attr["value"].tensor.float_val[0]
            q_node_scale[q_name] = (q_node.op, q_out_min, q_out_max)
        else:
            node_op = graph_node_name_mapping[op_name].op
            if node_op in ("Conv2D", "DepthwiseConv2dNative"):
                # Log at the last node of the longest fusable pattern that
                # starts at this conv, and report it under the conv's name.
                _, matched_nodes = FuseNodeStartWithConv2d(
                    input_graph=sorted_graph,
                    patterns=self.int8_sequences[node_op],
                    remove_redundant_quant_flag=True,
                    op_wise_cfg=(False, "minmax", False),
                    start_node_name=op_name,
                    device=self.device).get_longest_fuse()

                if matched_nodes:
                    fp32_node_name_mapping[matched_nodes[-1]] = op_name
            else:
                fp32_node_name_mapping[op_name] = op_name

    # The result of the fp32 transformation is deliberately not re-assigned,
    # exactly as before; only the quantized pass rebinds sorted_graph.
    InsertLogging(sorted_graph,
                  node_name_list=fp32_node_name_mapping.keys(),
                  message="__KL:",
                  summarize=-1,
                  dump_fp32=True).do_transformation()

    if q_node_name:
        sorted_graph = InsertLogging(sorted_graph,
                                     node_name_list=q_node_name,
                                     message="__KL:",
                                     summarize=-1).do_transformation()

    tmp_dump_file = os.path.join(work_dir, 'kl.log')
    with CaptureOutputToFile(tmp_dump_file):
        self._inference(sorted_graph)

    # Dump lines have the form ';<node_name>__print__;__KL:<tensor content>'.
    dump_tensor_content = {}
    with open(tmp_dump_file) as f:
        for line in f:
            if not line.startswith(';'):
                continue
            contents = line.split('__print__;__KL:')
            node_name = contents[0][1:]
            dump_tensor_content.setdefault(node_name, []).append(str2array(contents[1]))

    result_disk = {}
    tensor_iter_idx = iteration_list[0] - 1 if iteration_list else 0
    for k, v in dump_tensor_content.items():
        if k in fp32_node_name_mapping:
            key = fp32_node_name_mapping[k]
            result_disk[(key, op_name_type_dict[key])] = v[tensor_iter_idx]
        else:
            # Strip the requantize postfix to recover the op name. The piece
            # to keep is always the first one; indexing the split result with
            # the iteration index breaks for any iteration other than the
            # first.
            result_key = k.split(quantized_node_name_postfix)[0]
            # NOTE(review): quantized outputs always use the first captured
            # dump (v[0]) regardless of the requested iteration - confirm
            # whether this is intended.
            result_disk[(result_key, op_name_type_dict[result_key])] = \
                self._dequantize(v[0], q_node_scale[k])
    return result_disk