Example #1
    def inspect_tensor(self, original_op_list, iteration_list, work_dir,
                       inspect_type):
        """dump the specified op's output tensor content

        Args:
            original_op_list (string list): the ops name
            iteration_list (int list): the specified iteration to dump tensor

        Returns:
            dict: key is op name while value is the content saved in np.array format.
        """
        assert iteration_list is not None, "The parameter 'iteration_list' must not be None."
        graph_node_name_mapping = {}
        q_node_name = []
        fp32_node_name = []
        fp32_node_name_mapping = {}
        q_node_scale = {}
        sorted_graph = QuantizeGraphHelper().get_sorted_graph(
            self._fp32_model.graph_def, self._fp32_model.input_node_names,
            self._fp32_model.output_node_names)

        graph_q_node_name = []
        op_name_type_dict = {}
        quantized_node_name_postfix = '_eightbit_requantize'
        weights_tensor = {}
        g = GraphAnalyzer()
        g.graph = sorted_graph
        graph_info = g.parse_graph()

        # First pass: map each node name to its node and record which ops are
        # already quantized (their names carry the requantize postfix).
        for node in sorted_graph.node:
            node_name = node.name
            if node.op.find("Quantized") != -1:
                node_name = node.name.split(quantized_node_name_postfix)[0]
                graph_q_node_name.append(node_name)
            graph_node_name_mapping[node_name] = node

        for node in sorted_graph.node:
            node_name = node.name
            if node.op.find("Quantized") != -1:
                node_name = node.name.split(quantized_node_name_postfix)[0]

            if inspect_type in ('weight', 'all') and node.op.find("Conv") != -1:
                if node.op.find("Quantized") == -1:
                    # FP32 Conv: read the weight constant directly and transpose
                    # it from HWIO to OIHW layout.
                    weights_tensor[node_name] = {
                        node.input[1]: tensor_util.MakeNdarray(
                            graph_node_name_mapping[node.input[1]].attr['value'].tensor
                        ).transpose(3, 2, 0, 1)
                    }
                    bias_node = None if not graph_info[node.name].outputs \
                        else graph_info[graph_info[node.name].outputs[0]].node
                    if bias_node and bias_node.op == 'BiasAdd':
                        weights_tensor[node_name][bias_node.name] = \
                            tensor_util.MakeNdarray(graph_node_name_mapping[
                                bias_node.input[1]].attr['value'].tensor)

                else:
                    # Quantized Conv: inputs 5 and 6 carry the filter min/max,
                    # stored either as float_val or as a packed tensor.
                    if graph_info[
                            node.input[5]].node.attr['value'].tensor.float_val:
                        min_filter_tensor = graph_info[
                            node.input[5]].node.attr['value'].tensor.float_val
                        max_filter_tensor = graph_info[
                            node.input[6]].node.attr['value'].tensor.float_val
                    else:
                        min_filter_tensor = tensor_util.MakeNdarray(
                            graph_info[node.input[5]].node.attr['value'].tensor)
                        max_filter_tensor = tensor_util.MakeNdarray(
                            graph_info[node.input[6]].node.attr['value'].tensor)

                    weight_tensor = tensor_util.MakeNdarray(
                        graph_node_name_mapping[node.input[1]].attr['value'].tensor)
                    weight_tensor = weight_tensor.astype('float')

                    # Dequantize the int8 weights back to float using the stored
                    # filter min/max range.
                    DequantizeWeight(weight_tensor, min_filter_tensor,
                                     max_filter_tensor)
                    weights_tensor[node_name] = {
                        node.input[1]: weight_tensor.transpose(3, 2, 0, 1)
                    }

                    weights_tensor[node_name][
                        node.input[2]] = tensor_util.MakeNdarray(
                            graph_node_name_mapping[
                                node.input[2]].attr['value'].tensor)

        for op_name in original_op_list:
            if isinstance(op_name, tuple):
                # Unpack the (op name, op type) pair.
                op_name, op_type = op_name
            else:
                # TODO: op_type is hard-coded to conv2d for fast bias correction
                # and weight correction.
                op_type = "conv2d"

            if op_type not in ["conv2d"]:
                continue

            op_name_type_dict[op_name] = op_type
            if op_name in graph_q_node_name:
                # Quantized op: record the requantize node and its output
                # min/max so the dumped activation can be dequantized later.
                q_node_name.append(op_name + quantized_node_name_postfix)
                q_node = graph_node_name_mapping[op_name]
                q_out_min = graph_node_name_mapping[
                    q_node.input[-2]].attr["value"].tensor.float_val[0]
                q_out_max = graph_node_name_mapping[
                    q_node.input[-1]].attr["value"].tensor.float_val[0]
                q_node_scale[op_name +
                             quantized_node_name_postfix] = (q_node.op,
                                                             q_out_min,
                                                             q_out_max)
            else:
                fp32_node_name.append(op_name)
                node_op = graph_node_name_mapping[op_name].op
                if node_op in ("Conv2D", "DepthwiseConv2dNative"):
                    _, matched_nodes = FuseNodeStartWithConv2d(
                        input_graph=sorted_graph,
                        patterns=self.int8_sequences[node_op],
                        remove_redundant_quant_flag=True,
                        op_wise_cfg=(False, "minmax", False, 7.0),
                        start_node_name=op_name,
                        device=self.device).get_longest_fuse()

                    if matched_nodes:
                        fp32_node_name_mapping[matched_nodes[-1]] = op_name
                else:
                    fp32_node_name_mapping[op_name] = op_name

        InsertLogging(sorted_graph,
                      node_name_list=fp32_node_name_mapping.keys(),
                      message="__KL:",
                      summarize=-1,
                      dump_fp32=True).do_transformation()

        if q_node_name:
            sorted_graph = InsertLogging(sorted_graph,
                                         node_name_list=q_node_name,
                                         message="__KL:",
                                         summarize=-1).do_transformation()

        # Run one inference pass while capturing the Print ops' output to a file.
        tmp_dump_file = os.path.join(work_dir, 'kl.log')

        model = TensorflowModel(sorted_graph,
                                self._tmp_model.framework_specific_info)
        with CaptureOutputToFile(tmp_dump_file):
            self._inference(model)

        with open(tmp_dump_file) as f:
            disk_content = f.readlines()

        # Each inserted Print op emits ";<node name>__print__;__KL:<values>",
        # so keep only the lines starting with ';'.
        filter_content = (i for i in disk_content if i.startswith(';'))

        dump_tensor_content = {}

        for i in filter_content:
            contents = i.split('__print__;__KL:')
            node_name = contents[0][1:]
            node_content = str2array(contents[1])

            if node_name not in dump_tensor_content:
                dump_tensor_content[node_name] = []
            dump_tensor_content[node_name].append(node_content)

        # Assemble per-iteration activations, transposing NHWC -> NCHW.
        activation_content = []
        for iter_idx in iteration_list:
            result_disk = {}
            for k, v in dump_tensor_content.items():
                if k in fp32_node_name_mapping:
                    key = fp32_node_name_mapping[k]
                    result_disk[(key, op_name_type_dict[key])] = {
                        key: v[iter_idx - 1].transpose(0, 3, 1, 2)}
                else:
                    result_key = k.split(quantized_node_name_postfix)[0]
                    result_disk[(result_key, op_name_type_dict[result_key])] = {
                        result_key: Dequantize(v[0], q_node_scale[k]).transpose(0, 3, 1, 2)}
            activation_content.append(result_disk)

        final_result = {
            'weight': weights_tensor,
            'activation': activation_content
        }

        return final_result
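
A minimal usage sketch (hypothetical: the adapter instance, op name, and paths below are illustrative, not taken from the source):

# 'adapter' is assumed to be an already-built TensorFlow adapter whose graph
# contains a conv2d op named 'conv1'.
dumped = adapter.inspect_tensor([('conv1', 'conv2d')],
                                iteration_list=[1],
                                work_dir='/tmp/inspect',
                                inspect_type='all')
weights = dumped['weight']          # {op name: {tensor name: np.ndarray}}
activations = dumped['activation']  # one dict per requested iteration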
Example #2
    def inspect_tensor(self, original_op_list, iteration_list, work_dir):
        """dump the specified op's output tensor content

        Args:
            original_op_list (string list): the ops name
            iteration_list (int list): the specified iteration to dump tensor

        Returns:
            dict: key is op name while value is the content saved in np.array format.
        """
        graph_node_name_mapping = {}
        q_node_name = []
        fp32_node_name = []
        fp32_node_name_mapping = {}
        q_node_scale = {}
        sorted_graph = QuantizeGraphHelper().get_sorted_graph(
            self._fp32_origin_graph, self.input_node_names,
            self.output_node_names)
        graph_q_node_name = []
        op_name_type_dict = {}
        quantized_node_name_postfix = '_eightbit_requantize'
        # Map each node name to its node and record which ops are already
        # quantized (their names carry the requantize postfix).
        for node in sorted_graph.node:
            node_name = node.name
            if node.op.find("Quantized") != -1:
                node_name = node.name.split(quantized_node_name_postfix)[0]
                graph_q_node_name.append(node_name)
            graph_node_name_mapping[node_name] = node

        for op_info in original_op_list:
            op_name = op_info[0]
            op_type = op_info[1]

            if op_type not in ["conv2d"]:
                continue
            op_name_type_dict[op_name] = op_type

            if op_name in graph_q_node_name:
                q_node_name.append(op_name + quantized_node_name_postfix)
                q_node = graph_node_name_mapping[op_name]
                q_out_min = graph_node_name_mapping[
                    q_node.input[-2]].attr["value"].tensor.float_val[0]
                q_out_max = graph_node_name_mapping[
                    q_node.input[-1]].attr["value"].tensor.float_val[0]
                q_node_scale[op_name +
                             quantized_node_name_postfix] = (q_node.op,
                                                             q_out_min,
                                                             q_out_max)
            else:
                fp32_node_name.append(op_name)
                node_op = graph_node_name_mapping[op_name].op
                if node_op in ("Conv2D", "DepthwiseConv2dNative"):
                    _, matched_nodes = FuseNodeStartWithConv2d(
                        input_graph=sorted_graph,
                        patterns=self.int8_sequences[node_op],
                        remove_redundant_quant_flag=True,
                        op_wise_cfg=(False, "minmax", False),
                        start_node_name=op_name,
                        device=self.device).get_longest_fuse()

                    if matched_nodes:
                        fp32_node_name_mapping[matched_nodes[-1]] = op_name
                else:
                    fp32_node_name_mapping[op_name] = op_name

        InsertLogging(sorted_graph,
                      node_name_list=fp32_node_name_mapping.keys(),
                      message="__KL:",
                      summarize=-1,
                      dump_fp32=True).do_transformation()

        if q_node_name:
            sorted_graph = InsertLogging(sorted_graph,
                                         node_name_list=q_node_name,
                                         message="__KL:",
                                         summarize=-1).do_transformation()

        tmp_dump_file = os.path.join(work_dir, 'kl.log')
        with CaptureOutputToFile(tmp_dump_file):
            self._inference(sorted_graph)

        with open(tmp_dump_file) as f:
            disk_content = f.readlines()

        # Keep only the Print-op lines (";<node name>__print__;__KL:<values>").
        filter_content = (i for i in disk_content if i.startswith(';'))

        dump_tensor_content = {}

        for i in filter_content:
            contents = i.split('__print__;__KL:')
            node_name = contents[0][1:]
            node_content = str2array(contents[1])

            if node_name not in dump_tensor_content:
                dump_tensor_content[node_name] = []
            dump_tensor_content[node_name].append(node_content)

        result_disk = {}
        tensor_iter_idx = iteration_list[0] - 1 if iteration_list else 0
        for k, v in dump_tensor_content.items():
            if k in fp32_node_name_mapping:
                key = fp32_node_name_mapping[k]
                result_disk[(key, op_name_type_dict[key])] = v[tensor_iter_idx]
            else:
                # Strip the requantize postfix to recover the original op name,
                # then dequantize the dumped activation.
                result_key = k.split(quantized_node_name_postfix)[0]
                result_disk[(result_key,
                             op_name_type_dict[result_key])] = self._dequantize(
                                 v[0], q_node_scale[k])
        return result_disk
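
Unlike Example #1, this variant returns one flat dict keyed by (op name, op type), holding tensors for a single iteration. A hedged usage sketch (same hypothetical names as above):

result = adapter.inspect_tensor([('conv1', 'conv2d')],
                                iteration_list=[1],
                                work_dir='/tmp/inspect')
for (name, op_type), tensor in result.items():
    # Each value is the dumped content in np.array format.
    print(name, op_type, tensor.shape)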