Example #1
def nndct_warn_print(string):
  if GLOBAL_MAP.get_ele(NNDCT_KEYS.WARN_FLAG):
    logger = GLOBAL_MAP.get_ele(NNDCT_KEYS.LOGGER)
    if logger:
      logger.warning("[NNDCT_WARN] {}".format(string))
    else:
      print("[NNDCT_WARN] {}".format(string))
Example #2
def nndct_error_print(string):
  if GLOBAL_MAP.get_ele(NNDCT_KEYS.ERROR_FLAG):
    logger = GLOBAL_MAP.get_ele(NNDCT_KEYS.LOGGER)
    if logger:
      logger.error("[NNDCT_ERROR] {}".format(string))
    else:
      print("[NNDCT_ERROR] {}".format(string))
    sys.exit(1)
Example #3
 def _do_map(output_name, node_name):
     if output_name != node_name:
         if not GLOBAL_MAP.get_ele(NNDCT_KEYS.OUTPUT_TO_NODE_MAP):
             GLOBAL_MAP.set_map(NNDCT_KEYS.OUTPUT_TO_NODE_MAP, {})
         if not GLOBAL_MAP.get_ele(NNDCT_KEYS.NODE_TO_OUTPUT_MAP):
             GLOBAL_MAP.set_map(NNDCT_KEYS.NODE_TO_OUTPUT_MAP, {})
         # map output to node (one-to-one)
         output_to_node_map = GLOBAL_MAP.get_ele(
             NNDCT_KEYS.OUTPUT_TO_NODE_MAP)
         if output_name not in output_to_node_map:
             nndct_debug_print(
                 "<map_output_and_node> map output {} and node {}".format(
                     output_name, node_name),
                 level=NNDCT_DEBUG_LVL.BUILD_GRAPH)
             output_to_node_map[output_name] = node_name
         else:
             assert output_to_node_map[output_name] == node_name, (
                 "stored node name for output {} is {}, got new node name {}".format(
                     output_name, output_to_node_map[output_name], node_name))
         # add the output to a list keyed by node_name (one-to-many)
         node_to_output_map = GLOBAL_MAP.get_ele(
             NNDCT_KEYS.NODE_TO_OUTPUT_MAP)
         if node_name not in node_to_output_map:
             node_to_output_map[node_name] = [output_name]
         else:
             node_to_output_map[node_name].append(output_name)
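
A hypothetical pair of calls showing the two registries _do_map maintains (the output and node names are made up for illustration):

# One node may produce several outputs: OUTPUT_TO_NODE_MAP stays one-to-one,
# while NODE_TO_OUTPUT_MAP collects a list of outputs per node.
_do_map("conv1_out0", "conv1")
_do_map("conv1_out1", "conv1")
# OUTPUT_TO_NODE_MAP -> {"conv1_out0": "conv1", "conv1_out1": "conv1"}
# NODE_TO_OUTPUT_MAP -> {"conv1": ["conv1_out0", "conv1_out1"]}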
Example #4
def nndct_debug_print(string, title='', level=1):
  if GLOBAL_MAP.get_ele(NNDCT_KEYS.DEBUG_FLAG) and level <= GLOBAL_MAP.get_ele(
      NNDCT_KEYS.VERBOSE_LEVEL):
    logger = GLOBAL_MAP.get_ele(NNDCT_KEYS.LOGGER)
    if title == 'Start':
      string = "\n********************* <{} : {}> *********************".format(
          title, string)
    elif title == 'End':
      string = "\n********************* <{} : {}> *********************\n".format(
          title, string)
    if logger:
      logger.debug("[NNDCT_DEBUG_Lv_{}] {}".format(level, string))
    else:
      print("[NNDCT_DEBUG_Lv_{}] {}".format(level, string))
Example #5
        def _graph2module(op):
            node = getattr(op, "node", None)
            for param_type, tensor in node.op.params.items():
                py_tensor_util.param_to_torch_format(tensor)

                data = np.copy(tensor.data)
                if node.op.type in [
                        NNDCT_OP.CONVTRANSPOSE2D, NNDCT_OP.CONVTRANSPOSE3D
                ] and param_type == node.op.ParamName.WEIGHTS:
                    # data = data.transpose(1, 0, 2, 3)
                    data = data.swapaxes(0, 1)
                    data = np.ascontiguousarray(data)

                if node.op.type in [
                        NNDCT_OP.DEPTHWISE_CONV2D, NNDCT_OP.DEPTHWISE_CONV3D
                ] and param_type == node.op.ParamName.WEIGHTS:
                    out_channels = node.node_config("out_channels")
                    kernel_size = node.node_config("kernel_size")
                    data = data.reshape((out_channels, 1, *kernel_size))

                if node.op.type in [
                        NNDCT_OP.DEPTHWISE_CONVTRANSPOSE2D,
                        NNDCT_OP.DEPTHWISE_CONVTRANSPOSE3D
                ] and param_type == node.op.ParamName.WEIGHTS:
                    in_channels = node.node_config("in_channels")
                    kernel_size = node.node_config("kernel_size")
                    data = data.reshape((1, in_channels, *kernel_size))
                    data = data.swapaxes(0, 1)
                    data = np.ascontiguousarray(data)

                torch_tensor = torch.from_numpy(data)
                param_name = cls._parameter_map.get(param_type,
                                                    param_type.value)
                if node.has_bound_params():
                    if hasattr(op, param_name):
                        if isinstance(getattr(op, param_name), torch.Tensor):
                            torch_tensor = torch_tensor.to(
                                getattr(op, param_name))
                        else:
                            torch_tensor = torch_tensor.to(
                                getattr(op, param_name).data)

                        if param_name in op._buffers:
                            op._buffers[param_name] = torch_tensor
                        else:
                            op._parameters[param_name] = torch.nn.Parameter(
                                torch_tensor)
                    else:
                        NndctScreenLogger().warning(
                            f"new parameter: '{param_name}' is registered in {node.name}"
                        )
                        op.register_parameter(param_name,
                                              torch.nn.Parameter(torch_tensor))
                else:
                    torch_tensor = torch_tensor.to(
                        device=GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE))
                    module.register_parameter(param_name,
                                              torch.nn.Parameter(torch_tensor))

                py_tensor_util.param_to_nndct_format(tensor)
Example #6
    def forward(self, input):

        [input], _ = process_inputs_and_params(self.node,
                                               self.quantizer,
                                               inputs=[input])

        if NndctOption.nndct_quant_off.value or NndctOption.nndct_cv_app.value:
            output = super().forward(input)
            # quantize output
            [output] = post_quant_process(self.node, [output])
        elif self.quant_mode > 0:
            output = torch.empty_like(input)
            if NndctOption.nndct_tanh_sigmoid_sim.value > 0:
                NndctSigmoidSimulation(input, output)
                [output] = post_quant_process(self.node, [output])
            else:
                input_name = self.node.in_nodes[0]
                fragpos = self.quantizer.get_bnfp(input_name, False)[1]
                quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
                Ttable = SIGMOID_TABLE.table.to(quant_device)
                output = output.to(quant_device)
                NndctSigmoidTableLookup(input, Ttable, output, fragpos)
        else:
            output = super().forward(input)

        return output
Example #7
 def export_quant_config(self):
   """
   `export bitwidth and fixpoint info of blobs and parameters under work dir`
   """
   quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
   if quantizer and quantizer.quant_mode == 1:
     quantizer.export_quant_config()
Example #8
 def wrapper(*args, **kwargs):
   error_flag = GLOBAL_MAP.get_ele(NNDCT_KEYS.ERROR_FLAG)
   if error_flag:
     print("[NNDCT_ERROR]", end='')
   result = func(*args, **kwargs)
   if error_flag:
     sys.exit(1)
   return result
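
wrapper here is the inner function of a decorator. A sketch of a plausible enclosing decorator, assuming the surrounding code looks roughly like this (the name nndct_error_guard and the import path are assumptions):

import functools
import sys

from nndct_shared.utils import GLOBAL_MAP, NNDCT_KEYS  # import path assumed

def nndct_error_guard(func):  # hypothetical name for the enclosing decorator
  @functools.wraps(func)
  def wrapper(*args, **kwargs):
    error_flag = GLOBAL_MAP.get_ele(NNDCT_KEYS.ERROR_FLAG)
    if error_flag:
      print("[NNDCT_ERROR]", end='')
    result = func(*args, **kwargs)
    if error_flag:
      sys.exit(1)
    return result
  return wrapper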
Example #9
 def __init__(self, file_name=None):
     file_name = file_name or GLOBAL_MAP.get_ele(
         NNDCT_KEYS.MODIFIER).nndct_prefix + '.py'
     Exception.__init__(
         self,
         "The rebuilt graph mismatch with original graph, please manually modify '{}' and run again"
         .format(file_name))
Example #10
    def _init_quant_env():
        nonlocal quant_mode
        if NndctOption.nndct_quant_mode.value > 0:
            quant_mode = NndctOption.nndct_quant_mode.value

        if quant_mode == 1:
            NndctScreenLogger().info(
                "Quantization calibration process starting up...")
        elif quant_mode == 2:
            NndctScreenLogger().info("Quantization test process starting up...")

        quantizer = TORCHQuantizer(quant_mode, output_dir, bitwidth_w,
                                   bitwidth_a)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, quant_mode)
        return quantizer, quant_mode
Example #11
 def custom_op(self, node, *args):
     node2caller = GLOBAL_MAP.get_ele(NNDCT_KEYS.NODE_CALLER_MAP)
     if node2caller is None:
         node2caller: Dict[str, Callable] = {}
         GLOBAL_MAP.set_map(NNDCT_KEYS.NODE_CALLER_MAP, node2caller)
     node2caller[node.name] = node.caller
     op = TorchCustomOperation(node.raw_kind, node.raw_kind)
     for i, arg in enumerate(args):
         op.set_config(str(i), arg)
     attrs = GLOBAL_MAP.get_ele(NNDCT_KEYS.CUSTOM_OP_ATTRS_MAP).get(
         node.raw_kind, None)
     if attrs:
         attr_vals = args[-len(attrs):]
         for name, val in zip(attrs, attr_vals):
             op.set_attr_by_name(name, val)
     return op
Example #12
  def set_op_class_type(self, force_to_primitive: bool, schema: "Schema", class_type=None):
    if class_type is not None:
      self.op_class_type = TorchOpClassType.CUSTOM_FUNCTION
    elif schema is not None:
      schema2torchop = GLOBAL_MAP.get_ele(NNDCT_KEYS.TORCH_SCHEMA_OP_TABLE)
      schema_handler = SchemaHelper(schema)
      torchop = schema2torchop[schema_handler.toString()]
      self.op_class_type = torchop.op_class_type
    else:
      if force_to_primitive:
        self.op_class_type = TorchOpClassType.PRIMITIVE
      else:
        if self.op_name in dir(torch.nn):
          self.op_class_type = TorchOpClassType.NN_MODULE
          self.op_name = '.'.join(['torch', 'nn', self.op_name])

        elif self.op_name in dir(torch.nn.functional):
          self.op_class_type = TorchOpClassType.NN_FUNCTION
          self.op_name = '.'.join(['torch', 'nn', 'functional', self.op_name])

        elif self.op_name in dir(torch) and isinstance(getattr(torch, self.op_name), Callable):
          self.op_class_type = TorchOpClassType.TORCH_FUNCTION
          self.op_name = '.'.join(['torch', self.op_name])

        elif self.op_name in dir(torch.Tensor):
          self.op_class_type = TorchOpClassType.TENSOR

        else:
          self.op_class_type = TorchOpClassType.UNKNOWN
Example #13
def build_aten_torch_ops_table():
  op_gathering_fns = (_get_tensor_ops, 
                      _get_nn_functional_ops, 
                      _get_torchscript_builtins, 
                      _get_global_builtins, 
                      _get_math_builtins,
                      )
  schema2torchop = GLOBAL_MAP.get_ele(NNDCT_KEYS.TORCH_SCHEMA_OP_TABLE)
  # schema_lut = GLOBAL_MAP.get_ele(NNDCT_KEYS.SCHEMA_LUT)
  if not schema2torchop:
    schema2torchop: Dict[str, TorchOp] = {}
    GLOBAL_MAP.set_map(NNDCT_KEYS.TORCH_SCHEMA_OP_TABLE, schema2torchop)

    # schema_lut: Dict[Tuple(str, int), "Schema"] = {}
    for fn in op_gathering_fns:
      fn()
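
build_aten_torch_ops_table is a lazy one-time initialization: the table lives in GLOBAL_MAP, so the op-gathering functions run only on the first call. The same pattern in isolation, with a hypothetical helper name:

def get_or_create_table(key):  # hypothetical helper, not part of Vitis-AI
  table = GLOBAL_MAP.get_ele(key)
  if table is None:
    table = {}
    GLOBAL_MAP.set_map(key, table)
  return table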
Example #14
  def dump_xmodel(self, deploy_check=False):
    """
    `dump xmodel for LSTM cell`
    """
    quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
    if quantizer and quantizer.quant_mode > 1:
      compiler = CompilerFactory.get_compiler("xmodel")
      xmodel_dir = os.path.join(self._export_folder, "xmodel")
      create_work_dir(xmodel_dir)
      for info in self._modules_info.values():
        for l_num, layer_graph in enumerate(info["layers_graph"]):
          for lstm_direction, graph in layer_graph.items():
            try:
              compiler.do_compile(
                  nndct_graph=graph,
                  quant_config_info=quantizer.quant_config,
                  output_file_name=os.path.join(xmodel_dir, graph.name),
                  graph_attr_kwargs={"direction": lstm_direction})
            except Exception as e:
              print(
                  f"[NNDCT_ERROR]: failed to convert nndct graph to xmodel ({str(e)})."
              )
            else:
              print("[NNDCT_NOTE]: Successfully converted nndct graph to xmodel!")

      if deploy_check:
        print("[NNDCT_NOTE]: Dumping checking data...")
        checker = DeployChecker(
            output_dir_name=self._export_folder, data_format="txt")     
        
        # get timestep output
        for name, info in self._layers_info.items():
          cell = info["cell_module"]
          layer = info["layer_module"]
          graph = info["graph"]
          if layer.input is None:
            warnings.warn(
                f"[NNDCT_WARNING]: Provide inputs for '{name}' when doing deploy checking",
                RuntimeWarning)
            continue
          
          set_outputs_recorder_status(cell, True)
          layer(layer.input, layer.initial_state, layer.batch_lengths)

          for timestep in range(layer.input.size()[1]):
            enable_dump_weight = timestep == 0
            update_nndct_blob_data(cell, graph, timestep)
            checker.update_dump_folder(f"{graph.name}/frame_{timestep}")
            checker.dump_nodes_output(
                graph,
                quantizer.quant_config,
                round_method=quantizer.quant_opt['round_method'],
                enable_dump_weight=enable_dump_weight)
          
          set_outputs_recorder_status(cell, False)

        print("[NNDCT_NOTE]: Finsh dumping data.")
Example #15
    def convert_to_deployable(self, trained_model, mix_bit=False):
        if not self._qinfo_to_quantizer or not self._module_map:
            raise RuntimeError('Must call "trainable_model" first.')

        # Copy trained parameters from transformed model to original float model.
        orig_state_dict = self._model.state_dict()
        trained_state_dict = trained_model.state_dict()
        state_dict = {}
        for key in orig_state_dict.keys():
            module_name, weight_name = key.rsplit('.', 1)
            if module_name in self._module_map:
                trained_module_name = self._module_map[module_name]
                trained_key = '.'.join([trained_module_name, weight_name])
            else:
                trained_key = key
            state_dict[key] = trained_state_dict[trained_key]
        model = copy.deepcopy(self._model)
        model.load_state_dict(state_dict)
        model.eval()
        '''
    inputs = dummy_inputs(self._input_specs)
    qprocessor = qproc.TorchQuantProcessor(
        'test',
        model,
        [inp.cuda() for inp in inputs],
        mix_bit=mix_bit,
        device=torch.device('cuda'))
    '''
        inputs = self._input_args
        qprocessor = qproc.TorchQuantProcessor('test',
                                               model,
                                               inputs,
                                               mix_bit=mix_bit,
                                               device=torch.device('cuda'))

        quantizer = qprocessor.quantizer
        self._fill_in_quant_info(quantizer, self._qinfo_to_quantizer)
        quantizer.export_quant_config()

        quant_model = quantizer.quant_model
        quant_model.dump_xmodel = dump_xmodel
        self.deploy_quantizer = quantizer
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
        NndctScreenLogger().info(f"=>Deployable model is generated.")
Example #16
File: api.py  Project: Xilinx/Vitis-AI
def tf_quantizer(model,
                 input_signature,
                 quant_mode: str = "calib",
                 output_dir: str = "quantize_result",
                 bitwidth: int = 8):
    # initialize quant mode
    qmode = _init_quant_mode(quant_mode)

    # turn off weights equalization and bias correction
    option_util.set_option_value("nndct_param_corr", False)
    option_util.set_option_value("nndct_equalization", False)

    # the LSTM IP only supports 16-bit activations
    quantizer = TFQuantizer(qmode, output_dir, bitwidth, 16)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)

    graph = parser.from_keras_model(model, input_signature)
    quant_model, layer_nodes = builder.KerasBuilder(graph).build(
        os.path.join(output_dir, model.name + '_quant.py'), quantized=True)

    rebuilding_results = _maybe_rebuild_rnn(quant_model)
    if rebuilding_results:
        cell_graphs = []
        cell_layer_nodes = []
        for graph, layer_nodes in rebuilding_results:
            cell_graphs.append(graph)
            cell_layer_nodes.extend(layer_nodes)
            quantizer.add_rnn_cell_graph('forward', graph)

        graph = _merge_cell_graphs(cell_graphs)
        layer_nodes = cell_layer_nodes
        # TODO(yuwang): Support backward direction.

    export_file = os.path.join(output_dir, 'merged_graph.pb')
    graph_utils.maybe_export_graph(export_file, graph)

    lstm = bool(rebuilding_results)
    quantizer.setup(graph, lstm=lstm)
    quantizer.load_node_to_layer(layer_nodes, quant_model)

    return quantizer
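
A usage sketch for tf_quantizer, assuming a stock Keras model and a tf.TensorSpec input signature (the model choice and shapes are illustrative):

import tensorflow as tf

model = tf.keras.applications.MobileNet()
input_signature = tf.TensorSpec((1, 224, 224, 3), tf.float32)
quantizer = tf_quantizer(model, input_signature, quant_mode="calib",
                         output_dir="quantize_result")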
Example #17
 def calib_global_param(self):
     quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
     for tensor_type, algo_dict in self._QuantAlgo.items():
         for name, algo in algo_dict.items():
             if not algo.statistic_local:
                 q_config = self.get_quant_config(name, False, tensor_type)
                 if q_config[0] < 32:
                     algo.calib_global_statis(quant_device)
                     q_config[1], q_config[2], q_config[3] = (
                         algo.scale, algo.zero_point, algo.float_max)
                 self.set_quant_config(name, q_config, tensor_type)
Example #18
def dump_xmodel(output_dir="quantize_result", deploy_check=False):
    r"""converts module to xmodel for deployment
  compilation only works when quantm model = 2.
  The xmodel and some checking data will be generated under work dir.

  Args:
    deploy_check(bool): if true, can dump blobs and parameters of model for deployment verification

  Returns:
    None
  """
    quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
    if quantizer and quantizer.quant_mode > 1:
        nndct_utils.create_work_dir(output_dir)

        # compile to xmodel

        compiler = CompilerFactory.get_compiler("xmodel")

        NndctScreenLogger().info("=>Converting to xmodel ...")
        deploy_graphs = get_deploy_graph_list(quantizer.quant_model,
                                              quantizer.Nndctgraph)
        deploy_infos = compiler.get_deloy_graph_infos(quantizer, deploy_graphs)

        for deploy_info in deploy_infos:
            try:
                compiler.do_compile(deploy_info.dev_graph,
                                    quant_config_info=deploy_info.quant_info,
                                    output_file_name=os.path.join(
                                        output_dir,
                                        deploy_info.dev_graph.name))

            except AddXopError as e:
                NndctScreenLogger().error(
                    f"Failed to convert graph '{deploy_info.dev_graph.name}' to xmodel ({str(e)})."
                )

            # dump data for accuracy check
            if deploy_check:
                NndctScreenLogger().info(
                    f"=>Dumping '{deploy_info.dev_graph.name}' checking data..."
                )
                checker = DeployChecker(output_dir_name=output_dir)
                checker.update_dump_folder(f"{deploy_info.dev_graph.name}")
                checker.dump_nodes_output(
                    deploy_info.dev_graph,
                    deploy_info.quant_info,
                    round_method=quantizer.quant_opt['round_method'],
                    select_batch=False)

                NndctScreenLogger().info(
                    f"=>Finished dumping data. ({checker.dump_folder})")

        set_outputs_recorder_status(quantizer.quant_model, False)
Example #19
    def default(self, node, *args):
        schema2torchop = GLOBAL_MAP.get_ele(NNDCT_KEYS.TORCH_SCHEMA_OP_TABLE)
        schema_handler = SchemaHelper(node.schema)
        torchop = schema2torchop.get(schema_handler.toString(), None)
        if torchop is None:
            op = TorchUnknownOperation(node.raw_kind)
            return op
        node2caller = GLOBAL_MAP.get_ele(NNDCT_KEYS.NODE_CALLER_MAP)
        if node2caller is None:
            node2caller: Dict[str, Callable] = {}
            GLOBAL_MAP.set_map(NNDCT_KEYS.NODE_CALLER_MAP, node2caller)
        node2caller[node.name] = torchop.caller
        op = TorchBaseOperation(schema_handler.op_name,
                                torchop.name,
                                schema=node.schema)
        # op.set_caller(torchop.caller)
        assert len(args) == len(schema_handler.get_arguments())
        if len(args) == 1:
            return op
        arg_name_convertor = {"self": "input"}
        for inp, arg in zip(args, schema_handler.get_arguments()):
            arg_name = schema_handler.arg_name(arg)
            if torchop.op_class_type == TorchOpClassType.TENSOR and arg_name == "self":
                continue
            if arg_name in ["layout", "memory_format", "pin_memory"]:
                continue
            config_name = arg_name_convertor.get(arg_name, arg_name)
            arg_type = convert_type_str(schema_handler.arg_type(arg)).replace("?", "")
            if arg_type == "bool":
                inp = bool(inp) if inp is not None else inp
            if arg_type == "str":
                inp = f"'{inp}'" if inp is not None else inp

            if arg_name == "device":
                inp = f"'{self._device_type}'"
            if arg_name == "dtype":
                inp = scalar_type_to_pytorch_type[
                    inp] if inp is not None else inp
            op.set_config(config_name, inp)
        return op
Example #20
 def export_traced_torch_script(self, output_dir, verbose=False):
     torch_version = torch.__version__.split('.')
     if int(torch_version[0]) == 1 and int(torch_version[1]) < 7:
         NndctScreenLogger().error(
             'Exporting torch script requires PyTorch 1.7 or later.')
         return
     self.quantizer.reset_status_for_exporting()
     device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
     force_cpu = os.getenv('NNDCT_FORCE_CPU_DUMP')
     if force_cpu is not None:
         device = torch.device('cpu')
         GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)
     model, input_args = to_device(self.quantizer.quant_model,
                                   self._example_inputs, device)
     script_module = torch.jit.trace(model, input_args, check_trace=False)
     output_file = os.path.join(
         output_dir, f"{self.quantizer.quant_model._get_name()}_int.pt")
     if verbose:
         print(script_module.inlined_graph)
     torch.jit.save(script_module, output_file)
Example #21
    def __init__(self,
                 quant_mode: str,
                 module: torch.nn.Module,
                 input_args: Union[torch.Tensor, Sequence[Any]] = None,
                 state_dict_file: Optional[str] = None,
                 output_dir: str = "quantize_result",
                 bitwidth_w: int = 8,
                 bitwidth_a: int = 8,
                 mix_bit: bool = False,
                 device: torch.device = torch.device("cuda"),
                 lstm_app: bool = False):
        # Check arguments type
        self._check_args(module, input_args)

        # Check device available
        if device.type == "cuda":
            if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
                device = torch.device("cpu")
                NndctScreenLogger().warning(
                    "CUDA is not available; falling back to CPU.")

        # Transform torch module to quantized module format
        nndct_utils.create_work_dir(output_dir)

        # Create a quantizer object, which controls the whole quantization flow.
        quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
                                          bits_bias=bitwidth_a,
                                          bits_activation=bitwidth_a,
                                          mix_bit=mix_bit)
        quantizer, qmode = self._init_quant_env(quant_mode, output_dir,
                                                quant_strategy)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)
        option_util.set_option_value("nndct_cv_app", not lstm_app)

        # Prepare quantizable module
        quant_module, graph = prepare_quantizable_module(
            module=module,
            input_args=input_args,
            export_folder=output_dir,
            state_dict_file=state_dict_file,
            quant_mode=qmode,
            device=device)

        # enable recording of per-layer outputs
        if qmode > 1:
            register_output_hook(quant_module, record_once=True)
            set_outputs_recorder_status(quant_module, True)

        # initialize the quantizer
        quantizer.setup(graph, False, lstm_app)

        # hook module with quantizer
        # connect_module_with_quantizer(quant_module, quantizer)
        quantizer.quant_model = quant_module

        self.quantizer = quantizer
        self.adaquant = None
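
A minimal instantiation sketch, assuming this __init__ belongs to TorchQuantProcessor (as Example #15 suggests); the toy model and dummy input are placeholders:

import torch

float_model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU())
processor = TorchQuantProcessor(
    quant_mode="calib",
    module=float_model,
    input_args=torch.randn(1, 3, 224, 224),
    output_dir="quantize_result",
    device=torch.device("cpu"))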
Example #22
    def do_quantize(self, blob, name, node=None, tensor_type='input'):
        # forward quant graph but not quantize parameter and activation
        if NndctOption.nndct_quant_off.value:
            return blob

        blob_save = blob
        if isinstance(blob.values, torch.Tensor):
            blob = blob.values

        quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
        if blob.device.type != quant_device.type:
            raise TypeError(
                "Quantizer device is {}; the model and data must be on the same device as the quantizer."
                .format(quant_device.type))

        if (NndctOption.nndct_quant_opt.value
                and NndctOption.nndct_logging_level.value > 0):
            quant_data = nndct_quant.QuantizeData(name,
                                                  blob.cpu().detach().numpy())
        # quantize the tensor
        bnfp = self.get_bnfp(name, True, tensor_type)
        #print('---- quant %s with 1/step = %g' % (name, bnfp[1]))
        # hardware cut method
        mth = 4 if self.lstm else 2
        if tensor_type == 'param':
            mth = 3

        res = py_nndct.nn.NndctFixNeuron(blob,
                                         blob,
                                         maxamp=[bnfp[0], bnfp[1]],
                                         method=mth)

        if (NndctOption.nndct_quant_opt.value
                and NndctOption.nndct_logging_level.value > 0):
            global global_snr_inv
            quant_efficiency, sqnr = quant_data.quant_efficiency(
                blob.cpu().detach().numpy(), 8)
            global_snr_inv += 1 / sqnr
            print(
                f"quant_efficiency={quant_efficiency}, global_snr_inv={global_snr_inv} {quant_data._name}\n"
            )

        # update param to nndct graph
        if tensor_type == 'param':
            self.update_param_to_nndct(node, name, res.cpu().detach().numpy())

        # NndctFixNeuron quantizes blob in place, so the original container in
        # blob_save already holds the quantized data; return it.
        blob = blob_save
        res = blob_save

        return res
Example #23
def node_from_output(output_name, model_type):
    if model_type == 'Nndct':
        return output_name
    if model_type == 'tensorflow':
        output_name = output_name.split(':')[0]
    elif model_type == 'torch':
        if output_name.split('_')[-1] in ['backward', 'forward']:
            output_name = output_name.rsplit('_', 1)[0]
    else:
        raise KeyError("node_from_output is not available for model type " +
                       str(model_type))
    output_map = GLOBAL_MAP.get_ele(NNDCT_KEYS.OUTPUT_TO_NODE_MAP)
    if output_map and output_name in output_map:
        return output_map[output_name]
    return output_name
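
Hypothetical inputs showing the per-framework normalization that happens before the OUTPUT_TO_NODE_MAP lookup:

node_from_output("dense/BiasAdd:0", "tensorflow")  # strips ":0" -> "dense/BiasAdd"
node_from_output("relu_1_forward", "torch")        # strips the suffix -> "relu_1"
node_from_output("conv1", "Nndct")                 # returned unchanged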
Example #24
  def finetune_v2(self, run_fn, run_args):
    # check status
    if self.quantizer.quant_mode == 2:
      NndctScreenLogger().warning(f"Finetune function will be ignored in test mode!")
      return    
    
    # parameter finetuning
   
    with AdaQuant(processor=self):
      # calibration to get a set of quantization steps
      NndctScreenLogger().info(f"=>Preparing data for fast finetuning module parameters ...")   
      with NoQuant():
        net_inputs, net_outputs = self.cache_net_inpouts(run_fn, run_args)
      
      NndctScreenLogger().info(f"=>Find initial quantization steps for fast finetuning...")
      self.calibrate(run_fn, run_args)
      
      NndctScreenLogger().info(f"=>Fast finetuning module parameters for better quantization accuracy...")
      self.setup_test()    
      device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)  
      
      initial_net_loss = self.calc_net_loss(net_inputs, net_outputs, device)
      
      layer_act_pair = self.collect_layer_act_pair()  
      
      finetune_group = []
      for qmod, fmod in zip(self._quant_model.modules(), self._float_model.modules()):
        if hasattr(qmod, "node"):
          if (self.quantizer.configer.is_node_quantizable(qmod.node, False) and 
            len(qmod.node.op.params) > 0):     
            finetune_group.append([qmod.node, fmod])

      net_loss = initial_net_loss
      for idx, (qnode, fmod) in tqdm(enumerate(finetune_group), total=len(finetune_group)):
        is_cached = self.is_cached(qnode, len(net_inputs[0]))
        need_cache = is_cached and idx >= len(finetune_group) / 2

        net_loss = self.optimize_layer_v2(qnode, fmod, layer_act_pair, net_inputs, net_outputs, net_loss, device, need_cache)
      print(f"final optimized net loss: {net_loss.avg}")

        # print(f"{qnode.name}({need_cache}):{net_loss}")
            
    NndctScreenLogger().info(f"=>Export fast finetuned parameters ...")
    # export finetuned parameters
    self.quantizer.export_param()
示例#25
0
  def clone_quant_module(cls, quant_module):
    quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
  
    if _is_module_hooked(quant_module):
      cls.detach_node_from_module(quant_module)
      cls.hook_module_with_quantizer(quant_module, None)
      new_quant_module = copy.deepcopy(quant_module)
      cls.hook_module_with_node(quant_module, quantizer.graph)
      cls.hook_module_with_quantizer(quant_module, quantizer)
      new_graph = Graph(graph_name=quantizer.graph.name)
      new_graph.clone_from(quantizer.graph)
      cls.hook_module_with_node(new_quant_module, new_graph)
      cls.hook_module_with_quantizer(new_quant_module, quantizer)
    else:
      new_quant_module = copy.deepcopy(quant_module)

    return new_quant_module
Example #26
def dump_xmodel(output_dir="quantize_result", deploy_check=False):
    r"""converts module to xmodel for deployment
  compilation only works when quantm model = 2.
  The xmodel and some checking data will be generated under work dir.

  Args:
    deploy_check(bool): if true, can dump blobs and parameters of model for deployment verification

  Returns:
    None
  """
    quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
    if quantizer and quantizer.quant_mode > 1:
        nndct_utils.create_work_dir(output_dir)

        # compile to xmodel
        try:
            compiler = CompilerFactory.get_compiler("xmodel")
            NndctScreenLogger().info("=>Converting to xmodel ...")
            compiler.do_compile(nndct_graph=quantizer.Nndctgraph,
                                quant_config_info=quantizer.quant_config,
                                output_file_name=os.path.join(
                                    output_dir, quantizer.Nndctgraph.name))

        except AddXopError as e:
            NndctScreenLogger().error(
                f"Failed to convert nndct graph to xmodel ({str(e)}).")
        else:
            NndctScreenLogger().info(
                f"=>Successfully converted to xmodel. ({compiler.xmodel_file})")

        # dump data for accuracy check
        if deploy_check:
            NndctScreenLogger().info("=>Dumping checking data...")
            update_nndct_blob_data(quantizer.quant_model, quantizer.Nndctgraph)
            checker = DeployChecker(output_dir_name=output_dir)
            checker.dump_nodes_output(
                quantizer.Nndctgraph,
                quantizer.quant_config,
                round_method=quantizer.quant_opt['round_method'])

            set_outputs_recorder_status(quantizer.quant_model, False)
            NndctScreenLogger().info(
                f"=>Finished dumping data. ({checker.dump_folder})")
Example #27
    def export_onnx_model(self, output_dir, verbose=False):
        from torch.onnx import register_custom_op_symbolic
        from torch.onnx.symbolic_helper import parse_args
        import sys
        torch_version = torch.__version__.split('.')
        if int(torch_version[0]) == 1 and int(torch_version[1]) < 7:
            NndctScreenLogger().error(
                'Exporting an ONNX model requires PyTorch 1.7 or later.')
            return

        @parse_args("v", "i", "i", "f", "i", "i", "i", "i")
        def symbolic_fix_neuron(g, input, valmin, valmax, valamp, zero_point,
                                method, device_id, inplace):
            #print(f'{valmax} {valamp} {method} {device_id}')
            if valamp < sys.float_info.min:
                # avoid the exporter generating a double type
                scale = torch.tensor(sys.float_info.max).float()
            else:
                # avoid the exporter generating a double type
                scale = torch.tensor(1.0 / valamp).float()
            zero_point = torch.tensor(
                0, dtype=torch.int8)  # ONNX requires zero_point to be tensor
            return g.op("DequantizeLinear",
                        g.op("QuantizeLinear", input, scale, zero_point),
                        scale, zero_point)

        register_custom_op_symbolic("vai::fix_neuron", symbolic_fix_neuron, 9)
        output_file = os.path.join(
            output_dir, f"{self.quantizer.quant_model._get_name()}_int.onnx")
        opset_version = torch.onnx.symbolic_helper._onnx_stable_opsets[-1]
        device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
        self.quantizer.reset_status_for_exporting()
        model, input_args = to_device(self.quantizer.quant_model,
                                      self._example_inputs, device)
        torch.onnx.export(model,
                          input_args,
                          output_file,
                          verbose=verbose,
                          opset_version=opset_version)
Example #28
File: tanh.py  Project: Xilinx/Vitis-AI
    def forward(self, input):

        qinput = quantize_tensors([input], self.node, tensor_type='input')[0]

        if NndctOption.nndct_quant_off.value or NndctOption.nndct_cv_app.value:
            output = super().forward(qinput)
            output = quantize_tensors([output], self.node)[0]
        elif self.quant_mode > 0:
            output = torch.empty_like(qinput)
            if NndctOption.nndct_tanh_sigmoid_sim.value > 0:
                NndctTanhSimulation(input, output)
                output = quantize_tensors([output], self.node)[0]
            else:
                input_name = self.node.in_nodes[0]
                fragpos = self.quantizer.get_quant_config(input_name, False)[1]
                quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
                Ttable = TANH_TABLE.table.to(quant_device)
                output = output.to(quant_device)
                NndctTanhTableLookup(input, Ttable, output, fragpos)
        else:
            output = super().forward(qinput)

        return output
Example #29
def maybe_get_quantizer(quantizer=None):
    quantizer = quantizer or GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
    if quantizer:
        return quantizer.quant_mode, quantizer
    else:
        return GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_MODE), None
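
A sketch of the call pattern: callers either pass an explicit quantizer or let the function fall back to the registry (the call site is hypothetical):

quant_mode, quantizer = maybe_get_quantizer()
if quantizer is not None:
  # a quantizer was registered under NNDCT_KEYS.QUANTIZER
  print(quant_mode, quantizer.quant_config)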
Example #30
    def do_scan(self, res, name, node=None, tensor_type='input'):
        # keep quantization steps after fast finetune
        if self.keep_fp:
            return self.do_quantize(res, name, node, tensor_type)

        # forward quant graph but not quantize parameter and activation
        if NndctOption.nndct_quant_off.value:
            if self.inplace:
                return res
            else:
                return res.clone().detach()

        res_save = None
        if isinstance(res.values, torch.Tensor):
            res_save = res
            res = res.values.data

        if res.dtype not in (torch.float32, torch.double):
            NndctScreenLogger().warning_once(
                f'The tensor type of {node.name} is {str(res.dtype)}. Only float32/double quantization is supported.'
            )
            return res_save if res_save is not None else res

        quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
        if res.device.type != quant_device.type:
            raise TypeError(
                "Quantizer device is {}; the model and data must be on the same device as the quantizer."
                .format(quant_device.type))

        # get fixed position
        bnfp = self.get_quant_config(name, False, tensor_type)

        # hardware cut method
        mth = 4 if self.lstm else 2

        if NndctOption.nndct_use_torch_quantizer.value is True:
            mth = -1
        elif tensor_type == 'param':
            mth = 3

        scope = 5 if NndctOption.nndct_diffs_mode.value == "mse" else 1
        # set fix pos scanning scope to 1 for some type of tensors
        if (node.op.type in [NNDCT_OP.INPUT, NNDCT_OP.QUANT_STUB]):
            scope = 1
        if (self.lstm and tensor_type == 'input'):
            scope = 1
            res = res.detach().clone()

        Tbuffer = torch.empty_like(res).to(quant_device)
        Tfixpos = torch.tensor(
            [1], dtype=torch.get_default_dtype()).to(quant_device)

        # activations always recalculate the fix pos;
        # parameters calculate it only when it is None,
        # and always recalculate in finetune mode (quant_mode == 3)
        if tensor_type != 'param' or bnfp[1] is None or self.quant_mode == 3:
            py_nndct.nn.NndctDiffsFixPos(Tinput=res,
                                         Tbuffer=Tbuffer,
                                         Tfixpos=Tfixpos,
                                         bit_width=bnfp[0],
                                         range=scope,
                                         method=mth)
            bnfp[1] = int(Tfixpos.item())
            # limit the max fix pos via nndct_max_fix_position (default 12)
            # when bit width <= 8 or lstm; otherwise limit it to 15
            if bnfp[0] <= 8 or self.lstm:
                max_fp = NndctOption.nndct_max_fix_position.value
                bnfp[1] = min(max_fp, bnfp[1])
            else:
                bnfp[1] = min(15, bnfp[1])
            # record fix pos of activation
            if tensor_type != 'param':
                self.config_history[tensor_type][name].append(bnfp[1])
                if (NndctOption.nndct_stat.value > 1):
                    print(
                        f'---- fp history: {stats.mode(np.array(self.config_history[tensor_type][name]))}'
                    )
                data = np.array(self.config_history[tensor_type][name])
                bnfp[1] = stats.mode(data)[0][0]
                bnfp[1] = bnfp[1].astype(np.int32).tolist()
            self.set_quant_config(name, bnfp, tensor_type)
            if (NndctOption.nndct_stat.value > 1):
                print('---- quant %s tensor: %s with bw = %d and fp = %g' %
                      (tensor_type, name, bnfp[0], bnfp[1]))

            # get 2^bit_width and 2^fracpos
            bnfp = self.get_quant_config(name, True, tensor_type)

            if (NndctOption.nndct_stat.value > 2):
                quant_data = nndct_quant.QuantizeData(
                    name,
                    res.cpu().detach().numpy())

            # do quantization for parameter or activation
            res = fake_quantize_per_tensor(res, bnfp[1], 0, -bnfp[0],
                                           bnfp[0] - 1, mth, self.inplace)

            if (NndctOption.nndct_stat.value > 2):
                #quant_data.all_close(res.cpu().detach().numpy())
                global global_snr_inv
                quant_efficiency, sqnr = quant_data.quant_efficiency(
                    res.cpu().detach().numpy(), math.log2(bnfp[0]))
                global_snr_inv += 1 / sqnr
                if quant_efficiency < 3.0:
                    print(
                        f"quant_efficiency={quant_efficiency}, {quant_data._name}\n"
                    )
                    print('Statistic [Min, Max, Mean, Std]:')
                    print('[{}, {}, {}, {}]'.format(res.min(), res.max(),
                                                    res.mean(), res.std()))
                    print('histogram: {}'.format(
                        res.histc(bins=10).cpu().detach().numpy()))
                    t = res
                    if tensor_type != 'param':
                        t = res.transpose(0, 1)
                    print('Channel number:{}'.format(t.shape[0]))
                    print('Channel-wise statistic [Min, Max, Mean, Std]:')
                    for c in range(t.shape[0]):
                        print('[{}, {}, {}, {}]'.format(
                            t[c].min(), t[c].max(), t[c].mean(), t[c].std()))
                        print('histogram: {}'.format(
                            t[c].histc(bins=10).cpu().detach().numpy()))

        if res_save is not None:
            res_save.values.data = res
            res = res_save

        return res