Example #1
def prepare_quantizable_module(
    module: torch.nn.Module,
    input_args: Union[torch.Tensor, Sequence[Any]],
    export_folder: str,
    state_dict_file: Optional[str] = None,
    quant_mode: int = 1,
    device: torch.device = torch.device("cuda")
) -> Tuple[torch.nn.Module, Graph]:

    nndct_utils.create_work_dir(export_folder)

    if isinstance(state_dict_file, str):
        state_dict = torch.load(state_dict_file)
        module.load_state_dict(state_dict)

    export_file = os.path.join(export_folder,
                               module._get_name() + TorchSymbol.SCRIPT_SUFFIX)

    # switch to specified device
    module, input_args = to_device(module, input_args, device)

    # parse the original module into a graph
    NndctScreenLogger().info(f"=>Parsing {module._get_name()}...")
    graph = parse_module(module, input_args)
    NndctScreenLogger().info(
        f"=>Quantizable module is generated.({export_file})")
    # recreate quantizable module from graph
    quant_module = recreate_nndct_module(graph, True, export_file).to(device)
    quant_module.train(mode=module.training)
    # hook module with graph
    connect_module_with_graph(quant_module, graph)

    return quant_module, graph
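
A minimal usage sketch for prepare_quantizable_module, assuming it can be imported from pytorch_nndct.qproc.utils (the exact module path is an assumption) and using a torchvision model as the float input:

import torch
import torchvision

# Assumed import path; the function may live elsewhere inside pytorch_nndct.
from pytorch_nndct.qproc.utils import prepare_quantizable_module

float_model = torchvision.models.resnet18()
dummy_input = torch.randn(1, 3, 224, 224)

# Parse the float model and rebuild it as a quantizable module plus its NNDCT graph.
quant_module, graph = prepare_quantizable_module(
    module=float_model,
    input_args=dummy_input,
    export_folder="quantize_result",
    quant_mode=1,                  # 1: calibration, 2: test/deploy
    device=torch.device("cpu"))    # CPU keeps the sketch runnable without CUDA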
Example #2
    def __init__(self,
                 quant_mode: str,
                 module: torch.nn.Module,
                 input_args: Union[torch.Tensor, Sequence[Any]] = None,
                 state_dict_file: Optional[str] = None,
                 output_dir: str = "quantize_result",
                 bitwidth_w: int = 8,
                 bitwidth_a: int = 8,
                 mix_bit: bool = False,
                 device: torch.device = torch.device("cuda"),
                 lstm_app: bool = False):
        # Check arguments type
        self._check_args(module, input_args)

        # Check device available
        if device.type == "cuda":
            if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
                device = torch.device("cpu")
                NndctScreenLogger().warning(
                    "CUDA is not available, changing device to CPU.")

        # Transform torch module to quantized module format
        nndct_utils.create_work_dir(output_dir)

        # Create a quantizer object, which controls the whole quantization flow
        quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
                                          bits_bias=bitwidth_a,
                                          bits_activation=bitwidth_a,
                                          mix_bit=mix_bit)
        quantizer, qmode = self._init_quant_env(quant_mode, output_dir,
                                                quant_strategy)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)
        if lstm_app:
            option_util.set_option_value("nndct_cv_app", False)
        else:
            option_util.set_option_value("nndct_cv_app", True)

        # Prepare quantizable module
        quant_module, graph = prepare_quantizable_module(
            module=module,
            input_args=input_args,
            export_folder=output_dir,
            state_dict_file=state_dict_file,
            quant_mode=qmode,
            device=device)

        # enable recording of per-layer outputs
        if qmode > 1:
            register_output_hook(quant_module, record_once=True)
            set_outputs_recorder_status(quant_module, True)

        # initialize quantizer
        quantizer.setup(graph, False, lstm_app)

        # hook module with quantizer
        # connect_module_with_quantizer(quant_module, quantizer)
        quantizer.quant_model = quant_module

        self.quantizer = quantizer
        self.adaquant = None
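
The enclosing class is not named in this snippet; the sketch below assumes it is the TorchQuantProcessor exported from pytorch_nndct.qproc (both the class name and the import path are assumptions) and shows a direct construction followed by a calibration forward pass:

import torch
import torch.nn as nn

# Class name and import path are assumed; only the __init__ signature is shown above.
from pytorch_nndct.qproc import TorchQuantProcessor

class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, kernel_size=3, padding=1)
        self.fc = nn.Linear(8 * 32 * 32, 10)

    def forward(self, x):
        return self.fc(torch.relu(self.conv(x)).flatten(1))

dummy_input = torch.randn(1, 3, 32, 32)
processor = TorchQuantProcessor(
    quant_mode="calib",             # "calib" collects statistics, "test" uses them
    module=TinyNet().eval(),
    input_args=dummy_input,
    output_dir="quantize_result",
    bitwidth_w=8,
    bitwidth_a=8,
    device=torch.device("cpu"))

# The rebuilt quantizable module is held by the quantizer created in __init__;
# forward passes on it collect the calibration data.
_ = processor.quantizer.quant_model(dummy_input)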
Example #3
  def dump_xmodel(self, deploy_check=False):
    """
    `dump xmodel for LSTM cell`
    """
    quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
    if quantizer and quantizer.quant_mode > 1:
      compiler = CompilerFactory.get_compiler("xmodel")
      xmodel_dir = os.path.join(self._export_folder, "xmodel")
      create_work_dir(xmodel_dir)
      for info in self._modules_info.values():
        for l_num, layer_graph in enumerate(info["layers_graph"]):
          for lstm_direction, graph in layer_graph.items():
            try:
              compiler.do_compile(
                  nndct_graph=graph,
                  quant_config_info=quantizer.quant_config,
                  output_file_name=os.path.join(xmodel_dir, graph.name),
                  graph_attr_kwargs={"direction": lstm_direction})
            except Exception as e:
              print(
                  f"[NNDCT_ERROR]:failed convert nndct graph to xmodel({str(e)})."
              )

            else:
              print("[NNDCT_NOTE]:Successfully convert nndct graph to xmodel!")

      if deploy_check:
        print("[NNDCT_NOTE]: Dumping checking data...")
        checker = DeployChecker(
            output_dir_name=self._export_folder, data_format="txt")     
        
        # get timestep output
        for name, info in self._layers_info.items():
          cell = info["cell_module"]
          layer = info["layer_module"]
          graph = info["graph"]
          if layer.input is None:
            warnings.warn(
                f"[NNDCT_WARNING]: Provide inputs for '{name}' when do deploy checking",
                RuntimeWarning)
            continue
          
          set_outputs_recorder_status(cell, True)
          layer(layer.input, layer.initial_state, layer.batch_lengths)

          for timestep in range(layer.input.size()[1]):
            enable_dump_weight = (timestep == 0)
            update_nndct_blob_data(cell, graph, timestep)
            checker.update_dump_folder(f"{graph.name}/frame_{timestep}")
            checker.dump_nodes_output(
                graph,
                quantizer.quant_config,
                round_method=quantizer.quant_opt['round_method'],
                enable_dump_weight=enable_dump_weight)
          
          set_outputs_recorder_status(cell, False)

        print("[NNDCT_NOTE]: Finsh dumping data.")
Example #4
def dump_xmodel(output_dir="quantize_result", deploy_check=False):
    r"""converts module to xmodel for deployment
  compilation only works when quantm model = 2.
  The xmodel and some checking data will be generated under work dir.

  Args:
    deploy_check(bool): if true, can dump blobs and parameters of model for deployment verification

  Returns:
    None
  """
    quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
    if quantizer and quantizer.quant_mode > 1:
        nndct_utils.create_work_dir(output_dir)

        # compile to xmodel

        compiler = CompilerFactory.get_compiler("xmodel")

        NndctScreenLogger().info("=>Converting to xmodel ...")
        deploy_graphs = get_deploy_graph_list(quantizer.quant_model,
                                              quantizer.Nndctgraph)
        depoly_infos = compiler.get_deloy_graph_infos(quantizer, deploy_graphs)

        for depoly_info in depoly_infos:
            try:
                compiler.do_compile(depoly_info.dev_graph,
                                    quant_config_info=depoly_info.quant_info,
                                    output_file_name=os.path.join(
                                        output_dir,
                                        depoly_info.dev_graph.name))

            except AddXopError as e:
                NndctScreenLogger().error(
                    f"Failed convert graph '{depoly_info.dev_graph.name}' to xmodel({str(e)})."
                )

            # dump data for accuracy check
            if deploy_check:
                NndctScreenLogger().info(
                    f"=>Dumping '{depoly_info.dev_graph.name}'' checking data..."
                )
                checker = DeployChecker(output_dir_name=output_dir)
                checker.update_dump_folder(f"{depoly_info.dev_graph.name}")
                checker.dump_nodes_output(
                    depoly_info.dev_graph,
                    depoly_info.quant_info,
                    round_method=quantizer.quant_opt['round_method'],
                    select_batch=False)

                NndctScreenLogger().info(
                    f"=>Finsh dumping data.({checker.dump_folder})")

        set_outputs_recorder_status(quantizer.quant_model, False)
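
Since compilation above only runs in test mode (quant_mode = 2), dump_xmodel is normally the last step of a two-phase flow. A hedged sketch of that flow through the public entry points, assuming pytorch_nndct.apis exports torch_quantizer and dump_xmodel and that the returned quantizer provides export_quant_config (assumptions based on the documented Vitis AI workflow; note that the variants collected here use either string or integer quant_mode values):

import torch
import torchvision

# Assumed public entry points; compare the torch_quantizer variant in Example #12.
from pytorch_nndct.apis import torch_quantizer, dump_xmodel

model = torchvision.models.resnet18().eval()
dummy_input = torch.randn(1, 3, 224, 224)   # batch size 1 keeps the graph exportable

# Phase 1: calibration. Forward passes on quant_model collect quantization statistics.
quantizer = torch_quantizer("calib", model, (dummy_input,), output_dir="quantize_result")
_ = quantizer.quant_model(dummy_input)
quantizer.export_quant_config()             # assumed helper from the documented flow

# Phase 2: test mode (quant_mode = 2). Re-run with fixed-point parameters, then compile.
quantizer = torch_quantizer("test", model, (dummy_input,), output_dir="quantize_result")
_ = quantizer.quant_model(dummy_input)
dump_xmodel(output_dir="quantize_result", deploy_check=True)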
Example #5
    def __init__(self,
                 quant_mode: str,
                 module: torch.nn.Module,
                 input_args: Union[torch.Tensor, Sequence[Any]] = None,
                 state_dict_file: Optional[str] = None,
                 output_dir: str = "quantize_result",
                 bitwidth_w: int = 8,
                 bitwidth_a: int = 8,
                 device: torch.device = torch.device("cuda"),
                 lstm_app: bool = True):
        self._export_folder = output_dir
        # Check arguments type
        self._check_args(module)

        # Check device available
        if device.type == "cuda":
            if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
                device = torch.device("cpu")
                NndctScreenLogger().warning(
                    "CUDA is not available, changing device to CPU.")

        # Transform torch module to quantized module format
        nndct_utils.create_work_dir(output_dir)

        # turn off weights equalization and bias correction
        option_util.set_option_value("nndct_quant_opt", 0)
        option_util.set_option_value("nndct_param_corr", False)
        option_util.set_option_value("nndct_equalization", False)
        option_util.set_option_value("nndct_cv_app", False)

        transformed_module = convert_lstm(module)
        script_module = torch.jit.script(transformed_module)
        quant_module, graph = prepare_quantizable_module(
            module=script_module,
            input_args=None,
            export_folder=output_dir,
            state_dict_file=state_dict_file,
            quant_mode=quant_mode,
            device=device)

        quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
                                          bits_bias=bitwidth_w,
                                          bits_activation=bitwidth_a)

        quantizer, qmode = self._init_quant_env(quant_mode, output_dir,
                                                quant_strategy)
        quantizer.quant_model = quant_module.to(device)

        quantizer.setup(graph, rnn_front_end=True, lstm=True)

        self.quantizer = quantizer
Example #6
    def dump_nodes_output(self,
                          nndct_graph: Graph,
                          quant_configs: NndctQuantInfo,
                          round_method: int,
                          enable_dump_weight: bool = True,
                          select_batch: bool = False) -> None:

        nndct_utils.create_work_dir(self._full_folder)
        # Floating-point outputs are always dumped; fixed-point outputs and
        # graph info are added only when quantization is enabled.
        self._dump_floating_model(nndct_graph, enable_dump_weight,
                                  round_method, select_batch)
        if not self._quant_off:
            self._dump_fixed_model(nndct_graph, quant_configs,
                                   enable_dump_weight, round_method,
                                   select_batch)
            self._dump_graph_info(nndct_graph, quant_configs)
Example #7
def dump_xmodel(output_dir="quantize_result", deploy_check=False):
    r"""converts module to xmodel for deployment
  compilation only works when quantm model = 2.
  The xmodel and some checking data will be generated under work dir.

  Args:
    deploy_check(bool): if true, can dump blobs and parameters of model for deployment verification

  Returns:
    None
  """
    quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
    if quantizer and quantizer.quant_mode > 1:
        nndct_utils.create_work_dir(output_dir)

        # compile to xmodel
        try:
            compiler = CompilerFactory.get_compiler("xmodel")
            NndctScreenLogger().info("=>Converting to xmodel ...")
            compiler.do_compile(nndct_graph=quantizer.Nndctgraph,
                                quant_config_info=quantizer.quant_config,
                                output_file_name=os.path.join(
                                    output_dir, quantizer.Nndctgraph.name))

        except AddXopError as e:
            NndctScreenLogger().error(
                f"Failed convert nndct graph to xmodel({str(e)}).")
        else:
            NndctScreenLogger().info(
                f"=>Successfully convert to xmodel.({compiler.xmodel_file})")

        # dump data for accuracy check
        if deploy_check:
            NndctScreenLogger().info("=>Dumping checking data...")
            update_nndct_blob_data(quantizer.quant_model, quantizer.Nndctgraph)
            checker = DeployChecker(output_dir_name=output_dir)
            checker.dump_nodes_output(
                quantizer.Nndctgraph,
                quantizer.quant_config,
                round_method=quantizer.quant_opt['round_method'])

            set_outputs_recorder_status(quantizer.quant_model, False)
            NndctScreenLogger().info(
                f"=>Finsh dumping data.({checker.dump_folder})")
Example #8
def dump_xmodel(output_dir="quantize_result", deploy_check=False, lstm_app=False):
  r"""converts module to xmodel for deployment
  compilation only works when quantm model = 2.
  The xmodel and some checking data will be generated under work dir.

  Args:
    deploy_check(bool): if true, can dump blobs and parameters of model for deployment verification

  Returns:
    None
  """
  quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
  if quantizer and quantizer.quant_mode > 1:
    nndct_utils.create_work_dir(output_dir)
    
    # compile to xmodel
    
    compiler = CompilerFactory.get_compiler("xmodel")
      
    NndctScreenLogger().info("=>Converting to xmodel ...")
    deploy_graphs = get_deploy_graph_list(quantizer.quant_model, quantizer.Nndctgraph)
    #depoly_infos = compiler.get_deloy_graph_infos(quantizer, deploy_graphs)
    xmodel_depoly_infos, dump_deploy_infos = compiler.get_xmodel_and_dump_infos(quantizer, deploy_graphs)
    if not lstm_app:
      for node in xmodel_depoly_infos[0].dev_graph.nodes:
        error_out = False
        if node.op.type not in [NNDCT_OP.INPUT, NNDCT_OP.QUANT_STUB]:
          continue
        for i, tensor in enumerate(node.out_tensors):
          if tensor.shape and tensor.shape[0] != 1:
            NndctScreenLogger().error(f"Batch size must be 1 when exporting xmodel.")
            error_out = True
            break
        if error_out:
          break
      
    for depoly_info in dump_deploy_infos:
      # dump data for accuracy check
      if deploy_check:
        NndctScreenLogger().info(f"=>Dumping '{depoly_info.dev_graph.name}'' checking data...")
        if lstm_app:
          checker = DeployChecker(output_dir_name=output_dir, data_format='txt')
          checker.update_dump_folder(f"{depoly_info.dev_graph.name}/frame_0")
          select_batch = True
        else:
          checker = DeployChecker(output_dir_name=output_dir)
          checker.update_dump_folder(f"{depoly_info.dev_graph.name}")
          select_batch = False
        checker.dump_nodes_output(
            depoly_info.dev_graph,
            depoly_info.quant_info,
            round_method=quantizer.quant_opt['round_method'], select_batch=select_batch)
        
        NndctScreenLogger().info(f"=>Finsh dumping data.({checker.dump_folder})")
      
    for depoly_info in xmodel_depoly_infos:
      try:
        xgraph = compiler.do_compile(
            depoly_info.dev_graph,
            quant_config_info=depoly_info.quant_info,
            output_file_name=os.path.join(output_dir, depoly_info.dev_graph.name))

      except AddXopError as e:
        NndctScreenLogger().error(f"Failed convert graph '{depoly_info.dev_graph.name}' to xmodel.")
        raise e
      
      compiler.verify_xmodel(depoly_info.dev_graph, xgraph)
    set_outputs_recorder_status(quantizer.quant_model, False)
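
This variant reports an error if the INPUT/QUANT_STUB tensors of the exported graph have a batch size other than 1 (non-LSTM case), and dumps checking data in a per-frame text format when lstm_app is set. A short sketch of the non-LSTM export path, under the same assumed public entry points as in the sketch after Example #4, emphasizing the single-sample input:

import torch
import torchvision

# Same assumed entry points as in the sketch after Example #4.
from pytorch_nndct.apis import torch_quantizer, dump_xmodel

model = torchvision.models.resnet18().eval()
single_sample = torch.randn(1, 3, 224, 224)   # batch dimension must be 1 for xmodel export

quantizer = torch_quantizer("test", model, (single_sample,), output_dir="quantize_result")
_ = quantizer.quant_model(single_sample)
dump_xmodel(output_dir="quantize_result", deploy_check=True, lstm_app=False)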
Example #9
      torch.ops.load_library(lib_abspath)
  except ImportError as e:
    NndctScreenLogger().error(f"{str(e)}")
    sys.exit(1)
  else:
    NndctScreenLogger().info(f"Loading NNDCT kernels...")
    
else:    
  if os.path.exists(os.path.join(_cur_dir, "kernel")):
    from .kernel import NN_PATH
  else:
    NN_PATH = _cur_dir
  try:
    cwd = NN_PATH
    lib_path = os.path.join(cwd, "lib")
    create_work_dir(lib_path)
    cpu_src_path = os.path.join(cwd, "../../../csrc/cpu")
    source_files = []
    for name in os.listdir(cpu_src_path):
      if name.split(".")[-1] in ["cpp", "cc", "c"]:
        source_files.append(os.path.join(cpu_src_path, name))

    extra_include_paths = [
        os.path.join(cwd, "../../../include/cpu"),
        os.path.join(cwd, "include")
    ]
    
    with_cuda = False
    #if torch.cuda.is_available() and "CUDA_HOME" in os.environ:
    if "CUDA_HOME" in os.environ:
      cuda_src_path = os.path.join(cwd, "../../../csrc/cuda")
Example #10
  def __init__(self,
               quant_mode: str,
               module: torch.nn.Module,
               input_args: Union[torch.Tensor, Sequence[Any]] = None,
               state_dict_file: Optional[str] = None,
               output_dir: str = "quantize_result",
               bitwidth_w: int = 8,
               bitwidth_a: int = 8,
               device: torch.device = torch.device("cuda"),
               lstm_app: bool = True):
    self._export_folder = output_dir
    # Check arguments type
    self._check_args(module)
    
    # Check device available
    if device.type == "cuda":
      if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
        device = torch.device("cpu")
        NndctScreenLogger().warning(f"CUDA is not available, change device to CPU")
    
    # Transform torch module to quantized module format
    nndct_utils.create_work_dir(output_dir)
    
    # turn off weights equalization and bias correction
    option_util.set_option_value("nndct_quant_opt", 0)
    option_util.set_option_value("nndct_param_corr", False)
    option_util.set_option_value("nndct_equalization", False)
    
    # Create a quantizer object, which controls the whole quantization flow
    #if quant_strategy == None:
    quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
                                      bits_bias=bitwidth_w,
                                      bits_activation=bitwidth_a)
    quantizer, qmode = self._init_quant_env(quant_mode, 
                                            output_dir,
                                            quant_strategy)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)
    
    standard_RNNs, customized_RNNs = self._analyse_module(module)

    if len(standard_RNNs) == 0 and len(customized_RNNs) == 0:
      raise RuntimeError(
          f"The top module '{module._get_name()}' should have one LSTM module at least."
      )

    self._modules_info = defaultdict(dict)

    # process customized Lstm
    for layer_name, layer_module in customized_RNNs.items():
      for cell_name, cell_module in layer_module.named_children():
        lstm_direction = "forward" if layer_module.go_forward else "backward"
        full_cell_name = ".".join([layer_name, cell_name])
        layer_graph = self._get_customized_LSTM_graph(full_cell_name,
                                                      cell_module,
                                                      layer_module.input_size,
                                                      layer_module.hidden_size,
                                                      layer_module.memory_size)
        self._modules_info[full_cell_name]["layers_graph"] = [{
            lstm_direction: layer_graph
        }]
        self._modules_info[full_cell_name]["stack_mode"] = None
        self._modules_info[full_cell_name]["layer_module"] = layer_module

    # process standard Lstm
    for name, rnn_module in standard_RNNs.items():
      layers_graph = self._get_standard_RNN_graph(
          graph_name=name, lstm_module=rnn_module)
      self._modules_info[name]["layers_graph"] = layers_graph
      self._modules_info[name]["input_size"] = [rnn_module.input_size
                                                ] * rnn_module.num_layers
      self._modules_info[name]["hidden_size"] = [rnn_module.hidden_size
                                                 ] * rnn_module.num_layers
      self._modules_info[name]["memory_size"] = [rnn_module.hidden_size
                                                 ] * rnn_module.num_layers
      self._modules_info[name][
          "stack_mode"] = "bidirectional" if rnn_module.bidirectional else "unidirectional"
      self._modules_info[name]["batch_first"] = rnn_module.batch_first

      if rnn_module.mode == 'LSTM':
        self._modules_info[name]["mode"] = "LSTM"
      elif rnn_module.mode == "GRU": 
        self._modules_info[name]["mode"] = "GRU"
    # merge the subgraphs into a single top-level graph
    top_graph = self._merge_subgraphs()
    
    # turn on quantizer
    #if quant_mode:
    quantizer.setup(top_graph, rnn_front_end=True, lstm=True)
    
    # write and reload quantizable cell module
    module_graph_map = self._rebuild_layer_module()
    
    # replace float modules with quantizable modules
    for name, info in self._modules_info.items():
      if info["stack_mode"] is not None:
        self._build_stack_lstm_module(info)
      else:
        info["QLSTM"] = list(info["layers_module"][0].values())[0]
      module = self._insert_QuantLstm_in_top_module(module, name, info)

    # move modules info into layers info
    self._convert_modules_info_to_layers(module_graph_map)

    # hook module with quantizer
    # connect_module_with_quantizer(quant_module, quantizer)
    quantizer.quant_model = module

    self.quantizer = quantizer
Example #11
  def quantize_modules(self, top_module: torch.nn.Module) -> torch.nn.Module:
    """
    `prepare quantizable LSTM sub modules.`
    
    Args:
        top_module (torch.nn.Module): Top Module in which LSTM need to do quantization
    
    Raises:
        RuntimeError: The top module should have one LSTM at least.
    
    Returns:
        torch.nn.Module: Top Module in which LSTM sub modules are transformed to quantizible module
    """

    standard_RNNs, customized_RNNs = self._analyse_module(top_module)

    if len(standard_RNNs) == 0 and len(customized_RNNs) == 0:
      raise RuntimeError(
          f"The top module '{top_module._get_name()}' should have one LSTM module at least."
      )

    nndct_utils.create_work_dir(self._export_folder)

    self._modules_info = defaultdict(dict)

    # process customized Lstm
    for layer_name, layer_module in customized_RNNs.items():
      for cell_name, cell_module in layer_module.named_children():
        lstm_direction = "forward" if layer_module.go_forward else "backward"
        full_cell_name = ".".join([layer_name, cell_name])
        layer_graph = self._get_customized_LSTM_graph(full_cell_name,
                                                      cell_module,
                                                      layer_module.input_size,
                                                      layer_module.hidden_size,
                                                      layer_module.memory_size)
        self._modules_info[full_cell_name]["layers_graph"] = [{
            lstm_direction: layer_graph
        }]
        self._modules_info[full_cell_name]["stack_mode"] = None
        self._modules_info[full_cell_name]["layer_module"] = layer_module

    # process standard Lstm
    for name, module in standard_RNNs.items():
      layers_graph = self._get_standard_RNN_graph(
          graph_name=name, lstm_module=module)
      self._modules_info[name]["layers_graph"] = layers_graph
      self._modules_info[name]["input_size"] = [module.input_size
                                                ] * module.num_layers
      self._modules_info[name]["hidden_size"] = [module.hidden_size
                                                 ] * module.num_layers
      self._modules_info[name]["memory_size"] = [module.hidden_size
                                                 ] * module.num_layers
      self._modules_info[name][
          "stack_mode"] = "bidirectional" if module.bidirectional else "unidirectional"
      self._modules_info[name]["batch_first"] = module.batch_first

      if module.mode == 'LSTM':
        self._modules_info[name]["mode"] = "LSTM"
      elif module.mode == "GRU": 
        self._modules_info[name]["mode"] = "GRU"
    # merge the subgraphs into a single top-level graph
    top_graph = self._merge_subgraphs()
    
    # turn on quantizer
    if self._quant_mode:
      quantizer = TORCHQuantizer(self._quant_mode, self._export_folder,
                                 self._bit_w, self._bit_a)
      GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
      GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, self._quant_mode)
      quantizer.setup(top_graph, lstm=True)
    
    # write and reload quantizable cell module
    module_graph_map = self._rebuild_layer_module()
    
    # hook quantizer and module
    if self._quant_mode is not None:
      self._hook_quant_module_with_quantizer(quantizer)
    
    # replace float modules with quantizable modules
    for name, info in self._modules_info.items():
      if info["stack_mode"] is not None:
        self._build_stack_lstm_module(info)
      else:
        info["QLSTM"] = list(info["layers_module"][0].values())[0]
      top_module = self._insert_QuantLstm_in_top_module(top_module, name, info)

    # move modules info into layers info
    self._convert_modules_info_to_layers(module_graph_map)

    return top_module
Example #12
def torch_quantizer(quant_mode: int,
                    module: torch.nn.Module,
                    input_args: Union[torch.Tensor, Sequence[Any]],
                    state_dict_file: Optional[str] = None,
                    output_dir: str = "quantize_result",
                    bitwidth_w: int = 8,
                    bitwidth_a: int = 8) -> TORCHQuantizer:
    def _check_args():
        nonlocal module
        if not isinstance(module, torch.nn.Module):
            raise TypeError(f"type of 'module' should be 'torch.nn.Module'.")

        if not isinstance(input_args, (tuple, list, torch.Tensor)):
            raise TypeError(
                f"type of input_args should be tuple/list/torch.Tensor.")

        device = None
        if isinstance(input_args, torch.Tensor):
            device = input_args.device
        else:
            for inp in input_args:
                if isinstance(inp, torch.Tensor):
                    device = inp.device
                    break

        if device:
            module = module.to(device)

    def _init_quant_env():
        nonlocal quant_mode
        if NndctOption.nndct_quant_mode.value > 0:
            quant_mode = NndctOption.nndct_quant_mode.value

        if quant_mode == 1:
            NndctScreenLogger().info(
                f"Quantization calibration process start up...")
        elif quant_mode == 2:
            NndctScreenLogger().info(f"Quantization test process start up...")

        quantizer = TORCHQuantizer(quant_mode, output_dir, bitwidth_w,
                                   bitwidth_a)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, quant_mode)
        return quantizer, quant_mode

    # Check arguments type
    _check_args()

    # Transform torch module to quantized module format
    nndct_utils.create_work_dir(output_dir)

    # Create a quantizer object, which controls the whole quantization flow
    quantizer, quant_mode = _init_quant_env()

    quant_module, graph = prepare_quantizable_module(
        module=module,
        input_args=input_args,
        export_folder=output_dir,
        state_dict_file=state_dict_file,
        quant_mode=quant_mode)

    # enable recording of per-layer outputs
    if quant_mode > 1:
        set_outputs_recorder_status(quant_module, True)

    # initialize quantizer
    quantizer.setup(graph)

    # hook module with quantizer
    connect_module_with_quantizer(quant_module, quantizer)

    quantizer.quant_model = quant_module

    return quantizer
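
A usage sketch for the torch_quantizer variant above (integer quant_mode, calibration first, then test); the pytorch_nndct.apis import path is an assumption:

import torch
import torchvision

# Assumed import path for the function defined above.
from pytorch_nndct.apis import torch_quantizer

model = torchvision.models.resnet18().eval()
dummy_input = torch.randn(1, 3, 224, 224)

# quant_mode=1: calibration. Forward passes on quant_model gather statistics.
quantizer = torch_quantizer(1, model, dummy_input, output_dir="quantize_result")
_ = quantizer.quant_model(dummy_input)

# quant_mode=2: test. Per-layer outputs are recorded so they can be dumped later.
quantizer = torch_quantizer(2, model, dummy_input, output_dir="quantize_result")
_ = quantizer.quant_model(dummy_input)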
Example #13
    def dump_nodes_output(self,
                          nndct_graph: Graph,
                          quant_configs: NndctQuantInfo,
                          round_method: int,
                          enable_dump_weight=True) -> None:
        def _dump_floating_model() -> None:
            for node in nndct_graph.nodes:
                if enable_dump_weight:
                    for _, param_tensor in node.op.params.items():
                        self.dump_tensor_to_file(param_tensor.name,
                                                 param_tensor.data,
                                                 round_method=round_method)
                if len(node.out_tensors) > 1:
                    raise RuntimeError(
                        "Don't support multi-output op:'{} {}' for deploying!".
                        format(node.name, node.op.type))
                for tensor in node.out_tensors:
                    self.dump_tensor_to_file(node.name,
                                             tensor.data,
                                             round_method=round_method)

        def _dump_fixed_model() -> None:
            for node in nndct_graph.nodes:
                if enable_dump_weight:
                    for _, param_tensor in node.op.params.items():
                        if param_tensor.name in quant_configs['params']:
                            bit_width, fix_point = quant_configs['params'][
                                param_tensor.name]
                            self.dump_tensor_to_file(param_tensor.name +
                                                     NNDCT_KEYS.FIX_OP_SUFFIX,
                                                     param_tensor.data,
                                                     bit_width,
                                                     fix_point,
                                                     round_method=round_method)
                if len(node.out_tensors) > 1:
                    raise RuntimeError(
                        "Don't support multi-output op:'{} {}' for deploying!".
                        format(node.name, node.op.type))
                if node.name in quant_configs['blobs']:
                    for tensor in node.out_tensors:
                        bit_width, fix_point = quant_configs['blobs'][
                            node.name]
                        self.dump_tensor_to_file(node.name +
                                                 NNDCT_KEYS.FIX_OP_SUFFIX,
                                                 tensor.data,
                                                 bit_width,
                                                 fix_point,
                                                 round_method=round_method)

        def _dump_graph_info() -> None:
            # dump tensor shape information
            file_name = os.path.join(self._full_folder, "shape.txt")
            with open(file_name, "w") as file_obj:
                for node in nndct_graph.nodes:
                    if node.name in quant_configs['blobs']:
                        for tensor in node.out_tensors:
                            try:
                                file_obj.write("{}: {}\n".format(
                                    tensor.data.shape, node.name))
                            except AttributeError as e:
                                NndctScreenLogger().warning(
                                    f"{tensor.name} is not tensor.It's shape info is ignored."
                                )

        nndct_utils.create_work_dir(self._full_folder)
        # Floating-point outputs are always dumped; fixed-point outputs and
        # graph info are added only when quantization is enabled.
        _dump_floating_model()
        if not self._quant_off:
            _dump_fixed_model()
            _dump_graph_info()
Example #14
  def __init__(self,
               quant_mode: str,
               module: torch.nn.Module,
               input_args: Union[torch.Tensor, Sequence[Any]] = None,
               state_dict_file: Optional[str] = None,
               output_dir: str = "quantize_result",
               bitwidth_w: int = 8,
               bitwidth_a: int = 8,
               device: torch.device = torch.device("cuda"),
               lstm_app: bool = True,
               quant_config_file: Optional[str] = None):
    self._export_folder = output_dir
    # Check arguments type
    self._check_args(module)
    
    # Check device available
    if device.type == "cuda":
      if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
        device = torch.device("cpu")
        NndctScreenLogger().warning(f"CUDA is not available, change device to CPU")
    
    # Transform torch module to quantized module format
    nndct_utils.create_work_dir(output_dir)
    
    # turn off weights equalization and bias correction
    option_util.set_option_value("nndct_quant_opt", 0)
    option_util.set_option_value("nndct_param_corr", False)
    option_util.set_option_value("nndct_equalization", False)
    option_util.set_option_value("nndct_cv_app", False)
    
    # Parse the quant config file
    QConfiger = RNNTorchQConfig()
    #if quant_config_file:
    QConfiger.parse_config_file(quant_config_file,
                                bit_width_w = bitwidth_w, 
                                bit_width_a = bitwidth_a)
    qconfig = QConfiger.qconfig
    #bitwidth_w = qconfig['weight']['bit_width']
    #bitwidth_b = qconfig['bias']['bit_width']
    #bitwidth_a = qconfig['activation']['bit_width']
    #mix_bit = qconfig['mix_bit'] 

    transformed_module = convert_lstm(module)
    script_module = torch.jit.script(transformed_module)
    quant_module, graph = prepare_quantizable_module(
        module=script_module,
        input_args=None,
        export_folder=output_dir,
        state_dict_file=state_dict_file,
        quant_mode=quant_mode,
        device=device)
    
    #qstrategy_factory =  QstrategyFactory()
    #quant_strategy = qstrategy_factory.create_qstrategy(qconfig) 

    #quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
    #                                  bits_bias=bitwidth_w,
    #                                  bits_activation=bitwidth_a)
    
    quantizer, qmode = self._init_quant_env(quant_mode, 
                                            output_dir,
                                            qconfig,
                                            is_lstm=True)
    
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)
    GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_CONFIG, qconfig)

    quantizer.quant_model = quant_module.to(device)
    
    quantizer.setup(graph, rnn_front_end=True, lstm=True)

    self.quantizer = quantizer
Example #15
    def __init__(self,
                 quant_mode: str,
                 module: torch.nn.Module,
                 input_args: Union[torch.Tensor, Sequence[Any]] = None,
                 state_dict_file: Optional[str] = None,
                 output_dir: str = "quantize_result",
                 bitwidth_w: int = 8,
                 bitwidth_a: int = 8,
                 mix_bit: bool = False,
                 device: torch.device = torch.device("cuda"),
                 lstm_app: bool = False,
                 custom_quant_ops: Optional[List[str]] = None,
                 quant_config_file: Optional[str] = None):
        # Check arguments type
        self._check_args(module, input_args)

        # Check device available
        if device.type == "cuda":
            if not (torch.cuda.is_available() and "CUDA_HOME" in os.environ):
                device = torch.device("cpu")
                NndctScreenLogger().warning(
                    "CUDA is not available, changing device to CPU.")

        # Transform torch module to quantized module format
        nndct_utils.create_work_dir(output_dir)

        # Parse the quant config file
        QConfiger = TorchQConfig()
        #if quant_config_file:
        QConfiger.parse_config_file(quant_config_file,
                                    bit_width_w=bitwidth_w,
                                    bit_width_a=bitwidth_a,
                                    mix_bit=mix_bit)
        qconfig = QConfiger.qconfig
        #bitwidth_w = qconfig['weights']['bit_width']
        #bitwidth_b = qconfig['bias']['bit_width']
        #bitwidth_a = qconfig['activation']['bit_width']
        #mix_bit = qconfig['mix_bit']

        # Create a quantizer object, which controls the whole quantization flow
        #qstrategy_factory = QstrategyFactory()
        #quant_strategy = qstrategy_factory.create_qstrategy(qconfig)
        #quant_strategy = DefaultQstrategy(bits_weight=bitwidth_w,
        #                                  bits_bias=bitwidth_a,
        #                                  bits_activation=bitwidth_a,
        #                                  mix_bit=mix_bit)
        quantizer, qmode = self._init_quant_env(quant_mode, output_dir,
                                                qconfig)

        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, qmode)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_DEVICE, device)
        GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_CONFIG, qconfig)
        if lstm_app:
            option_util.set_option_value("nndct_cv_app", False)
        else:
            option_util.set_option_value("nndct_cv_app", True)

        # Prepare quantizable module

        quant_module, graph = prepare_quantizable_module(
            module=module,
            input_args=input_args,
            export_folder=output_dir,
            state_dict_file=state_dict_file,
            quant_mode=qmode,
            device=device)

        # enable recording of per-layer outputs
        if qmode > 1:
            register_output_hook(quant_module, record_once=True)
            set_outputs_recorder_status(quant_module, True)

        # initialize quantizer
        quantizer.setup(graph,
                        False,
                        lstm_app,
                        custom_quant_ops=custom_quant_ops)
        #if qmode > 1:
        #  quantizer.features_check()

        # hook module with quantizer
        # connect_module_with_quantizer(quant_module, quantizer)
        quantizer.quant_model = quant_module
        self._example_inputs = input_args

        self._lstm_app = lstm_app
        self.quantizer = quantizer
        self.adaquant = None

        # dump blob dist
        if NndctOption.nndct_visualize.value is True:
            visualize_tensors(quantizer.quant_model)