示例#1
0
def create_nncf_model_and_algo_builder(model: NNCFNetwork, config: NNCFConfig,
                                       dummy_forward_fn: Callable[[Module], Any] = None,
                                       wrap_inputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                                       resuming_state_dict: dict = None):
    assert isinstance(config, NNCFConfig)
    NNCFConfig.validate(config)
    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching', [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    compressed_model = NNCFNetwork(model, input_infos=input_info_list,
                                   dummy_forward_fn=dummy_forward_fn,
                                   wrap_inputs_fn=wrap_inputs_fn,
                                   ignored_scopes=ignored_scopes,
                                   target_scopes=target_scopes,
                                   scopes_without_shape_matching=scopes_without_shape_matching)

    should_init = resuming_state_dict is None
    compression_algo_builder_list = create_compression_algorithm_builders(config, should_init=should_init)
    return compressed_model, compression_algo_builder_list
示例#2
0
class PruningScheduler(CompressionScheduler):
    def __init__(self, pruning_algo, params: NNCFConfig = None):
        super().__init__()
        if params is None:
            self._params = NNCFConfig()
        else:
            self._params = params

        self.algo = pruning_algo

        # Number of initial steps of training before pruning
        self.num_init_steps = self._params.get('num_init_steps', 0)
        self.pruning_steps = self._params.get('pruning_steps', 100)

        # Pruning rates
        self.initial_pruning = self._params.get('pruning_init', 0)
        self.pruning_target = self._params.get('pruning_target', 0.5)

    def load_state_dict(self, state_dict):
        super().load_state_dict(state_dict)
        self._set_pruning_level()

    def epoch_step(self, epoch=None):
        super().epoch_step(epoch)
        self._set_pruning_level()

    def _set_pruning_level(self):
        self.algo.set_pruning_rate(self.current_pruning_level)

        if self.last_epoch >= (self.pruning_steps + self.num_init_steps):
            self.algo.freeze()

    def _calc_density_level(self):
        raise NotImplementedError

    @property
    def current_pruning_level(self):
        if self.last_epoch >= self.num_init_steps:
            return 1 - self._calc_density_level()
        return 0
示例#3
0
def get_empty_config(model_size=4,
                     input_sample_sizes: Union[Tuple[List[int]],
                                               List[int]] = None,
                     input_info: Dict = None) -> NNCFConfig:
    if input_sample_sizes is None:
        input_sample_sizes = [1, 1, 4, 4]

    def _create_input_info():
        if isinstance(input_sample_sizes, tuple):
            return [{"sample_size": sizes} for sizes in input_sample_sizes]
        return [{"sample_size": input_sample_sizes}]

    config = NNCFConfig()
    config.update({
        "model":
        "empty_config",
        "model_size":
        model_size,
        "input_info":
        input_info if input_info else _create_input_info()
    })
    return config
示例#4
0
def register_default_init_args(nncf_config: NNCFConfig,
                               data_loader: tf.data.Dataset,
                               batch_size: int,
                               device: str = None) -> NNCFConfig:
    """
    Register extra structures in the NNCFConfig. Initialization of some
    compression algorithms requires certain extra structures.

    :param nncf_config: An instance of the NNCFConfig class without extra structures.
    :param data_loader: Dataset used for initialization.
    :param batch_size: Batch size used for initialization.
    :param device: Device to perform initialization. If `device` is `None` then the device
        of the model parameters will be used.
    :return: An instance of the NNCFConfig class with extra structures.
    """
    nncf_config.register_extra_structs([
        QuantizationRangeInitArgs(data_loader=TFInitializingDataLoader(data_loader, batch_size),
                                  device=device),
        BNAdaptationInitArgs(data_loader=TFInitializingDataLoader(data_loader, batch_size),
                             device=device)
    ])
    return nncf_config
示例#5
0
def main():
    model_bin, model_xml = get_ir_paths(args.model, args.bin)

    config = NNCFConfig.from_json(args.config)

    input_infos_list = create_input_infos(config)
    image_size = input_infos_list[0].shape[-1]

    size = int(image_size / 0.875)

    print('IE version: {}'.format(get_version()))

    # NOTE: importing torch after loading IE to plugin to avoid issue with built-in MKLDNN of PyTorch
    plugin = IEPlugin(device='CPU', plugin_dirs=args.cpu_plugin_dir)
    plugin.add_cpu_extension(
        os.path.join(args.cpu_plugin_dir, "libcpu_extension.so"))
    net = IENetwork(model=model_xml, weights=model_bin)
    exec_net = getExecNet(plugin, net)
    from torch.utils.data import DataLoader
    import torchvision.datasets as datasets
    import torchvision.transforms as transforms

    val_loader = DataLoader(datasets.ImageFolder(
        args.data,
        transforms.Compose([
            transforms.Resize(size),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])),
                            batch_size=1,
                            shuffle=False,
                            num_workers=4,
                            pin_memory=True)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    config['log_dir'] = args.output_dir

    infer_fn = partial(infer_ie_model, net=net)
    validate_general(val_loader, exec_net, infer_fn)

    validate_torch_model(os.path.join(args.output_dir, "PTH"),
                         config=config,
                         num_layers=args.num_layers,
                         dump=args.dump,
                         val_loader=val_loader,
                         cuda=args.cuda)
示例#6
0
def test_scheduler_can_do_epoch_step__with_rb_algo():
    config = NNCFConfig()
    config['input_info'] = [{"sample_size": [1, 1, 32, 32]}]
    config['compression'] = {
        'algorithm': 'rb_sparsity',
        'sparsity_init': 0.2,
        "params": {
            'schedule': 'polynomial',
            'power': 1,
            'sparsity_target_epoch': 2,
            'sparsity_target': 0.6,
            'sparsity_freeze_epoch': 3
        }
    }

    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        BasicConvTestModel(), config)
    scheduler = compression_ctrl.scheduler
    loss = compression_ctrl.loss

    assert pytest.approx(loss.target_sparsity_rate) == 0.2
    assert not loss.disabled

    for module_info in compression_ctrl.sparsified_module_info:
        assert not module_info.operand.frozen
    scheduler.epoch_step()
    assert pytest.approx(loss.target_sparsity_rate, abs=1e-3) == 0.2
    assert pytest.approx(loss().item(), abs=1e-3) == 16
    assert not loss.disabled

    scheduler.epoch_step()
    assert pytest.approx(loss.target_sparsity_rate, abs=1e-3) == 0.4
    assert pytest.approx(loss().item(), abs=1e-3) == 64
    assert not loss.disabled

    scheduler.epoch_step()
    assert pytest.approx(loss.target_sparsity_rate, abs=1e-3) == 0.6
    assert pytest.approx(loss().item(), abs=1e-3) == 144
    assert not loss.disabled

    scheduler.epoch_step()
    assert loss.disabled
    assert loss.target_sparsity_rate == 0.6
    assert loss() == 0

    for module_info in compression_ctrl.sparsified_module_info:
        assert module_info.operand.frozen
示例#7
0
 def q_dq_config(config):
     nncf_config = NNCFConfig.from_json(config)
     if "compression" in nncf_config:
         compression_config = nncf_config["compression"]
         quantization_config = None
         if isinstance(compression_config, list):
             matches = []
             for subconfig in compression_config:
                 if subconfig["algorithm"] == "quantization":
                     matches.append(subconfig)
             if matches:
                 assert len(matches) == 1
                 quantization_config = matches[0]
         else:
             if compression_config["algorithm"] == "quantization":
                 quantization_config = compression_config
         if quantization_config is not None:
             quantization_config["export_to_onnx_standard_ops"] = True
     return nncf_config
示例#8
0
def test_model_can_be_loaded_with_resume(_params):
    p = _params
    sample_config_path = p['sample_config_path']
    checkpoint_path = p['checkpoint_path']

    config = SampleConfig.from_json(str(sample_config_path))
    nncf_config = NNCFConfig.from_json(str(sample_config_path))

    config.execution_mode = p['execution_mode']

    config.current_gpu = 0
    config.device = get_device(config)
    config.distributed = config.execution_mode in (
        ExecutionMode.DISTRIBUTED, ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        config.dist_url = "tcp://127.0.0.1:9898"
        config.dist_backend = "nccl"
        config.rank = 0
        config.world_size = 1
        configure_distributed(config)

    model_name = config['model']
    model = load_model(model_name,
                       pretrained=False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    nncf_config = register_default_init_args(
        nncf_config, train_loader=create_ones_mock_dataloader(nncf_config))

    model.to(config.device)
    model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, nncf_config)
    model, _ = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_ctrl.distributed()

    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    load_state(model, checkpoint['state_dict'], is_resume=True)
示例#9
0
def test_init_ranges_are_set(quantization_mode: str, per_channel: bool,
                             range_init_type_vs_ref_vals: Tuple[str, float,
                                                                float, float]):
    class SyntheticDataset(torch.utils.data.Dataset):
        def __init__(self):
            super().__init__()
            self._length = 1

        def __getitem__(self, idx):
            if idx >= self._length:
                raise StopIteration
            test_input_sample = torch.zeros([3, 100, 100])
            for i in range(0, 100):
                for j in range(0, 100):
                    test_input_sample[0][i][j] = i * 100 + j
            test_input_sample[1] = test_input_sample[0]
            test_input_sample[2] = test_input_sample[0]
            return test_input_sample, test_input_sample

        def __len__(self):
            return self._length

    data_loader = torch.utils.data.DataLoader(SyntheticDataset(),
                                              batch_size=1,
                                              drop_last=True)

    range_init_type = range_init_type_vs_ref_vals[0]
    config_with_init = NNCFConfig()
    config_with_init.update({
        "input_info": {
            "sample_size": [1, 3, 100, 100]
        },
        "target_device": "TRIAL",
        "compression": {
            "algorithm": "quantization",
            "activations": {
                "mode": quantization_mode,
                "per_channel": per_channel
            },
            "weights": {
                "mode": quantization_mode,
                "per_channel": per_channel
            },
            "initializer": {
                "range": {
                    "num_init_samples": 1,
                    "type": range_init_type
                }
            }
        }
    })

    if range_init_type == "percentile":
        config_with_init["compression"]["initializer"]["range"]["params"] = {
            "min_percentile": 32.10,
            "max_percentile": 67.89
        }

    # Activations init check
    id_model = SingleConv2dIdentityModel()
    config_with_init.register_extra_structs(
        [QuantizationRangeInitArgs(wrap_dataloader_for_init(data_loader))])
    register_bn_adaptation_init_args(config_with_init)
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        id_model, config_with_init)

    act_quantizer_info = next(
        iter(compression_ctrl.non_weight_quantizers.values()))

    ref_scale = range_init_type_vs_ref_vals[1]
    ref_input_low = range_init_type_vs_ref_vals[2]
    ref_input_high = range_init_type_vs_ref_vals[3]

    def check_scales(quantizer: BaseQuantizer, per_channel: bool):
        # Absolute tolerance is 1.0 due to percentile value interpolation
        if quantization_mode == 'symmetric':
            assert torch.allclose(quantizer.scale,
                                  torch.ones_like(quantizer.scale) * ref_scale,
                                  atol=1.0)
            if per_channel:
                assert quantizer.scale.numel() == 3
            else:
                assert quantizer.scale.numel() == 1
        else:
            assert torch.allclose(quantizer.input_low,
                                  torch.ones_like(quantizer.input_low) *
                                  ref_input_low,
                                  atol=1.0)
            assert torch.allclose(quantizer.input_range,
                                  torch.ones_like(quantizer.input_low) *
                                  ref_input_high,
                                  atol=1.0)
            if per_channel:
                assert quantizer.input_low.numel() == 3
                assert quantizer.input_range.numel() == 3
            else:
                assert quantizer.input_low.numel() == 1
                assert quantizer.input_range.numel() == 1

    check_scales(act_quantizer_info.quantizer_module_ref, per_channel)
    # Weight init check
    synth_weight_model = SingleConv2dSyntheticWeightModel()
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        synth_weight_model, config_with_init)

    weight_quantizer_info = next(
        iter(compression_ctrl.weight_quantizers.values()))
    check_scales(weight_quantizer_info.quantizer_module_ref, per_channel)
示例#10
0
def test_get_default_weight_decay(algo, ref_weight_decay):
    config = NNCFConfig()
    config.update({"compression": {"algorithm": algo}})
    assert ref_weight_decay == get_default_weight_decay(config)
示例#11
0
def create_compressed_model(model: Module, config: NNCFConfig,
                            resuming_state_dict: dict = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            dump_graphs=True,) \
    -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning from an original PyTorch
    model and a configuration object.
    dummy_forward_fn
    :param model: The original model. Should have its parameters already loaded from a checkpoint or another
    source.
    :param config: A configuration object used to determine the exact compression modifications to be applied
    to the model
    :param resuming_state_dict: A PyTorch state dict object to load (strictly) into the compressed model after
    building.
    :param dummy_forward_fn: will be used instead of a *forward* function call to build
    the internal graph representation via tracing. Specifying this is useful when the original training pipeline
    has special formats of data loader output or has additional *forward* arguments other than input tensors.
    Otherwise, the *forward* call of the model during graph tracing will be made with mock tensors according
    to the shape specified in the config object.
    :param dump_graphs: Whether or not should also dump the internal graph representation of the
    original and compressed models in the .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which case the controller
    is an instance of CompositeCompressionController) and the model ready for compression parameter training wrapped
    as an object of NNCFNetwork."""

    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.dump_graph(osp.join(config.get("log_dir", "."),
                                      "original_graph.dot"),
                             extended=True)

    if is_debug():
        set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching',
                                               [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    compressed_model = NNCFNetwork(
        model,
        input_infos=input_info_list,
        dummy_forward_fn=dummy_forward_fn,
        ignored_scopes=ignored_scopes,
        target_scopes=target_scopes,
        scopes_without_shape_matching=scopes_without_shape_matching)

    should_init = resuming_state_dict is None
    compression_algo_builder_list = create_compression_algorithm_builders(
        config, should_init=should_init)

    for builder in compression_algo_builder_list:
        compressed_model = builder.apply_to(compressed_model)
    compression_ctrl = compressed_model.commit_compression_changes()

    if dump_graphs and is_main_process() and compression_algo_builder_list:
        if dummy_forward_fn is None:
            compressed_graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=False))
        else:
            compressed_graph_builder = GraphBuilder(
                custom_forward_fn=dummy_forward_fn)

        graph = compressed_graph_builder.build_graph(
            compressed_model, compressed_model.get_tracing_context())
        graph.dump_graph(osp.join(config.get("log_dir", "."),
                                  "compressed_graph.dot"),
                         extended=True)

    if resuming_state_dict is not None:
        load_state(compressed_model, resuming_state_dict, is_resume=True)

    return compression_ctrl, compressed_model
示例#12
0
def create_compression_algorithm_builders(
        config: NNCFConfig,
        should_init: bool = True) -> List[CompressionAlgorithmBuilder]:
    compression_config_json_section = config.get('compression', {})
    compression_config_json_section = deepcopy(compression_config_json_section)

    hw_config_type = None
    hw_config_type_str = config.get("hw_config_type")
    if hw_config_type_str is not None:
        hw_config_type = HWConfigType.from_str(config.get("hw_config_type"))
    if isinstance(compression_config_json_section, dict):
        compression_config = NNCFConfig(compression_config_json_section)
        compression_config.register_extra_structs(
            config.get_all_extra_structs_for_copy())
        compression_config["hw_config_type"] = hw_config_type
        return [
            get_compression_algorithm(compression_config)(
                compression_config, should_init=should_init),
        ]
    retval = []
    for algo_config in compression_config_json_section:
        algo_config = NNCFConfig(algo_config)
        algo_config.register_extra_structs(
            config.get_all_extra_structs_for_copy())
        algo_config["hw_config_type"] = hw_config_type
        retval.append(
            get_compression_algorithm(algo_config)(algo_config,
                                                   should_init=should_init))
    return retval
示例#13
0
def create_compressed_model(model: Module, config: NNCFConfig,
                            resuming_state_dict: dict = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            wrap_inputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                            dump_graphs=True,) \
    -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning from an original PyTorch
    model and a configuration object.
    dummy_forward_fn
    :param model: The original model. Should have its parameters already loaded from a checkpoint or another
    source.
    :param config: A configuration object used to determine the exact compression modifications to be applied
    to the model
    :param resuming_state_dict: A PyTorch state dict object to load (strictly) into the compressed model after
    building.
    :param dummy_forward_fn: if supplied, will be used instead of a *forward* function call to build
    the internal graph representation via tracing. Specifying this is useful when the original training pipeline
    has special formats of data loader output or has additional *forward* arguments other than input tensors.
    Otherwise, the *forward* call of the model during graph tracing will be made with mock tensors according
    to the shape specified in the config object.
    :param wrap_inputs_fn: if supplied, will be used on the module's input arguments during a regular, non-dummy
    forward call before passing the inputs to the underlying compressed model. This is required if the model's input
    tensors that are important for compression are not supplied as arguments to the model's forward call directly, but
    instead are located in a container (such as list), and the model receives the container as an argument.
    wrap_inputs_fn should take as input two arguments - the tuple of positional arguments to the underlying
    model's forward call, and a dict of keyword arguments to the same. The function should wrap each tensor among the
    supplied model's args and kwargs that is important for compression (e.g. quantization) with an nncf.nncf_model_input
    function, which is a no-operation function and marks the tensors as inputs to be traced by NNCF in the internal
    graph representation. Output is the tuple of (args, kwargs), where args and kwargs are the same as were supplied in
    input, but each tensor in the original input.
    :param dump_graphs: Whether or not should also dump the internal graph representation of the
    original and compressed models in the .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which case the controller
    is an instance of CompositeCompressionController) and the model ready for compression parameter training wrapped
    as an object of NNCFNetwork."""

    # Compress model that will be deployed for the inference on target device. No need to compress parts of the
    # model that are used on training stage only (e.g. AuxLogits of Inception-v3 model) or unused modules with weights.
    # As a consequence, no need to care about spoiling BN statistics, as there're disabled in eval mode.
    model.eval()

    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.visualize_graph(
                osp.join(config.get("log_dir", "."), "original_graph.dot"))

    set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching',
                                               [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    compressed_model = NNCFNetwork(
        model,
        input_infos=input_info_list,
        dummy_forward_fn=dummy_forward_fn,
        wrap_inputs_fn=wrap_inputs_fn,
        ignored_scopes=ignored_scopes,
        target_scopes=target_scopes,
        scopes_without_shape_matching=scopes_without_shape_matching)

    should_init = resuming_state_dict is None
    compression_algo_builder_list = create_compression_algorithm_builders(
        config, should_init=should_init)

    for builder in compression_algo_builder_list:
        compressed_model = builder.apply_to(compressed_model)
    compression_ctrl = compressed_model.commit_compression_changes()

    try:
        if resuming_state_dict is not None:
            load_state(compressed_model, resuming_state_dict, is_resume=True)
    finally:
        if dump_graphs and is_main_process() and compression_algo_builder_list:
            if dummy_forward_fn is None:
                compressed_graph_builder = GraphBuilder(
                    custom_forward_fn=create_dummy_forward_fn(
                        input_info_list, with_input_tracing=False))
            else:
                compressed_graph_builder = GraphBuilder(
                    custom_forward_fn=dummy_forward_fn)

            graph = compressed_graph_builder.build_graph(
                compressed_model, compressed_model.get_tracing_context())
            graph.visualize_graph(
                osp.join(config.get("log_dir", "."), "compressed_graph.dot"))
    return compression_ctrl, compressed_model
示例#14
0
def create_compression_algorithm_builders(
        config: NNCFConfig,
        should_init: bool = True) -> List[CompressionAlgorithmBuilder]:
    compression_config_json_section = config.get('compression', {})
    compression_config_json_section = deepcopy(compression_config_json_section)

    hw_config_type = None
    quantizer_setup_type_str = config.get("quantizer_setup_type",
                                          "propagation_based")
    quantizer_setup_type = QuantizerSetupType.from_str(
        quantizer_setup_type_str)
    if quantizer_setup_type == QuantizerSetupType.PROPAGATION_BASED:
        target_device = config.get("target_device", "ANY")
        if target_device != 'TRIAL':
            hw_config_type = HWConfigType.from_str(
                HW_CONFIG_TYPE_TARGET_DEVICE_MAP[target_device])

    if isinstance(compression_config_json_section, dict):
        compression_config = NNCFConfig(compression_config_json_section)
        compression_config.register_extra_structs(
            config.get_all_extra_structs_for_copy())
        compression_config["hw_config_type"] = hw_config_type
        compression_config['quantizer_setup_type'] = quantizer_setup_type
        return [
            get_compression_algorithm(compression_config)(
                compression_config, should_init=should_init),
        ]
    retval = []
    for algo_config in compression_config_json_section:
        algo_config = NNCFConfig(algo_config)
        algo_config.register_extra_structs(
            config.get_all_extra_structs_for_copy())
        algo_config["hw_config_type"] = hw_config_type
        algo_config['quantizer_setup_type'] = quantizer_setup_type
        retval.append(
            get_compression_algorithm(algo_config)(algo_config,
                                                   should_init=should_init))
    return retval
示例#15
0
def create_compressed_model(model: Module,
                            config: NNCFConfig,
                            compression_state: Optional[Dict[str, Any]] = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            wrap_inputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                            wrap_outputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                            dump_graphs=True) \
        -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning from an original PyTorch
    model and a configuration object.
    dummy_forward_fn
    :param model: The original model. Should have its parameters already loaded from a checkpoint or another
    source.
    :param config: A configuration object used to determine the exact compression modifications to be applied
    to the model
    :param compression_state: representation of the entire compression state to unambiguously restore
    the compressed model. Includes builder and controller states.
    :param dummy_forward_fn: if supplied, will be used instead of a *forward* function call to build
    the internal graph representation via tracing. Specifying this is useful when the original training pipeline
    has special formats of data loader output or has additional *forward* arguments other than input tensors.
    Otherwise, the *forward* call of the model during graph tracing will be made with mock tensors according
    to the shape specified in the config object. The dummy_forward_fn code MUST contain calls to nncf.nncf_model_input
    functions made with each compressed model input tensor in the underlying model's args/kwargs tuple, and these
    calls should be exactly the same as in the wrap_inputs_fn function code (see below); if dummy_forward_fn is
    specified, then wrap_inputs_fn also must be specified.
    :param wrap_inputs_fn: if supplied, will be used on the module's input arguments during a regular, non-dummy
    forward call before passing the inputs to the underlying compressed model. This is required if the model's input
    tensors that are important for compression are not supplied as arguments to the model's forward call directly, but
    instead are located in a container (such as list), and the model receives the container as an argument.
    wrap_inputs_fn should take as input two arguments - the tuple of positional arguments to the underlying
    model's forward call, and a dict of keyword arguments to the same. The function should wrap each tensor among the
    supplied model's args and kwargs that is important for compression (e.g. quantization) with an nncf.nncf_model_input
    function, which is a no-operation function and marks the tensors as inputs to be traced by NNCF in the internal
    graph representation. Output is the tuple of (args, kwargs), where args and kwargs are the same as were supplied in
    input, but each tensor in the original input. Must be specified if dummy_forward_fn is specified.
    :param dump_graphs: Whether or not should also dump the internal graph representation of the
    original and compressed models in the .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which case the controller
    is an instance of CompositeCompressionController) and the model ready for compression parameter training wrapped
    as an object of NNCFNetwork."""

    if dummy_forward_fn is not None and wrap_inputs_fn is None:
        raise ValueError(
            "A custom dummy forward function was specified, but the corresponding input wrapping function "
            "was not. In case a custom dummy forward function is specified for purposes of NNCF graph "
            "building, then the wrap_inputs_fn parameter MUST also be specified and be consistent with "
            "the input wrapping done in dummy_forward_fn.")

    is_legacy_model_state_dict = compression_state is not None and \
                                 BaseController.BUILDER_STATE not in compression_state and \
                                 BaseController.CONTROLLER_STATE not in compression_state
    maybe_convert_legacy_names_in_compress_state(compression_state)
    # Compress model that will be deployed for the inference on target device. No need to compress parts of the
    # model that are used on training stage only (e.g. AuxLogits of Inception-v3 model) or unused modules with weights.
    # As a consequence, no need to care about spoiling BN statistics, as there're disabled in eval mode.
    model.eval()

    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.visualize_graph(
                osp.join(config.get("log_dir", "."), "original_graph.dot"))

    set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching',
                                               [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    original_model_accuracy = None
    if is_accuracy_aware_training(config):
        if config.has_extra_struct(ModelEvaluationArgs):
            evaluation_args = config.get_extra_struct(ModelEvaluationArgs)
            with torch.no_grad():
                original_model_accuracy = evaluation_args.eval_fn(model)
                nncf_logger.info("Non-compressed model accuracy = {}".format(
                    original_model_accuracy))

    compressed_model = NNCFNetwork(
        model,
        input_infos=input_info_list,
        dummy_forward_fn=dummy_forward_fn,
        wrap_inputs_fn=wrap_inputs_fn,
        wrap_outputs_fn=wrap_outputs_fn,
        ignored_scopes=ignored_scopes,
        target_scopes=target_scopes,
        scopes_without_shape_matching=scopes_without_shape_matching,
        original_model_accuracy=original_model_accuracy)

    should_init = compression_state is None

    builder = create_compression_algorithm_builder(config, should_init)
    is_state_loadable = not is_legacy_model_state_dict and compression_state is not None
    if is_state_loadable:
        builder.load_state(compression_state[BaseController.BUILDER_STATE])

    builder.apply_to(compressed_model)
    compression_ctrl = builder.build_controller(compressed_model)
    if is_state_loadable:
        compression_ctrl.load_state(
            compression_state[BaseController.CONTROLLER_STATE])

    # Required to ensure that the model leaving create_compressed_model has correct compressed graph.
    # In particular, this is currently required for correct functioning of RNNs.
    compressed_model.rebuild_graph()

    try:
        if is_legacy_model_state_dict:
            from nncf.torch import load_state
            state_dict_to_load = compression_state.get('state_dict',
                                                       compression_state)
            load_state(compressed_model, state_dict_to_load, is_resume=True)
    finally:
        if dump_graphs and is_main_process():
            compressed_model_graph = compressed_model.get_graph()
            compressed_model_graph.visualize_graph(
                osp.join(config.get("log_dir", "."), "compressed_graph.dot"))

    # Synchronize all processes if run in distributed mode
    if is_dist_avail_and_initialized():
        try:
            barrier()
        # Exception can be raised during running barrier
        # if the backend not in the supported list https://pytorch.org/docs/stable/distributed.html
        except RuntimeError as err:
            nncf_logger.warning(err)
            nncf_logger.warning(
                "NNCF continues work, while does not guarantee that "
                "the processes will finish model's compression at the same time. "
                "If your training pipeline demands the processes be synchronized, please, "
                "keep attention to that error")
            return compression_ctrl, compressed_model
    compressed_model.get_tracing_context().disable_trace_dynamic_graph()
    return compression_ctrl, compressed_model
示例#16
0
def wrap_nncf_model(model,
                    cfg,
                    data_loader_for_init=None,
                    get_fake_input_func=None,
                    export=False):
    """
    The function wraps mmaction model by NNCF
    Note that the parameter `get_fake_input_func` should be the function `get_fake_input`
    -- cannot import this function here explicitly
    """

    check_nncf_is_enabled()

    from nncf.config import NNCFConfig
    from nncf.torch import (create_compressed_model,
                            register_default_init_args)
    from nncf.torch.dynamic_graph.io_handling import nncf_model_input
    from nncf.torch.dynamic_graph.trace_tensor import TracedTensor
    from nncf.torch.initialization import DefaultInitializingDataLoader

    class MMInitializeDataLoader(DefaultInitializingDataLoader):
        def get_inputs(self, dataloader_output):
            return (), dataloader_output

    pathlib.Path(cfg.work_dir).mkdir(parents=True, exist_ok=True)
    nncf_config = NNCFConfig(cfg.nncf_config)
    logger = get_root_logger(cfg.log_level)

    if data_loader_for_init:
        wrapped_loader = MMInitializeDataLoader(data_loader_for_init)
        nncf_config = register_default_init_args(
            nncf_config,
            wrapped_loader,
            device=next(model.parameters()).device)

    if cfg.get('resume_from'):
        checkpoint_path = cfg.get('resume_from')
        assert is_checkpoint_nncf(checkpoint_path), (
            'It is possible to resume training with NNCF compression from NNCF checkpoints only. '
            'Use "load_from" with non-compressed model for further compression by NNCF.'
        )
    elif cfg.get('load_from'):
        checkpoint_path = cfg.get('load_from')
        if not is_checkpoint_nncf(checkpoint_path):
            checkpoint_path = None
            logger.info('Received non-NNCF checkpoint to start training '
                        '-- initialization of NNCF fields will be done')
    else:
        checkpoint_path = None

    if not data_loader_for_init and not checkpoint_path:
        raise RuntimeError('Either data_loader_for_init or NNCF pre-trained '
                           'model checkpoint should be set')

    if checkpoint_path:
        logger.info(f'Loading NNCF checkpoint from {checkpoint_path}')
        logger.info(
            'Please, note that this first loading is made before addition of '
            'NNCF FakeQuantize nodes to the model, so there may be some '
            'warnings on unexpected keys')
        resuming_state_dict = load_checkpoint(model, checkpoint_path)
        logger.info(f'Loaded NNCF checkpoint from {checkpoint_path}')
    else:
        resuming_state_dict = None

    if "nncf_compress_postprocessing" in cfg:
        # NB: This parameter is used to choose if we should try to make NNCF compression
        #     for a whole model graph including postprocessing (`nncf_compress_postprocessing=True`),
        #     or make NNCF compression of the part of the model without postprocessing
        #     (`nncf_compress_postprocessing=False`).
        #     Our primary goal is to make NNCF compression of such big part of the model as
        #     possible, so `nncf_compress_postprocessing=True` is our primary choice, whereas
        #     `nncf_compress_postprocessing=False` is our fallback decision.
        #     When we manage to enable NNCF compression for sufficiently many models,
        #     we should keep one choice only.
        nncf_compress_postprocessing = cfg.get('nncf_compress_postprocessing')
        logger.debug('set should_compress_postprocessing='
                     f'{nncf_compress_postprocessing}')
    else:
        nncf_compress_postprocessing = True

    def _get_fake_data_for_forward(cfg, nncf_config, get_fake_input_func):
        input_size = nncf_config.get("input_info").get('sample_size')
        assert get_fake_input_func is not None
        assert len(input_size) == 4 and input_size[0] == 1
        H, W, C = input_size[2], input_size[3], input_size[1]
        device = next(model.parameters()).device
        with no_nncf_trace():
            return get_fake_input_func(cfg,
                                       orig_img_shape=tuple([H, W, C]),
                                       device=device)

    def dummy_forward(model):
        fake_data = _get_fake_data_for_forward(cfg, nncf_config,
                                               get_fake_input_func)
        img = fake_data["imgs"]
        img = nncf_model_input(img)
        if export:
            img, _, _ = model.reshape_input(imgs=img)
            model(imgs=img)
        else:
            model(imgs=img, return_loss=False)

    def wrap_inputs(args, kwargs):
        # during dummy_forward
        if not len(kwargs):
            if not isinstance(args[0][0], TracedTensor):
                args[0][0] = nncf_model_input(args[0][0])
            return args, kwargs

        # during building original graph
        if not kwargs.get('return_loss') and kwargs.get('forward_export'):
            return args, kwargs

        # during model's forward
        assert 'imgs' in kwargs, 'During model forward imgs must be in kwargs'
        img = kwargs['imgs']
        if isinstance(img, list):
            assert len(img) == 1, 'Input list must have a length 1'
            assert torch.is_tensor(
                img[0]), 'Input for a model must be a tensor'
            if not isinstance(img[0], TracedTensor):
                img[0] = nncf_model_input(img[0])
        else:
            assert torch.is_tensor(img), 'Input for a model must be a tensor'
            if not isinstance(img, TracedTensor):
                img = nncf_model_input(img)
        kwargs['imgs'] = img
        return args, kwargs

    model.dummy_forward_fn = dummy_forward

    if 'log_dir' in nncf_config:
        os.makedirs(nncf_config['log_dir'], exist_ok=True)
    compression_ctrl, model = create_compressed_model(
        model,
        nncf_config,
        dummy_forward_fn=dummy_forward,
        wrap_inputs_fn=wrap_inputs,
        compression_state=resuming_state_dict)

    return compression_ctrl, model
示例#17
0
def test_percentile_init(quantization_mode):
    class SyntheticDataset(torch.utils.data.Dataset):
        def __init__(self):
            self._length = 1

        def __getitem__(self, idx):
            if idx >= self._length:
                raise StopIteration
            test_input_sample = torch.zeros([1, 100, 100])
            for i in range(0, 100):
                for j in range(0, 100):
                    test_input_sample[0][i][j] = i * 100 + j
            return test_input_sample, test_input_sample

        def __len__(self):
            return self._length

    data_loader = torch.utils.data.DataLoader(SyntheticDataset(), batch_size=1)

    config_with_init = NNCFConfig()
    config_with_init.update({
        "input_info": {
            "sample_size": [1, 1, 100, 100]
        },
        "compression": {
            "algorithm": "quantization",
            "activations": {
                "mode": quantization_mode,
            },
            "weights": {
                "mode": quantization_mode,
            },
            "initializer": {
                "range": {
                    "num_init_steps": 1,
                    "type": "percentile",
                    "min_percentile": 32.10,
                    "max_percentile": 67.89
                }
            }
        }
    })

    # Activations init check
    id_model = SingleConv2dIdentityModel()
    config_with_init.register_extra_structs(
        [QuantizationRangeInitArgs(data_loader)])
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        id_model, config_with_init)

    act_quantizer = next(iter(compression_ctrl.non_weight_quantizers.values()))

    def assert_range(quantizer: BaseQuantizer):
        # Absolute tolerance is 1.0 due to percentile value interpolation
        if quantization_mode == 'symmetric':
            assert quantizer.scale.item() == approx(6789, abs=1.0)
        else:
            assert quantizer.input_low.item() == approx(3210, abs=1.0)
            assert quantizer.input_range.item() == approx(3578, abs=1.0)

    assert_range(act_quantizer)
    # Weight init check
    synth_weight_model = SingleConv2dSyntheticWeightModel()
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        synth_weight_model, config_with_init)

    weight_quantizer = next(
        iter(compression_ctrl.non_weight_quantizers.values()))
    assert_range(weight_quantizer)
示例#18
0
def register_bn_adaptation_init_args(config: NNCFConfig):
    config.register_extra_structs(
        [BNAdaptationInitArgs(data_loader=DummyDataLoader(), device=None)])