Example #1
def create_nncf_model_and_single_algo_builder(model: Module, config: NNCFConfig,
                                              dummy_forward_fn: Callable[[Module], Any] = None,
                                              wrap_inputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None) \
        -> Tuple[NNCFNetwork, PTCompressionAlgorithmController]:
    assert isinstance(config, NNCFConfig)
    NNCFConfig.validate(config)
    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching',
                                               [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    compressed_model = NNCFNetwork(
        model,
        input_infos=input_info_list,
        dummy_forward_fn=dummy_forward_fn,
        wrap_inputs_fn=wrap_inputs_fn,
        ignored_scopes=ignored_scopes,
        target_scopes=target_scopes,
        scopes_without_shape_matching=scopes_without_shape_matching)

    algo_names = extract_algorithm_names(config)
    assert len(algo_names) == 1
    algo_name = next(iter(algo_names))
    builder_cls = PT_COMPRESSION_ALGORITHMS.get(algo_name)
    builder = builder_cls(config, should_init=True)
    return compressed_model, builder
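
A minimal usage sketch for the helper above, assuming a config carrying exactly one algorithm; the model, sample size and algorithm name are illustrative values, not part of the original:

import torch
from nncf import NNCFConfig

config = NNCFConfig.from_dict({
    "input_info": {"sample_size": [1, 3, 32, 32]},
    "compression": {"algorithm": "quantization"}
})
model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, kernel_size=3), torch.nn.ReLU())
compressed_model, builder = create_nncf_model_and_single_algo_builder(model, config)
builder.apply_to(compressed_model)  # inserts the algorithm's compression operations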
Example #2
    def test_operator_metatype_marking(self):
        from nncf.torch.graph.operator_metatypes import PTConv2dMetatype, PTBatchNormMetatype, PTRELUMetatype, \
            PTMaxPool2dMetatype, PTTransposeMetatype, PTConvTranspose2dMetatype, PTDepthwiseConv2dSubtype, \
            PTAddMetatype, PTAvgPool2dMetatype, PTLinearMetatype, PTReshapeMetatype, PTInputNoopMetatype, \
            PTOutputNoopMetatype
        ref_scope_vs_metatype_dict = {
            "/" + MODEL_INPUT_OP_NAME + "_0": PTInputNoopMetatype,
            "ModelForMetatypeTesting/NNCFConv2d[conv_regular]/conv2d_0": PTConv2dMetatype,
            "ModelForMetatypeTesting/NNCFBatchNorm[bn]/batch_norm_0": PTBatchNormMetatype,
            "ModelForMetatypeTesting/relu_0": PTRELUMetatype,
            "ModelForMetatypeTesting/transpose__0": PTTransposeMetatype,
            "ModelForMetatypeTesting/MaxPool2d[max_pool2d]/max_pool2d_0": PTMaxPool2dMetatype,
            "ModelForMetatypeTesting/NNCFConvTranspose2d[conv_transpose]/conv_transpose2d_0": PTConvTranspose2dMetatype,
            "ModelForMetatypeTesting/NNCFConv2d[conv_depthwise]/conv2d_0": PTDepthwiseConv2dSubtype,
            "ModelForMetatypeTesting/__iadd___0": PTAddMetatype,
            "ModelForMetatypeTesting/AdaptiveAvgPool2d[adaptive_avg_pool]/adaptive_avg_pool2d_0": PTAvgPool2dMetatype,
            "ModelForMetatypeTesting/NNCFLinear[linear]/linear_0": PTLinearMetatype,
            "ModelForMetatypeTesting/flatten_0": PTReshapeMetatype,
            "/" + MODEL_OUTPUT_OP_NAME + "_0": PTOutputNoopMetatype,
        }

        class ModelForMetatypeTesting(torch.nn.Module):
            def __init__(self):
                super().__init__()
                self.conv_regular = torch.nn.Conv2d(in_channels=3,
                                                    out_channels=16,
                                                    kernel_size=3)
                self.bn = torch.nn.BatchNorm2d(num_features=16)
                self.max_pool2d = torch.nn.MaxPool2d(kernel_size=2)
                self.conv_transpose = torch.nn.ConvTranspose2d(in_channels=16,
                                                               out_channels=8,
                                                               kernel_size=3)
                self.conv_depthwise = torch.nn.Conv2d(in_channels=8, out_channels=8,
                                                      kernel_size=5, groups=8)
                self.adaptive_avg_pool = torch.nn.AdaptiveAvgPool2d(output_size=1)
                self.linear = torch.nn.Linear(in_features=8, out_features=1)

            def forward(self, input_):
                x = self.conv_regular(input_)
                x = self.bn(x)
                x = torch.nn.functional.relu(x)
                x.transpose_(2, 3)
                x = self.max_pool2d(x)
                x = self.conv_transpose(x)
                x = self.conv_depthwise(x)
                x += torch.ones_like(x)
                x = self.adaptive_avg_pool(x)
                x = self.linear(x.flatten())
                return x

        model = ModelForMetatypeTesting()
        nncf_network = NNCFNetwork(model, [ModelInputInfo([1, 3, 300, 300])])
        nncf_graph = nncf_network.get_original_graph()

        for nncf_node in nncf_graph.get_all_nodes():  # type: NNCFNode
            assert nncf_node.node_name in ref_scope_vs_metatype_dict
            ref_metatype = ref_scope_vs_metatype_dict[nncf_node.node_name]
            assert nncf_node.metatype == ref_metatype
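
As a quick sanity check outside of a test, the same node-to-metatype mapping can be dumped for any wrapped model; a minimal sketch, assuming an already-built NNCFNetwork named nncf_network:

graph = nncf_network.get_original_graph()
for node in graph.get_all_nodes():
    # Each node carries its metatype class; its __name__ gives a readable label.
    print(f"{node.node_name}: {node.metatype.__name__}")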
Example #3
def test_disable_shape_matching():
    class MatMulModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.dummy_param = torch.nn.Parameter(torch.ones([1]))

        def forward(self, inputs):
            half1, half2 = torch.chunk(inputs, 2, dim=2)
            return torch.bmm(half1, half2.transpose(1, 2))

    model = MatMulModel()

    input_shape_1 = (3, 32, 32)
    input_shape_2 = (4, 64, 64)

    qnet_no_shape = NNCFNetwork(deepcopy(model), input_infos=[ModelInputInfo(input_shape_1), ],
                                scopes_without_shape_matching=['MatMulModel'])  # type: NNCFNetwork

    context = qnet_no_shape.get_tracing_context()
    context.enable_trace_dynamic_graph()
    _ = qnet_no_shape(torch.zeros(*input_shape_1))
    graph_1 = deepcopy(qnet_no_shape.get_dynamic_graph())

    _ = qnet_no_shape(torch.zeros(*input_shape_2))
    graph_2 = deepcopy(qnet_no_shape.get_dynamic_graph())

    assert graph_1 == graph_2

    nodes_1 = list(graph_1.get_all_nodes())
    assert len(nodes_1) == 5  # 1 input node + 1 chunk + 1 transpose + 1 matmul + 1 output node

    qnet = NNCFNetwork(model, input_infos=[ModelInputInfo(input_shape_1), ])  # type: NNCFNetwork
    context = qnet.get_tracing_context()
    context.enable_trace_dynamic_graph()
    _ = qnet(torch.zeros(*input_shape_1))
    _ = qnet(torch.zeros(*input_shape_2))
    # The second forward run should have increased the registered node count, since shape
    # matching was not disabled for this network and it was run with an input tensor of
    # a different shape
    assert qnet.get_dynamic_graph().get_nodes_count() > graph_1.get_nodes_count()
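
The same behaviour can be requested declaratively: scopes_without_shape_matching is also a top-level NNCF config key (Examples #1 and #12 read it with config.get). A minimal config sketch, with the scope value taken from this test:

from nncf import NNCFConfig

config = NNCFConfig.from_dict({
    "input_info": {"sample_size": [3, 32, 32]},
    "scopes_without_shape_matching": ["MatMulModel"]
})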
Example #4
def create_test_quantization_env(model_creator=BasicConvTestModel,
                                 input_info_cfg=None) -> QuantizationEnv:
    if input_info_cfg is None:
        input_info_cfg = {"input_info": {"sample_size": [1, 1, 4, 4]}}

    model = model_creator()
    nncf_network = NNCFNetwork(model,
                               input_infos=create_input_infos(input_info_cfg))
    hw_config_type = HWConfigType.VPU
    hw_config_path = HWConfig.get_path_to_hw_config(hw_config_type)
    hw_config = PTHWConfig.from_json(hw_config_path)
    setup = PropagationBasedQuantizerSetupGenerator(
        NNCFConfig(), nncf_network, hw_config=hw_config).generate_setup()
    dummy_multi_setup = MultiConfigQuantizerSetup.from_single_config_setup(setup)
    for qp in dummy_multi_setup.quantization_points.values():
        qconf_constraint_list = []
        qconf = qp.possible_qconfigs[0]
        bit_set = [8, 4, 2] if 'conv' in str(qp.insertion_point) else [8, 4]
        for bits in bit_set:
            adj_qconf = deepcopy(qconf)
            adj_qconf.num_bits = bits
            qconf_constraint_list.append(adj_qconf)
        qp.possible_qconfigs = qconf_constraint_list
    experimental_builder = ExperimentalQuantizationBuilder(
        dummy_multi_setup, setup, {}, hw_config)
    experimental_builder.apply_to(nncf_network)
    # pylint:disable=line-too-long
    experimental_ctrl = experimental_builder.build_controller(nncf_network)
    data_loader = create_ones_mock_dataloader(input_info_cfg)
    constraints = HardwareQuantizationConstraints()
    for qid, qp_id_set in experimental_ctrl.module_id_to_qp_id_translation_dict.items():
        first_qp_id_for_this_quantizer_module = next(iter(qp_id_set))
        qconfigs = dummy_multi_setup.quantization_points[
            first_qp_id_for_this_quantizer_module].possible_qconfigs
        constraints.add(qid, qconfigs)

    return QuantizationEnv(nncf_network,
                           experimental_ctrl,
                           constraints,
                           data_loader,
                           lambda *x: 0,
                           hw_config_type=HWConfigType.VPU,
                           params=QuantizationEnvParams(
                               compression_ratio=0.15,
                               eval_subset_ratio=1.0,
                               skip_constraint=False,
                               performant_bw=False,
                               finetune=False,
                               bits=[2, 4, 8],
                               dump_init_precision_data=False))
Example #5
def test_get_op_nodes_in_scope():
    model = TwoConvTestModel()
    nncf_model = NNCFNetwork(deepcopy(model), input_infos=[ModelInputInfo([1, 1, 4, 4])])  # type: NNCFNetwork
    nncf_graph = nncf_model.get_original_graph()

    # Valid scopes should be successfully found
    valid_nncf_modules = nncf_model.get_nncf_modules()
    nodes_list = list(nncf_graph.get_all_node_ids())
    for module_scope, _ in valid_nncf_modules.items():
        matching_nncf_nodes = nncf_graph.get_op_nodes_in_scope(module_scope)
        assert len(matching_nncf_nodes) == 1
        node = matching_nncf_nodes[0]
        assert isinstance(node, NNCFNode)
        assert node.node_id in nodes_list

    fake_model = BasicConvTestModel()
    fake_nncf_model = NNCFNetwork(deepcopy(fake_model), input_infos=[ModelInputInfo([1, 1, 4, 4])])

    # Invalid scopes should not be found
    fake_nncf_modules = fake_nncf_model.get_nncf_modules()
    for module_scope, _ in fake_nncf_modules.items():
        matching_nncf_nodes = nncf_graph.get_op_nodes_in_scope(module_scope)
        assert not matching_nncf_nodes
Example #6
def test_weight_normed_modules_are_replaced_correctly():
    nncf_model = NNCFNetwork(WeightNormedConvModel(), input_infos=[ModelInputInfo([1, 1, 10])])

    wrapped_conv = nncf_model.conv
    assert hasattr(wrapped_conv, "weight_g")
    assert hasattr(wrapped_conv, "weight_v")
    assert hasattr(wrapped_conv, "weight")

    assert isinstance(wrapped_conv.weight_g, torch.nn.Parameter)
    assert isinstance(wrapped_conv.weight_v, torch.nn.Parameter)
    assert not isinstance(wrapped_conv.weight, torch.nn.Parameter)

    #pylint:disable=protected-access
    assert len(wrapped_conv._forward_pre_hooks) == 1
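
For reference, a weight-normed convolution of the kind this test expects can be built with torch.nn.utils.weight_norm, which is what creates the weight_g/weight_v parameters and the single forward pre-hook asserted above; a minimal sketch mirroring the assumed shape of WeightNormedConvModel:

import torch

class WeightNormedConv(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # weight_norm splits `weight` into direction (weight_v) and magnitude (weight_g)
        # parameters and recomputes `weight` in a forward pre-hook on every call
        self.conv = torch.nn.utils.weight_norm(torch.nn.Conv1d(1, 1, kernel_size=3))

    def forward(self, x):
        return self.conv(x)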
Example #7
def test_compressed_graph_models_hw(desc, hw_config_type):
    model = desc.model_builder()
    config = get_basic_quantization_config_with_hw_config_type(hw_config_type.value,
                                                               input_sample_size=desc.input_sample_sizes)
    input_info_list = create_input_infos(config)
    compressed_model = NNCFNetwork(model, input_infos=input_info_list)

    # pylint:disable=protected-access
    quantization_builder = QuantizationBuilder(config, should_init=False)
    single_config_quantizer_setup = quantization_builder._get_quantizer_setup(compressed_model)
    sketch_graph = compressed_model.get_original_graph()

    potential_quantizer_graph = prepare_potential_quantizer_graph(sketch_graph, single_config_quantizer_setup)
    check_nx_graph(potential_quantizer_graph, desc.dot_filename, _case_dir(hw_config_type.value), sort_dot_graph=False)
Example #8
def test_custom_module_registering():
    model = TwoConvTestModelWithUserModule()
    nncf_model = NNCFNetwork(model, input_infos=[ModelInputInfo([1, 1, 4, 4])])  # type: NNCFNetwork

    from nncf.torch.layers import UNWRAPPED_USER_MODULES
    assert ModuleOfUser in UNWRAPPED_USER_MODULES.registry_dict.values()

    # pylint: disable=protected-access
    assert isinstance(nncf_model.user_module, ModuleOfUser)
    assert isinstance(nncf_model.user_module, _NNCFModuleMixin)
    assert type(nncf_model.user_module).__name__ == "NNCFUserModuleOfUser"

    user_module_attrs = dir(nncf_model.user_module)
    for attr in dir(_NNCFModuleMixin):
        assert attr in user_module_attrs
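
The user module in this test is presumably placed into UNWRAPPED_USER_MODULES via NNCF's register_module decorator, which lives in nncf.torch.layers alongside that registry; a minimal sketch of such a registration, with an illustrative module of my own:

import torch
from nncf.torch.layers import register_module

@register_module()
class MyUserModule(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.weight = torch.nn.Parameter(torch.ones([1]))

    def forward(self, x):
        # Per the test above, NNCF wraps registered user modules as "NNCFUser<ClassName>"
        return x * self.weight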
Example #9
    def get_model_and_ctrl_with_applied_hw_config_quantization(
            model: torch.nn.Module,
            hw_config_dict: dict,
            should_be_quantize_inputs: bool = True):
        nncf_config = get_quantization_config_without_range_init(model_size=1)
        nncf_config["compression"].update(
            {"quantize_inputs": should_be_quantize_inputs})
        nncf_config["target_device"] = "ANY"  # for compatibility

        net = NNCFNetwork(model, input_infos=[ModelInputInfo([1, 2, 1, 1])])
        hw_config = PTHWConfig.from_dict(hw_config_dict)
        qbuilder = QuantizationBuilder(nncf_config, should_init=False)
        qbuilder.hw_config = hw_config
        net = qbuilder.apply_to(net)
        ctrl = qbuilder.build_controller(net)
        return net, ctrl
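
For reference, hw_config_dict follows the same schema as NNCF's bundled HW config JSON files, which is what PTHWConfig.from_dict parses above; a minimal sketch, in which the quantizer alias q8_a and the exact field values are illustrative:

hw_config_dict = {
    "target_device": "ANY",
    "config": {
        "quantization": {
            "q8_a": {"bits": 8, "mode": "symmetric", "granularity": "pertensor"}
        }
    },
    "operations": [
        {"type": "Convolution",
         "quantization": {"activations": "q8_a", "weights": "q8_a"}}
    ]
}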
Example #10
def test_pruning_node_selector(
        test_input_info_struct_: GroupPruningModulesTestStruct):
    model = test_input_info_struct_.model
    non_pruned_module_nodes = test_input_info_struct_.non_pruned_module_nodes
    pruned_groups_by_node_id = test_input_info_struct_.pruned_groups_by_node_id
    prune_first, prune_downsample = test_input_info_struct_.prune_params

    pruning_operations = [v.op_func_name for v in NNCF_PRUNING_MODULES_DICT]
    grouping_operations = PTElementwisePruningOp.get_all_op_aliases()
    from nncf.common.pruning.node_selector import PruningNodeSelector
    pruning_node_selector = PruningNodeSelector(PT_PRUNING_OPERATOR_METATYPES,
                                                pruning_operations,
                                                grouping_operations,
                                                None,  # ignored_scopes
                                                None,  # target_scopes
                                                prune_first,
                                                prune_downsample)
    model = model()
    model.eval()
    nncf_network = NNCFNetwork(model,
                               input_infos=[ModelInputInfo([1, 1, 8, 8])])
    graph = nncf_network.get_original_graph()
    pruning_groups = pruning_node_selector.create_pruning_groups(graph)

    # 1. Check all not pruned modules
    all_pruned_nodes = pruning_groups.get_all_nodes()
    all_pruned_modules = [
        nncf_network.get_containing_module(node.node_name)
        for node in all_pruned_nodes
    ]
    for node_name in non_pruned_module_nodes:
        module = nncf_network.get_containing_module(node_name)
        assert module is not None and module not in all_pruned_modules

    # 2. Check that all pruned groups are valid
    for group_by_id in pruned_groups_by_node_id:
        first_node_id = group_by_id[0]
        cluster = pruning_groups.get_cluster_containing_element(first_node_id)
        cluster_node_ids = [n.node_id for n in cluster.elements]
        cluster_node_ids.sort()

        assert Counter(cluster_node_ids) == Counter(group_by_id)
Example #11
def test_gnmt_quantization(_case_config):
    model = GNMT(vocab_size=32)
    model = replace_lstm(model)
    forward_fn_ = gnmt_forward_fn(seq_len=10, batch_size=3, vocab_size=32)

    config = get_basic_quantization_config(_case_config.quant_type)
    config["input_info"] = [
        {
            "sample_size": [3, 10],
            "type": "long"
        },
        {
            "sample_size": [3],
            "type": "long"
        },
        {
            "sample_size": [3, 10],
            "type": "long"
        }
    ]
    config["compression"].update({
        "ignored_scopes": ["GNMT/ResidualRecurrentEncoder[encoder]/Embedding[embedder]",
                           "GNMT/ResidualRecurrentDecoder[decoder]/Embedding[embedder]"]})

    compressed_model = NNCFNetwork(model,
                                   input_infos=create_input_infos(config),
                                   dummy_forward_fn=forward_fn_,
                                   wrap_inputs_fn=gnmt_wrap_inputs_fn,
                                   scopes_without_shape_matching=[
                                       'GNMT/ResidualRecurrentDecoder[decoder]/RecurrentAttention[att_rnn]/'
                                       'BahdanauAttention[attn]'])

    builder = QuantizationBuilder(config, should_init=False)
    builder.apply_to(compressed_model)

    check_model_graph(compressed_model, 'gnmt_variable.dot', _case_config.graph_dir)
Example #12
def create_compressed_model(model: Module,
                            config: NNCFConfig,
                            compression_state: Optional[Dict[str, Any]] = None,
                            dummy_forward_fn: Callable[[Module], Any] = None,
                            wrap_inputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                            wrap_outputs_fn: Callable[[Tuple, Dict], Tuple[Tuple, Dict]] = None,
                            dump_graphs=True) \
        -> Tuple[CompressionAlgorithmController, NNCFNetwork]:
    """
    The main function used to produce a model ready for compression fine-tuning from an original PyTorch
    model and a configuration object.
    :param model: The original model. Should have its parameters already loaded from a checkpoint or another
    source.
    :param config: A configuration object used to determine the exact compression modifications to be applied
    to the model
    :param compression_state: representation of the entire compression state to unambiguously restore
    the compressed model. Includes builder and controller states.
    :param dummy_forward_fn: if supplied, will be used instead of a *forward* function call to build
    the internal graph representation via tracing. Specifying this is useful when the original training pipeline
    has special formats of data loader output or has additional *forward* arguments other than input tensors.
    Otherwise, the *forward* call of the model during graph tracing will be made with mock tensors according
    to the shape specified in the config object. The dummy_forward_fn code MUST contain calls to nncf.nncf_model_input
    functions made with each compressed model input tensor in the underlying model's args/kwargs tuple, and these
    calls should be exactly the same as in the wrap_inputs_fn function code (see below); if dummy_forward_fn is
    specified, then wrap_inputs_fn also must be specified.
    :param wrap_inputs_fn: if supplied, will be used on the module's input arguments during a regular, non-dummy
    forward call before passing the inputs to the underlying compressed model. This is required if the model's input
    tensors that are important for compression are not supplied as arguments to the model's forward call directly, but
    instead are located in a container (such as list), and the model receives the container as an argument.
    wrap_inputs_fn should take as input two arguments - the tuple of positional arguments to the underlying
    model's forward call, and a dict of keyword arguments to the same. The function should wrap each tensor among the
    supplied model's args and kwargs that is important for compression (e.g. quantization) with an nncf.nncf_model_input
    function, which is a no-operation function and marks the tensors as inputs to be traced by NNCF in the internal
    graph representation. Output is the tuple of (args, kwargs), where args and kwargs are the same as were supplied
    on input, but with each compression-relevant tensor among them replaced by its nncf.nncf_model_input-wrapped
    counterpart. Must be specified if dummy_forward_fn is specified.
    :param dump_graphs: Whether or not should also dump the internal graph representation of the
    original and compressed models in the .dot format into the log directory.
    :return: A controller for the compression algorithm (or algorithms, in which case the controller
    is an instance of CompositeCompressionController) and the model ready for compression parameter training wrapped
    as an object of NNCFNetwork."""

    if dummy_forward_fn is not None and wrap_inputs_fn is None:
        raise ValueError(
            "A custom dummy forward function was specified, but the corresponding input wrapping function "
            "was not. In case a custom dummy forward function is specified for purposes of NNCF graph "
            "building, then the wrap_inputs_fn parameter MUST also be specified and be consistent with "
            "the input wrapping done in dummy_forward_fn.")

    is_legacy_model_state_dict = compression_state is not None and \
                                 BaseController.BUILDER_STATE not in compression_state and \
                                 BaseController.CONTROLLER_STATE not in compression_state
    maybe_convert_legacy_names_in_compress_state(compression_state)
    # Compress the model that will be deployed for inference on the target device. There is no need to compress
    # parts of the model that are used at the training stage only (e.g. the AuxLogits of the Inception-v3 model) or
    # unused modules with weights. As a consequence, there is no need to worry about spoiling the BN statistics,
    # as they are disabled in eval mode.
    model.eval()

    if dump_graphs:
        if dummy_forward_fn is None:
            input_info_list = create_input_infos(config)
            graph_builder = GraphBuilder(
                custom_forward_fn=create_dummy_forward_fn(
                    input_info_list, with_input_tracing=True))
        else:
            graph_builder = GraphBuilder(custom_forward_fn=dummy_forward_fn)

        if is_main_process():
            graph = graph_builder.build_graph(model)
            graph.visualize_graph(
                osp.join(config.get("log_dir", "."), "original_graph.dot"))

    set_debug_log_dir(config.get("log_dir", "."))

    input_info_list = create_input_infos(config)
    scopes_without_shape_matching = config.get('scopes_without_shape_matching',
                                               [])
    ignored_scopes = config.get('ignored_scopes')
    target_scopes = config.get('target_scopes')

    original_model_accuracy = None
    if is_accuracy_aware_training(config):
        if config.has_extra_struct(ModelEvaluationArgs):
            evaluation_args = config.get_extra_struct(ModelEvaluationArgs)
            with torch.no_grad():
                original_model_accuracy = evaluation_args.eval_fn(model)
                nncf_logger.info("Non-compressed model accuracy = {}".format(
                    original_model_accuracy))

    compressed_model = NNCFNetwork(
        model,
        input_infos=input_info_list,
        dummy_forward_fn=dummy_forward_fn,
        wrap_inputs_fn=wrap_inputs_fn,
        wrap_outputs_fn=wrap_outputs_fn,
        ignored_scopes=ignored_scopes,
        target_scopes=target_scopes,
        scopes_without_shape_matching=scopes_without_shape_matching,
        original_model_accuracy=original_model_accuracy)

    should_init = compression_state is None

    builder = create_compression_algorithm_builder(config, should_init)
    is_state_loadable = not is_legacy_model_state_dict and compression_state is not None
    if is_state_loadable:
        builder.load_state(compression_state[BaseController.BUILDER_STATE])

    builder.apply_to(compressed_model)
    compression_ctrl = builder.build_controller(compressed_model)
    if is_state_loadable:
        compression_ctrl.load_state(
            compression_state[BaseController.CONTROLLER_STATE])

    # Required to ensure that the model leaving create_compressed_model has correct compressed graph.
    # In particular, this is currently required for correct functioning of RNNs.
    compressed_model.rebuild_graph()

    try:
        if is_legacy_model_state_dict:
            from nncf.torch import load_state
            state_dict_to_load = compression_state.get('state_dict',
                                                       compression_state)
            load_state(compressed_model, state_dict_to_load, is_resume=True)
    finally:
        if dump_graphs and is_main_process():
            compressed_model_graph = compressed_model.get_graph()
            compressed_model_graph.visualize_graph(
                osp.join(config.get("log_dir", "."), "compressed_graph.dot"))

    # Synchronize all processes if run in distributed mode
    if is_dist_avail_and_initialized():
        try:
            barrier()
        # An exception can be raised while running barrier() if the backend
        # is not in the supported list: https://pytorch.org/docs/stable/distributed.html
        except RuntimeError as err:
            nncf_logger.warning(err)
            nncf_logger.warning(
                "NNCF will continue to work, but cannot guarantee that the processes will "
                "finish compressing the model at the same time. If your training pipeline "
                "requires the processes to be synchronized, please pay attention to this error.")
            return compression_ctrl, compressed_model
    compressed_model.get_tracing_context().disable_trace_dynamic_graph()
    return compression_ctrl, compressed_model
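
The contract between dummy_forward_fn and wrap_inputs_fn described in the docstring is easiest to see in code. Below is a minimal sketch for a model whose forward takes a single list of tensors; the function bodies and the sample shape are illustrative, and nncf_model_input is imported from nncf.torch in recent NNCF versions (the docstring refers to it as nncf.nncf_model_input):

import torch
from nncf.torch import nncf_model_input

def wrap_inputs_fn(args, kwargs):
    # forward(tensor_list): wrap every compression-relevant tensor in the container.
    tensor_list = args[0]
    wrapped_list = [nncf_model_input(t) for t in tensor_list]
    return (wrapped_list,) + args[1:], kwargs

def dummy_forward_fn(model):
    # Must mark the inputs exactly as wrap_inputs_fn does, hence the reuse.
    mock_inputs = [torch.ones([1, 3, 32, 32])]
    args, kwargs = wrap_inputs_fn((mock_inputs,), {})
    return model(*args, **kwargs)

Both functions would then be passed together to create_compressed_model(model, config, dummy_forward_fn=dummy_forward_fn, wrap_inputs_fn=wrap_inputs_fn), satisfying the rule that specifying one requires the other.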
Example #13
    def setup(self):
        self.compressed_model = NNCFNetwork(InsertionPointTestModel(),
                                            [ModelInputInfo([1, 1, 10, 10])])  # type: NNCFNetwork
Example #14
def test_check_correct_modules_replacement():
    model = TwoConvTestModel()
    nncf_model = NNCFNetwork(deepcopy(model), input_infos=[ModelInputInfo([1, 1, 4, 4])])  # type: NNCFNetwork

    _, nncf_modules = check_correct_nncf_modules_replacement(model, nncf_model)
    assert set(nncf_modules) == set(nncf_model.get_nncf_modules())