Example #1
def test_quantize_inputs():
    model = QuantizeInputsTestModel()
    config = get_quantization_config_without_range_init()
    config["input_info"] = [{
        "sample_size": [2, 3, 32, 32],
    }, {
        "sample_size": [2, 3, 32, 32],
    }, {
        "sample_size": [2, 3, 32, 32],
    }, {
        "sample_size": [2, 3, 32, 32],
    }, {
        "sample_size": [2, 3, 32, 32],
    }]

    model, _ = create_compressed_model_and_algo_for_test(model, config)
    REF_QUANTIZED_INPUT_MODULE_SCOPES = [
        '/nncf_model_input_0', '/nncf_model_input_1', '/nncf_model_input_2',
        '/nncf_model_input_3', '/nncf_model_input_4'
    ]
    actual_input_quantizer_str_scopes = [
        str_scope for str_scope in model.activation_quantizers if 'nncf_model_input' in str_scope
    ]
    assert len(REF_QUANTIZED_INPUT_MODULE_SCOPES) == len(
        actual_input_quantizer_str_scopes)
    for ref_qinput_scope_str in REF_QUANTIZED_INPUT_MODULE_SCOPES:
        assert isinstance(model.activation_quantizers[ref_qinput_scope_str],
                          SymmetricQuantizer)
Example #2
def create_autoq_test_config(batch_size=10,
                             image_size=10,
                             num_channels=3,
                             num_init_samples=1):
    config = get_quantization_config_without_range_init()
    config['input_info'] = {
        "sample_size": [batch_size, num_channels, image_size, image_size],
    }
    config['batch_size'] = batch_size
    config['compression'].update({
        'initializer': {
            'precision': {
                "type": "autoq",
                "bits": [2, 4, 8],
                "iter_number": 2,
                "compression_ratio": 0.15,
                "eval_subset_ratio": 1.0,
                "warmup_iter_number": 1
            },
            'range': {
                'num_init_samples': num_init_samples
            },
            'batchnorm_adaptation': {
                'num_bn_adaptation_samples': 0,
                'num_bn_forget_samples': 0
            }
        }
    })
    return config
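
For reference, a minimal usage sketch of the helper above. It only checks the structure the helper itself assembles; the test name is hypothetical, and a real AutoQ initialization run would typically also need initialization data registered with the config, which is outside the scope of this snippet.

def test_autoq_config_structure():  # hypothetical name, for illustration only
    config = create_autoq_test_config(batch_size=2, image_size=8)
    precision_init = config['compression']['initializer']['precision']
    assert precision_init['type'] == 'autoq'
    assert precision_init['bits'] == [2, 4, 8]
    assert config['input_info']['sample_size'] == [2, 3, 8, 8]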
Example #3
def test_can_load_quant_algo__with_defaults():
    model = BasicConvTestModel()
    config = get_quantization_config_without_range_init()
    compression_algo_builder_list = create_compression_algorithm_builders(config)
    assert len(compression_algo_builder_list) == 1
    assert isinstance(compression_algo_builder_list[0], QuantizationBuilder)

    quant_model, _ = create_compressed_model_and_algo_for_test(deepcopy(model), config)

    model_conv = get_all_modules_by_type(model, 'Conv2d')
    quant_model_conv = get_all_modules_by_type(quant_model.get_nncf_wrapped_model(), 'NNCFConv2d')
    assert len(model_conv) == len(quant_model_conv)

    for module_scope, _ in model_conv.items():
        quant_scope = deepcopy(module_scope)  # type: Scope
        quant_scope.pop()
        quant_scope.push(ScopeElement('NNCFConv2d', 'conv'))
        assert quant_scope in quant_model_conv.keys()

        store = []
        for op in quant_model_conv[quant_scope].pre_ops.values():
            if isinstance(op, (UpdateInputs, UpdateWeight)) and isinstance(op.operand, SymmetricQuantizer):
                assert op.__class__.__name__ not in store
                store.append(op.__class__.__name__)
        assert UpdateWeight.__name__ in store
Example #4
def create_hawq_test_config(batch_size=10,
                            num_data_points=100,
                            image_size=10):
    config = get_quantization_config_without_range_init()
    config['input_info'] = {
        "sample_size": [batch_size, 3, image_size, image_size],
    }
    config['batch_size'] = batch_size
    config['compression'].update({
        'initializer': {
            'precision': {
                "type": "hawq",
                "bits": [4, 8, 6],
                "num_data_points": num_data_points,
                "iter_number": 1,
                "tolerance": 1e-2
            },
            'range': {
                'num_init_samples': 1
            },
            'batchnorm_adaptation': {
                'num_bn_adaptation_samples': 0,
                'num_bn_forget_samples': 0
            }
        }
    })
    return config
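
As a cross-check, an illustrative comparison of the two config builders in this section (create_autoq_test_config and create_hawq_test_config): both effectively disable batchnorm adaptation and differ mainly in the 'precision' initializer block. The assertions below only restate what the helpers construct.

hawq_config = create_hawq_test_config()
autoq_config = create_autoq_test_config()
assert hawq_config['compression']['initializer']['precision']['type'] == 'hawq'
assert autoq_config['compression']['initializer']['precision']['type'] == 'autoq'
assert (hawq_config['compression']['initializer']['batchnorm_adaptation']
        == autoq_config['compression']['initializer']['batchnorm_adaptation'])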
Example #5
def test_quantization_configs__with_precisions_list():
    model = ModelForTest()

    config = get_quantization_config_without_range_init()
    config['compression']['initializer'].update({
        "precision": {
            "bitwidth_per_scope": [[2, 'ModelForTest/NNCFConv2d[conv1]'],
                                   [4, 'ModelForTest/NNCFConv2d[conv2]']]
        }
    })
    config['compression']["activations"] = {"bits": 6}
    config['quantizer_setup_type'] = 'pattern_based'
    model, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config)

    ref_bits = [('ModelForTest/NNCFConv2d[conv1]module_weight', 2),
                ('ModelForTest/NNCFConv2d[conv2]module_weight', 4),
                ('ModelForTest/NNCFConv2d[conv2]/conv2d_0|OUTPUT', 6),
                ('ModelForTest/NNCFConv2d[conv1]/conv2d_0|OUTPUT', 6),
                ('/nncf_model_input_0|OUTPUT', 6)]

    for key, quantizer in compression_ctrl.all_quantizations.items():
        expected_bit = [
            ref_bit for (name, ref_bit) in ref_bits if name == str(key)
        ][0]
        assert quantizer.num_bits == expected_bit, 'Unexpected number of bits for {}'.format(
            key)

    ref_rows = [['2', '20', '0', '20'], ['4', '20', '0', '20'],
                ['6', '0', '60', '60']]
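    # Each ref_rows entry appears to be [bit width, weight quantizer share %, activation quantizer share %,
    # total share %] computed over all quantizers (here: 2 weight and 3 activation quantizers).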
    table = compression_ctrl.non_stable_metric_collectors[0].get_bits_stat()
    # pylint: disable=protected-access
    assert table._rows == ref_rows
Example #6
def test_can_quantize_free_operators(mocker):
    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            self.weight = nn.Parameter(torch.ones([1]))
            self.bias = nn.Parameter(torch.ones([1]))

        def forward(self, x):
            return F.linear(x, self.weight, self.bias)

    mod = Model()
    config = get_quantization_config_without_range_init(model_size=1)

    config["compression"].update({"quantize_inputs": False})
    quant_model, _ = create_compressed_model_and_algo_for_test(mod, config)

    quantizer_list = quant_model.get_compression_modules_by_type(
        CompressionModuleType.FUNCTION_QUANTIZER).values()
    assert len(quantizer_list) == 2
    for quantizer in quantizer_list:
        mocker.spy(quantizer, 'quantize')

    quant_model.do_dummy_forward()
    for quantizer in quantizer_list:
        assert quantizer.quantize.call_count == 1
Example #7
def test_can_create_quant_loss_and_scheduler():
    config = get_quantization_config_without_range_init()
    _, compression_ctrl = create_compressed_model_and_algo_for_test(MockModel(), config)

    loss = compression_ctrl.loss
    assert isinstance(loss, CompressionLoss)

    scheduler = compression_ctrl.scheduler
    assert isinstance(scheduler, CompressionScheduler)
Example #8
def test_get_weight_activation_pairs__with_double_weights_per_activation():
    model_cls = DoubleWeightsPerActivation
    model_name = model_cls.__name__
    config = get_quantization_config_without_range_init()

    _, algo = create_compressed_model_and_algo_for_test(model_cls(), config)

    actual_pairs = algo.get_weights_activation_quantizers_pairs()
    ref_pair_names = [(['NNCFConv2d[conv1]module_weight', 'NNCFConv2d[conv2]module_weight'],
                       'ReLU[relu]/RELU_0')]

    compare_weights_activation_quantizers_pairs(actual_pairs, algo, ref_pair_names, model_name)
Example #9
def test_get_weight_activation_pairs():
    model_cls = TwoConvTestModel
    config = get_quantization_config_without_range_init()
    _, algo = create_compressed_model_and_algo_for_test(model_cls(), config)

    actual_pairs = algo.get_weights_activation_quantizers_pairs()
    ref_pair_names = [(['Sequential[features]/Sequential[0]/NNCFConv2d[0]module_weight'],
                       'Sequential[features]/Sequential[0]/NNCFConv2d[0]module_input',
                       ),
                      (['Sequential[features]/Sequential[1]/NNCFConv2d[0]module_weight'],
                       'Sequential[features]/Sequential[0]/NNCFConv2d[0]/conv2d_0',
                       )]

    compare_weights_activation_quantizers_pairs(actual_pairs, algo, ref_pair_names, model_cls.__name__)
Example #10
    def get_model_and_ctrl_with_applied_hw_config_quantization(model: torch.nn.Module, hw_config_dict: dict,
                                                               should_be_quantize_inputs: bool = True):
        nncf_config = get_quantization_config_without_range_init(model_size=1)
        nncf_config["compression"].update({"quantize_inputs": should_be_quantize_inputs})
        nncf_config["hw_config_type"] = "mock"

        net = NNCFNetwork(model, input_infos=[ModelInputInfo([1, 2, 1, 1])])
        hw_config = HWConfig.from_dict(hw_config_dict)
        qbuilder = QuantizationBuilder(nncf_config["compression"], should_init=False)
        qbuilder.quantizer_setup_type = QuantizerSetupType.PROPAGATION_BASED
        qbuilder.hw_config = hw_config
        net = qbuilder.apply_to(net)
        ctrl = net.commit_compression_changes()
        return net, ctrl
Example #11
def test_staged_quantization_saves_enabled_quantizers_in_state_dict(tmp_path):
    config = get_quantization_config_without_range_init()
    config["compression"]["params"] = {
        "activations_quant_start_epoch": 2,
        "weights_quant_start_epoch": 1
    }
    model_save, ctrl_save = create_compressed_model_and_algo_for_test(BasicConvTestModel(), config)
    ctrl_save.scheduler.epoch_step()

    _, ctrl_load = create_compressed_model_and_algo_for_test(BasicConvTestModel(), config,
                                                             resuming_state_dict=model_save.state_dict())
    for quantizer_info in ctrl_load.non_weight_quantizers.values():
        assert not quantizer_info.quantizer_module_ref.is_enabled_quantization()
    for quantizer in ctrl_load.weight_quantizers.values():
        assert quantizer.is_enabled_quantization()
Example #12
def test_get_weight_activation_pairs__with_extra_module():
    model_cls = DoubleWeightsPerActivationWithExtraModule
    model_name = model_cls.__name__
    config = get_quantization_config_without_range_init()
    config["compression"].update({
        "quantizable_subgraph_patterns": [["sigmoid", "conv2d"]],
        "quantize_inputs": False})

    _, algo = create_compressed_model_and_algo_for_test(model_cls(), config)

    actual_pairs = algo.get_weights_activation_quantizers_pairs()
    ref_pair_names = [(['NNCFConv2d[conv1]module_weight', 'NNCFConv2d[conv2]module_weight'],
                       'ReLU[relu]/RELU_0')]

    compare_weights_activation_quantizers_pairs(actual_pairs, algo, ref_pair_names, model_name)
Example #13
def test_quantization_configs__with_defaults():
    model = BasicConvTestModel()
    config = get_quantization_config_without_range_init()
    _, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    assert isinstance(compression_ctrl, QuantizationController)
    weight_quantizers = compression_ctrl.weight_quantizers
    activation_quantizers = compression_ctrl.non_weight_quantizers

    ref_weight_qconfig = QuantizerConfig(8, QuantizationMode.SYMMETRIC, None, False, None, True)
    for wq in weight_quantizers.values():
        compare_qconfigs(ref_weight_qconfig, wq)

    ref_activation_qconfig = QuantizerConfig(8, QuantizationMode.SYMMETRIC, None, False, None, False)
    for aq in activation_quantizers.values():
        compare_qconfigs(ref_activation_qconfig, aq)
Example #14
def test_load_state_sets_initialized_flag():
    config = get_quantization_config_without_range_init()

    model = TwoConvTestModel()
    quant_model, _ = create_compressed_model_and_algo_for_test(model, config)

    load_state(quant_model, {
        'module.features.0.0.pre_ops.0.op.signed_tensor': torch.tensor([1.0]),  # quantizer of 1st conv's weights
        'module.features.1.0.pre_ops.0.op.scale': torch.tensor([1.0])  # quantizer of 2nd conv's weights
    })

    quantizers = get_all_modules_by_type(quant_model, 'SymmetricQuantizer')
    for scope, module in quantizers.items():
        if 'activation_quantizers' in str(scope) or 'UpdateInputs' in str(scope):
            assert not module.initialized
        else:
            assert module.initialized
Example #15
def disable_quantizer_gradients():
    config = get_quantization_config_without_range_init()
    config['input_info'] = {
        "sample_size": [1, 3, 10, 10],
    }
    model = MobileNetV2(num_classes=10)
    model.eval()
    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)
    original_requires_grad_per_param = get_requires_grad_per_param(model)
    quantization_types = [class_type.__name__ for class_type in QUANTIZATION_MODULES.registry_dict.values()]
    all_quantizations = get_all_modules_by_type(model, quantization_types)
    quantizers_switcher = QuantizersSwitcher(list(all_quantizations.values()))
    disabled_parameters = HAWQPrecisionInitializer.disable_all_gradients_except_weights_of_quantized_modules(
        quantizers_switcher,
        compression_ctrl.quantized_weight_modules_registry,
        model,
        get_scopes_of_skipped_weight_quantizers())
    return quantizers_switcher, disabled_parameters, model, original_requires_grad_per_param
Example #16
def test_quantization_configs__custom():
    model = BasicConvTestModel()

    config = get_quantization_config_without_range_init()
    config['compression'].update({
        "weights": {
            "mode": "asymmetric",
            "per_channel": True,
            "bits": 4
        },
        "activations": {
            "mode": "asymmetric",
            "bits": 4,
            "signed": True,
        },
    })
    config['target_device'] = 'NONE'
    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        model, config)

    assert isinstance(compression_ctrl, QuantizationController)
    weight_quantizers = compression_ctrl.weight_quantizers
    activation_quantizer_infos = compression_ctrl.non_weight_quantizers

    ref_weight_qconfig = QuantizerConfig(bits=4,
                                         mode=QuantizationMode.ASYMMETRIC,
                                         signedness_to_force=None,
                                         per_channel=True,
                                         input_shape=None,
                                         is_weights=True)
    for wq in weight_quantizers.values():
        compare_qconfigs(ref_weight_qconfig, wq)

    ref_activation_qconfig = QuantizerConfig(bits=4,
                                             mode=QuantizationMode.ASYMMETRIC,
                                             signedness_to_force=True,
                                             per_channel=False,
                                             input_shape=None,
                                             is_weights=False)

    for aq_info in activation_quantizer_infos.values():
        compare_qconfigs(ref_activation_qconfig, aq_info.quantizer_module_ref)
Example #17
def test_unified_scales_for_vpu():
    nncf_config = get_quantization_config_without_range_init(model_size=1)
    nncf_config["compression"]["quantize_outputs"] = True
    nncf_config["input_info"] = [{
        "sample_size": [1, 1, 1, 1],
    }, {
        "sample_size": [1, 1, 1, 1],
    }]
    nncf_config["target_device"] = "VPU"

    _, compression_ctrl = create_compressed_model_and_algo_for_test(
        QuantizerLinkingTestModel(), nncf_config)

    assert len(compression_ctrl.non_weight_quantizers) == 2

    total_quantizations = sum([
        len(info.affected_insertions)
        for info in compression_ctrl.non_weight_quantizers.values()
    ])
    assert total_quantizations == 8
Example #18
def test_quantize_inputs():
    model = QuantizeInputsTestModel()
    config = get_quantization_config_without_range_init()
    config["input_info"] = [
        {
            "sample_size": [2, 3, 32, 32],
        },
        {
            "sample_size": [2, 3, 32, 32],
        },
        {
            "sample_size": [2, 3, 32, 32],
        },
        {
            "sample_size": [2, 3, 32, 32],
        },
        {
            "sample_size": [2, 3, 32, 32],
        }
    ]

    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)
    REF_QUANTIZED_INPUT_MODULE_SCOPES = [
        "QuantizeInputsTestModel/NNCFConv2d[conv1]",
        "QuantizeInputsTestModel/NNCFConv2d[conv2]",
        "QuantizeInputsTestModel/NNCFConv2d[conv5]",
        "QuantizeInputsTestModel/NNCFConv2d[conv6]",
    ]
    for ref_qinput_module_scope_str in REF_QUANTIZED_INPUT_MODULE_SCOPES:
        scope = Scope.from_str(ref_qinput_module_scope_str)
        assert model.get_module_by_scope(scope) is not None
        assert ref_qinput_module_scope_str in compression_ctrl.quantized_inputs_modules_registry

    nncf_modules_dict = model.get_nncf_modules()
    for scope, nncf_module in nncf_modules_dict.items():
        scope_str = str(scope)
        update_inputs_count = sum(1 for pre_op in nncf_module.pre_ops.values() if isinstance(pre_op, UpdateInputs))
        if scope_str in REF_QUANTIZED_INPUT_MODULE_SCOPES:
            assert update_inputs_count == 1
        else:
            assert update_inputs_count == 0
Example #19
def test_quantize_has_proper_is_weights_flag():
    class Model(nn.Module):
        def __init__(self, size=1):
            super().__init__()
            self.size = size
            self.conv = nn.Conv2d(size, size, size)

        def forward(self, x):
            return self.conv(x)

    model = Model()
    config = get_quantization_config_without_range_init(model_size=2)
    quant_model, _ = create_compressed_model_and_algo_for_test(model, config)

    for module in quant_model.modules():
        if isinstance(module, NNCFConv2d):
            for op in module.pre_ops.values():
                assert isinstance(op, (UpdateWeight, UpdateInputs))
                assert op.operand.is_weights == isinstance(op, UpdateWeight)
    for _, aq in quant_model.get_compression_modules_by_type(CompressionModuleType.ACTIVATION_QUANTIZER).items():
        assert aq.is_weights is False
Example #20
def test_quantization_configs__with_precisions_list():
    class ModelForTest(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = create_conv(1, 2, 2, -1, -2)
            self.conv2 = create_conv(1, 2, 2, -1, -2)

        def forward(self, x):
            return self.conv1(x) + self.conv2(x)

    model = ModelForTest()

    config = get_quantization_config_without_range_init()
    config['compression']['initializer'].update({
        "precision": {
            "bitwidth_per_scope":
                [[2, 'ModelForTest/NNCFConv2d[conv1]'],
                 [4, 'ModelForTest/NNCFConv2d[conv2]']]
        }})
    config['compression']["activations"] = {"bits": 6}

    model, compression_ctrl = create_compressed_model_and_algo_for_test(model, config)

    ref_bits = [('ModelForTest/NNCFConv2d[conv1]module_weight', 2),
                ('ModelForTest/NNCFConv2d[conv2]module_weight', 4),
                ('ModelForTest/NNCFConv2d[conv2]/conv2d_0', 6),
                ('ModelForTest/NNCFConv2d[conv1]/conv2d_0', 6),
                ('ModelForTest/NNCFConv2d[conv1]module_input', 2),
                ('ModelForTest/NNCFConv2d[conv2]module_input', 4)]

    for key, quantizer in compression_ctrl.all_quantizations.items():
        expected_bit = [ref_bit for (name, ref_bit) in ref_bits if name == str(key)][0]
        assert quantizer.num_bits == expected_bit, 'Unexpected number of bits for {}'.format(key)

    ref_rows = [['2', '16.667', '16.667', '33.333'],
                ['4', '16.667', '16.667', '33.333'],
                ['6', '0', '33.333', '33.333']]
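    # Each row appears to be [bit width, weight quantizer %, activation quantizer %, total %] over
    # all 6 quantizers, hence the 16.667 (= 1/6) entries.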
    table = compression_ctrl.get_bit_stats()
    # pylint: disable=protected-access
    assert table._rows == ref_rows
Example #21
def get_basic_asym_quantization_config(model_size=4):
    config = get_quantization_config_without_range_init(model_size)
    config['compression']['activations'] = {"mode": "asymmetric"}
    config['compression']['initializer']['range'] = {"num_init_steps": 0}
    return config
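
A hedged usage sketch for the helper above, reusing BasicConvTestModel, create_compressed_model_and_algo_for_test and AsymmetricQuantizer from other examples in this section; the expectation that activation quantizer modules become AsymmetricQuantizer instances is an assumption mirroring test_quantization_configs__custom, not something verified here, and the test name is hypothetical.

def test_asym_config_yields_asymmetric_activation_quantizers():  # hypothetical name
    config = get_basic_asym_quantization_config()
    _, ctrl = create_compressed_model_and_algo_for_test(BasicConvTestModel(), config)
    for aq_info in ctrl.non_weight_quantizers.values():
        # Assumed behaviour: "mode": "asymmetric" for activations selects AsymmetricQuantizer modules.
        assert isinstance(aq_info.quantizer_module_ref, AsymmetricQuantizer)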
Example #22
def get_quantization_config_with_ignored_scope():
    config = get_quantization_config_without_range_init()
    config['compression']['ignored_scopes'] = 'ConvLinear/NNCFLinear[fc]'
    return config
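
A hedged sketch of how the ignored-scope config might be exercised. The ConvLinear model class is implied by the scope string above but not defined in this section, so both the class and the assertion below are assumptions.

def test_ignored_scope_excludes_fc_from_weight_quantization():  # hypothetical name
    config = get_quantization_config_with_ignored_scope()
    _, ctrl = create_compressed_model_and_algo_for_test(ConvLinear(), config)
    # The fc module listed in ignored_scopes is expected to receive no weight quantizer.
    assert all('NNCFLinear[fc]' not in str(wq_id) for wq_id in ctrl.weight_quantizers)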
Example #23
def test_quantizer_scale_linking():
    nncf_config = get_quantization_config_without_range_init(model_size=1)
    nncf_config['quantizer_setup_type'] = 'pattern_based'
    nncf_config["compression"]["quantize_outputs"] = True
    nncf_config["compression"]["quantize_inputs"] = False
    nncf_config["input_info"] = [{
        "sample_size": [1, 1, 1, 1],
    }, {
        "sample_size": [1, 1, 1, 1],
    }]
    nncf_config["compression"]["activations"] = {
        "linked_quantizer_scopes": [[
            # Note: Assuming that quantizers are attached as a post-op to the specified operation
            "QuantizerLinkingTestModel/Path[path2]/__mul___0",
            "QuantizerLinkingTestModel/Path[path2]/__add___0",
        ]],
        "ignored_scopes": [
            # Ignore path output averaging operations
            "QuantizerLinkingTestModel/__add___0",
            "QuantizerLinkingTestModel/__add___1",
            "QuantizerLinkingTestModel/__add___2",
        ]
    }

    compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        QuantizerLinkingTestModel(), nncf_config)

    # 2 paths x 3 quantizers - 1 because two are shared in one path
    assert len(compression_ctrl.non_weight_quantizers) == 5

    test_input1 = torch.ones([1, 1, 1, 1])
    test_input2 = 2 * test_input1

    non_shared_mul_quantizer_id = NonWeightQuantizerId(
        InputAgnosticOperationExecutionContext.from_str(
            "QuantizerLinkingTestModel/Path[path1]/__mul___0"))

    non_shared_add_quantizer_id = NonWeightQuantizerId(
        InputAgnosticOperationExecutionContext.from_str(
            "QuantizerLinkingTestModel/Path[path1]/__add___0"))

    shared_quantizer_id = NonWeightQuantizerId(
        InputAgnosticOperationExecutionContext.from_str(
            "QuantizerLinkingTestModel/Path[path2]/__add___0"))

    non_shared_mul_quantizer = compression_ctrl.non_weight_quantizers[
        non_shared_mul_quantizer_id].quantizer_module_ref
    non_shared_add_quantizer = compression_ctrl.non_weight_quantizers[
        non_shared_add_quantizer_id].quantizer_module_ref
    shared_quantizer = compression_ctrl.non_weight_quantizers[
        shared_quantizer_id].quantizer_module_ref

    old_scale = 765.0  # so that the quantum is equal to 3
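    # (Assuming unsigned 8-bit activation quantizers, the quantization step is scale / (2**8 - 1) = 765 / 255 = 3.)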
    with torch.no_grad():
        for quantizer in compression_ctrl.all_quantizations.values():
            quantizer.scale.fill_(old_scale)

    # Expected outputs without compression - 6, 12, 18. Scale deliberately set to preserve the values
    uncompressed_expected_outputs = (6.0 * torch.ones([1]),
                                     12.0 * torch.ones([1]),
                                     18.0 * torch.ones([1]))
    outputs_with_shared_scale_1 = compressed_model(test_input1, test_input2)

    for uncomp_out, comp_out_1 in zip(uncompressed_expected_outputs,
                                      outputs_with_shared_scale_1):
        assert torch.allclose(uncomp_out, comp_out_1)

    # Specifically clip the shared quantizer's outputs by setting scale to 1.0
    new_shared_scale = 1.0
    with torch.no_grad():
        shared_quantizer.scale.fill_(new_shared_scale)
    outputs_with_shared_scale_2 = compressed_model(test_input1, test_input2)

    # __add___0 outputs
    assert torch.allclose(outputs_with_shared_scale_2[0],
                          4.0 * torch.ones([1]))
    # __mul___0 outputs
    assert torch.allclose(outputs_with_shared_scale_2[1],
                          7.0 * torch.ones([1]))
    # __add___1 outputs
    assert torch.allclose(outputs_with_shared_scale_2[2],
                          12.0 * torch.ones([1]))

    # Clipping the non-shared quantizers at the same position in the path as the two shared ones
    # in the same manner is required to simulate the same grad input for both the shared quantizers
    # and the unshared ones
    with torch.no_grad():
        non_shared_mul_quantizer.scale.fill_(new_shared_scale)
        non_shared_add_quantizer.scale.fill_(new_shared_scale)
    final_output = compressed_model(test_input1, test_input2)[2]
    final_output.backward()

    assert torch.allclose(
        shared_quantizer.scale.grad, non_shared_mul_quantizer.scale.grad +
        non_shared_add_quantizer.scale.grad)
Example #24
def test_unified_scales_are_identical_in_onnx(tmp_path):
    # pylint:disable=no-member
    nncf_config = get_quantization_config_without_range_init(model_size=1)
    nncf_config["compression"]["quantize_outputs"] = True
    nncf_config["input_info"] = [
        {
            "sample_size": [1, 1, 1, 2],
        },
    ]
    nncf_config["target_device"] = "VPU"

    compressed_model, compression_ctrl = create_compressed_model_and_algo_for_test(
        SimplerModelForUnifiedScalesTesting(), nncf_config)

    with torch.no_grad():
        for quant_info in compression_ctrl.non_weight_quantizers.values():
            if isinstance(quant_info.quantizer_module_ref,
                          AsymmetricQuantizer):
                quant_info.quantizer_module_ref.input_range *= torch.abs(
                    torch.rand_like(
                        quant_info.quantizer_module_ref.input_range))
            else:
                quant_info.quantizer_module_ref.scale *= torch.abs(
                    torch.rand_like(quant_info.quantizer_module_ref.scale))

    test_input1 = torch.ones([1, 1, 1, 2])
    compressed_model.forward(test_input1)

    onnx_path = tmp_path / "model.onnx"
    compression_ctrl.export_model(onnx_path)

    onnx_model = onnx.load(onnx_path)

    def get_fq_nodes(onnx_model: onnx.ModelProto) -> List[onnx.NodeProto]:
        retval = []
        for node in onnx_model.graph.node:
            if str(node.op_type) == "FakeQuantize":
                retval.append(node)
        return retval

    def immediately_dominates_add_or_mul(node: onnx.NodeProto,
                                         graph: onnx.GraphProto) -> bool:
        if len(node.output) != 1:
            return False
        output_tensor_id = node.output[0]
        matches = [x for x in graph.node if output_tensor_id in x.input]
        for match in matches:
            if match.op_type in ["Add", "Mul"]:
                return True
        return False

    def get_successor(node: onnx.NodeProto,
                      graph: onnx.GraphProto) -> onnx.NodeProto:
        # Only single-output nodes are supported in this func
        assert len(node.output) == 1
        for target_node in graph.node:
            if node.output[0] in target_node.input:
                return target_node
        return None

    def group_nodes_by_output_target(
            nodes: List[onnx.NodeProto],
            graph: onnx.GraphProto) -> List[List[onnx.NodeProto]]:
        output_nodes = {}  # type: Dict[str, List[onnx.NodeProto]]
        for node in nodes:
            target_node_name = get_successor(node, graph).name
            if target_node_name not in output_nodes:
                output_nodes[target_node_name] = []
            output_nodes[target_node_name].append(node)
        return list(output_nodes.values())

    def resolve_constant_node_inputs_to_values(
            node: onnx.NodeProto, graph: onnx.GraphProto) -> Dict[str, onnx.AttributeProto]:
        retval = {}
        for input_ in node.input:
            constant_input_nodes = [
                x for x in graph.node
                if input_ in x.output and x.op_type == "Constant"
            ]
            for constant_input_node in constant_input_nodes:
                assert len(constant_input_node.attribute) == 1
                val = constant_input_node.attribute[0]
                retval[input_] = numpy_helper.to_array(val.t)
        return retval

    fq_nodes = get_fq_nodes(onnx_model)
    eltwise_predicate = partial(immediately_dominates_add_or_mul,
                                graph=onnx_model.graph)
    eltwise_fq_nodes = list(filter(eltwise_predicate, fq_nodes))
    fq_nodes_grouped_by_output = group_nodes_by_output_target(
        eltwise_fq_nodes, onnx_model.graph)

    for unified_scale_group in fq_nodes_grouped_by_output:
        inputs = [
            resolve_constant_node_inputs_to_values(fq_node, onnx_model.graph)
            for fq_node in unified_scale_group
        ]
        for inputs_dict in inputs[1:]:
            curr_values = list(inputs_dict.values())
            ref_values = list(inputs[0].values())
            assert curr_values == ref_values  # All inputs for unified scale quantizers must be equal