def test_compare_model_stub(self):
    r"""Compare the output of quantized conv layer and its float shadow module"""

    def compare_and_validate_results(float_model, q_model, module_swap_list, data):
        ob_dict = compare_model_stub(
            float_model, q_model, module_swap_list, data, ShadowLogger
        )
        self.assertEqual(len(ob_dict), 1)
        for k, v in ob_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    for qengine in supported_qengines:
        with override_quantized_engine(qengine):
            model_list = [
                AnnotatedConvModel(qengine),
                AnnotatedConvBnReLUModel(qengine),
            ]
            data = self.img_data[0][0]
            module_swap_list = [
                nn.Conv2d,
                nn.intrinsic.modules.fused.ConvReLU2d,
            ]
            for model in model_list:
                model.eval()
                if hasattr(model, "fuse_model"):
                    model.fuse_model()
                q_model = quantize(model, default_eval_fn, self.img_data)
                compare_and_validate_results(model, q_model, module_swap_list, data)

            # Test adding stub to sub module
            model = ModelWithSubModules().eval()
            q_model = quantize(model, default_eval_fn, self.img_data)
            module_swap_list = [SubModule]
            ob_dict = compare_model_stub(
                model, q_model, module_swap_list, data, ShadowLogger
            )
            self.assertTrue(isinstance(q_model.mod1, Shadow))
            self.assertFalse(isinstance(q_model.conv, Shadow))
            for k, v in ob_dict.items():
                torch.testing.assert_allclose(v["float"], v["quantized"].dequantize())

            # Test adding stub to functionals
            model = ModelWithFunctionals().eval()
            model.qconfig = torch.quantization.get_default_qconfig("fbgemm")
            q_model = prepare(model, inplace=False)
            q_model(data)
            q_model = convert(q_model)
            module_swap_list = [nnq.FloatFunctional]
            ob_dict = compare_model_stub(
                model, q_model, module_swap_list, data, ShadowLogger
            )
            self.assertEqual(len(ob_dict), 6)
            self.assertTrue(isinstance(q_model.mycat, Shadow))
            self.assertTrue(isinstance(q_model.myadd, Shadow))
            self.assertTrue(isinstance(q_model.mymul, Shadow))
            self.assertTrue(isinstance(q_model.myadd_relu, Shadow))
            self.assertTrue(isinstance(q_model.my_scalar_add, Shadow))
            self.assertTrue(isinstance(q_model.my_scalar_mul, Shadow))
            for k, v in ob_dict.items():
                self.assertTrue(v["float"].shape == v["quantized"].shape)

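# For reference, the dict returned by compare_model_stub maps each shadowed
# module's debug key to a {"float": ..., "quantized": ...} entry, which is the
# layout the assertions above rely on. A hedged inspection sketch (the exact
# keys depend on the model being compared):
#
#   ob_dict = compare_model_stub(float_model, q_model, module_swap_list,
#                                data, ShadowLogger)
#   for key, v in ob_dict.items():
#       print(key, v["float"].shape, v["quantized"].shape)
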
def test_compare_model_stub_conv_static(self):
    r"""Compare the output of static quantized conv layer and its float shadow module"""
    qengine = torch.backends.quantized.engine

    def compare_and_validate_results(float_model, q_model, module_swap_list, data):
        ob_dict = compare_model_stub(float_model, q_model, module_swap_list, data)
        self.assertEqual(len(ob_dict), 1)
        for k, v in ob_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    model_list = [AnnotatedConvModel(qengine), AnnotatedConvBnReLUModel(qengine)]
    module_swap_list = [nn.Conv2d, nn.intrinsic.modules.fused.ConvReLU2d]
    for model in model_list:
        model.eval()
        if hasattr(model, "fuse_model"):
            model.fuse_model()
        q_model = quantize(model, test_only_eval_fn, self.img_data_2d)
        compare_and_validate_results(
            model, q_model, module_swap_list, self.img_data_2d[0][0]
        )

def test_compare_model_outputs_linear_static(self):
    r"""Compare the output of linear layer in static quantized model and the
    corresponding output of linear layer in float model
    """
    qengine = torch.backends.quantized.engine

    def compare_and_validate_results(float_model, q_model, data):
        act_compare_dict = compare_model_outputs(float_model, q_model, data)
        expected_act_compare_dict_keys = {"fc1.quant.stats", "fc1.module.stats"}
        self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
        for k, v in act_compare_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    linear_data = self.calib_data[0][0]
    model_list = [AnnotatedSingleLayerLinearModel(qengine)]
    for model in model_list:
        model.eval()
        if hasattr(model, "fuse_model"):
            model.fuse_model()
        q_model = quantize(model, test_only_eval_fn, self.calib_data)
        compare_and_validate_results(model, q_model, linear_data)

def test_compare_model_stub_partial(self):
    r"""Compare the output of static quantized linear layer and its float shadow module"""
    qengine = torch.backends.quantized.engine
    # TODO: Rebase on top of PR to remove compare and validate results here

    def compare_and_validate_results(float_model, q_model, module_swap_list, data):
        ob_dict = compare_model_stub(float_model, q_model, module_swap_list, data)
        self.assertEqual(len(ob_dict), 1)
        for k, v in ob_dict.items():
            self.assertTrue(len(v["float"]) == len(v["quantized"]))
            for i, val in enumerate(v["quantized"]):
                self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)

    linear_data = self.calib_data[0][0]
    module_swap_list = [nn.Linear]
    model_list = [AnnotatedTwoLayerLinearModel()]
    for model in model_list:
        model.eval()
        if hasattr(model, "fuse_model"):
            model.fuse_model()
        q_model = quantize(model, test_only_eval_fn, [self.calib_data])
        compare_and_validate_results(model, q_model, module_swap_list, linear_data)

def test_quant_wrapper(self):
    r"""Users need to modify the original code with QuantWrapper and call
    the quantization utility functions.
    """
    model = WrappedModel().eval()

    # since we didn't provide qconfig_dict, the model is modified inplace
    # but we can do `model = prepare(model)` as well
    prepare(model)
    self.checkObservers(model)

    test_only_eval_fn(model, self.calib_data)
    convert(model)

    def checkQuantized(model):
        self.checkLinear(model.fc)
        self.checkQuantDequant(model.sub)
        self.assertEqual(type(model.sub.module.fc1), nnq.Linear)
        self.assertEqual(type(model.sub.module.fc2), nnq.Linear)
        self.assertEqual(type(model.sub.module.relu), nnq.ReLU)
        test_only_eval_fn(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(WrappedModel().eval(), test_only_eval_fn,
                     self.calib_data, {})
    checkQuantized(model)

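# For context, torch.quantization.QuantWrapper is the utility the docstring
# refers to: it wraps a float module with QuantStub/DeQuantStub so the
# quantization boundaries become explicit submodules. A minimal hedged sketch
# (WrappedModel's real definition lives elsewhere in the test fixtures):
#
#   sub = torch.quantization.QuantWrapper(
#       torch.nn.Sequential(torch.nn.Linear(5, 8), torch.nn.ReLU()))
#   # after convert(), sub.quant / sub.dequant mark the int8 region
#   # around sub.module
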
def test_two_layers(self):
    r"""TwoLayerLinearModel has two Linear modules but we only quantize the
    second one `fc2`, and `fc1` is not quantized
    """
    model = TwoLayerLinearModel().eval()
    qconfig_dict = {'fc2': default_qconfig}
    model = prepare(model, qconfig_dict)

    self.checkNoPrepModules(model)
    self.checkObservers(model)
    self.checkNoPrepModules(model.fc1)
    self.checkHasPrepModules(model.fc2)

    test_only_eval_fn(model, self.calib_data)
    convert(model)

    def checkQuantized(model):
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.fc1)
        self.checkHasPrepModules(model.fc2)
        self.assertEqual(type(model.fc1), torch.nn.Linear)
        self.checkQuantizedLinear(model.fc2)
        test_only_eval_fn(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(TwoLayerLinearModel().eval(), test_only_eval_fn,
                     self.calib_data, qconfig_dict)
    checkQuantized(model)

def test_single_layer(self):
    r"""Quantize SingleLayerLinearModel which has one Linear module, make
    sure it is swapped to nnq.Linear which is the quantized version of the module
    """
    model = SingleLayerLinearModel()
    qconfig_dict = {'': default_qconfig}
    model = prepare(model, qconfig_dict)
    # Check if observers and quant/dequant nodes are inserted
    self.checkNoPrepModules(model)
    self.checkHasPrepModules(model.fc1)
    self.checkObservers(model)

    default_eval_fn(model, calib_data)
    convert(model)

    def checkQuantized(model):
        self.checkNoPrepModules(model)
        self.checkHasPrepModules(model.fc1)
        self.checkQuantizedLinear(model.fc1)
        default_eval_fn(model, calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(SingleLayerLinearModel(), default_eval_fn, calib_data,
                     qconfig_dict)
    checkQuantized(model)

def test_single_layer(self):
    r"""Compare the result of quantizing a single linear layer in
    eager mode and graph mode
    """
    # eager mode
    annotated_linear_model = AnnotatedSingleLayerLinearModel()
    linear_model = SingleLayerLinearModel()
    # copy the weight from eager mode so that we can
    # compare the result of the two quantized models later
    linear_model.fc1.weight = torch.nn.Parameter(
        annotated_linear_model.fc1.module.weight.detach())
    linear_model.fc1.bias = torch.nn.Parameter(
        annotated_linear_model.fc1.module.bias.detach())
    model_eager = quantize(annotated_linear_model, test_only_eval_fn,
                           self.calib_data)

    qconfig_dict = {
        '': QConfig(
            activation=default_observer,
            weight=default_weight_observer)
    }
    model_script = quantize_script(
        torch.jit.script(linear_model),
        qconfig_dict,
        test_only_eval_fn,
        [self.calib_data],
        inplace=False)
    result_eager = model_eager(self.calib_data[0][0])
    torch._C._jit_pass_quant_fusion(
        model_script._c._get_module('fc1')._get_method('forward').graph)
    result_script = model_script._c._get_method('forward')(self.calib_data[0][0])
    self.assertEqual(result_eager, result_script)

def test_compare_model_stub_linear_static(self):
    r"""Compare the output of static quantized linear layer and its float shadow module"""
    qengine = torch.backends.quantized.engine

    def compare_and_validate_results(float_model, q_model, module_swap_list, data):
        ob_dict = compare_model_stub(
            float_model, q_model, module_swap_list, data, ShadowLogger
        )
        self.assertEqual(len(ob_dict), 1)
        for k, v in ob_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    img_data = [
        (
            torch.rand(3, 5, dtype=torch.float),
            torch.randint(0, 1, (2,), dtype=torch.long),
        )
        for _ in range(2)
    ]
    linear_data = img_data[0][0]
    module_swap_list = [nn.Linear]
    model_list = [AnnotatedSingleLayerLinearModel(qengine)]
    for model in model_list:
        model.eval()
        if hasattr(model, "fuse_model"):
            model.fuse_model()
        q_model = quantize(model, default_eval_fn, img_data)
        compare_and_validate_results(model, q_model, module_swap_list, linear_data)

def test_single_layer(self):
    r"""Quantize SingleLayerLinearModel which has one Linear module, make
    sure it is swapped to nnq.Linear which is the quantized version of the module
    """
    model = SingleLayerLinearModel()
    prepare(model)
    # Check if observers and quant/dequant nodes are inserted
    self.checkNoPrepModules(model)
    self.checkHasPrepModules(model.fc1)
    self.checkObservers(model)

    test_only_eval_fn(model, self.calib_data)
    convert(model)

    def checkQuantized(model):
        self.checkNoPrepModules(model)
        self.checkHasPrepModules(model.fc1)
        self.checkWrappedQuantizedLinear(model.fc1)
        test_only_eval_fn(model, self.calib_data)
        self.checkScriptable(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(SingleLayerLinearModel(), test_only_eval_fn, self.calib_data)
    checkQuantized(model)

def test_compare_model_outputs_conv_static(self):
    r"""Compare the output of conv layer in static quantized model and the
    corresponding output of conv layer in float model
    """
    qengine = torch.backends.quantized.engine

    def compare_and_validate_results(float_model, q_model, data):
        act_compare_dict = compare_model_outputs(float_model, q_model, data)
        expected_act_compare_dict_keys = {"conv.stats", "quant.stats"}
        self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
        for k, v in act_compare_dict.items():
            self.assertTrue(v["float"][0].shape == v["quantized"][0].shape)

    model_list = [AnnotatedConvModel(qengine), AnnotatedConvBnReLUModel(qengine)]
    for model in model_list:
        model.eval()
        if hasattr(model, "fuse_model"):
            model.fuse_model()
        q_model = quantize(model, test_only_eval_fn, self.img_data_2d)
        compare_and_validate_results(model, q_model, self.img_data_2d[0][0])

def test_two_layers(self):
    r"""TwoLayerLinearModel has two Linear modules but we only quantize the
    second one `fc2`, and `fc1` is not quantized
    """
    model = AnnotatedTwoLayerLinearModel()
    model = prepare(model)

    self.checkNoPrepModules(model)
    self.checkObservers(model)
    self.checkNoPrepModules(model.fc1)
    self.checkHasPrepModules(model.fc2)

    test_only_eval_fn(model, self.calib_data)
    model = convert(model)

    def checkQuantized(model):
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.fc1)
        self.checkHasPrepModules(model.fc2)
        self.assertEqual(type(model.fc1), torch.nn.Linear)
        self.checkWrappedQuantizedLinear(model.fc2)
        test_only_eval_fn(model, self.calib_data)
        self.checkScriptable(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(AnnotatedTwoLayerLinearModel(), test_only_eval_fn,
                     self.calib_data)
    checkQuantized(model)

def _create_quantized_model(self, model_class: Type[torch.nn.Module], **kwargs):
    # Helper: instantiate `model_class` and statically quantize it under the
    # qnnpack engine.
    qengine = "qnnpack"
    with override_quantized_engine(qengine):
        qconfig = torch.quantization.get_default_qconfig(qengine)
        model = model_class(**kwargs)
        model = quantize(model, test_only_eval_fn, [self.calib_data])

    return model

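# A hedged usage sketch for the helper above; `MyEagerModel` is a hypothetical
# nn.Module subclass standing in for whichever class the caller supplies:
#
#   q_model = self._create_quantized_model(MyEagerModel, num_classes=10)
#   # returns an eval-mode model statically quantized under the qnnpack engine
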
def test_compare_model_outputs(self):
    r"""Compare the output of conv layer in quantized model and the
    corresponding output of conv layer in float model
    """

    def compare_and_validate_results(float_model, q_model, data):
        act_compare_dict = compare_model_outputs(float_model, q_model, data)
        self.assertEqual(len(act_compare_dict), 2)
        expected_act_compare_dict_keys = {"conv.stats", "quant.stats"}
        self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
        for k, v in act_compare_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    for qengine in supported_qengines:
        with override_quantized_engine(qengine):
            model_list = [
                AnnotatedConvModel(qengine),
                AnnotatedConvBnReLUModel(qengine),
            ]
            data = self.img_data[0][0]
            module_swap_list = [
                nn.Conv2d,
                nn.intrinsic.modules.fused.ConvReLU2d,
            ]
            for model in model_list:
                model.eval()
                if hasattr(model, "fuse_model"):
                    model.fuse_model()
                q_model = quantize(model, default_eval_fn, self.img_data)
                compare_and_validate_results(model, q_model, data)

            # Test functionals
            model = ModelWithFunctionals().eval()
            model.qconfig = torch.quantization.get_default_qconfig("fbgemm")
            q_model = prepare(model, inplace=False)
            q_model(data)
            q_model = convert(q_model)
            act_compare_dict = compare_model_outputs(model, q_model, data)
            self.assertEqual(len(act_compare_dict), 7)
            expected_act_compare_dict_keys = {
                "mycat.stats",
                "myadd.stats",
                "mymul.stats",
                "myadd_relu.stats",
                "my_scalar_add.stats",
                "my_scalar_mul.stats",
                "quant.stats",
            }
            self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
            for k, v in act_compare_dict.items():
                self.assertTrue(v["float"].shape == v["quantized"].shape)

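# A hedged sketch of consuming act_compare_dict beyond the shape checks above,
# e.g. a per-layer signal-to-quantization-noise ratio. It assumes the logged
# values are single tensors, as the assertions above treat them; dequantize
# first when the "quantized" entry is still a quantized tensor:
#
#   for key, v in act_compare_dict.items():
#       f = v["float"]
#       q = v["quantized"].dequantize() if v["quantized"].is_quantized else v["quantized"]
#       sqnr = 20 * torch.log10(f.norm() / (f - q).norm())
#       print(key, sqnr.item())
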
def test_qconfig_dict(self):
    data = [(torch.randn(10, 5, dtype=torch.float) * 20, 1)]

    # Eager mode
    qconfig = QConfig(activation=Observer, weight=WeightObserver)
    eager_module = AnnotatedNestedModel()
    eager_module.fc3.qconfig = qconfig
    eager_module.sub2.fc1.qconfig = qconfig
    # Assign weights
    eager_module.sub1.fc.weight.data.fill_(1.0)
    eager_module.sub2.fc1.module.weight.data.fill_(1.0)
    eager_module.sub2.fc2.weight.data.fill_(1.0)
    eager_module.fc3.module.weight.data.fill_(1.0)

    script_module = torch.jit.script(NestedModel())
    # Copy weights from eager_module
    script_module.sub1.fc.weight = eager_module.sub1.fc.weight
    script_module.sub2.fc1.weight = eager_module.sub2.fc1.module.weight
    script_module.sub2.fc2.weight = eager_module.sub2.fc2.weight
    script_module.fc3.weight = eager_module.fc3.module.weight

    # Quantize eager module
    quantized_eager_module = quantize(eager_module, default_eval_fn, data)

    def get_forward(m):
        return m._c._get_method('forward')

    # Quantize script_module
    torch._C._jit_pass_constant_propagation(get_forward(script_module).graph)

    ScriptedObserver = torch.jit.script(Observer())
    ScriptedWeightObserver = torch.jit.script(WeightObserver())
    scripted_qconfig = QConfig(
        activation=ScriptedObserver._c,
        weight=ScriptedWeightObserver._c)
    qconfig_dict = {
        'sub2.fc1': scripted_qconfig,
        'fc3': scripted_qconfig
    }
    torch._C._jit_pass_insert_observers(
        script_module._c, "forward", qconfig_dict)

    # Run script_module and collect statistics
    get_forward(script_module)(data[0][0])

    # Insert quantize and dequantize calls
    script_module._c = torch._C._jit_pass_insert_quant_dequant(
        script_module._c, "forward")
    # Note that observer modules are not removed right now
    torch._C._jit_pass_quant_fusion(
        script_module._c._get_method('forward').graph)
    get_forward(script_module)(data[0][0])

    eager_result = quantized_eager_module(data[0][0])
    script_result = get_forward(script_module)(data[0][0])
    self.assertEqual(eager_result, script_result)

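# In summary, the graph-mode path above mirrors eager quantization in four JIT
# passes: constant-propagate the graph, insert scripted observers for each
# qconfig_dict entry, run calibration data through 'forward' to collect
# statistics, then insert quant/dequant ops and fuse them into quantized ops.
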
def test_single_layer(self, qconfig):
    r"""Quantize SingleLayerLinearModel which has one Linear module, make
    sure it is swapped to nnq.Linear which is the quantized version of the module
    """
    model = AnnotatedSingleLayerLinearModel()
    model.qconfig = qconfig
    model = prepare(model)
    # Check if observers and quant/dequant nodes are inserted
    self.checkNoPrepModules(model)
    self.checkHasPrepModules(model.fc1)
    self.checkObservers(model)

    test_only_eval_fn(model, self.calib_data)
    model = convert(model)

    def checkQuantized(model):
        self.checkNoPrepModules(model)
        self.checkHasPrepModules(model.fc1)
        self.checkWrappedQuantizedLinear(model.fc1)
        test_only_eval_fn(model, self.calib_data)
        self.checkScriptable(model, self.calib_data)

    checkQuantized(model)

    # test one line API - out of place version
    base = AnnotatedSingleLayerLinearModel()
    base.qconfig = qconfig
    keys_before = set(list(base.state_dict().keys()))
    model = quantize(base, test_only_eval_fn, self.calib_data)
    checkQuantized(model)
    keys_after = set(list(base.state_dict().keys()))
    self.assertEqual(keys_before, keys_after)  # simple check that nothing changed

    # in-place version
    model = AnnotatedSingleLayerLinearModel()
    model.qconfig = qconfig
    quantize(model, test_only_eval_fn, self.calib_data, inplace=True)
    checkQuantized(model)

def test_compare_model_stub_submodule_static(self):
    r"""Compare the output of static quantized submodule and its float shadow module"""
    qengine = torch.backends.quantized.engine

    model = ModelWithSubModules().eval()
    q_model = quantize(model, test_only_eval_fn, [self.img_data_2d])
    module_swap_list = [SubModule, nn.Conv2d]
    ob_dict = compare_model_stub(
        model, q_model, module_swap_list, self.img_data_2d[0][0]
    )
    # Since conv is not quantized, we do not insert a shadow module;
    # mod1 contains a linear that is quantized, so we insert a shadow module
    self.assertTrue(isinstance(q_model.mod1, Shadow))
    self.assertFalse(isinstance(q_model.conv, Shadow))

def test_compare_weights(self):
    r"""Compare the weights of float and quantized conv layer"""
    # eager mode
    annotated_conv_model = AnnotatedConvModel().eval()
    quantized_annotated_conv_model = quantize(
        annotated_conv_model, default_eval_fn, self.img_data
    )
    weight_dict = compare_weights(
        annotated_conv_model.state_dict(),
        quantized_annotated_conv_model.state_dict(),
    )
    self.assertEqual(len(weight_dict), 1)
    for k, v in weight_dict.items():
        self.assertTrue(v["float"].shape == v["quantized"].shape)

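# A hedged sketch of consuming weight_dict above, e.g. to report each weight
# tensor's quantization error; it assumes the "quantized" entry is a quantized
# tensor, hence the dequantize() guard:
#
#   for key, v in weight_dict.items():
#       q = v["quantized"].dequantize() if v["quantized"].is_quantized else v["quantized"]
#       err = (v["float"] - q).abs().max()
#       print(key, err.item())
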
def test_nested3(self):
    r"""More complicated nested test case in which a child qconfig overrides
    the parent qconfig
    """
    model = NestedModel().eval()
    custom_options = {
        'dtype': torch.quint8,
        'qscheme': torch.per_tensor_affine
    }
    custom_qconfig = QConfig(weight=default_weight_observer(),
                             activation=default_observer(**custom_options))
    qconfig_dict = {
        'fc3': default_qconfig,
        'sub2': default_qconfig,
        'sub2.fc1': custom_qconfig
    }
    model = prepare(model, qconfig_dict)

    def checkPrepModules(model, before_calib=False):
        if before_calib:
            self.checkObservers(model)
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.sub1)
        self.checkNoPrepModules(model.sub1.fc)
        self.checkNoPrepModules(model.sub1.relu)
        self.checkNoPrepModules(model.sub2)
        self.checkHasPrepModules(model.sub2.fc1)
        self.checkHasPrepModules(model.sub2.fc2)
        self.checkHasPrepModules(model.fc3)

    checkPrepModules(model, True)

    test_only_eval_fn(model, self.calib_data)
    convert(model)

    def checkQuantized(model):
        checkPrepModules(model)
        self.checkQuantizedLinear(model.sub2.fc1)
        self.checkQuantizedLinear(model.sub2.fc2)
        self.checkQuantizedLinear(model.fc3)
        test_only_eval_fn(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(NestedModel().eval(), test_only_eval_fn, self.calib_data,
                     qconfig_dict)
    checkQuantized(model)

def test_compare_model_stub_submodule_static(self):
    r"""Compare the output of static quantized submodule and its float shadow module"""
    qengine = torch.backends.quantized.engine

    model = ModelWithSubModules().eval()
    q_model = quantize(model, test_only_eval_fn, self.img_data_2d)
    module_swap_list = [SubModule]
    ob_dict = compare_model_stub(
        model, q_model, module_swap_list, self.img_data_2d[0][0]
    )
    self.assertTrue(isinstance(q_model.mod1, Shadow))
    self.assertFalse(isinstance(q_model.conv, Shadow))
    for k, v in ob_dict.items():
        torch.testing.assert_allclose(v["float"], v["quantized"].dequantize())

def test_fuse_module_eval(self):
    model = ModelForFusion(default_qconfig)
    model.eval()
    fuse_modules(model, [['conv1', 'bn1', 'relu1'],
                         ['sub1.conv', 'sub1.bn']])
    self.assertEqual(type(model.conv1), nni.ConvReLU2d,
                     "Fused Conv + BN + Relu first layer (BN is folded)")
    self.assertEqual(type(model.conv1[0]), nn.Conv2d,
                     "Fused Conv + BN + Relu (Conv + folded BN only)")
    self.assertEqual(type(model.conv1[1]), nn.ReLU,
                     "Fused Conv + BN + Relu second layer (Relu only)")
    self.assertEqual(type(model.bn1), nn.Identity,
                     "Fused Conv + BN + Relu second layer (Skipped BN)")
    self.assertEqual(type(model.relu1), nn.Identity,
                     "Fused Conv + BN + Relu second layer (Skipped Relu)")

    self.assertEqual(type(model.sub1.conv), nn.Conv2d,
                     "Fused submodule Conv + folded BN")
    self.assertEqual(type(model.sub1.bn), nn.Identity,
                     "Fused submodule (skipped BN)")
    self.assertEqual(type(model.sub2.conv), nn.Conv2d,
                     "Non-fused submodule Conv")
    self.assertEqual(type(model.sub2.relu), torch.nn.ReLU,
                     "Non-fused submodule ReLU")

    prepare(model)
    self.checkObservers(model)
    test_only_eval_fn(model, self.img_data)
    convert(model)

    def checkQuantized(model):
        self.assertEqual(type(model.conv1), nniq.ConvReLU2d)
        self.assertEqual(type(model.bn1), nn.Identity)
        self.assertEqual(type(model.relu1), nn.Identity)
        self.assertEqual(type(model.sub1.conv), nnq.Conv2d)
        self.assertEqual(type(model.sub1.bn), nn.Identity)
        self.assertEqual(type(model.sub2.conv), nn.Conv2d)
        self.assertEqual(type(model.sub2.relu), nn.ReLU)
        test_only_eval_fn(model, self.img_data)

    checkQuantized(model)

    model = ModelForFusion(default_qat_qconfig).eval()
    fuse_modules(model, [['conv1', 'bn1', 'relu1'],
                         ['sub1.conv', 'sub1.bn']])
    model = quantize(model, test_only_eval_fn, self.img_data)
    checkQuantized(model)

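# The test above exercises the standard eager post-training flow on a fused
# model; a hedged end-to-end sketch of that flow for any eval-mode model,
# using the same helpers the test uses:
#
#   fuse_modules(model, [['conv1', 'bn1', 'relu1']])   # fold BN, pair Conv+ReLU
#   prepare(model)                                     # insert observers
#   test_only_eval_fn(model, self.img_data)            # calibrate
#   convert(model)                                     # swap in quantized modules
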
def test_compare_weights(self):
    r"""Compare the weights of float and quantized conv layer"""

    def compare_and_validate_results(float_model, q_model):
        weight_dict = compare_weights(
            float_model.state_dict(), q_model.state_dict()
        )
        self.assertEqual(len(weight_dict), 1)
        for k, v in weight_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    model_list = [AnnotatedConvModel(), AnnotatedConvBnReLUModel()]
    for model in model_list:
        model.eval()
        if hasattr(model, "fuse_model"):
            model.fuse_model()
        q_model = quantize(model, default_eval_fn, self.img_data)
        compare_and_validate_results(model, q_model)

def test_nested2(self):
    r"""Another nested test case: we quantize all submodules of submodule
    sub2. This introduces redundant quant/dequant; to remove them we need to
    manually use QuantWrapper or insert QuantStub/DeQuantStub, see
    `test_quant_wrapper` and `test_manual`
    """
    model = NestedModel()
    qconfig_dict = {
        'fc3': default_qconfig,
        'sub2': default_qconfig
    }
    model = prepare(model, qconfig_dict)

    def checkPrepModules(model, before_calib=False):
        if before_calib:
            self.checkObservers(model)
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.sub1)
        self.checkNoPrepModules(model.sub1.fc)
        self.checkNoPrepModules(model.sub1.relu)
        self.checkNoPrepModules(model.sub2)
        self.checkHasPrepModules(model.sub2.fc1)
        self.checkHasPrepModules(model.sub2.fc2)
        self.checkHasPrepModules(model.fc3)

    checkPrepModules(model, True)

    default_eval_fn(model, calib_data)
    convert(model)

    def checkQuantized(model):
        checkPrepModules(model)
        self.checkLinear(model.sub1.fc)
        self.assertEqual(type(model.sub1.relu), torch.nn.ReLU)
        self.checkQuantizedLinear(model.sub2.fc1)
        self.checkQuantizedLinear(model.sub2.fc2)
        self.checkQuantizedLinear(model.fc3)
        default_eval_fn(model, calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(NestedModel(), default_eval_fn, calib_data, qconfig_dict)
    checkQuantized(model)

def test_compare_weights_linear_static(self):
    r"""Compare the weights of float and static quantized linear layer"""
    qengine = torch.backends.quantized.engine

    def compare_and_validate_results(float_model, q_model):
        weight_dict = compare_weights(
            float_model.state_dict(), q_model.state_dict()
        )
        self.assertEqual(len(weight_dict), 1)
        for k, v in weight_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    model_list = [AnnotatedSingleLayerLinearModel(qengine)]
    for model in model_list:
        model.eval()
        if hasattr(model, "fuse_model"):
            model.fuse_model()
        q_model = quantize(model, test_only_eval_fn, [self.calib_data])
        compare_and_validate_results(model, q_model)

def test_single_layer(self):
    r"""Quantize SingleLayerLinearModel which has one Linear module, make
    sure it is swapped to nnq.Linear which is the quantized version of the module
    """
    # eager mode
    model_eager = quantize(AnnotatedSingleLayerLinearModel(),
                           test_only_eval_fn, self.calib_data)

    qconfig_dict = {
        '': QConfig(
            activation=default_observer,
            weight=default_weight_observer)
    }
    model_script = quantize_script(
        torch.jit.script(SingleLayerLinearModel()),
        qconfig_dict,
        test_only_eval_fn,
        [self.calib_data])
    result_eager = model_eager(self.calib_data[0][0])
    result_script = model_script._c._get_method('forward')(self.calib_data[0][0])
    self.assertEqual(result_eager, result_script)

def test_nested1(self):
    r"""Test quantization for a nested model: top level 'fc3' and 'fc1' of
    submodule 'sub2' are quantized; 'sub2.fc2' is not quantized
    """
    model = NestedModel()
    qconfig_dict = {
        'fc3': default_qconfig,
        'sub2.fc1': default_qconfig
    }

    def checkPrepModules(model, before_calib=False):
        if before_calib:
            self.checkObservers(model)
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.sub1)
        self.checkNoPrepModules(model.sub1.fc)
        self.checkNoPrepModules(model.sub1.relu)
        self.checkNoPrepModules(model.sub2)
        self.checkHasPrepModules(model.sub2.fc1)
        self.checkNoPrepModules(model.sub2.fc2)
        self.checkHasPrepModules(model.fc3)

    model = prepare(model, qconfig_dict)
    checkPrepModules(model, True)

    default_eval_fn(model, calib_data)
    convert(model)

    def checkQuantized(model):
        checkPrepModules(model)
        self.checkLinear(model.sub1.fc)
        self.checkQuantizedLinear(model.fc3)
        self.checkQuantizedLinear(model.sub2.fc1)
        self.checkLinear(model.sub2.fc2)
        default_eval_fn(model, calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(NestedModel(), default_eval_fn, calib_data, qconfig_dict)
    checkQuantized(model)

def test_manual(self):
    r"""User inserts QuantStub and DeQuantStub in model code and calls the
    quantization utility functions.
    """
    model = ManualQuantModel()
    # propagate the qconfig of parents to children, model is changed
    # inplace
    prepare(model)
    self.checkObservers(model)

    default_eval_fn(model, calib_data)
    convert(model)

    def checkQuantized(model):
        self.assertEqual(type(model.fc), nnq.Linear)
        default_eval_fn(model, calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(ManualQuantModel(), default_eval_fn, calib_data)
    checkQuantized(model)

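# A minimal hedged sketch of the manual-stub pattern the docstring describes
# (not ManualQuantModel's actual definition, which lives in the test fixtures):
#
#   class ManualQuantSketch(torch.nn.Module):
#       def __init__(self):
#           super().__init__()
#           self.qconfig = torch.quantization.default_qconfig
#           self.quant = torch.quantization.QuantStub()
#           self.fc = torch.nn.Linear(5, 5)
#           self.dequant = torch.quantization.DeQuantStub()
#
#       def forward(self, x):
#           x = self.quant(x)       # float -> quantized after convert()
#           x = self.fc(x)
#           return self.dequant(x)  # quantized -> float
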
def test_nested3(self):
    r"""More complicated nested test case in which a child qconfig overrides
    the parent qconfig
    """
    model = AnnotatedCustomConfigNestedModel()
    model = prepare(model)

    def checkPrepModules(model, before_calib=False):
        if before_calib:
            self.checkObservers(model)
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.sub1)
        self.checkNoPrepModules(model.sub1.fc)
        self.checkNoPrepModules(model.sub1.relu)
        self.checkNoPrepModules(model.sub2)
        self.checkHasPrepModules(model.sub2.fc1)
        self.checkHasPrepModules(model.sub2.fc2)
        self.checkHasPrepModules(model.fc3)

    checkPrepModules(model, True)

    test_only_eval_fn(model, self.calib_data)
    model = convert(model)

    def checkQuantized(model):
        checkPrepModules(model)
        self.checkWrappedQuantizedLinear(model.sub2.fc1)
        self.checkWrappedQuantizedLinear(model.sub2.fc2)
        self.checkWrappedQuantizedLinear(model.fc3)
        test_only_eval_fn(model, self.calib_data)
        self.checkScriptable(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(AnnotatedCustomConfigNestedModel(), test_only_eval_fn,
                     self.calib_data)
    checkQuantized(model)

def test_nested1(self):
    r"""Test quantization for a nested model: top level 'fc3' and 'fc1' of
    submodule 'sub2' are quantized; 'sub2.fc2' is not quantized
    """
    model = AnnotatedNestedModel()

    def checkPrepModules(model, before_calib=False):
        if before_calib:
            self.checkObservers(model)
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.sub1)
        self.checkNoPrepModules(model.sub1.fc)
        self.checkNoPrepModules(model.sub1.relu)
        self.checkNoPrepModules(model.sub2)
        self.checkHasPrepModules(model.sub2.fc1)
        self.checkNoPrepModules(model.sub2.fc2)
        self.checkHasPrepModules(model.fc3)

    model = prepare(model)
    checkPrepModules(model, True)

    test_only_eval_fn(model, self.calib_data)
    model = convert(model)

    def checkQuantized(model):
        checkPrepModules(model)
        self.checkLinear(model.sub1.fc)
        self.checkWrappedQuantizedLinear(model.fc3)
        self.checkWrappedQuantizedLinear(model.sub2.fc1)
        self.checkLinear(model.sub2.fc2)
        test_only_eval_fn(model, self.calib_data)
        self.checkScriptable(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(AnnotatedNestedModel(), test_only_eval_fn, self.calib_data)
    checkQuantized(model)

def test_nested2(self):
    model = AnnotatedSubNestedModel()
    model = prepare(model)

    def checkPrepModules(model, before_calib=False):
        if before_calib:
            self.checkObservers(model)
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.sub1)
        self.checkNoPrepModules(model.sub1.fc)
        self.checkNoPrepModules(model.sub1.relu)
        self.checkHasPrepModules(model.sub2)
        self.checkNoPrepModules(model.sub2.module.fc1)
        self.checkNoPrepModules(model.sub2.module.fc2)
        self.checkHasPrepModules(model.fc3)

    checkPrepModules(model, True)

    test_only_eval_fn(model, self.calib_data)
    model = convert(model)

    def checkQuantized(model):
        checkPrepModules(model)
        self.checkLinear(model.sub1.fc)
        self.assertEqual(type(model.sub1.relu), torch.nn.ReLU)
        self.checkQuantizedLinear(model.sub2.module.fc1)
        self.checkQuantizedLinear(model.sub2.module.fc2)
        self.checkWrappedQuantizedLinear(model.fc3)
        test_only_eval_fn(model, self.calib_data)
        self.checkScriptable(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(AnnotatedSubNestedModel(), test_only_eval_fn,
                     self.calib_data)
    checkQuantized(model)