def prepare_model_outputs(
    float_module,
    q_module,
    Logger=OutputLogger,
    allow_list=None,
):
    r"""Prepare the model by attaching the logger to both float module
    and quantized module if they are in the allow_list.

    Args:
        float_module: float module used to generate the q_module
        q_module: module quantized from float_module
        Logger: type of logger to be attached to float_module and q_module
        allow_list: list of module types to attach logger
    """
    torch._C._log_api_usage_once("quantization_api._numeric_suite.prepare_model_outputs")
    if allow_list is None:
        allow_list = get_default_compare_output_module_list()

    qconfig_debug = torch.quantization.QConfig(activation=Logger, weight=None)
    float_module.qconfig = qconfig_debug
    prepare(float_module, inplace=True, allow_list=allow_list)
    q_module.qconfig = qconfig_debug
    prepare(
        q_module,
        inplace=True,
        allow_list=allow_list,
        observer_non_leaf_module_list=NON_LEAF_MODULE_TO_ADD_OBSERVER_ALLOW_LIST,
    )
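# A minimal usage sketch for prepare_model_outputs() above (not part of the
# original source). `float_model`, `q_model`, and `calib_input` are hypothetical:
# a float model, the model quantized from it, and one calibration batch.
# `get_matching_activations` is assumed to be the companion Numeric Suite helper
# that pairs the activations recorded by the attached OutputLoggers.
prepare_model_outputs(float_model, q_model)   # attach OutputLogger to both models
float_model(calib_input)                      # run the same input through the float model
q_model(calib_input)                          # and through the quantized model
act_compare_dict = get_matching_activations(float_model, q_model)
for name, logged in act_compare_dict.items():
    # each entry holds the float and quantized activations recorded for one module
    print(name, logged["float"].shape, logged["quantized"].shape)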
def test_fusion_sequential_model_eval(self):
    model = ModelWithSequentialFusion().eval()
    model.to(torch.float)
    fuse_modules(model,
                 [['conv1', 'relu1'],
                  ['features.0.0', 'features.0.1', 'features.0.2'],
                  ['features.1.0', 'features.1.1', 'features.1.2'],
                  ['features.2.0', 'features.2.1', 'features.2.2'],
                  ['classifier.0', 'classifier.1']],
                 inplace=True)
    self.assertEqual(type(model.conv1), nni.ConvReLU2d,
                     "Fused Conv + Relu: nni.ConvReLU2d")
    self.assertEqual(type(model.conv1[0]), nn.Conv2d,
                     "Fused Conv + Relu: Conv2d")
    self.assertEqual(type(model.conv1[1]), nn.ReLU,
                     "Fused Conv + Relu: Relu")
    self.assertEqual(type(model.relu1), nn.Identity,
                     "Fused Conv + Relu: Identity")
    for i in range(3):
        self.assertEqual(type(model.features[i][0]), nni.ConvReLU2d,
                         "Fused submodule Conv + folded BN")
        self.assertEqual(type(model.features[i][1]), nn.Identity,
                         "Fused submodule (skipped BN)")
        self.assertEqual(type(model.features[i][2]), nn.Identity,
                         "Non-fused submodule Conv")
    self.assertEqual(type(model.classifier[0]), nni.LinearReLU)
    self.assertEqual(type(model.classifier[1]), nn.Identity)

    model.qconfig = default_qconfig
    prepare(model, inplace=True)
    self.checkObservers(model)
    model(self.img_data[0][0])
    convert(model, inplace=True)
    model(self.img_data[1][0])
    self.checkModelWithSequentialQuantized(model)
def quant(net_i, scheme, trainer, quant_params=None):
    """Quantizes the network according to the chosen scheme:
    post-training static ("post"), dynamic ("dynamic"), or both ("both").
    """
    if scheme == "post":
        net_i.to("cpu")
        net_i.eval()
        net_i.qconfig = get_default_qconfig("fbgemm")
        net_i.fuse_model()
        prepare(net_i, inplace=True)
        # calibrate with an evaluation pass, then convert
        _, net_i = trainer.evaluate(net_i, quant_mode=True)
        convert(net_i, inplace=True)
    elif scheme == "dynamic":
        net_i.to("cpu")
        net_i = quantize_dynamic(net_i, quant_params, dtype=qint8)
    elif scheme == "both":
        net_i.to("cpu")
        net_i.eval()
        net_i = quantize_dynamic(net_i, quant_params, dtype=qint8)
        net_i.qconfig = get_default_qconfig("fbgemm")
        net_i.fuse_model()
        prepare(net_i, inplace=True)
        _, net_i = trainer.evaluate(net_i, quant_mode=True)
        convert(net_i, inplace=True)
    else:
        pass
    return net_i
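# A minimal usage sketch for quant() above (not part of the original source).
# `MyNet` and `my_trainer` are hypothetical: the model is expected to expose
# fuse_model(), and the trainer to expose evaluate(net, quant_mode=True),
# which serves as the calibration pass for the static schemes.
net = MyNet().eval()                  # hypothetical float model
trainer = my_trainer                  # hypothetical trainer used for calibration

# Post-training static quantization ("post"); calibration runs inside trainer.evaluate.
net_static = quant(net, "post", trainer)

# Dynamic quantization ("dynamic"); only the layer types in quant_params are quantized.
net_dynamic = quant(MyNet(), "dynamic", trainer, quant_params={nn.Linear, nn.LSTM})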
def test_quant_wrapper(self):
    r"""The user wraps the original model with QuantWrapper and then calls
    the quantization utility functions.
    """
    model = WrappedModel().eval()

    # since we didn't provide qconfig_dict, the model is modified inplace
    # but we can do `model = prepare(model)` as well
    prepare(model)
    self.checkObservers(model)

    test_only_eval_fn(model, self.calib_data)
    convert(model)

    def checkQuantized(model):
        self.checkLinear(model.fc)
        self.checkQuantDequant(model.sub)
        self.assertEqual(type(model.sub.module.fc1), nnq.Linear)
        self.assertEqual(type(model.sub.module.fc2), nnq.Linear)
        self.assertEqual(type(model.sub.module.relu), nnq.ReLU)
        test_only_eval_fn(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(WrappedModel().eval(), test_only_eval_fn,
                     self.calib_data, {})
    checkQuantized(model)
def test_single_layer(self):
    r"""Quantize SingleLayerLinearModel, which has one Linear module, and make
    sure it is swapped to nnq.Linear, the quantized version of the module.
    """
    model = SingleLayerLinearModel()
    prepare(model)
    # Check if observers and quant/dequant nodes are inserted
    self.checkNoPrepModules(model)
    self.checkHasPrepModules(model.fc1)
    self.checkObservers(model)

    test_only_eval_fn(model, self.calib_data)
    convert(model)

    def checkQuantized(model):
        self.checkNoPrepModules(model)
        self.checkHasPrepModules(model.fc1)
        self.checkWrappedQuantizedLinear(model.fc1)
        test_only_eval_fn(model, self.calib_data)
        self.checkScriptable(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(SingleLayerLinearModel(), test_only_eval_fn,
                     self.calib_data)
    checkQuantized(model)
def test_two_layers(self):
    r"""TwoLayerLinearModel has two Linear modules but we only quantize
    the second one, `fc2`; `fc1` is not quantized.
    """
    model = AnnotatedTwoLayerLinearModel()
    prepare(model)

    self.checkNoPrepModules(model)
    self.checkObservers(model)
    self.checkNoPrepModules(model.fc1)
    self.checkHasPrepModules(model.fc2)

    test_only_eval_fn(model, self.calib_data)
    convert(model)

    def checkQuantized(model):
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.fc1)
        self.checkHasPrepModules(model.fc2)
        self.assertEqual(type(model.fc1), torch.nn.Linear)
        self.checkWrappedQuantizedLinear(model.fc2)
        test_only_eval_fn(model, self.calib_data)
        self.checkScriptable(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(AnnotatedTwoLayerLinearModel(), test_only_eval_fn,
                     self.calib_data)
    checkQuantized(model)
def prepare_model_outputs(
    float_module,
    q_module,
    Logger=OutputLogger,
    allow_list=DEFAULT_NUMERIC_SUITE_COMPARE_MODEL_OUTPUT_ALLOWED_LIST,
):
    r"""Prepare the model by attaching the logger to both float module
    and quantized module if they are in the allow_list.

    Args:
        float_module: float module used to generate the q_module
        q_module: module quantized from float_module
        Logger: type of logger to be attached to float_module and q_module
        allow_list: list of module types to attach logger
    """
    qconfig_debug = torch.quantization.QConfig(activation=Logger, weight=None)
    float_module.qconfig = qconfig_debug
    prepare(float_module, inplace=True, allow_list=allow_list)
    q_module.qconfig = qconfig_debug
    prepare(
        q_module,
        inplace=True,
        allow_list=allow_list,
        observer_non_leaf_module_list=NON_LEAF_MODULE_TO_ADD_OBSERVER_ALLOW_LIST,
    )
def test_batchnorm_relu_basic(self):
    """Basic test of the PyTorch 3D batchnorm + ReLU node on Glow."""

    class SimpleQuantizedBatchNormRelu(nn.Module):
        def __init__(self, w, b, m, v):
            super(SimpleQuantizedBatchNormRelu, self).__init__()
            self.bn = torch.nn.BatchNorm3d(4)
            self.relu = torch.nn.ReLU()
            self.bn.weight = torch.nn.Parameter(w)
            self.bn.bias = torch.nn.Parameter(b)
            self.bn.running_mean = m
            self.bn.running_var = v
            self.q = QuantStub()
            self.dq = DeQuantStub()

        def forward(self, x):
            qx = self.q(x)
            qy = self.bn(qx)
            qy_relu = self.relu(qy)
            y = self.dq(qy_relu)
            return y

    C = 4
    weight = torch.ones(C) + torch.rand(C) * 0.001
    bias = torch.rand(C) * 0.0001
    running_mean = torch.zeros(C)
    running_var = torch.ones(C)
    inputs = torch.randn((10, C, 2, 3, 4), requires_grad=False)
    model = SimpleQuantizedBatchNormRelu(weight, bias, running_mean, running_var)
    model.eval()
    model.qconfig = my_qconfig
    modules_to_fuse = [["bn", "relu"]]
    fuse_modules(model, modules_to_fuse, inplace=True)
    prepare(model, inplace=True)
    model.forward(inputs)
    convert(model, inplace=True)

    # Because quantization differs between PyTorch and Glow, the tolerances
    # are set generously. BatchNorm can introduce differences of up to ~1e-2
    # in rare cases, so to keep this test from being flaky, atol is set to
    # 0.1 and rtol to 1e-5.
    utils.compare_tracing_methods(
        model,
        inputs,
        fusible_ops={"quantized::batch_norm3d_relu"},
        atol=1e-1,
        rtol=1e-5,
        fp16=True,
        skip_to_glow=True,
    )
def test_tensor_observer(self):
    model = SingleLayerLinearModel()
    model.qconfig = default_debug_qconfig
    prepare(model)
    # run the evaluation and dump all tensors
    test_only_eval_fn(model, self.calib_data)
    test_only_eval_fn(model, self.calib_data)
    tensor_dict = {}
    dump_tensor(model, tensor_dict)

    # we can torch.save() and torch.load() the dict in bento for further analysis
    self.assertTrue('fc1.module.activation' in tensor_dict.keys(),
                    'activation is not recorded in the dict')
    self.assertEqual(len(tensor_dict['fc1.module.activation']),
                     2 * len(self.calib_data))
def test_record_observer(self):
    model = SingleLayerLinearModel()
    model.qconfig = default_debug_qconfig
    prepare(model)
    # run the evaluation and dump all tensors
    test_only_eval_fn(model, self.calib_data)
    test_only_eval_fn(model, self.calib_data)
    observer_dict = {}
    get_observer_dict(model, observer_dict)

    self.assertTrue('fc1.module.observer' in observer_dict.keys(),
                    'observer is not recorded in the dict')
    self.assertEqual(len(observer_dict['fc1.module.observer'].get_tensor_value()),
                     2 * len(self.calib_data))
    self.assertEqual(observer_dict['fc1.module.observer'].get_tensor_value()[0],
                     model(self.calib_data[0][0]))
def test_compare_model_stub(self):
    r"""Compare the output of a quantized conv layer and its float shadow module."""

    def compare_and_validate_results(float_model, q_model, module_swap_list, data):
        ob_dict = compare_model_stub(
            float_model, q_model, module_swap_list, data, ShadowLogger)
        self.assertEqual(len(ob_dict), 1)
        for k, v in ob_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    for qengine in supported_qengines:
        with override_quantized_engine(qengine):
            model_list = [
                AnnotatedConvModel(qengine),
                AnnotatedConvBnReLUModel(qengine),
            ]
            data = self.img_data[0][0]
            module_swap_list = [nn.Conv2d, nn.intrinsic.modules.fused.ConvReLU2d]
            for model in model_list:
                model.eval()
                if hasattr(model, "fuse_model"):
                    model.fuse_model()
                q_model = quantize(model, default_eval_fn, self.img_data)
                compare_and_validate_results(model, q_model, module_swap_list, data)

    # Test adding stub to sub module
    model = ModelWithSubModules().eval()
    q_model = quantize(model, default_eval_fn, self.img_data)
    module_swap_list = [SubModule]
    ob_dict = compare_model_stub(model, q_model, module_swap_list, data, ShadowLogger)
    self.assertTrue(isinstance(q_model.mod1, Shadow))
    self.assertFalse(isinstance(q_model.conv, Shadow))
    for k, v in ob_dict.items():
        torch.testing.assert_allclose(v["float"], v["quantized"].dequantize())

    # Test adding stub to functionals
    model = ModelWithFunctionals().eval()
    model.qconfig = torch.quantization.get_default_qconfig("fbgemm")
    q_model = prepare(model, inplace=False)
    q_model(data)
    q_model = convert(q_model)
    module_swap_list = [nnq.FloatFunctional]
    ob_dict = compare_model_stub(model, q_model, module_swap_list, data, ShadowLogger)
    self.assertEqual(len(ob_dict), 6)
    self.assertTrue(isinstance(q_model.mycat, Shadow))
    self.assertTrue(isinstance(q_model.myadd, Shadow))
    self.assertTrue(isinstance(q_model.mymul, Shadow))
    self.assertTrue(isinstance(q_model.myadd_relu, Shadow))
    self.assertTrue(isinstance(q_model.my_scalar_add, Shadow))
    self.assertTrue(isinstance(q_model.my_scalar_mul, Shadow))
    for k, v in ob_dict.items():
        self.assertTrue(v["float"].shape == v["quantized"].shape)
def test_compare_model_outputs_functional_static(self):
    r"""Compare the output of the functional layers in a statically quantized
    model with the corresponding outputs in the float model.
    """
    qengine = torch.backends.quantized.engine

    model = ModelWithFunctionals().eval()
    model.qconfig = torch.quantization.get_default_qconfig("fbgemm")
    q_model = prepare(model, inplace=False)
    q_model(self.img_data_2d[0][0])
    q_model = convert(q_model)

    act_compare_dict = compare_model_outputs(model, q_model, self.img_data_2d[0][0])
    self.assertEqual(len(act_compare_dict), 7)
    expected_act_compare_dict_keys = {
        "mycat.stats",
        "myadd.stats",
        "mymul.stats",
        "myadd_relu.stats",
        "my_scalar_add.stats",
        "my_scalar_mul.stats",
        "quant.stats",
    }
    self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
    for k, v in act_compare_dict.items():
        self.assertTrue(v["float"].shape == v["quantized"].shape)
def test_two_layers(self):
    r"""TwoLayerLinearModel has two Linear modules but we only quantize
    the second one, `fc2`; `fc1` is not quantized.
    """
    model = TwoLayerLinearModel().eval()
    qconfig_dict = {'fc2': default_qconfig}
    model = prepare(model, qconfig_dict)

    self.checkNoPrepModules(model)
    self.checkObservers(model)
    self.checkNoPrepModules(model.fc1)
    self.checkHasPrepModules(model.fc2)

    test_only_eval_fn(model, self.calib_data)
    convert(model)

    def checkQuantized(model):
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.fc1)
        self.checkHasPrepModules(model.fc2)
        self.assertEqual(type(model.fc1), torch.nn.Linear)
        self.checkQuantizedLinear(model.fc2)
        test_only_eval_fn(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(TwoLayerLinearModel().eval(), test_only_eval_fn,
                     self.calib_data, qconfig_dict)
    checkQuantized(model)
def test_single_layer(self):
    r"""Quantize SingleLayerLinearModel, which has one Linear module, and make
    sure it is swapped to nnq.Linear, the quantized version of the module.
    """
    model = SingleLayerLinearModel()
    qconfig_dict = {'': default_qconfig}
    model = prepare(model, qconfig_dict)
    # Check if observers and quant/dequant nodes are inserted
    self.checkNoPrepModules(model)
    self.checkHasPrepModules(model.fc1)
    self.checkObservers(model)

    default_eval_fn(model, calib_data)
    convert(model)

    def checkQuantized(model):
        self.checkNoPrepModules(model)
        self.checkHasPrepModules(model.fc1)
        self.checkQuantizedLinear(model.fc1)
        default_eval_fn(model, calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(SingleLayerLinearModel(), default_eval_fn,
                     calib_data, qconfig_dict)
    checkQuantized(model)
def test_functional_module(self, train_mode):
    model = ModelWithFunctionals()
    x = torch.rand(10, 1, dtype=torch.float)
    xq = torch.quantize_per_tensor(x, 0.01, 30, torch.quint8)
    self.checkScriptable(model, [(x, x)], check_save_load=True)
    if train_mode:
        model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
        model = prepare_qat(model)
    else:
        model.qconfig = torch.quantization.get_default_qconfig('qnnpack')
        model = prepare(model)
    # Check if observers and quant/dequant nodes are inserted
    self.checkNoPrepModules(model)
    self.checkObservers(model)
    # Calibrate
    model(xq.dequantize())
    model = convert(model)

    def checkQuantized(model):
        self.checkNoPrepModules(model)
        self.assertEqual(type(model.myadd), torch.nn.quantized.QFunctional)
        self.assertEqual(type(model.mycat), torch.nn.quantized.QFunctional)
        self.assertEqual(type(model.myadd_relu), torch.nn.quantized.QFunctional)

    checkQuantized(model)
    self.checkScriptable(model, [(xq, xq)], check_save_load=True)
def test_fuse_module_eval(self):
    model = ModelForFusion(default_qconfig)
    model.eval()
    fuse_modules(model, [['conv1', 'bn1', 'relu1'],
                         ['sub1.conv', 'sub1.bn']])
    self.assertEqual(type(model.conv1), nni.ConvReLU2d,
                     "Fused Conv + BN + Relu first layer (BN is folded)")
    self.assertEqual(type(model.conv1[0]), nn.Conv2d,
                     "Fused Conv + BN + Relu (Conv + folded BN only)")
    self.assertEqual(type(model.conv1[1]), nn.ReLU,
                     "Fused Conv + BN + Relu second layer (Relu only)")
    self.assertEqual(type(model.bn1), nn.Identity,
                     "Fused Conv + BN + Relu second layer (Skipped BN)")
    self.assertEqual(type(model.relu1), nn.Identity,
                     "Fused Conv + BN + Relu second layer (Skipped Relu)")
    self.assertEqual(type(model.sub1.conv), nn.Conv2d,
                     "Fused submodule Conv + folded BN")
    self.assertEqual(type(model.sub1.bn), nn.Identity,
                     "Fused submodule (skipped BN)")
    self.assertEqual(type(model.sub2.conv), nn.Conv2d,
                     "Non-fused submodule Conv")
    self.assertEqual(type(model.sub2.relu), torch.nn.ReLU,
                     "Non-fused submodule ReLU")

    prepare(model)
    self.checkObservers(model)
    test_only_eval_fn(model, self.img_data)
    convert(model)

    def checkQuantized(model):
        self.assertEqual(type(model.conv1), nniq.ConvReLU2d)
        self.assertEqual(type(model.bn1), nn.Identity)
        self.assertEqual(type(model.relu1), nn.Identity)
        self.assertEqual(type(model.sub1.conv), nnq.Conv2d)
        self.assertEqual(type(model.sub1.bn), nn.Identity)
        self.assertEqual(type(model.sub2.conv), nn.Conv2d)
        self.assertEqual(type(model.sub2.relu), nn.ReLU)
        test_only_eval_fn(model, self.img_data)

    checkQuantized(model)

    model = ModelForFusion(default_qat_qconfig).eval()
    fuse_modules(model, [['conv1', 'bn1', 'relu1'],
                         ['sub1.conv', 'sub1.bn']])
    model = quantize(model, test_only_eval_fn, self.img_data)
    checkQuantized(model)
def quantization(self):
    if self.quant_method == 'dynamic':
        torch.backends.quantized.engine = 'fbgemm' if self.config == 'x86' else 'qnnpack'
        quant_model = quant.quantize_dynamic(
            self.model,
            {nn.Linear, nn.Conv2d, nn.Conv1d},
            dtype=torch.qint8)
    else:
        # Post-Training Static Quantization
        quant_model = copy.deepcopy(self.model)
        quant_model.eval()
        quant_model.fuse_model()
        quant_model.qconfig = self.qconfig
        quant.prepare(quant_model, inplace=True)
        self.calibrate_model(quant_model, self.calibration_loader)
        quant.convert(quant_model, inplace=True)

    self.print_model_size(quant_model, 'Quantized Model')
    return quant_model
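# The calibration helper called above (`calibrate_model`) is not shown in this
# snippet. Below is a minimal, hypothetical sketch of what such a method could
# look like, assuming `torch` is imported as in the surrounding snippets; the
# body is an assumption, not the original implementation.
def calibrate_model(self, model, loader, num_batches=32):
    """Feed a few batches through the prepared model so the observers
    inserted by quant.prepare() can record activation ranges."""
    model.eval()
    with torch.no_grad():
        for i, (inputs, _) in enumerate(loader):
            model(inputs)
            if i + 1 >= num_batches:
                break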
def test_no_qconfig_propagation(self):
    model = ModelWithNoQconfigPropagation()
    model.qconfig = torch.quantization.default_qconfig

    model = prepare(model)
    self.assertTrue(hasattr(model.fc1, 'qconfig'),
                    "QConfig is expected to propagate")
    self.assertFalse(hasattr(model.no_quant_module, 'qconfig'),
                     "QConfig is expected to NOT propagate")
def prepare_model_outputs(
    float_module,
    q_module,
    Logger=OutputLogger,
    white_list=DEFAULT_NUMERIC_SUITE_COMPARE_MODEL_OUTPUT_WHITE_LIST,
):
    r"""Prepare the model by attaching the logger to both float module
    and quantized module if they are in the white_list.

    Args:
        float_module: float module used to generate the q_module
        q_module: module quantized from float_module
        Logger: type of logger to be attached to float_module and q_module
        white_list: list of module types to attach logger
    """
    qconfig_debug = torch.quantization.QConfig(activation=Logger, weight=None)
    float_module.qconfig = qconfig_debug
    prepare(float_module, inplace=True, white_list=white_list)
    q_module.qconfig = qconfig_debug
    prepare(q_module, inplace=True, white_list=white_list)
def test_resnet_base(self):
    r"""Test quantization for the bottleneck topology used in resnet/resnext
    and add coverage for conversion of average pool and float functional.
    """
    model = ResNetBase().float().eval()
    model = QuantWrapper(model)
    model.qconfig = default_qconfig
    fuse_list = [['module.conv1', 'module.bn1', 'module.relu1']]
    fuse_modules(model, fuse_list)
    prepare(model)
    self.checkObservers(model)
    test_only_eval_fn(model, self.img_data)
    convert(model)

    def checkQuantized(model):
        self.assertEqual(type(model.module.conv1),
                         nn._intrinsic.quantized.ConvReLU2d)
        self.assertEqual(type(model.module.myop), nn.quantized.QFunctional)
        self.assertEqual(type(model.module.avgpool), nn.AdaptiveAvgPool2d)
        test_only_eval_fn(model, self.img_data)

    checkQuantized(model)
def test_compare_model_outputs(self):
    r"""Compare the output of the conv layer in the quantized model with the
    corresponding output of the conv layer in the float model.
    """

    def compare_and_validate_results(float_model, q_model, data):
        act_compare_dict = compare_model_outputs(float_model, q_model, data)
        self.assertEqual(len(act_compare_dict), 2)
        expected_act_compare_dict_keys = {"conv.stats", "quant.stats"}
        self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
        for k, v in act_compare_dict.items():
            self.assertTrue(v["float"].shape == v["quantized"].shape)

    for qengine in supported_qengines:
        with override_quantized_engine(qengine):
            model_list = [
                AnnotatedConvModel(qengine),
                AnnotatedConvBnReLUModel(qengine),
            ]
            data = self.img_data[0][0]
            module_swap_list = [nn.Conv2d, nn.intrinsic.modules.fused.ConvReLU2d]
            for model in model_list:
                model.eval()
                if hasattr(model, "fuse_model"):
                    model.fuse_model()
                q_model = quantize(model, default_eval_fn, self.img_data)
                compare_and_validate_results(model, q_model, data)

    # Test functionals
    model = ModelWithFunctionals().eval()
    model.qconfig = torch.quantization.get_default_qconfig("fbgemm")
    q_model = prepare(model, inplace=False)
    q_model(data)
    q_model = convert(q_model)
    act_compare_dict = compare_model_outputs(model, q_model, data)
    self.assertEqual(len(act_compare_dict), 7)
    expected_act_compare_dict_keys = {
        "mycat.stats",
        "myadd.stats",
        "mymul.stats",
        "myadd_relu.stats",
        "my_scalar_add.stats",
        "my_scalar_mul.stats",
        "quant.stats",
    }
    self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
    for k, v in act_compare_dict.items():
        self.assertTrue(v["float"].shape == v["quantized"].shape)
def test_fixed_qparam_ops(self):
    class M(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.sigmoid = torch.nn.Sigmoid()
            self.hardsigmoid = torch.nn.Hardsigmoid()
            self.tanh = torch.nn.Tanh()
            self.quant = QuantStub()
            self.dequant = DeQuantStub()

        def forward(self, x):
            x = self.quant(x)
            x = self.sigmoid(x)
            x = self.hardsigmoid(x)
            x = self.tanh(x)
            x = self.dequant(x)
            return x

    m = M().train()
    m.qconfig = default_qat_qconfig
    m = prepare_qat(m)

    for attr in ['sigmoid', 'hardsigmoid', 'tanh']:
        self.assertEqual(type(getattr(m, attr).activation_post_process),
                         FixedQParamsFakeQuantize)
    data = torch.randn(1, 3, 2, 4)
    before_convert = m(data)
    m = convert(m)
    after_convert = m(data)
    self.assertEqual(before_convert, after_convert)
    # make sure activation post process is removed
    for attr in ['sigmoid', 'hardsigmoid', 'tanh']:
        # verify fake quant module is removed
        self.assertFalse(
            hasattr(getattr(m, attr), 'activation_post_process'))
        # verify that hooks are removed
        self.assertTrue(len(getattr(m, attr)._forward_hooks.items()) == 0)

    # make sure no fake quantize module is inserted for eval mode
    def checkNoFQModule(m):
        for attr in ['sigmoid', 'hardsigmoid', 'tanh']:
            self.assertFalse(
                hasattr(getattr(m, attr), "activation_post_process"))
            self.assertTrue(
                len(getattr(m, attr)._forward_hooks.items()) == 0)

    m = M().eval()
    m.qconfig = default_qconfig
    m = prepare(m)
    checkNoFQModule(m)
    m = convert(m)
    checkNoFQModule(m)
def test_manual(self):
    r"""The user inserts QuantStub and DeQuantStub in the model code and
    calls the quantization utility functions.
    """
    model = ManualQuantModel()
    # propagate the qconfig of parents to children, model is changed
    # inplace
    prepare(model)
    self.checkObservers(model)

    default_eval_fn(model, calib_data)
    convert(model)

    def checkQuantized(model):
        self.assertEqual(type(model.fc), nnq.Linear)
        default_eval_fn(model, calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(ManualQuantModel(), default_eval_fn, calib_data)
    checkQuantized(model)
def test_nested3(self):
    r"""More complicated nested test case with child qconfig overriding
    parent qconfig.
    """
    model = AnnotatedCustomConfigNestedModel()
    prepare(model)

    def checkPrepModules(model, before_calib=False):
        if before_calib:
            self.checkObservers(model)
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.sub1)
        self.checkNoPrepModules(model.sub1.fc)
        self.checkNoPrepModules(model.sub1.relu)
        self.checkNoPrepModules(model.sub2)
        self.checkHasPrepModules(model.sub2.fc1)
        self.checkHasPrepModules(model.sub2.fc2)
        self.checkHasPrepModules(model.fc3)

    checkPrepModules(model, True)

    test_only_eval_fn(model, self.calib_data)
    convert(model)

    def checkQuantized(model):
        checkPrepModules(model)
        self.checkWrappedQuantizedLinear(model.sub2.fc1)
        self.checkWrappedQuantizedLinear(model.sub2.fc2)
        self.checkWrappedQuantizedLinear(model.fc3)
        test_only_eval_fn(model, self.calib_data)
        self.checkScriptable(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(AnnotatedCustomConfigNestedModel(), test_only_eval_fn,
                     self.calib_data)
    checkQuantized(model)
def test_nested1(self):
    r"""Test quantization for a nested model: top-level `fc3` and `fc1` of
    submodule `sub2` are quantized; `sub2.fc2` is not quantized.
    """
    model = AnnotatedNestedModel()

    def checkPrepModules(model, before_calib=False):
        if before_calib:
            self.checkObservers(model)
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.sub1)
        self.checkNoPrepModules(model.sub1.fc)
        self.checkNoPrepModules(model.sub1.relu)
        self.checkNoPrepModules(model.sub2)
        self.checkHasPrepModules(model.sub2.fc1)
        self.checkNoPrepModules(model.sub2.fc2)
        self.checkHasPrepModules(model.fc3)

    prepare(model)
    checkPrepModules(model, True)
    test_only_eval_fn(model, self.calib_data)
    convert(model)

    def checkQuantized(model):
        checkPrepModules(model)
        self.checkLinear(model.sub1.fc)
        self.checkWrappedQuantizedLinear(model.fc3)
        self.checkWrappedQuantizedLinear(model.sub2.fc1)
        self.checkLinear(model.sub2.fc2)
        test_only_eval_fn(model, self.calib_data)
        self.checkScriptable(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(AnnotatedNestedModel(), test_only_eval_fn,
                     self.calib_data)
    checkQuantized(model)
def test_nested2(self):
    model = AnnotatedSubNestedModel()
    prepare(model)

    def checkPrepModules(model, before_calib=False):
        if before_calib:
            self.checkObservers(model)
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.sub1)
        self.checkNoPrepModules(model.sub1.fc)
        self.checkNoPrepModules(model.sub1.relu)
        self.checkHasPrepModules(model.sub2)
        self.checkNoPrepModules(model.sub2.module.fc1)
        self.checkNoPrepModules(model.sub2.module.fc2)
        self.checkHasPrepModules(model.fc3)

    checkPrepModules(model, True)

    test_only_eval_fn(model, self.calib_data)
    convert(model)

    def checkQuantized(model):
        checkPrepModules(model)
        self.checkLinear(model.sub1.fc)
        self.assertEqual(type(model.sub1.relu), torch.nn.ReLU)
        self.checkQuantizedLinear(model.sub2.module.fc1)
        self.checkQuantizedLinear(model.sub2.module.fc2)
        self.checkWrappedQuantizedLinear(model.fc3)
        test_only_eval_fn(model, self.calib_data)
        self.checkScriptable(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(AnnotatedSubNestedModel(), test_only_eval_fn,
                     self.calib_data)
    checkQuantized(model)
def test_skip_quant(self):
    r"""The case when we want to skip quantizing some layers."""
    model = SkipQuantModel()
    prepare(model)
    self.checkObservers(model)

    test_only_eval_fn(model, self.calib_data)
    convert(model)

    def checkQuantized(model):
        self.checkLinear(model.fc)
        self.checkQuantDequant(model.sub)
        self.checkQuantizedLinear(model.sub.module.fc1)
        self.checkQuantizedLinear(model.sub.module.fc2)
        self.assertEqual(type(model.sub.module.relu), nnq.ReLU)
        self.checkScriptable(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(SkipQuantModel(), test_only_eval_fn, self.calib_data)
    checkQuantized(model)
def generate_intermediate_values(self, loader):
    self.model.qconfig = quantization.QConfig(
        activation=quantization.HistogramObserver.with_args(
            dtype=torch.qint8, qscheme=torch.per_tensor_affine),
        weight=quantization.HistogramObserver.with_args(
            dtype=torch.qint8, qscheme=torch.per_tensor_affine))
    self.model = quantization.prepare(
        self.model,
        prehook=quantization.HistogramObserver.with_args(
            dtype=torch.qint8, qscheme=torch.per_tensor_affine))
    register_hooks(self.model)
    self.model.to(self.devicy)
    for sample, _ in loader:
        sample = sample.to(self.devicy)
        _ = self.model(sample)
def test_nested3(self):
    r"""More complicated nested test case with child qconfig overriding
    parent qconfig.
    """
    model = NestedModel().eval()
    custom_options = {
        'dtype': torch.quint8,
        'qscheme': torch.per_tensor_affine
    }
    custom_qconfig = QConfig(weight=default_weight_observer(),
                             activation=default_observer(**custom_options))
    qconfig_dict = {
        'fc3': default_qconfig,
        'sub2': default_qconfig,
        'sub2.fc1': custom_qconfig
    }
    model = prepare(model, qconfig_dict)

    def checkPrepModules(model, before_calib=False):
        if before_calib:
            self.checkObservers(model)
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.sub1)
        self.checkNoPrepModules(model.sub1.fc)
        self.checkNoPrepModules(model.sub1.relu)
        self.checkNoPrepModules(model.sub2)
        self.checkHasPrepModules(model.sub2.fc1)
        self.checkHasPrepModules(model.sub2.fc2)
        self.checkHasPrepModules(model.fc3)

    checkPrepModules(model, True)

    test_only_eval_fn(model, self.calib_data)
    convert(model)

    def checkQuantized(model):
        checkPrepModules(model)
        self.checkQuantizedLinear(model.sub2.fc1)
        self.checkQuantizedLinear(model.sub2.fc2)
        self.checkQuantizedLinear(model.fc3)
        test_only_eval_fn(model, self.calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(NestedModel().eval(), test_only_eval_fn,
                     self.calib_data, qconfig_dict)
    checkQuantized(model)
def test_nested2(self):
    r"""Another nested test case: we quantize all submodules of submodule
    `sub2`. This introduces redundant quant/dequant stubs; to remove them,
    wrap the module with QuantWrapper or insert QuantStub/DeQuantStub
    manually, see `test_quant_dequant_wrapper` and `test_manual`.
    """
    model = NestedModel()
    qconfig_dict = {
        'fc3': default_qconfig,
        'sub2': default_qconfig
    }
    model = prepare(model, qconfig_dict)

    def checkPrepModules(model, before_calib=False):
        if before_calib:
            self.checkObservers(model)
        self.checkNoPrepModules(model)
        self.checkNoPrepModules(model.sub1)
        self.checkNoPrepModules(model.sub1.fc)
        self.checkNoPrepModules(model.sub1.relu)
        self.checkNoPrepModules(model.sub2)
        self.checkHasPrepModules(model.sub2.fc1)
        self.checkHasPrepModules(model.sub2.fc2)
        self.checkHasPrepModules(model.fc3)

    checkPrepModules(model, True)

    default_eval_fn(model, calib_data)
    convert(model)

    def checkQuantized(model):
        checkPrepModules(model)
        self.checkLinear(model.sub1.fc)
        self.assertEqual(type(model.sub1.relu), torch.nn.ReLU)
        self.checkQuantizedLinear(model.sub2.fc1)
        self.checkQuantizedLinear(model.sub2.fc2)
        self.checkQuantizedLinear(model.fc3)
        default_eval_fn(model, calib_data)

    checkQuantized(model)

    # test one line API
    model = quantize(NestedModel(), default_eval_fn, calib_data,
                     qconfig_dict)
    checkQuantized(model)