def test_qat_prepare_device_affinity(self): """ Tests that FX QAT prepare pass respects device affinity """ class Model(nn.Module): def __init__(self): super(Model, self).__init__() self.conv = nn.Conv2d(1, 1, 1) self.bn = nn.BatchNorm2d(1) self.relu = nn.ReLU() def forward(self, x): x = self.conv(x) x = self.bn(x) x = self.relu(x) return x model = Model() qengine = torch.backends.quantized.engine qconfig_dict = { '': torch.quantization.get_default_qat_qconfig(qengine) } device = torch.device('cuda:0') model.to(device) # symbolically trace model = symbolic_trace(model) # QAT prepare model = fuse_fx(model) model = prepare_fx(model, qconfig_dict) # ensure that running an input on CUDA works without any needed changes input = torch.randn(4, 1, 4, 4, device=device) model(input) # ensure all buffers and parameters are on the device we expect model_devices = {p.device for p in model.parameters()} | \ {p.device for p in model.buffers()} self.assertEqual(len(model_devices), 1) model_device = next(iter(model_devices)) self.assertEqual(model_device, device)
def _test_model_impl(self, mode, name, model, eager_quantizable_model, check_with_eager=True, diff_of_quant=None, diff_from_eager=None): if diff_of_quant is None or diff_from_eager is None: diff_of_quant = {} diff_from_eager = {} if mode not in diff_of_quant or mode not in diff_from_eager: diff_of_quant[mode] = {} diff_from_eager[mode] = {} input_tensor = torch.rand(1, 3, 224, 224) input_tensor_inception = torch.rand(1, 3, 299, 299) output_value = torch.randint(0, 1, (1, )) # print('quantizing:', name, ' mode:', mode) if name == 'inception_v3': input_value = input_tensor_inception else: input_value = input_tensor qconfig = default_qconfig if mode == 'static' else default_qat_qconfig qconfig_dict = {'': qconfig} graph_module = symbolic_trace(model) # print('graph module:', graph_module.src) script = torch.jit.script(graph_module) # make sure graph module and script module are both runanble original_out = graph_module(input_value) is_not_tuple_out = not isinstance(original_out, tuple) script_out = script(input_value) self.assertEqual( (original_out - script_out).abs().max(), 0, 'Reslut of original graph module and script module does not match') # set to train just before quantization if mode != 'static': model.train() graph_module = fuse_fx(graph_module) prepared = prepare_fx(graph_module, qconfig_dict) if mode == 'ddp': mp.spawn(run_ddp, args=(world_size, prepared), nprocs=world_size, join=True) elif mode == 'qat': assert prepared.training, 'prepared must be in training mode for qat' optimizer = torch.optim.SGD(prepared.parameters(), lr=0.0001) criterion = nn.CrossEntropyLoss() train_one_epoch(prepared, criterion, optimizer, [(input_value, output_value)], torch.device('cpu'), 1) else: for i in range(10): prepared(input_value) # print('after observation root:', prepared.root) qgraph = convert_fx(prepared) # print('after quantization root:', qgraph.root) # print('after quantization code:', qgraph.src) qgraph.eval() qgraph_script = torch.jit.script(qgraph) # print('quantized and scripted:', qgraph_script.graph) qgraph_out = qgraph(input_value) qgraph_script = qgraph_script(input_value) if is_not_tuple_out: diff_of_quant[mode][name] = (original_out - qgraph_out).abs().max() assert torch.allclose(qgraph_out, qgraph_script), 'graph, scripted graph' else: print('tuple output') if eager_quantizable_model is not None: # comparing to eager mode quantization qeager = eager_quantizable_model ref_out = qeager(input_value) qeager.qconfig = qconfig if mode == 'static': qeager.fuse_model() prepare(qeager, inplace=True) else: qeager.train() qeager.fuse_model() prepare_qat(qeager, inplace=True) # calibration if mode == 'ddp': mp.spawn(run_ddp, args=(world_size, qeager), nprocs=world_size, join=True) elif mode == 'qat': assert qeager.training, 'qeager should be in training mode for qat' optimizer = torch.optim.SGD(qeager.parameters(), lr=0.0001) train_one_epoch(qeager, criterion, optimizer, [(input_value, output_value)], torch.device('cpu'), 1) else: for i in range(10): qeager(input_value) # print('ref after observation:', qeager) convert(qeager, inplace=True) qeager.eval() # print('ref after quantization:', qeager) qeager_out = qeager(input_value) qeager_script = torch.jit.script(qeager) qscript_out = qeager_script(input_value) if is_not_tuple_out: diff_from_eager[mode][name] = (qeager_out - qgraph_out).abs().max() if check_with_eager: self.assertEqual( diff_from_eager[mode][name], 0, 'Result of graph mode quantization and ' + 'eager mode quantization on model: ' + name + ' should match. Mode: ' + mode + ' diff:' + str(diff_from_eager[mode][name]))
def checkGraphModeFxOp(self, model, inputs, quant_type, expected_node=None, expected_node_occurrence=None, expected_node_list=None, debug=False, print_debug_info=False): """ Quantizes model with graph mode quantization on fx and check if the quantized model contains the quantized_node Args: model: floating point torch.nn.Module inputs: one positional sample input arguments for model expected_node: NodeSpec e.g. NodeSpec.call_function(torch.quantize_per_tensor) expected_node_occurrence: a dict from NodeSpec to expected number of occurences (int) e.g. {NodeSpec.call_function(torch.quantize_per_tensor) : 1, NodeSpec.call_method('dequantize'): 1} expected_node_list: a list of NodeSpec, used to check the order of the occurrence of Node e.g. [NodeSpec.call_function(torch.quantize_per_tensor), NodeSpec.call_module(nnq.Conv2d), NodeSpec.call_function(F.hardtanh_), NodeSpec.call_method('dequantize')] """ # TODO: make img_data a single example instead of a list if type(inputs) == list: inputs = inputs[0] if quant_type == QuantType.QAT: model.train() else: model.eval() original = symbolic_trace(model) fused = fuse_fx(original) qconfig_dict = { '': get_default_qconfig(torch.backends.quantized.engine) } if quant_type == QuantType.DYNAMIC: prepare = prepare_dynamic_fx convert = convert_dynamic_fx else: prepare = prepare_fx convert = convert_fx prepared = prepare(fused, qconfig_dict) prepared(*inputs) qgraph = convert(prepared) qgraph_debug = convert(prepared, debug=True) result = qgraph(*inputs) result_debug = qgraph_debug(*inputs) self.assertEqual((result - result_debug).abs().max(), 0), \ 'Expecting debug and non-debug option to produce identical result' if print_debug_info: print() print('quant type:', quant_type) print('origianl graph module:', type(model)) self.printGraphModule(original) print() print('quantized graph module:', type(qgraph)) self.printGraphModule(qgraph) print() qgraph_to_check = qgraph_debug if debug else qgraph self.checkGraphModuleNodes(qgraph_to_check, expected_node, expected_node_occurrence, expected_node_list)