from copy import deepcopy

import torch
import torch.nn as nn

# The helpers below come from the surrounding NNCF code base and its test
# suite; the exact import paths are assumptions and may need adjusting to the
# local layout (to_networkx, GNMT, replace_lstm and gnmt_forward_fn are
# likewise project-local).
from nncf.dynamic_graph import reset_context
from nncf.dynamic_graph.graph_builder import ModelInputInfo, create_dummy_forward_fn
from nncf.quantization import QuantizedNetwork
from tests import test_models
from tests.test_compressed_graph import check_graph


def test_quantize_network(self, model_name, model_builder, forward_fn_, _quantize_config):
    net = model_builder()
    ctx = reset_context('orig')
    ctx = reset_context('quantized_graphs')
    qnet = QuantizedNetwork(net, _quantize_config.quantizer,
                            input_infos=[ModelInputInfo(forward_fn_.keywords["input_size_"])],
                            dummy_forward_fn=forward_fn_)
    qnet.to(self.device)
    forward_fn_(qnet)
    forward_fn_(qnet)
    check_graph(ctx.graph, model_name, _quantize_config.graph_dir)

def test_quantize_network(self, model_name, model_builder, input_size):
    net = model_builder()
    ctx = reset_context('orig')
    ctx = reset_context('quantized_graphs')
    qnet = QuantizedNetwork(net, input_size)
    _ = qnet(torch.zeros(*input_size))
    _ = qnet(torch.zeros(*input_size))
    check_graph(to_networkx(ctx), model_name, 'quantized')

def test_quantize_network(self, model_name, model_builder, input_size, _quantize_config):
    net = model_builder()
    ctx = reset_context('orig')
    ctx = reset_context('quantized_graphs')
    qnet = QuantizedNetwork(net, _quantize_config.quantizer, input_size,
                            dummy_forward_fn=create_dummy_forward_fn(input_size))
    _ = qnet(torch.zeros(*input_size))
    _ = qnet(torch.zeros(*input_size))
    check_graph(to_networkx(ctx), model_name, _quantize_config.graph_dir)

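# `create_dummy_forward_fn(input_size)` used above builds a forward function
# that drives the wrapped model with a zero-filled tensor of the given shape,
# so the dynamic graph can be traced without real data. A minimal illustrative
# stand-in -- the name `_dummy_forward_fn_sketch` and this exact behavior are
# assumptions, not the helper's actual implementation:
def _dummy_forward_fn_sketch(input_size):
    def forward_fn(model):
        # Trace the model with an all-zeros input of the expected shape.
        return model(torch.zeros(*input_size))
    return forward_fn
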
def test_resnet18__with_qinput():
    net = test_models.ResNet18()
    ctx = reset_context('orig')
    ctx = reset_context('quantized_graphs')
    input_shape = (1, 3, 32, 32)
    qnet = QuantizedNetwork(net, input_shape, quantize_inputs=True)
    _ = qnet(torch.zeros(*input_shape))
    _ = qnet(torch.zeros(*input_shape))
    check_graph(to_networkx(ctx), 'resnet18_qinput.dot', 'quantized')

def test_output_quantization(_quantize_config):
    net = test_models.UNet()
    ctx = reset_context('orig')
    ctx = reset_context('quantized_graphs')
    input_shape = (1, 3, 360, 480)
    qnet = QuantizedNetwork(net, _quantize_config.quantizer, [ModelInputInfo(input_shape)],
                            quantize_outputs=True)
    _ = qnet(torch.zeros(*input_shape))
    _ = qnet(torch.zeros(*input_shape))
    check_graph(ctx.graph, 'unet_qoutput.dot', _quantize_config.graph_dir)

def test_resnet18__with_ignore(_quantize_config):
    net = test_models.ResNet18()
    ctx = reset_context('orig')
    ctx = reset_context('quantized_graphs')
    input_shape = (1, 3, 32, 32)
    qnet = QuantizedNetwork(net, _quantize_config.quantizer, [ModelInputInfo(input_shape)],
                            ignored_scopes=['ResNet/Sequential[layer3]'])
    _ = qnet(torch.zeros(*input_shape))
    _ = qnet(torch.zeros(*input_shape))
    check_graph(ctx.graph, 'resnet18_ignore.dot', _quantize_config.graph_dir)

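# `ignored_scopes` entries use NNCF's scope notation --
# ModuleClass/ChildClass[attribute_name]/... -- and exclude everything under a
# matched scope from quantization; here the whole `layer3` Sequential of the
# ResNet is left unquantized.
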
def test_resnet18__with_not_qinput(_quantize_config):
    net = test_models.ResNet18()
    ctx = reset_context('orig')
    ctx = reset_context('quantized_graphs')
    input_shape = (1, 3, 32, 32)
    qnet = QuantizedNetwork(net, _quantize_config.quantizer, [ModelInputInfo(input_shape)],
                            quantize_inputs=False)
    _ = qnet(torch.zeros(*input_shape))
    _ = qnet(torch.zeros(*input_shape))
    check_graph(ctx.graph, 'resnet18_no_qinput.dot', _quantize_config.graph_dir)

def test_resnet18__with_ignore():
    net = test_models.ResNet18()
    ctx = reset_context('orig')
    ctx = reset_context('quantized_graphs')
    input_shape = (1, 3, 32, 32)
    qnet = QuantizedNetwork(net, input_shape, ignored_scopes=['ResNet/Sequential[layer3]'])
    _ = qnet(torch.zeros(*input_shape))
    _ = qnet(torch.zeros(*input_shape))
    check_graph(to_networkx(ctx), 'resnet18_ignore.dot', 'quantized')

def test_output_quantization(_quantize_config):
    net = test_models.UNet()
    ctx = reset_context('orig')
    ctx = reset_context('quantized_graphs')
    input_shape = (1, 3, 360, 480)
    qnet = QuantizedNetwork(net, _quantize_config.quantizer, input_shape,
                            dummy_forward_fn=create_dummy_forward_fn(input_shape),
                            quantize_outputs=True)
    _ = qnet(torch.zeros(*input_shape))
    _ = qnet(torch.zeros(*input_shape))
    check_graph(to_networkx(ctx), 'unet_qoutput.dot', _quantize_config.graph_dir)

def test_resnet18__with_ignore(_quantize_config):
    net = test_models.ResNet18()
    ctx = reset_context('orig')
    ctx = reset_context('quantized_graphs')
    input_shape = (1, 3, 32, 32)
    qnet = QuantizedNetwork(net, _quantize_config.quantizer, input_shape,
                            dummy_forward_fn=create_dummy_forward_fn(input_shape),
                            ignored_scopes=['ResNet/Sequential[layer3]'])
    _ = qnet(torch.zeros(*input_shape))
    _ = qnet(torch.zeros(*input_shape))
    check_graph(to_networkx(ctx), 'resnet18_ignore.dot', _quantize_config.graph_dir)

def test_resnet18__with_not_qinput(_quantize_config):
    net = test_models.ResNet18()
    ctx = reset_context('orig')
    ctx = reset_context('quantized_graphs')
    input_shape = (1, 3, 32, 32)
    qnet = QuantizedNetwork(net, _quantize_config.quantizer, input_shape,
                            dummy_forward_fn=create_dummy_forward_fn(input_shape),
                            quantize_inputs=False)
    _ = qnet(torch.zeros(*input_shape))
    _ = qnet(torch.zeros(*input_shape))
    check_graph(to_networkx(ctx), 'resnet18_no_qinput.dot', _quantize_config.graph_dir)

def test_custom_quantizable_subgraph_patterns(_quantize_config):
    net = test_models.SENet18()
    ctx = reset_context('orig')
    ctx = reset_context('quantized_graphs')
    input_shape = (1, 3, 32, 32)
    qnet = QuantizedNetwork(net, _quantize_config.quantizer, [ModelInputInfo(input_shape)],
                            quantize_outputs=False,
                            quantizable_subgraph_patterns=(("sigmoid", "__mul__"),
                                                           ("__iadd__", "batch_norm")))
    _ = qnet(torch.zeros(*input_shape))
    _ = qnet(torch.zeros(*input_shape))
    check_graph(ctx.graph, 'senet_custom_patterns.dot', _quantize_config.graph_dir)

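# Each entry in `quantizable_subgraph_patterns` names a chain of traced
# operations to be fused and quantized as a single unit, so no quantizers are
# inserted between the ops of a matched chain -- e.g. ("sigmoid", "__mul__")
# covers the squeeze-and-excitation gating in SENet18. This reading of the
# semantics is inferred from the parameter name and its usage in these tests.
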
def test_gnmt_quantization(_quantize_config):
    net = GNMT(vocab_size=32)
    net = replace_lstm(net)
    forward_fn_ = gnmt_forward_fn(seq_len=10, batch_size=3, vocab_size=32)
    ctx = reset_context('orig')
    ctx = reset_context('quantized_graphs')
    qnet = QuantizedNetwork(
        net, _quantize_config.quantizer,
        dummy_forward_fn=forward_fn_,
        quantizable_subgraph_patterns=[["linear", "__add__"],
                                       ["sigmoid", "__mul__", "__add__"],
                                       ["__add__", "tanh", "__mul__"],
                                       ["sigmoid", "__mul__"]],
        scopes_without_shape_matching=[
            'GNMT/ResidualRecurrentDecoder[decoder]/RecurrentAttention[att_rnn]/'
            'BahdanauAttention[attn]'],
        disable_function_quantization_hooks=True)
    forward_fn_(qnet)
    forward_fn_(qnet)
    check_graph(ctx.graph, 'gnmt_variable.dot', _quantize_config.graph_dir)

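# `scopes_without_shape_matching` relaxes graph-node matching to ignore tensor
# shapes inside the listed scopes; here it keeps the BahdanauAttention subgraph
# stable while sequence lengths vary between forward passes, which is what
# test_disable_shape_matching below verifies directly on a toy model.
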
def test_disable_shape_matching(_quantize_config):
    class MatMulModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.dummy_param = torch.nn.Parameter(torch.ones([1]))

        def forward(self, inputs):
            half1, half2 = torch.chunk(inputs, 2, dim=2)
            return torch.bmm(half1, half2.transpose(1, 2))

    model = MatMulModel()
    _ = reset_context('orig')
    _ = reset_context('quantized_graphs')
    input_shape_1 = (3, 32, 32)
    input_shape_2 = (4, 64, 64)
    qnet_no_shape = QuantizedNetwork(deepcopy(model), _quantize_config.quantizer,
                                     [ModelInputInfo(input_shape_1)],
                                     scopes_without_shape_matching=['MatMulModel'])
    _ = qnet_no_shape(torch.zeros(*input_shape_1))
    graph_1 = deepcopy(qnet_no_shape.get_quantized_graph())
    _ = qnet_no_shape(torch.zeros(*input_shape_2))
    graph_2 = deepcopy(qnet_no_shape.get_quantized_graph())
    keys_1 = list(graph_1.get_all_node_keys())
    keys_2 = list(graph_2.get_all_node_keys())
    assert len(keys_1) == 1
    assert keys_1 == keys_2

    _ = reset_context('orig')
    _ = reset_context('quantized_graphs')
    qnet = QuantizedNetwork(model, _quantize_config.quantizer, [ModelInputInfo(input_shape_1)])
    _ = qnet(torch.zeros(*input_shape_1))
    _ = qnet(torch.zeros(*input_shape_2))
    # The second forward run should have led to an increase in registered node
    # counts, since shape matching was left enabled and the network was run
    # with a differently shaped input tensor.
    assert qnet.get_quantized_graph().get_nodes_count() > graph_1.get_nodes_count()

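# The `_quantize_config` fixture consumed throughout is not shown in this file.
# A minimal sketch of the shape the tests rely on -- an object exposing
# `.quantizer` and `.graph_dir`, parametrized over quantization modes. The
# names `QuantizeConfig` and `make_quantizer`, the `graph_dir` values, and the
# parametrization itself are assumptions for illustration:
from collections import namedtuple

import pytest

QuantizeConfig = namedtuple('QuantizeConfig', ('quantizer', 'graph_dir'))


@pytest.fixture(params=['symmetric', 'asymmetric'])
def _quantize_config(request):
    mode = request.param
    quantizer = make_quantizer(mode)  # hypothetical factory for the given mode
    return QuantizeConfig(quantizer=quantizer, graph_dir='quantized/' + mode)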