def test_compression_eval_trained(_params, tmp_path):
    """Run the sample in 'test' mode on the saved best checkpoint and compare accuracies.

    The accuracy is read from the last line of a log file whose name contains
    'output' somewhere under ``tmp_path`` and compared, within
    ``test_config['absolute_tolerance_eval']``, with the ``best_acc1`` value
    stored in the checkpoint.

    :param _params: mapping with 'args' (CLI arguments, mutated in place) and
        'test_config' entries.
    :param tmp_path: directory passed to the sample as its log directory.
    """
    p = _params
    args = p['args']
    tc = p['test_config']

    args['mode'] = 'test'
    args['log-dir'] = tmp_path
    args['workers'] = 4
    args['seed'] = 1
    checkpoint_path = os.path.join(args['checkpoint-save-dir'],
                                   tc['checkpoint_name'] + '_best.pth')
    args['resume'] = checkpoint_path
    # Evaluation resumes from the checkpoint, so a 'weights' argument is dropped.
    if 'weights' in args:
        del args['weights']

    reset_context('orig')
    reset_context('quantized_graphs')
    runner = Command(
        create_command_line(get_cli_dict_args(args), tc['sample_type']))
    res = runner.run(timeout=tc['timeout'])
    assert res == 0

    # The last file visited by os.walk whose name contains 'output' wins.
    output_path = None
    for root, _, names in os.walk(str(tmp_path)):
        for name in names:
            if 'output' in name:
                output_path = os.path.join(root, name)

    # Fail with a readable message instead of a TypeError from os.path.exists(None).
    assert output_path is not None, \
        'no output log file found under {}'.format(tmp_path)
    assert os.path.exists(output_path)
    with open(output_path, "r") as f:
        lines = f.readlines()
    assert lines, 'output log file {} is empty'.format(output_path)
    last_line = lines[-1]

    # The first decimal number on the last line is taken as the accuracy.
    found_numbers = re.findall("\\d+\\.\\d+", last_line)
    assert found_numbers, \
        'no accuracy value found in the last log line: {!r}'.format(last_line)
    acc1 = float(found_numbers[0])
    assert torch.load(checkpoint_path)['best_acc1'] == approx(
        acc1, abs=tc['absolute_tolerance_eval'])
def test_can_load_quant_algo__with_defaults():
    """Check that every Conv2d gets a wrapped counterpart with a symmetric weight quantizer.

    For each 'Conv2d' module of the reference model the matching 'NNCFConv2d'
    module must exist in the compressed model, must not carry two pre-ops of
    the same class wrapping a SymmetricQuantizer, and must carry an
    UpdateWeight one.
    """
    reference_model = BasicConvTestModel()
    quant_config = get_basic_quantization_config()
    reset_context('orig')
    reset_context('quantized_graphs')
    compression_algo = create_compression_algorithm(deepcopy(reference_model),
                                                    quant_config)
    assert isinstance(compression_algo, Quantization)

    compressed_model = compression_algo.model
    original_convs = get_all_modules_by_type(reference_model, 'Conv2d')
    wrapped_convs = get_all_modules_by_type(
        compressed_model.get_nncf_wrapped_module(), 'NNCFConv2d')
    assert len(original_convs) == len(wrapped_convs)

    for conv_name in original_convs:
        # Only the last scope element is renamed to its wrapped counterpart.
        *parent_scope, leaf = conv_name.split('/')
        wrapped_name = '/'.join(
            parent_scope + [leaf.replace('Conv2d', 'NNCFConv2d')])
        assert wrapped_name in wrapped_convs

        seen_op_types = []
        for pre_op in wrapped_convs[wrapped_name].pre_ops.values():
            if not isinstance(pre_op, (UpdateInputs, UpdateWeight)):
                continue
            if not isinstance(pre_op.operand, SymmetricQuantizer):
                continue
            op_type_name = pre_op.__class__.__name__
            assert op_type_name not in seen_op_types
            seen_op_types.append(op_type_name)
        assert UpdateWeight.__name__ in seen_op_types
def test_number_of_nodes_for_module_in_loop__not_input_node(self):
    """Check the traced graph size when a module is called repeatedly in a loop.

    ``LoopModule`` applies ``F.relu`` followed by the same ``Inner`` module
    ``num_iter`` times; the graph built inside the 'test' context must contain
    exactly ``LoopModule.nodes_number()`` nodes.
    """
    num_iter = 5
    patch_torch_operators()

    class LoopModule(nn.Module):
        class Inner(nn.Module):
            def forward(self, x):
                s = F.sigmoid(x)
                t = F.tanh(x)
                result = F.sigmoid(x) * t + F.tanh(x) * s
                return result

            @staticmethod
            def nodes_number():
                # forward() makes 7 operator calls: 2x sigmoid, 2x tanh, 2x mul, 1x add.
                # NOTE(review): presumably each call maps to one graph node - confirm.
                return 7

        def __init__(self):
            super().__init__()
            self.inner = self.Inner()

        def forward(self, x):
            # The input of `inner` is the relu output, never the raw model input.
            for _ in range(num_iter):
                x = self.inner(F.relu(x))
            return x

        def nodes_number(self):
            # Expected total: the inner module's nodes plus num_iter.
            # NOTE(review): presumably one extra node per relu call in the loop - confirm.
            return self.inner.nodes_number() + num_iter

    test_module = LoopModule()
    reset_context('test')
    with context('test') as ctx:
        _ = test_module(torch.zeros(1))
        assert ctx.graph.get_nodes_count() == test_module.nodes_number()
def test_compression_train(_params, tmp_path):
    """Run the sample in 'train' mode and validate the accuracy of the best checkpoint.

    The accuracy stored in the '<checkpoint_name>_best.pth' checkpoint must
    match ``test_config['expected_accuracy']``: within
    ``test_config['absolute_tolerance_train']`` when it is lower than the
    reference, within 3 otherwise.

    :param _params: mapping with 'args' (CLI arguments, mutated in place) and
        'test_config' entries.
    :param tmp_path: directory passed to the sample as its log directory.
    """
    params = _params
    cli_args = params['args']
    test_cfg = params['test_config']

    cli_args.update({
        'mode': 'train',
        'log-dir': tmp_path,
        'workers': 4,
        'seed': 1,
    })

    reset_context('orig')
    reset_context('quantized_graphs')
    command = create_command_line(get_cli_dict_args(cli_args),
                                  test_cfg['sample_type'])
    exit_code = Command(command).run(timeout=test_cfg['timeout'])
    assert exit_code == 0

    best_checkpoint = os.path.join(cli_args['checkpoint-save-dir'],
                                   test_cfg['checkpoint_name'] + '_best.pth')
    assert os.path.exists(best_checkpoint)

    actual_acc = torch.load(best_checkpoint)['best_acc1']
    ref_acc = test_cfg['expected_accuracy']
    if actual_acc < ref_acc:
        tolerance = test_cfg['absolute_tolerance_train']
    else:
        # A result at or above the reference gets a fixed tolerance.
        tolerance = 3
    assert actual_acc == approx(ref_acc, abs=tolerance)
def test_export_stacked_bi_lstm(tmp_path):
    """Export a quantized 2-layer bidirectional LSTM to ONNX and count FakeQuantize nodes.

    :param tmp_path: directory used as the log dir and as the location of the
        exported 'test.onnx' file.
    """
    sizes = LSTMTestSizes(3, 3, 3, 3)
    patch_torch_operators()

    config = get_empty_config(
        input_sample_size=(1, sizes.hidden_size, sizes.input_size))
    config['compression'] = {'algorithm': 'quantization'}
    config.log_dir = str(tmp_path)

    reset_context('orig')
    reset_context('quantized_graphs')
    # TODO: batch_first=True fails with building graph: ambiguous call to mul or sigmoid
    rnn_kwargs = dict(input_size=sizes.input_size,
                      hidden_size=sizes.hidden_size,
                      num_layers=2,
                      bidirectional=True,
                      batch_first=False)
    test_rnn = NNCF_RNN('LSTM', **rnn_kwargs)
    algo, _ = create_compressed_model(test_rnn, config)

    onnx_path = str(tmp_path.joinpath('test.onnx'))
    algo.export_model(onnx_path)
    assert os.path.exists(onnx_path)

    exported = onnx.load(onnx_path)
    # pylint: disable=no-member
    fake_quantize_count = sum(
        1 for node in exported.graph.node if node.op_type == 'FakeQuantize')
    assert fake_quantize_count == 54
def test_loaded_model_evals_according_to_saved_acc(_params, tmp_path):
    """Evaluate a saved checkpoint with the classification sample and compare accuracies.

    Only the multiprocessing-distributed execution mode is exercised; any
    other mode is skipped.

    :param _params: mapping with 'nncf_config_path', 'checkpoint_path',
        'dataset' and 'execution_mode' entries.
    :param tmp_path: directory used for the dataset location and the logs.
    """
    p = _params
    config_path = p['nncf_config_path']
    checkpoint_path = p['checkpoint_path']

    log_dir = str(tmp_path)
    args = {
        'data': log_dir + '/' + p['dataset'],
        'dataset': p['dataset'],
        'config': str(config_path),
        'mode': 'test',
        'log-dir': log_dir,
        'workers': 4,
        'seed': 1,
        'resume': checkpoint_path,
    }

    if p['execution_mode'] != ExecutionMode.MULTIPROCESSING_DISTRIBUTED:
        pytest.skip("DataParallel eval takes too long for this test to be run during pre-commit")
    args['multiprocessing-distributed'] = ''

    reset_context('orig')
    reset_context('quantized_graphs')
    command = create_command_line(get_cli_dict_args(args), "classification")
    exit_code = Command(command).run()
    assert exit_code == 0

    acc1 = parse_best_acc1(log_dir)
    saved_acc1 = torch.load(checkpoint_path)['best_acc1']
    assert saved_acc1 == pytest.approx(acc1)
def test_quantize_has_proper_is_weights_flag():
    """Check the ``is_weights`` flag of every quantizer in a single-convolution network.

    Operands of UpdateWeight pre-ops must have ``is_weights`` True, operands
    of UpdateInputs pre-ops and all activation quantizers must have it False.
    """
    class Model(nn.Module):
        def __init__(self, size=1):
            super().__init__()
            self.size = size
            self.conv = nn.Conv2d(size, size, size)

        def forward(self, x):
            return self.conv(x)

    input_shape = (1, 1, 2, 2)
    model = Model()
    reset_context('orig')
    reset_context('quantized_graphs')
    dummy_forward = create_dummy_forward_fn(input_shape)
    quant_model = QuantizedNetwork(model,
                                   create_quantize_module,
                                   inputs_shape=input_shape,
                                   dummy_forward_fn=dummy_forward)

    wrapped_convs = (m for m in quant_model.modules()
                     if isinstance(m, NNCFConv2d))
    for conv in wrapped_convs:
        for pre_op in conv.pre_ops.values():
            assert isinstance(pre_op, (UpdateWeight, UpdateInputs))
            expects_weights = isinstance(pre_op, UpdateWeight)
            assert pre_op.operand.is_weights is expects_weights

    for activation_quantizer in quant_model.activation_quantizers.values():
        assert activation_quantizer.is_weights is False
def test_quantization_configs__custom():
    """Check that custom 'weights'/'activations' settings reach the created quantizers.

    Every weight and activation quantizer config is compared with a reference
    QuantizerConfig via ``compare_qconfigs``.
    """
    model = BasicConvTestModel()
    config = get_basic_quantization_config()

    weight_overrides = {
        "mode": "asymmetric",
        "per_channel": True,
        "signed": False,
        "bits": 4
    }
    activation_overrides = {
        "mode": "asymmetric",
        "bits": 4,
        "signed": True,
    }
    config['compression'].update({
        "weights": weight_overrides,
        "activations": activation_overrides,
    })

    reset_context('orig')
    reset_context('quantized_graphs')
    compression_algo = create_compression_algorithm(model, config)
    weight_quantizers, activation_quantizers = split_quantizers(
        compression_algo.model)

    expected_weight_cfg = QuantizerConfig(4, QuantizationMode.ASYMMETRIC,
                                          None, True, None, True)
    for weight_quantizer in weight_quantizers:
        compare_qconfigs(expected_weight_cfg, weight_quantizer.config)

    expected_activation_cfg = QuantizerConfig(4, QuantizationMode.ASYMMETRIC,
                                              True, False, None, False)
    for activation_quantizer in activation_quantizers:
        compare_qconfigs(expected_activation_cfg, activation_quantizer.config)
def test_compression_eval_trained(_params, tmp_path):
    """Run the sample in 'test' mode on the saved best checkpoint and compare accuracies.

    The accuracy returned by ``parse_best_acc1(tmp_path)`` must match the
    ``best_acc1`` value stored in the checkpoint within
    ``test_config['absolute_tolerance_eval']``.

    :param _params: mapping with 'args' (CLI arguments, mutated in place) and
        'test_config' entries.
    :param tmp_path: directory passed to the sample as its log directory.
    """
    params = _params
    cli_args = params['args']
    test_cfg = params['test_config']

    cli_args['mode'] = 'test'
    cli_args['log-dir'] = tmp_path
    cli_args['workers'] = 4
    cli_args['seed'] = 1
    best_checkpoint = os.path.join(cli_args['checkpoint-save-dir'],
                                   test_cfg['checkpoint_name'] + '_best.pth')
    cli_args['resume'] = best_checkpoint
    # Evaluation resumes from the checkpoint, so a 'weights' argument is dropped.
    cli_args.pop('weights', None)

    reset_context('orig')
    reset_context('quantized_graphs')
    command = create_command_line(get_cli_dict_args(cli_args),
                                  test_cfg['sample_type'])
    exit_code = Command(command).run(timeout=test_cfg['timeout'])
    assert exit_code == 0

    acc1 = parse_best_acc1(tmp_path)
    saved_acc1 = torch.load(best_checkpoint)['best_acc1']
    assert saved_acc1 == approx(acc1, abs=test_cfg['absolute_tolerance_eval'])
def test_quantize_network(self, model_name, model_builder, input_size):
    """Quantize the built model, run it twice and compare the graph with the 'quantized' reference.

    :param model_name: reference graph name passed to ``check_graph``.
    :param model_builder: zero-argument callable returning the model.
    :param input_size: shape of the zero tensor fed to the network.
    """
    net = model_builder()
    reset_context('orig')
    graph_ctx = reset_context('quantized_graphs')
    qnet = QuantizedNetwork(net, input_size)

    # Two forward passes, each on a fresh zero tensor.
    for _ in range(2):
        qnet(torch.zeros(*input_size))

    check_graph(to_networkx(graph_ctx), model_name, 'quantized')
def test_cpu_only_mode_produces_cpu_only_model(config, tmp_path, mocker):
    """Check that '--cpu-only' hands a model without CUDA parameters to the training routine.

    The sample's training entry points are mocked (or spied on), the sample
    ``main`` is invoked with '--cpu-only', and the model captured from the
    mocked call must have no parameter with ``is_cuda`` set.

    :param config: mapping with 'config', 'dataset_path', 'batch_size' and
        'sample_type' entries.
    :param tmp_path: directory for the serialized config and the logs.
    :param mocker: pytest-mock fixture used to patch the training functions.
    """
    reset_context('orig')
    reset_context('quantized_graphs')
    c = config

    config_factory = ConfigFactory(config['config'], tmp_path / 'config.json')
    # device_count() is 0 on a host without CUDA devices; clamp the multiplier
    # so that the batch size stays positive there.
    num_devices = max(torch.cuda.device_count(), 1)
    args = {
        "--data": c["dataset_path"],
        "--config": config_factory.serialize(),
        "--log-dir": tmp_path,
        "--batch-size": c["batch_size"] * num_devices,
        "--workers": 1,
        "--epochs": 1,
        "--cpu-only": None
    }

    # Options whose value is None are emitted as bare flags.
    command_line = " ".join(key if val is None else "{} {}".format(key, val)
                            for key, val in args.items())

    if config["sample_type"] == "classification":
        import examples.classification.main as sample
        if is_binarization(config['config']):
            mocker.patch(
                "examples.classification.binarization_worker.train_epoch_bin")
            mocker.patch(
                "examples.classification.binarization_worker.validate")
            import examples.classification.binarization_worker as bin_worker
            bin_worker.validate.return_value = (0, 0)
        else:
            mocker.patch("examples.classification.main.train_epoch")
            mocker.patch("examples.classification.main.validate")
            sample.validate.return_value = (0, 0)
    elif config["sample_type"] == "semantic_segmentation":
        import examples.semantic_segmentation.main as sample
        import examples.semantic_segmentation.train
        mocker.spy(examples.semantic_segmentation.train.Train, "__init__")
    elif config["sample_type"] == "object_detection":
        import examples.object_detection.main as sample
        mocker.patch("examples.object_detection.main.train")

    sample.main(shlex.split(command_line))

    # The model is taken from the positional arguments of the captured call.
    # pylint: disable=no-member
    if config["sample_type"] == "classification":
        if is_binarization(config['config']):
            import examples.classification.binarization_worker as bin_worker
            model_to_be_trained = bin_worker.train_epoch_bin.call_args[0][2]  # model
        else:
            model_to_be_trained = sample.train_epoch.call_args[0][1]  # model
    elif config["sample_type"] == "semantic_segmentation":
        model_to_be_trained = \
            examples.semantic_segmentation.train.Train.__init__.call_args[0][1]  # model
    elif config["sample_type"] == "object_detection":
        model_to_be_trained = sample.train.call_args[0][0]  # net

    for p in model_to_be_trained.parameters():
        assert not p.is_cuda
def test_number_of_nodes_for_module_with_nested_loops(self):
    """Check the traced graph size for two nested loops built from registered modules.

    The outer ``TestIterModule`` calls its reset-point module ``num_iter``
    times; that module forwards to ``LoopModule2``, which calls its own
    reset-point module ``num_iter`` times. The graph built inside the 'test'
    context must contain exactly ``num_iter`` nodes.
    """
    num_iter = 5
    patch_torch_operators()

    class TestIterModule(nn.Module):
        # NOTE(review): registration in ITERATION_MODULES presumably marks the
        # class as a point where per-iteration tracing state is reset - confirm.
        @ITERATION_MODULES.register()
        class TestIterModule_ResetPoint(nn.Module):
            def __init__(self, loop_module):
                super().__init__()
                self.loop_module = loop_module

            def forward(self, x):
                return self.loop_module(F.relu(x))

        def __init__(self):
            super().__init__()
            self.loop_module = self.LoopModule2()
            self.reset_point = self.TestIterModule_ResetPoint(
                self.loop_module)

        def forward(self, x):
            for _ in range(num_iter):
                x = self.reset_point(x)
            return x

        class LoopModule2(nn.Module):
            @ITERATION_MODULES.register()
            class LoopModule2_ResetPoint(nn.Module):
                def __init__(self, inner):
                    super().__init__()
                    self.inner = inner

                def forward(self, x):
                    return self.inner(F.relu(x))

            def __init__(self):
                super().__init__()
                self.inner = self.Inner()
                self.reset_helper = self.LoopModule2_ResetPoint(self.inner)

            def forward(self, x):
                # The helper's result is discarded: every iteration receives the
                # same `x`, and `x` is returned unchanged.
                for _ in range(num_iter):
                    self.reset_helper(x)
                return x

            class Inner(nn.Module):
                def forward(self, x):
                    s = F.sigmoid(x)
                    t = F.tanh(x)
                    result = t + s
                    return result

    test_module = TestIterModule()
    reset_context('test')
    with context('test') as ctx:
        _ = test_module(torch.zeros(1))
        assert ctx.graph.get_nodes_count() == num_iter
def test_quantize_network(self, model_name, model_builder, input_size,
                          _quantize_config):
    """Quantize the built model, run it twice and compare the graph with the reference.

    :param model_name: reference graph name passed to ``check_graph``.
    :param model_builder: zero-argument callable returning the model.
    :param input_size: shape of the zero tensor fed to the network.
    :param _quantize_config: object providing ``quantizer`` and ``graph_dir``.
    """
    net = model_builder()
    reset_context('orig')
    graph_ctx = reset_context('quantized_graphs')

    quantizer = _quantize_config.quantizer
    dummy_forward = create_dummy_forward_fn(input_size)
    qnet = QuantizedNetwork(net, quantizer, input_size,
                            dummy_forward_fn=dummy_forward)

    # Two forward passes, each on a fresh zero tensor.
    for _ in range(2):
        qnet(torch.zeros(*input_size))

    check_graph(to_networkx(graph_ctx), model_name,
                _quantize_config.graph_dir)
def test_resnet18__with_qinput():
    """Quantize ResNet18 with ``quantize_inputs=True`` and compare the graph with 'resnet18_qinput.dot'."""
    net = test_models.ResNet18()
    reset_context('orig')
    graph_ctx = reset_context('quantized_graphs')

    input_shape = (1, 3, 32, 32)
    qnet = QuantizedNetwork(net, input_shape, quantize_inputs=True)

    # Two forward passes, each on a fresh zero tensor.
    for _ in range(2):
        qnet(torch.zeros(*input_shape))

    check_graph(to_networkx(graph_ctx), 'resnet18_qinput.dot', 'quantized')
def test_quantize_network(self, model_name, model_builder, forward_fn_,
                          _quantize_config):
    """Quantize the built model, run the forward function twice and compare the graph.

    :param model_name: reference graph name passed to ``check_graph``.
    :param model_builder: zero-argument callable returning the model.
    :param forward_fn_: callable taking the network; its
        ``keywords["input_size_"]`` entry supplies the input size.
    :param _quantize_config: object providing ``quantizer`` and ``graph_dir``.
    """
    net = model_builder()
    reset_context('orig')
    graph_ctx = reset_context('quantized_graphs')

    input_infos = [ModelInputInfo(forward_fn_.keywords["input_size_"])]
    qnet = QuantizedNetwork(net,
                            _quantize_config.quantizer,
                            input_infos=input_infos,
                            dummy_forward_fn=forward_fn_)
    qnet.to(self.device)

    for _ in range(2):
        forward_fn_(qnet)

    check_graph(graph_ctx.graph, model_name, _quantize_config.graph_dir)
def test_output_quantization(_quantize_config):
    """Quantize UNet with ``quantize_outputs=True`` and compare the graph with 'unet_qoutput.dot'.

    :param _quantize_config: object providing ``quantizer`` and ``graph_dir``.
    """
    net = test_models.UNet()
    reset_context('orig')
    graph_ctx = reset_context('quantized_graphs')

    input_shape = (1, 3, 360, 480)
    input_infos = [ModelInputInfo(input_shape)]
    qnet = QuantizedNetwork(net, _quantize_config.quantizer, input_infos,
                            quantize_outputs=True)

    # Two forward passes, each on a fresh zero tensor.
    for _ in range(2):
        qnet(torch.zeros(*input_shape))

    check_graph(graph_ctx.graph, 'unet_qoutput.dot',
                _quantize_config.graph_dir)
def test_resnet18__with_ignore(_quantize_config):
    """Quantize ResNet18 with 'layer3' in the ignored scopes and compare with 'resnet18_ignore.dot'.

    :param _quantize_config: object providing ``quantizer`` and ``graph_dir``.
    """
    net = test_models.ResNet18()
    reset_context('orig')
    graph_ctx = reset_context('quantized_graphs')

    input_shape = (1, 3, 32, 32)
    input_infos = [ModelInputInfo(input_shape)]
    scopes_to_skip = ['ResNet/Sequential[layer3]']
    qnet = QuantizedNetwork(net, _quantize_config.quantizer, input_infos,
                            ignored_scopes=scopes_to_skip)

    # Two forward passes, each on a fresh zero tensor.
    for _ in range(2):
        qnet(torch.zeros(*input_shape))

    check_graph(graph_ctx.graph, 'resnet18_ignore.dot',
                _quantize_config.graph_dir)
def test_resnet18__with_not_qinput(_quantize_config):
    """Quantize ResNet18 with ``quantize_inputs=False`` and compare with 'resnet18_no_qinput.dot'.

    :param _quantize_config: object providing ``quantizer`` and ``graph_dir``.
    """
    net = test_models.ResNet18()
    reset_context('orig')
    graph_ctx = reset_context('quantized_graphs')

    input_shape = (1, 3, 32, 32)
    input_infos = [ModelInputInfo(input_shape)]
    qnet = QuantizedNetwork(net, _quantize_config.quantizer, input_infos,
                            quantize_inputs=False)

    # Two forward passes, each on a fresh zero tensor.
    for _ in range(2):
        qnet(torch.zeros(*input_shape))

    check_graph(graph_ctx.graph, 'resnet18_no_qinput.dot',
                _quantize_config.graph_dir)
def test_resnet18__with_ignore(_quantize_config):
    """Quantize ResNet18 with 'layer3' in the ignored scopes and compare with 'resnet18_ignore.dot'.

    :param _quantize_config: object providing ``quantizer`` and ``graph_dir``.
    """
    net = test_models.ResNet18()
    reset_context('orig')
    graph_ctx = reset_context('quantized_graphs')

    input_shape = (1, 3, 32, 32)
    dummy_forward = create_dummy_forward_fn(input_shape)
    scopes_to_skip = ['ResNet/Sequential[layer3]']
    qnet = QuantizedNetwork(net,
                            _quantize_config.quantizer,
                            input_shape,
                            dummy_forward_fn=dummy_forward,
                            ignored_scopes=scopes_to_skip)

    # Two forward passes, each on a fresh zero tensor.
    for _ in range(2):
        qnet(torch.zeros(*input_shape))

    check_graph(to_networkx(graph_ctx), 'resnet18_ignore.dot',
                _quantize_config.graph_dir)
def test_resnet18__with_ignore():
    """Quantize ResNet18 with 'layer3' in the ignored scopes and compare with 'resnet18_ignore.dot'."""
    net = test_models.ResNet18()
    reset_context('orig')
    graph_ctx = reset_context('quantized_graphs')

    input_shape = (1, 3, 32, 32)
    scopes_to_skip = ['ResNet/Sequential[layer3]']
    qnet = QuantizedNetwork(net, input_shape, ignored_scopes=scopes_to_skip)

    # Two forward passes, each on a fresh zero tensor.
    for _ in range(2):
        qnet(torch.zeros(*input_shape))

    check_graph(to_networkx(graph_ctx), 'resnet18_ignore.dot', 'quantized')
def test_output_quantization(_quantize_config):
    """Quantize UNet with ``quantize_outputs=True`` and compare the graph with 'unet_qoutput.dot'.

    :param _quantize_config: object providing ``quantizer`` and ``graph_dir``.
    """
    net = test_models.UNet()
    reset_context('orig')
    graph_ctx = reset_context('quantized_graphs')

    input_shape = (1, 3, 360, 480)
    dummy_forward = create_dummy_forward_fn(input_shape)
    qnet = QuantizedNetwork(net,
                            _quantize_config.quantizer,
                            input_shape,
                            dummy_forward_fn=dummy_forward,
                            quantize_outputs=True)

    # Two forward passes, each on a fresh zero tensor.
    for _ in range(2):
        qnet(torch.zeros(*input_shape))

    check_graph(to_networkx(graph_ctx), 'unet_qoutput.dot',
                _quantize_config.graph_dir)
def test_resnet18__with_not_qinput(_quantize_config):
    """Quantize ResNet18 with ``quantize_inputs=False`` and compare with 'resnet18_no_qinput.dot'.

    :param _quantize_config: object providing ``quantizer`` and ``graph_dir``.
    """
    net = test_models.ResNet18()
    reset_context('orig')
    graph_ctx = reset_context('quantized_graphs')

    input_shape = (1, 3, 32, 32)
    dummy_forward = create_dummy_forward_fn(input_shape)
    qnet = QuantizedNetwork(net,
                            _quantize_config.quantizer,
                            input_shape,
                            dummy_forward_fn=dummy_forward,
                            quantize_inputs=False)

    # Two forward passes, each on a fresh zero tensor.
    for _ in range(2):
        qnet(torch.zeros(*input_shape))

    check_graph(to_networkx(graph_ctx), 'resnet18_no_qinput.dot',
                _quantize_config.graph_dir)
def test_scale_and_sign_init_for_quant_algo():
    """Check ``signed`` and ``scale`` of the quantizers after initialization on an all-ones dataset.

    The quantization algorithm is initialized with a one-sample data loader
    yielding tensors of ones; every 'Quantize' module whose name matches a
    pattern of the reference table must have the listed ``signed`` flag and
    ``scale`` value.
    """
    model = TwoConvTestModel()
    config = get_empty_config()
    config['compression'] = {
        'algorithm': 'quantization',
        'initializer': {
            'num_init_steps': 1
        }
    }

    reset_context('orig')
    reset_context('quantized_graphs')
    compression_algo = create_compression_algorithm(model, config)
    model = compression_algo.model

    input_sample_size = config.input_sample_size

    class OnesDatasetMock:
        """Single-item dataset returning a tensor of ones and a one-element target."""

        def __init__(self, input_size):
            self.input_size = input_size
            super().__init__()

        def __getitem__(self, index):
            return torch.ones(self.input_size), torch.ones(1)

        def __len__(self):
            return 1

    # The leading dimension of the sample size is dropped for the dataset items;
    # the loader is created with batch_size=1.
    data_loader = torch.utils.data.DataLoader(
        OnesDatasetMock(input_sample_size[1:]),
        batch_size=1,
        num_workers=1,
        shuffle=False)
    compression_algo.initialize(data_loader)

    model_conv = get_all_modules_by_type(model, 'Quantize')
    # pattern -> (expected signed flag, expected scale)
    ref_table = {
        '.*Sequential\\[0\\].*UpdateWeight.*': (True, 1),
        # Was 'UpdateWeight. *' (stray space), i.e. "one char then spaces".
        '.*Sequential\\[1\\].*UpdateWeight.*': (False, 1),
        '.*activation_quantizers.*Sequential\\[0\\].*': (True, 4),
        '.*activation_quantizers.*Sequential\\[1\\].*': (True, 24)
    }
    for name, module in model_conv.items():
        for pattern, ref_values in ref_table.items():
            match = re.search(pattern, name)
            if match:
                assert isinstance(module, Quantize)
                assert module.signed == ref_values[0], \
                    'sign is not matched for {}'.format(name)
                assert module.scale == ref_values[1], \
                    'scale is not matched for {}'.format(name)
def activation_quantizers_dumping_worker(current_gpu, config, tmp_path):
    """Write the activation quantizer keys of a compressed resnet50 to a file, one per line.

    :param current_gpu: value forwarded to ``get_path_to_keys`` to build the file path.
    :param config: compression config passed to ``create_compression_algorithm``.
    :param tmp_path: directory forwarded to ``get_path_to_keys``.
    """
    base_model = resnet50(pretrained=False)
    reset_context('orig')
    reset_context('quantized_graphs')
    compressed_model = create_compression_algorithm(base_model, config).model

    keys_file = get_path_to_keys(tmp_path, current_gpu)
    print(keys_file)
    with open(keys_file, 'w') as out:
        for key in model_keys(compressed_model) if False else compressed_model.activation_quantizers.keys():
            out.write("%s\n" % key)
def test_can_create_quant_loss_and_scheduler():
    """Check that the created algorithm exposes a CompressionLoss and a CompressionScheduler."""
    model = BasicConvTestModel()
    config = get_basic_quantization_config()
    reset_context('orig')
    reset_context('quantized_graphs')
    compression_algo = create_compression_algorithm(model, config)

    assert isinstance(compression_algo.loss, CompressionLoss)
    assert isinstance(compression_algo.scheduler, CompressionScheduler)
def test_custom_quantizable_subgraph_patterns(_quantize_config):
    """Quantize SENet18 with custom subgraph patterns and compare with 'senet_custom_patterns.dot'.

    :param _quantize_config: object providing ``quantizer`` and ``graph_dir``.
    """
    net = test_models.SENet18()
    reset_context('orig')
    graph_ctx = reset_context('quantized_graphs')

    input_shape = (1, 3, 32, 32)
    input_infos = [ModelInputInfo(input_shape)]
    custom_patterns = (("sigmoid", "__mul__"),
                       ("__iadd__", "batch_norm"))
    qnet = QuantizedNetwork(net, _quantize_config.quantizer, input_infos,
                            quantize_outputs=False,
                            quantizable_subgraph_patterns=custom_patterns)

    # Two forward passes, each on a fresh zero tensor.
    for _ in range(2):
        qnet(torch.zeros(*input_shape))

    check_graph(graph_ctx.graph, 'senet_custom_patterns.dot',
                _quantize_config.graph_dir)
def test_can_restore_binary_mask_on_magnitude_quant_algo_resume():
    """Check that binary masks survive loading a magnitude-sparsity state into a const-sparsity model.

    A model compressed with magnitude sparsity plus quantization is run once
    on CUDA; its state dict is then loaded into a model compressed with const
    sparsity plus quantization, whose conv1/conv2 masks must equal the
    reference masks.
    """
    magnitude_config = get_empty_config()
    magnitude_sparsity = {
        "algorithm": "magnitude_sparsity",
        "weight_importance": "abs",
        "params": {"schedule": "multistep", "sparsity_levels": [0.3, 0.5]},
    }
    magnitude_config["compression"] = [magnitude_sparsity,
                                       {"algorithm": "quantization"}]

    reset_context('orig')
    reset_context('quantized_graphs')
    magnitude_quant_algo = create_compression_algorithm(MagnitudeTestModel(),
                                                        magnitude_config)
    # load_state doesn't support CPU + Quantization
    sparse_model = torch.nn.DataParallel(magnitude_quant_algo.model)
    sparse_model.cuda()
    with torch.no_grad():
        sparse_model(torch.ones([1, 1, 10, 10]))

    reset_context('orig')
    reset_context('quantized_graphs')
    const_config = get_empty_config()
    const_config["compression"] = [{"algorithm": "const_sparsity"},
                                   {"algorithm": "quantization"}]
    const_algo = create_compression_algorithm(MagnitudeTestModel(),
                                              const_config)
    const_sparse_model = const_algo.model
    load_state(const_sparse_model, sparse_model.state_dict())

    # The first pre-op ('0') of each convolution carries the binary mask.
    conv1_op = const_sparse_model.module.conv1.pre_ops['0']
    check_equal(ref_mask_1, conv1_op.operand.binary_mask)

    conv2_op = const_sparse_model.module.conv2.pre_ops['0']
    check_equal(ref_mask_2, conv2_op.operand.binary_mask)
def test_ambiguous_function():
    """Check that QuantizedNetwork can be built for a model calling ``F.relu`` in a loop.

    The test only constructs the network; it passes when construction does
    not raise.
    """
    class Model(nn.Module):
        def __init__(self):
            super().__init__()
            convs = [nn.Conv2d(1, 1, 1) for _ in range(2)]
            self.layers = nn.ModuleList(convs)

        def forward(self, x):
            # As in the original, nothing is returned from forward().
            for conv in self.layers:
                x = F.relu(conv(x))

    reset_context('orig')
    reset_context('quantized_graphs')
    ambiguous_model = Model()
    QuantizedNetwork(ambiguous_model, inputs_shape=(1, 1, 1, 1))
def test_build_graph(self, model_name, model_builder, input_size):
    """Trace the built model twice in the 'test' context and compare the graph with the 'original' reference.

    :param model_name: reference graph name passed to ``check_graph``.
    :param model_builder: zero-argument callable returning the model.
    :param input_size: shape of the zero tensor fed to the model.
    """
    net = model_builder()
    graph_ctx = reset_context('test')

    with context('test') as active_ctx:
        net(torch.zeros(input_size))
        # Call counters are reset between the two passes.
        active_ctx.reset_scope_operator_call_counters()
        net(torch.zeros(input_size))

    check_graph(to_networkx(graph_ctx), model_name, 'original')
def test_quantization_configs__with_defaults():
    """Check that default settings produce 8-bit symmetric weight and activation quantizers.

    Every weight and activation quantizer config is compared with a reference
    QuantizerConfig via ``compare_qconfigs``.
    """
    reference_model = BasicConvTestModel()
    config = get_basic_quantization_config()
    reset_context('orig')
    reset_context('quantized_graphs')
    compression_algo = create_compression_algorithm(deepcopy(reference_model),
                                                    config)

    weight_quantizers, activation_quantizers = split_quantizers(
        compression_algo.model)

    expected_weight_cfg = QuantizerConfig(8, QuantizationMode.SYMMETRIC,
                                          None, False, None, True)
    for weight_quantizer in weight_quantizers:
        compare_qconfigs(expected_weight_cfg, weight_quantizer.config)

    expected_activation_cfg = QuantizerConfig(8, QuantizationMode.SYMMETRIC,
                                              None, False, None, False)
    for activation_quantizer in activation_quantizers:
        compare_qconfigs(expected_activation_cfg, activation_quantizer.config)