def test_override_no_clip(overrides, e_clip_acts, e_n_stds, rnn_model, rnn_model_stats):
    """Per-layer overrides must win over the quantizer-wide clipping defaults.

    The quantizer is built with clip_acts="AVG" / clip_n_stds=0; the fixture
    `overrides` is expected to redirect the eltwise-mult wrapper's clipping to
    `e_clip_acts` / `e_n_stds`.
    """
    quantizer = PostTrainLinearQuantizer(rnn_model,
                                         clip_acts="AVG",
                                         clip_n_stds=0,
                                         overrides=overrides,
                                         model_activation_stats=rnn_model_stats)
    quantizer.prepare_model(torch.randn(1, 1, 20))

    # The hidden-state eltwise-mult of the first RNN cell is the module the
    # override targets.
    wrapper = quantizer.model.rnn.cells[0].eltwisemult_hidden
    assert isinstance(wrapper, RangeLinearQuantEltwiseMultWrapper)

    out_settings = wrapper.output_quant_settings
    assert out_settings.clip_mode == e_clip_acts
    assert out_settings.clip_n_stds == e_n_stds
def test_acts_quant_params_linear(act1_type, act2_type, bn_out_stats):
    """Activation quant params of a Linear->BN->(two activations) model can be
    enumerated, set one-by-one, and updated in bulk."""
    # Build the model and its collected stats; the BN output stats come from
    # the fixture so the fused result is controlled by the test parameters.
    model = LinearBNSplitAct(act1_type, act2_type)
    stats = gen_stats_for_model(model)
    stats['bn']['output'] = bn_out_stats

    quantizer = PostTrainLinearQuantizer(model, model_activation_stats=deepcopy(stats))
    quantizer.prepare_model(torch.randn(10, 10))

    # Every quantized layer exposes an output scale and zero-point.
    assert set(quantizer.acts_quant_params) == {
        'linear.output_zero_point', 'linear.output_scale',
        'act1.output_zero_point', 'act1.output_scale',
        'act2.output_zero_point', 'act2.output_scale',
    }

    # Single-parameter setters must propagate to the wrapped module.
    quantizer.set_act_quant_param('linear.output_zero_point', 2.)
    quantizer.set_act_quant_param('linear.output_scale', 30.)
    assert model.linear.output_zero_point == 2.
    assert model.linear.output_scale == 30.
    assert dict(model.linear.named_acts_quant_params()) == {
        'output_zero_point': torch.tensor(2.),
        'output_scale': 30.
    }

    # Bulk update through a name->value config dict.
    quantizer.update_acts_quant_params({'linear.output_zero_point': 4.,
                                        'act2.output_scale': 50})
    assert model.linear.output_zero_point == 4
    assert model.act2.output_scale == 50
def test_stats_fusion_just_bn():
    """Folding a Conv->BN pair with no activation: the BN stats entry is
    removed and its output stats are re-attributed to the conv layer."""
    model = ConvBnActPool(None, False)
    stats = gen_stats_for_model(model)

    ptq = PostTrainLinearQuantizer(model, model_activation_stats=deepcopy(stats))
    ptq.prepare_model(torch.randn((10, 10, 20, 20)))

    expected = deepcopy(stats)
    bn_stats = expected.pop('bn')  # BN is folded away, so its entry disappears
    expected['conv']['output'] = bn_stats['output']
    assert ptq.model_activation_stats == expected
def test_stats_fusion_split_act(act1_type, act2_type, bn_out_stats, linear_out_expected_stats):
    """Linear->BN folding when the BN output feeds two separate activations:
    the linear layer's output stats become the fixture-provided expectation."""
    model = LinearBNSplitAct(act1_type, act2_type)
    stats = gen_stats_for_model(model)
    stats['bn']['output'] = bn_out_stats

    ptq = PostTrainLinearQuantizer(model, model_activation_stats=deepcopy(stats))
    ptq.prepare_model(torch.randn(10, 10))

    expected = deepcopy(stats)
    del expected['bn']  # BN folded into the linear layer -> no BN stats entry
    expected['linear']['output'] = linear_out_expected_stats
    assert ptq.model_activation_stats == expected
def test_stats_fusion_sequential(act_type, act_as_module, bn_out_stats, conv_out_expected_stats):
    """Conv->BN->activation fusion: conv inherits the fused output stats, and
    when the activation is a standalone module its input stats follow suit."""
    model = ConvBnActPool(act_type, act_as_module)
    stats = gen_stats_for_model(model)
    stats['bn']['output'] = bn_out_stats

    ptq = PostTrainLinearQuantizer(model, model_activation_stats=deepcopy(stats))
    ptq.prepare_model(torch.randn((10, 10, 20, 20)))

    expected = deepcopy(stats)
    del expected['bn']  # BN folded into conv -> its stats entry is removed
    expected['conv']['output'] = conv_out_expected_stats
    if act_as_module:
        # A module activation has its own stats entry whose input mirrors
        # the (now fused) conv output.
        expected['act']['inputs'][0] = conv_out_expected_stats
    assert ptq.model_activation_stats == expected
def test_acts_quant_params_rnn(rnn_model):
    """Bulk-updating activation quant params reaches both an inner RNN cell's
    activation and the embedding's weight scale."""
    # NOTE(review): the model is moved to CUDA but the dummy input stays on
    # CPU — presumably prepare_model tolerates this; confirm if tests move off GPU.
    model = DummyWordLangModel(nn.Embedding(41, 20), rnn_model).cuda()
    quantizer = PostTrainLinearQuantizer(
        model, model_activation_stats=deepcopy(gen_stats_for_model(model)))
    quantizer.prepare_model(torch.randint(0, 41, size=(79, 23)))

    quantizer.update_acts_quant_params({
        'rnn.rnn.cells.0.act_o.output_scale': 4,
        'embedding.w_scale': torch.tensor(59.0)
    })
    assert model.rnn.rnn.cells[0].act_o.output_scale == 4
    assert model.embedding.w_scale == 59.0
def test_stats_fusion_no_fuse(model, input_shape):
    """When the model contains no fusable patterns, prepare_model must leave
    the activation stats exactly as supplied."""
    original_stats = gen_stats_for_model(model)
    ptq = PostTrainLinearQuantizer(model, model_activation_stats=deepcopy(original_stats))
    ptq.prepare_model(torch.randn(input_shape))
    # The quantizer received a deep copy, so comparing against our pristine
    # dict proves nothing was fused, renamed, or dropped.
    assert ptq.model_activation_stats == original_stats
#!/usr/bin/env python3 import settings.optimizeNetwork_settings as s from common.mask_util import MaskTable, compensateNetwork from common.nnTools import test from distiller.quantization import PostTrainLinearQuantizer from copy import deepcopy #create reference model ref_mask_table=MaskTable(s.bits,s.quant_mode, s.mask_mode, s.network, [] , False) ref_quantized = PostTrainLinearQuantizer( deepcopy(s.network), bits_activations=s.aw_bits, bits_parameters=s.aw_bits, bits_accum=s.acc_bits, mode=s.quant_mode, mask_table=ref_mask_table, scale_approx_mult_bits=s.bits) ref_quantized.prepare_model(s.dummy_input) ref_quantized.model.eval() ref_quantized.model.to("cpu") #loading mask mask_table = MaskTable(s.bits,s.quant_mode, s.mask_mode, s.network, mask_file=s.config_fname) quantizer = PostTrainLinearQuantizer( deepcopy(s.network), bits_activations=s.aw_bits, bits_parameters=s.aw_bits, bits_accum=s.acc_bits, mode=s.quant_mode, mask_table=mask_table, scale_approx_mult_bits=s.bits) quantizer.prepare_model(s.dummy_input) quantizer.model.eval() compensateNetwork(ref_quantized.model, quantizer.model, s.test_set, "../conf_files/conf_path.json",
# NOTE(review): this chunk begins mid-expression — the dataset constructor's
# opening call (and the definitions of train_set, network, bits, acc_bits,
# batch_size, device, rep_string, transform_test, get_all_preds) are outside
# this view.
,train=False  # original note said "where data will be located" — actually selects the split; TODO confirm intent
,download=True  # fetch the dataset if not present locally (network hit only on first run)
,transform=transform_test
)
data_loader= torch.utils.data.DataLoader(
    train_set
    ,shuffle=False  # fixed order so predictions align with train_set.targets below
    ,batch_size=batch_size)
dummy_input = (torch.zeros([1,3,32,32]))  # single CIFAR-sized image for prepare_model tracing

# Reference (unquantized) accuracy on CPU.
test_preds = get_all_preds(network, data_loader,device="cpu")
ref_correct = test_preds.argmax(dim=1).eq(torch.LongTensor(train_set.targets)).sum().item()
print(ref_correct)

# Sweep every quantization mode and weight bit-width, logging correct counts.
quant_mode_list = [LinearQuantMode.SYMMETRIC,LinearQuantMode.ASYMMETRIC_UNSIGNED,LinearQuantMode.ASYMMETRIC_SIGNED]
with open("../reports/data_vgg11bn_CIFAR10_postTrainQuantizing.txt","w") as log_pointer:
    log_pointer.write("Reference accuracy = {}\n".format(ref_correct))
    for quant_mode in quant_mode_list:
        for qw_bits in range(3,bits+1):  # weight bits swept; activation bits fixed at `bits`
            quantizer = PostTrainLinearQuantizer(
                deepcopy(network),  # fresh copy per configuration
                bits_activations=bits,
                bits_parameters=qw_bits,
                bits_accum=acc_bits,
                mode=quant_mode,scale_approx_mult_bits=bits)
            quantizer.model.to(device)
            # NOTE(review): dummy_input stays on CPU while the model may be on
            # another device — presumably prepare_model copes; verify.
            quantizer.prepare_model(dummy_input)
            quantizer.model.eval()
            test_preds = get_all_preds(quantizer.model, data_loader,device=device)
            preds_correct = test_preds.argmax(dim=1).eq(torch.LongTensor(train_set.targets).to(device)).sum().item()
            accuracy = preds_correct/len(train_set)
            # rep_string is a format template defined outside this view.
            log_pointer.write(rep_string.format(quant_mode,qw_bits,preds_correct,accuracy))
            del quantizer  # release the quantized copy before the next configuration