Example #1
def test_override_no_clip(overrides, e_clip_acts, e_n_stds, rnn_model, rnn_model_stats):
    quantizer = PostTrainLinearQuantizer(rnn_model, clip_acts="AVG", clip_n_stds=0, overrides=overrides,
                                         model_activation_stats=rnn_model_stats)
    quantizer.prepare_model(torch.randn(1, 1, 20))
    assert isinstance(quantizer.model.rnn.cells[0].eltwisemult_hidden, RangeLinearQuantEltwiseMultWrapper)
    assert quantizer.model.rnn.cells[0].eltwisemult_hidden.output_quant_settings.clip_mode == e_clip_acts
    assert quantizer.model.rnn.cells[0].eltwisemult_hidden.output_quant_settings.clip_n_stds == e_n_stds
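For context, the `overrides` fixture is not shown here: PostTrainLinearQuantizer accepts an OrderedDict mapping module-name patterns to per-module quantization settings. A minimal sketch of an override that would disable clipping for the module this test inspects (the fixture's actual contents are an assumption, not taken from the original):

from collections import OrderedDict

# hypothetical override: turn off activation clipping for one RNN-cell module,
# in which case the test would expect e_clip_acts == ClipMode.NONE for it
overrides = OrderedDict([
    ('rnn.cells.0.eltwisemult_hidden', OrderedDict([('clip_acts', 'NONE')]))
])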
Example #2
def test_acts_quant_params_linear(act1_type, act2_type, bn_out_stats):
    # prepare model:
    model = LinearBNSplitAct(act1_type, act2_type)
    stats = gen_stats_for_model(model)
    stats['bn']['output'] = bn_out_stats
    quantizer = PostTrainLinearQuantizer(
        model, model_activation_stats=deepcopy(stats))
    quantizer.prepare_model(torch.randn(10, 10))
    # get quant params:
    expected_quant_params_keys = {
        'linear.output_zero_point', 'linear.output_scale',
        'act1.output_zero_point', 'act1.output_scale',
        'act2.output_zero_point', 'act2.output_scale'
    }
    assert set(quantizer.acts_quant_params) == expected_quant_params_keys
    quantizer.set_act_quant_param('linear.output_zero_point', 2.)
    quantizer.set_act_quant_param('linear.output_scale', 30.)
    assert model.linear.output_zero_point == 2.
    assert model.linear.output_scale == 30.
    expected_quant_param_linear_dict = {
        'output_zero_point': torch.tensor(2.),
        'output_scale': 30.
    }
    assert dict(model.linear.named_acts_quant_params()) == expected_quant_param_linear_dict
    new_config = {'linear.output_zero_point': 4., 'act2.output_scale': 50}
    quantizer.update_acts_quant_params(new_config)
    assert model.linear.output_zero_point == 4
    assert model.act2.output_scale == 50
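Since `acts_quant_params` behaves as a name-to-value mapping and `update_acts_quant_params` accepts the same mapping, a snapshot/restore round trip follows directly. A minimal sketch, assuming the quantizer prepared in the test above:

snapshot = deepcopy(quantizer.acts_quant_params)                 # save all activation quant params
quantizer.update_acts_quant_params({'linear.output_scale': 1.})  # experiment with a new value
quantizer.update_acts_quant_params(snapshot)                     # restore the saved values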
Example #3
def test_stats_fusion_just_bn():
    model = ConvBnActPool(None, False)
    stats = gen_stats_for_model(model)
    quantizer = PostTrainLinearQuantizer(model, model_activation_stats=deepcopy(stats))
    quantizer.prepare_model(torch.randn((10, 10, 20, 20)))

    expected = deepcopy(stats)
    expected.pop('bn')  # After BN folding BN stats are removed
    expected['conv']['output'] = deepcopy(stats['bn']['output'])
    assert quantizer.model_activation_stats == expected
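ConvBnActPool is a test fixture whose definition is not included in these examples. A hypothetical reconstruction consistent with how the tests use it (layer names, channel counts, and the meaning of the constructor arguments are all assumptions):

import torch.nn as nn
import torch.nn.functional as F

class ConvBnActPool(nn.Module):
    # act_type: activation name such as 'relu', or None for no activation;
    # act_as_module: register the activation as a submodule vs. call its functional form
    def __init__(self, act_type, act_as_module):
        super(ConvBnActPool, self).__init__()
        self.conv = nn.Conv2d(10, 10, kernel_size=3)
        self.bn = nn.BatchNorm2d(10)
        self.act_type = act_type
        if act_type is not None and act_as_module:
            self.act = {'relu': nn.ReLU, 'tanh': nn.Tanh, 'sigmoid': nn.Sigmoid}[act_type]()
        self.pool = nn.MaxPool2d(2)

    def forward(self, x):
        y = self.bn(self.conv(x))
        if self.act_type is not None:
            y = self.act(y) if hasattr(self, 'act') else getattr(F, self.act_type)(y)
        return self.pool(y)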
Example #4
def test_stats_fusion_split_act(act1_type, act2_type, bn_out_stats, linear_out_expected_stats):
    model = LinearBNSplitAct(act1_type, act2_type)
    stats = gen_stats_for_model(model)
    stats['bn']['output'] = bn_out_stats
    quantizer = PostTrainLinearQuantizer(model, model_activation_stats=deepcopy(stats))
    quantizer.prepare_model(torch.randn(10, 10))

    expected = deepcopy(stats)
    expected.pop('bn')  # After BN folding BN stats are removed
    expected['linear']['output'] = linear_out_expected_stats
    assert quantizer.model_activation_stats == expected
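LinearBNSplitAct is likewise a fixture. A hypothetical reconstruction matching how these tests drive it (`act1_type`/`act2_type` are assumed to be activation module classes such as nn.ReLU or nn.Tanh; the layer sizes are assumptions):

import torch
import torch.nn as nn

class LinearBNSplitAct(nn.Module):
    # Linear -> BatchNorm1d, output split in two halves, each through its own activation
    def __init__(self, act1_type, act2_type):
        super(LinearBNSplitAct, self).__init__()
        self.linear = nn.Linear(10, 20)
        self.bn = nn.BatchNorm1d(20)
        self.act1 = act1_type()
        self.act2 = act2_type()

    def forward(self, x):
        y = self.bn(self.linear(x))
        h1, h2 = y.chunk(2, dim=1)
        return torch.cat([self.act1(h1), self.act2(h2)], dim=1)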
Example #5
def test_acts_quant_params_rnn(rnn_model):
    model = DummyWordLangModel(nn.Embedding(41, 20), rnn_model).cuda()
    stats = gen_stats_for_model(model)
    quantizer = PostTrainLinearQuantizer(model, model_activation_stats=deepcopy(stats))
    dummy_input = torch.randint(0, 41, size=(79, 23))
    quantizer.prepare_model(dummy_input)
    new_config = {
        'rnn.rnn.cells.0.act_o.output_scale': 4,
        'embedding.w_scale': torch.tensor(59.0)
    }
    quantizer.update_acts_quant_params(new_config)
    assert model.rnn.rnn.cells[0].act_o.output_scale == 4
    assert model.embedding.w_scale == 59.0
Example #6
def test_stats_fusion_sequential(act_type, act_as_module, bn_out_stats, conv_out_expected_stats):
    model = ConvBnActPool(act_type, act_as_module)
    stats = gen_stats_for_model(model)
    stats['bn']['output'] = bn_out_stats
    quantizer = PostTrainLinearQuantizer(model, model_activation_stats=deepcopy(stats))
    quantizer.prepare_model(torch.randn((10, 10, 20, 20)))

    expected = deepcopy(stats)
    expected.pop('bn')  # After BN folding BN stats are removed
    expected['conv']['output'] = conv_out_expected_stats
    if act_as_module:
        expected['act']['inputs'][0] = conv_out_expected_stats

    assert quantizer.model_activation_stats == expected
Example #7
def test_stats_fusion_no_fuse(model, input_shape):
    stats = gen_stats_for_model(model)
    quantizer = PostTrainLinearQuantizer(
        model, model_activation_stats=deepcopy(stats))
    quantizer.prepare_model(torch.randn(input_shape))
    assert quantizer.model_activation_stats == stats
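In these tests `gen_stats_for_model` fabricates the activation statistics. Outside of tests, the dict passed as `model_activation_stats` usually comes from a calibration run; a hedged sketch using distiller's stats-collection helper, assuming `model`, `calib_loader`, and `dummy_input` exist as in the surrounding examples:

import torch
from distiller.data_loggers import collect_quant_stats

def eval_fn(model):
    # run calibration forward passes over an assumed DataLoader
    model.eval()
    with torch.no_grad():
        for inputs, _ in calib_loader:
            model(inputs)

stats = collect_quant_stats(model, eval_fn, save_dir='.')
quantizer = PostTrainLinearQuantizer(model, model_activation_stats=stats)
quantizer.prepare_model(dummy_input)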
Example #8
#!/usr/bin/env python3

import torch
import torchvision

import settings.optimizeNetwork_settings as s

from common.mask_util import MaskTable, compensateNetwork
from common.nnTools import test
from distiller.quantization import PostTrainLinearQuantizer, LinearQuantMode

from copy import deepcopy
# create the reference (unmasked) quantized model
ref_mask_table = MaskTable(s.bits, s.quant_mode, s.mask_mode, s.network, [], False)
ref_quantized = PostTrainLinearQuantizer(deepcopy(s.network), bits_activations=s.aw_bits,
                                         bits_parameters=s.aw_bits, bits_accum=s.acc_bits,
                                         mode=s.quant_mode, mask_table=ref_mask_table,
                                         scale_approx_mult_bits=s.bits)
ref_quantized.prepare_model(s.dummy_input)
ref_quantized.model.eval()
ref_quantized.model.to("cpu")

# load the mask and build the masked quantized model
mask_table = MaskTable(s.bits, s.quant_mode, s.mask_mode, s.network, mask_file=s.config_fname)
quantizer = PostTrainLinearQuantizer(deepcopy(s.network), bits_activations=s.aw_bits,
                                     bits_parameters=s.aw_bits, bits_accum=s.acc_bits,
                                     mode=s.quant_mode, mask_table=mask_table,
                                     scale_approx_mult_bits=s.bits)

quantizer.prepare_model(s.dummy_input)
quantizer.model.eval()
compensateNetwork(ref_quantized.model,
                  quantizer.model,
                  s.test_set,
                  "../conf_files/conf_path.json")

# load the evaluation set. The original snippet lost the opening line of this call;
# a CIFAR-10 test split is assumed here based on the report filename below, and the
# data root is assumed to come from the settings module
train_set = torchvision.datasets.CIFAR10(
    root=s.data_root,            # where data will be located
    train=False,
    download=True,               # download if not present offline (run only the first time)
    transform=s.transform_test)
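
# `get_all_preds` is called below but never defined in the original snippet;
# this is a hypothetical reconstruction matching how it is used
# (batched inference, logits concatenated into one tensor)
@torch.no_grad()
def get_all_preds(model, loader, device="cpu"):
    preds = []
    for images, _ in loader:
        preds.append(model(images.to(device)))
    return torch.cat(preds, dim=0)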

# the names below are used by the original snippet but never defined in it;
# they are assumed to come from the settings module (the evaluation device is
# assumed to be the CPU, matching the reference pass above)
network, bits, acc_bits, batch_size = s.network, s.bits, s.acc_bits, s.batch_size
device = "cpu"

data_loader = torch.utils.data.DataLoader(train_set,
                                          shuffle=False,
                                          batch_size=batch_size)

dummy_input = torch.zeros([1, 3, 32, 32])
test_preds = get_all_preds(network, data_loader, device="cpu")
ref_correct = test_preds.argmax(dim=1).eq(torch.LongTensor(train_set.targets)).sum().item()
print(ref_correct)

quant_mode_list = [LinearQuantMode.SYMMETRIC, LinearQuantMode.ASYMMETRIC_UNSIGNED, LinearQuantMode.ASYMMETRIC_SIGNED]
rep_string = "mode: {}, weight bits: {}, correct: {}, accuracy: {}\n"  # assumed format; not defined in the original
with open("../reports/data_vgg11bn_CIFAR10_postTrainQuantizing.txt", "w") as log_pointer:
    log_pointer.write("Reference accuracy = {}\n".format(ref_correct))
    # sweep quantization modes and weight bit-widths, logging accuracy for each combination
    for quant_mode in quant_mode_list:
        for qw_bits in range(3, bits + 1):
            quantizer = PostTrainLinearQuantizer(deepcopy(network), bits_activations=bits,
                                                 bits_parameters=qw_bits, bits_accum=acc_bits,
                                                 mode=quant_mode, scale_approx_mult_bits=bits)
            quantizer.model.to(device)
            quantizer.prepare_model(dummy_input)
            quantizer.model.eval()
            test_preds = get_all_preds(quantizer.model, data_loader, device=device)
            preds_correct = test_preds.argmax(dim=1).eq(torch.LongTensor(train_set.targets).to(device)).sum().item()
            accuracy = preds_correct / len(train_set)
            log_pointer.write(rep_string.format(quant_mode, qw_bits, preds_correct, accuracy))
            del quantizer
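
Note that each sweep iteration quantizes a fresh deepcopy of the float network: prepare_model replaces modules in place, so the original model must be copied for every (mode, bit-width) combination.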