def test_fake_quant_per_channel_other_prec(self):
    kernel_size = 3

    quant_desc_input = QuantDescriptor(num_bits=4)
    quant_desc_weight = QuantDescriptor(num_bits=3, axis=(0))

    quant_conv_object = quant_conv.QuantConv1d(
        _NUM_IN_CHANNELS,
        _NUM_OUT_CHANNELS,
        kernel_size,
        bias=False,
        quant_desc_input=quant_desc_input,
        quant_desc_weight=quant_desc_weight)
    test_input = torch.randn(16, _NUM_IN_CHANNELS, 16)

    test_input_quantizer = TensorQuantizer(quant_desc_input)
    weight_quantizer = TensorQuantizer(quant_desc_weight)

    quant_input = test_input_quantizer(test_input)

    weight_copy = quant_conv_object.weight.clone()
    quant_weight = weight_quantizer(weight_copy)

    out1 = F.conv1d(quant_input, quant_weight)
    out2 = quant_conv_object(test_input)
    np.testing.assert_array_equal(out1.detach().cpu().numpy(), out2.detach().cpu().numpy())
def test_against_unquantized(self):
    kernel_size = 3
    test_input = torch.randn(16, _NUM_IN_CHANNELS, 24).cuda()

    torch.manual_seed(12345)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(12345)
    fake_quant_conv1d = quant_conv.QuantConv1d(
        _NUM_IN_CHANNELS,
        _NUM_OUT_CHANNELS,
        kernel_size,
        bias=True,
        quant_desc_input=QuantDescriptor(num_bits=16),
        quant_desc_weight=QuantDescriptor(num_bits=16, axis=(0))).cuda()

    # Reset the seed so that weight and bias initialize to the same values.
    torch.manual_seed(12345)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(12345)
    conv1d = nn.Conv1d(_NUM_IN_CHANNELS, _NUM_OUT_CHANNELS, kernel_size, bias=True).cuda()

    fake_quant_output = fake_quant_conv1d(test_input)
    output = conv1d(test_input)

    test_utils.compare(fake_quant_output, output, rtol=1e-5, atol=1e-4)
def test_fake_quant_per_channel_bias(self):
    kernel_size = 3
    quant_conv_object = quant_conv.QuantConv1d(
        _NUM_IN_CHANNELS,
        _NUM_OUT_CHANNELS,
        kernel_size,
        bias=True,
        quant_desc_weight=QuantDescriptor(axis=(0)))
    test_input = torch.randn(16, _NUM_IN_CHANNELS, 16)

    quant_input = tensor_quant.fake_tensor_quant(test_input, torch.max(torch.abs(test_input)))

    weight_copy = quant_conv_object.weight.clone()
    quant_weight = tensor_quant.fake_tensor_quant(
        weight_copy,
        torch.max(torch.abs(weight_copy).view(_NUM_OUT_CHANNELS, -1), dim=1,
                  keepdim=True)[0].view(_NUM_OUT_CHANNELS, 1, 1))

    out1 = F.conv1d(quant_input, quant_weight, bias=quant_conv_object.bias)
    out2 = quant_conv_object(test_input)
    np.testing.assert_array_equal(out1.detach().cpu().numpy(), out2.detach().cpu().numpy())
def test_fake_quant_per_tensor(self):
    kernel_size = 3
    quant_conv_object = quant_conv.QuantConv1d(
        _NUM_IN_CHANNELS,
        _NUM_OUT_CHANNELS,
        kernel_size,
        bias=False,
        quant_desc_weight=QuantDescriptor())
    test_input = torch.randn(16, _NUM_IN_CHANNELS, 16)

    quant_input = tensor_quant.fake_tensor_quant(test_input, torch.max(torch.abs(test_input)))

    weight_copy = quant_conv_object.weight.clone()
    quant_weight = tensor_quant.fake_tensor_quant(weight_copy, torch.max(torch.abs(weight_copy)))

    out1 = F.conv1d(quant_input, quant_weight)
    out2 = quant_conv_object(test_input)
    np.testing.assert_array_equal(out1.detach().cpu().numpy(), out2.detach().cpu().numpy())
def test_fake_quant_input(self):
    kernel_size = 3
    quant_conv_object = quant_conv.QuantConv1d(
        _NUM_IN_CHANNELS,
        _NUM_OUT_CHANNELS,
        kernel_size,
        bias=False)
    quant_conv_object.weight_quantizer.disable()
    test_input = torch.randn(20, _NUM_IN_CHANNELS, 50)

    quant_input = tensor_quant.fake_tensor_quant(test_input, torch.max(torch.abs(test_input)))

    out1 = F.conv1d(quant_input, quant_conv_object.weight)
    out2 = quant_conv_object(test_input)
    np.testing.assert_array_equal(out1.detach().cpu().numpy(), out2.detach().cpu().numpy())
def test_no_quant(self):
    kernel_size = 8
    quant_conv_object = quant_conv.QuantConv1d(
        _NUM_IN_CHANNELS,
        _NUM_OUT_CHANNELS,
        kernel_size,
        bias=False)
    quant_conv_object.input_quantizer.disable()
    quant_conv_object.weight_quantizer.disable()
    test_input = torch.randn(16, _NUM_IN_CHANNELS, 256)

    weight_copy = quant_conv_object.weight.clone()
    quant_weight = weight_copy

    out1 = F.conv1d(test_input, quant_weight)
    out2 = quant_conv_object(test_input)
    np.testing.assert_array_equal(out1.detach().cpu().numpy(), out2.detach().cpu().numpy())
def test_weight_fake_quant_per_channel(self):
    kernel_size = 3
    quant_conv_object = quant_conv.QuantConv1d(
        _NUM_IN_CHANNELS,
        _NUM_OUT_CHANNELS,
        kernel_size,
        bias=False,
        quant_desc_weight=QuantDescriptor(axis=(0)))
    quant_conv_object.input_quantizer.disable()
    test_input = torch.randn(16, _NUM_IN_CHANNELS, 256)

    weight_copy = quant_conv_object.weight.clone()
    amax = quant_utils.reduce_amax(weight_copy, axis=(1, 2))
    quant_weight = tensor_quant.fake_tensor_quant(weight_copy, amax)

    out1 = F.conv1d(test_input, quant_weight)
    out2 = quant_conv_object(test_input)
    np.testing.assert_array_equal(out1.detach().cpu().numpy(), out2.detach().cpu().numpy())
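
# --- Illustrative sketch (added for exposition; not part of the original suite) ---
# The per-channel tests above compute the weight amax in two ways: manually via
# abs/view/max/keepdim (test_fake_quant_per_channel_bias) and with
# quant_utils.reduce_amax (test_weight_fake_quant_per_channel). Assuming
# reduce_amax keeps the reduced dimensions by default, both reductions should
# yield the same (out_channels, 1, 1) tensor for a Conv1d weight. The helper
# name below is hypothetical; it reuses this module's existing imports and the
# _NUM_IN_CHANNELS / _NUM_OUT_CHANNELS constants.
def _sketch_per_channel_amax_equivalence():
    weight = torch.randn(_NUM_OUT_CHANNELS, _NUM_IN_CHANNELS, 3)
    # Manual reduction: flatten each output channel, take max |w|, restore dims.
    amax_manual = torch.max(
        torch.abs(weight).view(_NUM_OUT_CHANNELS, -1), dim=1,
        keepdim=True)[0].view(_NUM_OUT_CHANNELS, 1, 1)
    # Library helper: max |w| over the non-channel axes, keeping reduced dims.
    amax_reduced = quant_utils.reduce_amax(weight, axis=(1, 2))
    np.testing.assert_array_equal(amax_manual.numpy(), amax_reduced.numpy())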