def test_per_channel_scale(self, verbose):
    """Quantizer performs per channel scaling.

    A random float32 tensor of shape (15, 15, 64, 128) is quantized by a
    ``TensorQuantizer`` built with ``axis=(0)`` and ``scale_amax=0.7``, and the
    result is compared with ``test_utils.quant_np`` fed with 0.7 times the
    per-slice absolute maximum along axis 0.

    Args:
        verbose: when truthy, print the elements whose two results differ by
            at least one.
    """
    x_np = np.random.rand(15, 15, 64, 128).astype('float32')
    x_torch = torch.Tensor(x_np).cuda()

    # Pytorch filter layout seems to be KCRS, reduce max to shape [K, 1, 1, 1] to test per channel scale
    # Shrink max a little, so that clip behavior is tested
    amax_x_np = 0.7 * np.max(np.abs(x_np), axis=(1, 2, 3), keepdims=True)
    quant_x_np = test_utils.quant_np(x_np, amax_x_np)

    # NOTE: ``(0)`` is the plain int 0, not a one-element tuple.
    quantizer = tensor_quantizer.TensorQuantizer(
        tensor_quant.QuantDescriptor(num_bits=8, axis=(0), fake_quant=False, scale_amax=0.7))
    quantizer.cuda()

    # Copy the result to host once and reuse it for the check and the report.
    module_quant_x_np = quantizer(x_torch).cpu().numpy()
    error = np.abs(module_quant_x_np - quant_x_np)

    # Report before asserting so the details are still printed when the
    # assertion below fails.
    if verbose:
        mismatches = np.where(error >= 1)
        print("Mismatches:")
        print(" Original: ", x_np[mismatches])
        print(" numpy: ", quant_x_np[mismatches])
        print(" TensorQuantizer: ", module_quant_x_np[mismatches])

    # Pytorch numerics is not the same as numpy, it will be off by 1,
    # so an exact comparison is not used here.
    np.testing.assert_array_less(error, 2)
def test_per_channel_scale(self, verbose=False):
    """tensor_quant performs per channel quantization.

    Compares ``tensor_quant.tensor_quant`` called with a per-slice amax of
    shape [K, 1, 1, 1] against ``test_utils.quant_np`` on the same data.

    Args:
        verbose: when truthy, print the elements whose two results differ by
            at least one. Defaults to False so the test can run without the
            argument being supplied (the original body read ``verbose``
            without it being a parameter).
    """
    x_np = np.random.rand(15, 15, 64, 128).astype('float32')
    x_torch = torch.Tensor(x_np).cuda()

    # Pytorch filter layout seems to be KCRS, reduce max to shape [K, 1, 1, 1] to test per channel scale
    # Shrink max a little, so that clip behavior is tested
    amax_x_np = 0.7 * np.max(np.abs(x_np), axis=(1, 2, 3), keepdims=True)

    # Pytorch's max function doesn't support reducing multiple axes, and returns a (max, argmax) tuple,
    # so it has to be reduced by multiple torch.max calls.
    # abs() mirrors the numpy reference above; the data here is non-negative either way.
    amax_x_torch = 0.7 * torch.max(
        torch.max(
            torch.max(torch.abs(x_torch), dim=1, keepdim=True)[0],
            dim=2, keepdim=True)[0],
        dim=3, keepdim=True)[0]

    quant_x_np = test_utils.quant_np(x_np, amax_x_np)
    quant_x_torch, _ = tensor_quant.tensor_quant(x_torch, amax_x_torch)

    # Copy the result to host once and reuse it for the check and the report.
    quant_x_torch_np = quant_x_torch.cpu().numpy()
    error = np.abs(quant_x_torch_np - quant_x_np)

    # Report before asserting so the details are still printed when the
    # assertion below fails.
    if verbose:
        mismatches = np.where(error >= 1)
        print("Mismatches:")
        print(" Original: ", x_np[mismatches])
        print(" numpy: ", quant_x_np[mismatches])
        print(" Pytorch: ", quant_x_torch_np[mismatches])

    # Pytorch numerics is not the same as numpy, it will be off by 1,
    # so an exact comparison is not used here.
    np.testing.assert_array_less(error, 2)
def test_per_tensor_scale(self):
    """Quantizer performs expected quantization.

    Quantizes a random 1-D tensor with an 8-bit, non-fake ``TensorQuantizer``
    and requires exact equality with ``test_utils.quant_np`` using the
    absolute maximum of the data as amax.
    """
    # torch.Tensor() stores float32, so cast up front: the numpy reference then
    # sees exactly the values the quantizer sees, which the exact comparison
    # below depends on.
    x_np = np.random.rand(1023).astype('float32')
    x_torch = torch.Tensor(x_np)

    quant_x_np = test_utils.quant_np(x_np, np.max(np.abs(x_np)))

    quantizer = tensor_quantizer.TensorQuantizer(
        tensor_quant.QuantDescriptor(num_bits=8, fake_quant=False))
    module_quant_x = quantizer(x_torch)

    np.testing.assert_array_equal(module_quant_x.cpu().numpy(), quant_x_np)
def test_cuda_ext_inplace(self):
    """In-place ``cuda_ext.fake_tensor_quant_`` agrees with the numpy reference.

    The fp32 case must match exactly; the fp16 case is compared to two
    decimal places.
    """
    # --- fp32: exact match expected ---
    data_fp32 = np.random.rand(1023).astype('float32')
    tensor_fp32 = torch.Tensor(data_fp32).cuda()
    expected_fp32 = test_utils.quant_np(
        data_fp32, np.max(np.abs(data_fp32)), fake=True)

    amax_fp32 = torch.max(torch.abs(tensor_fp32))
    # Trailing underscore: the tensor passed in is checked afterwards, i.e.
    # the call is expected to overwrite it.
    cuda_ext.fake_tensor_quant_(tensor_fp32, amax_fp32)
    np.testing.assert_array_equal(tensor_fp32.cpu().numpy(), expected_fp32)

    # --- fp16: compared with a tolerance ---
    data_fp16 = np.random.rand(1023).astype('float16')
    tensor_fp16 = torch.Tensor(data_fp16).cuda().half()
    expected_fp16 = test_utils.quant_np(
        data_fp16, np.max(np.abs(data_fp16)), fake=True)

    amax_fp16 = torch.max(torch.abs(tensor_fp16))
    cuda_ext.fake_tensor_quant_(tensor_fp16, amax_fp16)
    np.testing.assert_array_almost_equal(
        tensor_fp16.cpu().numpy(), expected_fp16, decimal=2)
def test_per_tensor_scale(self):
    """tensor_quant matches numpy quantization.

    Runs with the default tensor type switched to ``torch.cuda.FloatTensor``
    so that ``torch.Tensor(...)`` allocates on the GPU, and always switches it
    back to ``torch.FloatTensor`` afterwards.
    """
    torch.set_default_tensor_type('torch.cuda.FloatTensor')  # Test on GPU
    try:
        # torch.Tensor() stores float32, so cast up front: the numpy reference
        # then sees exactly the values tensor_quant sees, which the exact
        # comparison below depends on.
        x_np = np.random.rand(1023).astype('float32')
        x_torch = torch.Tensor(x_np)

        quant_x_np = test_utils.quant_np(x_np, np.max(np.abs(x_np)))
        quant_x_torch, _ = tensor_quant.tensor_quant(
            x_torch, torch.max(torch.abs(x_torch)))

        np.testing.assert_array_equal(quant_x_torch.cpu().numpy(), quant_x_np)
    finally:
        # Restore the process-wide default even when the assertion (or anything
        # else above) raises, so a failure here cannot leak the CUDA default
        # into later tests.
        torch.set_default_tensor_type('torch.FloatTensor')
def test_unsigned(self):
    """``fake_tensor_quant`` called with ``8, True`` matches a 9-bit numpy reference."""
    samples = np.random.rand(1023).astype('float32')
    samples_gpu = torch.Tensor(samples).cuda()

    # The reference is computed with num_bits=9 while torch gets 8 plus a True
    # flag. NOTE(review): presumably the flag selects unsigned mode, whose
    # 8-bit positive range equals that of 9 signed bits - confirm against
    # fake_tensor_quant's signature.
    reference = test_utils.quant_np(
        samples, np.max(np.abs(samples)), num_bits=9, fake=True)

    amax_gpu = torch.max(torch.abs(samples_gpu))
    result = tensor_quant.fake_tensor_quant(samples_gpu, amax_gpu, 8, True)

    np.testing.assert_array_almost_equal(result.cpu().numpy(), reference)
def test_per_tensor_scale(self):
    """fake_tensor_quant matches numpy quantization.

    Uses a small 13-element float32 input, printed so the values appear in
    the test output.
    """
    samples = np.random.rand(13).astype('float32')
    print(samples)
    samples_gpu = torch.Tensor(samples).cuda()

    # Reference: fake quantization with the absolute maximum as amax.
    reference = test_utils.quant_np(samples, np.max(np.abs(samples)), fake=True)

    amax_gpu = torch.max(torch.abs(samples_gpu))
    result = tensor_quant.fake_tensor_quant(samples_gpu, amax_gpu)

    np.testing.assert_array_almost_equal(result.cpu().numpy(), reference)
def test_learn_amax(self):
    """Test the clip implied by learn_amax.

    Builds a quantizer with a fixed amax of 0.5 and ``learn_amax=True``,
    checks that it exposes a ``clip`` attribute, and requires its output to
    equal the numpy fake-quantization reference for the same amax.
    """
    amax = 0.5
    samples = np.random.rand(1023).astype(np.float32)
    samples_torch = torch.Tensor(samples)

    reference = test_utils.quant_np(samples, amax, fake=True)

    descriptor = tensor_quant.QuantDescriptor(num_bits=8, amax=amax, learn_amax=True)
    quantizer = tensor_quantizer.TensorQuantizer(descriptor)
    assert hasattr(quantizer, 'clip')

    # detach() before converting: the output is detached from autograd
    # prior to the numpy conversion, as in the other learn/scale tests.
    output = quantizer(samples_torch).cpu().detach().numpy()
    np.testing.assert_array_equal(output, reference)
def test_scale_amax(self):
    """Quantizer with ``scale_amax`` behaves like the reference with amax * scale_amax.

    The quantizer is applied twice to the same input and both outputs must
    equal the numpy reference.
    """
    amax = 0.5
    scale_amax = 0.9
    samples = np.random.rand(1023).astype(np.float32)
    samples_torch = torch.Tensor(samples)

    reference = test_utils.quant_np(samples, amax * scale_amax, fake=True)

    descriptor = tensor_quant.QuantDescriptor(
        num_bits=8, amax=amax, scale_amax=scale_amax)
    quantizer = tensor_quantizer.TensorQuantizer(descriptor)

    # Run twice. There was a bug in the scale amax logic that modified the
    # amax on every call, so the second pass guards against a regression.
    for _ in range(2):
        output = quantizer(samples_torch)
        np.testing.assert_array_equal(output.cpu().detach().numpy(), reference)
def test_full_range(self):
    """fake_tensor_quant uses the full integer range when narrow=False.

    Compares against ``test_utils.quant_np`` called with ``num_bits=9`` and
    ``narrow_range=False``.
    """
    samples = np.random.rand(1023).astype('float32')
    samples_gpu = torch.Tensor(samples).cuda()

    reference = test_utils.quant_np(
        samples,
        np.max(np.abs(samples)),
        num_bits=9,
        fake=True,
        narrow_range=False,
    )

    amax_gpu = torch.max(torch.abs(samples_gpu))
    # NOTE(review): the positional arguments 8, True, False are presumably
    # num_bits, unsigned and narrow_range - confirm against the signature.
    result = tensor_quant.fake_tensor_quant(samples_gpu, amax_gpu, 8, True, False)

    np.testing.assert_array_almost_equal(result.cpu().numpy(), reference)
def test_unsigned(self):
    """``tensor_quant`` called with ``8, True`` matches a 9-bit numpy reference
    and rejects input containing negative values.
    """
    # Part 1: non-negative input is quantized like the 9-bit reference.
    samples = np.random.rand(1023).astype('float32')
    samples_torch = torch.Tensor(samples)

    reference = test_utils.quant_np(
        samples, np.max(np.abs(samples)), num_bits=9, fake=False)

    amax = torch.max(torch.abs(samples_torch))
    result, _ = tensor_quant.tensor_quant(samples_torch, amax, 8, True)
    np.testing.assert_array_almost_equal(result.cpu().numpy(), reference)

    # Part 2: randn() input, which can hold negative values, must raise
    # TypeError with the expected message.
    signed_input = torch.randn(3, 7)
    with pytest.raises(TypeError, match="Negative values encountered"):
        tensor_quant.tensor_quant(
            signed_input, torch.max(torch.abs(signed_input)), 8, True)