Пример #1
0
    def test_quantize_APoT_rand_k1(self):
        """Compare APoT quantization (b=8, k=1) with uniform quantization.

        A 1-D float tensor of random length is quantized once with
        ``quantize_APoT`` and once with ``quantize_per_tensor`` (qparams
        taken from a ``MinMaxObserver``); the integer data must be equal.
        """
        # random length in 1..20, random fp values scaled by 1000
        length = random.randint(1, 20)
        tensor2quantize = 1000 * torch.rand(length, dtype=torch.float)

        # APoT path: observe the tensor, derive qparams, quantize
        apot_obs = APoTObserver(b=8, k=1)
        apot_obs(tensor2quantize)
        apot_qparams = apot_obs.calculate_qparams(signed=False)
        alpha, gamma, quantization_levels, level_indices = apot_qparams
        apot_result = quantize_APoT(
            tensor2quantize=tensor2quantize,
            alpha=alpha,
            gamma=gamma,
            quantization_levels=quantization_levels,
            level_indices=level_indices,
        )

        # uniform path: min/max observer + per-tensor quantization
        minmax_obs = MinMaxObserver()
        minmax_obs(tensor2quantize)
        scale, zero_point = minmax_obs.calculate_qparams()
        uniform_result = quantize_per_tensor(
            input=tensor2quantize,
            scale=scale,
            zero_point=zero_point,
            dtype=torch.quint8,
        ).int_repr()

        # compare both results as int32 tensors
        apot_ints = apot_result.data.int()
        uniform_ints = uniform_result.data.int()
        self.assertTrue(torch.equal(apot_ints, uniform_ints))
Пример #2
0
    def test_calculate_qparams_3terms(self):
        """Check unsigned qparams for an observer built with b=6, k=2.

        The result is indexed as (gamma, quantization levels, level indices).
        """
        observer = APoTObserver(max_val=1.0, b=6, k=2)
        qparams = observer.calculate_qparams(signed=False)

        # expected gamma: reciprocal of 2**0 + 2**-1 + 2**-2
        expected_gamma = 1 / sum(2**(-i) for i in range(3))
        self.assertEqual(qparams[0], expected_gamma)

        # 2**b quantization levels are expected
        expected_num_levels = 2**6
        self.assertEqual(len(qparams[1]), expected_num_levels)

        # one level index per quantization level
        self.assertEqual(len(qparams[2]), 64)

        # level indices must not contain duplicates
        indices = qparams[2].tolist()
        self.assertEqual(len(indices), len(set(indices)))
Пример #3
0
    def test_calculate_qparams_signed(self):
        """Check signed qparams for an observer built with b=4, k=2.

        The result is indexed as (gamma, quantization levels, level indices).
        """
        observer = APoTObserver(max_val=1.0, b=4, k=2)
        qparams = observer.calculate_qparams(signed=True)

        # expected gamma: reciprocal of 2**0 + 2**-1
        expected_gamma = 1 / sum(2**(-i) for i in range(2))
        self.assertEqual(qparams[0], expected_gamma)

        # the signed level set is expected to hold 49 entries
        self.assertEqual(len(qparams[1]), 49)

        # every level must have its negation in the level set
        levels = qparams[1].tolist()
        self.assertTrue(all(-level in levels for level in levels))

        # one level index per quantization level
        self.assertEqual(len(qparams[2]), 49)

        # level indices must not contain duplicates
        indices = qparams[2].tolist()
        self.assertEqual(len(indices), len(set(indices)))
Пример #4
0
    def test_int_repr(self):
        """Check ``int_repr()`` of an APoT-quantized tensor (b=4, k=2).

        Fixed floating point inputs are quantized and the resulting integer
        representation is compared with hand-computed level indices.
        """
        # fixed (not random) fp inputs, so the expected indices are known
        tensor2quantize = torch.tensor(
            [0, 0.0215, 0.1692, 0.385, 1, 0.0391])

        observer = APoTObserver(b=4, k=2)
        observer.forward(tensor2quantize)
        alpha, gamma, quantization_levels, level_indices = observer.calculate_qparams(
            signed=False)

        # get apot quantized tensor result
        qtensor = quantize_APoT(tensor2quantize=tensor2quantize,
                                alpha=alpha,
                                gamma=gamma,
                                quantization_levels=quantization_levels,
                                level_indices=level_indices)

        qtensor_data = qtensor.int_repr().int()

        # expected qtensor values calculated based on
        # corresponding level_indices to nearest quantization level
        # for each fp value in tensor2quantize
        # e.g.
        # 0.0215 in tensor2quantize nearest 0.0208 in quantization_levels -> 3 in level_indices
        expected_qtensor_data = torch.tensor([0, 3, 8, 13, 5, 12],
                                             dtype=torch.int32)

        self.assertTrue(torch.equal(qtensor_data, expected_qtensor_data))
Пример #5
0
    def test_calculate_qparams_k1(self):
        """Check unsigned qparams for b=6, k=1 with min/max set to 0.0/1.0."""
        observer = APoTObserver(b=6, k=1)

        # set the observed range directly instead of running forward()
        observer.min_val = torch.tensor([0.0])
        observer.max_val = torch.tensor([1.0])

        qparams = observer.calculate_qparams(signed=False)
        alpha, gamma, quantization_levels, level_indices = qparams

        # expected gamma: reciprocal of the sum of 2**-i for i in 0..5
        expected_gamma = 1 / sum(2**(-i) for i in range(6))
        self.assertEqual(gamma, expected_gamma)

        # 2**b quantization levels are expected
        expected_num_levels = 2**6
        self.assertEqual(len(quantization_levels), expected_num_levels)

        # one level index per quantization level
        expected_num_indices = 2**6
        self.assertEqual(len(level_indices), expected_num_indices)

        # level indices must not contain duplicates
        indices = level_indices.tolist()
        self.assertEqual(len(indices), len(set(indices)))
Пример #6
0
class APoTFakeQuantize(FakeQuantizeBase):
    """Fake-quantize module whose qparams come from an ``APoTObserver``.

    ``forward`` optionally updates the observer and caches its qparams on
    the instance, then optionally applies ``fake_quantize_function`` with
    the cached qparams.
    """
    # qparams cached by ``forward`` after an observer pass; they are only
    # annotated here and do not exist on the instance until then
    alpha: Tensor
    gamma: Tensor
    quantization_levels: Tensor
    level_indices: Tensor

    def __init__(self, **observer_kwargs):
        """Create the wrapped observer.

        Args:
            **observer_kwargs: forwarded unchanged to ``APoTObserver``.
        """
        super().__init__()
        self.activation_post_process = APoTObserver(**observer_kwargs)

    def calculate_qparams(self, signed: bool):  # type: ignore[override]
        """Return the wrapped observer's qparams for the given signedness."""
        return self.activation_post_process.calculate_qparams(signed=signed)

    def forward(self, X: torch.Tensor, signed: bool):  # type: ignore[override]
        """Observe and/or fake-quantize ``X``.

        Args:
            X: tensor to observe and fake-quantize.
            signed: forwarded to the observer's ``calculate_qparams``.

        Returns:
            The fake-quantized tensor when fake quantization is enabled,
            otherwise ``X`` unchanged.

        Raises:
            AssertionError: if fake quantization is enabled but no qparams
                have been set yet.
        """
        if self.observer_enabled[0] == 1:
            self.activation_post_process.forward(X)
            self.alpha, self.gamma, self.quantization_levels, self.level_indices = \
                self.activation_post_process.calculate_qparams(signed)
        if self.fake_quant_enabled[0] == 1:
            # The qparams may never have been assigned (observer disabled
            # from the start). Plain attribute access would then raise
            # AttributeError before the assertion below could report the
            # real problem, so look them up with a None default.
            alpha = getattr(self, "alpha", None)
            gamma = getattr(self, "gamma", None)
            quantization_levels = getattr(self, "quantization_levels", None)
            level_indices = getattr(self, "level_indices", None)
            assert (alpha is not None and gamma is not None
                    and quantization_levels is not None
                    and level_indices
                    is not None), "Must set qparams for fake quant"

            X = fake_quantize_function.apply(X, alpha, gamma,
                                             quantization_levels,
                                             level_indices)

        return X
Пример #7
0
    def test_dequantize_dim(self):
        """Check that dequantizing a 3-D APoT tensor preserves its size."""
        apot_observer = APoTObserver(4, 2)

        # three random dimension sizes, each in 1..20
        dims = [random.randint(1, 20) for _ in range(3)]

        # random fp values scaled by 1000
        tensor2quantize = 1000 * torch.rand(*dims, dtype=torch.float)

        apot_observer.forward(tensor2quantize)
        qparams = apot_observer.calculate_qparams(signed=False)
        alpha, gamma, quantization_levels, level_indices = qparams

        # quantize, then dequantize again
        quantized = quantize_APoT(
            tensor2quantize=tensor2quantize,
            alpha=alpha,
            gamma=gamma,
            quantization_levels=quantization_levels,
            level_indices=level_indices,
        )
        dequantized = dequantize_APoT(apot_tensor=quantized)

        self.assertEqual(quantized.data.size(), dequantized.size())
Пример #8
0
    def test_quantize_APoT_k2(self):
        r"""Check ``quantize_APoT`` against hand-computed indices (b=4, k=2).

        Reference values for b = 4, k = 2, alpha = 1.0
        (APoT paper example: https://arxiv.org/pdf/1909.13144.pdf):

        quantization_levels = tensor([0.0000, 0.0208, 0.0417, 0.0625, 0.0833, 0.1250, 0.1667,
        0.1875, 0.2500, 0.3333, 0.3750, 0.5000, 0.6667, 0.6875, 0.7500, 1.0000])

        level_indices = tensor([ 0, 3, 12, 15,  2, 14,  8, 11, 10, 1, 13,  9,  4,  7,  6,  5])
        """
        # fixed fp inputs whose nearest quantization levels are known
        fp_values = torch.tensor([0, 0.0215, 0.1692, 0.385, 1, 0.0391])

        apot_observer = APoTObserver(b=4, k=2)
        apot_observer.forward(fp_values)
        qparams = apot_observer.calculate_qparams(signed=False)
        alpha, gamma, quantization_levels, level_indices = qparams

        quantized = quantize_APoT(
            tensor2quantize=fp_values,
            alpha=alpha,
            gamma=gamma,
            quantization_levels=quantization_levels,
            level_indices=level_indices,
        )
        actual = quantized.data.int()

        # Each expected entry is the level index of the quantization level
        # nearest to the corresponding input, e.g. 0.0215 is nearest to
        # 0.0208, which maps to level index 3.
        expected = torch.tensor([0, 3, 8, 13, 5, 12], dtype=torch.int32)

        self.assertTrue(torch.equal(actual, expected))
Пример #9
0
    def test_backward(self):
        """Run ``gradcheck`` on ``fake_quantize_function`` with APoT qparams.

        The qparams come from an ``APoTObserver`` (b=4, k=2) that has
        observed the same random double-precision input.
        """
        # named ``x`` rather than ``input`` to avoid shadowing the builtin
        x = torch.randn(20, dtype=torch.double, requires_grad=True)

        observer = APoTObserver(b=4, k=2)
        observer(x)
        alpha, gamma, quantization_levels, level_indices = observer.calculate_qparams(signed=False)

        # assert on the result instead of storing it in an unused local
        self.assertTrue(
            gradcheck(fake_quantize_function.apply,
                      (x, alpha, gamma, quantization_levels, level_indices),
                      atol=1e-4))
Пример #10
0
    def test_calculate_qparams_invalid(self):
        """``calculate_qparams`` must raise ``AssertionError`` for b=0, k=0."""
        obs = APoTObserver(b=0, k=0)
        obs.min_val = torch.tensor([0.0])
        obs.max_val = torch.tensor([0.0])

        with self.assertRaises(AssertionError):
            # The return value is irrelevant here; not unpacking it keeps
            # an unexpected return shape from raising an unrelated error.
            obs.calculate_qparams(signed=False)
Пример #11
0
    def __init__(self, b, k, max_val, signed, dtype=torch.quint8) -> None:
        """Store b, k, n = b // k and the observer-derived level tables.

        Args:
            b: stored as ``self.b``; must be divisible by ``k``.
            k: stored as ``self.k``; must be non-zero.
            max_val: forwarded to ``APoTObserver``.
            signed: stored as ``self.signed`` and forwarded to the
                observer's ``calculate_qparams``.
            dtype: accepted but not used by this constructor.

        Raises:
            AssertionError: if ``k`` is falsy or ``b`` is not divisible
                by ``k``.
        """
        self.signed = signed

        # validate b and k before deriving n
        assert k and k != 0
        assert b % k == 0
        self.b, self.k = b, k
        self.n = b // k

        # entries 1 and 2 of the observer result hold the quantization
        # levels and the level indices
        observer = APoTObserver(max_val=max_val, b=b, k=k)
        qparams = observer.calculate_qparams(signed=signed)
        self.quantization_levels = qparams[1]
        self.level_indices = qparams[2]
Пример #12
0
    def test_forward(self):
        """Check that alpha equals max(-min, max) of the observed tensor."""
        observer = APoTObserver(b=4, k=2)

        values = torch.tensor([0.0, -100.23, -37.18, 3.42, 8.93, 9.21, 87.92])
        observed = observer.forward(values)

        alpha, _gamma, _levels, _indices = observer.calculate_qparams(
            signed=True)

        # the larger of the negated minimum and the maximum
        expected_alpha = torch.max(-torch.min(observed), torch.max(observed))

        self.assertEqual(alpha, expected_alpha)
Пример #13
0
    def test_fake_calc_qparams(self):
        """``APoTFakeQuantize.calculate_qparams`` must match a plain observer.

        Both the fake-quantize module's observer and a standalone
        ``APoTObserver`` get the same min/max (0.0 / 1.0) and b=4, k=2.
        """
        fake_quant = APoTFakeQuantize(b=4, k=2)
        fake_quant.activation_post_process.min_val = torch.tensor([0.0])
        fake_quant.activation_post_process.max_val = torch.tensor([1.0])
        actual = fake_quant.calculate_qparams(signed=False)
        alpha, gamma, quantization_levels, level_indices = actual

        # reference observer configured identically
        reference = APoTObserver(b=4, k=2)
        reference.min_val = torch.tensor([0.0])
        reference.max_val = torch.tensor([1.0])
        expected = reference.calculate_qparams(signed=False)

        self.assertEqual(alpha, expected[0])
        # the remaining qparams are compared as tensors
        for actual_tensor, position in ((gamma, 1),
                                        (quantization_levels, 2),
                                        (level_indices, 3)):
            self.assertTrue(torch.equal(actual_tensor, expected[position]))
Пример #14
0
    def test_calculate_qparams_signed(self):
        """Check signed qparams for b=4, k=2 with min/max set to 0.0/1.0."""
        observer = APoTObserver(b=4, k=2)

        # set the observed range directly instead of running forward()
        observer.min_val = torch.tensor([0.0])
        observer.max_val = torch.tensor([1.0])
        qparams = observer.calculate_qparams(signed=True)
        alpha, gamma, quantization_levels, level_indices = qparams

        # alpha is the larger of the negated minimum and the maximum
        expected_alpha = torch.max(-observer.min_val, observer.max_val)
        self.assertEqual(alpha, expected_alpha)

        # expected gamma: reciprocal of 2**0 + 2**-1
        expected_gamma = 1 / sum(2**(-i) for i in range(2))
        self.assertEqual(gamma, expected_gamma)

        # the signed level set is expected to hold 49 entries
        self.assertEqual(len(quantization_levels), 49)

        # every level must have its negation in the level set
        levels = quantization_levels.tolist()
        self.assertTrue(all(-level in levels for level in levels))

        # one level index per quantization level
        self.assertEqual(len(level_indices), 49)

        # level indices must not contain duplicates
        indices = level_indices.tolist()
        self.assertEqual(len(indices), len(set(indices)))
Пример #15
0
    def test_forward(self):
        """``APoTFakeQuantize.forward`` must equal quantize -> dequantize."""
        # 20 random fp values scaled by 1000
        X = 1000 * torch.rand(20)

        # reference qparams from a standalone observer (b=4, k=2)
        reference_observer = APoTObserver(b=4, k=2)
        reference_observer.forward(X)
        qparams = reference_observer.calculate_qparams(signed=False)
        alpha, gamma, quantization_levels, level_indices = qparams

        # fake-quantize a copy of X with observer and fake quant enabled
        fake_quant = APoTFakeQuantize(b=4, k=2)
        fake_quant.enable_observer()
        fake_quant.enable_fake_quant()
        actual = fake_quant.forward(torch.clone(X), False)

        # expected: fp -> apot -> fp round trip with the reference qparams
        expected = dequantize_APoT(
            quantize_APoT(X, alpha, gamma, quantization_levels, level_indices))

        self.assertTrue(torch.equal(actual, expected))
Пример #16
0
    def __init__(self, weight2quantize: torch.Tensor, b: int, k: int):
        """Quantize a 2-D weight with APoT and store it with its qparams.

        Args:
            weight2quantize: weight tensor; must have exactly 2 dimensions.
            b: stored as ``self.b``; must be divisible by ``k``.
            k: stored as ``self.k``.

        Raises:
            AssertionError: if the weight is not 2-D or ``b`` is not
                divisible by ``k``.
        """
        # validate inputs before initialising the base class
        assert weight2quantize.dim() == 2
        assert b % k == 0

        super().__init__()

        self.b = b
        self.k = k
        self.n = self.b // self.k

        # observe the weight and derive unsigned qparams from it
        weight_observer = APoTObserver(b=self.b, k=self.k)
        weight_observer(weight2quantize)
        qparams = weight_observer.calculate_qparams(signed=False)
        self.alpha, self.gamma, self.quantization_levels, self.level_indices = qparams

        # keep the quantized data and its transpose
        apot_weight = quantize_APoT(
            weight2quantize,
            self.alpha,
            self.gamma,
            self.quantization_levels,
            self.level_indices,
        )
        self.weight = apot_weight.data
        self.weight_transposed = torch.transpose(self.weight, 0, 1)
Пример #17
0
    def test_dequantize_quantize_rand_b6(self):
        """Quantize -> dequantize -> quantize must reproduce the int data.

        The observer is constructed with the positional arguments (12, 4).
        """
        apot_observer = APoTObserver(12, 4)

        # random length in 1..20, random fp values scaled by 1000
        length = random.randint(1, 20)
        tensor2quantize = 1000 * torch.rand(length, dtype=torch.float)

        apot_observer.forward(tensor2quantize)
        qparams = apot_observer.calculate_qparams(signed=False)
        alpha, gamma, quantization_levels, level_indices = qparams

        # first quantization pass
        first_pass = quantize_APoT(
            tensor2quantize=tensor2quantize,
            alpha=alpha,
            gamma=gamma,
            quantization_levels=quantization_levels,
            level_indices=level_indices,
        )
        # clone before dequantizing so the comparison uses a snapshot
        expected = torch.clone(first_pass.data).int()

        # dequantize, then quantize again with the same qparams
        dequantized = dequantize_APoT(apot_tensor=first_pass)
        second_pass = quantize_APoT(
            tensor2quantize=dequantized,
            alpha=alpha,
            gamma=gamma,
            quantization_levels=quantization_levels,
            level_indices=level_indices,
        )
        actual = second_pass.data.int()

        self.assertTrue(torch.equal(expected, actual))
Пример #18
0
    def test_calculate_qparams(self):
        """``calculate_qparams`` is expected to raise ``NotImplementedError``."""
        # the same empty tensor is used for all three tensor arguments
        empty = torch.Tensor()
        observer = APoTObserver(empty, empty, empty, 0, 0)

        with self.assertRaises(NotImplementedError):
            observer.calculate_qparams()
Пример #19
0
    def test_calculate_qparams_invalid(self):
        """``calculate_qparams`` must raise ``AssertionError`` for b=0, k=0."""
        invalid_observer = APoTObserver(max_val=0.0, b=0, k=0)

        # only the raised exception matters; the result is not kept
        with self.assertRaises(AssertionError):
            invalid_observer.calculate_qparams(signed=False)
Пример #20
0
 def __init__(self, **observer_kwargs):
     """Initialise the base class and attach an ``APoTObserver``.

     Args:
         **observer_kwargs: forwarded unchanged to ``APoTObserver``.
     """
     super().__init__()
     observer = APoTObserver(**observer_kwargs)
     self.activation_post_process = observer