示例#1
0
    def from_observed(cls, other):
        converted = torch.quantization.convert(other,
                                               mapping=None,
                                               inplace=False,
                                               remove_qconfig=True,
                                               convert_custom_config_dict=None)
        # Remove the parameters for the bias_k and bias_v to quantize them
        # TODO: This is a potential source of accuracy drop.
        #       quantized cat takes the scale and zp of the first
        #       element, which might lose the precision in the bias_k
        #       and the bias_v (which are cat'ed with k/v being first).
        if converted.bias_k is not None:
            bias_k = converted._parameters.pop('bias_k')
            sc, zp = torch._choose_qparams_per_tensor(bias_k,
                                                      reduce_range=False)
            bias_k = torch.quantize_per_tensor(bias_k, sc, zp, torch.quint8)
            setattr(converted, 'bias_k', bias_k)  # noqa: B010

        if converted.bias_v is not None:
            bias_v = converted._parameters.pop('bias_v')
            sc, zp = torch._choose_qparams_per_tensor(bias_k,
                                                      reduce_range=False)
            bias_v = torch.quantize_per_tensor(bias_v, sc, zp, torch.quint8)
            setattr(converted, 'bias_v', bias_v)  # noqa: B010

        return converted
示例#2
0
 def test_choose_qparams(self, X, reduce_range):
     X, (scale, zero_point, torch_type) = X
     X = torch.from_numpy(X)
     X_scale, X_zp = _calculate_dynamic_qparams(X, torch.quint8, reduce_range=reduce_range)
     qparams = torch._choose_qparams_per_tensor(X, reduce_range)
     np.testing.assert_array_almost_equal(X_scale, qparams[0], decimal=3)
     self.assertEqual(X_zp, qparams[1])
示例#3
0
    def test_per_tensor_dynamic_quant_observers(self, X, reduce_range):

        X, (scale, zero_point, torch_type) = X
        x = torch.from_numpy(X)

        obs = MinMaxDynamicQuantObserver(dtype=torch.quint8, reduce_range=reduce_range)

        result = obs(x)
        qparams = obs.calculate_qparams()
        ref = torch._choose_qparams_per_tensor(x, reduce_range)

        self.assertEqual(ref[0], qparams[0])
        self.assertEqual(ref[1], qparams[1])