Example #1
    def quantize_generator(self,
                           quantize=False,
                           embed_quantize=EmbedQuantizeType.NONE):
        if quantize:
            qconfig_dict = {torch.nn.Linear: tq.per_channel_dynamic_qconfig}
            # embedding quantization
            if embed_quantize != EmbedQuantizeType.NONE:

                # 8-bit embedding quantization
                if embed_quantize == EmbedQuantizeType.BIT_8:
                    qconfig_dict[
                        torch.nn.Embedding] = float_qparams_weight_only_qconfig

                # 4-bit embedding quantization
                elif embed_quantize == EmbedQuantizeType.BIT_4:
                    raise NotImplementedError(
                        "4bit embedding quantization not yet supported")
                else:
                    raise NotImplementedError(
                        "Embedding Quantization should be either 8bit or 4bit")

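            # Apply dynamic quantization to the main model using the qconfig
            # mapping built above; the length prediction model is quantized
            # separately below.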
            self.model = tq.quantize_dynamic(
                self.model,
                qconfig_dict,
                dtype=torch.qint8,
                inplace=False,
            )

            self.length_prediction_model = tq.quantize_dynamic(
                self.length_prediction_model,
                {torch.nn.Linear: tq.per_channel_dynamic_qconfig},
                dtype=torch.qint8,
                inplace=False,
            )
Example #2
    def quantize(self):
        """Quantize the model during export."""
        # By default we dynamically quantize the Linear and LSTM modules of
        # PyText models; override this method if your model needs other
        # modules quantized.
        # TODO: quantized torch.nn.GRU support can be added in the future.
        tq.quantize_dynamic(
            self, {torch.nn.Linear, torch.nn.LSTM}, dtype=torch.qint8, inplace=True
        )
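The comment above suggests overriding quantize() when other module types should be covered. A rough, hypothetical sketch of such an override is shown below; the TinyTagger model, its layers, and the specific qconfig choices are illustrative assumptions (not from the source), and `tq` is taken to be torch.ao.quantization. The mapping is extended to nn.Embedding in the same way as Example #1.

import torch
import torch.nn as nn
import torch.ao.quantization as tq
from torch.ao.quantization import float_qparams_weight_only_qconfig


class TinyTagger(nn.Module):
    """Toy model used only to illustrate the override."""

    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(1000, 64)
        self.lstm = nn.LSTM(64, 64, batch_first=True)
        self.fc = nn.Linear(64, 8)

    def forward(self, tokens):
        x = self.embedding(tokens)
        x, _ = self.lstm(x)
        return self.fc(x)

    def quantize(self):
        """Dynamically quantize Linear, LSTM and Embedding weights in place."""
        tq.quantize_dynamic(
            self,
            {
                nn.Linear: tq.per_channel_dynamic_qconfig,
                nn.LSTM: tq.default_dynamic_qconfig,
                nn.Embedding: float_qparams_weight_only_qconfig,
            },
            dtype=torch.qint8,
            inplace=True,
        )

Passing the qconfig spec as a dict lets each module type use its own qconfig; in recent PyTorch versions the dtype argument is only consulted when the spec is None or a plain set of module types.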
Example #3
    def test_compare_model_outputs_linear_dynamic(self):
        r"""Compare the output of linear layer in dynamic quantized model and corresponding
        output of conv layer in float model
        """
        qengine = torch.backends.quantized.engine

        def compare_and_validate_results(float_model, q_model, data):
            act_compare_dict = compare_model_outputs(float_model, q_model,
                                                     data)
            expected_act_compare_dict_keys = {"fc1.stats"}

            self.assertTrue(
                act_compare_dict.keys() == expected_act_compare_dict_keys)
            for k, v in act_compare_dict.items():
                self.assertTrue(len(v["float"]) == len(v["quantized"]))
                for i, val in enumerate(v["quantized"]):
                    self.assertTrue(
                        v["float"][i].shape == v["quantized"][i].shape)

        linear_data = self.calib_data[0][0]

        model_list = [SingleLayerLinearDynamicModel(qengine)]
        for model in model_list:
            model.eval()
            if hasattr(model, "fuse_model"):
                model.fuse_model()
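            # No explicit qconfig_spec: quantize_dynamic applies its default
            # dynamic mapping (nn.Linear among others) with torch.qint8.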
            q_model = quantize_dynamic(model)
            compare_and_validate_results(model, q_model, linear_data)
Example #4
    def test_compare_model_stub_lstm_dynamic(self):
        r"""Compare the output of dynamic quantized LSTM layer and its float shadow module"""

        qengine = torch.backends.quantized.engine

        def compare_and_validate_results(float_model, q_model,
                                         module_swap_list, input, hidden):
            ob_dict = compare_model_stub(float_model, q_model,
                                         module_swap_list, input, hidden)
            self.assertEqual(len(ob_dict), 1)
            for k, v in ob_dict.items():
                self.assertTrue(len(v["float"]) == len(v["quantized"]))
                for i, val in enumerate(v["quantized"]):
                    self.assertTrue(
                        v["float"][i].shape == v["quantized"][i].shape)

        lstm_input = torch.rand((1, 1, 2))
        lstm_hidden = (torch.rand(1, 1, 2), torch.rand(1, 1, 2))

        model_list = [LSTMwithHiddenDynamicModel(qengine)]
        module_swap_list = [nn.Linear, nn.LSTM]
        for model in model_list:
            model.eval()
            if hasattr(model, "fuse_model"):
                model.fuse_model()
            q_model = quantize_dynamic(model)
            compare_and_validate_results(model, q_model, module_swap_list,
                                         lstm_input, lstm_hidden)
Example #5
    def test_compare_model_stub_linear_dynamic(self):
        r"""Compare the output of dynamic quantized linear layer and its float shadow module"""

        qengine = torch.backends.quantized.engine

        def compare_and_validate_results(float_model, q_model,
                                         module_swap_list, data):
            ob_dict = compare_model_stub(float_model, q_model,
                                         module_swap_list, data)
            self.assertEqual(len(ob_dict), 1)
            for k, v in ob_dict.items():
                self.assertTrue(len(v["float"]) == len(v["quantized"]))
                for i, val in enumerate(v["quantized"]):
                    self.assertTrue(
                        v["float"][i].shape == v["quantized"][i].shape)

        linear_data = self.calib_data[0][0]

        model_list = [SingleLayerLinearDynamicModel(qengine)]
        module_swap_list = [nn.Linear, nn.LSTM]
        for model in model_list:
            model.eval()
            if hasattr(model, "fuse_model"):
                model.fuse_model()
            q_model = quantize_dynamic(model)
            compare_and_validate_results(model, q_model, module_swap_list,
                                         linear_data)
Example #6
def _main():
    args = _parse_args()
    _init_logging(args.debug)
    loader = Loader()
    model = _get_model(args.model_file, args.dict_dir).eval()
    encoder = Encoder(model)
    decoder = _get_decoder()
    _LG.info(encoder)

    if args.quantize:
        _LG.info('Quantizing the model')
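        # Remove the weight normalization forward hook before quantization and scripting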
        model.encoder.transformer.pos_conv_embed.__prepare_scriptable__()
        encoder = tq.quantize_dynamic(encoder,
                                      qconfig_spec={torch.nn.Linear},
                                      dtype=torch.qint8)
        _LG.info(encoder)

    # test
    if args.test_file:
        _LG.info('Testing with %s', args.test_file)
        waveform = loader(args.test_file)
        emission = encoder(waveform)
        transcript = decoder(emission)
        _LG.info(transcript)

    torch.jit.script(loader).save(os.path.join(args.output_path, 'loader.zip'))
    torch.jit.script(decoder).save(
        os.path.join(args.output_path, 'decoder.zip'))
    scripted = torch.jit.script(encoder)
    if args.optimize_for_mobile:
        scripted = optimize_for_mobile(scripted)
    scripted.save(os.path.join(args.output_path, 'encoder.zip'))
Example #7
    def _test_quantize_torchscript(self, model):
        model.eval()

        batch_size, num_frames = 3, 1024

        # Remove the weight normalization forward hook
        model.encoder.transformer.pos_conv_embed.__prepare_scriptable__()
        quantized = tq.quantize_dynamic(
            model, qconfig_spec={torch.nn.Linear}, dtype=torch.qint8)

        # A lazy way to check that Modules are different
        assert str(quantized) != str(model), "Dynamic quantization did not modify the module."

        torch.manual_seed(0)
        waveforms = torch.randn(batch_size, num_frames)
        lengths = torch.randint(low=0, high=num_frames, size=[batch_size, ])

        ref_out, ref_len = quantized(waveforms, lengths)

        # Script
        scripted = torch_script(quantized)

        hyp_out, hyp_len = scripted(waveforms, lengths)

        self.assertEqual(hyp_out, ref_out)
        self.assertEqual(hyp_len, ref_len)
Example #8
def _main():
    args = _parse_args()
    _init_logging(args.debug)
    _LG.info('Loading model: %s', args.model)
    model, labels = _get_model(args.model)
    _LG.info('Labels: %s', labels)
    _LG.info('Building pipeline')
    loader = Loader()
    encoder = Encoder(model)
    decoder = _get_decoder(labels)
    _LG.info(encoder)

    if args.quantize:
        _LG.info('Quantizing the model')
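        # Remove the weight normalization forward hook before quantization and scripting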
        model.encoder.transformer.pos_conv_embed.__prepare_scriptable__()
        encoder = tq.quantize_dynamic(
            encoder, qconfig_spec={torch.nn.Linear}, dtype=torch.qint8)
        _LG.info(encoder)

    # test
    if args.test_file:
        _LG.info('Testing with %s', args.test_file)
        waveform = loader(args.test_file)
        emission = encoder(waveform)
        transcript = decoder(emission)
        _LG.info(transcript)

    torch.jit.script(loader).save(os.path.join(args.output_path, 'loader.zip'))
    torch.jit.script(encoder).save(os.path.join(args.output_path, 'encoder.zip'))
    torch.jit.script(decoder).save(os.path.join(args.output_path, 'decoder.zip'))
Example #9
    def __init__(
        self,
        model_list,
        tgt_dict_eos,
        beam_size: int = 2,
        quantize: bool = False,
        record_attention: bool = False,
    ):
        super().__init__()
        self.models = model_list
        self.target_dict_eos = tgt_dict_eos
        self.beam_size = beam_size
        self.record_attention = record_attention

        # Script the encoder model
        encoder_ens = EncoderEnsemble(self.models, self.beam_size)
        if quantize:
            encoder_ens = tq.quantize_dynamic(
                encoder_ens,
                {torch.nn.Linear},  # add torch.nn.LSTM here once the bug is fixed
                dtype=torch.qint8,
                inplace=False,
            )

        self.encoder_ens = torch.jit.script(encoder_ens)

        # Script the decoder step
        decoder_ens = DecoderBatchedStepEnsemble(
            self.models, beam_size, record_attention=record_attention
        )
        if quantize:
            decoder_ens = tq.quantize_dynamic(
                decoder_ens,
                {torch.nn.Linear},  # add torch.nn.LSTM here once the bug is fixed
                dtype=torch.qint8,
                inplace=False,
            )

        self.decoder_ens = torch.jit.script(decoder_ens)
Example #10
    def test_compare_weights_lstm_dynamic(self):
        r"""Compare the weights of float and dynamic quantized LSTM layer"""

        qengine = torch.backends.quantized.engine

        def compare_and_validate_results(float_model, q_model):
            weight_dict = compare_weights(float_model.state_dict(),
                                          q_model.state_dict())
            self.assertEqual(len(weight_dict), 1)
            for k, v in weight_dict.items():
                self.assertTrue(len(v["float"]) == len(v["quantized"]))
                for i, val in enumerate(v["quantized"]):
                    self.assertTrue(
                        v["float"][i].shape == v["quantized"][i].shape)

        model_list = [LSTMwithHiddenDynamicModel(qengine)]
        for model in model_list:
            model.eval()
            if hasattr(model, "fuse_model"):
                model.fuse_model()
            q_model = quantize_dynamic(model)
            compare_and_validate_results(model, q_model)
Example #11
    def test_compare_model_outputs_lstm_dynamic(self):
        r"""Compare the output of LSTM layer in dynamic quantized model and corresponding
        output of conv layer in float model
        """
        qengine = torch.backends.quantized.engine

        def compare_and_validate_results(float_model, q_model, input, hidden):
            act_compare_dict = compare_model_outputs(float_model, q_model,
                                                     input, hidden)
            expected_act_compare_dict_keys = {"lstm.stats"}

            self.assertTrue(
                act_compare_dict.keys() == expected_act_compare_dict_keys)
            for k, v in act_compare_dict.items():
                self.assertTrue(len(v["float"]) == len(v["quantized"]))
                for i, val in enumerate(v["quantized"]):
                    self.assertTrue(
                        len(v["float"][i]) == len(v["quantized"][i]))
                    if i == 0:
                        self.assertTrue(v["float"][i][0].shape ==
                                        v["quantized"][i][0].shape)
                    else:
                        self.assertTrue(v["float"][i][0].shape ==
                                        v["quantized"][i][0].shape)
                        self.assertTrue(v["float"][i][1].shape ==
                                        v["quantized"][i][1].shape)

        lstm_input = torch.rand((1, 1, 2))
        lstm_hidden = (torch.rand(1, 1, 2), torch.rand(1, 1, 2))

        model_list = [LSTMwithHiddenDynamicModel(qengine)]
        for model in model_list:
            model.eval()
            if hasattr(model, "fuse_model"):
                model.fuse_model()
            q_model = quantize_dynamic(model)
            compare_and_validate_results(model, q_model, lstm_input,
                                         lstm_hidden)