Пример #1
0
    def do_aquant(self, args: argparse.Namespace):
        """
Attempt to calculate quantization for graph using one or more sample input files."""
        self._check_graph()
        stats_collector = ActivationRangesCollector()
        # if replaying state file then load the activation stats if they are present
        if args.scheme == 'SQ8':
            bits = 8
        else:
            bits = args.force_width
        if self.replaying_history and self.history_stats:
            astats = self.history_stats
        else:
            input_args = self._get_input_args(args)
            processed_input = False
            for file_per_input in glob_input_files(args.input_files,
                                                   self.G.num_inputs):
                LOG.info("input file %s", file_per_input)
                processed_input = True
                data = [
                    import_data(input_file, **input_args)
                    for input_file in file_per_input
                ]
                stats_collector.collect_stats(self.G, data)
            if not processed_input:
                self.perror("No input files found")
                return
            astats = stats_collector.stats
            self._record_stats(astats)

        quantizer = UnifiedQuantizer(args.scheme,
                                     astats,
                                     quantized_dimension=args.quant_dimension,
                                     narrow_weights=not args.no_narrow_weights,
                                     bits=bits)

        qrecs = quantizer.quantize(self.G)
        self.G.quantization = qrecs
        # These should now be unnecessary
        # if args.scheme == 'SQ8':
        #     concats_matcher = EqualizeSymmetricMultiplicativeQuantivedConcats()
        #     concats_matcher.match(self.G, set_identity=False)
        #     rnns_matcher = PropagateUpRNNInputQ()
        #     rnns_matcher.match(self.G, set_identity=False)
        #     softmax_qrec_matcher = PropagateSoftmaxSymQrec()
        #     softmax_qrec_matcher.match(self.G, set_identity=False)
        #     sig_swish_qrec_matcher = PropagateUpSigSwishInputQ()
        #     sig_swish_qrec_matcher.match(self.G, set_identity=False)
        LOG.info("Quantization set. Use qshow command to see it.")
Пример #2
0
    def do_fquant(self, args: argparse.Namespace):
        """
Attempt to calculate a fake quantization for graph using random tensors and parameters.
This is intended to allow code generation for performance testing even if no real
weights and input data are avalaible."""
        self._check_graph()
        self.G.constant_store.fake = True
        stats_collector = ActivationRangesCollector()
        for _ in range(args.num_inference):
            if args.uniform:
                input_tensors = [
                    np.random.uniform(-args.uniform, args.uniform,
                                      inp.dims.shape)
                    for inp in self.G.input_nodes()
                ]
            else:
                input_tensors = [
                    np.random.normal(0, 0.2, inp.dims.shape)
                    for inp in self.G.input_nodes()
                ]
            stats_collector.collect_stats(self.G, input_tensors)
        if args.scheme == 'SQ8':
            bits = 8
        else:
            bits = args.force_width
        astats = stats_collector.stats

        quantizer = UnifiedQuantizer(args.scheme,
                                     astats,
                                     quantized_dimension=args.quant_dimension,
                                     narrow_weights=not args.no_narrow_weights,
                                     bits=bits)
        self._record_stats(astats)
        qrecs = quantizer.quantize(self.G)
        self.G.quantization = qrecs
        if args.scheme == 'SQ8':
            concats_matcher = EqualizeSymmetricMultiplicativeQuantivedConcats()
            concats_matcher.match(self.G, set_identity=False)
            softmax_qrec_matcher = PropagateSoftmaxSymQrec()
            softmax_qrec_matcher.match(self.G, set_identity=False)
        self.G.constant_store.fake = False
Пример #3
0
    def do_aquant(self, args: argparse.Namespace):
        """
Attempt to calculate quantization for graph using one or more sample input files."""
        self._check_graph()
        stats_collector = ActivationRangesCollector()
        # if replaying state file then load the activation stats if they are present
        opts = get_options_from_args(args)
        if self.replaying_history and self.history_stats:
            astats = self.history_stats
        else:
            input_args = self._get_input_args(args)
            processed_input = False
            for file_per_input in glob_input_files(args.input_files,
                                                   self.G.num_inputs):
                LOG.info("input file %s", file_per_input)
                processed_input = True
                data = [
                    import_data(input_file, **input_args)
                    for input_file in file_per_input
                ]
                stats_collector.collect_stats(self.G, data)
            if not processed_input:
                self.perror("No input files found")
                return
            astats = stats_collector.stats
            self._record_stats(astats)

        if args.force_width:
            opts['bits'] = args.force_width

        quantizer = UnifiedQuantizer(args.scheme, astats, **opts)
        # clear the existing quantization
        self.G.quantization = None
        qrecs = quantizer.quantize(self.G)
        self.G.quantization = qrecs
        RemoveUnnecessaryQuantizeOperators().match(self.G)
        self.G.add_dimensions()
        LOG.info("Quantization set. Use qshow command to see it.")
Пример #4
0
    def do_fquant(self, args: argparse.Namespace):
        """
Attempt to calculate a fake quantization for graph using random tensors and parameters.
This is intended to allow code generation for performance testing even if no real
weights and input data are avalaible."""
        self._check_graph()
        opts = get_options_from_args(args)
        if self.replaying_history and self.history_stats:
            astats = self.history_stats
        else:
            self.G.constant_store.fake = True
            stats_collector = ActivationRangesCollector()
            for _ in range(args.num_inference):
                if args.uniform:
                    input_tensors = [np.random.uniform(-args.uniform, args.uniform, inp.dims.shape)
                                     for inp in self.G.input_nodes()]
                else:
                    input_tensors = [np.random.normal(0, 0.2, inp.dims.shape)
                                     for inp in self.G.input_nodes()]
                stats_collector.collect_stats(self.G, input_tensors)
            astats = stats_collector.stats
            self._record_stats(astats)
            self.G.constant_store.fake = False

        if args.force_width:
            opts['bits'] = args.force_width

        quantizer = UnifiedQuantizer(args.scheme, astats,
                                     **opts)

        # clear the existing quantization
        self.G.quantization = None
        qrecs = quantizer.quantize(self.G)
        self.G.quantization = qrecs
        RemoveUnnecessaryQuantizeOperators().match(self.G)
        self.G.add_dimensions()
        LOG.info("Quantization set. Use qshow command to see it.")